summaryrefslogtreecommitdiffstats
path: root/gluster/swift
diff options
context:
space:
mode:
authorPrashanth Pai <ppai@redhat.com>2016-03-18 18:09:46 +0530
committerThiago da Silva <thiago@redhat.com>2016-04-15 05:27:54 -0700
commit2bd696e392e420a2521dcca0b8613122d8169025 (patch)
tree9cd42e35957c0df07696e160f357019fde78c2d4 /gluster/swift
parent43da7927560811c55838a6a1c2d0ee1a52aada40 (diff)
Enhance object expiration
This change re-introduces object expiration feature with some internal differences from earlier implementation such as: * Creation of zero-byte tracker object is performed directly on the mount point. Earlier HTTP request was sent to object server for the same. This incurred overhead of metadata creation for these zero-byte files which isn't necessary as all required information is encoded in the path itself. * Crawling of zero-byte tracker objects is done by the object expirer daemon itself and not container server. * Deletion of tracker object is performed by the object expiration daemon directly on mount point. Deletion of actual data object is not carried out by object expiration daemon directly. The object expirer sends a DELETE request to object server which deletes the actual object. This behaviour is not changed. There is no change in behaviour in comparison with older implementation. This is asserted by re-enabling existing functional tests without any changes. Change-Id: I01dc77cc4db3be3147d54e3aa2a19ed182498900 Signed-off-by: Prashanth Pai <ppai@redhat.com> Reviewed-on: http://review.gluster.org/13913 Reviewed-by: Thiago da Silva <thiago@redhat.com> Tested-by: Thiago da Silva <thiago@redhat.com>
Diffstat (limited to 'gluster/swift')
-rw-r--r--gluster/swift/common/DiskDir.py2
-rw-r--r--gluster/swift/common/utils.py27
-rw-r--r--gluster/swift/obj/expirer.py160
-rw-r--r--gluster/swift/obj/server.py78
4 files changed, 242 insertions, 25 deletions
diff --git a/gluster/swift/common/DiskDir.py b/gluster/swift/common/DiskDir.py
index 36504a6..d314a1f 100644
--- a/gluster/swift/common/DiskDir.py
+++ b/gluster/swift/common/DiskDir.py
@@ -559,7 +559,7 @@ class DiskDir(DiskCommon):
self.metadata[X_PUT_TIMESTAMP] = (timestamp, 0)
write_metadata(self.datadir, self.metadata)
- def delete_object(self, name, timestamp):
+ def delete_object(self, name, timestamp, obj_policy_index):
# NOOP - should never be called since object file removal occurs
# within a directory implicitly.
return
diff --git a/gluster/swift/common/utils.py b/gluster/swift/common/utils.py
index 8958717..1bbc56c 100644
--- a/gluster/swift/common/utils.py
+++ b/gluster/swift/common/utils.py
@@ -556,7 +556,7 @@ def dir_is_object(metadata):
return metadata.get(X_OBJECT_TYPE, "") == DIR_OBJECT
-def rmobjdir(dir_path):
+def rmobjdir(dir_path, marker_dir_check=True):
"""
Removes the directory as long as there are no objects stored in it. This
works for containers also.
@@ -580,18 +580,19 @@ def rmobjdir(dir_path):
for directory in dirs:
fullpath = os.path.join(path, directory)
- try:
- metadata = read_metadata(fullpath)
- except GlusterFileSystemIOError as err:
- if err.errno in (errno.ENOENT, errno.ESTALE):
- # Ignore removal from another entity.
- continue
- raise
- else:
- if dir_is_object(metadata):
- # Wait, this is an object created by the caller
- # We cannot delete
- return False
+ if marker_dir_check:
+ try:
+ metadata = read_metadata(fullpath)
+ except GlusterFileSystemIOError as err:
+ if err.errno in (errno.ENOENT, errno.ESTALE):
+ # Ignore removal from another entity.
+ continue
+ raise
+ else:
+ if dir_is_object(metadata):
+ # Wait, this is an object created by the caller
+ # We cannot delete
+ return False
# Directory is not an object created by the caller
# so we can go ahead and delete it.
diff --git a/gluster/swift/obj/expirer.py b/gluster/swift/obj/expirer.py
new file mode 100644
index 0000000..564a2c9
--- /dev/null
+++ b/gluster/swift/obj/expirer.py
@@ -0,0 +1,160 @@
+# Copyright (c) 2016 Red Hat
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This import will monkey-patch Ring and other classes.
+# Do not remove.
+import gluster.swift.common.constraints # noqa
+
+import errno
+import os
+
+from gluster.swift.common.utils import rmobjdir
+
+from swift.obj.expirer import ObjectExpirer as SwiftObjectExpirer
+from swift.common.http import HTTP_NOT_FOUND
+from swift.common.internal_client import InternalClient, UnexpectedResponse
+from swift.common.utils import ThreadPool
+
+EXCLUDE_DIRS = ('.trashcan', '.glusterfs')
+
+
+class GlusterSwiftInternalClient(InternalClient):
+
+ def __init__(self, conf_path, user_agent, request_tries,
+ allow_modify_pipeline=False, devices=None):
+ super(GlusterSwiftInternalClient, self).__init__(
+ conf_path, user_agent, request_tries, allow_modify_pipeline)
+ self.devices = devices
+
+ def get_account_info(self, account):
+ # Supposed to return container count and object count in gsexpiring
+ # account. This is used by object expirer only for logging.
+ return (0, 0)
+
+ def delete_container(self, account, container, acceptable_statuses=None):
+ container_path = os.path.join(self.devices, account, container)
+ try:
+ os.rmdir(container_path)
+ except OSError as err:
+ if err.errno != errno.ENOENT:
+ raise
+
+ def iter_containers(self, account):
+ account_path = os.path.join(self.devices, account)
+ for container in os.listdir(account_path):
+ if container in EXCLUDE_DIRS:
+ continue
+ container_path = os.path.join(account_path, container)
+ if os.path.isdir(container_path):
+ yield {'name': container.encode('utf8')}
+
+ def iter_objects(self, account, container):
+ container_path = os.path.join(self.devices, account, container)
+ # TODO: Use a slightly better implementation of os.walk()
+ for (root, dirs, files) in os.walk(container_path):
+ for f in files:
+ obj_path = os.path.join(root, f)
+ obj = obj_path[(len(container_path) + 1):]
+ yield {'name': obj.encode('utf8')}
+
+
+class ObjectExpirer(SwiftObjectExpirer):
+
+ def __init__(self, conf, logger=None, swift=None):
+
+ conf_path = conf.get('__file__') or '/etc/swift/object-expirer.conf'
+ self.devices = conf.get('devices', '/mnt/gluster-object')
+ # Do not retry DELETEs on getting 404. Hence default is set to 1.
+ request_tries = int(conf.get('request_tries') or 1)
+ # Use our extended version of InternalClient
+ swift = GlusterSwiftInternalClient(
+ conf_path, 'Gluster Swift Object Expirer', request_tries,
+ devices=self.devices)
+ # Let the parent class initialize self.swift
+ super(ObjectExpirer, self).__init__(conf, logger=logger, swift=swift)
+
+ self.reseller_prefix = conf.get('reseller_prefix', 'AUTH').strip()
+ if not self.reseller_prefix.endswith('_'):
+ self.reseller_prefix = self.reseller_prefix + '_'
+
+ # nthread=0 is intentional. This ensures that no green pool is
+ # used. Call to force_run_in_thread() will ensure that the method
+ # passed as arg is run in a real external thread using eventlet.tpool
+ # which has a threadpool of 20 threads (default)
+ self.threadpool = ThreadPool(nthreads=0)
+
+ def _delete_tracker_object(self, container, obj):
+ container_path = os.path.join(self.devices,
+ self.expiring_objects_account,
+ container)
+ tracker_object_path = os.path.join(container_path, obj)
+
+ try:
+ os.unlink(tracker_object_path)
+ except OSError as err:
+ if err.errno in (errno.ENOENT, errno.ESTALE):
+ # Ignore removal from another entity.
+ return
+ elif err.errno == errno.EISDIR:
+ # Handle race: Was a file during crawl, but now it's a
+ # directory. There are no 'directory marker' objects in
+ # gsexpiring volume.
+ return
+ else:
+ raise
+
+ # This part of code is very similar to DiskFile._unlinkold()
+ dirname = os.path.dirname(tracker_object_path)
+ while dirname and dirname != container_path:
+ if not rmobjdir(dirname, marker_dir_check=False):
+ # If a directory with objects has been found, we can stop
+ # garbage collection
+ break
+ else:
+ # Traverse upwards till the root of container
+ dirname = os.path.dirname(dirname)
+
+ def pop_queue(self, container, obj):
+ """
+ In Swift, this method removes tracker object entry directly from
+ container database. In gluster-swift, this method deletes tracker
+ object directly from filesystem.
+ """
+ self.threadpool.force_run_in_thread(self._delete_tracker_object,
+ container, obj)
+
+ def delete_actual_object(self, actual_obj, timestamp):
+ """
+ Swift's expirer will re-attempt expiring if the source object is not
+ available (404 or ANY other error) up to self.reclaim_age seconds
+ before it gives up and deletes the entry in the queue.
+
+ Don't do this in gluster-swift. GlusterFS isn't eventually consistent
+ and has no concept of hand-off nodes. If actual data object doesn't
+ exist (404), remove tracker object from the queue (filesystem).
+
+ However if DELETE fails due to a reason other than 404, do not remove
+ tracker object yet, follow Swift's behaviour of waiting till
+ self.reclaim_age seconds.
+
+ This method is just a wrapper around parent class's method. All this
+ wrapper does is ignore 404 failures.
+ """
+ try:
+ super(ObjectExpirer, self).delete_actual_object(
+ actual_obj, timestamp)
+ except UnexpectedResponse as err:
+ if err.resp.status_int != HTTP_NOT_FOUND:
+ raise
diff --git a/gluster/swift/obj/server.py b/gluster/swift/obj/server.py
index 1d8d418..3e27cc3 100644
--- a/gluster/swift/obj/server.py
+++ b/gluster/swift/obj/server.py
@@ -14,16 +14,19 @@
# limitations under the License.
""" Object Server for Gluster for Swift """
+import errno
+import os
from swift.common.swob import HTTPConflict, HTTPNotImplemented
-from swift.common.utils import public, timing_stats, replication
-from gluster.swift.common.exceptions import AlreadyExistsAsFile, \
- AlreadyExistsAsDir
+from swift.common.utils import public, timing_stats, replication, mkdirs
from swift.common.request_helpers import split_and_validate_path
-
from swift.obj import server
from gluster.swift.obj.diskfile import DiskFileManager
+from gluster.swift.common.fs_utils import do_ismount
+from gluster.swift.common.ring import Ring
+from gluster.swift.common.exceptions import AlreadyExistsAsFile, \
+ AlreadyExistsAsDir
class GlusterSwiftDiskFileRouter(object):
@@ -55,6 +58,9 @@ class ObjectController(server.ObjectController):
"""
# Replaces Swift's DiskFileRouter object reference with ours.
self._diskfile_router = GlusterSwiftDiskFileRouter(conf, self.logger)
+ self.devices = conf.get('devices', '/mnt/gluster-object')
+ self.swift_dir = conf.get('swift_dir', '/etc/swift')
+ self.object_ring = self.get_object_ring()
def container_update(self, *args, **kwargs):
"""
@@ -65,15 +71,65 @@ class ObjectController(server.ObjectController):
"""
return
- def delete_at_update(self, *args, **kwargs):
- """
- Update the expiring objects container when objects are updated.
+ def get_object_ring(self):
+ return Ring(self.swift_dir, ring_name='object')
- For Gluster, this is a no-op as there are no container DB entries
- to be created that tracks objects to be expired. Objects to be
- expired will be determined by crawling the filesystem directly.
+ def _create_expiring_tracker_object(self, object_path):
+ try:
+
+ # Check if gsexpiring volume is present in ring
+ if not any(d.get('device', None) == self.expiring_objects_account
+ for d in self.object_ring.devs):
+ raise Exception("%s volume not in ring" %
+ self.expiring_objects_account)
+
+ # Check if gsexpiring is mounted.
+ expiring_objects_account_path = \
+ os.path.join(self.devices, self.expiring_objects_account)
+ mount_check = self._diskfile_router['junk'].mount_check
+ if mount_check and not do_ismount(expiring_objects_account_path):
+ raise Exception("Path %s doesn't exist or is not a mount "
+ "point" % expiring_objects_account_path)
+
+ # Create object directory
+ object_dir = os.path.dirname(object_path)
+ try:
+ mkdirs(object_dir)
+ except OSError as err:
+ mkdirs(object_dir) # handle race
+
+ # Create zero-byte file
+ try:
+ os.mknod(object_path)
+ except OSError as err:
+ if err.errno != errno.EEXIST:
+ raise
+ except Exception as e:
+ self.logger.error("Creation of tracker object %s failed: %s" %
+ (object_path, str(e)))
+
+ def async_update(self, op, account, container, obj, host, partition,
+ contdevice, headers_out, objdevice, policy):
"""
- return
+ In Openstack Swift, this method is called by:
+ * container_update (a no-op in gluster-swift)
+ * delete_at_update (to PUT objects into .expiring_objects account)
+
+ The Swift's version of async_update only sends the request to
+ container-server to PUT the object. The container-server calls
+ container_update method which makes an entry for the object in its
+ database. No actual object is created on disk.
+
+ But in gluster-swift container_update is a no-op, so we'll
+ have to PUT an actual object. We override async_update to create a
+ container first and then the corresponding "tracker object" which
+ tracks expired objects scheduled for deletion.
+ """
+ object_path = os.path.join(self.devices, account, container, obj)
+
+ threadpool = self._diskfile_router[policy].threadpools[objdevice]
+ threadpool.run_in_thread(self._create_expiring_tracker_object,
+ object_path)
@public
@timing_stats()