Enhance object expiration

This change re-introduces the object expiration feature with some internal differences from the earlier implementation, such as: * Creation of the zero-byte tracker object is performed directly on the mount point. Earlier, an HTTP request was sent to the object server for the same. This incurred the overhead of metadata creation for these zero-byte files, which isn't necessary as all required information is encoded in the path itself. * Crawling of zero-byte tracker objects is done by the object expirer daemon itself and not by the container server. * Deletion of the tracker object is performed by the object expiration daemon directly on the mount point. Deletion of the actual data object is not carried out by the object expiration daemon directly. The object expirer sends a DELETE request to the object server, which deletes the actual object. This behaviour is not changed. There is no change in behaviour in comparison with the older implementation. This is asserted by re-enabling existing functional tests without any changes. Change-Id: I01dc77cc4db3be3147d54e3aa2a19ed182498900 Signed-off-by: Prashanth Pai <ppai@redhat.com> Reviewed-on: http://review.gluster.org/13913 Reviewed-by: Thiago da Silva <thiago@redhat.com> Tested-by: Thiago da Silva <thiago@redhat.com>
author: Prashanth Pai <ppai@redhat.com> 2016-03-18 18:09:46 +0530
committer: Thiago da Silva <thiago@redhat.com> 2016-04-15 05:27:54 -0700
commit: 2bd696e392e420a2521dcca0b8613122d8169025 (patch)
tree: 9cd42e35957c0df07696e160f357019fde78c2d4 /gluster/swift/obj/expirer.py
parent: 43da7927560811c55838a6a1c2d0ee1a52aada40 (diff)
1 files changed, 160 insertions, 0 deletions
diff --git a/gluster/swift/obj/expirer.py b/gluster/swift/obj/expirer.py
new file mode 100644
index 0000000..564a2c9
--- /dev/null
+++ b/gluster/swift/obj/expirer.py
@@ -0,0 +1,160 @@
+# Copyright (c) 2016 Red Hat
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This import will monkey-patch Ring and other classes.
+# Do not remove.
+import gluster.swift.common.constraints  # noqa
+
+import errno
+import os
+
+from gluster.swift.common.utils import rmobjdir
+
+from swift.obj.expirer import ObjectExpirer as SwiftObjectExpirer
+from swift.common.http import HTTP_CONFLICT, HTTP_NOT_FOUND
+from swift.common.internal_client import InternalClient, UnexpectedResponse
+from swift.common.utils import ThreadPool
+
+# Directory names that iter_containers() skips when listing an account
+# directory (presumably GlusterFS-internal directories rather than
+# containers -- TODO confirm).
+EXCLUDE_DIRS = ('.trashcan', '.glusterfs')
+
+
+class GlusterSwiftInternalClient(InternalClient):
+    """
+    InternalClient whose queue-related calls are served from the filesystem.
+
+    Listing the containers and objects of an account and deleting a
+    container are done with os calls on paths below ``devices`` instead of
+    HTTP requests. All other InternalClient methods are inherited unchanged.
+    """
+
+    def __init__(self, conf_path, user_agent, request_tries,
+                 allow_modify_pipeline=False, devices=None):
+        """
+        :param conf_path: forwarded to InternalClient
+        :param user_agent: forwarded to InternalClient
+        :param request_tries: forwarded to InternalClient
+        :param allow_modify_pipeline: forwarded to InternalClient
+        :param devices: directory that holds one sub-directory per account;
+                        the filesystem-backed methods build paths as
+                        ``<devices>/<account>/<container>``
+        """
+        super(GlusterSwiftInternalClient, self).__init__(
+            conf_path, user_agent, request_tries, allow_modify_pipeline)
+        self.devices = devices
+
+    def get_account_info(self, account):
+        """
+        Return a constant ``(0, 0)``.
+
+        Supposed to return container count and object count in gsexpiring
+        account. This is used by object expirer only for logging.
+        """
+        return (0, 0)
+
+    def delete_container(self, account, container, acceptable_statuses=None):
+        """
+        Remove the directory ``<devices>/<account>/<container>``.
+
+        A directory that is already gone is always ignored. A directory that
+        still has entries is ignored only when the caller lists
+        HTTP_CONFLICT in ``acceptable_statuses`` (the status an HTTP DELETE
+        of a non-empty container would produce); otherwise the error
+        propagates.
+
+        :param account: account directory name
+        :param container: container directory name
+        :param acceptable_statuses: optional iterable of HTTP status codes
+                                    the caller tolerates; only the presence
+                                    of HTTP_CONFLICT is examined
+        :raises OSError: for every rmdir failure not described above
+        """
+        container_path = os.path.join(self.devices, account, container)
+        try:
+            os.rmdir(container_path)
+        except OSError as err:
+            if err.errno == errno.ENOENT:
+                return
+            # POSIX lets rmdir() report a non-empty directory as either
+            # ENOTEMPTY or EEXIST.
+            if err.errno in (errno.ENOTEMPTY, errno.EEXIST) and \
+                    HTTP_CONFLICT in (acceptable_statuses or ()):
+                return
+            raise
+
+    def iter_containers(self, account):
+        """
+        Yield ``{'name': <container>}`` for each sub-directory of
+        ``<devices>/<account>``, skipping the names in EXCLUDE_DIRS.
+
+        :raises OSError: if the account directory cannot be listed
+        """
+        account_path = os.path.join(self.devices, account)
+        for container in os.listdir(account_path):
+            if container in EXCLUDE_DIRS:
+                continue
+            container_path = os.path.join(account_path, container)
+            # Plain files directly under the account directory are not
+            # containers.
+            if os.path.isdir(container_path):
+                yield {'name': container.encode('utf8')}
+
+    def iter_objects(self, account, container):
+        """
+        Yield ``{'name': <object>}`` for every file found anywhere below
+        ``<devices>/<account>/<container>``.
+
+        The name is the file's path relative to the container directory.
+        os.walk() is used with its default error handling, so a directory
+        that cannot be listed contributes no entries.
+        """
+        container_path = os.path.join(self.devices, account, container)
+        # TODO: Use a slightly better implementation of os.walk()
+        for (root, dirs, files) in os.walk(container_path):
+            for f in files:
+                obj_path = os.path.join(root, f)
+                # Strip "<container_path>/" to get the relative name.
+                obj = obj_path[(len(container_path) + 1):]
+                yield {'name': obj.encode('utf8')}
+
+
+class ObjectExpirer(SwiftObjectExpirer):
+    """
+    Object expirer whose queue of tracker objects lives on the filesystem.
+
+    The queue is read through GlusterSwiftInternalClient and queue entries
+    are removed by unlinking files below ``devices``. Deleting the actual
+    data object is still delegated to the parent class.
+    """
+
+    def __init__(self, conf, logger=None, swift=None):
+        """
+        :param conf: configuration mapping; the keys '__file__', 'devices',
+                     'request_tries' and 'reseller_prefix' are read here
+        :param logger: forwarded to the parent class
+        :param swift: ignored; a GlusterSwiftInternalClient is always built
+                      and handed to the parent class instead
+        """
+        self.devices = conf.get('devices', '/mnt/gluster-object')
+        # One attempt by default: a DELETE answered with 404 must not be
+        # retried.
+        client = GlusterSwiftInternalClient(
+            conf.get('__file__') or '/etc/swift/object-expirer.conf',
+            'Gluster Swift Object Expirer',
+            int(conf.get('request_tries') or 1),
+            devices=self.devices)
+        # The parent class takes care of initializing self.swift.
+        super(ObjectExpirer, self).__init__(conf, logger=logger, swift=client)
+
+        # Stored with exactly one guaranteed trailing underscore.
+        prefix = conf.get('reseller_prefix', 'AUTH').strip()
+        if not prefix.endswith('_'):
+            prefix += '_'
+        self.reseller_prefix = prefix
+
+        # nthreads=0 is deliberate: no green pool is created. Work handed to
+        # force_run_in_thread() runs in a real external thread through
+        # eventlet.tpool, which has a threadpool of 20 threads (default).
+        self.threadpool = ThreadPool(nthreads=0)
+
+    def _delete_tracker_object(self, container, obj):
+        """
+        Unlink one tracker object, then remove the directories above it.
+
+        :param container: container name inside the expiring objects account
+        :param obj: tracker object path relative to the container directory
+        :raises OSError: if unlink fails with anything other than ENOENT,
+                         ESTALE or EISDIR
+        """
+        queue_dir = os.path.join(self.devices,
+                                 self.expiring_objects_account, container)
+        tracker_path = os.path.join(queue_dir, obj)
+
+        try:
+            os.unlink(tracker_path)
+        except OSError as err:
+            # ENOENT / ESTALE: another entity already removed the entry.
+            # EISDIR: race -- it was a file during the crawl but is a
+            # directory now; there are no 'directory marker' objects in the
+            # gsexpiring volume.
+            if err.errno not in (errno.ENOENT, errno.ESTALE, errno.EISDIR):
+                raise
+            return
+
+        # Much like DiskFile._unlinkold(): climb towards the container root,
+        # stopping as soon as rmobjdir() reports that a directory which
+        # still holds objects has been reached.
+        parent = os.path.dirname(tracker_path)
+        while parent and parent != queue_dir and \
+                rmobjdir(parent, marker_dir_check=False):
+            parent = os.path.dirname(parent)
+
+    def pop_queue(self, container, obj):
+        """
+        Drop an entry from the expiration queue.
+
+        Swift removes the tracker object entry directly from the container
+        database; gluster-swift deletes the tracker object directly from
+        the filesystem, in an external thread.
+        """
+        self.threadpool.force_run_in_thread(
+            self._delete_tracker_object, container, obj)
+
+    def delete_actual_object(self, actual_obj, timestamp):
+        """
+        Call the parent class's method and swallow 404 responses.
+
+        Swift's expirer keeps re-attempting when the source object is not
+        available (404 or ANY other error) for up to self.reclaim_age
+        seconds before giving up and deleting the queue entry.
+
+        gluster-swift does not do that for 404: GlusterFS isn't eventually
+        consistent and has no concept of hand-off nodes, so when the actual
+        data object does not exist the tracker object is removed from the
+        queue (filesystem).
+
+        When the DELETE fails for a reason other than 404 the error is
+        re-raised, the tracker object is kept, and Swift's behaviour of
+        waiting up to self.reclaim_age seconds applies.
+        """
+        try:
+            super(ObjectExpirer, self).delete_actual_object(
+                actual_obj, timestamp)
+        except UnexpectedResponse as err:
+            if err.resp.status_int == HTTP_NOT_FOUND:
+                return
+            raise
author	Prashanth Pai <ppai@redhat.com>	2016-03-18 18:09:46 +0530
committer	Thiago da Silva <thiago@redhat.com>	2016-04-15 05:27:54 -0700
commit	2bd696e392e420a2521dcca0b8613122d8169025 (patch)
tree	9cd42e35957c0df07696e160f357019fde78c2d4 /gluster/swift/obj/expirer.py
parent	43da7927560811c55838a6a1c2d0ee1a52aada40 (diff)