diff options
Diffstat (limited to 'gluster/swift/common/utils.py')
-rw-r--r-- | gluster/swift/common/utils.py | 234 |
1 files changed, 122 insertions, 112 deletions
diff --git a/gluster/swift/common/utils.py b/gluster/swift/common/utils.py index 5547ea4..6503644 100644 --- a/gluster/swift/common/utils.py +++ b/gluster/swift/common/utils.py @@ -24,9 +24,8 @@ from eventlet import sleep import cPickle as pickle from swift.common.utils import normalize_timestamp from gluster.swift.common.fs_utils import do_rename, do_fsync, os_path, \ - do_stat, do_listdir, do_walk, dir_empty, rmdirs + do_stat, do_listdir, do_walk, do_rmdir from gluster.swift.common import Glusterfs -from gluster.swift.common.exceptions import FileOrDirNotFoundError X_CONTENT_TYPE = 'Content-Type' X_CONTENT_LENGTH = 'Content-Length' @@ -56,11 +55,23 @@ PICKLE_PROTOCOL = 2 CHUNK_SIZE = 65536 -def read_metadata(path): +class GlusterFileSystemOSError(OSError): + # Having our own class means the name will show up in the stack traces + # recorded in the log files. + pass + + +class GlusterFileSystemIOError(IOError): + # Having our own class means the name will show up in the stack traces + # recorded in the log files. + pass + + +def read_metadata(path_or_fd): """ Helper function to read the pickled metadata from a File/Directory. - :param path: File/Directory to read metadata from. + :param path_or_fd: File/Directory path or fd from which to read metadata. :returns: dictionary of metadata """ @@ -69,7 +80,7 @@ def read_metadata(path): key = 0 while metadata is None: try: - metadata_s += xattr.getxattr(path, + metadata_s += xattr.getxattr(path_or_fd, '%s%s' % (METADATA_KEY, (key or ''))) except IOError as err: if err.errno == errno.ENODATA: @@ -79,18 +90,17 @@ def read_metadata(path): # unpickle operation, we consider the metadata lost, and # drop the existing data so that the internal state can be # recreated. - clean_metadata(path) + clean_metadata(path_or_fd) # We either could not find any metadata key, or we could find # some keys, but were not successful in performing the # unpickling (missing keys perhaps)? Either way, just report # to the caller we have no metadata. metadata = {} else: - logging.exception("xattr.getxattr failed on %s key %s err: %s", - path, key, str(err)) # Note that we don't touch the keys on errors fetching the # data since it could be a transient state. - raise + raise GlusterFileSystemIOError( + err.errno, 'xattr.getxattr("%s", %s)' % (path_or_fd, key)) else: try: # If this key provides all or the remaining part of the pickle @@ -109,38 +119,39 @@ def read_metadata(path): return metadata -def write_metadata(path, metadata): +def write_metadata(path_or_fd, metadata): """ Helper function to write pickled metadata for a File/Directory. - :param path: File/Directory path to write the metadata - :param metadata: dictionary to metadata write + :param path_or_fd: File/Directory path or fd to write the metadata + :param metadata: dictionary of metadata write """ assert isinstance(metadata, dict) metastr = pickle.dumps(metadata, PICKLE_PROTOCOL) key = 0 while metastr: try: - xattr.setxattr(path, + xattr.setxattr(path_or_fd, '%s%s' % (METADATA_KEY, key or ''), metastr[:MAX_XATTR_SIZE]) except IOError as err: - logging.exception("setxattr failed on %s key %s err: %s", - path, key, str(err)) - raise + raise GlusterFileSystemIOError( + err.errno, + 'xattr.setxattr("%s", %s, metastr)' % (path_or_fd, key)) metastr = metastr[MAX_XATTR_SIZE:] key += 1 -def clean_metadata(path): +def clean_metadata(path_or_fd): key = 0 while True: try: - xattr.removexattr(path, '%s%s' % (METADATA_KEY, (key or ''))) + xattr.removexattr(path_or_fd, '%s%s' % (METADATA_KEY, (key or ''))) except IOError as err: if err.errno == errno.ENODATA: break - raise + raise GlusterFileSystemIOError( + err.errno, 'xattr.removexattr("%s", %s)' % (path_or_fd, key)) key += 1 @@ -150,9 +161,9 @@ def check_user_xattr(path): try: xattr.setxattr(path, 'user.test.key1', 'value1') except IOError as err: - logging.exception("check_user_xattr: set failed on %s err: %s", - path, str(err)) - raise + raise GlusterFileSystemIOError( + err.errno, + 'xattr.setxattr("%s", "user.test.key1", "value1")' % (path,)) try: xattr.removexattr(path, 'user.test.key1') except IOError as err: @@ -245,7 +256,7 @@ def _update_list(path, cont_path, src_list, reg_file=True, object_count=0, object_count += 1 - if Glusterfs._do_getsize and reg_file: + if reg_file and Glusterfs._do_getsize: bytes_used += os_path.getsize(os.path.join(path, obj_name)) sleep() @@ -254,7 +265,6 @@ def _update_list(path, cont_path, src_list, reg_file=True, object_count=0, def update_list(path, cont_path, dirs=[], files=[], object_count=0, bytes_used=0, obj_list=[]): - if files: object_count, bytes_used = _update_list(path, cont_path, files, True, object_count, bytes_used, @@ -266,22 +276,13 @@ def update_list(path, cont_path, dirs=[], files=[], object_count=0, return object_count, bytes_used -class ContainerDetails(object): - def __init__(self, bytes_used, object_count, obj_list, dir_list): - self.bytes_used = bytes_used - self.object_count = object_count - self.obj_list = obj_list - self.dir_list = dir_list - - -def _get_container_details_from_fs(cont_path): +def get_container_details(cont_path): """ get container details by traversing the filesystem """ bytes_used = 0 object_count = 0 obj_list = [] - dir_list = [] if os_path.isdir(cont_path): for (path, dirs, files) in do_walk(cont_path): @@ -289,36 +290,12 @@ def _get_container_details_from_fs(cont_path): files, object_count, bytes_used, obj_list) - dir_list.append((path, do_stat(path).st_mtime)) sleep() - return ContainerDetails(bytes_used, object_count, obj_list, dir_list) + return obj_list, object_count, bytes_used -def get_container_details(cont_path): - """ - Return object_list, object_count and bytes_used. - """ - cd = _get_container_details_from_fs(cont_path) - - return cd.obj_list, cd.object_count, cd.bytes_used - - -class AccountDetails(object): - """ A simple class to store the three pieces of information associated - with an account: - - 1. The last known modification time - 2. The count of containers in the following list - 3. The list of containers - """ - def __init__(self, mtime, container_count, container_list): - self.mtime = mtime - self.container_count = container_count - self.container_list = container_list - - -def _get_account_details_from_fs(acc_path): +def get_account_details(acc_path): """ Return container_list and container_count. """ @@ -326,35 +303,40 @@ def _get_account_details_from_fs(acc_path): container_count = 0 acc_stats = do_stat(acc_path) - is_dir = stat.S_ISDIR(acc_stats.st_mode) - if is_dir: - for name in do_listdir(acc_path): - if name.lower() == TEMP_DIR \ - or name.lower() == ASYNCDIR \ - or not os_path.isdir(os.path.join(acc_path, name)): - continue - container_count += 1 - container_list.append(name) + if acc_stats: + is_dir = stat.S_ISDIR(acc_stats.st_mode) + if is_dir: + for name in do_listdir(acc_path): + if name.lower() == TEMP_DIR \ + or name.lower() == ASYNCDIR \ + or not os_path.isdir(os.path.join(acc_path, name)): + continue + container_count += 1 + container_list.append(name) - return AccountDetails(acc_stats.st_mtime, container_count, container_list) + return container_list, container_count -def get_account_details(acc_path): - """ - Return container_list and container_count. +def _get_etag(path): """ - ad = _get_account_details_from_fs(acc_path) + FIXME: It would be great to have a translator that returns the md5sum() of + the file as an xattr that can be simply fetched. - return ad.container_list, ad.container_count - - -def _get_etag(path): + Since we don't have that we should yield after each chunk read and + computed so that we don't consume the worker thread. + """ etag = md5() with open(path, 'rb') as fp: while True: chunk = fp.read(CHUNK_SIZE) if chunk: etag.update(chunk) + if len(chunk) >= CHUNK_SIZE: + # It is likely that we have more data to be read from the + # file. Yield the co-routine cooperatively to avoid + # consuming the worker during md5sum() calculations on + # large files. + sleep() else: break return etag.hexdigest() @@ -364,14 +346,11 @@ def get_object_metadata(obj_path): """ Return metadata of object. """ - try: - stats = do_stat(obj_path) - except OSError as e: - if e.errno != errno.ENOENT: - raise + stats = do_stat(obj_path) + if not stats: metadata = {} else: - is_dir = (stats.st_mode & 0040000) != 0 + is_dir = stat.S_ISDIR(stats.st_mode) metadata = { X_TYPE: OBJECT, X_TIMESTAMP: normalize_timestamp(stats.st_ctime), @@ -444,12 +423,14 @@ def create_object_metadata(obj_path): def create_container_metadata(cont_path): metadata = get_container_metadata(cont_path) - return restore_metadata(cont_path, metadata) + rmd = restore_metadata(cont_path, metadata) + return rmd def create_account_metadata(acc_path): metadata = get_account_metadata(acc_path) - return restore_metadata(acc_path, metadata) + rmd = restore_metadata(acc_path, metadata) + return rmd def write_pickle(obj, dest, tmp=None, pickle_protocol=0): @@ -498,42 +479,71 @@ def dir_is_object(metadata): def rmobjdir(dir_path): """ - Removes the directory as long as there are - no objects stored in it. This works for containers also. + Removes the directory as long as there are no objects stored in it. This + works for containers also. """ try: - if dir_empty(dir_path): - rmdirs(dir_path) - return True - except FileOrDirNotFoundError: - # No such directory exists - return False + do_rmdir(dir_path) + except OSError as err: + if err.errno == errno.ENOENT: + # No such directory exists + return False + if err.errno != errno.ENOTEMPTY: + raise + # Handle this non-empty directories below. + else: + return True + # We have a directory that is not empty, walk it to see if it is filled + # with empty sub-directories that are not user created objects + # (gratuitously created as a result of other object creations). for (path, dirs, files) in do_walk(dir_path, topdown=False): for directory in dirs: fullpath = os.path.join(path, directory) - metadata = read_metadata(fullpath) - if not dir_is_object(metadata): - # Directory is not an object created by the caller - # so we can go ahead and delete it. - try: - if dir_empty(fullpath): - rmdirs(fullpath) - else: - # Non-object dir is not empty! - return False - except FileOrDirNotFoundError: - # No such dir! - return False + try: + metadata = read_metadata(fullpath) + except OSError as err: + if err.errno == errno.ENOENT: + # Ignore removal from another entity. + continue + raise else: - # Wait, this is an object created by the caller - # We cannot delete - return False - rmdirs(dir_path) - return True + if dir_is_object(metadata): + # Wait, this is an object created by the caller + # We cannot delete + return False + + # Directory is not an object created by the caller + # so we can go ahead and delete it. + try: + do_rmdir(fullpath) + except OSError as err: + if err.errno == errno.ENOTEMPTY: + # Directory is not empty, it might have objects in it + return False + if err.errno == errno.ENOENT: + # No such directory exists, already removed, ignore + continue + raise + + try: + do_rmdir(dir_path) + except OSError as err: + if err.errno == errno.ENOTEMPTY: + # Directory is not empty, race with object creation + return False + if err.errno == errno.ENOENT: + # No such directory exists, already removed, ignore + return True + raise + else: + return True # Over-ride Swift's utils.write_pickle with ours +# +# FIXME: Is this even invoked anymore given we don't perform container or +# account updates? import swift.common.utils swift.common.utils.write_pickle = write_pickle |