summaryrefslogtreecommitdiffstats
path: root/gluster/swift
diff options
context:
space:
mode:
authorPeter Portante <peter.portante@redhat.com>2013-07-08 14:10:45 -0400
committerLuis Pabon <lpabon@redhat.com>2013-07-10 15:49:26 -0700
commit677d30716978615d0499344ac0a62c2755a486cf (patch)
treeb86a156f120bdcd6f10d6b9998ef20b80cf50639 /gluster/swift
parentf8d876248d29fa7f4857ac8ca02324795ba53353 (diff)
Final forward port of PDQ performance patches
Change-Id: I4ef131b3cc7648d4571a4d854029efb1aff8b901 Signed-off-by: Peter Portante <peter.portante@redhat.com> Reviewed-on: http://review.gluster.org/5305 Reviewed-by: Luis Pabon <lpabon@redhat.com> Tested-by: Luis Pabon <lpabon@redhat.com>
Diffstat (limited to 'gluster/swift')
-rw-r--r--gluster/swift/common/DiskDir.py6
-rw-r--r--gluster/swift/common/DiskFile.py536
-rw-r--r--gluster/swift/common/Glusterfs.py173
-rw-r--r--gluster/swift/common/constraints.py17
-rw-r--r--gluster/swift/common/exceptions.py16
-rw-r--r--gluster/swift/common/fs_utils.py262
-rw-r--r--gluster/swift/common/utils.py234
7 files changed, 928 insertions, 316 deletions
diff --git a/gluster/swift/common/DiskDir.py b/gluster/swift/common/DiskDir.py
index c1fb674..b4a8341 100644
--- a/gluster/swift/common/DiskDir.py
+++ b/gluster/swift/common/DiskDir.py
@@ -483,12 +483,6 @@ class DiskDir(DiskCommon):
# within a directory implicitly.
return
- def empty(self):
- try:
- return dir_empty(self.datadir)
- except FileOrDirNotFoundError:
- return True
-
def delete_db(self, timestamp):
"""
Delete the container (directory) if empty.
diff --git a/gluster/swift/common/DiskFile.py b/gluster/swift/common/DiskFile.py
index 1ae5f7c..623248a 100644
--- a/gluster/swift/common/DiskFile.py
+++ b/gluster/swift/common/DiskFile.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2012 Red Hat, Inc.
+# Copyright (c) 2013 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -15,29 +15,241 @@
import os
import stat
+import fcntl
import errno
import random
+import logging
from hashlib import md5
+from eventlet import sleep
from contextlib import contextmanager
-from swift.common.utils import renamer
+from swift.common.utils import TRUE_VALUES, fallocate
from swift.common.exceptions import DiskFileNotExist, DiskFileError
-from gluster.swift.common.exceptions import AlreadyExistsAsDir
-from gluster.swift.common.fs_utils import mkdirs, do_open, do_close, \
+from gluster.swift.common.exceptions import GlusterFileSystemOSError
+from gluster.swift.common.fs_utils import do_fstat, do_open, do_close, \
do_unlink, do_chown, os_path, do_fsync, do_fchown, do_stat
from gluster.swift.common.utils import read_metadata, write_metadata, \
- validate_object, create_object_metadata, rmobjdir, dir_is_object
+ validate_object, create_object_metadata, rmobjdir, dir_is_object, \
+ get_object_metadata
from gluster.swift.common.utils import X_CONTENT_LENGTH, X_CONTENT_TYPE, \
X_TIMESTAMP, X_TYPE, X_OBJECT_TYPE, FILE, OBJECT, DIR_TYPE, \
FILE_TYPE, DEFAULT_UID, DEFAULT_GID, DIR_NON_OBJECT, DIR_OBJECT
+from ConfigParser import ConfigParser, NoSectionError, NoOptionError
from swift.obj.server import DiskFile
+# FIXME: Hopefully we'll be able to move to Python 2.7+ where O_CLOEXEC will
+# be back ported. See http://www.python.org/dev/peps/pep-0433/
+O_CLOEXEC = 02000000
DEFAULT_DISK_CHUNK_SIZE = 65536
# keep these lower-case
DISALLOWED_HEADERS = set('content-length content-type deleted etag'.split())
+def _random_sleep():
+ sleep(random.uniform(0.5, 0.15))
+
+
+def _lock_parent(full_path):
+ parent_path, _ = full_path.rsplit(os.path.sep, 1)
+ try:
+ fd = os.open(parent_path, os.O_RDONLY | O_CLOEXEC)
+ except OSError as err:
+ if err.errno == errno.ENOENT:
+ # Cannot lock the parent because it does not exist, let the caller
+ # handle this situation.
+ return False
+ raise
+ else:
+ while True:
+ # Spin sleeping for 1/10th of a second until we get the lock.
+ # FIXME: Consider adding a final timeout just abort the operation.
+ try:
+ fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ except IOError as err:
+ if err.errno == errno.EAGAIN:
+ _random_sleep()
+ else:
+ # Don't leak an open file on an exception
+ os.close(fd)
+ raise
+ except Exception:
+ # Don't leak an open file for any other exception
+ os.close(fd)
+ raise
+ else:
+ break
+ return fd
+
+
+def _make_directory_locked(full_path, uid, gid, metadata=None):
+ fd = _lock_parent(full_path)
+ if fd is False:
+ # Parent does not exist either, pass this situation on to the caller
+ # to handle.
+ return False, metadata
+ try:
+ # Check for directory existence
+ stats = do_stat(full_path)
+ if stats:
+ # It now exists, having acquired the lock of its parent directory,
+ # but verify it is actually a directory
+ is_dir = stat.S_ISDIR(stats.st_mode)
+ if not is_dir:
+ # It is not a directory!
+ raise DiskFileError("_make_directory_locked: non-directory"
+ " found at path %s when expecting a"
+ " directory", full_path)
+ return True, metadata
+
+ # We know the parent directory exists, and we have it locked, attempt
+ # the creation of the target directory.
+ return _make_directory_unlocked(full_path, uid, gid, metadata=metadata)
+ finally:
+ # We're done here, be sure to remove our lock and close our open FD.
+ try:
+ fcntl.flock(fd, fcntl.LOCK_UN)
+ except:
+ pass
+ os.close(fd)
+
+
+def _make_directory_unlocked(full_path, uid, gid, metadata=None):
+ """
+ Make a directory and change the owner ship as specified, and potentially
+ creating the object metadata if requested.
+ """
+ try:
+ os.mkdir(full_path)
+ except OSError as err:
+ if err.errno == errno.ENOENT:
+ # Tell the caller some directory of the parent path does not
+ # exist.
+ return False, metadata
+ elif err.errno == errno.EEXIST:
+ # Possible race, in that the caller invoked this method when it
+ # had previously determined the file did not exist.
+ #
+ # FIXME: When we are confident, remove this stat() call as it is
+ # not necessary.
+ try:
+ stats = os.stat(full_path)
+ except OSError as serr:
+ # FIXME: Ideally we'd want to return an appropriate error
+ # message and code in the PUT Object REST API response.
+ raise DiskFileError("_make_directory_unlocked: os.mkdir failed"
+ " because path %s already exists, and"
+ " a subsequent os.stat on that same"
+ " path failed (%s)" % (full_path,
+ str(serr)))
+ else:
+ is_dir = stat.S_ISDIR(stats.st_mode)
+ if not is_dir:
+ # FIXME: Ideally we'd want to return an appropriate error
+ # message and code in the PUT Object REST API response.
+ raise DiskFileError("_make_directory_unlocked: os.mkdir"
+ " failed on path %s because it already"
+ " exists but not as a directory" % (
+ full_path))
+ return True, metadata
+ elif err.errno == errno.ENOTDIR:
+ # FIXME: Ideally we'd want to return an appropriate error
+ # message and code in the PUT Object REST API response.
+ raise DiskFileError("_make_directory_unlocked: os.mkdir failed"
+ " because some part of path %s is not in fact"
+ " a directory" % (full_path))
+ elif err.errno == errno.EIO:
+ # Sometimes Fuse will return an EIO error when it does not know
+ # how to handle an unexpected, but transient situation. It is
+ # possible the directory now exists, stat() it to find out after a
+ # short period of time.
+ _random_sleep()
+ try:
+ stats = os.stat(full_path)
+ except OSError as serr:
+ if serr.errno == errno.ENOENT:
+ errmsg = "_make_directory_unlocked: os.mkdir failed on" \
+ " path %s (EIO), and a subsequent os.stat on" \
+ " that same path did not find the file." % (
+ full_path,)
+ else:
+ errmsg = "_make_directory_unlocked: os.mkdir failed on" \
+ " path %s (%s), and a subsequent os.stat on" \
+ " that same path failed as well (%s)" % (
+ full_path, str(err), str(serr))
+ raise DiskFileError(errmsg)
+ else:
+ # The directory at least exists now
+ is_dir = stat.S_ISDIR(stats.st_mode)
+ if is_dir:
+ # Dump the stats to the log with the original exception.
+ logging.warn("_make_directory_unlocked: os.mkdir initially"
+ " failed on path %s (%s) but a stat()"
+ " following that succeeded: %r" % (full_path,
+ str(err),
+ stats))
+ # Assume another entity took care of the proper setup.
+ return True, metadata
+ else:
+ raise DiskFileError("_make_directory_unlocked: os.mkdir"
+ " initially failed on path %s (%s) but"
+ " now we see that it exists but is not"
+ " a directory (%r)" % (full_path,
+ str(err),
+ stats))
+ else:
+ # Some other potentially rare exception occurred that does not
+ # currently warrant a special log entry to help diagnose.
+ raise DiskFileError("_make_directory_unlocked: os.mkdir failed on"
+ " path %s (%s)" % (full_path, str(err)))
+ else:
+ if metadata:
+ # We were asked to set the initial metadata for this object.
+ metadata_orig = get_object_metadata(full_path)
+ metadata_orig.update(metadata)
+ write_metadata(full_path, metadata_orig)
+ metadata = metadata_orig
+
+ # We created it, so we are reponsible for always setting the proper
+ # ownership.
+ do_chown(full_path, uid, gid)
+ return True, metadata
+
+
+_fs_conf = ConfigParser()
+if _fs_conf.read(os.path.join('/etc/swift', 'fs.conf')):
+ try:
+ _mkdir_locking = _fs_conf.get('DEFAULT', 'mkdir_locking', "no") \
+ in TRUE_VALUES
+ except (NoSectionError, NoOptionError):
+ _mkdir_locking = False
+ try:
+ _use_put_mount = _fs_conf.get('DEFAULT', 'use_put_mount', "no") \
+ in TRUE_VALUES
+ except (NoSectionError, NoOptionError):
+ _use_put_mount = False
+ try:
+ _relaxed_writes = _fs_conf.get('DEFAULT', 'relaxed_writes', "no") \
+ in TRUE_VALUES
+ except (NoSectionError, NoOptionError):
+ _relaxed_writes = False
+ try:
+ _preallocate = _fs_conf.get('DEFAULT', 'preallocate', "no") \
+ in TRUE_VALUES
+ except (NoSectionError, NoOptionError):
+ _preallocate = False
+else:
+ _mkdir_locking = False
+ _use_put_mount = False
+ _relaxed_writes = False
+ _preallocate = False
+
+if _mkdir_locking:
+ make_directory = _make_directory_locked
+else:
+ make_directory = _make_directory_unlocked
+
+
def _adjust_metadata(metadata):
# Fix up the metadata to ensure it has a proper value for the
# Content-Type metadata, as well as an X_TYPE and X_OBJECT_TYPE
@@ -106,6 +318,11 @@ class Gluster_DiskFile(DiskFile):
self.datadir = os.path.join(path, device, self.name)
self.device_path = os.path.join(path, device)
self._container_path = os.path.join(path, device, container)
+ if _use_put_mount:
+ self.put_datadir = os.path.join(self.device_path + '_PUT',
+ self.name)
+ else:
+ self.put_datadir = self.datadir
self._is_dir = False
self.tmppath = None
self.logger = logger
@@ -123,15 +340,16 @@ class Gluster_DiskFile(DiskFile):
# Don't store a value for data_file until we know it exists.
self.data_file = None
- data_file = os.path.join(self.datadir, self._obj)
+ data_file = os.path.join(self.put_datadir, self._obj)
try:
stats = do_stat(data_file)
- except OSError as ose:
- if ose.errno == errno.ENOENT or ose.errno == errno.ENOTDIR:
+ except OSError as err:
+ if err.errno == errno.ENOTDIR:
+ return
+ else:
+ if not stats:
return
- else:
- raise
self.data_file = data_file
self._is_dir = stat.S_ISDIR(stats.st_mode)
@@ -162,8 +380,9 @@ class Gluster_DiskFile(DiskFile):
:param verify_file: Defaults to True. If false, will not check
file to see if it needs quarantining.
"""
- #Marker directory
+ # Marker directory
if self._is_dir:
+ assert not self.fp
return
if self.fp:
do_close(self.fp)
@@ -178,20 +397,61 @@ class Gluster_DiskFile(DiskFile):
"""
return not self.data_file
- def _create_dir_object(self, dir_path):
- stats = None
- try:
- stats = do_stat(dir_path)
- except OSError:
- pass
+ def _create_dir_object(self, dir_path, metadata=None):
+ """
+ Create a directory object at the specified path. No check is made to
+ see if the directory object already exists, that is left to the
+ caller (this avoids a potentially duplicate stat() system call).
+
+ The "dir_path" must be relative to its container, self._container_path.
+
+ The "metadata" object is an optional set of metadata to apply to the
+ newly created directory object. If not present, no initial metadata is
+ applied.
+
+ The algorithm used is as follows:
- if not stats:
- mkdirs(dir_path)
- do_chown(dir_path, self.uid, self.gid)
- create_object_metadata(dir_path)
- elif not stat.S_ISDIR(stats.st_mode):
- raise DiskFileError("Cannot overwrite "
- "file %s with a directory" % dir_path)
+ 1. An attempt is made to create the directory, assuming the parent
+ directory already exists
+
+ * Directory creation races are detected, returning success in
+ those cases
+
+ 2. If the directory creation fails because some part of the path to
+ the directory does not exist, then a search back up the path is
+ performed to find the first existing ancestor directory, and then
+ the missing parents are successively created, finally creating
+ the target directory
+ """
+ full_path = os.path.join(self._container_path, dir_path)
+ cur_path = full_path
+ stack = []
+ while True:
+ md = None if cur_path != full_path else metadata
+ ret, newmd = make_directory(cur_path, self.uid, self.gid, md)
+ if ret:
+ break
+ # Some path of the parent did not exist, so loop around and
+ # create that, pushing this parent on the stack.
+ if os.path.sep not in cur_path:
+ raise DiskFileError("DiskFile._create_dir_object(): failed to"
+ " create directory path while exhausting"
+ " path elements to create: %s" % full_path)
+ cur_path, child = cur_path.rsplit(os.path.sep, 1)
+ assert child
+ stack.append(child)
+
+ child = stack.pop() if stack else None
+ while child:
+ cur_path = os.path.join(cur_path, child)
+ md = None if cur_path != full_path else metadata
+ ret, newmd = make_directory(cur_path, self.uid, self.gid, md)
+ if not ret:
+ raise DiskFileError("DiskFile._create_dir_object(): failed to"
+ " create directory path to target, %s,"
+ " on subpath: %s" % (full_path, cur_path))
+ child = stack.pop() if stack else None
+ return True, newmd
def put_metadata(self, metadata, tombstone=False):
"""
@@ -227,39 +487,112 @@ class Gluster_DiskFile(DiskFile):
if dir_is_object(metadata):
if not self.data_file:
- self.data_file = os.path.join(self.datadir, self._obj)
- self._create_dir_object(self.data_file)
- self.put_metadata(metadata)
+ # Does not exist, create it
+ data_file = os.path.join(self._obj_path, self._obj)
+ _, self.metadata = self._create_dir_object(data_file, metadata)
+ self.data_file = os.path.join(self._container_path, data_file)
+ elif not self.is_dir:
+ # Exists, but as a file
+ raise DiskFileError('DiskFile.put(): directory creation failed'
+ ' since the target, %s, already exists as'
+ ' a file' % self.data_file)
return
- # Check if directory already exists.
if self._is_dir:
# A pre-existing directory already exists on the file
# system, perhaps gratuitously created when another
# object was created, or created externally to Swift
# REST API servicing (UFO use case).
- msg = 'File object exists as a directory: %s' % self.data_file
- raise AlreadyExistsAsDir(msg)
+ raise DiskFileError('DiskFile.put(): file creation failed since'
+ ' the target, %s, already exists as a'
+ ' directory' % self.data_file)
+
+ # Write out metadata before fsync() to ensure it is also forced to
+ # disk.
+ write_metadata(fd, metadata)
+
+ if not _relaxed_writes:
+ do_fsync(fd)
+ if X_CONTENT_LENGTH in metadata:
+ # Don't bother doing this before fsync in case the OS gets any
+ # ideas to issue partial writes.
+ fsize = int(metadata[X_CONTENT_LENGTH])
+ self.drop_cache(fd, 0, fsize)
+
+ # At this point we know that the object's full directory path exists,
+ # so we can just rename it directly without using Swift's
+ # swift.common.utils.renamer(), which makes the directory path and
+ # adds extra stat() calls.
+ data_file = os.path.join(self.put_datadir, self._obj)
+ while True:
+ try:
+ os.rename(self.tmppath, data_file)
+ except OSError as err:
+ if err.errno in (errno.ENOENT, errno.EIO):
+ # FIXME: Why either of these two error conditions is
+ # happening is unknown at this point. This might be a FUSE
+ # issue of some sort or a possible race condition. So
+ # let's sleep on it, and double check the environment
+ # after a good nap.
+ _random_sleep()
+ # Tease out why this error occurred. The man page for
+ # rename reads:
+ # "The link named by tmppath does not exist; or, a
+ # directory component in data_file does not exist;
+ # or, tmppath or data_file is an empty string."
+ assert len(self.tmppath) > 0 and len(data_file) > 0
+ tpstats = do_stat(self.tmppath)
+ tfstats = do_fstat(fd)
+ assert tfstats
+ if not tpstats or tfstats.st_ino != tpstats.st_ino:
+ # Temporary file name conflict
+ raise DiskFileError('DiskFile.put(): temporary file,'
+ ' %s, was already renamed'
+ ' (targeted for %s)' % (
+ self.tmppath, data_file))
+ else:
+ # Data file target name now has a bad path!
+ dfstats = do_stat(self.put_datadir)
+ if not dfstats:
+ raise DiskFileError('DiskFile.put(): path to'
+ ' object, %s, no longer exists'
+ ' (targeted for %s)' % (
+ self.put_datadir,
+ data_file))
+ else:
+ is_dir = stat.S_ISDIR(dfstats.st_mode)
+ if not is_dir:
+ raise DiskFileError('DiskFile.put(): path to'
+ ' object, %s, no longer a'
+ ' directory (targeted for'
+ ' %s)' % (self.put_datadir,
+ data_file))
+ else:
+ # Let's retry since everything looks okay
+ logging.warn("DiskFile.put(): os.rename('%s',"
+ "'%s') initially failed (%s) but"
+ " a stat('%s') following that"
+ " succeeded: %r" % (
+ self.tmppath, data_file,
+ str(err), self.put_datadir,
+ dfstats))
+ continue
+ else:
+ raise GlusterFileSystemOSError(
+ err.errno, "%s, os.rename('%s', '%s')" % (
+ err.strerror, self.tmppath, data_file))
+ else:
+ # Success!
+ break
+
+ # Avoid the unlink() system call as part of the mkstemp context cleanup
+ self.tmppath = None
- write_metadata(self.tmppath, metadata)
- if X_CONTENT_LENGTH in metadata:
- self.drop_cache(fd, 0, int(metadata[X_CONTENT_LENGTH]))
- do_fsync(fd)
- if self._obj_path:
- dir_objs = self._obj_path.split('/')
- assert len(dir_objs) >= 1
- tmp_path = self._container_path
- for dir_name in dir_objs:
- tmp_path = os.path.join(tmp_path, dir_name)
- self._create_dir_object(tmp_path)
-
- do_fchown(fd, self.uid, self.gid)
- newpath = os.path.join(self.datadir, self._obj)
- renamer(self.tmppath, newpath)
self.metadata = metadata
- self.data_file = newpath
self.filter_metadata()
- return
+
+ # Mark that it actually exists now
+ self.data_file = os.path.join(self.datadir, self._obj)
def unlinkold(self, timestamp):
"""
@@ -313,7 +646,7 @@ class Gluster_DiskFile(DiskFile):
def get_data_file_size(self):
"""
Returns the os_path.getsize for the file. Raises an exception if this
- file does not match the Content-Length stored in the metadata. Or if
+ file does not match the Content-Length stored in the metadata, or if
self.data_file does not exist.
:returns: file size as an int
@@ -346,33 +679,94 @@ class Gluster_DiskFile(DiskFile):
self.metadata.pop(X_OBJECT_TYPE)
@contextmanager
- def mkstemp(self):
- """Contextmanager to make a temporary file."""
-
- # Creating intermidiate directories and corresponding metadata.
- # For optimization, check if the subdirectory already exists,
- # if exists, then it means that it also has its metadata.
- # Not checking for container, since the container should already
- # exist for the call to come here.
- if not os_path.exists(self.datadir):
- path = self._container_path
- subdir_list = self._obj_path.split(os.path.sep)
- for i in range(len(subdir_list)):
- path = os.path.join(path, subdir_list[i])
- if not os_path.exists(path):
- self._create_dir_object(path)
-
- tmpfile = '.' + self._obj + '.' + md5(self._obj +
- str(random.random())).hexdigest()
-
- self.tmppath = os.path.join(self.datadir, tmpfile)
- fd = do_open(self.tmppath, os.O_RDWR | os.O_CREAT | os.O_EXCL)
+ def mkstemp(self, size=None):
+ """
+ Contextmanager to make a temporary file, optionally of a specified
+ initial size.
+
+ For Gluster, we first optimistically create the temporary file using
+ the "rsync-friendly" .NAME.random naming. If we find that some path to
+ the file does not exist, we then create that path and then create the
+ temporary file again. If we get file name conflict, we'll retry using
+ different random suffixes 1,000 times before giving up.
+ """
+ data_file = os.path.join(self.put_datadir, self._obj)
+
+ # Assume the full directory path exists to the file already, and
+ # construct the proper name for the temporary file.
+ for i in range(0, 1000):
+ tmpfile = '.' + self._obj + '.' + md5(self._obj +
+ str(random.random())).hexdigest()
+ tmppath = os.path.join(self.put_datadir, tmpfile)
+ try:
+ fd = do_open(tmppath,
+ os.O_WRONLY | os.O_CREAT | os.O_EXCL | O_CLOEXEC)
+ except GlusterFileSystemOSError as gerr:
+ if gerr.errno == errno.EEXIST:
+ # Retry with a different random number.
+ continue
+ if gerr.errno == errno.EIO:
+ # FIXME: Possible FUSE issue or race condition, let's
+ # sleep on it and retry the operation.
+ _random_sleep()
+ logging.warn("DiskFile.mkstemp(): %s ... retrying in"
+ " 0.1 secs", gerr)
+ continue
+ if gerr.errno != errno.ENOENT:
+ # FIXME: Other cases we should handle?
+ raise
+ if not self._obj_path:
+ # No directory hierarchy and the create failed telling us
+ # the container or volume directory does not exist. This
+ # could be a FUSE issue or some race condition, so let's
+ # sleep a bit and retry.
+ _random_sleep()
+ logging.warn("DiskFile.mkstemp(): %s ... retrying in"
+ " 0.1 secs", gerr)
+ continue
+ if i != 0:
+ # Got ENOENT after previously making the path. This could
+ # also be a FUSE issue or some race condition, nap and
+ # retry.
+ _random_sleep()
+ logging.warn("DiskFile.mkstemp(): %s ... retrying in"
+ " 0.1 secs" % gerr)
+ continue
+ # It looks like the path to the object does not already exist
+ self._create_dir_object(self._obj_path)
+ continue
+ else:
+ break
+ else:
+ # We failed after 1,000 attempts to create the temporary file.
+ raise DiskFileError('DiskFile.mkstemp(): failed to successfully'
+ ' create a temporary file without running'
+ ' into a name conflict after 1,000 attempts'
+ ' for: %s' % (data_file,))
+
+ self.tmppath = tmppath
+
try:
+ # Ensure it is properly owned before we make it available.
+ do_fchown(fd, self.uid, self.gid)
+ if _preallocate and size:
+ # For XFS, fallocate() turns off speculative pre-allocation
+ # until a write is issued either to the last block of the file
+ # before the EOF or beyond the EOF. This means that we are
+ # less likely to fragment free space with pre-allocated
+ # extents that get truncated back to the known file size.
+ # However, this call also turns holes into allocated but
+ # unwritten extents, so that allocation occurs before the
+ # write, not during XFS writeback. This effectively defeats
+ # any allocation optimizations the filesystem can make at
+ # writeback time.
+ fallocate(fd, size)
yield fd
finally:
try:
do_close(fd)
except OSError:
pass
- tmppath, self.tmppath = self.tmppath, None
- do_unlink(tmppath)
+ if self.tmppath:
+ tmppath, self.tmppath = self.tmppath, None
+ do_unlink(tmppath)
diff --git a/gluster/swift/common/Glusterfs.py b/gluster/swift/common/Glusterfs.py
index 4506ef4..d1b456a 100644
--- a/gluster/swift/common/Glusterfs.py
+++ b/gluster/swift/common/Glusterfs.py
@@ -18,10 +18,13 @@ import fcntl
import time
import errno
import logging
+import urllib
from ConfigParser import ConfigParser, NoSectionError, NoOptionError
from swift.common.utils import TRUE_VALUES, search_tree
-from gluster.swift.common.fs_utils import mkdirs
+from gluster.swift.common.fs_utils import do_ismount
+from gluster.swift.common.exceptions import GlusterfsException, \
+ FailureToMountError
#
# Read the fs.conf file once at startup (module load)
@@ -32,6 +35,8 @@ OBJECT_ONLY = True
RUN_DIR = '/var/run/swift'
SWIFT_DIR = '/etc/swift'
_do_getsize = False
+_allow_mount_per_server = False
+
if _fs_conf.read(os.path.join(SWIFT_DIR, 'fs.conf')):
try:
MOUNT_IP = _fs_conf.get('DEFAULT', 'mount_ip', MOUNT_IP)
@@ -55,6 +60,14 @@ if _fs_conf.read(os.path.join(SWIFT_DIR, 'fs.conf')):
except (NoSectionError, NoOptionError):
pass
+ try:
+ _allow_mount_per_server = _fs_conf.get('DEFAULT',
+ 'allow_mount_per_server',
+ _allow_mount_per_server
+ ) in TRUE_VALUES
+ except (NoSectionError, NoOptionError):
+ pass
+
NAME = 'glusterfs'
@@ -70,7 +83,97 @@ def _busy_wait(full_mount_path):
return False
+def _get_unique_id():
+ # Each individual server will attempt to get a free lock file
+ # sequentially numbered, storing the pid of the holder of that
+ # file, That number represents the numbered mount point to use
+ # for its operations.
+ if not _allow_mount_per_server:
+ return 0
+ try:
+ os.mkdir(RUN_DIR)
+ except OSError as err:
+ if err.errno == errno.EEXIST:
+ pass
+ unique_id = 0
+ lock_file_template = os.path.join(RUN_DIR,
+ 'swift.object-server-%03d.lock')
+ for i in range(1, 201):
+ lock_file = lock_file_template % i
+ fd = os.open(lock_file, os.O_CREAT | os.O_RDWR)
+ try:
+ fcntl.lockf(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ except IOError as ex:
+ os.close(fd)
+ if ex.errno in (errno.EACCES, errno.EAGAIN):
+ # This means that some other process has it locked, so they
+ # own the lock.
+ continue
+ raise
+ except:
+ os.close(fd)
+ raise
+ else:
+ # We got the lock, write our PID into it, but don't close the
+ # file, it will be closed when our process exists
+ os.lseek(fd, 0, os.SEEK_SET)
+ pid = str(os.getpid()) + '\n'
+ os.write(fd, pid)
+ unique_id = i
+ break
+ return unique_id
+
+
+_unique_id = None
+
+
+def _get_drive_mount_point_name(drive):
+ """
+ Get the GlusterFS mount point name to use for this worker for the target
+ drive name.
+
+ If unique is False, then we just map the drive directly to the mount point
+ name. If unique is True, then we determine a unique mount point name that
+ maps to our server PID.
+ """
+ if not _allow_mount_per_server:
+ # One-to-one mapping of drive to mount point name
+ mount_point = drive
+ else:
+ global _unique_id
+ if _unique_id is None:
+ _unique_id = _get_unique_id()
+ mount_point = ("%s_%03d" % (drive, _unique_id)) \
+ if _unique_id else drive
+ return mount_point
+
+
def mount(root, drive):
+ """
+ Verify that the path to the device is a mount point and mounted. This
+ allows us to fast fail on drives that have been unmounted because of
+ issues, and also prevents us for accidentally filling up the root
+ partition.
+
+ This method effectively replaces the swift.common.constraints.check_mount
+ method in behavior, adding the ability to auto-mount the volume, which is
+ dubious (FIXME).
+
+ :param root: base path where the devices are mounted
+ :param drive: drive name to be checked
+ :returns: True if it is a valid mounted device, False otherwise
+ """
+ if not (urllib.quote_plus(drive) == drive):
+ return False
+
+ mount_point = _get_drive_mount_point_name(drive)
+ full_mount_path = os.path.join(root, mount_point)
+ if do_ismount(full_mount_path):
+ # Don't bother checking volume if it is already a mount point. Allows
+ # us to use local file systems for unit tests and some functional test
+ # environments to isolate behaviors from GlusterFS itself.
+ return True
+
# FIXME: Possible thundering herd problem here
el = _get_export_list()
@@ -81,15 +184,34 @@ def mount(root, drive):
logging.error('No export found in %r matching drive, %s', el, drive)
return False
- # NOTE: root is typically the default value of /mnt/gluster-object
- full_mount_path = os.path.join(root, drive)
- if not os.path.isdir(full_mount_path):
- mkdirs(full_mount_path)
+ try:
+ os.makedirs(full_mount_path)
+ except OSError as err:
+ if err.errno == errno.EEXIST:
+ pass
+ else:
+ logging.exception('Could not create mount path hierarchy:'
+ ' %s' % full_mount_path)
+ return False
- lck_file = os.path.join(RUN_DIR, '%s.lock' % drive)
+ mnt_cmd = 'mount -t glusterfs %s:%s %s' % (MOUNT_IP, export,
+ full_mount_path)
- if not os.path.exists(RUN_DIR):
- mkdirs(RUN_DIR)
+ if _allow_mount_per_server:
+ if os.system(mnt_cmd):
+ logging.exception('Mount failed %s' % (mnt_cmd))
+ return True
+
+ lck_file = os.path.join(RUN_DIR, '%s.lock' % mount_point)
+
+ try:
+ os.mkdir(RUN_DIR)
+ except OSError as err:
+ if err.errno == errno.EEXIST:
+ pass
+ else:
+ logging.exception('Could not create RUN_DIR: %s' % full_mount_path)
+ return False
fd = os.open(lck_file, os.O_CREAT | os.O_RDWR)
with os.fdopen(fd, 'r+b') as f:
@@ -100,12 +222,8 @@ def mount(root, drive):
# This means that some other process is mounting the
# filesystem, so wait for the mount process to complete
return _busy_wait(full_mount_path)
- else:
- raise ex
- mnt_cmd = 'mount -t glusterfs %s:%s %s' % (MOUNT_IP, export,
- full_mount_path)
if os.system(mnt_cmd) or not _busy_wait(full_mount_path):
- logging.error('Mount failed %s: %s', NAME, mnt_cmd)
+ logging.error('Mount failed %s', mnt_cmd)
return False
return True
@@ -115,7 +233,8 @@ def unmount(full_mount_path):
umnt_cmd = 'umount %s 2>> /dev/null' % full_mount_path
if os.system(umnt_cmd):
- logging.error('Unable to unmount %s %s' % (full_mount_path, NAME))
+ raise FailureToMountError(
+ 'Unable to unmount %s' % (full_mount_path))
def _get_export_list():
@@ -124,7 +243,8 @@ def _get_export_list():
export_list = []
if os.system(cmnd + ' >> /dev/null'):
- logging.error('Getting volume info failed for %s', NAME)
+ logging.error('Getting volume info failed, make sure to have'
+ ' passwordless ssh on %s', MOUNT_IP)
else:
fp = os.popen(cmnd)
while True:
@@ -139,18 +259,25 @@ def _get_export_list():
def get_mnt_point(vol_name, conf_dir=SWIFT_DIR, conf_file="object-server*"):
- """Read the object-server's configuration file and return
- the device value"""
+ """
+ Read the object-server's configuration file and return
+ the device value.
+ :param vol_name: target GlusterFS volume name
+ :param conf_dir: Swift configuration directory root
+ :param conf_file: configuration file name for which to search
+ :returns full path to given target volume name
+ :raises GlusterfsException if unable to fetch mount point root from
+ configuration files
+ """
mnt_dir = ''
conf_files = search_tree(conf_dir, conf_file, '.conf')
if not conf_files:
- raise Exception("Config file not found")
-
+ raise GlusterfsException("Config file, %s, in directory, %s, "
+ "not found" % (conf_file, conf_dir))
_conf = ConfigParser()
if _conf.read(conf_files[0]):
- try:
- mnt_dir = _conf.get('DEFAULT', 'devices', '')
- except (NoSectionError, NoOptionError):
- raise
+ mnt_dir = _conf.get('DEFAULT', 'devices', '')
return os.path.join(mnt_dir, vol_name)
+ else:
+ raise GlusterfsException("Config file, %s, is empty" % conf_files[0])
diff --git a/gluster/swift/common/constraints.py b/gluster/swift/common/constraints.py
index ce1df31..9a4d57e 100644
--- a/gluster/swift/common/constraints.py
+++ b/gluster/swift/common/constraints.py
@@ -91,23 +91,8 @@ def gluster_check_object_creation(req, object_name):
# Replace the original check object creation with ours
swift.common.constraints.check_object_creation = gluster_check_object_creation
-# Save the original check mount
-__check_mount = swift.common.constraints.check_mount
-
-
-# Define our new one which invokes the original
-def gluster_check_mount(root, drive):
- # FIXME: Potential performance optimization here to not call the original
- # check mount which makes two stat calls. We could do what they do with
- # just one.
- if __check_mount(root, drive):
- return True
-
- return Glusterfs.mount(root, drive)
-
-
# Replace the original check mount with ours
-swift.common.constraints.check_mount = gluster_check_mount
+swift.common.constraints.check_mount = Glusterfs.mount
# Save the original Ring class
__Ring = _ring.Ring
diff --git a/gluster/swift/common/exceptions.py b/gluster/swift/common/exceptions.py
index 75a95ec..ec74353 100644
--- a/gluster/swift/common/exceptions.py
+++ b/gluster/swift/common/exceptions.py
@@ -14,10 +14,22 @@
# limitations under the License.
+class GlusterFileSystemOSError(OSError):
+ pass
+
+
+class GlusterFileSystemIOError(IOError):
+ pass
+
+
class GlusterfsException(Exception):
pass
+class FailureToMountError(GlusterfsException):
+ pass
+
+
class FileOrDirNotFoundError(GlusterfsException):
pass
@@ -28,3 +40,7 @@ class NotDirectoryError(GlusterfsException):
class AlreadyExistsAsDir(GlusterfsException):
pass
+
+
+class AlreadyExistsAsFile(GlusterfsException):
+ pass
diff --git a/gluster/swift/common/fs_utils.py b/gluster/swift/common/fs_utils.py
index 1f8415c..ea5c180 100644
--- a/gluster/swift/common/fs_utils.py
+++ b/gluster/swift/common/fs_utils.py
@@ -16,10 +16,13 @@
import logging
import os
import errno
+import stat
+import random
import os.path as os_path # noqa
from eventlet import tpool
+from eventlet import sleep
from gluster.swift.common.exceptions import FileOrDirNotFoundError, \
- NotDirectoryError
+ NotDirectoryError, GlusterFileSystemOSError, GlusterFileSystemIOError
def do_walk(*args, **kwargs):
@@ -30,78 +33,191 @@ def do_write(fd, msg):
try:
cnt = os.write(fd, msg)
except OSError as err:
- logging.exception("Write failed, err: %s", str(err))
- raise
+ raise GlusterFileSystemOSError(
+ err.errno, '%s, os.write("%s", ...)' % (err.strerror, fd))
return cnt
+def do_ismount(path):
+ """
+ Test whether a path is a mount point.
+
+ This is code hijacked from C Python 2.6.8, adapted to remove the extra
+ lstat() system call.
+ """
+ try:
+ s1 = os.lstat(path)
+ except os.error as err:
+ if err.errno == errno.ENOENT:
+ # It doesn't exist -- so not a mount point :-)
+ return False
+ else:
+ raise GlusterFileSystemOSError(
+ err.errno, '%s, os.lstat("%s")' % (err.strerror, path))
+
+ if stat.S_ISLNK(s1.st_mode):
+ # A symlink can never be a mount point
+ return False
+
+ try:
+ s2 = os.lstat(os.path.join(path, '..'))
+ except os.error as err:
+ raise GlusterFileSystemOSError(
+ err.errno, '%s, os.lstat("%s")' % (err.strerror,
+ os.path.join(path, '..')))
+
+ dev1 = s1.st_dev
+ dev2 = s2.st_dev
+ if dev1 != dev2:
+ # path/.. on a different device as path
+ return True
+
+ ino1 = s1.st_ino
+ ino2 = s2.st_ino
+ if ino1 == ino2:
+ # path/.. is the same i-node as path
+ return True
+
+ return False
+
+
+def do_mkdir(path):
+ try:
+ os.mkdir(path)
+ except OSError as err:
+ if err.errno == errno.EEXIST:
+ logging.warn("fs_utils: os.mkdir - path %s already exists", path)
+ else:
+ raise GlusterFileSystemOSError(
+ err.errno, '%s, os.mkdir("%s")' % (err.strerror, path))
+
+
def do_listdir(path):
try:
buf = os.listdir(path)
except OSError as err:
- logging.exception("Listdir failed on %s err: %s", path, err.strerror)
- raise
+ raise GlusterFileSystemOSError(
+ err.errno, '%s, os.listdir("%s")' % (err.strerror, path))
return buf
+def dir_empty(path):
+ """
+ Return true if directory is empty (or does not exist), false otherwise.
+
+ :param path: Directory path
+ :returns: True/False
+ """
+ try:
+ files = do_listdir(path)
+ return not files
+ except GlusterFileSystemOSError as err:
+ if err.errno == errno.ENOENT:
+ raise FileOrDirNotFoundError()
+ if err.errno == errno.ENOTDIR:
+ raise NotDirectoryError()
+ raise
+
+
+def do_rmdir(path):
+ try:
+ os.rmdir(path)
+ except OSError as err:
+ raise GlusterFileSystemOSError(
+ err.errno, '%s, os.rmdir("%s")' % (err.strerror, path))
+
+
def do_chown(path, uid, gid):
try:
os.chown(path, uid, gid)
except OSError as err:
- logging.exception("Chown failed on %s err: %s", path, err.strerror)
- raise
- return True
+ raise GlusterFileSystemOSError(
+ err.errno, '%s, os.chown("%s", %s, %s)' % (
+ err.strerror, path, uid, gid))
def do_fchown(fd, uid, gid):
try:
os.fchown(fd, uid, gid)
except OSError as err:
- logging.exception("fchown failed on %d err: %s", fd, err.strerror)
- raise
- return True
+ raise GlusterFileSystemOSError(
+ err.errno, '%s, os.fchown(%s, %s, %s)' % (
+ err.strerror, fd, uid, gid))
+
+
+_STAT_ATTEMPTS = 10
def do_stat(path):
+ serr = None
+ for i in range(0, _STAT_ATTEMPTS):
+ try:
+ stats = os.stat(path)
+ except OSError as err:
+ if err.errno == errno.EIO:
+ # Retry EIO assuming it is a transient error from FUSE after a
+ # short random sleep
+ serr = err
+ sleep(random.uniform(0.001, 0.005))
+ continue
+ if err.errno == errno.ENOENT:
+ stats = None
+ else:
+ raise GlusterFileSystemOSError(
+ err.errno, '%s, os.stat("%s")[%d attempts]' % (
+ err.strerror, path, i))
+ if i > 0:
+ logging.warn("fs_utils.do_stat():"
+ " os.stat('%s') retried %d times (%s)",
+ path, i, 'success' if stats else 'failure')
+ return stats
+ else:
+ raise GlusterFileSystemOSError(
+ serr.errno, '%s, os.stat("%s")[%d attempts]' % (
+ serr.strerror, path, _STAT_ATTEMPTS))
+
+
+def do_fstat(fd):
try:
- #Check for fd.
- if isinstance(path, int):
- buf = os.fstat(path)
- else:
- buf = os.stat(path)
+ stats = os.fstat(fd)
except OSError as err:
- logging.exception("Stat failed on %s err: %s", path, err.strerror)
- raise
- return buf
+ raise GlusterFileSystemOSError(
+ err.errno, '%s, os.fstat(%s)' % (err.strerror, fd))
+ return stats
-def do_open(path, mode):
- if isinstance(mode, int):
+def do_open(path, flags, **kwargs):
+ if isinstance(flags, int):
try:
- fd = os.open(path, mode)
+ fd = os.open(path, flags, **kwargs)
except OSError as err:
- logging.exception("Open failed on %s err: %s", path, str(err))
- raise
+ raise GlusterFileSystemOSError(
+ err.errno, '%s, os.open("%s", %x, %r)' % (
+ err.strerror, path, flags, kwargs))
+ return fd
else:
try:
- fd = open(path, mode)
+ fp = open(path, flags, **kwargs)
except IOError as err:
- logging.exception("Open failed on %s err: %s", path, str(err))
- raise
- return fd
+ raise GlusterFileSystemIOError(
+ err.errno, '%s, open("%s", %s, %r)' % (
+ err.strerror, path, flags, kwargs))
+ return fp
def do_close(fd):
- #fd could be file or int type.
- try:
- if isinstance(fd, int):
- os.close(fd)
- else:
+ if isinstance(fd, file):
+ try:
fd.close()
- except OSError as err:
- logging.exception("Close failed on %s err: %s", fd, err.strerror)
- raise
- return True
+ except IOError as err:
+ raise GlusterFileSystemIOError(
+ err.errno, '%s, os.close(%s)' % (err.strerror, fd))
+ else:
+ try:
+ os.close(fd)
+ except OSError as err:
+ raise GlusterFileSystemOSError(
+ err.errno, '%s, os.close(%s)' % (err.strerror, fd))
def do_unlink(path, log=True):
@@ -109,21 +225,28 @@ def do_unlink(path, log=True):
os.unlink(path)
except OSError as err:
if err.errno != errno.ENOENT:
- if log:
- logging.exception("Unlink failed on %s err: %s",
- path, err.strerror)
- raise
- return True
+ raise GlusterFileSystemOSError(
+ err.errno, '%s, os.unlink("%s")' % (err.strerror, path))
+ elif log:
+ logging.warn("fs_utils: os.unlink failed on non-existent path: %s",
+ path)
def do_rename(old_path, new_path):
try:
os.rename(old_path, new_path)
except OSError as err:
- logging.exception("Rename failed on %s to %s err: %s",
- old_path, new_path, err.strerror)
- raise
- return True
+ raise GlusterFileSystemOSError(
+ err.errno, '%s, os.rename("%s", "%s")' % (
+ err.strerror, old_path, new_path))
+
+
+def do_fsync(fd):
+ try:
+ tpool.execute(os.fsync, fd)
+ except OSError as err:
+ raise GlusterFileSystemOSError(
+ err.errno, '%s, os.fsync("%s")' % (err.strerror, fd))
def mkdirs(path):
@@ -133,47 +256,10 @@ def mkdirs(path):
:param path: path to create
"""
- if not os.path.isdir(path):
- try:
- os.makedirs(path)
- except OSError as err:
- if err.errno != errno.EEXIST:
- logging.exception("Makedirs failed on %s err: %s",
- path, err.strerror)
- raise
- return True
-
-
-def dir_empty(path):
- """
- Return true if directory/container is empty.
- :param path: Directory path.
- :returns: True/False.
- """
- if os.path.isdir(path):
- files = do_listdir(path)
- return not files
- elif not os.path.exists(path):
- raise FileOrDirNotFoundError()
- raise NotDirectoryError()
-
-
-def rmdirs(path):
- if not os.path.isdir(path):
- return False
try:
- os.rmdir(path)
+ os.makedirs(path)
except OSError as err:
- if err.errno != errno.ENOENT:
- logging.error("rmdirs failed on %s, err: %s", path, err.strerror)
- return False
- return True
-
-
-def do_fsync(fd):
- try:
- tpool.execute(os.fsync, fd)
- except OSError as err:
- logging.exception("fsync failed with err: %s", err.strerror)
- raise
- return True
+ if err.errno == errno.EEXIST and os.path.isdir(path):
+ return
+ raise GlusterFileSystemOSError(
+ err.errno, '%s, os.makedirs("%s")' % (err.strerror, path))
diff --git a/gluster/swift/common/utils.py b/gluster/swift/common/utils.py
index 5547ea4..6503644 100644
--- a/gluster/swift/common/utils.py
+++ b/gluster/swift/common/utils.py
@@ -24,9 +24,8 @@ from eventlet import sleep
import cPickle as pickle
from swift.common.utils import normalize_timestamp
from gluster.swift.common.fs_utils import do_rename, do_fsync, os_path, \
- do_stat, do_listdir, do_walk, dir_empty, rmdirs
+ do_stat, do_listdir, do_walk, do_rmdir
from gluster.swift.common import Glusterfs
-from gluster.swift.common.exceptions import FileOrDirNotFoundError
X_CONTENT_TYPE = 'Content-Type'
X_CONTENT_LENGTH = 'Content-Length'
@@ -56,11 +55,23 @@ PICKLE_PROTOCOL = 2
CHUNK_SIZE = 65536
-def read_metadata(path):
+class GlusterFileSystemOSError(OSError):
+ # Having our own class means the name will show up in the stack traces
+ # recorded in the log files.
+ pass
+
+
+class GlusterFileSystemIOError(IOError):
+ # Having our own class means the name will show up in the stack traces
+ # recorded in the log files.
+ pass
+
+
+def read_metadata(path_or_fd):
"""
Helper function to read the pickled metadata from a File/Directory.
- :param path: File/Directory to read metadata from.
+ :param path_or_fd: File/Directory path or fd from which to read metadata.
:returns: dictionary of metadata
"""
@@ -69,7 +80,7 @@ def read_metadata(path):
key = 0
while metadata is None:
try:
- metadata_s += xattr.getxattr(path,
+ metadata_s += xattr.getxattr(path_or_fd,
'%s%s' % (METADATA_KEY, (key or '')))
except IOError as err:
if err.errno == errno.ENODATA:
@@ -79,18 +90,17 @@ def read_metadata(path):
# unpickle operation, we consider the metadata lost, and
# drop the existing data so that the internal state can be
# recreated.
- clean_metadata(path)
+ clean_metadata(path_or_fd)
# We either could not find any metadata key, or we could find
# some keys, but were not successful in performing the
# unpickling (missing keys perhaps)? Either way, just report
# to the caller we have no metadata.
metadata = {}
else:
- logging.exception("xattr.getxattr failed on %s key %s err: %s",
- path, key, str(err))
# Note that we don't touch the keys on errors fetching the
# data since it could be a transient state.
- raise
+ raise GlusterFileSystemIOError(
+ err.errno, 'xattr.getxattr("%s", %s)' % (path_or_fd, key))
else:
try:
# If this key provides all or the remaining part of the pickle
@@ -109,38 +119,39 @@ def read_metadata(path):
return metadata
-def write_metadata(path, metadata):
+def write_metadata(path_or_fd, metadata):
"""
Helper function to write pickled metadata for a File/Directory.
- :param path: File/Directory path to write the metadata
- :param metadata: dictionary to metadata write
+ :param path_or_fd: File/Directory path or fd to write the metadata
+ :param metadata: dictionary of metadata write
"""
assert isinstance(metadata, dict)
metastr = pickle.dumps(metadata, PICKLE_PROTOCOL)
key = 0
while metastr:
try:
- xattr.setxattr(path,
+ xattr.setxattr(path_or_fd,
'%s%s' % (METADATA_KEY, key or ''),
metastr[:MAX_XATTR_SIZE])
except IOError as err:
- logging.exception("setxattr failed on %s key %s err: %s",
- path, key, str(err))
- raise
+ raise GlusterFileSystemIOError(
+ err.errno,
+ 'xattr.setxattr("%s", %s, metastr)' % (path_or_fd, key))
metastr = metastr[MAX_XATTR_SIZE:]
key += 1
-def clean_metadata(path):
+def clean_metadata(path_or_fd):
key = 0
while True:
try:
- xattr.removexattr(path, '%s%s' % (METADATA_KEY, (key or '')))
+ xattr.removexattr(path_or_fd, '%s%s' % (METADATA_KEY, (key or '')))
except IOError as err:
if err.errno == errno.ENODATA:
break
- raise
+ raise GlusterFileSystemIOError(
+ err.errno, 'xattr.removexattr("%s", %s)' % (path_or_fd, key))
key += 1
@@ -150,9 +161,9 @@ def check_user_xattr(path):
try:
xattr.setxattr(path, 'user.test.key1', 'value1')
except IOError as err:
- logging.exception("check_user_xattr: set failed on %s err: %s",
- path, str(err))
- raise
+ raise GlusterFileSystemIOError(
+ err.errno,
+ 'xattr.setxattr("%s", "user.test.key1", "value1")' % (path,))
try:
xattr.removexattr(path, 'user.test.key1')
except IOError as err:
@@ -245,7 +256,7 @@ def _update_list(path, cont_path, src_list, reg_file=True, object_count=0,
object_count += 1
- if Glusterfs._do_getsize and reg_file:
+ if reg_file and Glusterfs._do_getsize:
bytes_used += os_path.getsize(os.path.join(path, obj_name))
sleep()
@@ -254,7 +265,6 @@ def _update_list(path, cont_path, src_list, reg_file=True, object_count=0,
def update_list(path, cont_path, dirs=[], files=[], object_count=0,
bytes_used=0, obj_list=[]):
-
if files:
object_count, bytes_used = _update_list(path, cont_path, files, True,
object_count, bytes_used,
@@ -266,22 +276,13 @@ def update_list(path, cont_path, dirs=[], files=[], object_count=0,
return object_count, bytes_used
-class ContainerDetails(object):
- def __init__(self, bytes_used, object_count, obj_list, dir_list):
- self.bytes_used = bytes_used
- self.object_count = object_count
- self.obj_list = obj_list
- self.dir_list = dir_list
-
-
-def _get_container_details_from_fs(cont_path):
+def get_container_details(cont_path):
"""
get container details by traversing the filesystem
"""
bytes_used = 0
object_count = 0
obj_list = []
- dir_list = []
if os_path.isdir(cont_path):
for (path, dirs, files) in do_walk(cont_path):
@@ -289,36 +290,12 @@ def _get_container_details_from_fs(cont_path):
files, object_count,
bytes_used, obj_list)
- dir_list.append((path, do_stat(path).st_mtime))
sleep()
- return ContainerDetails(bytes_used, object_count, obj_list, dir_list)
+ return obj_list, object_count, bytes_used
-def get_container_details(cont_path):
- """
- Return object_list, object_count and bytes_used.
- """
- cd = _get_container_details_from_fs(cont_path)
-
- return cd.obj_list, cd.object_count, cd.bytes_used
-
-
-class AccountDetails(object):
- """ A simple class to store the three pieces of information associated
- with an account:
-
- 1. The last known modification time
- 2. The count of containers in the following list
- 3. The list of containers
- """
- def __init__(self, mtime, container_count, container_list):
- self.mtime = mtime
- self.container_count = container_count
- self.container_list = container_list
-
-
-def _get_account_details_from_fs(acc_path):
+def get_account_details(acc_path):
"""
Return container_list and container_count.
"""
@@ -326,35 +303,40 @@ def _get_account_details_from_fs(acc_path):
container_count = 0
acc_stats = do_stat(acc_path)
- is_dir = stat.S_ISDIR(acc_stats.st_mode)
- if is_dir:
- for name in do_listdir(acc_path):
- if name.lower() == TEMP_DIR \
- or name.lower() == ASYNCDIR \
- or not os_path.isdir(os.path.join(acc_path, name)):
- continue
- container_count += 1
- container_list.append(name)
+ if acc_stats:
+ is_dir = stat.S_ISDIR(acc_stats.st_mode)
+ if is_dir:
+ for name in do_listdir(acc_path):
+ if name.lower() == TEMP_DIR \
+ or name.lower() == ASYNCDIR \
+ or not os_path.isdir(os.path.join(acc_path, name)):
+ continue
+ container_count += 1
+ container_list.append(name)
- return AccountDetails(acc_stats.st_mtime, container_count, container_list)
+ return container_list, container_count
-def get_account_details(acc_path):
- """
- Return container_list and container_count.
+def _get_etag(path):
"""
- ad = _get_account_details_from_fs(acc_path)
+ FIXME: It would be great to have a translator that returns the md5sum() of
+ the file as an xattr that can be simply fetched.
- return ad.container_list, ad.container_count
-
-
-def _get_etag(path):
+ Since we don't have that we should yield after each chunk read and
+ computed so that we don't consume the worker thread.
+ """
etag = md5()
with open(path, 'rb') as fp:
while True:
chunk = fp.read(CHUNK_SIZE)
if chunk:
etag.update(chunk)
+ if len(chunk) >= CHUNK_SIZE:
+ # It is likely that we have more data to be read from the
+ # file. Yield the co-routine cooperatively to avoid
+ # consuming the worker during md5sum() calculations on
+ # large files.
+ sleep()
else:
break
return etag.hexdigest()
@@ -364,14 +346,11 @@ def get_object_metadata(obj_path):
"""
Return metadata of object.
"""
- try:
- stats = do_stat(obj_path)
- except OSError as e:
- if e.errno != errno.ENOENT:
- raise
+ stats = do_stat(obj_path)
+ if not stats:
metadata = {}
else:
- is_dir = (stats.st_mode & 0040000) != 0
+ is_dir = stat.S_ISDIR(stats.st_mode)
metadata = {
X_TYPE: OBJECT,
X_TIMESTAMP: normalize_timestamp(stats.st_ctime),
@@ -444,12 +423,14 @@ def create_object_metadata(obj_path):
def create_container_metadata(cont_path):
metadata = get_container_metadata(cont_path)
- return restore_metadata(cont_path, metadata)
+ rmd = restore_metadata(cont_path, metadata)
+ return rmd
def create_account_metadata(acc_path):
metadata = get_account_metadata(acc_path)
- return restore_metadata(acc_path, metadata)
+ rmd = restore_metadata(acc_path, metadata)
+ return rmd
def write_pickle(obj, dest, tmp=None, pickle_protocol=0):
@@ -498,42 +479,71 @@ def dir_is_object(metadata):
def rmobjdir(dir_path):
"""
- Removes the directory as long as there are
- no objects stored in it. This works for containers also.
+ Removes the directory as long as there are no objects stored in it. This
+ works for containers also.
"""
try:
- if dir_empty(dir_path):
- rmdirs(dir_path)
- return True
- except FileOrDirNotFoundError:
- # No such directory exists
- return False
+ do_rmdir(dir_path)
+ except OSError as err:
+ if err.errno == errno.ENOENT:
+ # No such directory exists
+ return False
+ if err.errno != errno.ENOTEMPTY:
+ raise
+ # Handle this non-empty directories below.
+ else:
+ return True
+ # We have a directory that is not empty, walk it to see if it is filled
+ # with empty sub-directories that are not user created objects
+ # (gratuitously created as a result of other object creations).
for (path, dirs, files) in do_walk(dir_path, topdown=False):
for directory in dirs:
fullpath = os.path.join(path, directory)
- metadata = read_metadata(fullpath)
- if not dir_is_object(metadata):
- # Directory is not an object created by the caller
- # so we can go ahead and delete it.
- try:
- if dir_empty(fullpath):
- rmdirs(fullpath)
- else:
- # Non-object dir is not empty!
- return False
- except FileOrDirNotFoundError:
- # No such dir!
- return False
+ try:
+ metadata = read_metadata(fullpath)
+ except OSError as err:
+ if err.errno == errno.ENOENT:
+ # Ignore removal from another entity.
+ continue
+ raise
else:
- # Wait, this is an object created by the caller
- # We cannot delete
- return False
- rmdirs(dir_path)
- return True
+ if dir_is_object(metadata):
+ # Wait, this is an object created by the caller
+ # We cannot delete
+ return False
+
+ # Directory is not an object created by the caller
+ # so we can go ahead and delete it.
+ try:
+ do_rmdir(fullpath)
+ except OSError as err:
+ if err.errno == errno.ENOTEMPTY:
+ # Directory is not empty, it might have objects in it
+ return False
+ if err.errno == errno.ENOENT:
+ # No such directory exists, already removed, ignore
+ continue
+ raise
+
+ try:
+ do_rmdir(dir_path)
+ except OSError as err:
+ if err.errno == errno.ENOTEMPTY:
+ # Directory is not empty, race with object creation
+ return False
+ if err.errno == errno.ENOENT:
+ # No such directory exists, already removed, ignore
+ return True
+ raise
+ else:
+ return True
# Over-ride Swift's utils.write_pickle with ours
+#
+# FIXME: Is this even invoked anymore given we don't perform container or
+# account updates?
import swift.common.utils
swift.common.utils.write_pickle = write_pickle