summaryrefslogtreecommitdiffstats
path: root/gluster/swift/obj/diskfile.py
diff options
context:
space:
mode:
authorPeter Portante <peter.portante@redhat.com>2013-08-20 18:32:28 -0400
committerLuis Pabon <lpabon@redhat.com>2013-08-28 19:23:07 -0700
commit4735980723dbdc10e86dab15a34a2eab9073b693 (patch)
tree950d7e587e56de925f97d4279ef7130119230ac4 /gluster/swift/obj/diskfile.py
parentb00f10f5ca6f1300d6ffb4f579778499e906576a (diff)
Fix infinite loop for temp file renames on ENOENT
For whatever reason, it appears that GlusterFS, or perhaps FUSE can continuously return ENOENT on a rename system call even when we have double checked that there is no reason to do so. That is a bug for that sub system. However, our response to that bug can result in an infinite loop, which is bad. This code reduces that to 10 attempts. In addition, we restructed the open retry loop to match, providing module constants for the upper bounds of both retry loops. BUG: XXXXXX (https://bugzilla.redhat.com/show_bug.cgi?id=XXXXXX) Change-Id: Ia2d6dd427daba3ea0461863c5ffe3aef27c88f9b Signed-off-by: Peter Portante <peter.portante@redhat.com> Reviewed-on: http://review.gluster.org/5670 Reviewed-by: Luis Pabon <lpabon@redhat.com> Tested-by: Luis Pabon <lpabon@redhat.com>
Diffstat (limited to 'gluster/swift/obj/diskfile.py')
-rw-r--r--gluster/swift/obj/diskfile.py86
1 files changed, 45 insertions, 41 deletions
diff --git a/gluster/swift/obj/diskfile.py b/gluster/swift/obj/diskfile.py
index ce69b6d..ea39659 100644
--- a/gluster/swift/obj/diskfile.py
+++ b/gluster/swift/obj/diskfile.py
@@ -50,6 +50,9 @@ DEFAULT_BYTES_PER_SYNC = (512 * 1024 * 1024)
# keep these lower-case
DISALLOWED_HEADERS = set('content-length content-type deleted etag'.split())
+MAX_RENAME_ATTEMPTS = 10
+MAX_OPEN_ATTEMPTS = 10
+
def _random_sleep():
sleep(random.uniform(0.5, 0.15))
@@ -233,15 +236,9 @@ if _fs_conf.read(os.path.join('/etc/swift', 'fs.conf')):
in TRUE_VALUES
except (NoSectionError, NoOptionError):
_use_put_mount = False
- try:
- _relaxed_writes = _fs_conf.get('DEFAULT', 'relaxed_writes', "no") \
- in TRUE_VALUES
- except (NoSectionError, NoOptionError):
- _relaxed_writes = False
else:
_mkdir_locking = False
_use_put_mount = False
- _relaxed_writes = False
if _mkdir_locking:
make_directory = _make_directory_locked
@@ -324,27 +321,28 @@ class DiskWriter(SwiftDiskWriter):
# disk.
write_metadata(self.fd, metadata)
- if not _relaxed_writes:
- # We call fsync() before calling drop_cache() to lower the
- # amount of redundant work the drop cache code will perform on
- # the pages (now that after fsync the pages will be all
- # clean).
- do_fsync(self.fd)
- # From the Department of the Redundancy Department, make sure
- # we call drop_cache() after fsync() to avoid redundant work
- # (pages all clean).
- drop_buffer_cache(self.fd, 0, self.upload_size)
+ # We call fsync() before calling drop_cache() to lower the
+ # amount of redundant work the drop cache code will perform on
+ # the pages (now that after fsync the pages will be all
+ # clean).
+ do_fsync(self.fd)
+ # From the Department of the Redundancy Department, make sure
+ # we call drop_cache() after fsync() to avoid redundant work
+ # (pages all clean).
+ drop_buffer_cache(self.fd, 0, self.upload_size)
# At this point we know that the object's full directory path
# exists, so we can just rename it directly without using Swift's
# swift.common.utils.renamer(), which makes the directory path and
# adds extra stat() calls.
data_file = os.path.join(df.put_datadir, df._obj)
+ attempts = 1
while True:
try:
os.rename(self.tmppath, data_file)
except OSError as err:
- if err.errno in (errno.ENOENT, errno.EIO):
+ if err.errno in (errno.ENOENT, errno.EIO) \
+ and attempts < MAX_RENAME_ATTEMPTS:
# FIXME: Why either of these two error conditions is
# happening is unknown at this point. This might be a
# FUSE issue of some sort or a possible race
@@ -368,7 +366,7 @@ class DiskWriter(SwiftDiskWriter):
self.tmppath, data_file))
else:
# Data file target name now has a bad path!
- dfstats = do_stat(self.put_datadir)
+ dfstats = do_stat(df.put_datadir)
if not dfstats:
raise DiskFileError(
'DiskFile.put(): path to object, %s, no'
@@ -393,6 +391,7 @@ class DiskWriter(SwiftDiskWriter):
self.tmppath, data_file,
str(err), df.put_datadir,
dfstats))
+ attempts += 1
continue
else:
raise GlusterFileSystemOSError(
@@ -557,8 +556,7 @@ class DiskFile(SwiftDiskFile):
see if the directory object already exists, that is left to the caller
(this avoids a potentially duplicate stat() system call).
- The "dir_path" must be relative to its container,
- self._container_path.
+ The "dir_path" must be relative to its container, self._container_path.
The "metadata" object is an optional set of metadata to apply to the
newly created directory object. If not present, no initial metadata is
@@ -624,7 +622,8 @@ class DiskFile(SwiftDiskFile):
# Assume the full directory path exists to the file already, and
# construct the proper name for the temporary file.
- for i in range(0, 1000):
+ attempts = 1
+ while True:
tmpfile = '.' + self._obj + '.' + md5(self._obj +
str(random.random())).hexdigest()
tmppath = os.path.join(self.put_datadir, tmpfile)
@@ -635,20 +634,29 @@ class DiskFile(SwiftDiskFile):
if gerr.errno == errno.ENOSPC:
# Raise DiskFileNoSpace to be handled by upper layers
raise DiskFileNoSpace()
+ if gerr.errno not in (errno.ENOENT, errno.EEXIST, errno.EIO):
+ # FIXME: Other cases we should handle?
+ raise
+ if attempts >= MAX_OPEN_ATTEMPTS:
+ # We failed after N attempts to create the temporary
+ # file.
+ raise DiskFileError('DiskFile.mkstemp(): failed to'
+ ' successfully create a temporary file'
+ ' without running into a name conflict'
+ ' after %d of %d attempts for: %s' % (
+ attempts, MAX_OPEN_ATTEMPTS,
+ data_file))
if gerr.errno == errno.EEXIST:
# Retry with a different random number.
- continue
- if gerr.errno == errno.EIO:
+ attempts += 1
+ elif gerr.errno == errno.EIO:
# FIXME: Possible FUSE issue or race condition, let's
# sleep on it and retry the operation.
_random_sleep()
logging.warn("DiskFile.mkstemp(): %s ... retrying in"
" 0.1 secs", gerr)
- continue
- if gerr.errno != errno.ENOENT:
- # FIXME: Other cases we should handle?
- raise
- if not self._obj_path:
+ attempts += 1
+ elif not self._obj_path:
# No directory hierarchy and the create failed telling us
# the container or volume directory does not exist. This
# could be a FUSE issue or some race condition, so let's
@@ -656,26 +664,22 @@ class DiskFile(SwiftDiskFile):
_random_sleep()
logging.warn("DiskFile.mkstemp(): %s ... retrying in"
" 0.1 secs", gerr)
- continue
- if i != 0:
+ attempts += 1
+ elif attempts > 1:
# Got ENOENT after previously making the path. This could
# also be a FUSE issue or some race condition, nap and
# retry.
_random_sleep()
logging.warn("DiskFile.mkstemp(): %s ... retrying in"
" 0.1 secs" % gerr)
- continue
- # It looks like the path to the object does not already exist
- self._create_dir_object(self._obj_path)
- continue
+ attempts += 1
+ else:
+ # It looks like the path to the object does not already
+ # exist; don't count this as an attempt, though, since
+ # we perform the open() system call optimistically.
+ self._create_dir_object(self._obj_path)
else:
break
- else:
- # We failed after 1,000 attempts to create the temporary file.
- raise DiskFileError('DiskFile.mkstemp(): failed to successfully'
- ' create a temporary file without running'
- ' into a name conflict after 1,000 attempts'
- ' for: %s' % (data_file,))
dw = None
try:
# Ensure it is properly owned before we make it available.
@@ -769,8 +773,8 @@ class DiskFile(SwiftDiskFile):
:raises DiskFileError: on file size mismatch.
:raises DiskFileNotExist: on file not existing (including deleted)
"""
- #Marker directory.
if self._is_dir:
+ # Directories have no size.
return 0
try:
file_size = 0