summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeter Portante <peter.portante@redhat.com>2013-08-20 18:32:28 -0400
committerPeter Portante <pportant@redhat.com>2013-09-08 12:31:16 -0700
commitbd62696fa9fe694a1053e6c23f8c7ee0c66b76b6 (patch)
treebfcf828704ce5b91662f8597c1c56414d0eb33cb
parent01586181f2db85c01b3cb6c8743f4d4167848248 (diff)
Fix infinite loop for temp file renames on ENOENTv1.8.0-7grizzly
This is a port from master branch fix http://review.gluster.org/5670 to grizzly branch. For whatever reason, it appears that GlusterFS, or perhaps FUSE can continuously return ENOENT on a rename system call even when we have double checked that there is no reason to do so. That is a bug for that sub system. However, our response to that bug can result in an infinite loop, which is bad. This code reduces that to 10 attempts. In addition, we restructed the open retry loop to match, providing module constants for the upper bounds of both retry loops. BUG: 1005379 (https://bugzilla.redhat.com/show_bug.cgi?id=1005379) Change-Id: Ia2d6dd427daba3ea0461863c5ffe3aef27c88f9b Signed-off-by: Peter Portante <peter.portante@redhat.com> Reviewed-on: http://review.gluster.org/5670 Reviewed-by: Luis Pabon <lpabon@redhat.com> Tested-by: Luis Pabon <lpabon@redhat.com> Signed-off-by: Luis Pabon <lpabon@redhat.com> Reviewed-on: http://review.gluster.org/5848 Reviewed-by: Peter Portante <pportant@redhat.com> Tested-by: Peter Portante <pportant@redhat.com>
-rw-r--r--gluster/swift/common/DiskFile.py74
-rw-r--r--test/unit/common/test_diskfile.py195
-rw-r--r--test/unit/common/test_utils.py5
3 files changed, 235 insertions, 39 deletions
diff --git a/gluster/swift/common/DiskFile.py b/gluster/swift/common/DiskFile.py
index d64726b..62a9998 100644
--- a/gluster/swift/common/DiskFile.py
+++ b/gluster/swift/common/DiskFile.py
@@ -47,6 +47,9 @@ DEFAULT_DISK_CHUNK_SIZE = 65536
# keep these lower-case
DISALLOWED_HEADERS = set('content-length content-type deleted etag'.split())
+MAX_RENAME_ATTEMPTS = 10
+MAX_OPEN_ATTEMPTS = 10
+
def _random_sleep():
sleep(random.uniform(0.5, 0.15))
@@ -231,11 +234,6 @@ if _fs_conf.read(os.path.join('/etc/swift', 'fs.conf')):
except (NoSectionError, NoOptionError):
_use_put_mount = False
try:
- _relaxed_writes = _fs_conf.get('DEFAULT', 'relaxed_writes', "no") \
- in TRUE_VALUES
- except (NoSectionError, NoOptionError):
- _relaxed_writes = False
- try:
_preallocate = _fs_conf.get('DEFAULT', 'preallocate', "no") \
in TRUE_VALUES
except (NoSectionError, NoOptionError):
@@ -243,7 +241,6 @@ if _fs_conf.read(os.path.join('/etc/swift', 'fs.conf')):
else:
_mkdir_locking = False
_use_put_mount = False
- _relaxed_writes = False
_preallocate = False
if _mkdir_locking:
@@ -513,24 +510,25 @@ class Gluster_DiskFile(DiskFile):
# disk.
write_metadata(fd, metadata)
- if not _relaxed_writes:
- do_fsync(fd)
- if X_CONTENT_LENGTH in metadata:
- # Don't bother doing this before fsync in case the OS gets any
- # ideas to issue partial writes.
- fsize = int(metadata[X_CONTENT_LENGTH])
- self.drop_cache(fd, 0, fsize)
+ do_fsync(fd)
+ if X_CONTENT_LENGTH in metadata:
+ # Don't bother doing this before fsync in case the OS gets any
+ # ideas to issue partial writes.
+ fsize = int(metadata[X_CONTENT_LENGTH])
+ self.drop_cache(fd, 0, fsize)
# At this point we know that the object's full directory path exists,
# so we can just rename it directly without using Swift's
# swift.common.utils.renamer(), which makes the directory path and
# adds extra stat() calls.
data_file = os.path.join(self.put_datadir, self._obj)
+ attempts = 1
while True:
try:
os.rename(self.tmppath, data_file)
except OSError as err:
- if err.errno in (errno.ENOENT, errno.EIO):
+ if err.errno in (errno.ENOENT, errno.EIO) \
+ and attempts < MAX_RENAME_ATTEMPTS:
# FIXME: Why either of these two error conditions is
# happening is unknown at this point. This might be a FUSE
# issue of some sort or a possible race condition. So
@@ -578,6 +576,7 @@ class Gluster_DiskFile(DiskFile):
self.tmppath, data_file,
str(err), self.put_datadir,
dfstats))
+ attempts += 1
continue
else:
raise GlusterFileSystemOSError(
@@ -696,7 +695,8 @@ class Gluster_DiskFile(DiskFile):
# Assume the full directory path exists to the file already, and
# construct the proper name for the temporary file.
- for i in range(0, 1000):
+ attempts = 1
+ while True:
tmpfile = '.' + self._obj + '.' + md5(self._obj +
str(random.random())).hexdigest()
tmppath = os.path.join(self.put_datadir, tmpfile)
@@ -709,20 +709,29 @@ class Gluster_DiskFile(DiskFile):
excp = DiskFileNoSpace()
excp.drive = os.path.basename(self.device_path)
raise excp
+ if gerr.errno not in (errno.ENOENT, errno.EEXIST, errno.EIO):
+ # FIXME: Other cases we should handle?
+ raise
+ if attempts >= MAX_OPEN_ATTEMPTS:
+ # We failed after N attempts to create the temporary
+ # file.
+ raise DiskFileError('DiskFile.mkstemp(): failed to'
+ ' successfully create a temporary file'
+ ' without running into a name conflict'
+ ' after %d of %d attempts for: %s' % (
+ attempts, MAX_OPEN_ATTEMPTS,
+ data_file))
if gerr.errno == errno.EEXIST:
# Retry with a different random number.
- continue
- if gerr.errno == errno.EIO:
+ attempts += 1
+ elif gerr.errno == errno.EIO:
# FIXME: Possible FUSE issue or race condition, let's
# sleep on it and retry the operation.
_random_sleep()
logging.warn("DiskFile.mkstemp(): %s ... retrying in"
" 0.1 secs", gerr)
- continue
- if gerr.errno != errno.ENOENT:
- # FIXME: Other cases we should handle?
- raise
- if not self._obj_path:
+ attempts += 1
+ elif not self._obj_path:
# No directory hierarchy and the create failed telling us
# the container or volume directory does not exist. This
# could be a FUSE issue or some race condition, so let's
@@ -730,27 +739,22 @@ class Gluster_DiskFile(DiskFile):
_random_sleep()
logging.warn("DiskFile.mkstemp(): %s ... retrying in"
" 0.1 secs", gerr)
- continue
- if i != 0:
+ attempts += 1
+ elif attempts > 1:
# Got ENOENT after previously making the path. This could
# also be a FUSE issue or some race condition, nap and
# retry.
_random_sleep()
logging.warn("DiskFile.mkstemp(): %s ... retrying in"
" 0.1 secs" % gerr)
- continue
- # It looks like the path to the object does not already exist
- self._create_dir_object(self._obj_path)
- continue
+ attempts += 1
+ else:
+ # It looks like the path to the object does not already
+ # exist; don't count this as an attempt, though, since
+ # we perform the open() system call optimistically.
+ self._create_dir_object(self._obj_path)
else:
break
- else:
- # We failed after 1,000 attempts to create the temporary file.
- raise DiskFileError('DiskFile.mkstemp(): failed to successfully'
- ' create a temporary file without running'
- ' into a name conflict after 1,000 attempts'
- ' for: %s' % (data_file,))
-
self.tmppath = tmppath
try:
diff --git a/test/unit/common/test_diskfile.py b/test/unit/common/test_diskfile.py
index 410f113..564978f 100644
--- a/test/unit/common/test_diskfile.py
+++ b/test/unit/common/test_diskfile.py
@@ -25,11 +25,12 @@ import mock
from mock import patch
from hashlib import md5
-import gluster.swift.common.utils
-import gluster.swift.common.DiskFile
from swift.common.utils import normalize_timestamp
-from gluster.swift.common.DiskFile import Gluster_DiskFile
from swift.common.exceptions import DiskFileNotExist, DiskFileError
+
+from gluster.swift.common.DiskFile import Gluster_DiskFile, GlusterFileSystemOSError
+import gluster.swift.common.utils
+import gluster.swift.common.DiskFile
from gluster.swift.common.utils import DEFAULT_UID, DEFAULT_GID, X_TYPE, \
X_OBJECT_TYPE, DIR_OBJECT
from test_utils import _initxattr, _destroyxattr
@@ -596,6 +597,7 @@ class TestDiskFile(unittest.TestCase):
'ETag': etag,
'Content-Length': '5',
}
+
def mock_open(*args, **kwargs):
raise OSError(errno.ENOSPC, os.strerror(errno.ENOSPC))
@@ -611,6 +613,193 @@ class TestDiskFile(unittest.TestCase):
finally:
shutil.rmtree(td)
+ def test_put_rename_ENOENT(self):
+ td = tempfile.mkdtemp()
+ the_cont = os.path.join(td, "vol0", "bar")
+ try:
+ os.makedirs(the_cont)
+ gdf = Gluster_DiskFile(td, "vol0", "p57", "ufo47", "bar", "z", self.lg)
+ assert gdf._obj == "z"
+ assert gdf._obj_path == ""
+ assert gdf.name == "bar"
+ assert gdf.datadir == the_cont
+ assert gdf.data_file is None
+
+ body = '1234\n'
+ etag = md5()
+ etag.update(body)
+ etag = etag.hexdigest()
+ metadata = {
+ 'X-Timestamp': '1234',
+ 'Content-Type': 'file',
+ 'ETag': etag,
+ 'Content-Length': '5',
+ }
+
+ def mock_sleep(*args, **kwargs):
+ # Return without sleep, no need to dely unit tests
+ return
+
+ def mock_rename(*args, **kwargs):
+ raise OSError(errno.ENOENT, os.strerror(errno.ENOENT))
+
+ with mock.patch("gluster.swift.common.DiskFile.sleep", mock_sleep):
+ with mock.patch("os.rename", mock_rename):
+ try:
+ with gdf.mkstemp() as fd:
+ assert gdf.tmppath is not None
+ tmppath = gdf.tmppath
+ os.write(fd, body)
+ gdf.put(fd, metadata)
+ except GlusterFileSystemOSError:
+ pass
+ else:
+ self.fail("Expected exception DiskFileError")
+ finally:
+ shutil.rmtree(td)
+
+
+ def test_put_rename_ENOENT_filename_conflict(self):
+ td = tempfile.mkdtemp()
+ the_cont = os.path.join(td, "vol0", "bar")
+ try:
+ os.makedirs(the_cont)
+ gdf = Gluster_DiskFile(td, "vol0", "p57", "ufo47", "bar", "z", self.lg)
+ self.assertEqual(gdf._obj, "z")
+ self.assertEqual(gdf._obj_path, "")
+ self.assertEqual(gdf.name, "bar")
+ self.assertEqual(gdf.datadir, the_cont)
+ self.assertEqual(gdf.data_file, None)
+
+ body = '1234\n'
+ etag = md5()
+ etag.update(body)
+ etag = etag.hexdigest()
+ metadata = {
+ 'X-Timestamp': '1234',
+ 'Content-Type': 'file',
+ 'ETag': etag,
+ 'Content-Length': '5',
+ }
+ class MockOSStat:
+ pass
+
+ def mock_sleep(*args, **kwargs):
+ # Return without sleep, no need to dely unit tests
+ return
+
+ def mock_rename(*args, **kwargs):
+ raise OSError(errno.ENOENT, os.strerror(errno.ENOENT))
+
+ def mock_do_stat(*args, **kwars):
+ stat = MockOSStat()
+ stat.st_ino = 1
+ return stat
+
+ def mock_do_fstat(*args, **kwars):
+ stat = MockOSStat()
+ stat.st_ino = 2
+ return stat
+
+ with mock.patch("gluster.swift.common.DiskFile.sleep", mock_sleep):
+ with mock.patch("os.rename", mock_rename):
+ with mock.patch("gluster.swift.common.DiskFile.do_stat", mock_do_stat):
+ with mock.patch("gluster.swift.common.DiskFile.do_fstat", mock_do_fstat):
+ with gdf.mkstemp() as fd:
+ self.assertNotEqual(gdf.tmppath, None)
+ os.write(fd, body)
+ self.assertRaises(DiskFileError, gdf.put, fd, metadata)
+ finally:
+ shutil.rmtree(td)
+
+ def test_put_rename_ENOENT_bad_path_datafile_target(self):
+ td = tempfile.mkdtemp()
+ the_cont = os.path.join(td, "vol0", "bar")
+ try:
+ os.makedirs(the_cont)
+ gdf = Gluster_DiskFile(td, "vol0", "p57", "ufo47", "bar", "z", self.lg)
+ self.assertEqual(gdf._obj, "z")
+ self.assertEqual(gdf._obj_path, "")
+ self.assertEqual(gdf.name, "bar")
+ self.assertEqual(gdf.datadir, the_cont)
+ self.assertEqual(gdf.data_file, None)
+
+ body = '1234\n'
+ etag = md5()
+ etag.update(body)
+ etag = etag.hexdigest()
+ metadata = {
+ 'X-Timestamp': '1234',
+ 'Content-Type': 'file',
+ 'ETag': etag,
+ 'Content-Length': '5',
+ }
+
+ def mock_sleep(*args, **kwargs):
+ # Return without sleep, no need to dely unit tests
+ return
+
+ def mock_rename(*args, **kwargs):
+ raise OSError(errno.ENOENT, os.strerror(errno.ENOENT))
+
+ with mock.patch("gluster.swift.common.DiskFile.sleep", mock_sleep):
+ with mock.patch("os.rename", mock_rename):
+ with gdf.mkstemp() as fd:
+ self.assertNotEqual(gdf.tmppath, None)
+ os.write(fd, body)
+
+ # Purpusely make the datadir non-existent
+ nonexistdir = tempfile.mkdtemp()
+ shutil.rmtree(nonexistdir)
+ gdf.put_datadir = nonexistdir
+
+ self.assertRaises(DiskFileError, gdf.put, fd, metadata)
+ finally:
+ shutil.rmtree(td)
+
+ def test_put_rename_ENOENT_target_no_longer_a_dir(self):
+ td = tempfile.mkdtemp()
+ the_cont = os.path.join(td, "vol0", "bar")
+ try:
+ os.makedirs(the_cont)
+ gdf = Gluster_DiskFile(td, "vol0", "p57", "ufo47", "bar", "z", self.lg)
+ self.assertEqual(gdf._obj, "z")
+ self.assertEqual(gdf._obj_path, "")
+ self.assertEqual(gdf.name, "bar")
+ self.assertEqual(gdf.datadir, the_cont)
+ self.assertEqual(gdf.data_file, None)
+
+ body = '1234\n'
+ etag = md5()
+ etag.update(body)
+ etag = etag.hexdigest()
+ metadata = {
+ 'X-Timestamp': '1234',
+ 'Content-Type': 'file',
+ 'ETag': etag,
+ 'Content-Length': '5',
+ }
+
+ def mock_sleep(*args, **kwargs):
+ # Return without sleep, no need to dely unit tests
+ return
+
+ def mock_rename(*args, **kwargs):
+ raise OSError(errno.ENOENT, os.strerror(errno.ENOENT))
+
+ def mock_stat_S_ISDIR(*args, **kwargs):
+ return False
+
+ with mock.patch("gluster.swift.common.DiskFile.sleep", mock_sleep):
+ with mock.patch("os.rename", mock_rename):
+ with mock.patch("gluster.swift.common.DiskFile.stat.S_ISDIR", mock_stat_S_ISDIR):
+ with gdf.mkstemp() as fd:
+ self.assertNotEqual(gdf.tmppath, None)
+ os.write(fd, body)
+ self.assertRaises(DiskFileError, gdf.put, fd, metadata)
+ finally:
+ shutil.rmtree(td)
+
def test_put_obj_path(self):
the_obj_path = os.path.join("b", "a")
the_file = os.path.join(the_obj_path, "z")
diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py
index 4aae4c3..e86a0e9 100644
--- a/test/unit/common/test_utils.py
+++ b/test/unit/common/test_utils.py
@@ -585,10 +585,12 @@ class TestUtils(unittest.TestCase):
os.chdir(orig_cwd)
shutil.rmtree(td)
- def test_get_container_details(self):
+ def test_get_container_details_and_size(self):
orig_cwd = os.getcwd()
+ __do_getsize = Glusterfs._do_getsize
td = tempfile.mkdtemp()
try:
+ Glusterfs._do_getsize = False
tf = tarfile.open("common/data/container_tree.tar.bz2", "r:bz2")
os.chdir(td)
tf.extractall()
@@ -610,6 +612,7 @@ class TestUtils(unittest.TestCase):
full_dir3: os.path.getmtime(full_dir3),
}
finally:
+ Glusterfs._do_getsize = __do_getsize
os.chdir(orig_cwd)
shutil.rmtree(td)