summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Mohammed Junaid <junaid@redhat.com> 2013-06-27 13:13:34 +0530
committer: Peter Portante <pportant@redhat.com> 2013-06-27 17:21:18 -0700
commit: cc97abca0482d3023c9d2fa9d0028098d08e818b (patch)
tree: 0ac33d40a0dd0cb1a4e61146773d3ce49d8f7fc4
parent: c7ce3beec168cfc530da9e8d11fc1a0e8c80bcce (diff)
object-storage: remove stat of directories
Gathering a list of all the files in a directory tree was painfully slow when there were as many directories as files, because we also stat'd every directory. We did that only so that the result could be cached in memcache and we could tell when it had changed. However, there was no way for a memcache object to be passed down to this code. So we have removed the memcache support entirely, and removed the stat of the directories as a result. See BZ 911448 (https://bugzilla.redhat.com/show_bug.cgi?id=911448). Change-Id: I2625f82eca36c31eeffa84dc76ef7f3b48aafec5 Signed-off-by: Mohammed Junaid <junaid@redhat.com> Reviewed-on: http://review.gluster.org/5252 Reviewed-by: Peter Portante <pportant@redhat.com> Tested-by: Peter Portante <pportant@redhat.com>
-rw-r--r-- gluster/swift/common/utils.py 74
-rw-r--r-- test/unit/common/test_utils.py 251
2 files changed, 24 insertions, 301 deletions
diff --git a/gluster/swift/common/utils.py b/gluster/swift/common/utils.py
index a10d27f..eeebf46 100644
--- a/gluster/swift/common/utils.py
+++ b/gluster/swift/common/utils.py
@@ -53,11 +53,6 @@ DEFAULT_UID = -1
DEFAULT_GID = -1
PICKLE_PROTOCOL = 2
CHUNK_SIZE = 65536
-MEMCACHE_KEY_PREFIX = 'gluster.swift.'
-MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX = MEMCACHE_KEY_PREFIX + \
- 'account.details.'
-MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX = MEMCACHE_KEY_PREFIX + \
- 'container.details.'
def read_metadata(path):
@@ -292,27 +287,12 @@ def _get_container_details_from_fs(cont_path):
return ContainerDetails(bytes_used, object_count, obj_list, dir_list)
-def get_container_details(cont_path, memcache=None):
+def get_container_details(cont_path):
"""
Return object_list, object_count and bytes_used.
"""
- mkey = ''
- if memcache:
- mkey = MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + cont_path
- cd = memcache.get(mkey)
- if cd:
- if not cd.dir_list:
- cd = None
- else:
- for (path, mtime) in cd.dir_list:
- if mtime != do_stat(path).st_mtime:
- cd = None
- else:
- cd = None
- if not cd:
- cd = _get_container_details_from_fs(cont_path)
- if memcache:
- memcache.set(mkey, cd)
+ cd = _get_container_details_from_fs(cont_path)
+
return cd.obj_list, cd.object_count, cd.bytes_used
@@ -330,12 +310,14 @@ class AccountDetails(object):
self.container_list = container_list
-def _get_account_details_from_fs(acc_path, acc_stats):
+def _get_account_details_from_fs(acc_path):
+ """
+ Return container_list and container_count.
+ """
container_list = []
container_count = 0
- if not acc_stats:
- acc_stats = do_stat(acc_path)
+ acc_stats = do_stat(acc_path)
is_dir = (acc_stats.st_mode & 0040000) != 0
if is_dir:
for name in do_listdir(acc_path):
@@ -349,29 +331,12 @@ def _get_account_details_from_fs(acc_path, acc_stats):
return AccountDetails(acc_stats.st_mtime, container_count, container_list)
-def get_account_details(acc_path, memcache=None):
+def get_account_details(acc_path):
"""
Return container_list and container_count.
"""
- acc_stats = None
- mkey = ''
- if memcache:
- mkey = MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX + acc_path
- ad = memcache.get(mkey)
- if ad:
- # FIXME: Do we really need to stat the file? If we are object
- # only, then we can track the other Swift HTTP APIs that would
- # modify the account and invalidate the cached entry there. If we
- # are not object only, are we even called on this path?
- acc_stats = do_stat(acc_path)
- if ad.mtime != acc_stats.st_mtime:
- ad = None
- else:
- ad = None
- if not ad:
- ad = _get_account_details_from_fs(acc_path, acc_stats)
- if memcache:
- memcache.set(mkey, ad)
+ ad = _get_account_details_from_fs(acc_path)
+
return ad.container_list, ad.container_count
@@ -422,12 +387,11 @@ def _add_timestamp(metadata_i):
return metadata
-def get_container_metadata(cont_path, memcache=None):
+def get_container_metadata(cont_path):
objects = []
object_count = 0
bytes_used = 0
- objects, object_count, bytes_used = get_container_details(cont_path,
- memcache)
+ objects, object_count, bytes_used = get_container_details(cont_path)
metadata = {X_TYPE: CONTAINER,
X_TIMESTAMP: normalize_timestamp(
os_path.getctime(cont_path)),
@@ -438,10 +402,10 @@ def get_container_metadata(cont_path, memcache=None):
return _add_timestamp(metadata)
-def get_account_metadata(acc_path, memcache=None):
+def get_account_metadata(acc_path):
containers = []
container_count = 0
- containers, container_count = get_account_details(acc_path, memcache)
+ containers, container_count = get_account_details(acc_path)
metadata = {X_TYPE: ACCOUNT,
X_TIMESTAMP: normalize_timestamp(
os_path.getctime(acc_path)),
@@ -470,13 +434,13 @@ def create_object_metadata(obj_path):
return restore_metadata(obj_path, metadata)
-def create_container_metadata(cont_path, memcache=None):
- metadata = get_container_metadata(cont_path, memcache)
+def create_container_metadata(cont_path):
+ metadata = get_container_metadata(cont_path)
return restore_metadata(cont_path, metadata)
-def create_account_metadata(acc_path, memcache=None):
- metadata = get_account_metadata(acc_path, memcache)
+def create_account_metadata(acc_path):
+ metadata = get_account_metadata(acc_path)
return restore_metadata(acc_path, metadata)
diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py
index 06f1a46..566c70e 100644
--- a/test/unit/common/test_utils.py
+++ b/test/unit/common/test_utils.py
@@ -422,7 +422,7 @@ class TestUtils(unittest.TestCase):
os.rmdir(td)
def test_get_container_metadata(self):
- def _mock_get_container_details(path, memcache=None):
+ def _mock_get_container_details(path):
o_list = [ 'a', 'b', 'c' ]
o_count = 3
b_used = 47
@@ -445,7 +445,7 @@ class TestUtils(unittest.TestCase):
os.rmdir(td)
def test_get_account_metadata(self):
- def _mock_get_account_details(path, memcache=None):
+ def _mock_get_account_details(path):
c_list = [ '123', 'abc' ]
c_count = 2
return c_list, c_count
@@ -520,247 +520,6 @@ class TestUtils(unittest.TestCase):
finally:
os.rmdir(td)
- def test_container_details_uncached(self):
- the_path = "/tmp/bar"
- def mock_get_container_details_from_fs(cont_path):
- bu = 5
- oc = 1
- ol = ['foo',]
- dl = [('a',100),]
- return utils.ContainerDetails(bu, oc, ol, dl)
- orig_gcdff = utils._get_container_details_from_fs
- utils._get_container_details_from_fs = mock_get_container_details_from_fs
- try:
- retval = utils.get_container_details(the_path)
- cd = mock_get_container_details_from_fs(the_path)
- assert retval == (cd.obj_list, cd.object_count, cd.bytes_used)
- finally:
- utils._get_container_details_from_fs = orig_gcdff
-
- def test_container_details_cached_hit(self):
- mc = SimMemcache()
- the_path = "/tmp/bar"
- def mock_get_container_details_from_fs(cont_path, bu_p=5):
- bu = bu_p
- oc = 1
- ol = ['foo',]
- dl = [('a',100),]
- return utils.ContainerDetails(bu, oc, ol, dl)
- def mock_do_stat(path):
- class MockStat(object):
- def __init__(self, mtime):
- self.st_mtime = mtime
- return MockStat(100)
- cd = mock_get_container_details_from_fs(the_path, bu_p=6)
- mc.set(utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + the_path, cd)
- orig_gcdff = utils._get_container_details_from_fs
- utils._get_container_details_from_fs = mock_get_container_details_from_fs
- orig_ds = utils.do_stat
- utils.do_stat = mock_do_stat
- try:
- retval = utils.get_container_details(the_path, memcache=mc)
- # If it did not properly use memcache, the default mocked version
- # of get details from fs would return 5 bytes used instead of the
- # 6 we specified above.
- cd = mock_get_container_details_from_fs(the_path, bu_p=6)
- assert retval == (cd.obj_list, cd.object_count, cd.bytes_used)
- finally:
- utils._get_container_details_from_fs = orig_gcdff
- utils.do_stat = orig_ds
-
- def test_container_details_cached_miss_key(self):
- mc = SimMemcache()
- the_path = "/tmp/bar"
- def mock_get_container_details_from_fs(cont_path, bu_p=5):
- bu = bu_p
- oc = 1
- ol = ['foo',]
- dl = [('a',100),]
- return utils.ContainerDetails(bu, oc, ol, dl)
- def mock_do_stat(path):
- # Be sure we don't miss due to mtimes not matching
- self.fail("do_stat should not have been called")
- cd = mock_get_container_details_from_fs(the_path + "u", bu_p=6)
- mc.set(utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + the_path + "u", cd)
- orig_gcdff = utils._get_container_details_from_fs
- utils._get_container_details_from_fs = mock_get_container_details_from_fs
- orig_ds = utils.do_stat
- utils.do_stat = mock_do_stat
- try:
- retval = utils.get_container_details(the_path, memcache=mc)
- cd = mock_get_container_details_from_fs(the_path)
- assert retval == (cd.obj_list, cd.object_count, cd.bytes_used)
- mkey = utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + the_path
- assert mkey in mc._d
- finally:
- utils._get_container_details_from_fs = orig_gcdff
- utils.do_stat = orig_ds
-
- def test_container_details_cached_miss_dir_list(self):
- mc = SimMemcache()
- the_path = "/tmp/bar"
- def mock_get_container_details_from_fs(cont_path, bu_p=5):
- bu = bu_p
- oc = 1
- ol = ['foo',]
- dl = []
- return utils.ContainerDetails(bu, oc, ol, dl)
- def mock_do_stat(path):
- # Be sure we don't miss due to mtimes not matching
- self.fail("do_stat should not have been called")
- cd = mock_get_container_details_from_fs(the_path, bu_p=6)
- mc.set(utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + the_path, cd)
- orig_gcdff = utils._get_container_details_from_fs
- utils._get_container_details_from_fs = mock_get_container_details_from_fs
- orig_ds = utils.do_stat
- utils.do_stat = mock_do_stat
- try:
- retval = utils.get_container_details(the_path, memcache=mc)
- cd = mock_get_container_details_from_fs(the_path)
- assert retval == (cd.obj_list, cd.object_count, cd.bytes_used)
- mkey = utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + the_path
- assert mkey in mc._d
- assert 5 == mc._d[mkey].bytes_used
- finally:
- utils._get_container_details_from_fs = orig_gcdff
- utils.do_stat = orig_ds
-
- def test_container_details_cached_miss_mtime(self):
- mc = SimMemcache()
- the_path = "/tmp/bar"
- def mock_get_container_details_from_fs(cont_path, bu_p=5):
- bu = bu_p
- oc = 1
- ol = ['foo',]
- dl = [('a',100),]
- return utils.ContainerDetails(bu, oc, ol, dl)
- def mock_do_stat(path):
- # Be sure we miss due to mtimes not matching
- class MockStat(object):
- def __init__(self, mtime):
- self.st_mtime = mtime
- return MockStat(200)
- cd = mock_get_container_details_from_fs(the_path, bu_p=6)
- mc.set(utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + the_path, cd)
- orig_gcdff = utils._get_container_details_from_fs
- utils._get_container_details_from_fs = mock_get_container_details_from_fs
- orig_ds = utils.do_stat
- utils.do_stat = mock_do_stat
- try:
- retval = utils.get_container_details(the_path, memcache=mc)
- cd = mock_get_container_details_from_fs(the_path)
- assert retval == (cd.obj_list, cd.object_count, cd.bytes_used)
- mkey = utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + the_path
- assert mkey in mc._d
- assert 5 == mc._d[mkey].bytes_used
- finally:
- utils._get_container_details_from_fs = orig_gcdff
- utils.do_stat = orig_ds
-
- def test_account_details_uncached(self):
- the_path = "/tmp/bar"
- def mock_get_account_details_from_fs(acc_path, acc_stats):
- mt = 100
- cc = 2
- cl = ['a', 'b']
- return utils.AccountDetails(mt, cc, cl)
- orig_gcdff = utils._get_account_details_from_fs
- utils._get_account_details_from_fs = mock_get_account_details_from_fs
- try:
- retval = utils.get_account_details(the_path)
- ad = mock_get_account_details_from_fs(the_path, None)
- assert retval == (ad.container_list, ad.container_count)
- finally:
- utils._get_account_details_from_fs = orig_gcdff
-
- def test_account_details_cached_hit(self):
- mc = SimMemcache()
- the_path = "/tmp/bar"
- def mock_get_account_details_from_fs(acc_path, acc_stats):
- mt = 100
- cc = 2
- cl = ['a', 'b']
- return utils.AccountDetails(mt, cc, cl)
- def mock_do_stat(path):
- class MockStat(object):
- def __init__(self, mtime):
- self.st_mtime = mtime
- return MockStat(100)
- ad = mock_get_account_details_from_fs(the_path, None)
- ad.container_list = ['x', 'y']
- mc.set(utils.MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX + the_path, ad)
- orig_gcdff = utils._get_account_details_from_fs
- orig_ds = utils.do_stat
- utils._get_account_details_from_fs = mock_get_account_details_from_fs
- utils.do_stat = mock_do_stat
- try:
- retval = utils.get_account_details(the_path, memcache=mc)
- assert retval == (ad.container_list, ad.container_count)
- wrong_ad = mock_get_account_details_from_fs(the_path, None)
- assert wrong_ad != ad
- finally:
- utils._get_account_details_from_fs = orig_gcdff
- utils.do_stat = orig_ds
-
- def test_account_details_cached_miss(self):
- mc = SimMemcache()
- the_path = "/tmp/bar"
- def mock_get_account_details_from_fs(acc_path, acc_stats):
- mt = 100
- cc = 2
- cl = ['a', 'b']
- return utils.AccountDetails(mt, cc, cl)
- def mock_do_stat(path):
- class MockStat(object):
- def __init__(self, mtime):
- self.st_mtime = mtime
- return MockStat(100)
- ad = mock_get_account_details_from_fs(the_path, None)
- ad.container_list = ['x', 'y']
- mc.set(utils.MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX + the_path + 'u', ad)
- orig_gcdff = utils._get_account_details_from_fs
- orig_ds = utils.do_stat
- utils._get_account_details_from_fs = mock_get_account_details_from_fs
- utils.do_stat = mock_do_stat
- try:
- retval = utils.get_account_details(the_path, memcache=mc)
- correct_ad = mock_get_account_details_from_fs(the_path, None)
- assert retval == (correct_ad.container_list, correct_ad.container_count)
- assert correct_ad != ad
- finally:
- utils._get_account_details_from_fs = orig_gcdff
- utils.do_stat = orig_ds
-
- def test_account_details_cached_miss_mtime(self):
- mc = SimMemcache()
- the_path = "/tmp/bar"
- def mock_get_account_details_from_fs(acc_path, acc_stats):
- mt = 100
- cc = 2
- cl = ['a', 'b']
- return utils.AccountDetails(mt, cc, cl)
- def mock_do_stat(path):
- class MockStat(object):
- def __init__(self, mtime):
- self.st_mtime = mtime
- return MockStat(100)
- ad = mock_get_account_details_from_fs(the_path, None)
- ad.container_list = ['x', 'y']
- ad.mtime = 200
- mc.set(utils.MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX + the_path, ad)
- orig_gcdff = utils._get_account_details_from_fs
- orig_ds = utils.do_stat
- utils._get_account_details_from_fs = mock_get_account_details_from_fs
- utils.do_stat = mock_do_stat
- try:
- retval = utils.get_account_details(the_path, memcache=mc)
- correct_ad = mock_get_account_details_from_fs(the_path, None)
- assert retval == (correct_ad.container_list, correct_ad.container_count)
- assert correct_ad != ad
- finally:
- utils._get_account_details_from_fs = orig_gcdff
- utils.do_stat = orig_ds
-
def test_get_account_details_from_fs(self):
orig_cwd = os.getcwd()
td = tempfile.mkdtemp()
@@ -769,7 +528,7 @@ class TestUtils(unittest.TestCase):
os.chdir(td)
tf.extractall()
- ad = utils._get_account_details_from_fs(td, None)
+ ad = utils._get_account_details_from_fs(td)
assert ad.mtime == os.path.getmtime(td)
assert ad.container_count == 3
assert set(ad.container_list) == set(['c1', 'c2', 'c3'])
@@ -887,14 +646,14 @@ class TestUtils(unittest.TestCase):
def test_get_account_details_from_fs_notadir_w_stats(self):
tf = tempfile.NamedTemporaryFile()
- ad = utils._get_account_details_from_fs(tf.name, os.stat(tf.name))
+ ad = utils._get_account_details_from_fs(tf.name)
assert ad.mtime == os.path.getmtime(tf.name)
assert ad.container_count == 0
assert ad.container_list == []
def test_get_account_details_from_fs_notadir(self):
tf = tempfile.NamedTemporaryFile()
- ad = utils._get_account_details_from_fs(tf.name, None)
+ ad = utils._get_account_details_from_fs(tf.name)
assert ad.mtime == os.path.getmtime(tf.name)
assert ad.container_count == 0
assert ad.container_list == []