summaryrefslogtreecommitdiffstats
path: root/ufo
diff options
context:
space:
mode:
authorMohammed Junaid <junaid@redhat.com>2013-04-07 06:05:56 +0530
committerAnand Avati <avati@redhat.com>2013-04-12 13:48:43 -0700
commitbbaa273468f8e5377027aedcabcaa076dd7fec7e (patch)
tree89447270516e604cd61d58a2ac9229716086cd6d /ufo
parentf34343d3751cd73e8eabe6d5544fb1f58b316595 (diff)
object-storage: turn off stat() for container list
Turn off stat() system calls used to fetch the file size during a container listing operation, since these system calls can swamp Gluster and the result is most often not used. When a GET or HEAD request is made on a container, stat() system calls are made by the Python standard library function os.walk to determine if a given directory entry is another directory to recurse into, and then utils._update_list() will stat() each file to get its size, and finally utils.get_container_details_from_fs() will stat() each directory encountered. For most installations we have seen so far, we don't need the container listing to accurately return the size of all the objects in the container, so we can reduce the number of stat() system calls by not fetching the size of the object. For now, turn it off by default, and provide an /etc/swift/fs.conf configuration parameter to turn it back on: accurate_size_in_listing = yes. The default for the above is "no". Change-Id: I7dde11e14bb32ecafa3eabb08852f1ffc4366b35 BUG: 903396 Signed-off-by: Mohammed Junaid <junaid@redhat.com> Reviewed-on: http://review.gluster.org/4787 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com> Reviewed-by: Anand Avati <avati@redhat.com>
Diffstat (limited to 'ufo')
-rw-r--r--ufo/etc/fs.conf-gluster8
-rw-r--r--ufo/gluster/swift/common/Glusterfs.py7
-rw-r--r--ufo/gluster/swift/common/utils.py2
-rw-r--r--ufo/test/unit/common/test_utils.py43
4 files changed, 56 insertions, 4 deletions
diff --git a/ufo/etc/fs.conf-gluster b/ufo/etc/fs.conf-gluster
index 54822a43c4e..71a9b03130b 100644
--- a/ufo/etc/fs.conf-gluster
+++ b/ufo/etc/fs.conf-gluster
@@ -7,3 +7,11 @@ mount_ip = localhost
# methods besides UFO (not object only), which disables a caching
# optimizations in order to keep in sync with file system changes.
object_only = no
+
+# Performance optimization parameter. When turned off, the filesystem will
+# see a reduced number of stat calls, resulting in substantially faster
+# response time for GET and HEAD container requests on containers with large
+# numbers of objects, at the expense of an accurate count of combined bytes
+# used by all objects in the container. For most installations "off" works
+# fine.
+accurate_size_in_listing = off \ No newline at end of file
diff --git a/ufo/gluster/swift/common/Glusterfs.py b/ufo/gluster/swift/common/Glusterfs.py
index 6cbdf6c3551..1053610b386 100644
--- a/ufo/gluster/swift/common/Glusterfs.py
+++ b/ufo/gluster/swift/common/Glusterfs.py
@@ -27,6 +27,7 @@ MOUNT_IP = 'localhost'
OBJECT_ONLY = False
RUN_DIR='/var/run/swift'
SWIFT_DIR = '/etc/swift'
+_do_getsize = False
if _fs_conf.read(os.path.join('/etc/swift', 'fs.conf')):
try:
MOUNT_IP = _fs_conf.get('DEFAULT', 'mount_ip', 'localhost')
@@ -41,6 +42,12 @@ if _fs_conf.read(os.path.join('/etc/swift', 'fs.conf')):
except (NoSectionError, NoOptionError):
pass
+ try:
+ _do_getsize = _fs_conf.get('DEFAULT', 'accurate_size_in_listing', \
+ "no") in TRUE_VALUES
+ except (NoSectionError, NoOptionError):
+ pass
+
NAME = 'glusterfs'
diff --git a/ufo/gluster/swift/common/utils.py b/ufo/gluster/swift/common/utils.py
index 7e9f8a60f74..f2cd8dea10c 100644
--- a/ufo/gluster/swift/common/utils.py
+++ b/ufo/gluster/swift/common/utils.py
@@ -241,7 +241,7 @@ def _update_list(path, cont_path, src_list, reg_file=True, object_count=0,
object_count += 1
- if reg_file:
+ if Glusterfs._do_getsize and reg_file:
bytes_used += os_path.getsize(os.path.join(path, obj_name))
sleep()
diff --git a/ufo/test/unit/common/test_utils.py b/ufo/test/unit/common/test_utils.py
index 92ce9aef30f..c645509fa24 100644
--- a/ufo/test/unit/common/test_utils.py
+++ b/ufo/test/unit/common/test_utils.py
@@ -26,7 +26,7 @@ import tarfile
import shutil
from collections import defaultdict
from swift.common.utils import normalize_timestamp
-from gluster.swift.common import utils
+from gluster.swift.common import utils, Glusterfs
#
# Somewhat hacky way of emulating the operation of xattr calls. They are made
@@ -755,7 +755,7 @@ class TestUtils(unittest.TestCase):
utils._get_account_details_from_fs = orig_gcdff
utils.do_stat = orig_ds
- def test_get_container_details_from_fs(self):
+ def test_get_account_details_from_fs(self):
orig_cwd = os.getcwd()
td = tempfile.mkdtemp()
try:
@@ -779,7 +779,39 @@ class TestUtils(unittest.TestCase):
assert cd.obj_list == []
assert cd.dir_list == []
- def test_get_account_details_from_fs(self):
+ def test_get_container_details_from_fs(self):
+ orig_cwd = os.getcwd()
+ td = tempfile.mkdtemp()
+ try:
+ tf = tarfile.open("common/data/container_tree.tar.bz2", "r:bz2")
+ os.chdir(td)
+ tf.extractall()
+
+ cd = utils._get_container_details_from_fs(td)
+ assert cd.bytes_used == 0, repr(cd.bytes_used)
+ assert cd.object_count == 8, repr(cd.object_count)
+ assert set(cd.obj_list) == set(['file1', 'file3', 'file2',
+ 'dir3', 'dir1', 'dir2',
+ 'dir1/file1', 'dir1/file2'
+ ]), repr(cd.obj_list)
+
+ full_dir1 = os.path.join(td, 'dir1')
+ full_dir2 = os.path.join(td, 'dir2')
+ full_dir3 = os.path.join(td, 'dir3')
+ exp_dir_dict = { td: os.path.getmtime(td),
+ full_dir1: os.path.getmtime(full_dir1),
+ full_dir2: os.path.getmtime(full_dir2),
+ full_dir3: os.path.getmtime(full_dir3),
+ }
+ for d,m in cd.dir_list:
+ assert d in exp_dir_dict
+ assert exp_dir_dict[d] == m
+ finally:
+ os.chdir(orig_cwd)
+ shutil.rmtree(td)
+
+
+ def test_get_container_details_from_fs_do_getsize_true(self):
orig_cwd = os.getcwd()
td = tempfile.mkdtemp()
try:
@@ -787,6 +819,9 @@ class TestUtils(unittest.TestCase):
os.chdir(td)
tf.extractall()
+ __do_getsize = Glusterfs._do_getsize
+ Glusterfs._do_getsize = True
+
cd = utils._get_container_details_from_fs(td)
assert cd.bytes_used == 30, repr(cd.bytes_used)
assert cd.object_count == 8, repr(cd.object_count)
@@ -794,6 +829,7 @@ class TestUtils(unittest.TestCase):
'dir3', 'dir1', 'dir2',
'dir1/file1', 'dir1/file2'
]), repr(cd.obj_list)
+
full_dir1 = os.path.join(td, 'dir1')
full_dir2 = os.path.join(td, 'dir2')
full_dir3 = os.path.join(td, 'dir3')
@@ -806,6 +842,7 @@ class TestUtils(unittest.TestCase):
assert d in exp_dir_dict
assert exp_dir_dict[d] == m
finally:
+ Glusterfs._do_getsize = __do_getsize
os.chdir(orig_cwd)
shutil.rmtree(td)