diff options
author | Prashanth Pai <ppai@redhat.com> | 2016-03-11 17:56:46 +0530 |
---|---|---|
committer | Thiago da Silva <thiago@redhat.com> | 2016-05-03 12:47:05 -0700 |
commit | fddb5d4a918affe7837d523b56e53e33f3ae5408 (patch) | |
tree | 919445423bd690ff74a64bf7e96dce0e8ad06eb9 | |
parent | 933bc5ade145413b0c7307a12b9d0b4084e7d767 (diff) |
Open object only if it's going to be read
Open()ing an object is necessary only in two cases:
* Serving a GET request
* Recalculating etag when metadata is stale
(can be triggered by any type of request)
This change ensures that for requests other than GET, a file is not
opened if the metadata is valid (size and etag accurate). Note that
if metadata is stale, the file is still opened and read to compute etag.
This patch does not change the behaviour of triggering metadata
validation and regeneration for non-GET requests.
Change-Id: Icefa4dec7d715ec9e6dd68ae7fe89a0d90fe71b3
Signed-off-by: Prashanth Pai <ppai@redhat.com>
Reviewed-on: http://review.gluster.org/13684
Reviewed-by: Thiago da Silva <thiago@redhat.com>
Tested-by: Thiago da Silva <thiago@redhat.com>
-rw-r--r-- | gluster/swift/obj/diskfile.py | 44 | ||||
-rw-r--r-- | test/unit/obj/test_diskfile.py | 48 |
2 files changed, 86 insertions, 6 deletions
diff --git a/gluster/swift/obj/diskfile.py b/gluster/swift/obj/diskfile.py index 14625d9..2e0837e 100644 --- a/gluster/swift/obj/diskfile.py +++ b/gluster/swift/obj/diskfile.py @@ -613,11 +613,13 @@ class DiskFile(object): raise DiskFileNotExist raise try: - self._stat = do_fstat(self._fd) + if not self._stat: + self._stat = do_fstat(self._fd) self._is_dir = stat.S_ISDIR(self._stat.st_mode) obj_size = self._stat.st_size - self._metadata = read_metadata(self._fd) + if not self._metadata: + self._metadata = read_metadata(self._fd) if not validate_object(self._metadata, self._stat): self._metadata = create_object_metadata(self._fd, self._stat, self._metadata) @@ -731,12 +733,42 @@ class DiskFile(object): This method is invoked by Swift code in POST, PUT, HEAD and DELETE path metadata = disk_file.read_metadata() + The operations performed here is very similar to those made in open(). + This is to avoid opening and closing of file (two syscalls over + network). IOW, this optimization addresses the case where the fd + returned by open() isn't going to be used i.e the file is not read (GET + or metadata recalculation) + :returns: metadata dictionary for an object :raises DiskFileError: this implementation will raise the same errors as the `open()` method. """ - with self.open(): - return self.get_metadata() + try: + self._metadata = read_metadata(self._data_file) + except (OSError, IOError) as err: + if err.errno in (errno.ENOENT, errno.ESTALE): + raise DiskFileNotExist + raise err + + if self._metadata and self._is_object_expired(self._metadata): + raise DiskFileExpired(metadata=self._metadata) + + try: + self._stat = do_stat(self._data_file) + except (OSError, IOError) as err: + if err.errno in (errno.ENOENT, errno.ESTALE): + raise DiskFileNotExist + raise err + + if not validate_object(self._metadata, self._stat): + # Metadata is stale/invalid. So open the object for reading + # to update Etag and other metadata. 
+ with self.open(): + return self._metadata + else: + # Metadata is valid. Don't have to open the file. + self._filter_metadata() + return self._metadata def reader(self, iter_hook=None, keep_cache=False): """ @@ -1046,9 +1078,9 @@ class DiskFile(object): errors as the `create()` method. """ try: - metadata = read_metadata(self._data_file) + metadata = self._metadata or read_metadata(self._data_file) except (IOError, OSError) as err: - if err.errno != errno.ENOENT: + if err.errno not in (errno.ESTALE, errno.ENOENT): raise else: if metadata[X_TIMESTAMP] >= timestamp: diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py index 9ef186b..b190526 100644 --- a/test/unit/obj/test_diskfile.py +++ b/test/unit/obj/test_diskfile.py @@ -205,6 +205,54 @@ class TestDiskFile(unittest.TestCase): self.assertRaises(DiskFileNotOpen, gdf.reader) self.assertRaises(DiskFileNotOpen, gdf.__enter__) + def test_read_metadata_optimize_open_close(self): + the_path = os.path.join(self.td, "vol0", "bar") + the_file = os.path.join(the_path, "z") + os.makedirs(the_path) + with open(the_file, "wb") as fd: + fd.write("1234") + init_md = { + 'X-Type': 'Object', + 'X-Object-Type': 'file', + 'Content-Length': 4, + 'ETag': md5("1234").hexdigest(), + 'X-Timestamp': normalize_timestamp(os.stat(the_file).st_ctime), + 'Content-Type': 'application/octet-stream'} + _metadata[_mapit(the_file)] = init_md + gdf = self._get_diskfile("vol0", "p57", "ufo47", "bar", "z") + assert gdf._obj == "z" + assert gdf._fd is None + assert gdf._disk_file_open is False + assert gdf._metadata is None + assert not gdf._is_dir + + # Case 1 + # Ensure that reading metadata for non-GET requests + # does not incur opening and closing the file when + # metadata is NOT stale. 
+ mock_open = Mock() + mock_close = Mock() + with mock.patch("gluster.swift.obj.diskfile.do_open", mock_open): + with mock.patch("gluster.swift.obj.diskfile.do_close", mock_close): + md = gdf.read_metadata() + self.assertEqual(md, init_md) + self.assertFalse(mock_open.called) + self.assertFalse(mock_close.called) + + # Case 2 + # Ensure that reading metadata for non-GET requests + # still opens and reads the file when metadata is stale + with open(the_file, "a") as fd: + # Append to the existing file to make the stored metadata + # invalid/stale. + fd.write("5678") + md = gdf.read_metadata() + # Check that the stale metadata is recalculated to account for + # change in file content + self.assertNotEqual(md, init_md) + self.assertEqual(md['Content-Length'], 8) + self.assertEqual(md['ETag'], md5("12345678").hexdigest()) + def test_open_and_close(self): mock_close = Mock() |