From ac33dc6dbf1f982cf522556aa938ebfb0e6ddded Mon Sep 17 00:00:00 2001 From: Prashanth Pai Date: Thu, 27 Aug 2015 11:36:19 +0530 Subject: Refactor read_metadata() method This change: * Simplifies read_metadata() method. * Validates pickle header before attempting to unpickle. This change does NOT fix the security vulnerability itself. That would be sent as a separate change. Change-Id: Id95bd584f3ad00fb075456544495f17f7038f991 Signed-off-by: Prashanth Pai Reviewed-by: Thiago da Silva Tested-by: Thiago da Silva Reviewed-on: http://review.gluster.org/13220 --- gluster/swift/common/utils.py | 91 ++++++++++++++++++++------------------ test/unit/common/test_diskdir.py | 8 ++-- test/unit/common/test_utils.py | 33 +++++++------- tools/object_expirer_functional.sh | 1 + 4 files changed, 69 insertions(+), 64 deletions(-) diff --git a/gluster/swift/common/utils.py b/gluster/swift/common/utils.py index 95c8739..9951132 100644 --- a/gluster/swift/common/utils.py +++ b/gluster/swift/common/utils.py @@ -72,67 +72,72 @@ def normalize_timestamp(timestamp): return "%016.05f" % (float(timestamp)) +def serialize_metadata(metadata): + return pickle.dumps(metadata, PICKLE_PROTOCOL) + + +def deserialize_metadata(metastr): + """ + Returns dict populated with metadata if deserializing is successful. + Returns empty dict if deserialzing fails. + """ + if metastr.startswith('\x80\x02}') and metastr.endswith('.'): + # Assert that the metadata was indeed pickled before attempting + # to unpickle. This only *reduces* risk of malicious shell code + # being executed. However, it does NOT fix anything. + try: + return pickle.loads(metastr) + except (pickle.UnpicklingError, EOFError, AttributeError, + IndexError, ImportError, AssertionError): + return {} + else: + return {} + + def read_metadata(path_or_fd): """ - Helper function to read the pickled metadata from a File/Directory. + Helper function to read the serialized metadata from a File/Directory. :param path_or_fd: File/Directory path or fd from which to read metadata. :returns: dictionary of metadata """ - metadata = None - metadata_s = '' + metastr = '' key = 0 - while metadata is None: - try: - metadata_s += do_getxattr(path_or_fd, - '%s%s' % (METADATA_KEY, (key or ''))) - except IOError as err: - if err.errno == errno.ENODATA: - if key > 0: - # No errors reading the xattr keys, but since we have not - # been able to find enough chunks to get a successful - # unpickle operation, we consider the metadata lost, and - # drop the existing data so that the internal state can be - # recreated. - clean_metadata(path_or_fd) - # We either could not find any metadata key, or we could find - # some keys, but were not successful in performing the - # unpickling (missing keys perhaps)? Either way, just report - # to the caller we have no metadata. - metadata = {} - else: - # Note that we don't touch the keys on errors fetching the - # data since it could be a transient state. - raise GlusterFileSystemIOError( - err.errno, 'getxattr("%s", %s)' % (path_or_fd, key)) - else: - try: - # If this key provides all or the remaining part of the pickle - # data, we don't need to keep searching for more keys. This - # means if we only need to store data in N xattr key/value - # pair, we only need to invoke xattr get N times. With large - # keys sizes we are shooting for N = 1. - metadata = pickle.loads(metadata_s) - assert isinstance(metadata, dict) - except EOFError, pickle.UnpicklingError: - # We still are not able recognize this existing data collected - # as a pickled object. Make sure we loop around to try to get - # more from another xattr key. - metadata = None - key += 1 + try: + while True: + metastr += do_getxattr(path_or_fd, '%s%s' % + (METADATA_KEY, (key or ''))) + key += 1 + if len(metastr) < MAX_XATTR_SIZE: + # Prevent further getxattr calls + break + except IOError as err: + if err.errno != errno.ENODATA: + raise + + if not metastr: + return {} + + metadata = deserialize_metadata(metastr) + if not metadata: + # Empty dict i.e deserializing of metadata has failed, probably + # because it is invalid or incomplete or corrupt + clean_metadata(path_or_fd) + + assert isinstance(metadata, dict) return metadata def write_metadata(path_or_fd, metadata): """ - Helper function to write pickled metadata for a File/Directory. + Helper function to write serialized metadata for a File/Directory. :param path_or_fd: File/Directory path or fd to write the metadata :param metadata: dictionary of metadata write """ assert isinstance(metadata, dict) - metastr = pickle.dumps(metadata, PICKLE_PROTOCOL) + metastr = serialize_metadata(metadata) key = 0 while metastr: try: diff --git a/test/unit/common/test_diskdir.py b/test/unit/common/test_diskdir.py index ebc554a..3b95de8 100644 --- a/test/unit/common/test_diskdir.py +++ b/test/unit/common/test_diskdir.py @@ -18,7 +18,6 @@ import os import errno import tempfile -import cPickle as pickle import unittest import shutil import tarfile @@ -26,6 +25,7 @@ import hashlib from time import time from swift.common.utils import normalize_timestamp from gluster.swift.common import utils +from gluster.swift.common.utils import serialize_metadata, deserialize_metadata import gluster.swift.common.Glusterfs from test_utils import _initxattr, _destroyxattr, _setxattr, _getxattr from test.unit import FakeLogger @@ -283,7 +283,7 @@ class TestDiskCommon(unittest.TestCase): def test__dir_exists_read_metadata_exists(self): datadir = os.path.join(self.td, self.fake_drives[0]) fake_md = { "fake": (True,0) } - fake_md_p = pickle.dumps(fake_md, utils.PICKLE_PROTOCOL) + fake_md_p = serialize_metadata(fake_md) _setxattr(datadir, utils.METADATA_KEY, fake_md_p) dc = dd.DiskCommon(self.td, self.fake_drives[0], self.fake_accounts[0], self.fake_logger) @@ -339,7 +339,7 @@ class TestDiskCommon(unittest.TestCase): dc.update_metadata({'X-Container-Meta-foo': '42'}) assert 'X-Container-Meta-foo' in dc.metadata assert dc.metadata['X-Container-Meta-foo'] == '42' - md = pickle.loads(_getxattr(dc.datadir, utils.METADATA_KEY)) + md = deserialize_metadata(_getxattr(dc.datadir, utils.METADATA_KEY)) assert dc.metadata == md, "%r != %r" % (dc.metadata, md) del dc.metadata['X-Container-Meta-foo'] assert dc.metadata == md_copy @@ -1248,7 +1248,7 @@ class TestDiskAccount(unittest.TestCase): datadir = os.path.join(self.td, self.fake_drives[i]) fake_md = { "fake-drv-%d" % i: (True,0) } self.fake_md.append(fake_md) - fake_md_p = pickle.dumps(fake_md, utils.PICKLE_PROTOCOL) + fake_md_p = serialize_metadata(fake_md) _setxattr(datadir, utils.METADATA_KEY, fake_md_p) if i == 2: # Third drive has valid account metadata diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py index 55bd0cf..1fea1fc 100644 --- a/test/unit/common/test_utils.py +++ b/test/unit/common/test_utils.py @@ -19,7 +19,6 @@ import os import unittest import errno import xattr -import cPickle as pickle import tempfile import hashlib import tarfile @@ -27,6 +26,7 @@ import shutil from collections import defaultdict from mock import patch, Mock from gluster.swift.common import utils, Glusterfs +from gluster.swift.common.utils import deserialize_metadata, serialize_metadata from gluster.swift.common.exceptions import GlusterFileSystemOSError,\ GlusterFileSystemIOError from swift.common.exceptions import DiskFileNoSpace @@ -154,7 +154,7 @@ class TestUtils(unittest.TestCase): xkey = _xkey(path, utils.METADATA_KEY) assert len(_xattrs.keys()) == 1 assert xkey in _xattrs - assert orig_d == pickle.loads(_xattrs[xkey]) + assert orig_d == deserialize_metadata(_xattrs[xkey]) assert _xattr_op_cnt['set'] == 1 def test_write_metadata_err(self): @@ -205,13 +205,13 @@ class TestUtils(unittest.TestCase): assert xkey in _xattrs assert len(_xattrs[xkey]) <= utils.MAX_XATTR_SIZE payload += _xattrs[xkey] - assert orig_d == pickle.loads(payload) + assert orig_d == deserialize_metadata(payload) assert _xattr_op_cnt['set'] == 3, "%r" % _xattr_op_cnt def test_clean_metadata(self): path = "/tmp/foo/c" expected_d = {'a': 'y' * 150000} - expected_p = pickle.dumps(expected_d, utils.PICKLE_PROTOCOL) + expected_p = serialize_metadata(expected_d) for i in range(0, 3): xkey = _xkey(path, "%s%s" % (utils.METADATA_KEY, i or '')) _xattrs[xkey] = expected_p[:utils.MAX_XATTR_SIZE] @@ -223,7 +223,7 @@ class TestUtils(unittest.TestCase): def test_clean_metadata_err(self): path = "/tmp/foo/c" xkey = _xkey(path, utils.METADATA_KEY) - _xattrs[xkey] = pickle.dumps({'a': 'y'}, utils.PICKLE_PROTOCOL) + _xattrs[xkey] = serialize_metadata({'a': 'y'}) _xattr_rem_err[xkey] = errno.EOPNOTSUPP try: utils.clean_metadata(path) @@ -237,7 +237,7 @@ class TestUtils(unittest.TestCase): path = "/tmp/foo/r" expected_d = {'a': 'y'} xkey = _xkey(path, utils.METADATA_KEY) - _xattrs[xkey] = pickle.dumps(expected_d, utils.PICKLE_PROTOCOL) + _xattrs[xkey] = serialize_metadata(expected_d) res_d = utils.read_metadata(path) assert res_d == expected_d, "Expected %r, result %r" % (expected_d, res_d) assert _xattr_op_cnt['get'] == 1, "%r" % _xattr_op_cnt @@ -252,7 +252,7 @@ class TestUtils(unittest.TestCase): path = "/tmp/foo/r" expected_d = {'a': 'y'} xkey = _xkey(path, utils.METADATA_KEY) - _xattrs[xkey] = pickle.dumps(expected_d, utils.PICKLE_PROTOCOL) + _xattrs[xkey] = serialize_metadata(expected_d) _xattr_get_err[xkey] = errno.EOPNOTSUPP try: utils.read_metadata(path) @@ -265,7 +265,7 @@ class TestUtils(unittest.TestCase): def test_read_metadata_multiple(self): path = "/tmp/foo/r" expected_d = {'a': 'y' * 150000} - expected_p = pickle.dumps(expected_d, utils.PICKLE_PROTOCOL) + expected_p = serialize_metadata(expected_d) for i in range(0, 3): xkey = _xkey(path, "%s%s" % (utils.METADATA_KEY, i or '')) _xattrs[xkey] = expected_p[:utils.MAX_XATTR_SIZE] @@ -273,12 +273,12 @@ class TestUtils(unittest.TestCase): assert not expected_p res_d = utils.read_metadata(path) assert res_d == expected_d, "Expected %r, result %r" % (expected_d, res_d) - assert _xattr_op_cnt['get'] == 3, "%r" % _xattr_op_cnt + assert _xattr_op_cnt['get'] == 4, "%r" % _xattr_op_cnt def test_read_metadata_multiple_one_missing(self): path = "/tmp/foo/r" expected_d = {'a': 'y' * 150000} - expected_p = pickle.dumps(expected_d, utils.PICKLE_PROTOCOL) + expected_p = serialize_metadata(expected_d) for i in range(0, 2): xkey = _xkey(path, "%s%s" % (utils.METADATA_KEY, i or '')) _xattrs[xkey] = expected_p[:utils.MAX_XATTR_SIZE] @@ -287,7 +287,6 @@ class TestUtils(unittest.TestCase): res_d = utils.read_metadata(path) assert res_d == {} assert _xattr_op_cnt['get'] == 3, "%r" % _xattr_op_cnt - assert len(_xattrs.keys()) == 0, "Expected 0 keys, found %d" % len(_xattrs.keys()) def test_restore_metadata_none(self): # No initial metadata @@ -303,7 +302,7 @@ class TestUtils(unittest.TestCase): path = "/tmp/foo/i" initial_d = {'a': 'z'} xkey = _xkey(path, utils.METADATA_KEY) - _xattrs[xkey] = pickle.dumps(initial_d, utils.PICKLE_PROTOCOL) + _xattrs[xkey] = serialize_metadata(initial_d) res_d = utils.restore_metadata(path, {'b': 'y'}) expected_d = {'a': 'z', 'b': 'y'} assert res_d == expected_d, "Expected %r, result %r" % (expected_d, res_d) @@ -315,7 +314,7 @@ class TestUtils(unittest.TestCase): path = "/tmp/foo/i" initial_d = {'a': 'z'} xkey = _xkey(path, utils.METADATA_KEY) - _xattrs[xkey] = pickle.dumps(initial_d, utils.PICKLE_PROTOCOL) + _xattrs[xkey] = serialize_metadata(initial_d) res_d = utils.restore_metadata(path, {}) expected_d = {'a': 'z'} assert res_d == expected_d, "Expected %r, result %r" % (expected_d, res_d) @@ -420,7 +419,7 @@ class TestUtils(unittest.TestCase): assert xkey in _xattrs assert _xattr_op_cnt['get'] == 1 assert _xattr_op_cnt['set'] == 1 - md = pickle.loads(_xattrs[xkey]) + md = deserialize_metadata(_xattrs[xkey]) assert r_md == md for key in self.obj_keys: @@ -442,7 +441,7 @@ class TestUtils(unittest.TestCase): assert xkey in _xattrs assert _xattr_op_cnt['get'] == 1 assert _xattr_op_cnt['set'] == 1 - md = pickle.loads(_xattrs[xkey]) + md = deserialize_metadata(_xattrs[xkey]) assert r_md == md for key in self.obj_keys: @@ -515,7 +514,7 @@ class TestUtils(unittest.TestCase): assert xkey in _xattrs assert _xattr_op_cnt['get'] == 1 assert _xattr_op_cnt['set'] == 1 - md = pickle.loads(_xattrs[xkey]) + md = deserialize_metadata(_xattrs[xkey]) assert r_md == md for key in self.cont_keys: @@ -541,7 +540,7 @@ class TestUtils(unittest.TestCase): assert xkey in _xattrs assert _xattr_op_cnt['get'] == 1 assert _xattr_op_cnt['set'] == 1 - md = pickle.loads(_xattrs[xkey]) + md = deserialize_metadata(_xattrs[xkey]) assert r_md == md for key in self.acct_keys: diff --git a/tools/object_expirer_functional.sh b/tools/object_expirer_functional.sh index 2578619..40f2f81 100755 --- a/tools/object_expirer_functional.sh +++ b/tools/object_expirer_functional.sh @@ -31,6 +31,7 @@ cleanup() sudo rm -rf /mnt/gluster-object/test{,2}/* > /dev/null 2>&1 sudo rm -rf /mnt/gluster-object/gsexpiring/* > /dev/null 2>&1 sudo setfattr -x user.swift.metadata /mnt/gluster-object/test{,2} > /dev/null 2>&1 + sudo setfattr -x user.swift.metadata /mnt/gluster-object/gsexpiring > /dev/null 2>&1 } quit() -- cgit