From c5d76cdd2e2e99d4ac65b645b17cf8a43e4ccab4 Mon Sep 17 00:00:00 2001
From: Prashanth Pai
Date: Tue, 8 Sep 2015 15:44:09 +0530
Subject: Do not use pickle: Use json

Change-Id: Iffdd56704330897fbde21f101c9b2ed03c2ae296
Signed-off-by: Prashanth Pai
Reviewed-by: Thiago da Silva
Tested-by: Thiago da Silva
Reviewed-on: http://review.gluster.org/13221
---
 bin/gluster-swift-migrate-metadata | 162 +++++++++++++++++++++++++++++++++++++
 etc/fs.conf-gluster                |  13 ++-
 gluster/swift/common/Glusterfs.py  |   8 ++
 gluster/swift/common/utils.py      |  70 ++++++++++++++--
 setup.py                           |   1 +
 test/functional/tests.py           |  12 +++
 test/unit/common/test_utils.py     |  82 ++++++++++++++++++-
 7 files changed, 339 insertions(+), 9 deletions(-)
 create mode 100755 bin/gluster-swift-migrate-metadata

diff --git a/bin/gluster-swift-migrate-metadata b/bin/gluster-swift-migrate-metadata
new file mode 100755
index 0000000..2ccf157
--- /dev/null
+++ b/bin/gluster-swift-migrate-metadata
@@ -0,0 +1,162 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2015 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import pwd
+import sys
+import stat
+import errno
+import xattr
+import cPickle as pickle
+import multiprocessing
+
+from optparse import OptionParser
+from gluster.swift.common.utils import write_metadata, SafeUnpickler, \
+    METADATA_KEY, MAX_XATTR_SIZE
+
+
+ORIGINAL_EUID = os.geteuid()
+NOBODY_UID = pwd.getpwnam('nobody').pw_uid
+
+
+def print_msg(s):
+    global options
+    if options.verbose:
+        print(s)
+
+
+def clean_metadata(path, key_count):
+    """
+    Can only be used when you know the key_count. Saves one unnecessary
+    removexattr() call. Ignores errors when the file or metadata isn't found.
+    """
+    for key in xrange(0, key_count):
+        try:
+            xattr.removexattr(path, '%s%s' % (METADATA_KEY, (key or '')))
+        except IOError as err:
+            if err.errno not in (errno.ENOENT, errno.ESTALE, errno.ENODATA):
+                print_msg("xattr.removexattr(%s, %s%s) failed: %s" %
+                          (path, METADATA_KEY, (key or ''), err.errno))
+
+
+def process_object(path):
+
+    metastr = ''
+    key_count = 0
+    try:
+        while True:
+            metastr += xattr.getxattr(path, '%s%s' %
+                                      (METADATA_KEY, (key_count or '')))
+            key_count += 1
+            if len(metastr) < MAX_XATTR_SIZE:
+                # Prevent further getxattr calls
+                break
+    except IOError as err:
+        if err.errno not in (errno.ENOENT, errno.ESTALE, errno.ENODATA):
+            print_msg("xattr.getxattr(%s, %s%s) failed: %s" %
+                      (path, METADATA_KEY, (key_count or ''), err.errno))
+
+    if not metastr:
+        return
+
+    if metastr.startswith('\x80\x02}') and metastr.endswith('.'):
+        # It's pickled. If unpickling is successful and the metadata is
+        # not stale, write it back in serialized (JSON) form.
+        try:
+            os.seteuid(NOBODY_UID)  # Drop privileges
+            metadata = SafeUnpickler.loads(metastr)
+            os.seteuid(ORIGINAL_EUID)  # Restore privileges
+            assert isinstance(metadata, dict)
+        except (pickle.UnpicklingError, EOFError, AttributeError,
+                IndexError, ImportError, AssertionError):
+            clean_metadata(path, key_count)
+        else:
+            try:
+                # Remove existing metadata first before writing new metadata
+                clean_metadata(path, key_count)
+                write_metadata(path, metadata)
+                print_msg("%s MIGRATED" % (path))
+            except IOError as err:
+                if err.errno not in (errno.ENOENT, errno.ESTALE):
+                    raise
+    elif metastr.startswith("{") and metastr.endswith("}"):
+        # It's not pickled and is already serialized, just return
+        print_msg("%s SKIPPED" % (path))
+    else:
+        # Metadata is malformed
+        clean_metadata(path, key_count)
+        print_msg("%s CLEANED" % (path))
+
+
+def walktree(top, pool, root=True):
+    """
+    Recursively walk the filesystem tree and migrate metadata of each object
+    found. Unlike os.walk(), this method performs the stat() syscall on a
+    file/directory at most once.
+    """
+
+    if root:
+        # The root of the volume is the account, which also contains metadata
+        pool.apply_async(process_object, (top, ))
+
+    for f in os.listdir(top):
+        if root and f in (".trashcan", ".glusterfs", "async_pending", "tmp"):
+            continue
+        path = os.path.join(top, f)
+        try:
+            s = os.stat(path)
+        except OSError as err:
+            if err.errno in (errno.ENOENT, errno.ESTALE):
+                continue
+            raise
+        if stat.S_ISLNK(s.st_mode):
+            pass
+        elif stat.S_ISDIR(s.st_mode):
+            pool.apply_async(process_object, (path, ))
+            # Recurse into directory
+            walktree(path, pool, root=False)
+        elif stat.S_ISREG(s.st_mode):
+            pool.apply_async(process_object, (path, ))
+
+
+if __name__ == '__main__':
+
+    global options
+
+    usage = "usage: %prog [options] volume1_mountpath volume2_mountpath..."
+    description = """Account, container and object metadata are stored as \
+extended attributes of files and directories. This utility migrates metadata \
+stored in pickled format to JSON format."""
+    parser = OptionParser(usage=usage, description=description)
+    parser.add_option("-v", "--verbose", dest="verbose",
+                      action="store_true", default=False,
+                      help="Print object paths as they are processed.")
+    (options, mount_paths) = parser.parse_args()
+
+    if len(mount_paths) < 1:
+        print "Mountpoint path(s) missing."
+        parser.print_usage()
+        sys.exit(-1)
+
+    pool = multiprocessing.Pool(multiprocessing.cpu_count() * 2)
+
+    for path in mount_paths:
+        if os.path.isdir(path):
+            walktree(path, pool)
+
+    pool.close()
+    pool.join()
diff --git a/etc/fs.conf-gluster b/etc/fs.conf-gluster
index 6d2a791..31a5e6f 100644
--- a/etc/fs.conf-gluster
+++ b/etc/fs.conf-gluster
@@ -10,4 +10,15 @@ mount_ip = localhost
 # numbers of objects, at the expense of an accurate count of combined bytes
 # used by all objects in the container. For most installations "off" works
 # fine.
-accurate_size_in_listing = off
\ No newline at end of file
+accurate_size_in_listing = off
+
+# In older versions of gluster-swift, metadata stored as xattrs of dirs/files
+# was serialized in PICKLE format. The PICKLE format is vulnerable to exploits
+# in deployments where a user has access to the backend filesystem over
+# FUSE/SMB. Deserializing pickled metadata can result in malicious code being
+# executed if an attacker has stored malicious code as an xattr via the
+# filesystem interface. Although new metadata is always serialized in JSON
+# format, existing metadata in PICKLE format is still loaded by default.
+# You can set this option to 'off' once you have migrated all your metadata
+# from PICKLE to JSON format using the gluster-swift-migrate-metadata tool.
+read_pickled_metadata = on
diff --git a/gluster/swift/common/Glusterfs.py b/gluster/swift/common/Glusterfs.py
index 0098bff..6a2fdb2 100644
--- a/gluster/swift/common/Glusterfs.py
+++ b/gluster/swift/common/Glusterfs.py
@@ -37,6 +37,7 @@ _allow_mount_per_server = False
 _implicit_dir_objects = False
 _container_update_object_count = False
 _account_update_container_count = False
+_read_pickled_metadata = True
 
 if _fs_conf.read(os.path.join(SWIFT_DIR, 'fs.conf')):
     try:
@@ -97,6 +98,13 @@ if _fs_conf.read(os.path.join(SWIFT_DIR, 'fs.conf')):
     except (NoSectionError, NoOptionError):
         pass
 
+    try:
+        _read_pickled_metadata = _fs_conf.get('DEFAULT',
+                                              'read_pickled_metadata',
+                                              "on") in TRUE_VALUES
+    except (NoSectionError, NoOptionError):
+        pass
+
 
 NAME = 'glusterfs'
 
diff --git a/gluster/swift/common/utils.py b/gluster/swift/common/utils.py
index 9951132..b6a5a09 100644
--- a/gluster/swift/common/utils.py
+++ b/gluster/swift/common/utils.py
@@ -15,13 +15,17 @@
 
 import os
 import stat
+import json
 import errno
 import logging
 from hashlib import md5
 from eventlet import sleep
 import cPickle as pickle
+from cStringIO import StringIO
+import pickletools
 from gluster.swift.common.exceptions import GlusterFileSystemIOError
 from swift.common.exceptions import DiskFileNoSpace
+from swift.common.db import utf8encodekeys
 from gluster.swift.common.fs_utils import do_getctime, do_getmtime, do_stat, \
     do_listdir, do_walk, do_rmdir, do_log_rl, get_filename_from_fd, do_open, \
     do_isdir, do_getsize, do_getxattr, do_setxattr, do_removexattr, do_read, \
@@ -57,6 +61,39 @@ PICKLE_PROTOCOL = 2
 CHUNK_SIZE = 65536
 
 
+class SafeUnpickler(object):
+    """
+    Loading a pickled stream is potentially unsafe and exploitable because
+    the loading process can import modules/classes (via GLOBAL opcode) and
+    run any callable (via REDUCE opcode). As the metadata stored in Swift
+    is just a dictionary, we take away these powerful "features", thus
+    making the loading process safe. Hence, this is very Swift specific
+    and is not a general purpose safe unpickler.
+    """
+
+    __slots__ = 'OPCODE_BLACKLIST'
+    OPCODE_BLACKLIST = ('GLOBAL', 'REDUCE', 'BUILD', 'OBJ', 'NEWOBJ', 'INST',
+                        'EXT1', 'EXT2', 'EXT4')
+
+    @classmethod
+    def find_class(self, module, name):
+        # Do not allow importing of ANY module. This is really redundant as
+        # we block those OPCODEs that result in invocation of this method.
+        raise pickle.UnpicklingError('Potentially unsafe pickle')
+
+    @classmethod
+    def loads(self, string):
+        for opcode in pickletools.genops(string):
+            if opcode[0].name in self.OPCODE_BLACKLIST:
+                raise pickle.UnpicklingError('Potentially unsafe pickle')
+        orig_unpickler = pickle.Unpickler(StringIO(string))
+        orig_unpickler.find_global = self.find_class
+        return orig_unpickler.load()
+
+
+pickle.loads = SafeUnpickler.loads
+
+
 def normalize_timestamp(timestamp):
     """
     Format a timestamp (string or numeric) into a standardized
@@ -73,7 +110,7 @@ def normalize_timestamp(timestamp):
 
 
 def serialize_metadata(metadata):
-    return pickle.dumps(metadata, PICKLE_PROTOCOL)
+    return json.dumps(metadata, separators=(',', ':'))
 
 
 def deserialize_metadata(metastr):
@@ -81,16 +118,35 @@ def deserialize_metadata(metastr):
     Returns dict populated with metadata if deserializing is successful.
     Returns empty dict if deserialzing fails.
""" - if metastr.startswith('\x80\x02}') and metastr.endswith('.'): - # Assert that the metadata was indeed pickled before attempting - # to unpickle. This only *reduces* risk of malicious shell code - # being executed. However, it does NOT fix anything. + if metastr.startswith('\x80\x02}') and metastr.endswith('.') and \ + Glusterfs._read_pickled_metadata: + # Assert that the serialized metadata is pickled using + # pickle protocol 2 and is a dictionary. try: return pickle.loads(metastr) - except (pickle.UnpicklingError, EOFError, AttributeError, - IndexError, ImportError, AssertionError): + except Exception: + logging.warning("pickle.loads() failed.", exc_info=True) + return {} + elif metastr.startswith('{') and metastr.endswith('}'): + + def _list_to_tuple(d): + for k, v in d.iteritems(): + if isinstance(v, list): + d[k] = tuple(i.encode('utf-8') + if isinstance(i, unicode) else i for i in v) + if isinstance(v, unicode): + d[k] = v.encode('utf-8') + return d + + try: + metadata = json.loads(metastr, object_hook=_list_to_tuple) + utf8encodekeys(metadata) + return metadata + except (UnicodeDecodeError, ValueError): + logging.warning("json.loads() failed.", exc_info=True) return {} else: + logging.warning("Invalid metadata format (neither PICKLE nor JSON)") return {} diff --git a/setup.py b/setup.py index 5673bdc..214d8f1 100644 --- a/setup.py +++ b/setup.py @@ -43,6 +43,7 @@ setup( scripts=[ 'bin/gluster-swift-gen-builders', 'bin/gluster-swift-print-metadata', + 'bin/gluster-swift-migrate-metadata', 'gluster/swift/common/middleware/gswauth/bin/gswauth-add-account', 'gluster/swift/common/middleware/gswauth/bin/gswauth-add-user', 'gluster/swift/common/middleware/gswauth/bin/gswauth-cleanup-tokens', diff --git a/test/functional/tests.py b/test/functional/tests.py index ad87d7e..b8633b0 100644 --- a/test/functional/tests.py +++ b/test/functional/tests.py @@ -1125,6 +1125,9 @@ class TestFile(Base): self.assert_status(400) def testMetadataNumberLimit(self): + raise SkipTest("Bad test") + # TODO(ppai): Fix it in upstream swift first + # Refer to comments below number_limit = load_constraint('max_meta_count') size_limit = load_constraint('max_meta_overall_size') @@ -1137,10 +1140,13 @@ class TestFile(Base): metadata = {} while len(metadata.keys()) < i: key = Utils.create_ascii_name() + # The following line returns a valid utf8 byte sequence val = Utils.create_name() if len(key) > j: key = key[:j] + # This slicing done below can make the 'utf8' byte + # sequence invalid and hence it cannot be decoded. 
                     val = val[:j]
 
                 size += len(key) + len(val)
@@ -1154,6 +1160,9 @@ class TestFile(Base):
                 self.assert_status(201)
                 self.assert_(file_item.sync_metadata())
                 self.assert_status((201, 202))
+                self.assert_(file_item.initialize())
+                self.assert_status(200)
+                self.assertEqual(file_item.metadata, metadata)
             else:
                 self.assertRaises(ResponseError, file_item.write)
                 self.assert_status(400)
@@ -1315,6 +1324,9 @@ class TestFile(Base):
             self.assert_(file_item.write())
             self.assert_status(201)
             self.assert_(file_item.sync_metadata())
+            self.assert_(file_item.initialize())
+            self.assert_status(200)
+            self.assertEqual(file_item.metadata, metadata)
         else:
             self.assertRaises(ResponseError, file_item.write)
             self.assert_status(400)
diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py
index 1fea1fc..0b173be 100644
--- a/test/unit/common/test_utils.py
+++ b/test/unit/common/test_utils.py
@@ -16,6 +16,7 @@
 """ Tests for common.utils """
 
 import os
+import json
 import unittest
 import errno
 import xattr
@@ -23,10 +24,12 @@ import tempfile
 import hashlib
 import tarfile
 import shutil
+import cPickle as pickle
 from collections import defaultdict
 from mock import patch, Mock
 from gluster.swift.common import utils, Glusterfs
-from gluster.swift.common.utils import deserialize_metadata, serialize_metadata
+from gluster.swift.common.utils import deserialize_metadata, \
+    serialize_metadata, PICKLE_PROTOCOL
 from gluster.swift.common.exceptions import GlusterFileSystemOSError,\
     GlusterFileSystemIOError
 from swift.common.exceptions import DiskFileNoSpace
@@ -138,6 +141,32 @@ def _mock_os_fsync(fd):
     return
 
 
+class TestSafeUnpickler(unittest.TestCase):
+
+    class Exploit(object):
+        def __reduce__(self):
+            return (os.system, ('touch /tmp/pickle-exploit',))
+
+    def test_loads(self):
+        valid_md = {'key1': 'val1', 'key2': 'val2'}
+        for protocol in (0, 1, 2):
+            valid_dump = pickle.dumps(valid_md, protocol)
+            mal_dump = pickle.dumps(self.Exploit(), protocol)
+            # malicious dump is appended to valid dump
+            payload1 = valid_dump[:-1] + mal_dump
+            # malicious dump is prefixed to valid dump
+            payload2 = mal_dump[:-1] + valid_dump
+            # entire dump is malicious
+            payload3 = mal_dump
+            for payload in (payload1, payload2, payload3):
+                try:
+                    utils.SafeUnpickler.loads(payload)
+                except pickle.UnpicklingError as err:
+                    self.assertTrue('Potentially unsafe pickle' in err)
+                else:
+                    self.fail("Expecting cPickle.UnpicklingError")
+
+
 class TestUtils(unittest.TestCase):
     """ Tests for common.utils """
 
@@ -321,6 +350,57 @@ class TestUtils(unittest.TestCase):
         assert _xattr_op_cnt['get'] == 1, "%r" % _xattr_op_cnt
         assert _xattr_op_cnt['set'] == 0, "%r" % _xattr_op_cnt
 
+    def test_deserialize_metadata_pickle(self):
+        orig__read_pickled_metadata = Glusterfs._read_pickled_metadata
+        orig_md = {'key1': 'value1', 'key2': 'value2'}
+        pickled_md = pickle.dumps(orig_md, PICKLE_PROTOCOL)
+        _m_pickle_loads = Mock(return_value={})
+        try:
+            with patch('gluster.swift.common.utils.pickle.loads',
+                       _m_pickle_loads):
+                # Conf option turned off
+                Glusterfs._read_pickled_metadata = False
+                # pickled
+                utils.deserialize_metadata(pickled_md)
+                self.assertFalse(_m_pickle_loads.called)
+                _m_pickle_loads.reset_mock()
+                # not pickled
+                utils.deserialize_metadata("not_pickle")
+                self.assertFalse(_m_pickle_loads.called)
+                _m_pickle_loads.reset_mock()
+
+                # Conf option turned on
+                Glusterfs._read_pickled_metadata = True
+                # pickled
+                md = utils.deserialize_metadata(pickled_md)
+                self.assertTrue(_m_pickle_loads.called)
+                self.assertTrue(isinstance(md, dict))
+                _m_pickle_loads.reset_mock()
+                # not pickled
+                utils.deserialize_metadata("not_pickle")
+                self.assertFalse(_m_pickle_loads.called)
+                _m_pickle_loads.reset_mock()
+
+                # malformed pickle
+                _m_pickle_loads.side_effect = pickle.UnpicklingError
+                md = utils.deserialize_metadata("malformed_pickle")
+                self.assertTrue(isinstance(md, dict))
+        finally:
+            Glusterfs._read_pickled_metadata = orig__read_pickled_metadata
+
+    def test_deserialize_metadata_json(self):
+        orig_md = {'key1': 'value1', 'key2': 'value2'}
+        json_md = json.dumps(orig_md, separators=(',', ':'))
+        _m_json_loads = Mock(return_value={})
+        with patch('gluster.swift.common.utils.json.loads',
+                   _m_json_loads):
+            utils.deserialize_metadata("not_json")
+            self.assertFalse(_m_json_loads.called)
+            _m_json_loads.reset_mock()
+            utils.deserialize_metadata("{fake_valid_json}")
+            self.assertTrue(_m_json_loads.called)
+            _m_json_loads.reset_mock()
+
     def test_add_timestamp_empty(self):
         orig = {}
         res = utils._add_timestamp(orig)
-- cgit
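The SafeUnpickler introduced in utils.py above never executes a dangerous opcode: it scans the whole stream with pickletools.genops() and rejects it before any real unpickling begins, which is exactly what TestSafeUnpickler exercises. A minimal standalone sketch of that idea, in Python 2 to match this change (the Exploit class and the echo command below are illustrative and not part of the patch):

    import os
    import cPickle as pickle
    import pickletools

    # Same opcode blacklist as SafeUnpickler.OPCODE_BLACKLIST in the patch
    BLACKLIST = ('GLOBAL', 'REDUCE', 'BUILD', 'OBJ', 'NEWOBJ', 'INST',
                 'EXT1', 'EXT2', 'EXT4')


    class Exploit(object):
        # Unpickling an instance of this class would run os.system() via the
        # GLOBAL and REDUCE opcodes.
        def __reduce__(self):
            return (os.system, ('echo pwned',))


    def blacklisted_opcodes(payload):
        # genops() only parses the stream; nothing in it gets executed.
        return set(op.name for op, arg, pos in
                   pickletools.genops(payload)) & set(BLACKLIST)


    safe = pickle.dumps({'key1': 'val1'}, 2)
    unsafe = pickle.dumps(Exploit(), 2)
    print sorted(blacklisted_opcodes(safe))    # []
    print sorted(blacklisted_opcodes(unsafe))  # ['GLOBAL', 'REDUCE']

Because the scan happens up front, the gluster-swift-migrate-metadata tool can unpickle old metadata (additionally dropping privileges to 'nobody' around the call) without risking code execution from a crafted xattr.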
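On the JSON side, the _list_to_tuple object hook in deserialize_metadata() exists because JSON has no tuple type: metadata values that Swift conventionally stores as (value, timestamp) tuples of byte strings would otherwise come back from json.loads() as lists of unicode. A small standalone sketch of the effect (Python 2; the sample key and values are made up, and the key re-encoding that the patch does via swift.common.db.utf8encodekeys() is left out):

    import json


    def _list_to_tuple(d):
        # Mirrors the hook added in the patch: lists become tuples again and
        # unicode values are re-encoded as utf-8 byte strings.
        for k, v in d.iteritems():
            if isinstance(v, list):
                d[k] = tuple(i.encode('utf-8') if isinstance(i, unicode) else i
                             for i in v)
            if isinstance(v, unicode):
                d[k] = v.encode('utf-8')
        return d


    orig = {'X-Object-Meta-Color': ('blue', '0000001234.00000'),
            'Content-Type': 'application/octet-stream'}
    metastr = json.dumps(orig, separators=(',', ':'))
    print json.loads(metastr)                              # lists and unicode
    print json.loads(metastr, object_hook=_list_to_tuple)  # tuples and str

Keeping the deserialized types identical to what pickle.loads() used to return means callers of deserialize_metadata() behave the same whether a given xattr was written in the old or the new format.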