From c5d76cdd2e2e99d4ac65b645b17cf8a43e4ccab4 Mon Sep 17 00:00:00 2001 From: Prashanth Pai Date: Tue, 8 Sep 2015 15:44:09 +0530 Subject: Do not use pickle: Use json Change-Id: Iffdd56704330897fbde21f101c9b2ed03c2ae296 Signed-off-by: Prashanth Pai Reviewed-by: Thiago da Silva Tested-by: Thiago da Silva Reviewed-on: http://review.gluster.org/13221 --- bin/gluster-swift-migrate-metadata | 162 +++++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100755 bin/gluster-swift-migrate-metadata (limited to 'bin/gluster-swift-migrate-metadata') diff --git a/bin/gluster-swift-migrate-metadata b/bin/gluster-swift-migrate-metadata new file mode 100755 index 0000000..2ccf157 --- /dev/null +++ b/bin/gluster-swift-migrate-metadata @@ -0,0 +1,162 @@ +#!/usr/bin/env python +# +# Copyright (c) 2015 Red Hat, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import pwd +import sys +import stat +import errno +import xattr +import cPickle as pickle +import multiprocessing + +from optparse import OptionParser +from gluster.swift.common.utils import write_metadata, SafeUnpickler, \ + METADATA_KEY, MAX_XATTR_SIZE + + +ORIGINAL_EUID = os.geteuid() +NOBODY_UID = pwd.getpwnam('nobody').pw_uid + + +def print_msg(s): + global options + if options.verbose: + print(s) + + +def clean_metadata(path, key_count): + """ + Can only be used when you know the key_count. Saves one unnecessarry + removexattr() call. Ignores error when file or metadata isn't found. + """ + for key in xrange(0, key_count): + try: + xattr.removexattr(path, '%s%s' % (METADATA_KEY, (key or ''))) + except IOError as err: + if err.errno not in (errno.ENOENT, errno.ESTALE, errno.ENODATA): + print_msg("xattr.removexattr(%s, %s%s) failed: %s" % + (path, METADATA_KEY, (key or ''), err.errno)) + + +def process_object(path): + + metastr = '' + key_count = 0 + try: + while True: + metastr += xattr.getxattr(path, '%s%s' % + (METADATA_KEY, (key_count or ''))) + key_count += 1 + if len(metastr) < MAX_XATTR_SIZE: + # Prevent further getxattr calls + break + except IOError as err: + if err.errno not in (errno.ENOENT, errno.ESTALE, errno.ENODATA): + print_msg("xattr.getxattr(%s, %s%s) failed: %s" % + (path, METADATA_KEY, (key_count or ''), err.errno)) + + if not metastr: + return + + if metastr.startswith('\x80\x02}') and metastr.endswith('.'): + # It's pickled. If unpickling is successful and metadata is + # not stale write back the metadata by serializing it. + try: + os.seteuid(NOBODY_UID) # Drop privileges + metadata = SafeUnpickler.loads(metastr) + os.seteuid(ORIGINAL_EUID) # Restore privileges + assert isinstance(metadata, dict) + except (pickle.UnpicklingError, EOFError, AttributeError, + IndexError, ImportError, AssertionError): + clean_metadata(path, key_count) + else: + try: + # Remove existing metadata first before writing new metadata + clean_metadata(path, key_count) + write_metadata(path, metadata) + print_msg("%s MIGRATED" % (path)) + except IOError as err: + if err.errno not in (errno.ENOENT, errno.ESTALE): + raise + elif metastr.startswith("{") and metastr.endswith("}"): + # It's not pickled and is already serialized, just return + print_msg("%s SKIPPED" % (path)) + else: + # Metadata is malformed + clean_metadata(path, key_count) + print_msg("%s CLEANED" % (path)) + + +def walktree(top, pool, root=True): + """ + Recursively walk the filesystem tree and migrate metadata of each object + found. Unlike os.walk(), this method performs stat() sys call on a + file/directory at most only once. + """ + + if root: + # The root of volume is account which also contains metadata + pool.apply_async(process_object, (top, )) + + for f in os.listdir(top): + if root and f in (".trashcan", ".glusterfs", "async_pending", "tmp"): + continue + path = os.path.join(top, f) + try: + s = os.stat(path) + except OSError as err: + if err.errno in (errno.ENOENT, errno.ESTALE): + continue + raise + if stat.S_ISLNK(s.st_mode): + pass + elif stat.S_ISDIR(s.st_mode): + pool.apply_async(process_object, (path, )) + # Recurse into directory + walktree(path, pool, root=False) + elif stat.S_ISREG(s.st_mode): + pool.apply_async(process_object, (path, )) + + +if __name__ == '__main__': + + global options + + usage = "usage: %prog [options] volume1_mountpath volume2_mountpath..." + description = """Account, container and object metadata are stored as \ +extended attributes of files and directories. This utility migrates metadata \ +stored in pickled format to JSON format.""" + parser = OptionParser(usage=usage, description=description) + parser.add_option("-v", "--verbose", dest="verbose", + action="store_true", default=False, + help="Print object paths as they are processed.") + (options, mount_paths) = parser.parse_args() + + if len(mount_paths) < 1: + print "Mountpoint path(s) missing." + parser.print_usage() + sys.exit(-1) + + pool = multiprocessing.Pool(multiprocessing.cpu_count() * 2) + + for path in mount_paths: + if os.path.isdir(path): + walktree(path, pool) + + pool.close() + pool.join() -- cgit