Diffstat (limited to 'tools/glusterfind/src/changelog.py')
| -rw-r--r-- | tools/glusterfind/src/changelog.py | 144 |
1 file changed, 108 insertions, 36 deletions
diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py
index 4d0a190286e..a5e9ea4288f 100644
--- a/tools/glusterfind/src/changelog.py
+++ b/tools/glusterfind/src/changelog.py
@@ -1,4 +1,5 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-

 # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
 # This file is part of GlusterFS.
@@ -13,9 +14,14 @@ import sys
 import time
 import xattr
 import logging
+from gfind_py2py3 import bytearray_to_str
 from argparse import ArgumentParser, RawDescriptionHelpFormatter
 import hashlib
-import urllib
+try:
+    import urllib.parse as urllib
+except ImportError:
+    import urllib
+import codecs

 import libgfchangelog
 from utils import mkdirp, symlink_gfid_to_path
@@ -38,8 +44,6 @@ history_turn_time = 0

 logger = logging.getLogger()

-
-
 def pgfid_to_path(brick, changelog_data):
     """
     For all the pgfids in table, converts into path using recursive
@@ -47,13 +51,13 @@ def pgfid_to_path(brick, changelog_data):
     """
     # pgfid1 to path1 in case of CREATE/MKNOD/MKDIR/LINK/SYMLINK
     for row in changelog_data.gfidpath_get_distinct("pgfid1", {"path1": ""}):
-        # In case of Data/Metadata only, pgfid1 will not be their
+        # In case of Data/Metadata only, pgfid1 will not be there
         if row[0] == "":
             continue

         try:
             path = symlink_gfid_to_path(brick, row[0])
-            path = output_path_prepare(path, args.output_prefix)
+            path = output_path_prepare(path, args)
             changelog_data.gfidpath_set_path1(path, row[0])
         except (IOError, OSError) as e:
             logger.warn("Error converting to path: %s" % e)
@@ -69,7 +73,7 @@ def pgfid_to_path(brick, changelog_data):

         try:
             path = symlink_gfid_to_path(brick, row[0])
-            path = output_path_prepare(path, args.output_prefix)
+            path = output_path_prepare(path, args)
             changelog_data.gfidpath_set_path2(path, row[0])
         except (IOError, OSError) as e:
             logger.warn("Error converting to path: %s" % e)
@@ -90,9 +94,9 @@ def populate_pgfid_and_inodegfid(brick, changelog_data):
             # It is a Directory if GFID backend path is symlink
             try:
                 path = symlink_gfid_to_path(brick, gfid)
-                path = output_path_prepare(path, args.output_prefix)
+                path = output_path_prepare(path, args)
                 changelog_data.gfidpath_update({"path1": path},
-                                                {"gfid": gfid})
+                                               {"gfid": gfid})
             except (IOError, OSError) as e:
                 logger.warn("Error converting to path: %s" % e)
                 continue
@@ -102,15 +106,55 @@ def populate_pgfid_and_inodegfid(brick, changelog_data):
                 changelog_data.inodegfid_add(os.stat(p).st_ino, gfid)
                 file_xattrs = xattr.list(p)
                 for x in file_xattrs:
-                    if x.startswith("trusted.pgfid."):
+                    x_str = bytearray_to_str(x)
+                    if x_str.startswith("trusted.pgfid."):
                         # PGFID in pgfid table
-                        changelog_data.pgfid_add(x.split(".")[-1])
+                        changelog_data.pgfid_add(x_str.split(".")[-1])
             except (IOError, OSError):
                 # All OS Errors ignored, since failures will be logged
                 # in End. All GFIDs present in gfidpath table
                 continue


+def enum_hard_links_using_gfid2path(brick, gfid, args):
+    hardlinks = []
+    p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)
+    if not os.path.isdir(p):
+        # we have a symlink or a normal file
+        try:
+            file_xattrs = xattr.list(p)
+            for x in file_xattrs:
+                x_str = bytearray_to_str(x)
+                if x_str.startswith("trusted.gfid2path."):
+                    # get the value for the xattr i.e. <PGFID>/<BN>
+                    v = xattr.getxattr(p, x_str)
+                    v_str = bytearray_to_str(v)
+                    pgfid, bn = v_str.split(os.sep)
+                    try:
+                        path = symlink_gfid_to_path(brick, pgfid)
+                        fullpath = os.path.join(path, bn)
+                        fullpath = output_path_prepare(fullpath, args)
+                        hardlinks.append(fullpath)
+                    except (IOError, OSError) as e:
+                        logger.warn("Error converting to path: %s" % e)
+                        continue
+        except (IOError, OSError):
+            pass
+    return hardlinks
+
+
+def gfid_to_all_paths_using_gfid2path(brick, changelog_data, args):
+    path = ""
+    for row in changelog_data.gfidpath_get({"path1": "", "type": "MODIFY"}):
+        gfid = row[3].strip()
+        logger.debug("Processing gfid %s" % gfid)
+        hardlinks = enum_hard_links_using_gfid2path(brick, gfid, args)
+
+        path = ",".join(hardlinks)
+
+        changelog_data.gfidpath_update({"path1": path}, {"gfid": gfid})
+
+
 def gfid_to_path_using_pgfid(brick, changelog_data, args):
     """
     For all the pgfids collected, Converts to Path and
@@ -145,7 +189,7 @@ def gfid_to_path_using_pgfid(brick, changelog_data, args):

         path = path.strip()
         path = path[brick_path_len+1:]
-        path = output_path_prepare(path, args.output_prefix)
+        path = output_path_prepare(path, args)
         changelog_data.append_path1(path, inode)
         changelog_data.inodegfid_update({"converted": 1}, {"inode": inode})

@@ -158,10 +202,10 @@
         try:
             path = symlink_gfid_to_path(brick, row[0])
             find(os.path.join(brick, path),
-                    callback_func=output_callback,
-                    filter_func=inode_filter,
-                    ignore_dirs=ignore_dirs,
-                    subdirs_crawl=False)
+                 callback_func=output_callback,
+                 filter_func=inode_filter,
+                 ignore_dirs=ignore_dirs,
+                 subdirs_crawl=False)
         except (IOError, OSError) as e:
             logger.warn("Error converting to path: %s" % e)
             continue
@@ -193,7 +237,7 @@ def gfid_to_path_using_batchfind(brick, changelog_data):
         # Also updates converted flag in inodegfid table as 1
         path = path.strip()
         path = path[brick_path_len+1:]
-        path = output_path_prepare(path, args.output_prefix)
+        path = output_path_prepare(path, args)

         changelog_data.append_path1(path, inode)

@@ -211,7 +255,7 @@
     """
     Parses a Changelog file and populates data in gfidpath table
     """
-    with open(filename) as f:
+    with codecs.open(filename, encoding="utf-8") as f:
         changelogfile = os.path.basename(filename)
         for line in f:
             data = line.strip().split(" ")
@@ -230,7 +274,7 @@
                 changelog_data.when_rename(changelogfile, data)
             elif data[0] == "E" and data[2] in ["UNLINK", "RMDIR"]:
                 # UNLINK/RMDIR
-                changelog_data.when_unlink_rmdir(changelogfile, data, args)
+                changelog_data.when_unlink_rmdir(changelogfile, data)


 def get_changes(brick, hash_dir, log_file, start, end, args):
@@ -243,7 +287,7 @@
     session_dir = os.path.join(conf.get_opt("session_dir"),
                                args.session)
     status_file = os.path.join(session_dir, args.volume,
-                        "%s.status" % urllib.quote_plus(args.brick))
+                               "%s.status" % urllib.quote_plus(args.brick))

     # Get previous session
     try:
@@ -260,7 +304,7 @@
         fail("%s Changelog register failed: %s" % (brick, e), logger=logger)

     # Output files to record GFIDs and GFID to Path failure GFIDs
-    changelog_data = ChangelogData(args.outfile)
+    changelog_data = ChangelogData(args.outfile, args)

     # Changelogs path(Hard coded to BRICK/.glusterfs/changelogs
     cl_path = os.path.join(brick, ".glusterfs/changelogs")
@@ -270,9 +314,10 @@ def get_changes(brick, hash_dir, log_file, start, end, args):
         actual_end = libgfchangelog.cl_history_changelog(
             cl_path, start, end, CHANGELOGAPI_NUM_WORKERS)
     except libgfchangelog.ChangelogException as e:
-        fail("%s Historical Changelogs not available: %s" % (brick, e),
-             logger=logger)
+        fail("%s: %s Historical Changelogs not available: %s" %
+             (args.node, brick, e), logger=logger)

+    logger.info("[1/4] Starting changelog parsing ...")
     try:
         # scan followed by getchanges till scan returns zero.
         # history_scan() is blocking call, till it gets the number
@@ -282,7 +327,7 @@
         # history_getchanges()
         changes = []
         while libgfchangelog.cl_history_scan() > 0:
-            changes += libgfchangelog.cl_history_getchanges()
+            changes = libgfchangelog.cl_history_getchanges()

             for change in changes:
                 # Ignore if last processed changelog comes
@@ -294,25 +339,34 @@
                     libgfchangelog.cl_history_done(change)
                 except IOError as e:
                     logger.warn("Error parsing changelog file %s: %s" %
-                                 (change, e))
+                                (change, e))

             changelog_data.commit()
     except libgfchangelog.ChangelogException as e:
         fail("%s Error during Changelog Crawl: %s" % (brick, e),
              logger=logger)
+    logger.info("[1/4] Finished changelog parsing.")

     # Convert all pgfid available from Changelogs
+    logger.info("[2/4] Starting 'pgfid to path' conversions ...")
     pgfid_to_path(brick, changelog_data)
     changelog_data.commit()
+    logger.info("[2/4] Finished 'pgfid to path' conversions.")

-    # Convert all GFIDs for which no other additional details available
-    gfid_to_path_using_pgfid(brick, changelog_data, args)
+    # Convert all gfids recorded for data and metadata to all hardlink paths
+    logger.info("[3/4] Starting 'gfid2path' conversions ...")
+    gfid_to_all_paths_using_gfid2path(brick, changelog_data, args)
     changelog_data.commit()
+    logger.info("[3/4] Finished 'gfid2path' conversions.")

     # If some GFIDs fail to get converted from previous step,
     # convert using find
+    logger.info("[4/4] Starting 'gfid to path using batchfind' "
+                "conversions ...")
     gfid_to_path_using_batchfind(brick, changelog_data)
     changelog_data.commit()
+    logger.info("[4/4] Finished 'gfid to path using batchfind' conversions.")

     return actual_end

@@ -326,7 +380,7 @@ def changelog_crawl(brick, start, end, args):

     # WORKING_DIR/BRICKHASH/OUTFILE
     working_dir = os.path.dirname(args.outfile)
-    brickhash = hashlib.sha1(brick)
+    brickhash = hashlib.sha1(brick.encode())
     brickhash = str(brickhash.hexdigest())
     working_dir = os.path.join(working_dir, brickhash)

@@ -348,12 +402,20 @@ def _get_args():
     parser.add_argument("session", help="Session Name")
     parser.add_argument("volume", help="Volume Name")
+    parser.add_argument("node", help="Node Name")
     parser.add_argument("brick", help="Brick Name")
     parser.add_argument("outfile", help="Output File")
     parser.add_argument("start", help="Start Time", type=int)
+    parser.add_argument("end", help="End Time", type=int)
+    parser.add_argument("--only-query", help="Query mode only (no session)",
+                        action="store_true")
     parser.add_argument("--debug", help="Debug", action="store_true")
+    parser.add_argument("--no-encode",
+                        help="Do not encode path in outfile",
+                        action="store_true")
     parser.add_argument("--output-prefix", help="File prefix in output",
                         default=".")
+    parser.add_argument("--type",default="both")
     parser.add_argument("-N", "--only-namespace-changes",
                         help="List only namespace changes",
                         action="store_true")

@@ -373,24 +435,34 @@ if __name__ == "__main__":
     session_dir = os.path.join(conf.get_opt("session_dir"),
                                args.session)
     status_file = os.path.join(session_dir, args.volume,
-                        "%s.status" % urllib.quote_plus(args.brick))
+                               "%s.status" % urllib.quote_plus(args.brick))
     status_file_pre = status_file + ".pre"
     mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
            logger=logger)

-    try:
-        with open(status_file) as f:
-            start = int(f.read().strip())
-    except (ValueError, OSError, IOError):
+    end = -1
+    if args.only_query:
         start = args.start
+        end = args.end
+    else:
+        try:
+            with open(status_file) as f:
+                start = int(f.read().strip())
+        except (ValueError, OSError, IOError):
+            start = args.start
+
+    # end time is optional; so a -1 may be sent to use the default method of
+    # identifying the end time
+    if end == -1:
+        end = int(time.time()) - get_changelog_rollover_time(args.volume)

-    end = int(time.time()) - get_changelog_rollover_time(args.volume)
     logger.info("%s Started Changelog Crawl - Start: %s End: %s" %
                 (args.brick, start, end))

     actual_end = changelog_crawl(args.brick, start, end, args)
-    with open(status_file_pre, "w", buffering=0) as f:
-        f.write(str(actual_end))
+    if not args.only_query:
+        with open(status_file_pre, "w") as f:
+            f.write(str(actual_end))

     logger.info("%s Finished Changelog Crawl - End: %s" %
                 (args.brick, actual_end))
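The centerpiece of the patch is enum_hard_links_using_gfid2path(): each hard link of a file is recorded on the brick backend as a trusted.gfid2path.* extended attribute whose value is "<parent-gfid>/<basename>", so every link path of a changed GFID can be recovered with a few xattr reads instead of a parent-gfid crawl, and gfid_to_all_paths_using_gfid2path() then stores the resolved paths comma-joined in the path1 column. A minimal standalone sketch of that lookup follows; it assumes the same pyxattr xattr module the script imports, and the function name list_gfid2path_entries plus the inline bytes decoding are illustrative stand-ins for the helpers in utils.py and gfind_py2py3.py.

import os
import xattr  # pyxattr, the same module glusterfind itself uses


def list_gfid2path_entries(brick, gfid):
    """Return (parent_gfid, basename) pairs recorded for one GFID."""
    # Backend file for the GFID: <brick>/.glusterfs/<g1g2>/<g3g4>/<gfid>
    backend = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)
    entries = []
    for key in xattr.list(backend):
        # pyxattr returns bytes under Python 3, so normalise to str first
        key = key.decode() if isinstance(key, (bytes, bytearray)) else key
        if not key.startswith("trusted.gfid2path."):
            continue
        value = xattr.getxattr(backend, key)
        value = value.decode() if isinstance(value, (bytes, bytearray)) else value
        # Each value is "<parent-gfid>/<basename>"
        pgfid, basename = value.split(os.sep)
        entries.append((pgfid, basename))
    return entries


if __name__ == "__main__":
    # Illustrative call only; the brick path and GFID are placeholders
    try:
        for pgfid, name in list_gfid2path_entries(
                "/bricks/brick1", "8e1d62c9-07a0-4b4a-9a0a-1c2f0e4f7e2a"):
            print(pgfid, name)
    except (IOError, OSError) as e:
        print("backend file not readable: %s" % e)

In the patch itself the parent GFID is still resolved to a directory path via symlink_gfid_to_path(), run through output_path_prepare(), and the hard links are joined with "," before being written back to the gfidpath table.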
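The rest of the diff is mostly mechanical Python 2/3 porting: hashlib.sha1() only accepts bytes on Python 3 (hence brick.encode()), changelog files are read through codecs.open(..., encoding="utf-8") so iteration yields text on both interpreters, and raw xattr results pass through bytearray_to_str(). A short sketch of the first two points; the brick path and sample file below are made up for the example.

import codecs
import hashlib

# hashlib in Python 3 rejects str input, which is why the patch adds .encode()
brick = "/bricks/brick1"                 # placeholder brick path
print(hashlib.sha1(brick.encode()).hexdigest())

# codecs.open() pins the encoding, so the parser sees text lines on
# Python 3 and unicode lines on Python 2 alike
sample = "/tmp/CHANGELOG.sample"         # placeholder changelog file
with codecs.open(sample, "w", encoding="utf-8") as f:
    f.write("E <gfid> CREATE ...\n")     # made-up record, not the real format
with codecs.open(sample, encoding="utf-8") as f:
    for line in f:
        print(line.strip().split(" "))

The status-file write also drops buffering=0, which Python 3 no longer allows for text-mode files.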
