From 16a3f0d020d23978b22a07354c25c654c88163a2 Mon Sep 17 00:00:00 2001 From: Milind Changire Date: Sat, 28 Nov 2015 14:56:02 +0530 Subject: tools/glusterfind: add --full option to query command The --full option will use brickfind.py to list all files in the volume. The output file will contain url-encoded file names prefixed with the tag string to indicate that all files should be considered as new. The default tag string for the --full option is "NEW". This can be changed with the --tag-for-full-find command-line option. Change-Id: Ic85ba5db062e19df13ae9dc2de8a08eacb5c9792 BUG: 1286279 Signed-off-by: Milind Changire Reviewed-on: http://review.gluster.org/12779 Smoke: Gluster Build System NetBSD-regression: NetBSD Build System CentOS-regression: Gluster Build System Reviewed-by: Aravinda VK --- tools/glusterfind/src/brickfind.py | 10 ++-- tools/glusterfind/src/main.py | 99 ++++++++++++++++++++++++++------------ tools/glusterfind/src/utils.py | 7 ++- 3 files changed, 81 insertions(+), 35 deletions(-) diff --git a/tools/glusterfind/src/brickfind.py b/tools/glusterfind/src/brickfind.py index 57e4c71e750..efc840bca70 100644 --- a/tools/glusterfind/src/brickfind.py +++ b/tools/glusterfind/src/brickfind.py @@ -42,7 +42,7 @@ def brickfind_crawl(brick, args): path = path.strip() path = path[brick_path_len+1:] output_write(fout, path, args.output_prefix, - encode=(not args.no_encode)) + encode=(not args.no_encode), tag=args.tag) ignore_dirs = [os.path.join(brick, dirname) for dirname in @@ -64,6 +64,9 @@ def _get_args(): parser.add_argument("brick", help="Brick Name") parser.add_argument("outfile", help="Output File") parser.add_argument("start", help="Start Time", type=float) + parser.add_argument("tag", help="Tag to prefix file name with") + parser.add_argument("--only-query", help="Only query, No session update", + action="store_true") parser.add_argument("--debug", help="Debug", action="store_true") parser.add_argument("--no-encode", help="Do not encode path in outfile", @@ -92,6 +95,7 @@ if __name__ == "__main__": time_to_update = int(time.time()) brickfind_crawl(args.brick, args) - with open(status_file_pre, "w", buffering=0) as f: - f.write(str(time_to_update)) + if not args.only_query: + with open(status_file_pre, "w", buffering=0) as f: + f.write(str(time_to_update)) sys.exit(0) diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py index 9d2b9890b96..37d6c38cc49 100644 --- a/tools/glusterfind/src/main.py +++ b/tools/glusterfind/src/main.py @@ -109,8 +109,12 @@ def run_cmd_nodes(task, args, **kwargs): # If Full backup is requested or start time is zero, use brickfind change_detector = conf.get_change_detector("changelog") + tag = None if args.full: change_detector = conf.get_change_detector("brickfind") + tag = args.tag_for_full_find.strip() + if tag == "": + tag = '""' if not is_host_local(host_uuid) else "" node_outfiles.append(node_outfile) @@ -119,9 +123,9 @@ def run_cmd_nodes(task, args, **kwargs): args.volume, brick, node_outfile, - str(kwargs.get("start")), - "--output-prefix", - args.output_prefix] + \ + str(kwargs.get("start"))] + \ + ([tag] if tag is not None else []) + \ + ["--output-prefix", args.output_prefix] + \ (["--debug"] if args.debug else []) + \ (["--no-encode"] if args.no_encode else []) + \ (["--only-namespace-changes"] if args.only_namespace_changes @@ -131,7 +135,14 @@ def run_cmd_nodes(task, args, **kwargs): opts["copy_outfile"] = True elif task == "query": # If Full backup is requested or start time is zero, use brickfind + tag = None change_detector = conf.get_change_detector("changelog") + if args.full: + change_detector = conf.get_change_detector("brickfind") + tag = args.tag_for_full_find.strip() + if tag == "": + tag = '""' if not is_host_local(host_uuid) else "" + node_outfiles.append(node_outfile) cmd = [change_detector, @@ -140,6 +151,7 @@ def run_cmd_nodes(task, args, **kwargs): brick, node_outfile, str(kwargs.get("start"))] + \ + ([tag] if tag is not None else []) + \ ["--only-query"] + \ ["--output-prefix", args.output_prefix] + \ (["--debug"] if args.debug else []) + \ @@ -296,23 +308,35 @@ def _get_args(): parser_pre.add_argument("-N", "--only-namespace-changes", help="List only namespace changes", action="store_true") + parser_pre.add_argument("--tag-for-full-find", + help="Tag prefix for file names emitted during" + " a full find operation; default: \"NEW\"", + default="NEW") # query --since-time # [--output-prefix ] [--full] - parser_pre = subparsers.add_parser('query') - parser_pre.add_argument("volume", help="Volume Name") - parser_pre.add_argument("outfile", help="Output File", - action=StoreAbsPath) - parser_pre.add_argument("--since-time", help="UNIX epoch time since which " - "listing is required", type=int) - parser_pre.add_argument("--debug", help="Debug", action="store_true") - parser_pre.add_argument("--disable-partial", help="Disable Partial find, " - "Fail when one node fails", action="store_true") - parser_pre.add_argument("--output-prefix", help="File prefix in output", - default=".") - parser_pre.add_argument("-N", "--only-namespace-changes", - help="List only namespace changes", - action="store_true") + parser_query = subparsers.add_parser('query') + parser_query.add_argument("volume", help="Volume Name") + parser_query.add_argument("outfile", help="Output File", + action=StoreAbsPath) + parser_query.add_argument("--since-time", help="UNIX epoch time since " + "which listing is required", type=int) + parser_query.add_argument("--no-encode", + help="Do not encode path in output file", + action="store_true") + parser_query.add_argument("--full", help="Full find", action="store_true") + parser_query.add_argument("--debug", help="Debug", action="store_true") + parser_query.add_argument("--disable-partial", help="Disable Partial find," + " Fail when one node fails", action="store_true") + parser_query.add_argument("--output-prefix", help="File prefix in output", + default=".") + parser_query.add_argument("-N", "--only-namespace-changes", + help="List only namespace changes", + action="store_true") + parser_query.add_argument("--tag-for-full-find", + help="Tag prefix for file names emitted during" + " a full find operation; default: \"NEW\"", + default="NEW") # post parser_post = subparsers.add_parser('post') @@ -491,31 +515,46 @@ def mode_query(session_dir, args): # Enable volume options for changelog capture enable_volume_options(args) + # Test options + if not args.since_time and not args.full: + fail("Please specify either --since-time or --full", logger=logger) + + if args.since_time and args.full: + fail("Please specify either --since-time or --full, but not both", + logger=logger) + # Start query command processing if args.since_time: start = args.since_time - logger.debug("Query is called - Session: %s, Volume: %s, " - "Start time: %s" - % ("default", args.volume, start)) + else: + start = 0 # --full option is handled separately + + logger.debug("Query is called - Session: %s, Volume: %s, " + "Start time: %s" + % ("default", args.volume, start)) - run_cmd_nodes("query", args, start=start) + run_cmd_nodes("query", args, start=start) - # Merger + # Merger + if args.full: + cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile] + execute(cmd, + exit_msg="Failed to merge output files " + "collected from nodes", logger=logger) + else: # Read each Changelogs db and generate finaldb create_file(args.outfile, exit_on_err=True, logger=logger) outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles) write_output(args, outfilemerger) - try: - os.remove(args.outfile + ".db") - except (IOError, OSError): - pass + try: + os.remove(args.outfile + ".db") + except (IOError, OSError): + pass - run_cmd_nodes("cleanup", args) + run_cmd_nodes("cleanup", args) - sys.stdout.write("Generated output file %s\n" % args.outfile) - else: - fail("Please specify --since-time option") + sys.stdout.write("Generated output file %s\n" % args.outfile) def mode_pre(session_dir, args): diff --git a/tools/glusterfind/src/utils.py b/tools/glusterfind/src/utils.py index b05f08ee5f6..598cc9e7f46 100644 --- a/tools/glusterfind/src/utils.py +++ b/tools/glusterfind/src/utils.py @@ -75,7 +75,7 @@ def find(path, callback_func=lambda x: True, filter_func=lambda x: True, callback_func(full_path, filter_result) -def output_write(f, path, prefix=".", encode=False): +def output_write(f, path, prefix=".", encode=False, tag=""): if path == "": return @@ -85,7 +85,10 @@ def output_write(f, path, prefix=".", encode=False): if encode: path = urllib.quote_plus(path) - f.write("%s\n" % path) + # set the field separator + FS = "" if tag == "" else " " + + f.write("%s%s%s\n" % (tag.strip(), FS, path)) def human_time(ts): -- cgit