summaryrefslogtreecommitdiffstats
path: root/tools/glusterfind
diff options
context:
space:
mode:
authorAravinda VK <avishwan@redhat.com>2016-02-19 17:08:56 +0530
committerVenky Shankar <vshankar@redhat.com>2016-02-26 02:11:08 -0800
commit1b897c39ba8c0f1bf180316637cc2d87e6920800 (patch)
tree78f974a872e202c831d63e275e7fccb6e2a3eda3 /tools/glusterfind
parentbf2004bc1346890e69292c5177a5d8e002b696e2 (diff)
tools/glusterfind: New option --no-encode
Added a new option to skip encoding paths in the output file, and added handling for Unicode strings. File paths can contain newline characters, so to differentiate between paths, each path is encoded according to RFC 3986 (https://www.ietf.org/rfc/rfc3986.txt). Because of this, consumer applications have to decode each path before consuming it. With this option, paths are not encoded and can be consumed directly by applications. Unicode encoding is handled automatically. BUG: 1310080 Change-Id: I83d59831997dbd1264b48e9b1aa732c7dfc700b5 Signed-off-by: Aravinda VK <avishwan@redhat.com> Reviewed-on: http://review.gluster.org/13477 Smoke: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Milind Changire <mchangir@redhat.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: Kotresh HR <khiremat@redhat.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Venky Shankar <vshankar@redhat.com>
Diffstat (limited to 'tools/glusterfind')
-rw-r--r--tools/glusterfind/src/__init__.py1
-rw-r--r--tools/glusterfind/src/brickfind.py7
-rw-r--r--tools/glusterfind/src/changelog.py18
-rw-r--r--tools/glusterfind/src/changelogdata.py94
-rw-r--r--tools/glusterfind/src/conf.py1
-rw-r--r--tools/glusterfind/src/libgfchangelog.py1
-rw-r--r--tools/glusterfind/src/main.py19
-rw-r--r--tools/glusterfind/src/nodeagent.py1
-rw-r--r--tools/glusterfind/src/utils.py12
9 files changed, 112 insertions, 42 deletions
diff --git a/tools/glusterfind/src/__init__.py b/tools/glusterfind/src/__init__.py
index eb941c6..0ffb3f7 100644
--- a/tools/glusterfind/src/__init__.py
+++ b/tools/glusterfind/src/__init__.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
# This file is part of GlusterFS.
diff --git a/tools/glusterfind/src/brickfind.py b/tools/glusterfind/src/brickfind.py
index f300638..57e4c71 100644
--- a/tools/glusterfind/src/brickfind.py
+++ b/tools/glusterfind/src/brickfind.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
# This file is part of GlusterFS.
@@ -40,7 +41,8 @@ def brickfind_crawl(brick, args):
def output_callback(path, filter_result):
path = path.strip()
path = path[brick_path_len+1:]
- output_write(fout, path, args.output_prefix, encode=True)
+ output_write(fout, path, args.output_prefix,
+ encode=(not args.no_encode))
ignore_dirs = [os.path.join(brick, dirname)
for dirname in
@@ -63,6 +65,9 @@ def _get_args():
parser.add_argument("outfile", help="Output File")
parser.add_argument("start", help="Start Time", type=float)
parser.add_argument("--debug", help="Debug", action="store_true")
+ parser.add_argument("--no-encode",
+ help="Do not encode path in outfile",
+ action="store_true")
parser.add_argument("--output-prefix", help="File prefix in output",
default=".")
diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py
index d6f3dc1..a58a7eb 100644
--- a/tools/glusterfind/src/changelog.py
+++ b/tools/glusterfind/src/changelog.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
# This file is part of GlusterFS.
@@ -53,7 +54,7 @@ def pgfid_to_path(brick, changelog_data):
try:
path = symlink_gfid_to_path(brick, row[0])
- path = output_path_prepare(path, args.output_prefix)
+ path = output_path_prepare(path, args)
changelog_data.gfidpath_set_path1(path, row[0])
except (IOError, OSError) as e:
logger.warn("Error converting to path: %s" % e)
@@ -69,7 +70,7 @@ def pgfid_to_path(brick, changelog_data):
try:
path = symlink_gfid_to_path(brick, row[0])
- path = output_path_prepare(path, args.output_prefix)
+ path = output_path_prepare(path, args)
changelog_data.gfidpath_set_path2(path, row[0])
except (IOError, OSError) as e:
logger.warn("Error converting to path: %s" % e)
@@ -90,7 +91,7 @@ def populate_pgfid_and_inodegfid(brick, changelog_data):
# It is a Directory if GFID backend path is symlink
try:
path = symlink_gfid_to_path(brick, gfid)
- path = output_path_prepare(path, args.output_prefix)
+ path = output_path_prepare(path, args)
changelog_data.gfidpath_update({"path1": path},
{"gfid": gfid})
except (IOError, OSError) as e:
@@ -145,7 +146,7 @@ def gfid_to_path_using_pgfid(brick, changelog_data, args):
path = path.strip()
path = path[brick_path_len+1:]
- path = output_path_prepare(path, args.output_prefix)
+ path = output_path_prepare(path, args)
changelog_data.append_path1(path, inode)
changelog_data.inodegfid_update({"converted": 1}, {"inode": inode})
@@ -193,7 +194,7 @@ def gfid_to_path_using_batchfind(brick, changelog_data):
# Also updates converted flag in inodegfid table as 1
path = path.strip()
path = path[brick_path_len+1:]
- path = output_path_prepare(path, args.output_prefix)
+ path = output_path_prepare(path, args)
changelog_data.append_path1(path, inode)
@@ -230,7 +231,7 @@ def parse_changelog_to_db(changelog_data, filename, args):
changelog_data.when_rename(changelogfile, data)
elif data[0] == "E" and data[2] in ["UNLINK", "RMDIR"]:
# UNLINK/RMDIR
- changelog_data.when_unlink_rmdir(changelogfile, data, args)
+ changelog_data.when_unlink_rmdir(changelogfile, data)
def get_changes(brick, hash_dir, log_file, start, end, args):
@@ -260,7 +261,7 @@ def get_changes(brick, hash_dir, log_file, start, end, args):
fail("%s Changelog register failed: %s" % (brick, e), logger=logger)
# Output files to record GFIDs and GFID to Path failure GFIDs
- changelog_data = ChangelogData(args.outfile)
+ changelog_data = ChangelogData(args.outfile, args)
# Changelogs path(Hard coded to BRICK/.glusterfs/changelogs
cl_path = os.path.join(brick, ".glusterfs/changelogs")
@@ -354,6 +355,9 @@ def _get_args():
parser.add_argument("--only-query", help="Query mode only (no session)",
action="store_true")
parser.add_argument("--debug", help="Debug", action="store_true")
+ parser.add_argument("--no-encode",
+ help="Do not encode path in outfile",
+ action="store_true")
parser.add_argument("--output-prefix", help="File prefix in output",
default=".")
parser.add_argument("-N", "--only-namespace-changes",
diff --git a/tools/glusterfind/src/changelogdata.py b/tools/glusterfind/src/changelogdata.py
index 39a16d5..abb8b01 100644
--- a/tools/glusterfind/src/changelogdata.py
+++ b/tools/glusterfind/src/changelogdata.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
# This file is part of GlusterFS.
@@ -82,13 +83,15 @@ class OutputMerger(object):
class ChangelogData(object):
- def __init__(self, dbpath):
+ def __init__(self, dbpath, args):
self.conn = sqlite3.connect(dbpath)
self.cursor = self.conn.cursor()
self.cursor_reader = self.conn.cursor()
self._create_table_gfidpath()
self._create_table_pgfid()
self._create_table_inodegfid()
+ self.args = args
+ self.path_sep = "/" if args.no_encode else "%2F"
def _create_table_gfidpath(self):
drop_table = "DROP TABLE IF EXISTS gfidpath"
@@ -143,7 +146,10 @@ class ChangelogData(object):
for key, value in filters.items():
query += " AND %s = ?" % key
- params.append(value)
+ if isinstance(value, int):
+ params.append(value)
+ else:
+ params.append(unicode(value, "utf8"))
return self.cursor_reader.execute(query, params)
@@ -155,7 +161,10 @@ class ChangelogData(object):
for key, value in filters.items():
query += " AND %s = ?" % key
- params.append(value)
+ if isinstance(value, int):
+ params.append(value)
+ else:
+ params.append(unicode(value, "utf8"))
return self.cursor_reader.execute(query, params)
@@ -166,7 +175,10 @@ class ChangelogData(object):
for key, value in filters.items():
query += " AND %s = ?" % key
- params.append(value)
+ if isinstance(value, int):
+ params.append(value)
+ else:
+ params.append(unicode(value, "utf8"))
self.cursor.execute(query, params)
@@ -177,7 +189,10 @@ class ChangelogData(object):
params = []
for key, value in data.items():
fields.append(key)
- params.append(value)
+ if isinstance(value, int):
+ params.append(value)
+ else:
+ params.append(unicode(value, "utf8"))
values_substitute = len(fields)*["?"]
query += "%s) VALUES(%s)" % (",".join(fields),
@@ -190,14 +205,20 @@ class ChangelogData(object):
update_fields = []
for key, value in data.items():
update_fields.append("%s = ?" % key)
- params.append(value)
+ if isinstance(value, int):
+ params.append(value)
+ else:
+ params.append(unicode(value, "utf8"))
query = "UPDATE %s SET %s WHERE 1 = 1" % (tablename,
", ".join(update_fields))
for key, value in filters.items():
query += " AND %s = ?" % key
- params.append(value)
+ if isinstance(value, int):
+ params.append(value)
+ else:
+ params.append(unicode(value, "utf8"))
self.cursor.execute(query, params)
@@ -209,8 +230,12 @@ class ChangelogData(object):
params = []
for key, value in filters.items():
+ print value
query += " AND %s = ?" % key
- params.append(value)
+ if isinstance(value, int):
+ params.append(value)
+ else:
+ params.append(unicode(value, "utf8"))
self.cursor.execute(query, params)
row = self.cursor.fetchone()
@@ -293,8 +318,8 @@ class ChangelogData(object):
update_str1 = "? || bn1"
update_str2 = "? || bn2"
else:
- update_str1 = "? || '%2F' || bn1"
- update_str2 = "? || '%2F' || bn2"
+ update_str1 = "? || '{0}' || bn1".format(self.path_sep)
+ update_str2 = "? || '{0}' || bn2".format(self.path_sep)
query = """UPDATE gfidpath SET path1 = %s
WHERE pgfid1 = ?""" % update_str1
@@ -310,7 +335,7 @@ class ChangelogData(object):
if path2 == "":
update_str = "? || bn2"
else:
- update_str = "? || '%2F' || bn2"
+ update_str = "? || '{0}' || bn2".format(self.path_sep)
query = """UPDATE gfidpath SET path2 = %s
WHERE pgfid2 = ?""" % update_str
@@ -321,8 +346,11 @@ class ChangelogData(object):
# Add the Entry to DB
pgfid1, bn1 = urllib.unquote_plus(data[6]).split("/", 1)
- # Quote again the basename
- bn1 = urllib.quote_plus(bn1.strip())
+ if self.args.no_encode:
+ bn1 = bn1.strip()
+ else:
+ # Quote again the basename
+ bn1 = urllib.quote_plus(bn1.strip())
self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1)
@@ -331,9 +359,14 @@ class ChangelogData(object):
pgfid1, bn1 = urllib.unquote_plus(data[3]).split("/", 1)
pgfid2, bn2 = urllib.unquote_plus(data[4]).split("/", 1)
- # Quote again the basename
- bn1 = urllib.quote_plus(bn1.strip())
- bn2 = urllib.quote_plus(bn2.strip())
+ if self.args.no_encode:
+ # Encoding disabled: keep the basenames unquoted
+ bn1 = bn1.strip()
+ bn2 = bn2.strip()
+ else:
+ # Quote again the basename
+ bn1 = urllib.quote_plus(bn1.strip())
+ bn2 = urllib.quote_plus(bn2.strip())
if self.gfidpath_exists({"gfid": data[1], "type": "NEW",
"pgfid1": pgfid1, "bn1": bn1}):
@@ -374,9 +407,12 @@ class ChangelogData(object):
# E <GFID> <LINK|SYMLINK> <PGFID>/<BASENAME>
# Add as New record in Db as Type NEW
pgfid1, bn1 = urllib.unquote_plus(data[3]).split("/", 1)
-
- # Quote again the basename
- bn1 = urllib.quote_plus(bn1.strip())
+ if self.args.no_encode:
+ # Encoding disabled: keep the basename unquoted
+ bn1 = bn1.strip()
+ else:
+ # Quote again the basename
+ bn1 = urllib.quote_plus(bn1.strip())
self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1)
@@ -386,16 +422,20 @@ class ChangelogData(object):
not self.gfidpath_exists({"gfid": data[1], "type": "MODIFY"}):
self.gfidpath_add(changelogfile, RecordType.MODIFY, data[1])
- def when_unlink_rmdir(self, changelogfile, data, args):
+ def when_unlink_rmdir(self, changelogfile, data):
# E <GFID> <UNLINK|RMDIR> <PGFID>/<BASENAME>
pgfid1, bn1 = urllib.unquote_plus(data[3]).split("/", 1)
- # Quote again the basename
- bn1 = urllib.quote_plus(bn1.strip())
+
+ if self.args.no_encode:
+ bn1 = bn1.strip()
+ else:
+ # Quote again the basename
+ bn1 = urllib.quote_plus(bn1.strip())
deleted_path = data[4] if len(data) == 5 else ""
if deleted_path != "":
deleted_path = output_path_prepare(deleted_path,
- args.output_prefix)
+ self.args)
if self.gfidpath_exists({"gfid": data[1], "type": "NEW",
"pgfid1": pgfid1, "bn1": bn1}):
@@ -421,12 +461,12 @@ class ChangelogData(object):
"bn2": bn1})
# If deleted directory is parent for somebody
- query1 = """UPDATE gfidpath SET path1 = ? || '%2F' || bn1
- WHERE pgfid1 = ? AND path1 != ''"""
+ query1 = """UPDATE gfidpath SET path1 = ? || '{0}' || bn1
+ WHERE pgfid1 = ? AND path1 != ''""".format(self.path_sep)
self.cursor.execute(query1, (deleted_path, data[1]))
- query1 = """UPDATE gfidpath SET path2 = ? || '%2F' || bn1
- WHERE pgfid2 = ? AND path2 != ''"""
+ query1 = """UPDATE gfidpath SET path2 = ? || '{0}' || bn1
+ WHERE pgfid2 = ? AND path2 != ''""".format(self.path_sep)
self.cursor.execute(query1, (deleted_path, data[1]))
def commit(self):
diff --git a/tools/glusterfind/src/conf.py b/tools/glusterfind/src/conf.py
index 2c6eac2..d73fee4 100644
--- a/tools/glusterfind/src/conf.py
+++ b/tools/glusterfind/src/conf.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
# This file is part of GlusterFS.
diff --git a/tools/glusterfind/src/libgfchangelog.py b/tools/glusterfind/src/libgfchangelog.py
index 44e8fd5..dd8153e 100644
--- a/tools/glusterfind/src/libgfchangelog.py
+++ b/tools/glusterfind/src/libgfchangelog.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
# This file is part of GlusterFS.
diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py
index 6d03cbe..86cfae0 100644
--- a/tools/glusterfind/src/main.py
+++ b/tools/glusterfind/src/main.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
# This file is part of GlusterFS.
@@ -121,6 +122,7 @@ def run_cmd_nodes(task, args, **kwargs):
"--output-prefix",
args.output_prefix] + \
(["--debug"] if args.debug else []) + \
+ (["--no-encode"] if args.no_encode else []) + \
(["--only-namespace-changes"] if args.only_namespace_changes
else [])
@@ -140,6 +142,7 @@ def run_cmd_nodes(task, args, **kwargs):
["--only-query"] + \
["--output-prefix", args.output_prefix] + \
(["--debug"] if args.debug else []) + \
+ (["--no-encode"] if args.no_encode else []) + \
(["--only-namespace-changes"]
if args.only_namespace_changes else [])
@@ -277,6 +280,9 @@ def _get_args():
parser_pre.add_argument("volume", help="Volume Name")
parser_pre.add_argument("outfile", help="Output File", action=StoreAbsPath)
parser_pre.add_argument("--debug", help="Debug", action="store_true")
+ parser_pre.add_argument("--no-encode",
+ help="Do not encode path in output file",
+ action="store_true")
parser_pre.add_argument("--full", help="Full find", action="store_true")
parser_pre.add_argument("--disable-partial", help="Disable Partial find, "
"Fail when one node fails", action="store_true")
@@ -400,12 +406,19 @@ def write_output(args, outfilemerger):
for p in paths:
if p == "":
continue
- p_rep = p.replace("%2F%2F", "%2F")
+ p_rep = p.replace("%2F%2F", "%2F").replace("//", "/")
if not row_2_rep:
- row_2_rep = row[2].replace("%2F%2F", "%2F")
+ row_2_rep = row[2].replace("%2F%2F", "%2F").replace("//",
+ "/")
if p_rep == row_2_rep:
continue
- f.write("%s %s %s\n" % (row[0], p_rep, row_2_rep))
+
+ p_rep = p_rep.encode('utf8', 'replace')
+ row_2_rep = row_2_rep.encode('utf8', 'replace')
+
+ f.write("{0} {1} {2}\n".format(row[0],
+ p_rep,
+ row_2_rep))
def mode_create(session_dir, args):
diff --git a/tools/glusterfind/src/nodeagent.py b/tools/glusterfind/src/nodeagent.py
index e7ba4af..f707449 100644
--- a/tools/glusterfind/src/nodeagent.py
+++ b/tools/glusterfind/src/nodeagent.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
# This file is part of GlusterFS.
diff --git a/tools/glusterfind/src/utils.py b/tools/glusterfind/src/utils.py
index fdf61fe..b3b0bdf 100644
--- a/tools/glusterfind/src/utils.py
+++ b/tools/glusterfind/src/utils.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
# This file is part of GlusterFS.
@@ -228,14 +229,17 @@ def get_changelog_rollover_time(volumename):
return DEFAULT_CHANGELOG_INTERVAL
-def output_path_prepare(path, output_prefix):
+def output_path_prepare(path, args):
"""
If Prefix is set, joins to Path, removes ending slash
and encodes it.
"""
- if output_prefix != ".":
- path = os.path.join(output_prefix, path)
+ if args.output_prefix != ".":
+ path = os.path.join(args.output_prefix, path)
if path.endswith("/"):
path = path[0:len(path)-1]
- return urllib.quote_plus(path)
+ if args.no_encode:
+ return path
+ else:
+ return urllib.quote_plus(path)