From df85ed48e5e94449cdcc77de3b86e10ccea49f1e Mon Sep 17 00:00:00 2001 From: Aravinda VK Date: Mon, 3 Jul 2017 14:51:21 +0530 Subject: tools/glusterfind: Fix encoding to encode only space, newline and percent chars libgfchangelog was encoding paths using the RFC 3986 spec, but encoding is only required for SPACE, NEWLINE and PERCENT chars, since the NEWLINE char is used as the record separator and SPACE as the field separator in the parsed changelogs output. Changed the encoding function to encode only SPACE, NEWLINE and PERCENT chars. BUG: 1451724 Change-Id: Ic1dea824d23493dedcf3db45f353f90572f4e046 Signed-off-by: Aravinda VK Reviewed-on: https://review.gluster.org/17788 CentOS-regression: Gluster Build System Smoke: Gluster Build System Reviewed-by: Milind Changire --- tools/glusterfind/src/__init__.py | 1 - tools/glusterfind/src/changelogdata.py | 52 +++++++++------------------------ tools/glusterfind/src/conf.py | 1 - tools/glusterfind/src/libgfchangelog.py | 1 - tools/glusterfind/src/main.py | 7 ++--- tools/glusterfind/src/utils.py | 21 ++++++++++--- 6 files changed, 33 insertions(+), 50 deletions(-) diff --git a/tools/glusterfind/src/__init__.py b/tools/glusterfind/src/__init__.py index 0ffb3f7432d..1753698b5fa 100644 --- a/tools/glusterfind/src/__init__.py +++ b/tools/glusterfind/src/__init__.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (c) 2015 Red Hat, Inc. diff --git a/tools/glusterfind/src/changelogdata.py b/tools/glusterfind/src/changelogdata.py index b4a97093aa8..3140d945b49 100644 --- a/tools/glusterfind/src/changelogdata.py +++ b/tools/glusterfind/src/changelogdata.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (c) 2015 Red Hat, Inc. @@ -10,10 +9,9 @@ # cases as published by the Free Software Foundation. 
import sqlite3 -import urllib import os -from utils import RecordType +from utils import RecordType, unquote_plus_space_newline from utils import output_path_prepare @@ -92,7 +90,7 @@ class ChangelogData(object): self._create_table_pgfid() self._create_table_inodegfid() self.args = args - self.path_sep = "/" if args.no_encode else "%2F" + self.path_sep = "/" def _create_table_gfidpath(self): drop_table = "DROP TABLE IF EXISTS gfidpath" @@ -323,36 +321,21 @@ class ChangelogData(object): def when_create_mknod_mkdir(self, changelogfile, data): # E / # Add the Entry to DB - # urllib.unquote_plus will not handle unicode so, encode Unicode to - # represent in 8 bit format and then unquote - pgfid1, bn1 = urllib.unquote_plus( - data[6].encode("utf-8")).split("/", 1) + pgfid1, bn1 = data[6].split("/", 1) if self.args.no_encode: - # No urlencode since no_encode is set, so convert again to Unicode - # format from previously encoded. - bn1 = bn1.decode("utf-8").strip() - else: - # Quote again the basename - bn1 = urllib.quote_plus(bn1.strip()) + bn1 = unquote_plus_space_newline(bn1).strip() self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1) def when_rename(self, changelogfile, data): # E RENAME / / - pgfid1, bn1 = urllib.unquote_plus( - data[3].encode("utf-8")).split("/", 1) - pgfid2, bn2 = urllib.unquote_plus( - data[4].encode("utf-8")).split("/", 1) + pgfid1, bn1 = data[3].split("/", 1) + pgfid2, bn2 = data[4].split("/", 1) if self.args.no_encode: - # Quote again the basename - bn1 = bn1.decode("utf-8").strip() - bn2 = bn2.decode("utf-8").strip() - else: - # Quote again the basename - bn1 = urllib.quote_plus(bn1.strip()) - bn2 = urllib.quote_plus(bn2.strip()) + bn1 = unquote_plus_space_newline(bn1).strip() + bn2 = unquote_plus_space_newline(bn2).strip() if self.gfidpath_exists({"gfid": data[1], "type": "NEW", "pgfid1": pgfid1, "bn1": bn1}): @@ -392,14 +375,9 @@ class ChangelogData(object): def when_link_symlink(self, changelogfile, data): # E / # Add as 
New record in Db as Type NEW - pgfid1, bn1 = urllib.unquote_plus( - data[3].encode("utf-8")).split("/", 1) + pgfid1, bn1 = data[3].split("/", 1) if self.args.no_encode: - # Quote again the basename - bn1 = bn1.decode("utf-8").strip() - else: - # Quote again the basename - bn1 = urllib.quote_plus(bn1.strip()) + bn1 = unquote_plus_space_newline(bn1).strip() self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1) @@ -411,18 +389,14 @@ class ChangelogData(object): def when_unlink_rmdir(self, changelogfile, data): # E / - pgfid1, bn1 = urllib.unquote_plus( - data[3].encode("utf-8")).split("/", 1) + pgfid1, bn1 = data[3].split("/", 1) if self.args.no_encode: - bn1 = bn1.decode("utf-8").strip() - else: - # Quote again the basename - bn1 = urllib.quote_plus(bn1.strip()) + bn1 = unquote_plus_space_newline(bn1).strip() deleted_path = data[4] if len(data) == 5 else "" if deleted_path != "": - deleted_path = urllib.unquote_plus(deleted_path.encode("utf-8")) + deleted_path = unquote_plus_space_newline(deleted_path) deleted_path = output_path_prepare(deleted_path, self.args) if self.gfidpath_exists({"gfid": data[1], "type": "NEW", diff --git a/tools/glusterfind/src/conf.py b/tools/glusterfind/src/conf.py index d73fee42aad..d91746bda13 100644 --- a/tools/glusterfind/src/conf.py +++ b/tools/glusterfind/src/conf.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (c) 2015 Red Hat, Inc. diff --git a/tools/glusterfind/src/libgfchangelog.py b/tools/glusterfind/src/libgfchangelog.py index dd8153e4e61..0f6b40d6c9c 100644 --- a/tools/glusterfind/src/libgfchangelog.py +++ b/tools/glusterfind/src/libgfchangelog.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (c) 2015 Red Hat, Inc. 
diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py index 3d0f02a65d4..e7e9889569c 100644 --- a/tools/glusterfind/src/main.py +++ b/tools/glusterfind/src/main.py @@ -21,13 +21,13 @@ import shutil import tempfile import signal from datetime import datetime +import codecs from utils import execute, is_host_local, mkdirp, fail from utils import setup_logger, human_time, handle_rm_error from utils import get_changelog_rollover_time, cache_output, create_file import conf from changelogdata import OutputMerger -import codecs PROG_DESCRIPTION = """ GlusterFS Incremental API @@ -481,10 +481,9 @@ def write_output(outfile, outfilemerger, field_separator): for p in paths: if p == "": continue - p_rep = p.replace("%2F%2F", "%2F").replace("//", "/") + p_rep = p.replace("//", "/") if not row_2_rep: - row_2_rep = row[2].replace("%2F%2F", "%2F").replace("//", - "/") + row_2_rep = row[2].replace("//", "/") if p_rep == row_2_rep: continue diff --git a/tools/glusterfind/src/utils.py b/tools/glusterfind/src/utils.py index b08233e4a9f..c24258e6ef8 100644 --- a/tools/glusterfind/src/utils.py +++ b/tools/glusterfind/src/utils.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (c) 2015 Red Hat, Inc. 
@@ -16,10 +15,12 @@ import xml.etree.cElementTree as etree import logging import os from datetime import datetime -import urllib ROOT_GFID = "00000000-0000-0000-0000-000000000001" DEFAULT_CHANGELOG_INTERVAL = 15 +SPACE_ESCAPE_CHAR = "%20" +NEWLINE_ESCAPE_CHAR = "%0A" +PERCENTAGE_ESCAPE_CHAR = "%25" ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError cache_data = {} @@ -84,7 +85,7 @@ def output_write(f, path, prefix=".", encode=False, tag="", path = os.path.join(prefix, path) if encode: - path = urllib.quote_plus(path) + path = quote_plus_space_newline(path) # set the field separator FS = "" if tag == "" else field_separator @@ -246,4 +247,16 @@ def output_path_prepare(path, args): if args.no_encode: return path else: - return urllib.quote_plus(path.encode("utf-8")) + return quote_plus_space_newline(path) + + +def unquote_plus_space_newline(s): + return s.replace(SPACE_ESCAPE_CHAR, " ")\ + .replace(NEWLINE_ESCAPE_CHAR, "\n")\ + .replace(PERCENTAGE_ESCAPE_CHAR, "%") + + +def quote_plus_space_newline(s): + return s.replace("%", PERCENTAGE_ESCAPE_CHAR)\ + .replace(" ", SPACE_ESCAPE_CHAR)\ + .replace("\n", NEWLINE_ESCAPE_CHAR) -- cgit