From df85ed48e5e94449cdcc77de3b86e10ccea49f1e Mon Sep 17 00:00:00 2001
From: Aravinda VK <avishwan@redhat.com>
Date: Mon, 3 Jul 2017 14:51:21 +0530
Subject: tools/glusterfind: Fix encoding to encode only space, newline and
 percent chars

libgfchangelog was encoding paths as per RFC 3986, but encoding is only
required for the SPACE, NEWLINE and PERCENT chars: NEWLINE is used as the
record separator and SPACE as the field separator in the parsed changelog
output, and PERCENT is the escape character itself.

Change the encoding function to encode only SPACE, NEWLINE and PERCENT chars.

BUG: 1451724
Change-Id: Ic1dea824d23493dedcf3db45f353f90572f4e046
Signed-off-by: Aravinda VK <avishwan@redhat.com>
Reviewed-on: https://review.gluster.org/17788
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Milind Changire <mchangir@redhat.com>
---
 tools/glusterfind/src/__init__.py       |  1 -
 tools/glusterfind/src/changelogdata.py  | 52 +++++++++------------------------
 tools/glusterfind/src/conf.py           |  1 -
 tools/glusterfind/src/libgfchangelog.py |  1 -
 tools/glusterfind/src/main.py           |  7 ++---
 tools/glusterfind/src/utils.py          | 21 ++++++++++---
 6 files changed, 33 insertions(+), 50 deletions(-)
diff --git a/tools/glusterfind/src/__init__.py b/tools/glusterfind/src/__init__.py
index 0ffb3f7432d..1753698b5fa 100644
--- a/tools/glusterfind/src/__init__.py
+++ b/tools/glusterfind/src/__init__.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
 # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
diff --git a/tools/glusterfind/src/changelogdata.py b/tools/glusterfind/src/changelogdata.py
index b4a97093aa8..3140d945b49 100644
--- a/tools/glusterfind/src/changelogdata.py
+++ b/tools/glusterfind/src/changelogdata.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
 # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
@@ -10,10 +9,9 @@
 # cases as published by the Free Software Foundation.
 
 import sqlite3
-import urllib
 import os
 
-from utils import RecordType
+from utils import RecordType, unquote_plus_space_newline
 from utils import output_path_prepare
 
 
@@ -92,7 +90,7 @@ class ChangelogData(object):
         self._create_table_pgfid()
         self._create_table_inodegfid()
         self.args = args
-        self.path_sep = "/" if args.no_encode else "%2F"
+        self.path_sep = "/"
 
     def _create_table_gfidpath(self):
         drop_table = "DROP TABLE IF EXISTS gfidpath"
@@ -323,36 +321,21 @@ class ChangelogData(object):
     def when_create_mknod_mkdir(self, changelogfile, data):
         # E <GFID> <MKNOD|CREATE|MKDIR> <MODE> <USER> <GRP> <PGFID>/<BNAME>
         # Add the Entry to DB
-        # urllib.unquote_plus will not handle unicode so, encode Unicode to
-        # represent in 8 bit format and then unquote
-        pgfid1, bn1 = urllib.unquote_plus(
-            data[6].encode("utf-8")).split("/", 1)
+        pgfid1, bn1 = data[6].split("/", 1)
 
         if self.args.no_encode:
-            # No urlencode since no_encode is set, so convert again to Unicode
-            # format from previously encoded.
-            bn1 = bn1.decode("utf-8").strip()
-        else:
-            # Quote again the basename
-            bn1 = urllib.quote_plus(bn1.strip())
+            bn1 = unquote_plus_space_newline(bn1).strip()
 
         self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1)
 
     def when_rename(self, changelogfile, data):
         # E <GFID> RENAME <OLD_PGFID>/<BNAME> <PGFID>/<BNAME>
-        pgfid1, bn1 = urllib.unquote_plus(
-            data[3].encode("utf-8")).split("/", 1)
-        pgfid2, bn2 = urllib.unquote_plus(
-            data[4].encode("utf-8")).split("/", 1)
+        pgfid1, bn1 = data[3].split("/", 1)
+        pgfid2, bn2 = data[4].split("/", 1)
 
         if self.args.no_encode:
-            # Quote again the basename
-            bn1 = bn1.decode("utf-8").strip()
-            bn2 = bn2.decode("utf-8").strip()
-        else:
-            # Quote again the basename
-            bn1 = urllib.quote_plus(bn1.strip())
-            bn2 = urllib.quote_plus(bn2.strip())
+            bn1 = unquote_plus_space_newline(bn1).strip()
+            bn2 = unquote_plus_space_newline(bn2).strip()
 
         if self.gfidpath_exists({"gfid": data[1], "type": "NEW",
                                  "pgfid1": pgfid1, "bn1": bn1}):
@@ -392,14 +375,9 @@ class ChangelogData(object):
     def when_link_symlink(self, changelogfile, data):
         # E <GFID> <LINK|SYMLINK> <PGFID>/<BASENAME>
         # Add as New record in Db as Type NEW
-        pgfid1, bn1 = urllib.unquote_plus(
-            data[3].encode("utf-8")).split("/", 1)
+        pgfid1, bn1 = data[3].split("/", 1)
         if self.args.no_encode:
-            # Quote again the basename
-            bn1 = bn1.decode("utf-8").strip()
-        else:
-            # Quote again the basename
-            bn1 = urllib.quote_plus(bn1.strip())
+            bn1 = unquote_plus_space_newline(bn1).strip()
 
         self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1)
 
@@ -411,18 +389,14 @@ class ChangelogData(object):
 
     def when_unlink_rmdir(self, changelogfile, data):
         # E <GFID> <UNLINK|RMDIR> <PGFID>/<BASENAME>
-        pgfid1, bn1 = urllib.unquote_plus(
-            data[3].encode("utf-8")).split("/", 1)
+        pgfid1, bn1 = data[3].split("/", 1)
 
         if self.args.no_encode:
-            bn1 = bn1.decode("utf-8").strip()
-        else:
-            # Quote again the basename
-            bn1 = urllib.quote_plus(bn1.strip())
+            bn1 = unquote_plus_space_newline(bn1).strip()
 
         deleted_path = data[4] if len(data) == 5 else ""
         if deleted_path != "":
-            deleted_path = urllib.unquote_plus(deleted_path.encode("utf-8"))
+            deleted_path = unquote_plus_space_newline(deleted_path)
             deleted_path = output_path_prepare(deleted_path, self.args)
 
         if self.gfidpath_exists({"gfid": data[1], "type": "NEW",
diff --git a/tools/glusterfind/src/conf.py b/tools/glusterfind/src/conf.py
index d73fee42aad..d91746bda13 100644
--- a/tools/glusterfind/src/conf.py
+++ b/tools/glusterfind/src/conf.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
 # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
diff --git a/tools/glusterfind/src/libgfchangelog.py b/tools/glusterfind/src/libgfchangelog.py
index dd8153e4e61..0f6b40d6c9c 100644
--- a/tools/glusterfind/src/libgfchangelog.py
+++ b/tools/glusterfind/src/libgfchangelog.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
 # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py
index 3d0f02a65d4..e7e9889569c 100644
--- a/tools/glusterfind/src/main.py
+++ b/tools/glusterfind/src/main.py
@@ -21,13 +21,13 @@ import shutil
 import tempfile
 import signal
 from datetime import datetime
+import codecs
 
 from utils import execute, is_host_local, mkdirp, fail
 from utils import setup_logger, human_time, handle_rm_error
 from utils import get_changelog_rollover_time, cache_output, create_file
 import conf
 from changelogdata import OutputMerger
-import codecs
 
 PROG_DESCRIPTION = """
 GlusterFS Incremental API
@@ -481,10 +481,9 @@ def write_output(outfile, outfilemerger, field_separator):
             for p in paths:
                 if p == "":
                     continue
-                p_rep = p.replace("%2F%2F", "%2F").replace("//", "/")
+                p_rep = p.replace("//", "/")
                 if not row_2_rep:
-                    row_2_rep = row[2].replace("%2F%2F", "%2F").replace("//",
-                                                                        "/")
+                    row_2_rep = row[2].replace("//", "/")
                 if p_rep == row_2_rep:
                     continue
 
diff --git a/tools/glusterfind/src/utils.py b/tools/glusterfind/src/utils.py
index b08233e4a9f..c24258e6ef8 100644
--- a/tools/glusterfind/src/utils.py
+++ b/tools/glusterfind/src/utils.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
 # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
@@ -16,10 +15,12 @@ import xml.etree.cElementTree as etree
 import logging
 import os
 from datetime import datetime
-import urllib
 
 ROOT_GFID = "00000000-0000-0000-0000-000000000001"
 DEFAULT_CHANGELOG_INTERVAL = 15
+SPACE_ESCAPE_CHAR = "%20"
+NEWLINE_ESCAPE_CHAR = "%0A"
+PERCENTAGE_ESCAPE_CHAR = "%25"
 
 ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError
 cache_data = {}
@@ -84,7 +85,7 @@ def output_write(f, path, prefix=".", encode=False, tag="",
         path = os.path.join(prefix, path)
 
     if encode:
-        path = urllib.quote_plus(path)
+        path = quote_plus_space_newline(path)
 
     # set the field separator
     FS = "" if tag == "" else field_separator
@@ -246,4 +247,16 @@ def output_path_prepare(path, args):
     if args.no_encode:
         return path
     else:
-        return urllib.quote_plus(path.encode("utf-8"))
+        return quote_plus_space_newline(path)
+
+
+def unquote_plus_space_newline(s):
+    return s.replace(SPACE_ESCAPE_CHAR, " ")\
+            .replace(NEWLINE_ESCAPE_CHAR, "\n")\
+            .replace(PERCENTAGE_ESCAPE_CHAR, "%")
+
+
+def quote_plus_space_newline(s):
+    return s.replace("%", PERCENTAGE_ESCAPE_CHAR)\
+            .replace(" ", SPACE_ESCAPE_CHAR)\
+            .replace("\n", NEWLINE_ESCAPE_CHAR)
-- 
cgit