diff options
Diffstat (limited to 'tools/glusterfind/src/changelogdata.py')
| -rw-r--r-- | tools/glusterfind/src/changelogdata.py | 102 |
1 files changed, 65 insertions, 37 deletions
diff --git a/tools/glusterfind/src/changelogdata.py b/tools/glusterfind/src/changelogdata.py index c42aa2a2315..641593cf4b1 100644 --- a/tools/glusterfind/src/changelogdata.py +++ b/tools/glusterfind/src/changelogdata.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/> # This file is part of GlusterFS. @@ -9,10 +9,10 @@ # cases as published by the Free Software Foundation. import sqlite3 -import urllib import os -from utils import RecordType +from utils import RecordType, unquote_plus_space_newline +from utils import output_path_prepare class OutputMerger(object): @@ -82,13 +82,15 @@ class OutputMerger(object): class ChangelogData(object): - def __init__(self, dbpath): + def __init__(self, dbpath, args): self.conn = sqlite3.connect(dbpath) self.cursor = self.conn.cursor() self.cursor_reader = self.conn.cursor() self._create_table_gfidpath() self._create_table_pgfid() self._create_table_inodegfid() + self.args = args + self.path_sep = "/" def _create_table_gfidpath(self): drop_table = "DROP TABLE IF EXISTS gfidpath" @@ -100,16 +102,21 @@ class ChangelogData(object): ts VARCHAR, type VARCHAR, gfid VARCHAR(40), - pgfid1 VARCHAR(40), - bn1 VARCHAR(500), - pgfid2 VARCHAR(40), - bn2 VARCHAR(500), + pgfid1 VARCHAR(40) DEFAULT '', + bn1 VARCHAR(500) DEFAULT '', + pgfid2 VARCHAR(40) DEFAULT '', + bn2 VARCHAR(500) DEFAULT '', path1 VARCHAR DEFAULT '', path2 VARCHAR DEFAULT '' ) """ self.cursor.execute(create_table) + create_index = """ + CREATE INDEX gfid_index ON gfidpath(gfid); + """ + self.cursor.execute(create_index) + def _create_table_inodegfid(self): drop_table = "DROP TABLE IF EXISTS inodegfid" self.cursor.execute(drop_table) @@ -283,7 +290,7 @@ class ChangelogData(object): def append_path1(self, path, inode): # || is for concatenate in SQL - query = """UPDATE gfidpath SET path1 = ',' || ? + query = """UPDATE gfidpath SET path1 = path1 || ',' || ? WHERE gfid IN (SELECT gfid FROM inodegfid WHERE inode = ?)""" self.cursor.execute(query, (path, inode)) @@ -293,8 +300,8 @@ class ChangelogData(object): update_str1 = "? || bn1" update_str2 = "? || bn2" else: - update_str1 = "? || '%2F' || bn1" - update_str2 = "? || '%2F' || bn2" + update_str1 = "? || '{0}' || bn1".format(self.path_sep) + update_str2 = "? || '{0}' || bn2".format(self.path_sep) query = """UPDATE gfidpath SET path1 = %s WHERE pgfid1 = ?""" % update_str1 @@ -310,7 +317,7 @@ class ChangelogData(object): if path2 == "": update_str = "? || bn2" else: - update_str = "? || '%2F' || bn2" + update_str = "? || '{0}' || bn2".format(self.path_sep) query = """UPDATE gfidpath SET path2 = %s WHERE pgfid2 = ?""" % update_str @@ -319,21 +326,21 @@ class ChangelogData(object): def when_create_mknod_mkdir(self, changelogfile, data): # E <GFID> <MKNOD|CREATE|MKDIR> <MODE> <USER> <GRP> <PGFID>/<BNAME> # Add the Entry to DB - pgfid1, bn1 = urllib.unquote_plus(data[6]).split("/", 1) + pgfid1, bn1 = data[6].split("/", 1) - # Quote again the basename - bn1 = urllib.quote_plus(bn1.strip()) + if self.args.no_encode: + bn1 = unquote_plus_space_newline(bn1).strip() self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1) def when_rename(self, changelogfile, data): # E <GFID> RENAME <OLD_PGFID>/<BNAME> <PGFID>/<BNAME> - pgfid1, bn1 = urllib.unquote_plus(data[3]).split("/", 1) - pgfid2, bn2 = urllib.unquote_plus(data[4]).split("/", 1) + pgfid1, bn1 = data[3].split("/", 1) + pgfid2, bn2 = data[4].split("/", 1) - # Quote again the basename - bn1 = urllib.quote_plus(bn1.strip()) - bn2 = urllib.quote_plus(bn2.strip()) + if self.args.no_encode: + bn1 = unquote_plus_space_newline(bn1).strip() + bn2 = unquote_plus_space_newline(bn2).strip() if self.gfidpath_exists({"gfid": data[1], "type": "NEW", "pgfid1": pgfid1, "bn1": bn1}): @@ -344,37 +351,58 @@ class ChangelogData(object): "pgfid1": pgfid1, "bn1": bn1}) elif self.gfidpath_exists({"gfid": data[1], "type": "RENAME", "pgfid2": pgfid1, "bn2": bn1}): - # If <OLD_PGFID>/<BNAME> is same as <PGFID2>/<BN2>(may be previous - # RENAME) then UPDATE <NEW_PGFID>/<BNAME> as <PGFID2>/<BN2> - self.gfidpath_update({"pgfid2": pgfid2, "bn2": bn2}, - {"gfid": data[1], "type": "RENAME", - "pgfid2": pgfid1, "bn2": bn1}) + # If we are renaming file back to original name then just + # delete the entry since it will effectively be a no-op + if self.gfidpath_exists({"gfid": data[1], "type": "RENAME", + "pgfid2": pgfid1, "bn2": bn1, + "pgfid1": pgfid2, "bn1": bn2}): + self.gfidpath_delete({"gfid": data[1], "type": "RENAME", + "pgfid2": pgfid1, "bn2": bn1}) + else: + # If <OLD_PGFID>/<BNAME> is same as <PGFID2>/<BN2> + # (may be previous RENAME) + # then UPDATE <NEW_PGFID>/<BNAME> as <PGFID2>/<BN2> + self.gfidpath_update({"pgfid2": pgfid2, "bn2": bn2}, + {"gfid": data[1], "type": "RENAME", + "pgfid2": pgfid1, "bn2": bn1}) else: # Else insert as RENAME self.gfidpath_add(changelogfile, RecordType.RENAME, data[1], pgfid1, bn1, pgfid2, bn2) + if self.gfidpath_exists({"gfid": data[1], "type": "MODIFY"}): + # If MODIFY exists already for that GFID, remove it and insert + # again so that MODIFY entry comes after RENAME entry + # Output will have MODIFY <NEWNAME> + self.gfidpath_delete({"gfid": data[1], "type": "MODIFY"}) + self.gfidpath_add(changelogfile, RecordType.MODIFY, data[1]) + def when_link_symlink(self, changelogfile, data): # E <GFID> <LINK|SYMLINK> <PGFID>/<BASENAME> # Add as New record in Db as Type NEW - pgfid1, bn1 = urllib.unquote_plus(data[3]).split("/", 1) - - # Quote again the basename - bn1 = urllib.quote_plus(bn1.strip()) + pgfid1, bn1 = data[3].split("/", 1) + if self.args.no_encode: + bn1 = unquote_plus_space_newline(bn1).strip() self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1) def when_data_meta(self, changelogfile, data): # If GFID row exists, Ignore else Add to Db - if not self.gfidpath_exists({"gfid": data[1]}): + if not self.gfidpath_exists({"gfid": data[1], "type": "NEW"}) and \ + not self.gfidpath_exists({"gfid": data[1], "type": "MODIFY"}): self.gfidpath_add(changelogfile, RecordType.MODIFY, data[1]) def when_unlink_rmdir(self, changelogfile, data): # E <GFID> <UNLINK|RMDIR> <PGFID>/<BASENAME> - pgfid1, bn1 = urllib.unquote_plus(data[3]).split("/", 1) - # Quote again the basename - bn1 = urllib.quote_plus(bn1.strip()) + pgfid1, bn1 = data[3].split("/", 1) + + if self.args.no_encode: + bn1 = unquote_plus_space_newline(bn1).strip() + deleted_path = data[4] if len(data) == 5 else "" + if deleted_path != "": + deleted_path = unquote_plus_space_newline(deleted_path) + deleted_path = output_path_prepare(deleted_path, self.args) if self.gfidpath_exists({"gfid": data[1], "type": "NEW", "pgfid1": pgfid1, "bn1": bn1}): @@ -400,12 +428,12 @@ class ChangelogData(object): "bn2": bn1}) # If deleted directory is parent for somebody - query1 = """UPDATE gfidpath SET path1 = ? || '%2F' || bn1 - WHERE pgfid1 = ? AND path1 != ''""" + query1 = """UPDATE gfidpath SET path1 = ? || '{0}' || bn1 + WHERE pgfid1 = ? AND path1 != ''""".format(self.path_sep) self.cursor.execute(query1, (deleted_path, data[1])) - query1 = """UPDATE gfidpath SET path2 = ? || '%2F' || bn1 - WHERE pgfid2 = ? AND path2 != ''""" + query1 = """UPDATE gfidpath SET path2 = ? || '{0}' || bn1 + WHERE pgfid2 = ? AND path2 != ''""".format(self.path_sep) self.cursor.execute(query1, (deleted_path, data[1])) def commit(self): |
