From 14dbd5da1cae64e6d4d2c69966e19844d090ce98 Mon Sep 17 00:00:00 2001
From: Niklas Hambüchen <mail@nh2.me>
Date: Fri, 29 Dec 2017 15:49:13 +0100
Subject: glusterfind: Speed up gfid lookup 100x by using an SQL index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes #1529883.

This fixes some bits of `glusterfind`'s horrible performance,
making the gfid lookup about 100x faster.

Until now, because the `gfid` column had no index, glusterfind was,
for each line in each CHANGELOG.* file, linearly reading the entire
contents of the sqlite database in 4096-byte pread64() syscalls when
executing the

  SELECT COUNT(1) FROM %s WHERE 1=1 AND gfid = ?

query through the code path:

  get_changes()
    parse_changelog_to_db()
      when_data_meta()
        gfidpath_exists()
          _exists()

In a quick benchmark on my laptop, doing one such `SELECT` query
took ~75ms on a 10MB-sized sqlite DB, while doing the same query
with an index took < 1ms.

Change-Id: I8e7fe60f1f45a06c102f56b54d2ead9e0377794e
BUG: 1529883
Signed-off-by: Niklas Hambüchen <mail@nh2.me>
---
 tools/glusterfind/src/changelogdata.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/glusterfind/src/changelogdata.py b/tools/glusterfind/src/changelogdata.py
index 3140d945b49..641593cf4b1 100644
--- a/tools/glusterfind/src/changelogdata.py
+++ b/tools/glusterfind/src/changelogdata.py
@@ -112,6 +112,11 @@ class ChangelogData(object):
         """
         self.cursor.execute(create_table)
 
+        create_index = """
+        CREATE INDEX gfid_index ON gfidpath(gfid);
+        """
+        self.cursor.execute(create_index)
+
     def _create_table_inodegfid(self):
         drop_table = "DROP TABLE IF EXISTS inodegfid"
         self.cursor.execute(drop_table)
-- 
cgit