summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile.am2
-rw-r--r--configure.ac2
-rw-r--r--doc/tools/gfind_missing_files.md67
-rw-r--r--glusterfs.spec.in7
-rw-r--r--tools/Makefile.am3
-rw-r--r--tools/gfind_missing_files/Makefile.am24
-rw-r--r--tools/gfind_missing_files/gcrawler.c572
-rw-r--r--tools/gfind_missing_files/gfid_to_path.py162
-rw-r--r--tools/gfind_missing_files/gfid_to_path.sh42
-rw-r--r--tools/gfind_missing_files/gfind_missing_files.sh119
10 files changed, 999 insertions, 1 deletions
diff --git a/Makefile.am b/Makefile.am
index a35b1642143..60a8d36131d 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -11,7 +11,7 @@ EXTRA_DIST = autogen.sh \
SUBDIRS = $(ARGP_STANDALONE_DIR) libglusterfs rpc api xlators glusterfsd \
$(FUSERMOUNT_SUBDIR) doc extras cli heal @SYNCDAEMON_SUBDIR@ \
- @UMOUNTD_SUBDIR@
+ @UMOUNTD_SUBDIR@ tools
pkgconfigdir = @pkgconfigdir@
pkgconfig_DATA = glusterfs-api.pc libgfchangelog.pc
diff --git a/configure.ac b/configure.ac
index 0d2ee210539..1a513975d92 100644
--- a/configure.ac
+++ b/configure.ac
@@ -211,6 +211,8 @@ AC_CONFIG_FILES([Makefile
geo-replication/Makefile
geo-replication/src/Makefile
geo-replication/syncdaemon/Makefile
+ tools/Makefile
+ tools/gfind_missing_files/Makefile
heal/Makefile
heal/src/Makefile
glusterfs.spec])
diff --git a/doc/tools/gfind_missing_files.md b/doc/tools/gfind_missing_files.md
new file mode 100644
index 00000000000..47241be5ac6
--- /dev/null
+++ b/doc/tools/gfind_missing_files.md
@@ -0,0 +1,67 @@
+Introduction
+========
+The tool gfind_missing_files.sh can be used to find the missing files in a
+GlusterFS geo-replicated slave volume. The tool uses a multi-threaded crawler
+operating on the backend .glusterfs of a brickpath which is passed as one of
+the parameters to the tool. It does a stat on each entry in the slave volume
+mount to check for the presence of a file. The tool uses the aux-gfid-mount
+thereby avoiding path conversions and potentially saving time.
+
+This tool should be run on every node and each brickpath in a geo-replicated
+master volume to find the missing files on the slave volume.
+
+The script gfind_missing_files.sh is a wrapper script that in turn uses the
+gcrawler binary to do the backend crawling. The script detects the gfids of
+the missing files and runs the gfid-to-path conversion script to list out the
+missing files with their full pathnames.
+
+Usage
+=====
+```sh
+$bash gfind_missing_files.sh <BRICK_PATH> <SLAVE_HOST> <SLAVE_VOL> <OUTFILE>
+ BRICK_PATH - Full path of the brick
+ SLAVE_HOST - Hostname of a node serving the slave gluster volume
+ SLAVE_VOL - Name of the slave gluster volume
+ OUTFILE - Output file which contains gfids of the missing files
+```
+
+The gfid-to-path conversion uses a quicker algorithm for converting gfids to
+paths and it is possible that in some cases all missing gfids may not be
+converted to their respective paths.
+
+Example output(126733 missing files)
+===================================
+```sh
+$ionice -c 2 -n 7 ./gfind_missing_files.sh /bricks/m3 acdc slave-vol ~/test_results/m3-4.txt
+Calling crawler...
+Crawl Complete.
+gfids of skipped files are available in the file /root/test_results/m3-4.txt
+Starting gfid to path conversion
+Path names of skipped files are available in the file /root/test_results/m3-4.txt_pathnames
+WARNING: Unable to convert some GFIDs to Paths, GFIDs logged to /root/test_results/m3-4.txt_gfids
+Use bash gfid_to_path.sh <brick-path> /root/test_results/m3-4.txt_gfids to convert those GFIDs to Path
+Total Missing File Count : 126733
+```
+In such cases, an additional step is needed to convert those gfids to paths.
+This can be used as shown below:
+```sh
+ $bash gfid_to_path.sh <BRICK_PATH> <GFID_FILE>
+ BRICK_PATH - Full path of the brick.
+ GFID_FILE - OUTFILE_gfids got from gfind_missing_files.sh
+```
+Things to keep in mind when running the tool
+============================================
+1. Running this tool can result in a crawl of the backend filesystem at each
+ brick which can be intensive. To ensure there is no impact on ongoing I/O on
+ RHS volumes, we recommend that this tool be run at a low I/O scheduling class
+ (best-effort) and priority.
+```sh
+$ionice -c 2 -p <pid of gfind_missing_files.sh>
+```
+
+2. We do not recommend interrupting the tool when it is running
+ (e.g. by doing CTRL^C). It is better to wait for the tool to finish
+ execution. In case it is interrupted, manually unmount the Slave Volume.
+```sh
+ umount <MOUNT_POINT>
+```
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
index 1b31f51ed78..affb753cb67 100644
--- a/glusterfs.spec.in
+++ b/glusterfs.spec.in
@@ -955,6 +955,10 @@ fi
%{_datadir}/glusterfs/scripts/generate-gfid-file.sh
%{_datadir}/glusterfs/scripts/gsync-sync-gfid
%ghost %attr(0644,-,-) %{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf
+%{_libexecdir}/glusterfs/gfind_missing_files
+%{_sbindir}/gfind_missing_files
+%exclude %{_libexecdir}/glusterfs/gfind_missing_files/gfid_to_path.pyo
+%exclude %{_libexecdir}/glusterfs/gfind_missing_files/gfid_to_path.pyc
%endif
%files libs
@@ -1049,6 +1053,9 @@ fi
%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/run/nfs.pid
%changelog
+* Thu Mar 12 2015 Kotresh H R <khiremat@redhat.com>
+- gfind_missing_files tool is included (#1187140)
+
* Thu Feb 26 2015 Kaleb S. KEITHLEY <kkeithle@redhat.com>
- enable cmocka unittest support only when asked for (#1067059)
diff --git a/tools/Makefile.am b/tools/Makefile.am
new file mode 100644
index 00000000000..74229ab41e7
--- /dev/null
+++ b/tools/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = gfind_missing_files
+
+CLEANFILES =
diff --git a/tools/gfind_missing_files/Makefile.am b/tools/gfind_missing_files/Makefile.am
new file mode 100644
index 00000000000..456aad836b6
--- /dev/null
+++ b/tools/gfind_missing_files/Makefile.am
@@ -0,0 +1,24 @@
+gfindmissingfilesdir = $(libexecdir)/glusterfs/gfind_missing_files
+
+gfindmissingfiles_SCRIPTS = gfind_missing_files.sh gfid_to_path.sh \
+ gfid_to_path.py
+
+EXTRA_DIST = gfind_missing_files.sh gfid_to_path.sh \
+ gfid_to_path.py
+
+gfindmissingfiles_PROGRAMS = gcrawler
+
+gcrawler_SOURCES = gcrawler.c
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+uninstall-local:
+ rm -f $(DESTDIR)$(sbindir)/gfind_missing_files
+
+install-data-local:
+ rm -f $(DESTDIR)$(sbindir)/gfind_missing_files
+ ln -s $(libexecdir)/glusterfs/gfind_missing_files/gfind_missing_files.sh $(DESTDIR)$(sbindir)/gfind_missing_files
+
+CLEANFILES =
diff --git a/tools/gfind_missing_files/gcrawler.c b/tools/gfind_missing_files/gcrawler.c
new file mode 100644
index 00000000000..517e773cb7c
--- /dev/null
+++ b/tools/gfind_missing_files/gcrawler.c
@@ -0,0 +1,572 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dirent.h>
+#include <assert.h>
+
+#ifndef __FreeBSD__
+#ifdef __NetBSD__
+#include <sys/xattr.h>
+#else
+#include <attr/xattr.h>
+#endif /* __NetBSD__ */
+#endif /* __FreeBSD__ */
+
+#include "list.h"
+
+#define THREAD_MAX 32
+#define BUMP(name) INC(name, 1)
+#define DEFAULT_WORKERS 4
+
+#define NEW(x) { \
+ x = calloc (1, sizeof (typeof (*x))); \
+ }
+
+#define err(x ...) fprintf(stderr, x)
+#define out(x ...) fprintf(stdout, x)
+#define dbg(x ...) do { if (debug) fprintf(stdout, x); } while (0)
+#define tout(x ...) do { out("[%ld] ", pthread_self()); out(x); } while (0)
+#define terr(x ...) do { err("[%ld] ", pthread_self()); err(x); } while (0)
+#define tdbg(x ...) do { dbg("[%ld] ", pthread_self()); dbg(x); } while (0)
+
+int debug = 0;
+const char *slavemnt = NULL;
+int workers = 0;
+
+struct stats {
+ unsigned long long int cnt_skipped_gfids;
+};
+
+pthread_spinlock_t stats_lock;
+
+struct stats stats_total;
+int stats = 0;
+
+#define INC(name, val) do { \
+ if (!stats) \
+ break; \
+ pthread_spin_lock(&stats_lock); \
+ { \
+ stats_total.cnt_##name += val; \
+ } \
+ pthread_spin_unlock(&stats_lock); \
+ } while (0)
+
+void
+stats_dump()
+{
+        if (!stats)
+                return;
+        /* cnt_skipped_gfids is unsigned long long: print it with %llu */
+        out("-------------------------------------------\n");
+        out("Skipped_Files : %10llu\n", stats_total.cnt_skipped_gfids);
+        out("-------------------------------------------\n");
+}
+
+struct dirjob {
+ struct list_head list;
+
+ char *dirname;
+
+ struct dirjob *parent;
+ int ret; /* final status of this subtree */
+ int refcnt; /* how many dirjobs have this as parent */
+
+ pthread_spinlock_t lock;
+};
+
+
+struct xwork {
+ pthread_t cthreads[THREAD_MAX]; /* crawler threads */
+ int count;
+ int idle;
+ int stop;
+
+ struct dirjob crawl;
+
+ struct dirjob *rootjob; /* to verify completion in xwork_fini() */
+
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+};
+
+
+struct dirjob *
+dirjob_ref (struct dirjob *job)
+{
+ pthread_spin_lock (&job->lock);
+ {
+ job->refcnt++;
+ }
+ pthread_spin_unlock (&job->lock);
+
+ return job;
+}
+
+
+void
+dirjob_free (struct dirjob *job)
+{
+ assert (list_empty (&job->list));
+
+ pthread_spin_destroy (&job->lock);
+ free (job->dirname);
+ free (job);
+}
+
+void
+dirjob_ret (struct dirjob *job, int err)
+{
+ int ret = 0;
+ int refcnt = 0;
+ struct dirjob *parent = NULL;
+
+ pthread_spin_lock (&job->lock);
+ {
+ refcnt = --job->refcnt;
+ job->ret = (job->ret || err);
+ }
+ pthread_spin_unlock (&job->lock);
+
+ if (refcnt == 0) {
+ ret = job->ret;
+
+ if (ret)
+ terr ("Failed: %s (%d)\n", job->dirname, ret);
+ else
+ tdbg ("Finished: %s\n", job->dirname);
+
+ parent = job->parent;
+ if (parent)
+ dirjob_ret (parent, ret);
+
+ dirjob_free (job);
+ job = NULL;
+ }
+}
+
+
+struct dirjob *
+dirjob_new (const char *dir, struct dirjob *parent)
+{
+ struct dirjob *job = NULL;
+
+ NEW(job);
+ if (!job)
+ return NULL;
+
+ job->dirname = strdup (dir);
+ if (!job->dirname) {
+ free (job);
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&job->list);
+ pthread_spin_init (&job->lock, PTHREAD_PROCESS_PRIVATE);
+ job->ret = 0;
+
+ if (parent)
+ job->parent = dirjob_ref (parent);
+
+ job->refcnt = 1;
+
+ return job;
+}
+
+void
+xwork_addcrawl (struct xwork *xwork, struct dirjob *job)
+{
+ pthread_mutex_lock (&xwork->mutex);
+ {
+ list_add_tail (&job->list, &xwork->crawl.list);
+ pthread_cond_broadcast (&xwork->cond);
+ }
+ pthread_mutex_unlock (&xwork->mutex);
+}
+
+int
+xwork_add (struct xwork *xwork, const char *dir, struct dirjob *parent)
+{
+ struct dirjob *job = NULL;
+
+ job = dirjob_new (dir, parent);
+ if (!job)
+ return -1;
+
+ xwork_addcrawl (xwork, job);
+
+ return 0;
+}
+
+
+struct dirjob *
+xwork_pick (struct xwork *xwork, int block)
+{
+ struct dirjob *job = NULL;
+ struct list_head *head = NULL;
+
+ head = &xwork->crawl.list;
+
+ pthread_mutex_lock (&xwork->mutex);
+ {
+ for (;;) {
+ if (xwork->stop)
+ break;
+
+ if (!list_empty (head)) {
+ job = list_entry (head->next, typeof(*job),
+ list);
+ list_del_init (&job->list);
+ break;
+ }
+
+ if (((xwork->count * 2) == xwork->idle) &&
+ list_empty (&xwork->crawl.list)) {
+ /* no outstanding jobs, and no
+ active workers
+ */
+ tdbg ("Jobless. Terminating\n");
+ xwork->stop = 1;
+ pthread_cond_broadcast (&xwork->cond);
+ break;
+ }
+
+ if (!block)
+ break;
+
+ xwork->idle++;
+ pthread_cond_wait (&xwork->cond, &xwork->mutex);
+ xwork->idle--;
+ }
+ }
+ pthread_mutex_unlock (&xwork->mutex);
+
+ return job;
+}
+
+int
+skip_name (const char *dirname, const char *name)
+{
+ if (strcmp (name, ".") == 0)
+ return 1;
+
+ if (strcmp (name, "..") == 0)
+ return 1;
+
+ if (strcmp (name, "changelogs") == 0)
+ return 1;
+
+ if (strcmp (name, "health_check") == 0)
+ return 1;
+
+ if (strcmp (name, "indices") == 0)
+ return 1;
+
+ if (strcmp (name, "landfill") == 0)
+ return 1;
+
+ return 0;
+}
+
+int
+skip_stat (struct dirjob *job, const char *name)
+{
+ if (job == NULL)
+ return 0;
+
+ if (strcmp (job->dirname, ".glusterfs") == 0) {
+ tdbg ("Directly adding directories under .glusterfs "
+ "to global list: %s\n", name);
+ return 1;
+ }
+
+ if (job->parent != NULL) {
+ if (strcmp (job->parent->dirname, ".glusterfs") == 0) {
+ tdbg ("Directly adding directories under .glusterfs/XX "
+ "to global list: %s\n", name);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/* Crawl one directory job: queue sub-directories, report gfids missing on the slave. */
+int
+xworker_do_crawl (struct xwork *xwork, struct dirjob *job)
+{
+        DIR            *dirp = NULL;
+        int             ret = -1;
+        int             boff;
+        int             plen;
+        struct dirent  *result;
+        char            dbuf[512];
+        char           *path = NULL;
+        struct dirjob  *cjob = NULL;
+        struct stat     statbuf = {0,};
+        char            gfid_path[4096] = {0,};
+
+        plen = strlen (job->dirname) + 256 + 2;
+        path = alloca (plen);
+
+        tdbg ("Entering: %s\n", job->dirname);
+
+        dirp = opendir (job->dirname);
+        if (!dirp) {
+                terr ("opendir failed on %s (%s)\n", job->dirname,
+                      strerror (errno));
+                goto out;
+        }
+
+        boff = sprintf (path, "%s/", job->dirname);
+
+        for (;;) {
+                ret = readdir_r (dirp, (struct dirent *)dbuf, &result);
+                if (ret) {
+                        err ("readdir_r(%s): %s\n", job->dirname,
+                             strerror (ret)); /* readdir_r returns the error number */
+                        goto out;
+                }
+
+                if (!result) /* EOF */
+                        break;
+
+                if (result->d_ino == 0)
+                        continue;
+
+                if (skip_name (job->dirname, result->d_name))
+                        continue;
+
+                /* Children and grandchildren of .glusterfs are known to be
+                 * directories, so queue them without a stat.
+                 */
+                if (skip_stat (job, result->d_name)) {
+                        strncpy (path + boff, result->d_name, (plen-boff));
+                        cjob = dirjob_new (path, job);
+                        if (!cjob) {
+                                err ("dirjob_new(%s): %s\n",
+                                     path, strerror (errno));
+                                ret = -1;
+                                goto out;
+                        }
+                        xwork_addcrawl (xwork, cjob);
+                        continue;
+                }
+
+                /* bounded formatting: slavemnt comes from the command line */
+                snprintf (gfid_path, sizeof (gfid_path), "%s/.gfid/%s",
+                          slavemnt, result->d_name);
+                ret = lstat (gfid_path, &statbuf);
+
+                /* ENOENT means the entry is missing on the slave: report it */
+                if (ret && errno == ENOENT) {
+                        out ("%s\n", result->d_name);
+                        BUMP (skipped_gfids);
+                } else if (ret) {
+                        /* tested in one chain so fprintf cannot clobber errno */
+                        err ("stat on slave failed(%s): %s\n",
+                             gfid_path, strerror (errno));
+                        goto out;
+                }
+        }
+
+        ret = 0;
+out:
+        if (dirp)
+                closedir (dirp);
+
+        return ret;
+}
+
+
+void *
+xworker_crawl (void *data)
+{
+ struct xwork *xwork = data;
+ struct dirjob *job = NULL;
+ int ret = -1;
+
+ while ((job = xwork_pick (xwork, 0))) {
+ ret = xworker_do_crawl (xwork, job);
+ dirjob_ret (job, ret);
+ }
+
+ return NULL;
+}
+
+int
+xwork_fini (struct xwork *xwork, int stop)
+{
+ int i = 0;
+ int ret = 0;
+ void *tret = 0;
+
+ pthread_mutex_lock (&xwork->mutex);
+ {
+ xwork->stop = (xwork->stop || stop);
+ pthread_cond_broadcast (&xwork->cond);
+ }
+ pthread_mutex_unlock (&xwork->mutex);
+
+ for (i = 0; i < xwork->count; i++) {
+ pthread_join (xwork->cthreads[i], &tret);
+ tdbg ("CThread id %ld returned %p\n",
+ xwork->cthreads[i], tret);
+ }
+
+ if (debug) {
+ assert (xwork->rootjob->refcnt == 1);
+ dirjob_ret (xwork->rootjob, 0);
+ }
+
+ if (stats)
+ pthread_spin_destroy(&stats_lock);
+
+ return ret;
+}
+
+
+int
+xwork_init (struct xwork *xwork, int count)
+{
+        int            i = 0;
+        int            ret = 0;
+        struct dirjob *rootjob = NULL;
+
+        if (stats)
+                pthread_spin_init (&stats_lock, PTHREAD_PROCESS_PRIVATE);
+        pthread_mutex_init (&xwork->mutex, NULL);
+        pthread_cond_init (&xwork->cond, NULL);
+        INIT_LIST_HEAD (&xwork->crawl.list);
+        /* cthreads[] has THREAD_MAX slots; never spawn more threads than that */
+        if (count > THREAD_MAX)
+                count = THREAD_MAX;
+        rootjob = dirjob_new (".glusterfs", NULL);
+        if (!rootjob)
+                return -1; /* allocation failed: nothing queued, no thread spawned */
+        if (debug)
+                xwork->rootjob = dirjob_ref (rootjob);
+        xwork_addcrawl (xwork, rootjob);
+        xwork->count = count;
+        for (i = 0; i < count; i++) {
+                ret = pthread_create (&xwork->cthreads[i], NULL,
+                                      xworker_crawl, xwork);
+                if (ret)
+                        break;
+                tdbg ("Spawned crawler %d thread %ld\n", i,
+                      xwork->cthreads[i]);
+        }
+
+        return ret;
+}
+
+
+int
+xfind (const char *basedir)
+{
+ struct xwork xwork;
+ int ret = 0;
+ char *cwd = NULL;
+
+ ret = chdir (basedir);
+ if (ret) {
+ err ("%s: %s\n", basedir, strerror (errno));
+ return ret;
+ }
+
+ cwd = getcwd (0, 0);
+ if (!cwd) {
+ err ("getcwd(): %s\n", strerror (errno));
+ return -1;
+ }
+
+ tdbg ("Working directory: %s\n", cwd);
+ free (cwd);
+
+ memset (&xwork, 0, sizeof (xwork));
+
+ ret = xwork_init (&xwork, workers);
+ if (ret == 0)
+ xworker_crawl (&xwork);
+
+ ret = xwork_fini (&xwork, ret);
+ stats_dump ();
+
+ return ret;
+}
+
+static char *
+parse_and_validate_args (int argc, char *argv[])
+{
+ char *basedir = NULL;
+ struct stat d = {0, };
+ int ret = -1;
+#ifndef __FreeBSD__
+ unsigned char volume_id[16];
+#endif /* __FreeBSD__ */
+ char *slv_mnt = NULL;
+
+ if (argc != 4) {
+ err ("Usage: %s <DIR> <SLAVE-VOL-MOUNT> <CRAWL-THREAD-COUNT>\n",
+ argv[0]);
+ return NULL;
+ }
+
+ basedir = argv[1];
+ ret = lstat (basedir, &d);
+ if (ret) {
+ err ("%s: %s\n", basedir, strerror (errno));
+ return NULL;
+ }
+
+#ifndef __FreeBSD__
+ ret = lgetxattr (basedir, "trusted.glusterfs.volume-id",
+ volume_id, 16);
+ if (ret != 16) {
+ err ("%s:Not a valid brick path.\n", basedir);
+ return NULL;
+ }
+#endif /* __FreeBSD__ */
+
+ slv_mnt = argv[2];
+ ret = lstat (slv_mnt, &d);
+ if (ret) {
+ err ("%s: %s\n", slv_mnt, strerror (errno));
+ return NULL;
+ }
+ slavemnt = argv[2];
+
+ workers = atoi(argv[3]);
+ if (workers <= 0)
+ workers = DEFAULT_WORKERS;
+
+ return basedir;
+}
+
+int
+main (int argc, char *argv[])
+{
+ char *basedir = NULL;
+
+ basedir = parse_and_validate_args (argc, argv);
+ if (!basedir)
+ return 1;
+
+ xfind (basedir);
+
+ return 0;
+}
diff --git a/tools/gfind_missing_files/gfid_to_path.py b/tools/gfind_missing_files/gfid_to_path.py
new file mode 100644
index 00000000000..8362f68b955
--- /dev/null
+++ b/tools/gfind_missing_files/gfid_to_path.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import sys
+import os
+import xattr
+import uuid
+import re
+import errno
+
+CHANGELOG_SEARCH_MAX_TRY = 31
+DEC_CTIME_START = 5
+ROOT_GFID = "00000000-0000-0000-0000-000000000001"
+MAX_NUM_CHANGELOGS_TRY = 2
+
+
+def output_not_found(gfid):
+ # Write GFID to stderr
+ sys.stderr.write("%s\n" % gfid)
+
+
+def output_success(path):
+ # Write converted Path to Stdout
+ sys.stdout.write("%s\n" % path)
+
+
+def full_dir_path(gfid):
+ out_path = ""
+ while True:
+ path = os.path.join(".glusterfs", gfid[0:2], gfid[2:4], gfid)
+ path_readlink = os.readlink(path)
+ pgfid = os.path.dirname(path_readlink)
+ out_path = os.path.join(os.path.basename(path_readlink), out_path)
+ if pgfid == "../../00/00/%s" % ROOT_GFID:
+ out_path = os.path.join("./", out_path)
+ break
+ gfid = os.path.basename(pgfid)
+ return out_path
+
+
+def find_path_from_changelog(fd, gfid):
+ """
+ In given Changelog File, finds using following pattern
+ <T><GFID>\x00<TYPE>\x00<MODE>\x00<UID>\x00<GID>\x00<PARGFID>/<BASENAME>
+ Pattern search finds PARGFID and BASENAME, Convert PARGFID to Path
+ Using readlink and add basename to form Full path.
+ """
+ content = fd.read()
+
+ pattern = "E%s" % gfid
+ pattern += "\x00(3|23)\x00\d+\x00\d+\x00\d+\x00([^\x00]+)/([^\x00]+)"
+ pat = re.compile(pattern)
+ match = pat.search(content)
+
+ if match:
+ pgfid = match.group(2)
+ basename = match.group(3)
+ if pgfid == ROOT_GFID:
+ return os.path.join("./", basename)
+ else:
+ full_path_parent = full_dir_path(pgfid)
+ if full_path_parent:
+ return os.path.join(full_path_parent, basename)
+
+ return None
+
+
+def gfid_to_path(gfid):
+ """
+ Try readlink, if it is directory it succeeds.
+ Get ctime of the GFID file, Decrement by 5 sec
+ Search for Changelog filename, Since Changelog file generated
+ every 15 sec, Search and get immediate next Changelog after the file
+ Creation. Get the Path by searching in Changelog file.
+ Get the resultant file's GFID and Compare with the input, If these
+ GFIDs are different then Some thing is changed(May be Rename)
+ """
+ gfid = gfid.strip()
+ gpath = os.path.join(".glusterfs", gfid[0:2], gfid[2:4], gfid)
+ try:
+ output_success(full_dir_path(gfid))
+ return
+ except OSError:
+ # Not an SymLink
+ pass
+
+ try:
+ ctime = int(os.stat(gpath).st_ctime)
+ ctime -= DEC_CTIME_START
+ except (OSError, IOError):
+ output_not_found(gfid)
+ return
+
+ path = None
+ found_changelog = False
+ changelog_parse_try = 0
+ for i in range(CHANGELOG_SEARCH_MAX_TRY):
+ cl = os.path.join(".glusterfs/changelogs", "CHANGELOG.%s" % ctime)
+
+ try:
+ with open(cl, "rb") as f:
+ changelog_parse_try += 1
+ found_changelog = True
+ path = find_path_from_changelog(f, gfid)
+ if not path and changelog_parse_try < MAX_NUM_CHANGELOGS_TRY:
+ ctime += 1
+ continue
+ break
+ except (IOError, OSError) as e:
+ if e.errno == errno.ENOENT:
+ ctime += 1
+ else:
+ break
+
+ if not found_changelog:
+ output_not_found(gfid)
+ return
+
+ if not path:
+ output_not_found(gfid)
+ return
+ gfid1 = str(uuid.UUID(bytes=xattr.get(path, "trusted.gfid")))
+ if gfid != gfid1:
+ output_not_found(gfid)
+ return
+
+ output_success(path)
+
+
+def main():
+ num_arguments = 3
+ if not sys.stdin.isatty():
+ num_arguments = 2
+
+ if len(sys.argv) != num_arguments:
+ sys.stderr.write("Invalid arguments\nUsage: "
+ "%s <BRICK_PATH> <GFID_FILE>\n" % sys.argv[0])
+ sys.exit(1)
+
+ path = sys.argv[1]
+
+ if sys.stdin.isatty():
+ gfid_list = os.path.abspath(sys.argv[2])
+ os.chdir(path)
+ with open(gfid_list) as f:
+ for gfid in f:
+ gfid_to_path(gfid)
+ else:
+ os.chdir(path)
+ for gfid in sys.stdin:
+ gfid_to_path(gfid)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/gfind_missing_files/gfid_to_path.sh b/tools/gfind_missing_files/gfid_to_path.sh
new file mode 100644
index 00000000000..20ac6a94fd2
--- /dev/null
+++ b/tools/gfind_missing_files/gfid_to_path.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+# bash is required: the script uses 'function' and ${var:offset:length}.
+## Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+## This file is part of GlusterFS.
+##
+## This file is licensed to you under your choice of the GNU Lesser
+## General Public License, version 3 or any later version (LGPLv3 or
+## later), or the GNU General Public License, version 2 (GPLv2), in all
+## cases as published by the Free Software Foundation.
+
+E_BADARGS=65
+
+
+function gfid_to_path()
+{
+    # Print every path under brick $1 that shares an inode with a gfid in file $2.
+    local brick_dir=$1 gfid_file current_dir gfid to_search;
+    gfid_file=$(readlink -e "$2") || return 1;
+    current_dir=$(pwd);
+    cd "$brick_dir" || return 1;
+
+    while read -r gfid
+    do
+        to_search=".glusterfs/${gfid:0:2}/${gfid:2:2}/${gfid}";
+        find . -samefile "$to_search" | grep -v -F -- "$to_search";
+    done < "$gfid_file";
+
+    cd "$current_dir" || return 1;
+}
+
+
+function main(){
+    # Validate the argument count, then convert every gfid listed in GFID_FILE.
+    if [ $# -ne 2 ]; then
+        echo "Usage: $(basename "$0") BRICK_DIR GFID_FILE" >&2;
+        exit $E_BADARGS;
+    fi
+
+    gfid_to_path "$1" "$2";
+}
+
+main "$@";
diff --git a/tools/gfind_missing_files/gfind_missing_files.sh b/tools/gfind_missing_files/gfind_missing_files.sh
new file mode 100644
index 00000000000..07d6befc958
--- /dev/null
+++ b/tools/gfind_missing_files/gfind_missing_files.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# bash is required: the script uses 'function', 'local', [[ ]] and /dev/tcp.
+## Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+## This file is part of GlusterFS.
+##
+## This file is licensed to you under your choice of the GNU Lesser
+## General Public License, version 3 or any later version (LGPLv3 or
+## later), or the GNU General Public License, version 2 (GPLv2), in all
+## cases as published by the Free Software Foundation.
+
+BRICKPATH= #Brick path of gluster volume
+SLAVEHOST= #Slave hostname
+SLAVEVOL= #Slave volume
+SLAVEMNT= #Slave gluster volume mount point
+WORKERS=4 #Default number of worker threads
+
+function out()
+{
+ echo "$@";
+}
+
+function fatal()
+{
+ out FATAL "$@";
+ exit 1
+}
+
+function ping_host ()
+{
+ ### Use bash internal socket support
+ {
+ exec 400<>/dev/tcp/$1/$2
+ if [ $? -ne '0' ]; then
+ return 1;
+ else
+ exec 400>&-
+ return 0;
+ fi
+ } 1>&2 2>/dev/null
+}
+
+function mount_slave()
+{
+    # Mount $SLAVEVOL from $SLAVEHOST on a fresh temp directory ($SLAVEMNT).
+    local i; # inode number of $SLAVEMNT; 1 is expected once the volume is mounted
+    SSH_PORT=22
+    SLAVEMNT=$(mktemp -d)
+    [ "x$SLAVEMNT" = "x" ] && fatal "Could not mktemp directory";
+    [ -d "$SLAVEMNT" ] || fatal "$SLAVEMNT not a directory";
+
+    if ! ping_host "${SLAVEHOST}" "$SSH_PORT"; then
+        rmdir "$SLAVEMNT"; # do not leave the unused mount point behind
+        echo "$SLAVEHOST not reachable.";
+        exit 1;
+    fi;
+
+    glusterfs --volfile-id="$SLAVEVOL" --aux-gfid-mount --volfile-server="$SLAVEHOST" "$SLAVEMNT";
+    i=$(stat -c '%i' "$SLAVEMNT");
+    [ "x$i" = "x1" ] || fatal "Could not mount volume $SLAVEVOL on $SLAVEMNT Please check host and volume exists";
+}
+
+function parse_cli()
+{
+ if [[ $# -ne 4 ]]; then
+ echo "Usage: gfind_missing_files <brick-path> <slave-host> <slave-vol> <OUTFILE>"
+ exit 1
+ else
+ BRICKPATH=$1;
+ SLAVEHOST=$2;
+ SLAVEVOL=$3;
+ OUTFILE=$4;
+
+ mount_slave;
+ echo "Slave volume is mounted at ${SLAVEMNT}"
+ echo
+ fi
+}
+
+function main()
+{
+    # Crawl the brick, then convert the gfids missing on the slave to path names.
+    parse_cli "$@";
+    echo "Calling crawler...";
+    path=$(readlink -e "$0")
+    "$(dirname "$path")"/gcrawler "${BRICKPATH}" "${SLAVEMNT}" "${WORKERS}" > "${OUTFILE}"
+    crawl_status=$?
+    #Clean up the mount, also when the crawler failed
+    umount "$SLAVEMNT";
+    rmdir "$SLAVEMNT";
+    [ "$crawl_status" -eq 0 ] || fatal "gcrawler failed with status $crawl_status";
+
+    echo "Crawl Complete."
+    num_files_missing=$(wc -l "${OUTFILE}" | awk '{print $1}')
+    if [ "$num_files_missing" -eq 0 ]
+    then
+        echo "Total Missing File Count : 0"
+        exit 0;
+    fi
+
+    echo "gfids of skipped files are available in the file ${OUTFILE}"
+    echo
+    echo "Starting gfid to path conversion"
+
+    #Call python script to convert gfids to full pathname
+    INFILE=$(readlink -e "${OUTFILE}")
+    python "$(dirname "$path")"/gfid_to_path.py "${BRICKPATH}" "${INFILE}" 1> "${OUTFILE}_pathnames" 2> "${OUTFILE}_gfids"
+    echo "Path names of skipped files are available in the file ${OUTFILE}_pathnames"
+
+    gfid_to_path_failures=$(wc -l "${OUTFILE}_gfids" | awk '{print $1}')
+    if [ "$gfid_to_path_failures" -gt 0 ]
+    then
+        echo "WARNING: Unable to convert some GFIDs to Paths, GFIDs logged to ${OUTFILE}_gfids"
+        echo "Use $(dirname "$path")/gfid_to_path.sh <brick-path> ${OUTFILE}_gfids to convert those GFIDs to Path"
+    fi
+
+    echo "Total Missing File Count : $num_files_missing"
+}
+
+main "$@";