diff options
Diffstat (limited to 'tools/gfind_missing_files/gcrawler.c')
| -rw-r--r-- | tools/gfind_missing_files/gcrawler.c | 581 |
1 files changed, 581 insertions, 0 deletions
diff --git a/tools/gfind_missing_files/gcrawler.c b/tools/gfind_missing_files/gcrawler.c new file mode 100644 index 00000000000..4acbe92bc8f --- /dev/null +++ b/tools/gfind_missing_files/gcrawler.c @@ -0,0 +1,581 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <stdio.h> +#include <errno.h> +#include <sys/stat.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <dirent.h> +#include <assert.h> +#include <glusterfs/locking.h> + +#include <glusterfs/compat.h> +#include <glusterfs/list.h> +#include <glusterfs/syscall.h> + +#define THREAD_MAX 32 +#define BUMP(name) INC(name, 1) +#define DEFAULT_WORKERS 4 + +#define NEW(x) \ + { \ + x = calloc(1, sizeof(typeof(*x))); \ + } + +#define err(x...) fprintf(stderr, x) +#define out(x...) fprintf(stdout, x) +#define dbg(x...) \ + do { \ + if (debug) \ + fprintf(stdout, x); \ + } while (0) +#define tout(x...) \ + do { \ + out("[%ld] ", pthread_self()); \ + out(x); \ + } while (0) +#define terr(x...) \ + do { \ + err("[%ld] ", pthread_self()); \ + err(x); \ + } while (0) +#define tdbg(x...) \ + do { \ + dbg("[%ld] ", pthread_self()); \ + dbg(x); \ + } while (0) + +int debug = 0; +const char *slavemnt = NULL; +int workers = 0; + +struct stats { + unsigned long long int cnt_skipped_gfids; +}; + +pthread_spinlock_t stats_lock; + +struct stats stats_total; +int stats = 0; + +#define INC(name, val) \ + do { \ + if (!stats) \ + break; \ + pthread_spin_lock(&stats_lock); \ + { \ + stats_total.cnt_##name += val; \ + } \ + pthread_spin_unlock(&stats_lock); \ + } while (0) + +void +stats_dump() +{ + if (!stats) + return; + + out("-------------------------------------------\n"); + out("Skipped_Files : %10lld\n", stats_total.cnt_skipped_gfids); + out("-------------------------------------------\n"); +} + +struct dirjob { + struct list_head list; + + char *dirname; + + struct dirjob *parent; + int ret; /* final status of this subtree */ + int refcnt; /* how many dirjobs have this as parent */ + + pthread_spinlock_t lock; +}; + +struct xwork { + pthread_t cthreads[THREAD_MAX]; /* crawler threads */ + int count; + int idle; + int stop; + + struct dirjob crawl; + + struct dirjob *rootjob; /* to verify completion in xwork_fini() */ + + pthread_mutex_t mutex; + pthread_cond_t cond; +}; + +struct dirjob * +dirjob_ref(struct dirjob *job) +{ + pthread_spin_lock(&job->lock); + { + job->refcnt++; + } + pthread_spin_unlock(&job->lock); + + return job; +} + +void +dirjob_free(struct dirjob *job) +{ + assert(list_empty(&job->list)); + + pthread_spin_destroy(&job->lock); + free(job->dirname); + free(job); +} + +void +dirjob_ret(struct dirjob *job, int err) +{ + int ret = 0; + int refcnt = 0; + struct dirjob *parent = NULL; + + pthread_spin_lock(&job->lock); + { + refcnt = --job->refcnt; + job->ret = (job->ret || err); + } + pthread_spin_unlock(&job->lock); + + if (refcnt == 0) { + ret = job->ret; + + if (ret) + terr("Failed: %s (%d)\n", job->dirname, ret); + else + tdbg("Finished: %s\n", job->dirname); + + parent = job->parent; + if (parent) + dirjob_ret(parent, ret); + + dirjob_free(job); + job = NULL; + } +} + +struct dirjob * +dirjob_new(const char *dir, struct dirjob *parent) +{ + struct dirjob *job = NULL; + + NEW(job); + if (!job) + return NULL; + + job->dirname = strdup(dir); + if (!job->dirname) { + free(job); + return NULL; + } + + INIT_LIST_HEAD(&job->list); + pthread_spin_init(&job->lock, PTHREAD_PROCESS_PRIVATE); + job->ret = 0; + + if (parent) + job->parent = dirjob_ref(parent); + + job->refcnt = 1; + + return job; +} + +void +xwork_addcrawl(struct xwork *xwork, struct dirjob *job) +{ + pthread_mutex_lock(&xwork->mutex); + { + list_add_tail(&job->list, &xwork->crawl.list); + pthread_cond_broadcast(&xwork->cond); + } + pthread_mutex_unlock(&xwork->mutex); +} + +int +xwork_add(struct xwork *xwork, const char *dir, struct dirjob *parent) +{ + struct dirjob *job = NULL; + + job = dirjob_new(dir, parent); + if (!job) + return -1; + + xwork_addcrawl(xwork, job); + + return 0; +} + +struct dirjob * +xwork_pick(struct xwork *xwork, int block) +{ + struct dirjob *job = NULL; + struct list_head *head = NULL; + + head = &xwork->crawl.list; + + pthread_mutex_lock(&xwork->mutex); + { + for (;;) { + if (xwork->stop) + break; + + if (!list_empty(head)) { + job = list_entry(head->next, typeof(*job), list); + list_del_init(&job->list); + break; + } + + if (((xwork->count * 2) == xwork->idle) && + list_empty(&xwork->crawl.list)) { + /* no outstanding jobs, and no + active workers + */ + tdbg("Jobless. Terminating\n"); + xwork->stop = 1; + pthread_cond_broadcast(&xwork->cond); + break; + } + + if (!block) + break; + + xwork->idle++; + pthread_cond_wait(&xwork->cond, &xwork->mutex); + xwork->idle--; + } + } + pthread_mutex_unlock(&xwork->mutex); + + return job; +} + +int +skip_name(const char *dirname, const char *name) +{ + if (strcmp(name, ".") == 0) + return 1; + + if (strcmp(name, "..") == 0) + return 1; + + if (strcmp(name, "changelogs") == 0) + return 1; + + if (strcmp(name, "health_check") == 0) + return 1; + + if (strcmp(name, "indices") == 0) + return 1; + + if (strcmp(name, "landfill") == 0) + return 1; + + return 0; +} + +int +skip_stat(struct dirjob *job, const char *name) +{ + if (job == NULL) + return 0; + + if (strcmp(job->dirname, ".glusterfs") == 0) { + tdbg( + "Directly adding directories under .glusterfs " + "to global list: %s\n", + name); + return 1; + } + + if (job->parent != NULL) { + if (strcmp(job->parent->dirname, ".glusterfs") == 0) { + tdbg( + "Directly adding directories under .glusterfs/XX " + "to global list: %s\n", + name); + return 1; + } + } + + return 0; +} + +int +xworker_do_crawl(struct xwork *xwork, struct dirjob *job) +{ + DIR *dirp = NULL; + int ret = -1; + int boff; + int plen; + char *path = NULL; + struct dirjob *cjob = NULL; + struct stat statbuf = { + 0, + }; + struct dirent *entry; + struct dirent scratch[2] = { + { + 0, + }, + }; + char gfid_path[PATH_MAX] = { + 0, + }; + + plen = strlen(job->dirname) + 256 + 2; + path = alloca(plen); + + tdbg("Entering: %s\n", job->dirname); + + dirp = sys_opendir(job->dirname); + if (!dirp) { + terr("opendir failed on %s (%s)\n", job->dirname, strerror(errno)); + goto out; + } + + boff = sprintf(path, "%s/", job->dirname); + + for (;;) { + errno = 0; + entry = sys_readdir(dirp, scratch); + if (!entry || errno != 0) { + if (errno != 0) { + err("readdir(%s): %s\n", job->dirname, strerror(errno)); + ret = errno; + goto out; + } + break; + } + + if (entry->d_ino == 0) + continue; + + if (skip_name(job->dirname, entry->d_name)) + continue; + + /* It is sure that, children and grandchildren of .glusterfs + * are directories, just add them to global queue. + */ + if (skip_stat(job, entry->d_name)) { + strncpy(path + boff, entry->d_name, (plen - boff)); + cjob = dirjob_new(path, job); + if (!cjob) { + err("dirjob_new(%s): %s\n", path, strerror(errno)); + ret = -1; + goto out; + } + xwork_addcrawl(xwork, cjob); + continue; + } + + (void)snprintf(gfid_path, sizeof(gfid_path), "%s/.gfid/%s", slavemnt, + entry->d_name); + ret = sys_lstat(gfid_path, &statbuf); + + if (ret && errno == ENOENT) { + out("%s\n", entry->d_name); + BUMP(skipped_gfids); + } + + if (ret && errno != ENOENT) { + err("stat on slave failed(%s): %s\n", gfid_path, strerror(errno)); + goto out; + } + } + + ret = 0; +out: + if (dirp) + (void)sys_closedir(dirp); + + return ret; +} + +void * +xworker_crawl(void *data) +{ + struct xwork *xwork = data; + struct dirjob *job = NULL; + int ret = -1; + + while ((job = xwork_pick(xwork, 0))) { + ret = xworker_do_crawl(xwork, job); + dirjob_ret(job, ret); + } + + return NULL; +} + +int +xwork_fini(struct xwork *xwork, int stop) +{ + int i = 0; + int ret = 0; + void *tret = 0; + + pthread_mutex_lock(&xwork->mutex); + { + xwork->stop = (xwork->stop || stop); + pthread_cond_broadcast(&xwork->cond); + } + pthread_mutex_unlock(&xwork->mutex); + + for (i = 0; i < xwork->count; i++) { + pthread_join(xwork->cthreads[i], &tret); + tdbg("CThread id %ld returned %p\n", xwork->cthreads[i], tret); + } + + if (debug) { + assert(xwork->rootjob->refcnt == 1); + dirjob_ret(xwork->rootjob, 0); + } + + if (stats) + pthread_spin_destroy(&stats_lock); + + return ret; +} + +int +xwork_init(struct xwork *xwork, int count) +{ + int i = 0; + int ret = 0; + struct dirjob *rootjob = NULL; + + if (stats) + pthread_spin_init(&stats_lock, PTHREAD_PROCESS_PRIVATE); + + pthread_mutex_init(&xwork->mutex, NULL); + pthread_cond_init(&xwork->cond, NULL); + + INIT_LIST_HEAD(&xwork->crawl.list); + + rootjob = dirjob_new(".glusterfs", NULL); + if (debug) + xwork->rootjob = dirjob_ref(rootjob); + + xwork_addcrawl(xwork, rootjob); + + xwork->count = count; + for (i = 0; i < count; i++) { + ret = pthread_create(&xwork->cthreads[i], NULL, xworker_crawl, xwork); + if (ret) + break; + tdbg("Spawned crawler %d thread %ld\n", i, xwork->cthreads[i]); + } + + return ret; +} + +int +xfind(const char *basedir) +{ + struct xwork xwork; + int ret = 0; + char *cwd = NULL; + + ret = chdir(basedir); + if (ret) { + err("%s: %s\n", basedir, strerror(errno)); + return ret; + } + + cwd = getcwd(0, 0); + if (!cwd) { + err("getcwd(): %s\n", strerror(errno)); + return -1; + } + + tdbg("Working directory: %s\n", cwd); + free(cwd); + + memset(&xwork, 0, sizeof(xwork)); + + ret = xwork_init(&xwork, workers); + if (ret == 0) + xworker_crawl(&xwork); + + ret = xwork_fini(&xwork, ret); + stats_dump(); + + return ret; +} + +static char * +parse_and_validate_args(int argc, char *argv[]) +{ + char *basedir = NULL; + struct stat d = { + 0, + }; + int ret = -1; +#ifndef __FreeBSD__ + unsigned char volume_id[16]; +#endif /* __FreeBSD__ */ + char *slv_mnt = NULL; + + if (argc != 4) { + err("Usage: %s <DIR> <SLAVE-VOL-MOUNT> <CRAWL-THREAD-COUNT>\n", + argv[0]); + return NULL; + } + + basedir = argv[1]; + ret = sys_lstat(basedir, &d); + if (ret) { + err("%s: %s\n", basedir, strerror(errno)); + return NULL; + } + +#ifndef __FreeBSD__ + ret = sys_lgetxattr(basedir, "trusted.glusterfs.volume-id", volume_id, 16); + if (ret != 16) { + err("%s:Not a valid brick path.\n", basedir); + return NULL; + } +#endif /* __FreeBSD__ */ + + slv_mnt = argv[2]; + ret = sys_lstat(slv_mnt, &d); + if (ret) { + err("%s: %s\n", slv_mnt, strerror(errno)); + return NULL; + } + slavemnt = argv[2]; + + workers = atoi(argv[3]); + if (workers <= 0) + workers = DEFAULT_WORKERS; + + return basedir; +} + +int +main(int argc, char *argv[]) +{ + char *basedir = NULL; + + basedir = parse_and_validate_args(argc, argv); + if (!basedir) + return 1; + + xfind(basedir); + + return 0; +} |
