diff options
Diffstat (limited to 'xlators/features/index')
| -rw-r--r-- | xlators/features/index/Makefile.am | 3 | ||||
| -rw-r--r-- | xlators/features/index/src/Makefile.am | 19 | ||||
| -rw-r--r-- | xlators/features/index/src/index-mem-types.h | 23 | ||||
| -rw-r--r-- | xlators/features/index/src/index-messages.h | 33 | ||||
| -rw-r--r-- | xlators/features/index/src/index.c | 2682 | ||||
| -rw-r--r-- | xlators/features/index/src/index.h | 86 |
6 files changed, 2846 insertions, 0 deletions
diff --git a/xlators/features/index/Makefile.am b/xlators/features/index/Makefile.am new file mode 100644 index 00000000000..a985f42a877 --- /dev/null +++ b/xlators/features/index/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/index/src/Makefile.am b/xlators/features/index/src/Makefile.am new file mode 100644 index 00000000000..c71c238c163 --- /dev/null +++ b/xlators/features/index/src/Makefile.am @@ -0,0 +1,19 @@ +if WITH_SERVER +xlator_LTLIBRARIES = index.la +endif +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +index_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + +index_la_SOURCES = index.c +index_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = index.h index-mem-types.h index-messages.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ + -I$(top_srcdir)/rpc/rpc-lib/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/features/index/src/index-mem-types.h b/xlators/features/index/src/index-mem-types.h new file mode 100644 index 00000000000..58833d0ec9b --- /dev/null +++ b/xlators/features/index/src/index-mem-types.h @@ -0,0 +1,23 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __INDEX_MEM_TYPES_H__ +#define __INDEX_MEM_TYPES_H__ + +#include <glusterfs/mem-types.h> + +enum gf_index_mem_types_ { + gf_index_mt_priv_t = gf_common_mt_end + 1, + gf_index_inode_ctx_t, + gf_index_fd_ctx_t, + gf_index_mt_local_t, + gf_index_mt_end +}; +#endif diff --git a/xlators/features/index/src/index-messages.h b/xlators/features/index/src/index-messages.h new file mode 100644 index 00000000000..364f17cd34e --- /dev/null +++ b/xlators/features/index/src/index-messages.h @@ -0,0 +1,33 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. + */ + +#ifndef _INDEX_MESSAGES_H_ +#define _INDEX_MESSAGES_H_ + +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. + */ + +GLFS_MSGID(INDEX, INDEX_MSG_INDEX_DIR_CREATE_FAILED, + INDEX_MSG_INDEX_READDIR_FAILED, INDEX_MSG_INDEX_ADD_FAILED, + INDEX_MSG_INDEX_DEL_FAILED, INDEX_MSG_DICT_SET_FAILED, + INDEX_MSG_INODE_CTX_GET_SET_FAILED, INDEX_MSG_INVALID_ARGS, + INDEX_MSG_FD_OP_FAILED, INDEX_MSG_WORKER_THREAD_CREATE_FAILED, + INDEX_MSG_INVALID_GRAPH); + +#endif /* !_INDEX_MESSAGES_H_ */ diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c new file mode 100644 index 00000000000..4abb2c73ce5 --- /dev/null +++ b/xlators/features/index/src/index.c @@ -0,0 +1,2682 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#include "index.h" +#include <glusterfs/options.h> +#include "glusterfs3-xdr.h" +#include <glusterfs/syscall.h> +#include <glusterfs/syncop.h> +#include <glusterfs/common-utils.h> +#include "index-messages.h" +#include <ftw.h> +#include <libgen.h> /* for dirname() */ +#include <signal.h> + +#define XATTROP_SUBDIR "xattrop" +#define DIRTY_SUBDIR "dirty" +#define ENTRY_CHANGES_SUBDIR "entry-changes" + +struct index_syncop_args { + inode_t *parent; + gf_dirent_t *entries; + char *path; +}; + +static char *index_vgfid_xattrs[XATTROP_TYPE_END] = { + [XATTROP] = GF_XATTROP_INDEX_GFID, + [DIRTY] = GF_XATTROP_DIRTY_GFID, + [ENTRY_CHANGES] = GF_XATTROP_ENTRY_CHANGES_GFID}; + +static char *index_subdirs[XATTROP_TYPE_END] = { + [XATTROP] = XATTROP_SUBDIR, + [DIRTY] = DIRTY_SUBDIR, + [ENTRY_CHANGES] = ENTRY_CHANGES_SUBDIR}; + +int +index_get_type_from_vgfid(index_priv_t *priv, uuid_t vgfid) +{ + int i = 0; + + for (i = 0; i < XATTROP_TYPE_END; i++) { + if (gf_uuid_compare(priv->internal_vgfid[i], vgfid) == 0) + return i; + } + return -1; +} + +gf_boolean_t +index_is_virtual_gfid(index_priv_t *priv, uuid_t vgfid) +{ + if (index_get_type_from_vgfid(priv, vgfid) < 0) + return _gf_false; + return _gf_true; +} + +static int +__index_inode_ctx_get(inode_t *inode, xlator_t *this, index_inode_ctx_t **ctx) +{ + int ret = 0; + index_inode_ctx_t *ictx = NULL; + uint64_t tmpctx = 0; + + ret = __inode_ctx_get(inode, this, &tmpctx); + if (!ret) { + ictx = (index_inode_ctx_t *)(long)tmpctx; + goto out; + } + ictx = GF_CALLOC(1, sizeof(*ictx), gf_index_inode_ctx_t); + if (!ictx) { + ret = -1; + goto out; + } + + INIT_LIST_HEAD(&ictx->callstubs); + ret = __inode_ctx_put(inode, this, (uint64_t)(uintptr_t)ictx); + if (ret) { + GF_FREE(ictx); + ictx = NULL; + goto out; + } +out: + if (ictx) + *ctx = ictx; + return ret; +} + +static int +index_inode_ctx_get(inode_t *inode, xlator_t *this, index_inode_ctx_t **ctx) +{ + int ret = 0; + + LOCK(&inode->lock); + { + ret = __index_inode_ctx_get(inode, this, ctx); + } + UNLOCK(&inode->lock); + + return ret; +} + +static gf_boolean_t +index_is_subdir_of_entry_changes(xlator_t *this, inode_t *inode) +{ + index_inode_ctx_t *ctx = NULL; + int ret = 0; + + if (!inode) + return _gf_false; + + ret = index_inode_ctx_get(inode, this, &ctx); + if ((ret == 0) && !gf_uuid_is_null(ctx->virtual_pargfid)) + return _gf_true; + return _gf_false; +} + +static int +index_get_type_from_vgfid_xattr(const char *name) +{ + int i = 0; + + for (i = 0; i < XATTROP_TYPE_END; i++) { + if (strcmp(name, index_vgfid_xattrs[i]) == 0) + return i; + } + return -1; +} + +gf_boolean_t +index_is_fop_on_internal_inode(xlator_t *this, inode_t *inode, uuid_t gfid) +{ + index_priv_t *priv = this->private; + uuid_t vgfid = {0}; + + if (!inode) + return _gf_false; + + if (gfid && !gf_uuid_is_null(gfid)) + gf_uuid_copy(vgfid, gfid); + else + gf_uuid_copy(vgfid, inode->gfid); + + if (index_is_virtual_gfid(priv, vgfid)) + return _gf_true; + if (index_is_subdir_of_entry_changes(this, inode)) + return _gf_true; + return _gf_false; +} + +static gf_boolean_t +index_is_vgfid_xattr(const char *name) +{ + if (index_get_type_from_vgfid_xattr(name) < 0) + return _gf_false; + return _gf_true; +} + +call_stub_t * +__index_dequeue(struct list_head *callstubs) +{ + call_stub_t *stub = NULL; + + if (!list_empty(callstubs)) { + stub = list_entry(callstubs->next, call_stub_t, list); + list_del_init(&stub->list); + } + + return stub; +} + +static void +__index_enqueue(struct list_head *callstubs, call_stub_t *stub) +{ + list_add_tail(&stub->list, callstubs); +} + +static void +worker_enqueue(xlator_t *this, call_stub_t *stub) +{ + index_priv_t *priv = NULL; + + priv = this->private; + pthread_mutex_lock(&priv->mutex); + { + __index_enqueue(&priv->callstubs, stub); + GF_ATOMIC_INC(priv->stub_cnt); + pthread_cond_signal(&priv->cond); + } + pthread_mutex_unlock(&priv->mutex); +} + +void * +index_worker(void *data) +{ + index_priv_t *priv = NULL; + xlator_t *this = NULL; + call_stub_t *stub = NULL; + gf_boolean_t bye = _gf_false; + + THIS = data; + this = data; + priv = this->private; + + for (;;) { + pthread_mutex_lock(&priv->mutex); + { + while (list_empty(&priv->callstubs)) { + if (priv->down) { + bye = _gf_true; /*Avoid wait*/ + break; + } + (void)pthread_cond_wait(&priv->cond, &priv->mutex); + if (priv->down) { + bye = _gf_true; + break; + } + } + if (!bye) + stub = __index_dequeue(&priv->callstubs); + if (bye) { + priv->curr_count--; + if (priv->curr_count == 0) + pthread_cond_broadcast(&priv->cond); + } + } + pthread_mutex_unlock(&priv->mutex); + + if (stub) { /* guard against spurious wakeups */ + call_resume(stub); + GF_ATOMIC_DEC(priv->stub_cnt); + } + stub = NULL; + if (bye) + break; + } + + return NULL; +} + +static void +make_index_dir_path(char *base, const char *subdir, char *index_dir, size_t len) +{ + snprintf(index_dir, len, "%s/%s", base, subdir); +} + +int +index_dir_create(xlator_t *this, const char *subdir) +{ + int ret = 0; + struct stat st = {0}; + char fullpath[PATH_MAX] = {0}; + char path[PATH_MAX] = {0}; + char *dir = NULL; + index_priv_t *priv = NULL; + size_t len = 0; + size_t pathlen = 0; + + priv = this->private; + make_index_dir_path(priv->index_basepath, subdir, fullpath, + sizeof(fullpath)); + ret = sys_stat(fullpath, &st); + if (!ret) { + if (!S_ISDIR(st.st_mode)) + ret = -2; + goto out; + } + + pathlen = strlen(fullpath); + if ((pathlen > 1) && fullpath[pathlen - 1] == '/') + fullpath[pathlen - 1] = '\0'; + dir = strchr(fullpath, '/'); + while (dir) { + dir = strchr(dir + 1, '/'); + if (dir) + len = pathlen - strlen(dir); + else + len = pathlen; + strncpy(path, fullpath, len); + path[len] = '\0'; + ret = sys_mkdir(path, 0600); + if (ret && (errno != EEXIST)) + goto out; + } + ret = 0; +out: + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, + INDEX_MSG_INDEX_DIR_CREATE_FAILED, + "%s/%s: Failed to " + "create", + priv->index_basepath, subdir); + } else if (ret == -2) { + gf_msg(this->name, GF_LOG_ERROR, ENOTDIR, + INDEX_MSG_INDEX_DIR_CREATE_FAILED, + "%s/%s: Failed to " + "create, path exists, not a directory ", + priv->index_basepath, subdir); + } + return ret; +} + +void +index_get_index(index_priv_t *priv, uuid_t index) +{ + LOCK(&priv->lock); + { + gf_uuid_copy(index, priv->index); + } + UNLOCK(&priv->lock); +} + +void +index_generate_index(index_priv_t *priv, uuid_t index) +{ + LOCK(&priv->lock); + { + // To prevent duplicate generates. + // This method fails if number of contending threads is greater + // than MAX_LINK count of the fs + if (!gf_uuid_compare(priv->index, index)) + gf_uuid_generate(priv->index); + gf_uuid_copy(index, priv->index); + } + UNLOCK(&priv->lock); +} + +static void +make_index_path(char *base, const char *subdir, uuid_t index, char *index_path, + size_t len) +{ + make_index_dir_path(base, subdir, index_path, len); + snprintf(index_path + strlen(index_path), len - strlen(index_path), + "/%s-%s", subdir, uuid_utoa(index)); +} + +static void +make_gfid_path(char *base, const char *subdir, uuid_t gfid, char *gfid_path, + size_t len) +{ + make_index_dir_path(base, subdir, gfid_path, len); + snprintf(gfid_path + strlen(gfid_path), len - strlen(gfid_path), "/%s", + uuid_utoa(gfid)); +} + +static void +make_file_path(char *base, const char *subdir, const char *filename, + char *file_path, size_t len) +{ + make_index_dir_path(base, subdir, file_path, len); + snprintf(file_path + strlen(file_path), len - strlen(file_path), "/%s", + filename); +} + +static int +is_index_file_current(char *filename, uuid_t priv_index, char *subdir) +{ + char current_index[GF_UUID_BUF_SIZE + 16] = { + 0, + }; + + snprintf(current_index, sizeof current_index, "%s-%s", subdir, + uuid_utoa(priv_index)); + return (!strcmp(filename, current_index)); +} + +static void +check_delete_stale_index_file(xlator_t *this, char *filename, char *subdir) +{ + int ret = 0; + struct stat st = {0}; + char filepath[PATH_MAX] = {0}; + index_priv_t *priv = NULL; + + priv = this->private; + + if (is_index_file_current(filename, priv->index, subdir)) + return; + + make_file_path(priv->index_basepath, subdir, filename, filepath, + sizeof(filepath)); + ret = sys_stat(filepath, &st); + if (!ret && st.st_nlink == 1) + sys_unlink(filepath); +} + +static void +index_set_link_count(index_priv_t *priv, int64_t count, + index_xattrop_type_t type) +{ + switch (type) { + case XATTROP: + LOCK(&priv->lock); + { + priv->pending_count = count; + } + UNLOCK(&priv->lock); + break; + default: + break; + } +} + +static void +index_get_link_count(index_priv_t *priv, int64_t *count, + index_xattrop_type_t type) +{ + switch (type) { + case XATTROP: + LOCK(&priv->lock); + { + *count = priv->pending_count; + } + UNLOCK(&priv->lock); + break; + default: + break; + } +} + +static void +index_dec_link_count(index_priv_t *priv, index_xattrop_type_t type) +{ + switch (type) { + case XATTROP: + LOCK(&priv->lock); + { + priv->pending_count--; + if (priv->pending_count == 0) + priv->pending_count--; + } + UNLOCK(&priv->lock); + break; + default: + break; + } +} + +char * +index_get_subdir_from_type(index_xattrop_type_t type) +{ + if (type < XATTROP || type >= XATTROP_TYPE_END) + return NULL; + return index_subdirs[type]; +} + +char * +index_get_subdir_from_vgfid(index_priv_t *priv, uuid_t vgfid) +{ + return index_get_subdir_from_type(index_get_type_from_vgfid(priv, vgfid)); +} + +static int +index_fill_readdir(fd_t *fd, index_fd_ctx_t *fctx, DIR *dir, off_t off, + size_t size, gf_dirent_t *entries) +{ + off_t in_case = -1; + off_t last_off = 0; + size_t filled = 0; + int count = 0; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + int32_t this_size = -1; + gf_dirent_t *this_entry = NULL; + xlator_t *this = NULL; + + this = THIS; + if (!off) { + rewinddir(dir); + } else { + seekdir(dir, off); +#ifndef GF_LINUX_HOST_OS + if ((u_long)telldir(dir) != off && off != fctx->dir_eof) { + gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, + INDEX_MSG_INDEX_READDIR_FAILED, + "seekdir(0x%llx) failed on dir=%p: " + "Invalid argument (offset reused from " + "another DIR * structure?)", + off, dir); + errno = EINVAL; + count = -1; + goto out; + } +#endif /* GF_LINUX_HOST_OS */ + } + + while (filled <= size) { + in_case = (u_long)telldir(dir); + + if (in_case == -1) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, + INDEX_MSG_INDEX_READDIR_FAILED, "telldir failed on dir=%p", + dir); + goto out; + } + + errno = 0; + entry = sys_readdir(dir, scratch); + if (!entry || errno != 0) { + if (errno == EBADF) { + gf_msg(THIS->name, GF_LOG_WARNING, errno, + INDEX_MSG_INDEX_READDIR_FAILED, + "readdir failed on dir=%p", dir); + goto out; + } + break; + } + + if (!strncmp(entry->d_name, XATTROP_SUBDIR "-", + strlen(XATTROP_SUBDIR "-"))) { + check_delete_stale_index_file(this, entry->d_name, XATTROP_SUBDIR); + continue; + } else if (!strncmp(entry->d_name, DIRTY_SUBDIR "-", + strlen(DIRTY_SUBDIR "-"))) { + check_delete_stale_index_file(this, entry->d_name, DIRTY_SUBDIR); + continue; + } + + this_size = max(sizeof(gf_dirent_t), sizeof(gfs3_dirplist)) + + strlen(entry->d_name) + 1; + + if (this_size + filled > size) { + seekdir(dir, in_case); +#ifndef GF_LINUX_HOST_OS + if ((u_long)telldir(dir) != in_case && in_case != fctx->dir_eof) { + gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, + INDEX_MSG_INDEX_READDIR_FAILED, + "seekdir(0x%llx) failed on dir=%p: " + "Invalid argument (offset reused from " + "another DIR * structure?)", + in_case, dir); + errno = EINVAL; + count = -1; + goto out; + } +#endif /* GF_LINUX_HOST_OS */ + break; + } + + this_entry = gf_dirent_for_name(entry->d_name); + + if (!this_entry) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, + INDEX_MSG_INDEX_READDIR_FAILED, + "could not create gf_dirent for entry %s", entry->d_name); + goto out; + } + /* + * we store the offset of next entry here, which is + * probably not intended, but code using syncop_readdir() + * (glfs-heal.c, afr-self-heald.c, pump.c) rely on it + * for directory read resumption. + */ + last_off = (u_long)telldir(dir); + this_entry->d_off = last_off; + this_entry->d_ino = entry->d_ino; + + list_add_tail(&this_entry->list, &entries->list); + + filled += this_size; + count++; + } + + errno = 0; + + if ((!sys_readdir(dir, scratch) && (errno == 0))) { + /* Indicate EOF */ + errno = ENOENT; + /* Remember EOF offset for later detection */ + fctx->dir_eof = last_off; + } +out: + return count; +} + +int +index_link_to_base(xlator_t *this, char *fpath, const char *subdir) +{ + int ret = 0; + int fd = 0; + int op_errno = 0; + uuid_t index = {0}; + index_priv_t *priv = this->private; + char base[PATH_MAX] = {0}; + + index_get_index(priv, index); + make_index_path(priv->index_basepath, subdir, index, base, sizeof(base)); + + ret = sys_link(base, fpath); + if (!ret || (errno == EEXIST)) { + ret = 0; + goto out; + } + + op_errno = errno; + if (op_errno == ENOENT) { + ret = index_dir_create(this, subdir); + if (ret) { + op_errno = errno; + goto out; + } + } else if (op_errno == EMLINK) { + index_generate_index(priv, index); + make_index_path(priv->index_basepath, subdir, index, base, + sizeof(base)); + } else { + goto out; + } + + op_errno = 0; + fd = sys_creat(base, 0); + if ((fd < 0) && (errno != EEXIST)) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, INDEX_MSG_INDEX_ADD_FAILED, + "%s: Not able to " + "create index", + fpath); + goto out; + } + + if (fd >= 0) + sys_close(fd); + + ret = sys_link(base, fpath); + if (ret && (errno != EEXIST)) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, INDEX_MSG_INDEX_ADD_FAILED, + "%s: Not able to " + "add to index", + fpath); + goto out; + } +out: + return -op_errno; +} + +int +index_add(xlator_t *this, uuid_t gfid, const char *subdir, + index_xattrop_type_t type) +{ + char gfid_path[PATH_MAX] = {0}; + int ret = -1; + index_priv_t *priv = NULL; + struct stat st = {0}; + + priv = this->private; + + if (gf_uuid_is_null(gfid)) { + GF_ASSERT(0); + goto out; + } + + make_gfid_path(priv->index_basepath, subdir, gfid, gfid_path, + sizeof(gfid_path)); + + ret = sys_stat(gfid_path, &st); + if (!ret) + goto out; + ret = index_link_to_base(this, gfid_path, subdir); +out: + return ret; +} + +int +index_del(xlator_t *this, uuid_t gfid, const char *subdir, int type) +{ + int32_t op_errno __attribute__((unused)) = 0; + index_priv_t *priv = NULL; + int ret = 0; + char gfid_path[PATH_MAX] = {0}; + char rename_dst[PATH_MAX] = { + 0, + }; + uuid_t uuid; + + priv = this->private; + GF_ASSERT_AND_GOTO_WITH_ERROR(this->name, !gf_uuid_is_null(gfid), out, + op_errno, EINVAL); + make_gfid_path(priv->index_basepath, subdir, gfid, gfid_path, + sizeof(gfid_path)); + + if ((strcmp(subdir, ENTRY_CHANGES_SUBDIR)) == 0) { + ret = sys_rmdir(gfid_path); + /* rmdir above could fail with ENOTEMPTY if the indices under + * it were created when granular-entry-heal was enabled, whereas + * the actual heal that happened was non-granular (or full) in + * nature, resulting in name indices getting left out. To + * clean up this directory without it affecting the IO path perf, + * the directory is renamed to a unique name under + * indices/entry-changes. Self-heal will pick up this entry + * during crawl and on lookup into the file system figure that + * the index is stale and subsequently wipe it out using rmdir(). + */ + if ((ret) && (errno == ENOTEMPTY)) { + gf_uuid_generate(uuid); + make_gfid_path(priv->index_basepath, subdir, uuid, rename_dst, + sizeof(rename_dst)); + ret = sys_rename(gfid_path, rename_dst); + } + } else { + ret = sys_unlink(gfid_path); + } + + if (ret && (errno != ENOENT)) { + gf_msg(this->name, GF_LOG_ERROR, errno, INDEX_MSG_INDEX_DEL_FAILED, + "%s: failed to delete" + " from index", + gfid_path); + ret = -errno; + goto out; + } + + index_dec_link_count(priv, type); + ret = 0; +out: + return ret; +} + +static gf_boolean_t +_is_xattr_in_watchlist(dict_t *d, char *k, data_t *v, void *tmp) +{ + if (!strncmp(k, tmp, strlen(k))) + return _gf_true; + + return _gf_false; +} + +static gf_boolean_t +is_xattr_in_watchlist(dict_t *this, char *key, data_t *value, void *matchdata) +{ + int ret = -1; + + // matchdata is a list of xattrs + // key is strncmp'ed with each xattr in matchdata. + // ret will be 0 if key pattern is not present in the matchdata + // else ret will be count number of xattrs the key pattern-matches with. + ret = dict_foreach_match(matchdata, _is_xattr_in_watchlist, key, + dict_null_foreach_fn, NULL); + + if (ret > 0) + return _gf_true; + return _gf_false; +} + +static int +index_find_xattr_type(dict_t *d, char *k, data_t *v) +{ + int idx = -1; + index_priv_t *priv = THIS->private; + + if (priv->dirty_watchlist && + is_xattr_in_watchlist(d, k, v, priv->dirty_watchlist)) + idx = DIRTY; + else if (priv->pending_watchlist && + is_xattr_in_watchlist(d, k, v, priv->pending_watchlist)) + idx = XATTROP; + + return idx; +} + +int +index_fill_zero_array(dict_t *d, char *k, data_t *v, void *adata) +{ + int idx = -1; + int *zfilled = adata; + // zfilled array contains `state` for all types xattrs. + // state : whether the gfid file of this file exists in + // corresponding xattr directory or not. + + idx = index_find_xattr_type(d, k, v); + if (idx == -1) + return 0; + zfilled[idx] = 0; + return 0; +} + +static int +_check_key_is_zero_filled(dict_t *d, char *k, data_t *v, void *tmp) +{ + int *zfilled = tmp; + int idx = -1; + + idx = index_find_xattr_type(d, k, v); + if (idx == -1) + return 0; + + /* Along with checking that the value of a key is zero filled + * the key's corresponding index should be assigned + * appropriate value. + * zfilled[idx] will be 0(false) if value not zero. + * will be 1(true) if value is zero. + */ + if (mem_0filled((const char *)v->data, v->len)) { + zfilled[idx] = 0; + return 0; + } + + /* If zfilled[idx] was previously 0, it means at least + * one xattr of its "kind" is non-zero. Keep its value + * the same. + */ + if (zfilled[idx]) + zfilled[idx] = 1; + return 0; +} + +int +index_entry_create(xlator_t *this, inode_t *inode, char *filename) +{ + int ret = -1; + int op_errno = 0; + char pgfid_path[PATH_MAX] = {0}; + char entry_path[PATH_MAX] = {0}; + index_priv_t *priv = NULL; + index_inode_ctx_t *ctx = NULL; + int32_t len = 0; + + priv = this->private; + + GF_ASSERT_AND_GOTO_WITH_ERROR(this->name, !gf_uuid_is_null(inode->gfid), + out, op_errno, EINVAL); + GF_ASSERT_AND_GOTO_WITH_ERROR(this->name, filename, out, op_errno, EINVAL); + + ret = index_inode_ctx_get(inode, this, &ctx); + if (ret) { + op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + INDEX_MSG_INODE_CTX_GET_SET_FAILED, + "Not able to get inode ctx for %s", uuid_utoa(inode->gfid)); + goto out; + } + + make_gfid_path(priv->index_basepath, ENTRY_CHANGES_SUBDIR, inode->gfid, + pgfid_path, sizeof(pgfid_path)); + + if (ctx->state[ENTRY_CHANGES] != IN) { + ret = sys_mkdir(pgfid_path, 0600); + if (ret != 0 && errno != EEXIST) { + op_errno = errno; + goto out; + } + ctx->state[ENTRY_CHANGES] = IN; + } + + if (strchr(filename, '/')) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, INDEX_MSG_INDEX_ADD_FAILED, + "Got invalid entry (%s) for pargfid path (%s)", filename, + pgfid_path); + op_errno = EINVAL; + goto out; + } + + len = snprintf(entry_path, sizeof(entry_path), "%s/%s", pgfid_path, + filename); + if ((len < 0) || (len >= sizeof(entry_path))) { + op_errno = EINVAL; + goto out; + } + + op_errno = 0; + + ret = index_link_to_base(this, entry_path, ENTRY_CHANGES_SUBDIR); +out: + if (op_errno) + ret = -op_errno; + return ret; +} + +int +index_entry_delete(xlator_t *this, uuid_t pgfid, char *filename) +{ + int ret = 0; + int op_errno = 0; + char pgfid_path[PATH_MAX] = {0}; + char entry_path[PATH_MAX] = {0}; + index_priv_t *priv = NULL; + int32_t len = 0; + + priv = this->private; + + GF_ASSERT_AND_GOTO_WITH_ERROR(this->name, !gf_uuid_is_null(pgfid), out, + op_errno, EINVAL); + GF_ASSERT_AND_GOTO_WITH_ERROR(this->name, filename, out, op_errno, EINVAL); + + make_gfid_path(priv->index_basepath, ENTRY_CHANGES_SUBDIR, pgfid, + pgfid_path, sizeof(pgfid_path)); + + if (strchr(filename, '/')) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, INDEX_MSG_INDEX_DEL_FAILED, + "Got invalid entry (%s) for pargfid path (%s)", filename, + pgfid_path); + op_errno = EINVAL; + goto out; + } + + len = snprintf(entry_path, sizeof(entry_path), "%s/%s", pgfid_path, + filename); + if ((len < 0) || (len >= sizeof(entry_path))) { + op_errno = EINVAL; + goto out; + } + + ret = sys_unlink(entry_path); + if (ret && (errno != ENOENT)) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, INDEX_MSG_INDEX_DEL_FAILED, + "%s: failed to delete from index/entry-changes", entry_path); + } + +out: + return -op_errno; +} + +int +index_entry_action(xlator_t *this, inode_t *inode, dict_t *xdata, char *key) +{ + int ret = 0; + char *filename = NULL; + + ret = dict_get_str(xdata, key, &filename); + if (ret != 0) { + ret = 0; + goto out; + } + + if (strcmp(key, GF_XATTROP_ENTRY_IN_KEY) == 0) + ret = index_entry_create(this, inode, filename); + else if (strcmp(key, GF_XATTROP_ENTRY_OUT_KEY) == 0) + ret = index_entry_delete(this, inode->gfid, filename); + +out: + return ret; +} + +void +_index_action(xlator_t *this, inode_t *inode, int *zfilled) +{ + int ret = 0; + int i = 0; + index_inode_ctx_t *ctx = NULL; + char *subdir = NULL; + + ret = index_inode_ctx_get(inode, this, &ctx); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, + INDEX_MSG_INODE_CTX_GET_SET_FAILED, + "Not able to get" + " inode context for %s.", + uuid_utoa(inode->gfid)); + goto out; + } + + for (i = 0; i < XATTROP_TYPE_END; i++) { + subdir = index_get_subdir_from_type(i); + if (zfilled[i] == 1) { + if (ctx->state[i] == NOTIN) + continue; + ret = index_del(this, inode->gfid, subdir, i); + if (!ret) + ctx->state[i] = NOTIN; + } else if (zfilled[i] == 0) { + if (ctx->state[i] == IN) + continue; + ret = index_add(this, inode->gfid, subdir, i); + if (!ret) + ctx->state[i] = IN; + } + } +out: + return; +} + +static void +index_init_state(xlator_t *this, inode_t *inode, index_inode_ctx_t *ctx, + char *subdir) +{ + int ret = -1; + char pgfid_path[PATH_MAX] = {0}; + struct stat st = {0}; + index_priv_t *priv = NULL; + + priv = this->private; + + make_gfid_path(priv->index_basepath, subdir, inode->gfid, pgfid_path, + sizeof(pgfid_path)); + + ret = sys_stat(pgfid_path, &st); + if (ret == 0) + ctx->state[ENTRY_CHANGES] = IN; + else if (ret != 0 && errno == ENOENT) + ctx->state[ENTRY_CHANGES] = NOTIN; + + return; +} + +void +xattrop_index_action(xlator_t *this, index_local_t *local, dict_t *xattr, + dict_match_t match, void *match_data) +{ + int ret = 0; + int zfilled[XATTROP_TYPE_END] = { + 0, + }; + int8_t value = 0; + char *subdir = NULL; + dict_t *req_xdata = NULL; + inode_t *inode = NULL; + index_inode_ctx_t *ctx = NULL; + + inode = local->inode; + req_xdata = local->xdata; + + memset(zfilled, -1, sizeof(zfilled)); + ret = dict_foreach_match(xattr, match, match_data, + _check_key_is_zero_filled, zfilled); + _index_action(this, inode, zfilled); + + if (req_xdata) { + ret = index_entry_action(this, inode, req_xdata, + GF_XATTROP_ENTRY_OUT_KEY); + + ret = dict_get_int8(req_xdata, GF_XATTROP_PURGE_INDEX, &value); + if ((ret) || (value == 0)) + goto out; + } + + if (zfilled[XATTROP] != 1) + goto out; + + if (inode->ia_type != IA_IFDIR) + goto out; + + subdir = index_get_subdir_from_type(ENTRY_CHANGES); + ret = index_inode_ctx_get(inode, this, &ctx); + if (ctx->state[ENTRY_CHANGES] == UNKNOWN) + index_init_state(this, inode, ctx, subdir); + if (ctx->state[ENTRY_CHANGES] == IN) { + ret = index_del(this, inode->gfid, subdir, ENTRY_CHANGES); + ctx->state[ENTRY_CHANGES] = NOTIN; + } + +out: + return; +} + +static gf_boolean_t +index_xattrop_track(xlator_t *this, gf_xattrop_flags_t flags, dict_t *dict) +{ + index_priv_t *priv = this->private; + + if (flags == GF_XATTROP_ADD_ARRAY) + return _gf_true; + + if (flags != GF_XATTROP_ADD_ARRAY64) + return _gf_false; + + if (!priv->pending_watchlist) + return _gf_false; + + if (dict_foreach_match(dict, is_xattr_in_watchlist, priv->pending_watchlist, + dict_null_foreach_fn, NULL) > 0) + return _gf_true; + + return _gf_false; +} + +int +index_inode_path(xlator_t *this, inode_t *inode, char *dirpath, size_t len) +{ + char *subdir = NULL; + int ret = 0; + index_priv_t *priv = NULL; + index_inode_ctx_t *ictx = NULL; + + priv = this->private; + if (!index_is_fop_on_internal_inode(this, inode, NULL)) { + ret = -EINVAL; + goto out; + } + + subdir = index_get_subdir_from_vgfid(priv, inode->gfid); + if (subdir) { + if (len <= strlen(priv->index_basepath) + 1 /*'/'*/ + strlen(subdir)) { + ret = -EINVAL; + goto out; + } + make_index_dir_path(priv->index_basepath, subdir, dirpath, len); + } else { + ret = index_inode_ctx_get(inode, this, &ictx); + if (ret) + goto out; + if (gf_uuid_is_null(ictx->virtual_pargfid)) { + ret = -EINVAL; + goto out; + } + make_index_dir_path(priv->index_basepath, ENTRY_CHANGES_SUBDIR, dirpath, + len); + if (len <= strlen(dirpath) + 1 /*'/'*/ + SLEN(UUID0_STR)) { + ret = -EINVAL; + goto out; + } + strcat(dirpath, "/"); + strcat(dirpath, uuid_utoa(ictx->virtual_pargfid)); + } +out: + return ret; +} + +int +__index_fd_ctx_get(fd_t *fd, xlator_t *this, index_fd_ctx_t **ctx) +{ + int ret = 0; + index_fd_ctx_t *fctx = NULL; + uint64_t tmpctx = 0; + char dirpath[PATH_MAX] = {0}; + + ret = __fd_ctx_get(fd, this, &tmpctx); + if (!ret) { + fctx = (index_fd_ctx_t *)(long)tmpctx; + *ctx = fctx; + goto out; + } + + ret = index_inode_path(this, fd->inode, dirpath, sizeof(dirpath)); + if (ret) + goto out; + + fctx = GF_CALLOC(1, sizeof(*fctx), gf_index_fd_ctx_t); + if (!fctx) { + ret = -ENOMEM; + goto out; + } + + fctx->dir = sys_opendir(dirpath); + if (!fctx->dir) { + ret = -errno; + GF_FREE(fctx); + fctx = NULL; + goto out; + } + fctx->dir_eof = -1; + + ret = __fd_ctx_set(fd, this, (uint64_t)(long)fctx); + if (ret) { + (void)sys_closedir(fctx->dir); + GF_FREE(fctx); + fctx = NULL; + ret = -EINVAL; + goto out; + } + *ctx = fctx; +out: + return ret; +} + +int +index_fd_ctx_get(fd_t *fd, xlator_t *this, index_fd_ctx_t **ctx) +{ + int ret = 0; + LOCK(&fd->lock); + { + ret = __index_fd_ctx_get(fd, this, ctx); + } + UNLOCK(&fd->lock); + return ret; +} + +// new - Not NULL means start a fop +// new - NULL means done processing the fop +void +index_queue_process(xlator_t *this, inode_t *inode, call_stub_t *new) +{ + call_stub_t *stub = NULL; + index_inode_ctx_t *ctx = NULL; + int ret = 0; + call_frame_t *frame = NULL; + + LOCK(&inode->lock); + { + ret = __index_inode_ctx_get(inode, this, &ctx); + if (ret) + goto unlock; + + if (new) { + __index_enqueue(&ctx->callstubs, new); + new = NULL; + } else { + ctx->processing = _gf_false; + } + + if (!ctx->processing) { + stub = __index_dequeue(&ctx->callstubs); + if (stub) + ctx->processing = _gf_true; + else + ctx->processing = _gf_false; + } + } +unlock: + UNLOCK(&inode->lock); + + if (ret && new) { + frame = new->frame; + if (new->fop == GF_FOP_XATTROP) { + INDEX_STACK_UNWIND(xattrop, frame, -1, ENOMEM, NULL, NULL); + } else if (new->fop == GF_FOP_FXATTROP) { + INDEX_STACK_UNWIND(fxattrop, frame, -1, ENOMEM, NULL, NULL); + } + call_stub_destroy(new); + } else if (stub) { + call_resume(stub); + } + return; +} + +static int +xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xattr, dict_t *xdata, dict_match_t match, + dict_t *matchdata) +{ + inode_t *inode = NULL; + index_local_t *local = NULL; + + local = frame->local; + inode = inode_ref(local->inode); + + if (op_ret < 0) + goto out; + + xattrop_index_action(this, local, xattr, match, matchdata); +out: + INDEX_STACK_UNWIND(xattrop, frame, op_ret, op_errno, xattr, xdata); + index_queue_process(this, inode, NULL); + inode_unref(inode); + + return 0; +} + +int32_t +index_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr, + dict_t *xdata) +{ + index_priv_t *priv = this->private; + + xattrop_cbk(frame, cookie, this, op_ret, op_errno, xattr, xdata, + is_xattr_in_watchlist, priv->complete_watchlist); + return 0; +} + +int32_t +index_xattrop64_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr, + dict_t *xdata) +{ + index_priv_t *priv = this->private; + + return xattrop_cbk(frame, cookie, this, op_ret, op_errno, xattr, xdata, + is_xattr_in_watchlist, priv->pending_watchlist); +} + +void +index_xattrop_do(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + int ret = -1; + int zfilled[XATTROP_TYPE_END] = { + 0, + }; + index_local_t *local = NULL; + fop_xattrop_cbk_t x_cbk = NULL; + + local = frame->local; + + if (optype == GF_XATTROP_ADD_ARRAY) + x_cbk = index_xattrop_cbk; + else + x_cbk = index_xattrop64_cbk; + + // In wind phase bring the gfid into index. This way if the brick crashes + // just after posix performs xattrop before _cbk reaches index xlator + // we will still have the gfid in index. + memset(zfilled, -1, sizeof(zfilled)); + + /* Foreach xattr, set corresponding index of zfilled to 1 + * zfilled[index] = 1 implies the xattr's value is zero filled + * and should be added in its corresponding subdir. + * + * zfilled should be set to 1 only for those index that + * exist in xattr variable. This is to distinguish + * between different types of volumes. + * For e.g., if the check is not made, + * zfilled[DIRTY] is set to 1 for EC volumes, + * index file will be tried to create in indices/dirty dir + * which doesn't exist for an EC volume. + */ + ret = dict_foreach(xattr, index_fill_zero_array, zfilled); + + _index_action(this, local->inode, zfilled); + if (xdata) + ret = index_entry_action(this, local->inode, xdata, + GF_XATTROP_ENTRY_IN_KEY); + if (ret < 0) { + x_cbk(frame, NULL, this, -1, -ret, NULL, NULL); + return; + } + + if (loc) + STACK_WIND(frame, x_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, loc, optype, xattr, xdata); + else + STACK_WIND(frame, x_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fxattrop, fd, optype, xattr, xdata); +} + +int +index_xattrop_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + index_xattrop_do(frame, this, loc, NULL, optype, xattr, xdata); + return 0; +} + +int +index_fxattrop_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + index_xattrop_do(frame, this, NULL, fd, optype, xattr, xdata); + return 0; +} + +int32_t +index_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) +{ + call_stub_t *stub = NULL; + index_local_t *local = NULL; + + if (!index_xattrop_track(this, flags, dict)) + goto out; + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + local->inode = inode_ref(loc->inode); + if (xdata) + local->xdata = dict_ref(xdata); + stub = fop_xattrop_stub(frame, index_xattrop_wrapper, loc, flags, dict, + xdata); + +err: + if ((!local) || (!stub)) { + INDEX_STACK_UNWIND(xattrop, frame, -1, ENOMEM, NULL, NULL); + return 0; + } + + index_queue_process(this, loc->inode, stub); + return 0; +out: + STACK_WIND(frame, default_xattrop_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, loc, flags, dict, xdata); + return 0; +} + +int32_t +index_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) +{ + call_stub_t *stub = NULL; + index_local_t *local = NULL; + + if (!index_xattrop_track(this, flags, dict)) + goto out; + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + local->inode = inode_ref(fd->inode); + if (xdata) + local->xdata = dict_ref(xdata); + stub = fop_fxattrop_stub(frame, index_fxattrop_wrapper, fd, flags, dict, + xdata); + +err: + if ((!local) || (!stub)) { + INDEX_STACK_UNWIND(fxattrop, frame, -1, ENOMEM, NULL, xdata); + return 0; + } + + index_queue_process(this, fd->inode, stub); + return 0; +out: + STACK_WIND(frame, default_fxattrop_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata); + return 0; +} + +uint64_t +index_entry_count(xlator_t *this, char *subdir) +{ + uint64_t count = 0; + index_priv_t *priv = NULL; + DIR *dirp = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + char index_dir[PATH_MAX] = { + 0, + }; + + priv = this->private; + + make_index_dir_path(priv->index_basepath, subdir, index_dir, + sizeof(index_dir)); + + dirp = sys_opendir(index_dir); + if (!dirp) + return 0; + + for (;;) { + errno = 0; + entry = sys_readdir(dirp, scratch); + if (!entry || errno != 0) + break; + + if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) + continue; + + if (!strncmp(entry->d_name, subdir, strlen(subdir))) + continue; + + count++; + } + + (void)sys_closedir(dirp); + + return count; +} + +int32_t +index_getxattr_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + index_priv_t *priv = NULL; + dict_t *xattr = NULL; + int ret = 0; + int vgfid_type = 0; + uint64_t count = 0; + + priv = this->private; + + xattr = dict_new(); + if (!xattr) { + ret = -ENOMEM; + goto done; + } + + vgfid_type = index_get_type_from_vgfid_xattr(name); + if (vgfid_type >= 0) { + ret = dict_set_static_bin(xattr, (char *)name, + priv->internal_vgfid[vgfid_type], + sizeof(priv->internal_vgfid[vgfid_type])); + if (ret) { + ret = -EINVAL; + gf_msg(this->name, GF_LOG_ERROR, -ret, INDEX_MSG_DICT_SET_FAILED, + "xattrop index " + "gfid set failed"); + goto done; + } + } + + /* TODO: Need to check what kind of link-counts are needed for + * ENTRY-CHANGES before refactor of this block with array*/ + if (strcmp(name, GF_XATTROP_INDEX_COUNT) == 0) { + count = index_entry_count(this, XATTROP_SUBDIR); + + ret = dict_set_uint64(xattr, (char *)name, count); + if (ret) { + ret = -EINVAL; + gf_msg(this->name, GF_LOG_ERROR, -ret, INDEX_MSG_DICT_SET_FAILED, + "xattrop index " + "count set failed"); + goto done; + } + } else if (strcmp(name, GF_XATTROP_DIRTY_COUNT) == 0) { + count = index_entry_count(this, DIRTY_SUBDIR); + + ret = dict_set_uint64(xattr, (char *)name, count); + if (ret) { + ret = -EINVAL; + gf_msg(this->name, GF_LOG_ERROR, -ret, INDEX_MSG_DICT_SET_FAILED, + "dirty index " + "count set failed"); + goto done; + } + } +done: + if (ret) + STACK_UNWIND_STRICT(getxattr, frame, -1, -ret, xattr, NULL); + else + STACK_UNWIND_STRICT(getxattr, frame, 0, 0, xattr, NULL); + + if (xattr) + dict_unref(xattr); + + return 0; +} + +static int +index_save_pargfid_for_entry_changes(xlator_t *this, loc_t *loc, char *path) +{ + index_priv_t *priv = NULL; + index_inode_ctx_t *ctx = NULL; + int ret = 0; + + priv = this->private; + if (!loc) + return -1; + if (gf_uuid_compare(loc->pargfid, priv->internal_vgfid[ENTRY_CHANGES])) + return 0; + + ret = index_inode_ctx_get(loc->inode, this, &ctx); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, + INDEX_MSG_INODE_CTX_GET_SET_FAILED, + "Unable to get inode context for %s", path); + return -EINVAL; + } + ret = gf_uuid_parse(loc->name, ctx->virtual_pargfid); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, + INDEX_MSG_INODE_CTX_GET_SET_FAILED, + "Unable to store " + "virtual gfid in inode context for %s", + path); + return -EINVAL; + } + return 0; +} + +int32_t +index_lookup_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xattr_req) +{ + index_priv_t *priv = NULL; + struct stat lstatbuf = {0}; + int ret = 0; + int32_t op_errno = EINVAL; + int32_t op_ret = -1; + uint64_t val = IA_INVAL; + char path[PATH_MAX] = {0}; + struct iatt stbuf = { + 0, + }; + struct iatt postparent = { + 0, + }; + dict_t *xattr = NULL; + gf_boolean_t is_dir = _gf_false; + char *subdir = NULL; + loc_t iloc = {0}; + + priv = this->private; + loc_copy(&iloc, loc); + + VALIDATE_OR_GOTO(loc, done); + if (index_is_fop_on_internal_inode(this, loc->parent, loc->pargfid)) { + subdir = index_get_subdir_from_vgfid(priv, loc->pargfid); + ret = index_inode_path(this, loc->parent, path, sizeof(path)); + if (ret < 0) { + op_errno = -ret; + goto done; + } + ret = snprintf(path + strlen(path), PATH_MAX - strlen(path), "/%s", + loc->name); + + if ((ret < 0) || (ret > (PATH_MAX - strlen(path)))) { + op_errno = EINVAL; + op_ret = -1; + goto done; + } + + } else if (index_is_virtual_gfid(priv, loc->gfid)) { + subdir = index_get_subdir_from_vgfid(priv, loc->gfid); + make_index_dir_path(priv->index_basepath, subdir, path, sizeof(path)); + is_dir = _gf_true; + + if ((xattr_req) && (dict_get(xattr_req, GF_INDEX_IA_TYPE_GET_REQ))) { + if (0 == strcmp(subdir, index_get_subdir_from_type(ENTRY_CHANGES))) + val = IA_IFDIR; + else + val = IA_IFREG; + } + } else { + if (!inode_is_linked(loc->inode)) { + inode_unref(iloc.inode); + iloc.inode = inode_find(loc->inode->table, loc->gfid); + } + ret = index_inode_path(this, iloc.inode, path, sizeof(path)); + if (ret < 0) { + op_errno = -ret; + goto done; + } + } + ret = sys_lstat(path, &lstatbuf); + if (ret) { + gf_msg_debug(this->name, errno, "Stat failed on %s dir ", path); + op_errno = errno; + goto done; + } else if (!S_ISDIR(lstatbuf.st_mode) && is_dir) { + op_errno = ENOTDIR; + gf_msg_debug(this->name, op_errno, + "Stat failed on %s dir, " + "not a directory", + path); + goto done; + } + xattr = dict_new(); + if (!xattr) { + op_errno = ENOMEM; + goto done; + } + + if (val != IA_INVAL) { + ret = dict_set_uint64(xattr, GF_INDEX_IA_TYPE_GET_RSP, val); + if (ret) { + op_ret = -1; + op_errno = -ret; + goto done; + } + } + + iatt_from_stat(&stbuf, &lstatbuf); + if (is_dir || inode_is_linked(iloc.inode)) + loc_gfid(&iloc, stbuf.ia_gfid); + else + gf_uuid_generate(stbuf.ia_gfid); + + ret = index_save_pargfid_for_entry_changes(this, &iloc, path); + if (ret) { + op_ret = -1; + op_errno = -ret; + goto done; + } + + stbuf.ia_ino = -1; + op_ret = 0; +done: + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, + loc ? loc->inode : NULL, &stbuf, xattr, &postparent); + if (xattr) + dict_unref(xattr); + loc_wipe(&iloc); + return 0; +} + +int +index_get_gfid_type(void *opaque) +{ + gf_dirent_t *entry = NULL; + xlator_t *this = THIS; + struct index_syncop_args *args = opaque; + loc_t loc = {0}; + struct iatt iatt = {0}; + int ret = 0; + + list_for_each_entry(entry, &args->entries->list, list) + { + if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) + continue; + + loc_wipe(&loc); + + entry->d_type = gf_d_type_from_ia_type(IA_INVAL); + entry->d_stat.ia_type = IA_INVAL; + if (gf_uuid_parse(entry->d_name, loc.gfid)) + continue; + + loc.inode = inode_find(args->parent->table, loc.gfid); + if (loc.inode) { + entry->d_stat.ia_type = loc.inode->ia_type; + entry->d_type = gf_d_type_from_ia_type(loc.inode->ia_type); + continue; + } + loc.inode = inode_new(args->parent->table); + if (!loc.inode) + continue; + ret = syncop_lookup(FIRST_CHILD(this), &loc, &iatt, 0, 0, 0); + if (ret == 0) { + entry->d_type = gf_d_type_from_ia_type(iatt.ia_type); + entry->d_stat = iatt; + } + } + loc_wipe(&loc); + + return 0; +} + +int32_t +index_readdir_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, + size_t size, off_t off, dict_t *xdata) +{ + index_fd_ctx_t *fctx = NULL; + index_priv_t *priv = NULL; + DIR *dir = NULL; + int ret = -1; + int32_t op_ret = -1; + int32_t op_errno = 0; + int count = 0; + gf_dirent_t entries; + struct index_syncop_args args = {0}; + + priv = this->private; + INIT_LIST_HEAD(&entries.list); + + ret = index_fd_ctx_get(fd, this, &fctx); + if (ret < 0) { + op_errno = -ret; + gf_msg(this->name, GF_LOG_WARNING, op_errno, INDEX_MSG_FD_OP_FAILED, + "pfd is NULL, fd=%p", fd); + goto done; + } + + dir = fctx->dir; + if (!dir) { + op_errno = EINVAL; + gf_msg(this->name, GF_LOG_WARNING, op_errno, + INDEX_MSG_INDEX_READDIR_FAILED, "dir is NULL for fd=%p", fd); + goto done; + } + + count = index_fill_readdir(fd, fctx, dir, off, size, &entries); + + /* pick ENOENT to indicate EOF */ + op_errno = errno; + op_ret = count; + if (index_is_virtual_gfid(priv, fd->inode->gfid) && xdata && + dict_get(xdata, "get-gfid-type")) { + args.parent = fd->inode; + args.entries = &entries; + ret = synctask_new(this->ctx->env, index_get_gfid_type, NULL, NULL, + &args); + } +done: + STACK_UNWIND_STRICT(readdir, frame, op_ret, op_errno, &entries, NULL); + gf_dirent_free(&entries); + return 0; +} + +int +deletion_handler(const char *fpath, const struct stat *sb, int typeflag, + struct FTW *ftwbuf) +{ + ia_type_t type = IA_INVAL; + + switch (sb->st_mode & S_IFMT) { + case S_IFREG: + sys_unlink(fpath); + break; + + case S_IFDIR: + sys_rmdir(fpath); + break; + default: + type = ia_type_from_st_mode(sb->st_mode); + gf_msg(THIS->name, GF_LOG_WARNING, EINVAL, INDEX_MSG_INVALID_ARGS, + "%s neither a regular file nor a directory - type:%s", fpath, + gf_inode_type_to_str(type)); + break; + } + return 0; +} + +static int +index_wipe_index_subdir(void *opaque) +{ + struct index_syncop_args *args = opaque; + + nftw(args->path, deletion_handler, 1, FTW_DEPTH | FTW_PHYS); + return 0; +} + +static void +index_get_parent_iatt(struct iatt *parent, char *path, loc_t *loc, + int32_t *op_ret, int32_t *op_errno) +{ + int ret = -1; + struct stat lstatbuf = { + 0, + }; + + ret = sys_lstat(path, &lstatbuf); + if (ret < 0) { + *op_ret = -1; + *op_errno = errno; + return; + } + + iatt_from_stat(parent, &lstatbuf); + gf_uuid_copy(parent->ia_gfid, loc->pargfid); + parent->ia_ino = -1; + + return; +} + +int +index_rmdir_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc, int flag, + dict_t *xdata) +{ + int ret = 0; + int32_t op_ret = 0; + int32_t op_errno = 0; + char *subdir = NULL; + char index_dir[PATH_MAX] = {0}; + char index_subdir[PATH_MAX] = {0}; + uuid_t gfid = {0}; + struct iatt preparent = {0}; + struct iatt postparent = {0}; + index_priv_t *priv = NULL; + index_xattrop_type_t type = XATTROP_TYPE_UNSET; + struct index_syncop_args args = { + 0, + }; + + priv = this->private; + + type = index_get_type_from_vgfid(priv, loc->pargfid); + subdir = index_get_subdir_from_vgfid(priv, loc->pargfid); + make_index_dir_path(priv->index_basepath, subdir, index_dir, + sizeof(index_dir)); + + index_get_parent_iatt(&preparent, index_dir, loc, &op_ret, &op_errno); + if (op_ret < 0) + goto done; + + gf_uuid_parse(loc->name, gfid); + make_gfid_path(priv->index_basepath, subdir, gfid, index_subdir, + sizeof(index_subdir)); + + if (flag == 0) { + ret = index_del(this, gfid, subdir, type); + if (ret < 0) { + op_ret = -1; + op_errno = -ret; + goto done; + } + } else { + args.path = index_subdir; + ret = synctask_new(this->ctx->env, index_wipe_index_subdir, NULL, NULL, + &args); + } + + index_get_parent_iatt(&postparent, index_dir, loc, &op_ret, &op_errno); + if (op_ret < 0) + goto done; + +done: + INDEX_STACK_UNWIND(rmdir, frame, op_ret, op_errno, &preparent, &postparent, + xdata); + return 0; +} + +int +index_unlink_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc, int flag, + dict_t *xdata) +{ + index_priv_t *priv = NULL; + index_inode_ctx_t *ictx = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + int ret = 0; + index_xattrop_type_t type = XATTROP_TYPE_UNSET; + struct iatt preparent = {0}; + struct iatt postparent = {0}; + char index_dir[PATH_MAX] = {0}; + char filepath[PATH_MAX] = {0}; + uuid_t gfid = {0}; + char *subdir = NULL; + + priv = this->private; + type = index_get_type_from_vgfid(priv, loc->pargfid); + ret = index_inode_path(this, loc->parent, index_dir, sizeof(index_dir)); + if (ret < 0) { + op_ret = -1; + op_errno = -ret; + goto done; + } + + index_get_parent_iatt(&preparent, index_dir, loc, &op_ret, &op_errno); + if (op_ret < 0) + goto done; + + if (type <= XATTROP_TYPE_UNSET) { + ret = index_inode_ctx_get(loc->parent, this, &ictx); + if ((ret == 0) && gf_uuid_is_null(ictx->virtual_pargfid)) { + ret = -EINVAL; + } + if (ret == 0) { + ret = index_entry_delete(this, ictx->virtual_pargfid, + (char *)loc->name); + } + } else if (type == ENTRY_CHANGES) { + make_file_path(priv->index_basepath, ENTRY_CHANGES_SUBDIR, + (char *)loc->name, filepath, sizeof(filepath)); + ret = sys_unlink(filepath); + } else { + subdir = index_get_subdir_from_type(type); + gf_uuid_parse(loc->name, gfid); + ret = index_del(this, gfid, subdir, type); + } + if (ret < 0) { + op_ret = -1; + op_errno = -ret; + goto done; + } + + index_get_parent_iatt(&postparent, index_dir, loc, &op_ret, &op_errno); + if (op_ret < 0) + goto done; +done: + INDEX_STACK_UNWIND(unlink, frame, op_ret, op_errno, &preparent, &postparent, + xdata); + return 0; +} + +int32_t +index_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + if (!name || + (!index_is_vgfid_xattr(name) && strcmp(GF_XATTROP_INDEX_COUNT, name) && + strcmp(GF_XATTROP_DIRTY_COUNT, name))) + goto out; + + stub = fop_getxattr_stub(frame, index_getxattr_wrapper, loc, name, xdata); + if (!stub) { + STACK_UNWIND_STRICT(getxattr, frame, -1, ENOMEM, NULL, NULL); + return 0; + } + worker_enqueue(this, stub); + return 0; +out: + STACK_WIND(frame, default_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); + return 0; +} + +int64_t +index_fetch_link_count(xlator_t *this, index_xattrop_type_t type) +{ + index_priv_t *priv = this->private; + char *subdir = NULL; + struct stat lstatbuf = { + 0, + }; + int ret = -1; + int64_t count = -1; + DIR *dirp = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + char index_dir[PATH_MAX] = { + 0, + }; + char index_path[PATH_MAX] = { + 0, + }; + + subdir = index_get_subdir_from_type(type); + make_index_dir_path(priv->index_basepath, subdir, index_dir, + sizeof(index_dir)); + + dirp = sys_opendir(index_dir); + if (!dirp) + goto out; + + for (;;) { + errno = 0; + entry = sys_readdir(dirp, scratch); + if (!entry || errno != 0) { + if (count == -1) + count = 0; + goto out; + } + + if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) + continue; + + make_file_path(priv->index_basepath, subdir, entry->d_name, index_path, + sizeof(index_path)); + + ret = sys_lstat(index_path, &lstatbuf); + if (ret < 0) { + count = -2; + continue; + } else { + count = lstatbuf.st_nlink - 1; + if (count == 0) + continue; + else + break; + } + } +out: + if (dirp) + (void)sys_closedir(dirp); + return count; +} + +dict_t * +index_fill_link_count(xlator_t *this, dict_t *xdata) +{ + int ret = -1; + index_priv_t *priv = NULL; + int64_t count = -1; + + priv = this->private; + xdata = (xdata) ? dict_ref(xdata) : dict_new(); + if (!xdata) + goto out; + + index_get_link_count(priv, &count, XATTROP); + if (count < 0) { + count = index_fetch_link_count(this, XATTROP); + index_set_link_count(priv, count, XATTROP); + } + + if (count == 0) { + ret = dict_set_int8(xdata, "link-count", 0); + if (ret < 0) + gf_msg(this->name, GF_LOG_ERROR, EINVAL, INDEX_MSG_DICT_SET_FAILED, + "Unable to set link-count"); + } else { + ret = dict_set_int8(xdata, "link-count", 1); + if (ret < 0) + gf_msg(this->name, GF_LOG_ERROR, EINVAL, INDEX_MSG_DICT_SET_FAILED, + "Unable to set link-count"); + } + +out: + return xdata; +} + +int32_t +index_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) +{ + xdata = index_fill_link_count(this, xdata); + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata, + postparent); + if (xdata) + dict_unref(xdata); + return 0; +} + +int32_t +index_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) +{ + inode_t *inode = NULL; + call_stub_t *stub = NULL; + char *flag = NULL; + int ret = -1; + + if (!index_is_fop_on_internal_inode(this, loc->parent, loc->pargfid) && + !index_is_fop_on_internal_inode(this, loc->inode, loc->gfid)) { + if (!inode_is_linked(loc->inode)) { + inode = inode_find(loc->inode->table, loc->gfid); + if (!index_is_fop_on_internal_inode(this, inode, loc->gfid)) { + inode_unref(inode); + goto normal; + } + inode_unref(inode); + } else { + goto normal; + } + } + + stub = fop_lookup_stub(frame, index_lookup_wrapper, loc, xattr_req); + if (!stub) { + STACK_UNWIND_STRICT(lookup, frame, -1, ENOMEM, loc->inode, NULL, NULL, + NULL); + return 0; + } + worker_enqueue(this, stub); + return 0; +normal: + ret = dict_get_str_sizen(xattr_req, "link-count", &flag); + if ((ret == 0) && (strcmp(flag, GF_XATTROP_INDEX_COUNT) == 0)) { + STACK_WIND(frame, index_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + } else { + STACK_WIND(frame, default_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + } + + return 0; +} + +int32_t +index_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + xdata = index_fill_link_count(this, xdata); + STACK_UNWIND_STRICT(fstat, frame, op_ret, op_errno, buf, xdata); + if (xdata) + dict_unref(xdata); + return 0; +} + +int32_t +index_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + int ret = -1; + char *flag = NULL; + + ret = dict_get_str(xdata, "link-count", &flag); + if ((ret == 0) && (strcmp(flag, GF_XATTROP_INDEX_COUNT) == 0)) { + STACK_WIND(frame, index_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + } else { + STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + } + + return 0; +} + +int32_t +index_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) +{ + if (!index_is_fop_on_internal_inode(this, fd->inode, NULL)) + goto normal; + + frame->local = NULL; + STACK_UNWIND_STRICT(opendir, frame, 0, 0, fd, NULL); + return 0; + +normal: + STACK_WIND(frame, default_opendir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, loc, fd, xdata); + return 0; +} + +int32_t +index_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + if (!index_is_fop_on_internal_inode(this, fd->inode, NULL)) + goto out; + + stub = fop_readdir_stub(frame, index_readdir_wrapper, fd, size, off, xdata); + if (!stub) { + STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL); + return 0; + } + worker_enqueue(this, stub); + return 0; +out: + STACK_WIND(frame, default_readdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata); + return 0; +} + +int +index_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + + if (!index_is_fop_on_internal_inode(this, loc->parent, NULL)) + goto out; + + stub = fop_unlink_stub(frame, index_unlink_wrapper, loc, xflag, xdata); + if (!stub) { + STACK_UNWIND_STRICT(unlink, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } + worker_enqueue(this, stub); + return 0; +out: + STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; +} + +int +index_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + + if (!index_is_fop_on_internal_inode(this, loc->parent, NULL)) + goto out; + + stub = fop_rmdir_stub(frame, index_rmdir_wrapper, loc, flags, xdata); + if (!stub) { + STACK_UNWIND_STRICT(rmdir, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } + worker_enqueue(this, stub); + return 0; +out: + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir, + loc, flags, xdata); + return 0; +} + +int +index_make_xattrop_watchlist(xlator_t *this, index_priv_t *priv, + char *watchlist, index_xattrop_type_t type) +{ + char *delim = NULL; + char *dup_watchlist = NULL; + char *key = NULL; + char *saveptr = NULL; + dict_t *xattrs = NULL; + data_t *dummy = NULL; + int ret = 0; + + if (!watchlist) + return 0; + + dup_watchlist = gf_strdup(watchlist); + if (!dup_watchlist) + return -1; + + xattrs = dict_new(); + if (!xattrs) { + ret = -1; + goto out; + } + + dummy = int_to_data(1); + if (!dummy) { + ret = -1; + goto out; + } + + data_ref(dummy); + + delim = ","; + key = strtok_r(dup_watchlist, delim, &saveptr); + while (key) { + if (strlen(key) == 0) { + ret = -1; + goto out; + } + + ret = dict_set(xattrs, key, dummy); + if (ret) + goto out; + + key = strtok_r(NULL, delim, &saveptr); + } + + switch (type) { + case DIRTY: + priv->dirty_watchlist = dict_copy_with_ref(xattrs, + priv->dirty_watchlist); + if (!priv->dirty_watchlist) { + ret = -1; + goto out; + } + break; + case XATTROP: + priv->pending_watchlist = dict_copy_with_ref( + xattrs, priv->pending_watchlist); + if (!priv->pending_watchlist) { + ret = -1; + goto out; + } + break; + default: + break; + } + + ret = 0; +out: + if (xattrs) + dict_unref(xattrs); + + GF_FREE(dup_watchlist); + + if (dummy) + data_unref(dummy); + + return ret; +} + +int32_t +mem_acct_init(xlator_t *this) +{ + int ret = -1; + + ret = xlator_mem_acct_init(this, gf_index_mt_end + 1); + + return ret; +} + +int +init(xlator_t *this) +{ + int i = 0; + int ret = -1; + int64_t count = -1; + index_priv_t *priv = NULL; + pthread_attr_t w_attr; + gf_boolean_t mutex_inited = _gf_false; + gf_boolean_t cond_inited = _gf_false; + gf_boolean_t attr_inited = _gf_false; + char *watchlist = NULL; + char *dirtylist = NULL; + char *pendinglist = NULL; + char *index_base_parent = NULL; + char *tmp = NULL; + + if (!this->children || this->children->next) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, INDEX_MSG_INVALID_GRAPH, + "'index' not configured with exactly one child"); + goto out; + } + + if (!this->parents) { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, INDEX_MSG_INVALID_GRAPH, + "dangling volume. check volfile "); + } + + priv = GF_CALLOC(1, sizeof(*priv), gf_index_mt_priv_t); + if (!priv) + goto out; + + LOCK_INIT(&priv->lock); + if ((ret = pthread_cond_init(&priv->cond, NULL)) != 0) { + gf_msg(this->name, GF_LOG_ERROR, ret, INDEX_MSG_INVALID_ARGS, + "pthread_cond_init failed"); + goto out; + } + cond_inited = _gf_true; + + if ((ret = pthread_mutex_init(&priv->mutex, NULL)) != 0) { + gf_msg(this->name, GF_LOG_ERROR, ret, INDEX_MSG_INVALID_ARGS, + "pthread_mutex_init failed"); + goto out; + } + mutex_inited = _gf_true; + + if ((ret = pthread_attr_init(&w_attr)) != 0) { + gf_msg(this->name, GF_LOG_ERROR, ret, INDEX_MSG_INVALID_ARGS, + "pthread_attr_init failed"); + goto out; + } + attr_inited = _gf_true; + + ret = pthread_attr_setstacksize(&w_attr, INDEX_THREAD_STACK_SIZE); + if (ret == EINVAL) { + gf_msg(this->name, GF_LOG_WARNING, ret, INDEX_MSG_INVALID_ARGS, + "Using default thread stack size"); + } + + GF_OPTION_INIT("index-base", priv->index_basepath, path, out); + tmp = gf_strdup(priv->index_basepath); + index_base_parent = dirname(tmp); + if (gf_lstat_dir(index_base_parent, NULL) != 0) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, errno, + INDEX_MSG_INDEX_DIR_CREATE_FAILED, + "Failed to find parent dir (%s) of index basepath %s.", + index_base_parent, priv->index_basepath); + goto out; + } + + GF_OPTION_INIT("xattrop64-watchlist", watchlist, str, out); + ret = index_make_xattrop_watchlist(this, priv, watchlist, XATTROP); + if (ret) + goto out; + + GF_OPTION_INIT("xattrop-dirty-watchlist", dirtylist, str, out); + ret = index_make_xattrop_watchlist(this, priv, dirtylist, DIRTY); + if (ret) + goto out; + + GF_OPTION_INIT("xattrop-pending-watchlist", pendinglist, str, out); + ret = index_make_xattrop_watchlist(this, priv, pendinglist, XATTROP); + if (ret) + goto out; + + if (priv->dirty_watchlist) + priv->complete_watchlist = dict_copy_with_ref(priv->dirty_watchlist, + priv->complete_watchlist); + if (priv->pending_watchlist) + priv->complete_watchlist = dict_copy_with_ref(priv->pending_watchlist, + priv->complete_watchlist); + + gf_uuid_generate(priv->index); + for (i = 0; i < XATTROP_TYPE_END; i++) + gf_uuid_generate(priv->internal_vgfid[i]); + + INIT_LIST_HEAD(&priv->callstubs); + GF_ATOMIC_INIT(priv->stub_cnt, 0); + + this->local_pool = mem_pool_new(index_local_t, 64); + if (!this->local_pool) { + ret = -1; + goto out; + } + + this->private = priv; + + ret = index_dir_create(this, XATTROP_SUBDIR); + if (ret < 0) + goto out; + + if (priv->dirty_watchlist) { + ret = index_dir_create(this, DIRTY_SUBDIR); + if (ret < 0) + goto out; + } + + ret = index_dir_create(this, ENTRY_CHANGES_SUBDIR); + if (ret < 0) + goto out; + + /*init indices files counts*/ + count = index_fetch_link_count(this, XATTROP); + index_set_link_count(priv, count, XATTROP); + priv->down = _gf_false; + + priv->curr_count = 0; + ret = gf_thread_create(&priv->thread, &w_attr, index_worker, this, + "idxwrker"); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, ret, + INDEX_MSG_WORKER_THREAD_CREATE_FAILED, + "Failed to create worker thread, aborting"); + goto out; + } + priv->curr_count++; + ret = 0; +out: + GF_FREE(tmp); + + if (ret) { + if (cond_inited) + pthread_cond_destroy(&priv->cond); + if (mutex_inited) + pthread_mutex_destroy(&priv->mutex); + if (priv && priv->dirty_watchlist) + dict_unref(priv->dirty_watchlist); + if (priv && priv->pending_watchlist) + dict_unref(priv->pending_watchlist); + if (priv && priv->complete_watchlist) + dict_unref(priv->complete_watchlist); + if (priv) + GF_FREE(priv); + this->private = NULL; + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + } + + if (attr_inited) + pthread_attr_destroy(&w_attr); + return ret; +} + +void +fini(xlator_t *this) +{ + index_priv_t *priv = NULL; + + priv = this->private; + if (!priv) + goto out; + + priv->down = _gf_true; + pthread_cond_broadcast(&priv->cond); + if (priv->thread) { + gf_thread_cleanup_xint(priv->thread); + priv->thread = 0; + } + this->private = NULL; + LOCK_DESTROY(&priv->lock); + pthread_cond_destroy(&priv->cond); + pthread_mutex_destroy(&priv->mutex); + if (priv->dirty_watchlist) + dict_unref(priv->dirty_watchlist); + if (priv->pending_watchlist) + dict_unref(priv->pending_watchlist); + if (priv->complete_watchlist) + dict_unref(priv->complete_watchlist); + GF_FREE(priv); + + if (this->local_pool) { + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + } +out: + return; +} + +int +index_forget(xlator_t *this, inode_t *inode) +{ + uint64_t tmp_cache = 0; + if (!inode_ctx_del(inode, this, &tmp_cache)) + GF_FREE((index_inode_ctx_t *)(long)tmp_cache); + + return 0; +} + +int32_t +index_releasedir(xlator_t *this, fd_t *fd) +{ + index_fd_ctx_t *fctx = NULL; + uint64_t ctx = 0; + int ret = 0; + + ret = fd_ctx_del(fd, this, &ctx); + if (ret < 0) + goto out; + + fctx = (index_fd_ctx_t *)(long)ctx; + if (fctx->dir) { + ret = sys_closedir(fctx->dir); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, errno, INDEX_MSG_FD_OP_FAILED, + "closedir error"); + } + + GF_FREE(fctx); +out: + return 0; +} + +int32_t +index_release(xlator_t *this, fd_t *fd) +{ + index_fd_ctx_t *fctx = NULL; + uint64_t ctx = 0; + int ret = 0; + + ret = fd_ctx_del(fd, this, &ctx); + if (ret < 0) + goto out; + + fctx = (index_fd_ctx_t *)(long)ctx; + GF_FREE(fctx); +out: + return 0; +} + +int +notify(xlator_t *this, int event, void *data, ...) +{ + int ret = 0; + index_priv_t *priv = NULL; + uint64_t stub_cnt = 0; + xlator_t *victim = data; + struct timespec sleep_till = { + 0, + }; + + if (!this) + return 0; + + priv = this->private; + if (!priv) + return 0; + + if ((event == GF_EVENT_PARENT_DOWN) && victim->cleanup_starting) { + stub_cnt = GF_ATOMIC_GET(priv->stub_cnt); + timespec_now_realtime(&sleep_till); + sleep_till.tv_sec += 1; + + /* Wait for draining stub from queue before notify PARENT_DOWN */ + pthread_mutex_lock(&priv->mutex); + { + while (stub_cnt) { + (void)pthread_cond_timedwait(&priv->cond, &priv->mutex, + &sleep_till); + stub_cnt = GF_ATOMIC_GET(priv->stub_cnt); + } + } + pthread_mutex_unlock(&priv->mutex); + gf_log(this->name, GF_LOG_INFO, + "Notify GF_EVENT_PARENT_DOWN for brick %s", victim->name); + } + + if ((event == GF_EVENT_CHILD_DOWN) && victim->cleanup_starting) { + pthread_mutex_lock(&priv->mutex); + { + priv->down = _gf_true; + pthread_cond_broadcast(&priv->cond); + while (priv->curr_count) + pthread_cond_wait(&priv->cond, &priv->mutex); + } + pthread_mutex_unlock(&priv->mutex); + + gf_log(this->name, GF_LOG_INFO, + "Notify GF_EVENT_CHILD_DOWN for brick %s", victim->name); + } + + ret = default_notify(this, event, data); + return ret; +} + +struct xlator_fops fops = { + .xattrop = index_xattrop, + .fxattrop = index_fxattrop, + + // interface functions follow + .getxattr = index_getxattr, + .lookup = index_lookup, + .opendir = index_opendir, + .readdir = index_readdir, + .unlink = index_unlink, + .rmdir = index_rmdir, + .fstat = index_fstat, +}; + +struct xlator_dumpops dumpops; + +struct xlator_cbks cbks = {.forget = index_forget, + .release = index_release, + .releasedir = index_releasedir}; + +struct volume_options options[] = { + {.key = {"index-base"}, + .type = GF_OPTION_TYPE_PATH, + .description = "path where the index files need to be stored", + .default_value = "{{ brick.path }}/.glusterfs/indices"}, + {.key = {"xattrop64-watchlist"}, + .type = GF_OPTION_TYPE_STR, + .description = "Comma separated list of xattrs that are watched", + .default_value = "trusted.ec.dirty"}, + {.key = {"xattrop-dirty-watchlist"}, + .type = GF_OPTION_TYPE_STR, + .description = "Comma separated list of xattrs that are watched", + .default_value = "trusted.afr.dirty"}, + {.key = {"xattrop-pending-watchlist"}, + .type = GF_OPTION_TYPE_STR, + .description = "Comma separated list of xattrs that are watched", + .default_value = "trusted.afr.{{ volume.name }}"}, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .notify = notify, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .dumpops = &dumpops, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "index", + .category = GF_MAINTAINED, +}; diff --git a/xlators/features/index/src/index.h b/xlators/features/index/src/index.h new file mode 100644 index 00000000000..a2b6e6e2570 --- /dev/null +++ b/xlators/features/index/src/index.h @@ -0,0 +1,86 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __INDEX_H__ +#define __INDEX_H__ + +#include <glusterfs/xlator.h> +#include <glusterfs/call-stub.h> +#include <glusterfs/defaults.h> +#include <glusterfs/byte-order.h> +#include <glusterfs/common-utils.h> +#include "index-mem-types.h" + +#define INDEX_THREAD_STACK_SIZE ((size_t)(1024 * 1024)) + +typedef enum { UNKNOWN, IN, NOTIN } index_state_t; + +typedef enum { + XATTROP_TYPE_UNSET = -1, + XATTROP, + DIRTY, + ENTRY_CHANGES, + XATTROP_TYPE_END +} index_xattrop_type_t; + +typedef struct index_inode_ctx { + gf_boolean_t processing; + struct list_head callstubs; + int state[XATTROP_TYPE_END]; + uuid_t virtual_pargfid; /* virtual gfid of dir under + .glusterfs/indices/entry-changes. */ +} index_inode_ctx_t; + +typedef struct index_fd_ctx { + DIR *dir; + off_t dir_eof; +} index_fd_ctx_t; + +typedef struct index_priv { + char *index_basepath; + char *dirty_basepath; + uuid_t index; + gf_lock_t lock; + uuid_t internal_vgfid[XATTROP_TYPE_END]; + struct list_head callstubs; + pthread_mutex_t mutex; + pthread_cond_t cond; + dict_t *dirty_watchlist; + dict_t *pending_watchlist; + dict_t *complete_watchlist; + int64_t pending_count; + pthread_t thread; + gf_boolean_t down; + gf_atomic_t stub_cnt; + int32_t curr_count; +} index_priv_t; + +typedef struct index_local { + inode_t *inode; + dict_t *xdata; +} index_local_t; + +#define INDEX_STACK_UNWIND(fop, frame, params...) \ + do { \ + index_local_t *__local = NULL; \ + if (frame) { \ + __local = frame->local; \ + frame->local = NULL; \ + } \ + STACK_UNWIND_STRICT(fop, frame, params); \ + if (__local) { \ + inode_unref(__local->inode); \ + if (__local->xdata) \ + dict_unref(__local->xdata); \ + mem_put(__local); \ + } \ + } while (0) + +#endif |
