summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/dht
diff options
context:
space:
mode:
authorVikas Gorur <vikas@zresearch.com>2009-02-18 17:36:07 +0530
committerVikas Gorur <vikas@zresearch.com>2009-02-18 17:36:07 +0530
commit77adf4cd648dce41f89469dd185deec6b6b53a0b (patch)
tree02e155a5753b398ee572b45793f889b538efab6b /xlators/cluster/dht
parentf3b2e6580e5663292ee113c741343c8a43ee133f (diff)
Added all files
Diffstat (limited to 'xlators/cluster/dht')
-rw-r--r--xlators/cluster/dht/Makefile.am1
-rw-r--r--xlators/cluster/dht/src/Makefile.am30
-rw-r--r--xlators/cluster/dht/src/dht-common.c3470
-rw-r--r--xlators/cluster/dht/src/dht-common.h212
-rw-r--r--xlators/cluster/dht/src/dht-hashfn-tea.c146
-rw-r--r--xlators/cluster/dht/src/dht-hashfn.c88
-rw-r--r--xlators/cluster/dht/src/dht-helper.c326
-rw-r--r--xlators/cluster/dht/src/dht-layout.c543
-rw-r--r--xlators/cluster/dht/src/dht-linkfile.c224
-rw-r--r--xlators/cluster/dht/src/dht-rename.c562
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c460
-rw-r--r--xlators/cluster/dht/src/dht.c222
-rw-r--r--xlators/cluster/dht/src/nufa.c684
13 files changed, 6968 insertions, 0 deletions
diff --git a/xlators/cluster/dht/Makefile.am b/xlators/cluster/dht/Makefile.am
new file mode 100644
index 000000000..f963effea
--- /dev/null
+++ b/xlators/cluster/dht/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src \ No newline at end of file
diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am
new file mode 100644
index 000000000..b7d07d137
--- /dev/null
+++ b/xlators/cluster/dht/src/Makefile.am
@@ -0,0 +1,30 @@
+
+xlator_LTLIBRARIES = dht.la nufa.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
+
+
+dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c \
+ dht-selfheal.c dht-rename.c dht-hashfn.c dht-hashfn-tea.c
+
+dht_la_SOURCES = $(dht_common_source) dht.c
+
+nufa_la_SOURCES = $(dht_common_source) nufa.c
+
+dht_la_LDFLAGS = -module -avoidversion
+dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+nufa_la_LDFLAGS = -module -avoidversion
+nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = dht-common.h dht-common.c
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+
+CLEANFILES =
+
+uninstall-local:
+ rm -f $(DESTDIR)$(xlatordir)/distribute.so
+
+install-data-hook:
+ ln -sf dht.so $(DESTDIR)$(xlatordir)/distribute.so \ No newline at end of file
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
new file mode 100644
index 000000000..5e4979e31
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -0,0 +1,3470 @@
+/*
+ Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+/* TODO: add NS locking */
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "dht-common.h"
+#include "defaults.h"
+
+
+/* TODO:
+ - use volumename in xattr instead of "dht"
+ - use NS locks
+ - handle all cases in self heal layout reconstruction
+ - complete linkfile selfheal
+*/
+
+int
+dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int op_ret, int op_errno)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ ret = op_ret;
+
+ if (ret == 0) {
+ layout = local->selfheal.layout;
+ ret = inode_ctx_put (local->inode, this, (uint64_t)(long)layout);
+
+ if (ret == 0)
+ local->selfheal.layout = NULL;
+
+ if (local->st_ino) {
+ local->stbuf.st_ino = local->st_ino;
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "could not find hashed subvolume for %s",
+ local->loc.path);
+ }
+ }
+
+ DHT_STACK_UNWIND (frame, ret, local->op_errno, local->inode,
+ &local->stbuf, local->xattr);
+
+ return 0;
+}
+
+
+int
+dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct stat *stbuf, dict_t *xattr)
+{
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = 0;
+ int is_dir = 0;
+
+ conf = this->private;
+ local = frame->local;
+ prev = cookie;
+
+ layout = local->layout;
+
+ LOCK (&frame->lock);
+ {
+ /* TODO: assert equal mode on stbuf->st_mode and
+ local->stbuf->st_mode
+
+ else mkdir/chmod/chown and fix
+ */
+ /* TODO: assert equal hash type in xattr, local->xattr */
+
+ /* TODO: always ensure same subvolume is in layout->list[0] */
+
+ ret = dht_layout_merge (this, layout, prev->this,
+ op_ret, op_errno, xattr);
+
+ if (op_ret == -1) {
+ local->op_errno = ENOENT;
+ gf_log (this->name, GF_LOG_WARNING,
+ "lookup of %s on %s returned error (%s)",
+ local->loc.path, prev->this->name,
+ strerror (op_errno));
+
+ goto unlock;
+ }
+
+ is_dir = check_is_dir (inode, stbuf, xattr);
+ if (!is_dir)
+ goto unlock;
+
+ local->op_ret = 0;
+ if (local->xattr == NULL)
+ local->xattr = dict_ref (xattr);
+ if (local->inode == NULL)
+ local->inode = inode_ref (inode);
+
+ dht_stat_merge (this, &local->stbuf, stbuf, prev->this);
+
+ if (prev->this == local->hashed_subvol)
+ local->st_ino = local->stbuf.st_ino;
+
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt)) {
+ if (local->op_ret == 0) {
+ ret = dht_layout_normalize (this, &local->loc, layout);
+
+ local->layout = NULL;
+
+ if (ret != 0) {
+ layout->gen = conf->gen;
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "fixing assignment on %s",
+ local->loc.path);
+ goto selfheal;
+ }
+
+ inode_ctx_put (local->inode, this, (uint64_t)(long)layout);
+
+ if (local->st_ino) {
+ local->stbuf.st_ino = local->st_ino;
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "could not find hashed subvolume for %s",
+ local->loc.path);
+ }
+ }
+
+ DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr);
+ }
+
+ return 0;
+
+selfheal:
+ ret = dht_selfheal_directory (frame, dht_lookup_selfheal_cbk,
+ &local->loc, layout);
+
+ return 0;
+}
+
+int
+dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct stat *stbuf, dict_t *xattr)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ int is_dir = 0;
+ int is_linkfile = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+
+ if (op_errno != ENOTCONN && op_errno != ENOENT) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ }
+
+ goto unlock;
+ }
+
+ if (S_IFMT & (stbuf->st_mode ^ local->inode->st_mode)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "mismatching filetypes 0%o v/s 0%o for %s",
+ (stbuf->st_mode & S_IFMT),
+ (local->inode->st_mode & S_IFMT),
+ local->loc.path);
+
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+
+ goto unlock;
+ }
+
+ layout = dht_layout_get (this, inode);
+
+ is_dir = check_is_dir (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+
+ if (is_linkfile) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "linkfile found in revalidate for %s",
+ local->loc.path);
+ local->layout_mismatch = 1;
+
+ goto unlock;
+ }
+
+ if (is_dir) {
+ ret = dht_layout_dir_mismatch (this, layout,
+ prev->this, &local->loc,
+ xattr);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "mismatching layouts for %s",
+ local->loc.path);
+
+ local->layout_mismatch = 1;
+
+ goto unlock;
+ }
+ }
+
+ dht_stat_merge (this, &local->stbuf, stbuf, prev->this);
+
+ local->op_ret = 0;
+ local->stbuf.st_ino = local->st_ino;
+
+ if (!local->xattr)
+ local->xattr = dict_ref (xattr);
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt)) {
+ if (!S_ISDIR (local->stbuf.st_mode)
+ && (local->hashed_subvol != local->cached_subvol)
+ && (local->stbuf.st_nlink == 1))
+ local->stbuf.st_mode |= S_ISVTX;
+
+ if (local->layout_mismatch) {
+ local->op_ret = -1;
+ local->op_errno = ESTALE;
+ }
+
+ DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr);
+ }
+
+ return 0;
+}
+
+
+int
+dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct stat *stbuf)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ xlator_t *cached_subvol = NULL;
+
+ local = frame->local;
+ cached_subvol = local->cached_subvol;
+
+ layout = dht_layout_for_subvol (this, local->cached_subvol);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no pre-set layout for subvolume %s",
+ cached_subvol ? cached_subvol->name : "<nil>");
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto unwind;
+ }
+
+ inode_ctx_put (local->inode, this, (uint64_t)(long)layout);
+ local->op_ret = 0;
+ if (local->stbuf.st_nlink == 1)
+ local->stbuf.st_mode |= S_ISVTX;
+
+unwind:
+ DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr);
+ return 0;
+}
+
+
+int
+dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct stat *buf, dict_t *xattr)
+{
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ int is_linkfile = 0;
+ int is_dir = 0;
+ xlator_t *subvol = NULL;
+ loc_t *loc = NULL;
+ xlator_t *link_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+
+ conf = this->private;
+
+ local = frame->local;
+ loc = &local->loc;
+
+ prev = cookie;
+ subvol = prev->this;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ if (op_errno != ENOENT)
+ local->op_errno = op_errno;
+ goto unlock;
+ }
+
+ is_linkfile = check_is_linkfile (inode, buf, xattr);
+ is_dir = check_is_dir (inode, buf, xattr);
+
+ if (is_linkfile) {
+ link_subvol = dht_linkfile_subvol (this, inode, buf,
+ xattr);
+ gf_log (this->name, GF_LOG_DEBUG,
+ "found on %s linkfile %s (-> %s)",
+ subvol->name, loc->path,
+ link_subvol ? link_subvol->name : "''");
+ goto unlock;
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "found on %s file %s",
+ subvol->name, loc->path);
+ }
+
+ if (!local->cached_subvol) {
+ /* found one file */
+ dht_stat_merge (this, &local->stbuf, buf, subvol);
+ local->xattr = dict_ref (xattr);
+ local->cached_subvol = subvol;
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "multiple subvolumes (%s and %s atleast) have "
+ "file %s", local->cached_subvol->name,
+ subvol->name, local->loc.path);
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (is_linkfile) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "deleting stale linkfile %s on %s",
+ loc->path, subvol->name);
+ dht_linkfile_unlink (frame, this, subvol, loc);
+ }
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ hashed_subvol = local->hashed_subvol;
+ cached_subvol = local->cached_subvol;
+
+ if (!cached_subvol) {
+ DHT_STACK_UNWIND (frame, -1, ENOENT, NULL, NULL, NULL);
+ return 0;
+ }
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "linking file %s existing on %s to %s (hash)",
+ loc->path, cached_subvol->name, hashed_subvol->name);
+
+ dht_linkfile_create (frame, dht_lookup_linkfile_create_cbk,
+ cached_subvol, hashed_subvol, loc);
+ }
+
+ return 0;
+}
+
+
+int
+dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+ conf = this->private;
+ local = frame->local;
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ if (!local->inode)
+ local->inode = inode_ref (loc->inode);
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_lookup_everywhere_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ loc, local->xattr_req);
+ }
+
+ return 0;
+}
+
+
+int
+dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct stat *stbuf, dict_t *xattr)
+{
+ call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ xlator_t *subvol = NULL;
+ loc_t *loc = NULL;
+
+ prev = cookie;
+ subvol = prev->this;
+
+ local = frame->local;
+ loc = &local->loc;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "lookup of %s on %s (following linkfile) failed (%s)",
+ local->loc.path, subvol->name, strerror (op_errno));
+
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+
+ /* TODO: assert type is non-dir and non-linkfile */
+
+ if (stbuf->st_nlink == 1)
+ stbuf->st_mode |= S_ISVTX;
+ dht_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino);
+
+ layout = dht_layout_for_subvol (this, prev->this);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no pre-set layout for subvolume %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ inode_ctx_put (inode, this, (uint64_t)(long)layout);
+
+out:
+ DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, xattr);
+
+ return 0;
+}
+
+
+int
+dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct stat *stbuf, dict_t *xattr)
+{
+ dht_layout_t *layout = NULL;
+ char is_linkfile = 0;
+ char is_dir = 0;
+ xlator_t *subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+ int i = 0;
+ call_frame_t *prev = NULL;
+ int call_cnt = 0;
+
+
+ conf = this->private;
+
+ prev = cookie;
+ local = frame->local;
+ loc = &local->loc;
+
+ if (ENTRY_MISSING (op_ret, op_errno)) {
+ if (conf->search_unhashed) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+ }
+
+ if (op_ret == 0) {
+ is_dir = check_is_dir (inode, stbuf, xattr);
+ if (is_dir) {
+ local->inode = inode_ref (inode);
+ local->xattr = dict_ref (xattr);
+ }
+ }
+
+ if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) {
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new (this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto out;
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_lookup_dir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+ return 0;
+ }
+
+ if (op_ret == -1)
+ goto out;
+
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ is_dir = check_is_dir (inode, stbuf, xattr);
+
+ if (!is_dir && !is_linkfile) {
+ /* non-directory and not a linkfile */
+
+ dht_itransform (this, prev->this, stbuf->st_ino,
+ &stbuf->st_ino);
+
+ layout = dht_layout_for_subvol (this, prev->this);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no pre-set layout for subvolume %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ inode_ctx_put (inode, this, (uint64_t)(long)layout);
+ goto out;
+ }
+
+ if (is_linkfile) {
+ subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
+
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "linkfile not having link subvolume. path=%s",
+ loc->path);
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+
+ STACK_WIND (frame, dht_lookup_linkfile_cbk,
+ subvol, subvol->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, xattr);
+ return 0;
+}
+
+
+int
+dht_lookup (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xattr_req)
+{
+ xlator_t *subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int op_errno = -1;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ conf = this->private;
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ ret = loc_dup (loc, &local->loc);
+ if (ret == -1) {
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "copying location failed for path=%s",
+ loc->path);
+ goto err;
+ }
+
+ if (xattr_req) {
+ local->xattr_req = dict_ref (xattr_req);
+ } else {
+ local->xattr_req = dict_new ();
+ }
+
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ cached_subvol = dht_subvol_get_cached (this, loc->inode);
+
+ local->cached_subvol = cached_subvol;
+ local->hashed_subvol = hashed_subvol;
+
+ if (is_revalidate (loc)) {
+ layout = dht_layout_get (this, loc->inode);
+
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "revalidate without cache. path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (layout->gen && (layout->gen < conf->gen)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "incomplete layout failure for path=%s",
+ loc->path);
+ op_errno = EAGAIN;
+ goto err;
+ }
+
+ local->inode = inode_ref (loc->inode);
+ local->st_ino = loc->inode->ino;
+
+ local->call_cnt = layout->cnt;
+ call_cnt = local->call_cnt;
+
+ /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
+ * revalidates directly go to the cached-subvolume.
+ */
+ ret = dict_set_uint32 (local->xattr_req,
+ "trusted.glusterfs.dht", 4 * 4);
+
+ for (i = 0; i < layout->cnt; i++) {
+ subvol = layout->list[i].xlator;
+
+ STACK_WIND (frame, dht_revalidate_cbk,
+ subvol, subvol->fops->lookup,
+ loc, local->xattr_req);
+
+ if (!--call_cnt)
+ break;
+ }
+ } else {
+ /* TODO: remove the hard-coding */
+ ret = dict_set_uint32 (local->xattr_req,
+ "trusted.glusterfs.dht", 4 * 4);
+
+ ret = dict_set_uint32 (local->xattr_req,
+ "trusted.glusterfs.dht.linkto", 256);
+
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no subvolume in layout for path=%s, "
+ "checking on all the subvols to see if "
+ "it is a directory", loc->path);
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new (this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_lookup_dir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+ return 0;
+ }
+
+ STACK_WIND (frame, dht_lookup_cbk,
+ hashed_subvol, hashed_subvol->fops->lookup,
+ loc, local->xattr_req);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int
+dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct stat *stbuf)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto unlock;
+ }
+
+ dht_stat_merge (this, &local->stbuf, stbuf, prev->this);
+
+ if (local->inode)
+ local->stbuf.st_ino = local->inode->ino;
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ &local->stbuf);
+
+ return 0;
+}
+
+
+int
+dht_stat (call_frame_t *frame, xlator_t *this,
+ loc_t *loc)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ layout = dht_layout_get (this, loc->inode);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->inode = inode_ref (loc->inode);
+ local->call_cnt = layout->cnt;
+
+ for (i = 0; i < layout->cnt; i++) {
+ subvol = layout->list[i].xlator;
+
+ STACK_WIND (frame, dht_attr_cbk,
+ subvol, subvol->fops->stat,
+ loc);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_fstat (call_frame_t *frame, xlator_t *this,
+ fd_t *fd)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ layout = dht_layout_get (this, fd->inode);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no layout for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "local allocation failed :(");
+ goto err;
+ }
+
+ local->inode = inode_ref (fd->inode);
+ local->call_cnt = layout->cnt;;
+
+ for (i = 0; i < layout->cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_attr_cbk,
+ subvol, subvol->fops->fstat,
+ fd);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_chmod (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode)
+{
+ dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int i = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ layout = dht_layout_get (this, loc->inode);
+
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (!layout_is_sane (layout)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "layout is not sane for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->inode = inode_ref (loc->inode);
+ local->call_cnt = layout->cnt;
+
+ for (i = 0; i < layout->cnt; i++) {
+ STACK_WIND (frame, dht_attr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->chmod,
+ loc, mode);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_chown (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, uid_t uid, gid_t gid)
+{
+ dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int i = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ layout = dht_layout_get (this, loc->inode);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (!layout_is_sane (layout)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "layout is not sane for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->inode = inode_ref (loc->inode);
+ local->call_cnt = layout->cnt;
+
+ for (i = 0; i < layout->cnt; i++) {
+ STACK_WIND (frame, dht_attr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->chown,
+ loc, uid, gid);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_fchmod (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, mode_t mode)
+{
+ dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int i = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+
+ layout = dht_layout_get (this, fd->inode);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no layout for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (!layout_is_sane (layout)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "layout is not sane for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->inode = inode_ref (fd->inode);
+ local->call_cnt = layout->cnt;
+
+ for (i = 0; i < layout->cnt; i++) {
+ STACK_WIND (frame, dht_attr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->fchmod,
+ fd, mode);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_fchown (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, uid_t uid, gid_t gid)
+{
+ dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int i = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ layout = dht_layout_get (this, fd->inode);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no layout for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (!layout_is_sane (layout)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "layout is not sane for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->inode = inode_ref (fd->inode);
+ local->call_cnt = layout->cnt;
+
+ for (i = 0; i < layout->cnt; i++) {
+ STACK_WIND (frame, dht_attr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->fchown,
+ fd, uid, gid);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_utimens (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, struct timespec tv[2])
+{
+ dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int i = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ layout = dht_layout_get (this, loc->inode);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (!layout_is_sane (layout)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "layout is not sane for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->inode = inode_ref (loc->inode);
+ local->call_cnt = layout->cnt;
+
+ for (i = 0; i < layout->cnt; i++) {
+ STACK_WIND (frame, dht_attr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->utimens,
+ loc, tv);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_truncate (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, off_t offset)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->inode = inode_ref (loc->inode);
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_attr_cbk,
+ subvol, subvol->fops->truncate,
+ loc, offset);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_ftruncate (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, off_t offset)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->inode = inode_ref (fd->inode);
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_attr_cbk,
+ subvol, subvol->fops->ftruncate,
+ fd, offset);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto unlock;
+ }
+
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+
+ return 0;
+}
+
+
+int
+dht_access (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t mask)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_err_cbk,
+ subvol, subvol->fops->access,
+ loc, mask);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno);
+
+ return 0;
+}
+
+
+int
+dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, const char *path)
+{
+ DHT_STACK_UNWIND (frame, op_ret, op_errno, path);
+
+ return 0;
+}
+
+
+int
+dht_readlink (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, size_t size)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_readlink_cbk,
+ subvol, subvol->fops->readlink,
+ loc, size);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr)
+{
+ DHT_STACK_UNWIND (frame, op_ret, op_errno, xattr);
+
+ return 0;
+}
+
+
+int
+dht_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *key)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_getxattr_cbk,
+ subvol, subvol->fops->getxattr,
+ loc, key);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xattr, int flags)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_err_cbk,
+ subvol, subvol->fops->setxattr,
+ loc, xattr, flags);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *key)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_err_cbk,
+ subvol, subvol->fops->removexattr,
+ loc, key);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto unlock;
+ }
+
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ local->fd);
+
+ return 0;
+}
+
+
+int
+dht_open (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, fd_t *fd)
+{
+ xlator_t *subvol = NULL;
+ int ret = -1;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->fd = fd_ref (fd);
+ ret = loc_dup (loc, &local->loc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_fd_cbk,
+ subvol, subvol->fops->open,
+ loc, flags, fd);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ struct iovec *vector, int count, struct stat *stbuf)
+{
+ DHT_STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf);
+
+ return 0;
+}
+
+
+int
+dht_readv (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t off)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_readv_cbk,
+ subvol, subvol->fops->readv,
+ fd, size, off);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL);
+
+ return 0;
+}
+
+
+int
+dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct stat *stbuf)
+{
+ DHT_STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+
+ return 0;
+}
+
+
+int
+dht_writev (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, struct iovec *vector, int count, off_t off)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_writev_cbk,
+ subvol, subvol->fops->writev,
+ fd, vector, count, off);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL, 0);
+
+ return 0;
+}
+
+
+int
+dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->fd = fd_ref (fd);
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_err_cbk,
+ subvol, subvol->fops->flush, fd);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno);
+
+ return 0;
+}
+
+
+int
+dht_fsync (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int datasync)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocatoin failed :(");
+ goto err;
+ }
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_err_cbk,
+ subvol, subvol->fops->fsync,
+ fd, datasync);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno);
+
+ return 0;
+}
+
+
+int
+dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct flock *flock)
+{
+ DHT_STACK_UNWIND (frame, op_ret, op_errno, flock);
+
+ return 0;
+}
+
+
+int
+dht_lk (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int cmd, struct flock *flock)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_lk_cbk,
+ subvol, subvol->fops->lk,
+ fd, cmd, flock);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+/* gf_lk no longer exists
+int
+dht_gf_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct flock *flock)
+{
+ DHT_STACK_UNWIND (frame, op_ret, op_errno, flock);
+
+ return 0;
+}
+
+
+int
+dht_gf_lk (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int cmd, struct flock *flock)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_gf_lk_cbk,
+ subvol, subvol->fops->gf_lk,
+ fd, cmd, flock);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+*/
+
+int
+dht_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct statvfs *statvfs)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto unlock;
+ }
+ local->op_ret = 0;
+
+ /* TODO: normalize sizes */
+ local->statvfs.f_bsize = statvfs->f_bsize;
+ local->statvfs.f_frsize = statvfs->f_frsize;
+
+ local->statvfs.f_blocks += statvfs->f_blocks;
+ local->statvfs.f_bfree += statvfs->f_bfree;
+ local->statvfs.f_bavail += statvfs->f_bavail;
+ local->statvfs.f_files += statvfs->f_files;
+ local->statvfs.f_ffree += statvfs->f_ffree;
+ local->statvfs.f_favail += statvfs->f_favail;
+ local->statvfs.f_fsid = statvfs->f_fsid;
+ local->statvfs.f_flag = statvfs->f_flag;
+ local->statvfs.f_namemax = statvfs->f_namemax;
+
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ &local->statvfs);
+
+ return 0;
+}
+
+
+int
+dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1;
+ int i = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ conf = this->private;
+
+ local = dht_local_init (frame);
+ local->call_cnt = conf->subvolume_cnt;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_statfs_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->statfs, loc);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int op_errno = -1;
+ int i = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ conf = this->private;
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->fd = fd_ref (fd);
+ ret = loc_dup (loc, &local->loc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->call_cnt = conf->subvolume_cnt;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_fd_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->opendir,
+ loc, fd);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *orig_entries)
+{
+ dht_local_t *local = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *orig_entry = NULL;
+ gf_dirent_t *entry = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *next = NULL;
+ dht_layout_t *layout = NULL;
+ int count = 0;
+
+
+ INIT_LIST_HEAD (&entries.list);
+ prev = cookie;
+ local = frame->local;
+
+ if (op_ret < 0)
+ goto done;
+
+ layout = dht_layout_get (this, local->fd->inode);
+
+ list_for_each_entry (orig_entry, &orig_entries->list, list) {
+ subvol = dht_layout_search (this, layout, orig_entry->d_name);
+
+ if (!subvol || subvol == prev->this) {
+ entry = gf_dirent_for_name (orig_entry->d_name);
+ if (!entry) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto unwind;
+ }
+
+ dht_itransform (this, subvol, orig_entry->d_ino,
+ &entry->d_ino);
+ dht_itransform (this, subvol, orig_entry->d_off,
+ &entry->d_off);
+
+ entry->d_type = orig_entry->d_type;
+ entry->d_len = orig_entry->d_len;
+
+ list_add_tail (&entry->list, &entries.list);
+ count++;
+ }
+ }
+ op_ret = count;
+
+done:
+ if (count == 0) {
+ next = dht_subvol_next (this, prev->this);
+ if (!next) {
+ goto unwind;
+ }
+
+ STACK_WIND (frame, dht_readdir_cbk,
+ next, next->fops->readdir,
+ local->fd, local->size, 0);
+ return 0;
+ }
+
+unwind:
+ if (op_ret < 0)
+ op_ret = 0;
+
+ DHT_STACK_UNWIND (frame, op_ret, op_errno, &entries);
+
+ gf_dirent_free (&entries);
+
+ return 0;
+}
+
+
+int
+dht_readdir (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t yoff)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1;
+ xlator_t *xvol = NULL;
+ off_t xoff = 0;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ conf = this->private;
+
+ local = dht_local_init (frame);
+ if (!local) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->fd = fd_ref (fd);
+ local->size = size;
+
+ dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff);
+
+ /* TODO: do proper readdir */
+ STACK_WIND (frame, dht_readdir_cbk,
+ xvol, xvol->fops->readdir,
+ fd, size, xoff);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1)
+ local->op_errno = op_errno;
+
+ if (op_ret == 0)
+ local->op_ret = 0;
+ }
+ UNLOCK (&frame->lock);
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+
+ return 0;
+}
+
+
+int
+dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1;
+ int i = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ conf = this->private;
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->fd = fd_ref (fd);
+ local->call_cnt = conf->subvolume_cnt;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_fsyncdir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->fsyncdir,
+ fd, datasync);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno);
+
+ return 0;
+}
+
+
+int
+dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct stat *stbuf)
+{
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+
+
+ if (op_ret == -1)
+ goto out;
+
+ prev = cookie;
+
+ dht_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino);
+ layout = dht_layout_for_subvol (this, prev->this);
+
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no pre-set layout for subvolume %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = inode_ctx_put (inode, this, (uint64_t)(long)layout);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not set inode context");
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+out:
+ DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf);
+ return 0;
+}
+
+
+int
+dht_mknod (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, dev_t rdev)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "creating %s on %s", loc->path, subvol->name);
+
+ STACK_WIND (frame, dht_newfile_cbk,
+ subvol, subvol->fops->mknod,
+ loc, mode, rdev);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+dht_symlink (call_frame_t *frame, xlator_t *this,
+ const char *linkname, loc_t *loc)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "creating %s on %s", loc->path, subvol->name);
+
+ STACK_WIND (frame, dht_newfile_cbk,
+ subvol, subvol->fops->symlink,
+ linkname, loc);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ xlator_t *cached_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ cached_subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!cached_subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->call_cnt = 1;
+ if (hashed_subvol != cached_subvol)
+ local->call_cnt++;
+
+ STACK_WIND (frame, dht_err_cbk,
+ cached_subvol, cached_subvol->fops->unlink, loc);
+
+ if (hashed_subvol != cached_subvol)
+ STACK_WIND (frame, dht_err_cbk,
+ hashed_subvol, hashed_subvol->fops->unlink, loc);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno);
+
+ return 0;
+}
+
+
+int
+dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct stat *stbuf)
+{
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+
+ prev = cookie;
+ local = frame->local;
+
+ if (op_ret == -1)
+ goto out;
+
+ layout = dht_layout_for_subvol (this, prev->this);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no pre-set layout for subvolume %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ stbuf->st_ino = local->loc.inode->ino;
+
+out:
+ DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf);
+
+ return 0;
+}
+
+
+int
+dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct stat *stbuf)
+{
+ dht_local_t *local = NULL;
+ xlator_t *srcvol = NULL;
+
+
+ if (op_ret == -1)
+ goto err;
+
+ local = frame->local;
+ srcvol = local->linkfile.srcvol;
+
+ STACK_WIND (frame, dht_link_cbk,
+ srcvol, srcvol->fops->link,
+ &local->loc, &local->loc2);
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf);
+
+ return 0;
+}
+
+
+int
+dht_link (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc)
+{
+ xlator_t *cached_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ int op_errno = -1;
+ int ret = -1;
+ dht_local_t *local = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (oldloc, err);
+ VALIDATE_OR_GOTO (newloc, err);
+
+ cached_subvol = dht_subvol_get_cached (this, oldloc->inode);
+ if (!cached_subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for path=%s", oldloc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed (this, newloc);
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no subvolume in layout for path=%s",
+ newloc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ ret = loc_copy (&local->loc, oldloc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ ret = loc_copy (&local->loc2, newloc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ if (hashed_subvol != cached_subvol) {
+ dht_linkfile_create (frame, dht_link_linkfile_cbk,
+ cached_subvol, hashed_subvol, newloc);
+ } else {
+ STACK_WIND (frame, dht_link_cbk,
+ cached_subvol, cached_subvol->fops->link,
+ oldloc, newloc);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ fd_t *fd, inode_t *inode, struct stat *stbuf)
+{
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+
+
+ if (op_ret == -1)
+ goto out;
+
+ prev = cookie;
+
+ dht_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino);
+ layout = dht_layout_for_subvol (this, prev->this);
+
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no pre-set layout for subvolume %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = inode_ctx_put (inode, this, (uint64_t)(long)layout);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not set inode context");
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+out:
+ DHT_STACK_UNWIND (frame, op_ret, op_errno, fd, inode, stbuf);
+ return 0;
+}
+
+
+int
+dht_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "creating %s on %s", loc->path, subvol->name);
+
+ STACK_WIND (frame, dht_create_cbk,
+ subvol, subvol->fops->create,
+ loc, flags, mode, fd);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+
+
+ local = frame->local;
+ layout = local->selfheal.layout;
+
+ if (op_ret == 0) {
+ inode_ctx_put (local->inode, this, (uint64_t)(long)layout);
+ local->selfheal.layout = NULL;
+ local->stbuf.st_ino = local->st_ino;
+ }
+
+ DHT_STACK_UNWIND (frame, op_ret, op_errno,
+ local->inode, &local->stbuf);
+
+ return 0;
+}
+
+
+int
+dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct stat *stbuf)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ int ret = -1;
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+
+ local = frame->local;
+ prev = cookie;
+ layout = local->layout;
+
+ LOCK (&frame->lock);
+ {
+ ret = dht_layout_merge (this, layout, prev->this,
+ op_ret, op_errno, NULL);
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto unlock;
+ }
+ dht_stat_merge (this, &local->stbuf, stbuf, prev->this);
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ local->layout = NULL;
+ dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk,
+ &local->loc, layout);
+ }
+
+ return 0;
+}
+
+int
+dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct stat *stbuf)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *hashed_subvol = NULL;
+
+ local = frame->local;
+ prev = cookie;
+ layout = local->layout;
+ conf = this->private;
+ hashed_subvol = local->hashed_subvol;
+
+ ret = dht_layout_merge (this, layout, prev->this,
+ op_ret, op_errno, NULL);
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto err;
+ }
+ local->op_ret = 0;
+
+ dht_stat_merge (this, &local->stbuf, stbuf, prev->this);
+
+ local->st_ino = local->stbuf.st_ino;
+
+ local->call_cnt = conf->subvolume_cnt - 1;
+
+ if (local->call_cnt == 0) {
+ local->layout = NULL;
+ dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk,
+ &local->loc, layout);
+ }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == hashed_subvol)
+ continue;
+ STACK_WIND (frame, dht_mkdir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->mkdir,
+ &local->loc, local->mode);
+ }
+ return 0;
+err:
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+int
+dht_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1;
+ int ret = -1;
+ xlator_t *hashed_subvol = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ conf = this->private;
+
+ local = dht_local_init (frame);
+ if (!local) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+
+ if (hashed_subvol == NULL) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "hashed subvol not found");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->hashed_subvol = hashed_subvol;
+ local->inode = inode_ref (loc->inode);
+ ret = loc_copy (&local->loc, loc);
+ local->mode = mode;
+
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->layout = dht_layout_new (this, conf->subvolume_cnt);
+ if (!local->layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_mkdir_hashed_cbk,
+ hashed_subvol,
+ hashed_subvol->fops->mkdir,
+ loc, mode);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+dht_rmdir_selfheal_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+ local->layout = NULL;
+
+ DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+
+ return 0;
+}
+
+
+int
+dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno)
+{
+ uint64_t tmp_layout = 0;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+
+ if (op_errno != ENOENT)
+ local->need_selfheal = 1;
+
+ gf_log (this->name, GF_LOG_ERROR,
+ "rmdir on %s for %s failed (%s)",
+ prev->this->name, local->loc.path,
+ strerror (op_errno));
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ if (local->need_selfheal) {
+ inode_ctx_get (local->loc.inode, this,
+ &tmp_layout);
+ layout = (dht_layout_t *)(long)tmp_layout;
+
+ /* TODO: neater interface needed below */
+ local->stbuf.st_mode = local->loc.inode->st_mode;
+
+ dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk,
+ &local->loc, layout);
+ } else {
+ DHT_STACK_UNWIND (frame, local->op_ret,
+ local->op_errno);
+ }
+ }
+
+ return 0;
+}
+
+
+int
+dht_rmdir_do (call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+
+ conf = this->private;
+ local = frame->local;
+
+ if (local->op_ret == -1)
+ goto err;
+
+ local->call_cnt = conf->subvolume_cnt;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_rmdir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->rmdir,
+ &local->loc);
+ }
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+ return 0;
+}
+
+
+int
+dht_rmdir_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ call_frame_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret > 2) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "readdir on %s for %s returned %d entries",
+ prev->this->name, local->loc.path, op_ret);
+ local->op_ret = -1;
+ local->op_errno = ENOTEMPTY;
+ }
+
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt)) {
+ dht_rmdir_do (frame, this);
+ }
+
+ return 0;
+}
+
+
+int
+dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ call_frame_t *prev = NULL;
+
+
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "opendir on %s for %s failed (%s)",
+ prev->this->name, local->loc.path,
+ strerror (op_errno));
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_rmdir_readdir_cbk,
+ prev->this, prev->this->fops->readdir,
+ local->fd, 4096, 0);
+
+ return 0;
+
+err:
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt)) {
+ dht_rmdir_do (frame, this);
+ }
+
+ return 0;
+}
+
+
+int
+dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1;
+ int i = -1;
+ int ret = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ conf = this->private;
+
+ local = dht_local_init (frame);
+ if (!local) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->call_cnt = conf->subvolume_cnt;
+ local->op_ret = 0;
+
+ ret = loc_copy (&local->loc, loc);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->fd = fd_create (local->loc.inode, frame->root->pid);
+ if (!local->fd) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_rmdir_opendir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->opendir,
+ loc, local->fd);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno);
+
+ return 0;
+}
+
+
+static int32_t
+dht_xattrop_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict)
+{
+ DHT_STACK_UNWIND (frame, op_ret, op_errno, dict);
+ return 0;
+}
+
+int32_t
+dht_xattrop (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ gf_xattrop_flags_t flags,
+ dict_t *dict)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->inode = inode_ref (loc->inode);
+ local->call_cnt = 1;
+
+ STACK_WIND (frame,
+ dht_xattrop_cbk,
+ subvol, subvol->fops->xattrop,
+ loc, flags, dict);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+static int32_t
+dht_fxattrop_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict)
+{
+ DHT_STACK_UNWIND (frame, op_ret, op_errno, dict);
+ return 0;
+}
+
+int32_t
+dht_fxattrop (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ gf_xattrop_flags_t flags,
+ dict_t *dict)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame,
+ dht_fxattrop_cbk,
+ subvol, subvol->fops->fxattrop,
+ fd, flags, dict);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+static int32_t
+dht_inodelk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno)
+
+{
+ DHT_STACK_UNWIND (frame, op_ret, op_errno);
+ return 0;
+}
+
+
+int32_t
+dht_inodelk (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t cmd, struct flock *lock)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->inode = inode_ref (loc->inode);
+ local->call_cnt = 1;
+
+ STACK_WIND (frame,
+ dht_inodelk_cbk,
+ subvol, subvol->fops->inodelk,
+ loc, cmd, lock);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno);
+
+ return 0;
+}
+
+
+static int32_t
+dht_finodelk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno)
+
+{
+ DHT_STACK_UNWIND (frame, op_ret, op_errno);
+ return 0;
+}
+
+
+int32_t
+dht_finodelk (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t cmd, struct flock *lock)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+
+ STACK_WIND (frame,
+ dht_finodelk_cbk,
+ subvol, subvol->fops->finodelk,
+ fd, cmd, lock);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno);
+
+ return 0;
+}
+
+
+static int32_t
+dht_entrylk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno)
+
+{
+ DHT_STACK_UNWIND (frame, op_ret, op_errno);
+ return 0;
+}
+
+int32_t
+dht_entrylk (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->inode = inode_ref (loc->inode);
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_entrylk_cbk,
+ subvol, subvol->fops->entrylk,
+ loc, basename, cmd, type);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno);
+
+ return 0;
+}
+
+static int32_t
+dht_fentrylk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno)
+
+{
+ DHT_STACK_UNWIND (frame, op_ret, op_errno);
+ return 0;
+}
+
+int32_t
+dht_fentrylk (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_fentrylk_cbk,
+ subvol, subvol->fops->fentrylk,
+ fd, basename, cmd, type);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno);
+
+ return 0;
+}
+
+
+int
+dht_forget (xlator_t *this, inode_t *inode)
+{
+ uint64_t tmp_layout = 0;
+ dht_layout_t *layout = NULL;
+
+ inode_ctx_get (inode, this, &tmp_layout);
+
+ if (!layout)
+ return 0;
+ layout = (dht_layout_t *)(long)tmp_layout;
+ if (!layout->preset)
+ FREE (layout);
+
+ return 0;
+}
+
+
+
+static int
+dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)
+{
+ xlator_list_t *subvols = NULL;
+ int cnt = 0;
+
+
+ for (subvols = this->children; subvols; subvols = subvols->next)
+ cnt++;
+
+ conf->subvolumes = CALLOC (cnt, sizeof (xlator_t *));
+ if (!conf->subvolumes) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ return -1;
+ }
+ conf->subvolume_cnt = cnt;
+
+ cnt = 0;
+ for (subvols = this->children; subvols; subvols = subvols->next)
+ conf->subvolumes[cnt++] = subvols->xlator;
+
+ conf->subvolume_status = CALLOC (cnt, sizeof (char));
+ if (!conf->subvolume_status) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ return -1;
+ }
+
+ return 0;
+}
+
+
+int
+dht_notify (xlator_t *this, int event, void *data, ...)
+{
+ xlator_t *subvol = NULL;
+ int cnt = -1;
+ int i = -1;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+
+
+ conf = this->private;
+
+ switch (event) {
+ case GF_EVENT_CHILD_UP:
+ subvol = data;
+
+ conf->gen++;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ cnt = i;
+ break;
+ }
+ }
+
+ if (cnt == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "got GF_EVENT_CHILD_UP bad subvolume %s",
+ subvol->name);
+ break;
+ }
+
+ LOCK (&conf->subvolume_lock);
+ {
+ conf->subvolume_status[cnt] = 1;
+ }
+ UNLOCK (&conf->subvolume_lock);
+
+ break;
+
+ case GF_EVENT_CHILD_DOWN:
+ subvol = data;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ cnt = i;
+ break;
+ }
+ }
+
+ if (cnt == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "got GF_EVENT_CHILD_DOWN bad subvolume %s",
+ subvol->name);
+ break;
+ }
+
+ LOCK (&conf->subvolume_lock);
+ {
+ conf->subvolume_status[cnt] = 0;
+ }
+ UNLOCK (&conf->subvolume_lock);
+
+ break;
+ }
+
+ ret = default_notify (this, event, data);
+
+ return ret;
+}
+
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
new file mode 100644
index 000000000..17017381b
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -0,0 +1,212 @@
+/*
+ Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef _DHT_H
+#define _DHT_H
+
+
+typedef int (*dht_selfheal_dir_cbk_t) (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno);
+
+
+struct dht_layout {
+ int cnt;
+ int preset;
+ int gen;
+ int type;
+ struct {
+ int err; /* 0 = normal
+ -1 = dir exists and no xattr
+ >0 = dir lookup failed with errno
+ */
+ uint32_t start;
+ uint32_t stop;
+ xlator_t *xlator;
+ } list[0];
+};
+typedef struct dht_layout dht_layout_t;
+
+
+struct dht_local {
+ int call_cnt;
+ loc_t loc;
+ loc_t loc2;
+ int op_ret;
+ int op_errno;
+ int layout_mismatch;
+ struct stat stbuf;
+ struct statvfs statvfs;
+ fd_t *fd;
+ inode_t *inode;
+ dict_t *xattr;
+ dict_t *xattr_req;
+ dht_layout_t *layout;
+ size_t size;
+ ino_t st_ino;
+ xlator_t *src_hashed, *src_cached;
+ xlator_t *dst_hashed, *dst_cached;
+ xlator_t *cached_subvol;
+ xlator_t *hashed_subvol;
+ char need_selfheal;
+ struct {
+ fop_mknod_cbk_t linkfile_cbk;
+ struct stat stbuf;
+ loc_t loc;
+ inode_t *inode;
+ dict_t *xattr;
+ xlator_t *srcvol;
+ } linkfile;
+ struct {
+ uint32_t hole_cnt;
+ uint32_t overlaps_cnt;
+ uint32_t missing;
+ uint32_t down;
+ uint32_t misc;
+ dht_selfheal_dir_cbk_t dir_cbk;
+ dht_layout_t *layout;
+ } selfheal;
+
+ /* needed by nufa */
+ int32_t flags;
+ mode_t mode;
+ dev_t rdev;
+};
+typedef struct dht_local dht_local_t;
+
+
+struct dht_conf {
+ gf_lock_t subvolume_lock;
+ int subvolume_cnt;
+ xlator_t **subvolumes;
+ xlator_t *local_volume; /* Needed by NUFA */
+ char *subvolume_status;
+ dht_layout_t **file_layouts;
+ dht_layout_t **dir_layouts;
+ dht_layout_t *default_dir_layout;
+ gf_boolean_t search_unhashed;
+ int gen;
+};
+typedef struct dht_conf dht_conf_t;
+
+
+struct dht_disk_layout {
+ uint32_t cnt;
+ uint32_t type;
+ struct {
+ uint32_t start;
+ uint32_t stop;
+ } list[1];
+};
+typedef struct dht_disk_layout dht_disk_layout_t;
+
+#define ENTRY_MISSING(op_ret, op_errno) (op_ret == -1 && op_errno == ENOENT)
+
+#define is_fs_root(loc) (strcmp (loc->path, "/") == 0)
+
+#define is_revalidate(loc) (inode_ctx_get (loc->inode, this, NULL) == 0)
+
+#define is_last_call(cnt) (cnt == 0)
+
+#define DHT_LINKFILE_MODE (S_ISVTX)
+#define check_is_linkfile(i,s,x) ((s->st_mode & ~S_IFMT) == DHT_LINKFILE_MODE)
+
+#define check_is_dir(i,s,x) (S_ISDIR(s->st_mode))
+
+#define layout_is_sane(layout) ((layout) && (layout->cnt > 0))
+
+#define DHT_STACK_UNWIND(frame, params ...) do { \
+ dht_local_t *__local = NULL; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_UNWIND (frame, params); \
+ dht_local_wipe (__local); \
+ } while (0)
+
+#define DHT_STACK_DESTROY(frame) do { \
+ dht_local_t *__local = NULL; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
+ dht_local_wipe (__local); \
+ } while (0)
+
+dht_layout_t *dht_layout_new (xlator_t *this, int cnt);
+dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode);
+dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol);
+xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout,
+ const char *name);
+int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout);
+int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
+ uint32_t *holes_p, uint32_t *overlaps_p,
+ uint32_t *missing_p, uint32_t *down_p,
+ uint32_t *misc_p);
+int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout,
+ xlator_t *subvol, loc_t *loc, dict_t *xattr);
+
+xlator_t *dht_linkfile_subvol (xlator_t *this, inode_t *inode,
+ struct stat *buf, dict_t *xattr);
+int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
+ xlator_t *subvol, loc_t *loc);
+
+int dht_layouts_init (xlator_t *this, dht_conf_t *conf);
+int dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
+ int op_ret, int op_errno, dict_t *xattr);
+
+int dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
+ int pos, int32_t **disk_layout_p);
+int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
+ int pos, int32_t *disk_layout);
+
+
+int dht_frame_return (call_frame_t *frame);
+
+int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y);
+int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol,
+ uint64_t *x);
+
+void dht_local_wipe (dht_local_t *local);
+dht_local_t *dht_local_init (call_frame_t *frame);
+int dht_stat_merge (xlator_t *this, struct stat *to, struct stat *from,
+ xlator_t *subvol);
+
+xlator_t *dht_subvol_get_hashed (xlator_t *this, loc_t *loc);
+xlator_t *dht_subvol_get_cached (xlator_t *this, inode_t *inode);
+xlator_t *dht_subvol_next (xlator_t *this, xlator_t *prev);
+int dht_subvol_cnt (xlator_t *this, xlator_t *subvol);
+
+int dht_hash_compute (int type, const char *name, uint32_t *hash_p);
+
+int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
+ xlator_t *tovol, xlator_t *fromvol, loc_t *loc);
+int
+dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ loc_t *loc, dht_layout_t *layout);
+int
+dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ loc_t *loc, dht_layout_t *layout);
+
+int dht_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc);
+#endif /* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-hashfn-tea.c b/xlators/cluster/dht/src/dht-hashfn-tea.c
new file mode 100644
index 000000000..8437b4955
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-hashfn-tea.c
@@ -0,0 +1,146 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+
+#define DELTA 0x9E3779B9
+#define FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */
+#define PARTROUNDS 6 /* 6 gets complete mixing */
+
+
+static int
+tearound (int rounds, uint32_t *array, uint32_t *h0, uint32_t *h1)
+{
+ uint32_t sum = 0;
+ int n = 0;
+ uint32_t b0 = 0;
+ uint32_t b1 = 0;
+
+ b0 = *h0;
+ b1 = *h1;
+
+ n = rounds;
+
+ do {
+ sum += DELTA;
+ b0 += ((b1 << 4) + array[0])
+ ^ (b1 + sum)
+ ^ ((b1 >> 5) + array[1]);
+ b1 += ((b0 << 4) + array[2])
+ ^ (b0 + sum)
+ ^ ((b0 >> 5) + array[3]);
+ } while (--n);
+
+ *h0 += b0;
+ *h1 += b1;
+
+ return 0;
+}
+
+
+uint32_t
+__pad (int len)
+{
+ uint32_t pad = 0;
+
+ pad = (uint32_t) len | ((uint32_t) len << 8);
+ pad |= pad << 16;
+
+ return pad;
+}
+
+
+uint32_t
+dht_hashfn_tea (const char *msg, int len)
+{
+ uint32_t h0 = 0x9464a485;
+ uint32_t h1 = 0x542e1a94;
+ uint32_t array[4];
+ uint32_t pad = 0;
+ int i = 0;
+ int j = 0;
+ int full_quads = 0;
+ int full_words = 0;
+ int full_bytes = 0;
+ uint32_t *intmsg = NULL;
+ int word = 0;
+
+
+ intmsg = (uint32_t *) msg;
+ pad = __pad (len);
+
+ full_bytes = len;
+ full_words = len / 4;
+ full_quads = len / 16;
+
+ for (i = 0; i < full_quads; i++) {
+ for (j = 0; j < 4; j++) {
+ word = *intmsg;
+ array[j] = word;
+ intmsg++;
+ full_words--;
+ full_bytes -= 4;
+ }
+ tearound (PARTROUNDS, &array[0], &h0, &h1);
+ }
+
+ if ((len % 16) == 0) {
+ goto done;
+ }
+
+ for (j = 0; j < 4; j++) {
+ if (full_words) {
+ word = *intmsg;
+ array[j] = word;
+ intmsg++;
+ full_words--;
+ full_bytes -= 4;
+ } else {
+ array[j] = pad;
+ while (full_bytes) {
+ array[j] <<= 8;
+ array[j] |= msg[len - full_bytes];
+ full_bytes--;
+ }
+ }
+ }
+ tearound (FULLROUNDS, &array[0], &h0, &h1);
+
+done:
+ return h0 ^ h1;
+}
+
+
+#if 0
+int
+main (int argc, char *argv[])
+{
+ int i = 0;
+ int hashval = 0;
+
+ for (i = 1; i < argc; i++) {
+ hashval = tea (argv[i], strlen (argv[i]));
+ printf ("%s: %x\n", argv[i], hashval);
+ }
+}
+#endif
diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c
new file mode 100644
index 000000000..9e321a43c
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-hashfn.c
@@ -0,0 +1,88 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "dht-common.h"
+
+
+uint32_t dht_hashfn_tea (const char *name, int len);
+
+
+typedef enum {
+ DHT_HASH_TYPE_TEA,
+} dht_hashfn_type_t;
+
+
+int
+dht_hash_compute_internal (int type, const char *name, uint32_t *hash_p)
+{
+ int ret = 0;
+ uint32_t hash = 0;
+
+ switch (type) {
+ case DHT_HASH_TYPE_TEA:
+ hash = dht_hashfn_tea (name, strlen (name));
+ break;
+ default:
+ ret = -1;
+ break;
+ }
+
+ if (ret == 0) {
+ *hash_p = hash;
+ }
+
+ return ret;
+}
+
+
+#define MAKE_RSYNC_FRIENDLY_NAME(rsync_frndly_name, name) do { \
+ rsync_frndly_name = (char *) name; \
+ if (name[0] == '.') { \
+ char *dot = 0; \
+ int namelen = 0; \
+ \
+ dot = strrchr (name, '.'); \
+ if (dot && dot > (name + 1) && *(dot + 1)) { \
+ namelen = (dot - name); \
+ rsync_frndly_name = alloca (namelen); \
+ strncpy (rsync_frndly_name, name + 1, \
+ namelen); \
+ rsync_frndly_name[namelen - 1] = 0; \
+ } \
+ } \
+ } while (0);
+
+
+int
+dht_hash_compute (int type, const char *name, uint32_t *hash_p)
+{
+ char *rsync_friendly_name = NULL;
+
+ MAKE_RSYNC_FRIENDLY_NAME (rsync_friendly_name, name);
+
+ return dht_hash_compute_internal (type, rsync_friendly_name, hash_p);
+}
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
new file mode 100644
index 000000000..52d072002
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -0,0 +1,326 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "dht-common.h"
+
+
+int
+dht_frame_return (call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+
+ if (!frame)
+ return -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ this_call_cnt = --local->call_cnt;
+ }
+ UNLOCK (&frame->lock);
+
+ return this_call_cnt;
+}
+
+
+int
+dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p)
+{
+ dht_conf_t *conf = NULL;
+ int cnt = 0;
+ int max = 0;
+ uint64_t y = 0;
+
+
+ if (x == ((uint64_t) -1)) {
+ y = (uint64_t) -1;
+ goto out;
+ }
+
+ conf = this->private;
+
+ max = conf->subvolume_cnt;
+ cnt = dht_subvol_cnt (this, subvol);
+
+ y = ((x * max) + cnt);
+
+out:
+ if (y_p)
+ *y_p = y;
+
+ return 0;
+}
+
+
+int
+dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p,
+ uint64_t *x_p)
+{
+ dht_conf_t *conf = NULL;
+ int cnt = 0;
+ int max = 0;
+ uint64_t x = 0;
+ xlator_t *subvol = 0;
+
+
+ conf = this->private;
+ max = conf->subvolume_cnt;
+
+ cnt = y % max;
+ x = y / max;
+
+ subvol = conf->subvolumes[cnt];
+
+ if (subvol_p)
+ *subvol_p = subvol;
+
+ if (x_p)
+ *x_p = x;
+
+ return 0;
+}
+
+
+void
+dht_local_wipe (dht_local_t *local)
+{
+ if (!local)
+ return;
+
+ loc_wipe (&local->loc);
+ loc_wipe (&local->loc2);
+
+ if (local->xattr)
+ dict_unref (local->xattr);
+
+ if (local->inode)
+ inode_unref (local->inode);
+
+ if (local->layout)
+ FREE (local->layout);
+
+ loc_wipe (&local->linkfile.loc);
+
+ if (local->linkfile.xattr)
+ dict_unref (local->linkfile.xattr);
+
+ if (local->linkfile.inode)
+ inode_unref (local->linkfile.inode);
+
+ if (local->fd) {
+ fd_unref (local->fd);
+ local->fd = NULL;
+ }
+
+ if (local->xattr_req)
+ dict_unref (local->xattr_req);
+
+ FREE (local);
+}
+
+
+dht_local_t *
+dht_local_init (call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+
+ /* TODO: use mem-pool */
+ local = CALLOC (1, sizeof (*local));
+
+ if (!local)
+ return NULL;
+
+ local->op_ret = -1;
+ local->op_errno = EUCLEAN;
+
+ frame->local = local;
+
+ return local;
+}
+
+
+char *
+basestr (const char *str)
+{
+ char *basestr = NULL;
+
+ basestr = strrchr (str, '/');
+ if (basestr)
+ basestr ++;
+
+ return basestr;
+}
+
+xlator_t *
+dht_first_up_child (xlator_t *this)
+{
+ dht_conf_t *conf = NULL;
+ xlator_t *child = NULL;
+ int i = 0;
+
+ conf = this->private;
+
+ LOCK (&conf->subvolume_lock);
+ {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolume_status[i]) {
+ child = conf->subvolumes[i];
+ break;
+ }
+ }
+ }
+ UNLOCK (&conf->subvolume_lock);
+
+ return child;
+}
+
+xlator_t *
+dht_subvol_get_hashed (xlator_t *this, loc_t *loc)
+{
+ dht_layout_t *layout = NULL;
+ xlator_t *subvol = NULL;
+
+ if (is_fs_root (loc)) {
+ subvol = dht_first_up_child (this);
+ goto out;
+ }
+
+ layout = dht_layout_get (this, loc->parent);
+
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "layout missing path=%s parent=%"PRId64,
+ loc->path, loc->parent->ino);
+ goto out;
+ }
+
+ subvol = dht_layout_search (this, layout, loc->name);
+
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not find subvolume for path=%s",
+ loc->path);
+ goto out;
+ }
+
+out:
+ return subvol;
+}
+
+
+xlator_t *
+dht_subvol_get_cached (xlator_t *this, inode_t *inode)
+{
+ dht_layout_t *layout = NULL;
+ xlator_t *subvol = NULL;
+
+
+ layout = dht_layout_get (this, inode);
+
+ if (!layout) {
+ goto out;
+ }
+
+ subvol = layout->list[0].xlator;
+
+out:
+ return subvol;
+}
+
+
+xlator_t *
+dht_subvol_next (xlator_t *this, xlator_t *prev)
+{
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *next = NULL;
+
+ conf = this->private;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == prev) {
+ if ((i + 1) < conf->subvolume_cnt)
+ next = conf->subvolumes[i + 1];
+ break;
+ }
+ }
+
+ return next;
+}
+
+
+int
+dht_subvol_cnt (xlator_t *this, xlator_t *subvol)
+{
+ int i = 0;
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+
+
+ conf = this->private;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ ret = i;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+
+#define set_if_greater(a, b) do { \
+ if ((a) < (b)) \
+ (a) = (b); \
+ } while (0)
+
+int
+dht_stat_merge (xlator_t *this, struct stat *to,
+ struct stat *from, xlator_t *subvol)
+{
+ to->st_dev = from->st_dev;
+
+ dht_itransform (this, subvol, from->st_ino, &to->st_ino);
+
+ to->st_mode = from->st_mode;
+ to->st_nlink = from->st_nlink;
+ to->st_uid = from->st_uid;
+ to->st_gid = from->st_gid;
+ to->st_rdev = from->st_rdev;
+ to->st_size += from->st_size;
+ to->st_blksize = from->st_blksize;
+ to->st_blocks += from->st_blocks;
+
+ set_if_greater (to->st_atime, from->st_atime);
+ set_if_greater (to->st_mtime, from->st_mtime);
+ set_if_greater (to->st_ctime, from->st_ctime);
+
+ return 0;
+}
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
new file mode 100644
index 000000000..08b4a2746
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-layout.c
@@ -0,0 +1,543 @@
+/*
+ Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "dht-common.h"
+#include "byte-order.h"
+
+#define layout_base_size (sizeof (dht_layout_t))
+
+#define layout_entry_size (sizeof ((dht_layout_t *)NULL)->list[0])
+
+#define layout_size(cnt) (layout_base_size + (cnt * layout_entry_size))
+
+
+dht_layout_t *
+dht_layout_new (xlator_t *this, int cnt)
+{
+ dht_layout_t *layout = NULL;
+
+
+ layout = CALLOC (1, layout_size (cnt));
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto out;
+ }
+
+ layout->cnt = cnt;
+
+out:
+ return layout;
+}
+
+
+dht_layout_t *
+dht_layout_get (xlator_t *this, inode_t *inode)
+{
+ uint64_t layout = 0;
+ int ret = -1;
+
+ ret = inode_ctx_get (inode, this, &layout);
+
+ return (dht_layout_t *)(long)layout;
+}
+
+
+xlator_t *
+dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name)
+{
+ uint32_t hash = 0;
+ xlator_t *subvol = NULL;
+ int i = 0;
+ int ret = 0;
+
+
+ ret = dht_hash_compute (layout->type, name, &hash);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "hash computation failed for type=%d name=%s",
+ layout->type, name);
+ goto out;
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].start <= hash
+ && layout->list[i].stop >= hash) {
+ subvol = layout->list[i].xlator;
+ break;
+ }
+ }
+
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume for hash (value) = %u", hash);
+ }
+
+out:
+ return subvol;
+}
+
+
+dht_layout_t *
+dht_layout_for_subvol (xlator_t *this, xlator_t *subvol)
+{
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+
+
+ conf = this->private;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == subvol) {
+ layout = conf->file_layouts[i];
+ break;
+ }
+ }
+
+ return layout;
+}
+
+
+int
+dht_layouts_init (xlator_t *this, dht_conf_t *conf)
+{
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int ret = -1;
+
+
+ conf->file_layouts = CALLOC (conf->subvolume_cnt,
+ sizeof (dht_layout_t *));
+ if (!conf->file_layouts) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto out;
+ }
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ layout = dht_layout_new (this, 1);
+
+ if (!layout) {
+ goto out;
+ }
+
+ layout->preset = 1;
+
+ layout->list[0].xlator = conf->subvolumes[i];
+
+ conf->file_layouts[i] = layout;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int
+dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
+ int pos, int32_t **disk_layout_p)
+{
+ int ret = -1;
+ int32_t *disk_layout = NULL;
+
+ disk_layout = CALLOC (5, sizeof (int));
+ if (!disk_layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto out;
+ }
+
+ disk_layout[0] = hton32 (1);
+ disk_layout[1] = hton32 (layout->type);
+ disk_layout[2] = hton32 (layout->list[pos].start);
+ disk_layout[3] = hton32 (layout->list[pos].stop);
+
+ if (disk_layout_p)
+ *disk_layout_p = disk_layout;
+ ret = 0;
+
+out:
+ return ret;
+}
+
+
+int
+dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
+ int pos, int32_t *disk_layout)
+{
+ int cnt = 0;
+ int type = 0;
+ int start_off = 0;
+ int stop_off = 0;
+
+
+ /* TODO: assert disk_layout_ptr is of required length */
+
+ cnt = ntoh32 (disk_layout[0]);
+ if (cnt != 1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "disk layout has invalid count %d", cnt);
+ return -1;
+ }
+
+ /* TODO: assert type is compatible */
+ type = ntoh32 (disk_layout[1]);
+ start_off = ntoh32 (disk_layout[2]);
+ stop_off = ntoh32 (disk_layout[3]);
+
+ layout->list[pos].start = start_off;
+ layout->list[pos].stop = stop_off;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "merged to layout: %u - %u (type %d) from %s",
+ start_off, stop_off, type,
+ layout->list[pos].xlator->name);
+
+ return 0;
+}
+
+
+int
+dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
+ int op_ret, int op_errno, dict_t *xattr)
+{
+ int i = 0;
+ int ret = -1;
+ int err = -1;
+ int32_t *disk_layout = NULL;
+
+
+ if (op_ret != 0) {
+ err = op_errno;
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == NULL) {
+ layout->list[i].err = err;
+ layout->list[i].xlator = subvol;
+ break;
+ }
+ }
+
+ if (op_ret != 0) {
+ ret = 0;
+ goto out;
+ }
+
+ if (xattr) {
+ /* during lookup and not mkdir */
+ ret = dict_get_ptr (xattr, "trusted.glusterfs.dht",
+ VOID(&disk_layout));
+ }
+
+ if (ret != 0) {
+ layout->list[i].err = -1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "missing disk layout on %s. err = %d",
+ subvol->name, err);
+ ret = 0;
+ goto out;
+ }
+
+ ret = dht_disk_layout_merge (this, layout, i, disk_layout);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "layout merge from subvolume %s failed",
+ subvol->name);
+ goto out;
+ }
+ layout->list[i].err = 0;
+
+out:
+ return ret;
+}
+
+
+void
+dht_layout_entry_swap (dht_layout_t *layout, int i, int j)
+{
+ uint32_t start_swap = 0;
+ uint32_t stop_swap = 0;
+ xlator_t *xlator_swap = 0;
+ int err_swap = 0;
+
+
+ start_swap = layout->list[i].start;
+ stop_swap = layout->list[i].stop;
+ xlator_swap = layout->list[i].xlator;
+ err_swap = layout->list[i].err;
+
+ layout->list[i].start = layout->list[j].start;
+ layout->list[i].stop = layout->list[j].stop;
+ layout->list[i].xlator = layout->list[j].xlator;
+ layout->list[i].err = layout->list[j].err;
+
+ layout->list[j].start = start_swap;
+ layout->list[j].stop = stop_swap;
+ layout->list[j].xlator = xlator_swap;
+ layout->list[j].err = err_swap;
+}
+
+
+int64_t
+dht_layout_entry_cmp (dht_layout_t *layout, int i, int j)
+{
+ int64_t diff = 0;
+
+ if (layout->list[i].err || layout->list[j].err)
+ diff = layout->list[i].err - layout->list[j].err;
+ else
+ diff = (int64_t) layout->list[i].start
+ - (int64_t) layout->list[j].start;
+
+ return diff;
+}
+
+
+int
+dht_layout_sort (dht_layout_t *layout)
+{
+ int i = 0;
+ int j = 0;
+ int64_t ret = 0;
+
+ /* TODO: O(n^2) -- bad bad */
+
+ for (i = 0; i < layout->cnt - 1; i++) {
+ for (j = i + 1; j < layout->cnt; j++) {
+ ret = dht_layout_entry_cmp (layout, i, j);
+ if (ret > 0)
+ dht_layout_entry_swap (layout, i, j);
+ }
+ }
+
+ return 0;
+}
+
+
+int
+dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
+ uint32_t *holes_p, uint32_t *overlaps_p,
+ uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p)
+{
+ dht_conf_t *conf = NULL;
+ uint32_t holes = 0;
+ uint32_t overlaps = 0;
+ uint32_t missing = 0;
+ uint32_t down = 0;
+ uint32_t misc = 0;
+ uint32_t hole_cnt = 0;
+ uint32_t overlap_cnt = 0;
+ int i = 0;
+ int ret = 0;
+ uint32_t prev_stop = 0;
+ uint32_t last_stop = 0;
+ char is_virgin = 1;
+
+
+ conf = this->private;
+
+ /* TODO: explain WTF is happening */
+
+ last_stop = layout->list[0].start - 1;
+ prev_stop = last_stop;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err) {
+ switch (layout->list[i].err) {
+ case -1:
+ case ENOENT:
+ missing++;
+ break;
+ case ENOTCONN:
+ down++;
+ break;
+ default:
+ misc++;
+ }
+ continue;
+ }
+
+ is_virgin = 0;
+
+ if ((prev_stop + 1) < layout->list[i].start) {
+ hole_cnt++;
+ holes += (layout->list[i].start - (prev_stop + 1));
+ }
+
+ if ((prev_stop + 1) > layout->list[i].start) {
+ overlap_cnt++;
+ overlaps += ((prev_stop + 1) - layout->list[i].start);
+ }
+ prev_stop = layout->list[i].stop;
+ }
+
+ if ((last_stop - prev_stop) || is_virgin)
+ hole_cnt++;
+ holes += (last_stop - prev_stop);
+
+ if (holes_p)
+ *holes_p = hole_cnt;
+
+ if (overlaps_p)
+ *overlaps_p = overlap_cnt;
+
+ if (missing_p)
+ *missing_p = missing;
+
+ if (down_p)
+ *down_p = down;
+
+ if (misc_p)
+ *misc_p = misc;
+
+ return ret;
+}
+
+
+int
+dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout)
+{
+ int ret = 0;
+ uint32_t holes = 0;
+ uint32_t overlaps = 0;
+ uint32_t missing = 0;
+ uint32_t down = 0;
+ uint32_t misc = 0;
+
+
+ ret = dht_layout_sort (layout);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "sort failed?! how the ....");
+ goto out;
+ }
+
+ ret = dht_layout_anomalies (this, loc, layout,
+ &holes, &overlaps,
+ &missing, &down, &misc);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error while finding anomalies in %s -- not good news",
+ loc->path);
+ goto out;
+ }
+
+ if (holes || overlaps) {
+ if (missing == layout->cnt) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "directory %s looked up first time",
+ loc->path);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR,
+ "found anomalies in %s. holes=%d overlaps=%d",
+ loc->path, holes, overlaps);
+ }
+ ret = 1;
+ }
+
+out:
+ return ret;
+}
+
+
+int
+dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
+ loc_t *loc, dict_t *xattr)
+{
+ int idx = 0;
+ int pos = -1;
+ int ret = -1;
+ int32_t *disk_layout = NULL;
+ int32_t count = -1;
+ uint32_t start_off = -1;
+ uint32_t stop_off = -1;
+
+
+ for (idx = 0; idx < layout->cnt; idx++) {
+ if (layout->list[idx].xlator == subvol) {
+ pos = idx;
+ break;
+ }
+ }
+
+ if (pos == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s - no layout info for subvolume %s",
+ loc->path, subvol->name);
+ ret = 1;
+ goto out;
+ }
+
+ if (xattr == NULL) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s - xattr dictionary is NULL",
+ loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_ptr (xattr, "trusted.glusterfs.dht",
+ VOID(&disk_layout));
+
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s - disk layout missing", loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ count = ntoh32 (disk_layout[0]);
+ if (count != 1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s - disk layout has invalid count %d",
+ loc->path, count);
+ ret = -1;
+ goto out;
+ }
+
+ start_off = ntoh32 (disk_layout[2]);
+ stop_off = ntoh32 (disk_layout[3]);
+
+ if ((layout->list[pos].start != start_off)
+ || (layout->list[pos].stop != stop_off)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvol: %s; inode layout - %"PRId32" - %"PRId32"; "
+ "disk layout - %"PRId32" - %"PRId32,
+ layout->list[pos].xlator->name,
+ layout->list[pos].start, layout->list[pos].stop,
+ start_off, stop_off);
+ ret = 1;
+ } else {
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c
new file mode 100644
index 000000000..9cc24ccf6
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-linkfile.c
@@ -0,0 +1,224 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "compat.h"
+#include "dht-common.h"
+
+
+
+int
+dht_linkfile_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno)
+{
+ dht_local_t *local = NULL;
+
+
+ local = frame->local;
+ local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno,
+ local->linkfile.inode,
+ &local->linkfile.stbuf);
+
+ return 0;
+}
+
+
+int
+dht_linkfile_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct stat *stbuf)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ dict_t *xattr = NULL;
+ data_t *str_data = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret == -1)
+ goto err;
+
+ xattr = get_new_dict ();
+ if (!xattr) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->linkfile.xattr = dict_ref (xattr);
+ local->linkfile.inode = inode_ref (inode);
+
+ str_data = str_to_data (local->linkfile.srcvol->name);
+ if (!str_data) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ ret = dict_set (xattr, "trusted.glusterfs.dht.linkto", str_data);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to initialize linkfile data");
+ op_errno = EINVAL;
+ }
+ str_data = NULL;
+
+ local->linkfile.stbuf = *stbuf;
+
+ STACK_WIND (frame, dht_linkfile_xattr_cbk,
+ prev->this, prev->this->fops->setxattr,
+ &local->linkfile.loc, local->linkfile.xattr, 0);
+
+ return 0;
+
+err:
+ if (str_data) {
+ data_destroy (str_data);
+ str_data = NULL;
+ }
+
+ local->linkfile.linkfile_cbk (frame, cookie, this,
+ op_ret, op_errno, inode, stbuf);
+ return 0;
+}
+
+
+int
+dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
+ xlator_t *tovol, xlator_t *fromvol, loc_t *loc)
+{
+ dht_local_t *local = NULL;
+
+
+ local = frame->local;
+ local->linkfile.linkfile_cbk = linkfile_cbk;
+ local->linkfile.srcvol = tovol;
+ loc_copy (&local->linkfile.loc, loc);
+
+ STACK_WIND (frame, dht_linkfile_create_cbk,
+ fromvol, fromvol->fops->mknod, loc,
+ S_IFREG | DHT_LINKFILE_MODE, 0);
+
+ return 0;
+}
+
+
+int
+dht_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+ prev = cookie;
+ subvol = prev->this;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "unlinking linkfile %s on %s failed (%s)",
+ local->loc.path, subvol->name, strerror (op_errno));
+ }
+
+ DHT_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+
+int
+dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
+ xlator_t *subvol, loc_t *loc)
+{
+ call_frame_t *unlink_frame = NULL;
+ dht_local_t *unlink_local = NULL;
+
+ unlink_frame = copy_frame (frame);
+ if (!unlink_frame) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ unlink_local = dht_local_init (unlink_frame);
+ if (!unlink_local) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ loc_copy (&unlink_local->loc, loc);
+
+ STACK_WIND (unlink_frame, dht_linkfile_unlink_cbk,
+ subvol, subvol->fops->unlink,
+ &unlink_local->loc);
+
+ return 0;
+err:
+ if (unlink_frame)
+ DHT_STACK_DESTROY (unlink_frame);
+
+ return -1;
+}
+
+
+xlator_t *
+dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct stat *stbuf,
+ dict_t *xattr)
+{
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ void *volname = NULL;
+ int i = 0, ret = 0;
+
+
+ conf = this->private;
+
+ if (!xattr)
+ goto out;
+
+ ret = dict_get_ptr (xattr, "trusted.glusterfs.dht.linkto", &volname);
+
+ if ((-1 == ret) || !volname)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (strcmp (conf->subvolumes[i]->name, (char *)volname) == 0) {
+ subvol = conf->subvolumes[i];
+ break;
+ }
+ }
+
+out:
+ return subvol;
+}
+
+
diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c
new file mode 100644
index 000000000..e5532f1bc
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-rename.c
@@ -0,0 +1,562 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+/* TODO: link(oldpath, newpath) fails if newpath already exists. DHT should
+ * delete the newpath if it gets EEXISTS from link() call.
+ */
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "dht-common.h"
+#include "defaults.h"
+
+
+int
+dht_rename_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+
+
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ /* TODO: undo the damage */
+
+ gf_log (this->name, GF_LOG_ERROR,
+ "rename %s -> %s on %s failed (%s)",
+ local->loc.path, local->loc2.path,
+ prev->this->name, strerror (op_errno));
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ } else {
+ /* TODO: construct proper stbuf for dir */
+ local->stbuf = *stbuf;
+ }
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ &local->stbuf);
+ }
+
+ return 0;
+}
+
+
+
+int
+dht_rename_dir_do (call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+
+ conf = this->private;
+ local = frame->local;
+
+ if (local->op_ret == -1)
+ goto err;
+
+ local->call_cnt = conf->subvolume_cnt;
+ local->op_ret = 0;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_rename_dir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->rename,
+ &local->loc, &local->loc2);
+ }
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+ return 0;
+}
+
+
+int
+dht_rename_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ call_frame_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret > 2) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "readdir on %s for %s returned %d entries",
+ prev->this->name, local->loc.path, op_ret);
+ local->op_ret = -1;
+ local->op_errno = ENOTEMPTY;
+ }
+
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt)) {
+ dht_rename_dir_do (frame, this);
+ }
+
+ return 0;
+}
+
+
+int
+dht_rename_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ call_frame_t *prev = NULL;
+
+
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "opendir on %s for %s failed (%s)",
+ prev->this->name, local->loc.path,
+ strerror (op_errno));
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_rename_readdir_cbk,
+ prev->this, prev->this->fops->readdir,
+ local->fd, 4096, 0);
+
+ return 0;
+
+err:
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt)) {
+ dht_rename_dir_do (frame, this);
+ }
+
+ return 0;
+}
+
+
+int
+dht_rename_dir (call_frame_t *frame, xlator_t *this)
+{
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int i = 0;
+ int op_errno = -1;
+
+
+ conf = frame->this->private;
+ local = frame->local;
+
+ local->call_cnt = conf->subvolume_cnt;
+
+ local->fd = fd_create (local->loc.inode, frame->root->pid);
+ if (!local->fd) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->op_ret = 0;
+
+ if (!local->dst_cached) {
+ dht_rename_dir_do (frame, this);
+ return 0;
+ }
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_rename_opendir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->opendir,
+ &local->loc2, local->fd);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+int
+dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int this_call_cnt = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ this_call_cnt = dht_frame_return (frame);
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "unlink on %s failed (%s)",
+ prev->this->name, strerror (op_errno));
+ }
+
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ &local->stbuf);
+
+ return 0;
+}
+
+
+int
+dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *rename_subvol = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ src_hashed = local->src_hashed;
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "rename on %s failed (%s)", prev->this->name,
+ strerror (op_errno));
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unwind;
+ }
+
+ /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk
+ * is called. since rename has already happened on rename_subvol,
+ * unlink should not be sent for oldpath (either linkfile or cached-file)
+ * on rename_subvol. */
+ if (src_cached == dst_cached)
+ rename_subvol = src_cached;
+ else
+ rename_subvol = dst_hashed;
+
+ /* TODO: delete files in background */
+
+ if (src_cached != dst_hashed && src_cached != dst_cached)
+ local->call_cnt++;
+
+ if (src_hashed != rename_subvol && src_hashed != src_cached)
+ local->call_cnt++;
+
+ if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached)
+ local->call_cnt++;
+
+ if (local->call_cnt == 0)
+ goto unwind;
+
+ if (src_cached != dst_hashed && src_cached != dst_cached) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "deleting old src datafile %s @ %s",
+ local->loc.path, src_cached->name);
+
+ STACK_WIND (frame, dht_rename_unlink_cbk,
+ src_cached, src_cached->fops->unlink,
+ &local->loc);
+ }
+
+ if (src_hashed != rename_subvol && src_hashed != src_cached) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "deleting old src linkfile %s @ %s",
+ local->loc.path, src_hashed->name);
+
+ STACK_WIND (frame, dht_rename_unlink_cbk,
+ src_hashed, src_hashed->fops->unlink,
+ &local->loc);
+ }
+
+ if (dst_cached
+ && (dst_cached != dst_hashed)
+ && (dst_cached != src_cached)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "deleting old dst datafile %s @ %s",
+ local->loc2.path, dst_cached->name);
+
+ STACK_WIND (frame, dht_rename_unlink_cbk,
+ dst_cached, dst_cached->fops->unlink,
+ &local->loc2);
+ }
+ return 0;
+
+unwind:
+ DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ &local->stbuf);
+
+ return 0;
+}
+
+
+int
+dht_do_rename (call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *this = NULL;
+ xlator_t *rename_subvol = NULL;
+
+
+ local = frame->local;
+ this = frame->this;
+
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
+ src_cached = local->src_cached;
+
+ if (src_cached == dst_cached)
+ rename_subvol = src_cached;
+ else
+ rename_subvol = dst_hashed;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "renaming %s => %s (%s)",
+ local->loc.path, local->loc2.path, rename_subvol->name);
+
+ STACK_WIND (frame, dht_rename_cbk,
+ rename_subvol, rename_subvol->fops->rename,
+ &local->loc, &local->loc2);
+
+ return 0;
+}
+
+
+int
+dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct stat *stbuf)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int this_call_cnt = 0;
+
+
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "link/file on %s failed (%s)",
+ prev->this->name, strerror (op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ if (local->op_ret == -1)
+ goto unwind;
+
+ dht_do_rename (frame);
+ }
+
+ return 0;
+
+unwind:
+ DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ &local->stbuf);
+
+ return 0;
+}
+
+
+int
+dht_rename_create_links (call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ int call_cnt = 0;
+
+
+ local = frame->local;
+ this = frame->this;
+
+ src_hashed = local->src_hashed;
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
+
+ if (src_cached == dst_cached)
+ goto nolinks;
+
+ if (dst_hashed != src_hashed && dst_hashed != src_cached)
+ call_cnt++;
+
+ if (src_cached != dst_hashed)
+ call_cnt++;
+
+ local->call_cnt = call_cnt;
+
+ if (dst_hashed != src_hashed && dst_hashed != src_cached) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "linkfile %s @ %s => %s",
+ local->loc.path, dst_hashed->name, src_cached->name);
+ dht_linkfile_create (frame, dht_rename_links_cbk,
+ src_cached, dst_hashed, &local->loc);
+ }
+
+ if (src_cached != dst_hashed) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "link %s => %s (%s)", local->loc.path,
+ local->loc2.path, src_cached->name);
+ STACK_WIND (frame, dht_rename_links_cbk,
+ src_cached, src_cached->fops->link,
+ &local->loc, &local->loc2);
+ }
+
+nolinks:
+ if (!call_cnt) {
+ /* skip to next step */
+ dht_do_rename (frame);
+ }
+
+ return 0;
+}
+
+
+int
+dht_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc)
+{
+ xlator_t *src_cached = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ int op_errno = -1;
+ int ret = -1;
+ dht_local_t *local = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (oldloc, err);
+ VALIDATE_OR_GOTO (newloc, err);
+
+ src_hashed = dht_subvol_get_hashed (this, oldloc);
+ if (!src_hashed) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no subvolume in layout for path=%s",
+ oldloc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ src_cached = dht_subvol_get_cached (this, oldloc->inode);
+ if (!src_cached) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for path=%s", oldloc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ dst_hashed = dht_subvol_get_hashed (this, newloc);
+ if (!dst_hashed) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no subvolume in layout for path=%s",
+ newloc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (newloc->inode)
+ dst_cached = dht_subvol_get_cached (this, newloc->inode);
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ ret = loc_copy (&local->loc, oldloc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ ret = loc_copy (&local->loc2, newloc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ local->src_hashed = src_hashed;
+ local->src_cached = src_cached;
+ local->dst_hashed = dst_hashed;
+ local->dst_cached = dst_cached;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "renaming %s (hash=%s/cache=%s) => %s (hash=%s/cache=%s)",
+ oldloc->path, src_hashed->name, src_cached->name,
+ newloc->path, dst_hashed->name,
+ dst_cached ? dst_cached->name : "<nul>");
+
+ if (S_ISDIR (oldloc->inode->st_mode)) {
+ dht_rename_dir (frame, this);
+ } else {
+ local->op_ret = 0;
+ dht_rename_create_links (frame);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
new file mode 100644
index 000000000..ee32b2253
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -0,0 +1,460 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "dht-common.h"
+
+
+int
+dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)
+{
+ dht_local_t *local = NULL;
+
+
+ local = frame->local;
+ local->selfheal.dir_cbk (frame, NULL, frame->this, ret,
+ local->op_errno);
+
+ return 0;
+}
+
+
+int
+dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *subvol = NULL;
+ int i = 0;
+ dht_layout_t *layout = NULL;
+ int err = 0;
+ int this_call_cnt = 0;
+
+ local = frame->local;
+ layout = local->selfheal.layout;
+ prev = cookie;
+ subvol = prev->this;
+
+ if (op_ret == 0)
+ err = 0;
+ else
+ err = op_errno;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == subvol) {
+ layout->list[i].err = err;
+ break;
+ }
+ }
+
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt)) {
+ dht_selfheal_dir_finish (frame, this, 0);
+ }
+
+ return 0;
+}
+
+
+int
+dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout, int i)
+{
+ xlator_t *subvol = NULL;
+ dict_t *xattr = NULL;
+ int ret = 0;
+ xlator_t *this = NULL;
+ int32_t *disk_layout = NULL;
+
+
+ subvol = layout->list[i].xlator;
+ this = frame->this;
+
+ xattr = get_new_dict ();
+ if (!xattr) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ ret = dht_disk_layout_extract (this, layout, i, &disk_layout);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to extract disk layout");
+ goto err;
+ }
+
+ ret = dict_set_bin (xattr, "trusted.glusterfs.dht",
+ disk_layout, 4 * 4);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set xattr dictionary");
+ goto err;
+ }
+ disk_layout = NULL;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setting hash range %u - %u (type %d) on subvolume %s for %s",
+ layout->list[i].start, layout->list[i].stop,
+ layout->type, subvol->name, loc->path);
+
+ dict_ref (xattr);
+
+ STACK_WIND (frame, dht_selfheal_dir_xattr_cbk,
+ subvol, subvol->fops->setxattr,
+ loc, xattr, 0);
+
+ dict_unref (xattr);
+
+ return 0;
+
+err:
+ if (xattr)
+ dict_destroy (xattr);
+
+ if (disk_layout)
+ FREE (disk_layout);
+
+ dht_selfheal_dir_xattr_cbk (frame, subvol, frame->this,
+ -1, ENOMEM);
+ return 0;
+}
+
+
+int
+dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ int missing_xattr = 0;
+ int i = 0;
+ int ret = 0;
+ xlator_t *this = NULL;
+
+ local = frame->local;
+ this = frame->this;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err != -1 || !layout->list[i].stop)
+ continue;
+ /* attr missing and layout present */
+ missing_xattr++;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%d subvolumes missing xattr for %s",
+ missing_xattr, loc->path);
+
+ if (missing_xattr == 0) {
+ dht_selfheal_dir_finish (frame, this, 0);
+ return 0;
+ }
+
+ local->call_cnt = missing_xattr;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err != -1 || !layout->list[i].stop)
+ continue;
+
+ ret = dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i);
+
+ if (--missing_xattr == 0)
+ break;
+ }
+ return 0;
+}
+
+
+int
+dht_selfheal_dir_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct stat *stbuf)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *subvol = NULL;
+ int i = 0;
+ int this_call_cnt = 0;
+
+
+ local = frame->local;
+ layout = local->selfheal.layout;
+ prev = cookie;
+ subvol = prev->this;
+
+ if ((op_ret == 0) || (op_errno == EEXIST)) {
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == subvol) {
+ layout->list[i].err = -1;
+ break;
+ }
+ }
+ }
+
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt)) {
+ dht_selfheal_dir_xattr (frame, &local->loc, layout);
+ }
+
+ return 0;
+}
+
+
+int
+dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout, int force)
+{
+ int missing_dirs = 0;
+ int i = 0;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+
+
+ local = frame->local;
+ this = frame->this;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == ENOENT || force)
+ missing_dirs++;
+ }
+
+ if (missing_dirs == 0) {
+ dht_selfheal_dir_xattr (frame, loc, layout);
+ return 0;
+ }
+
+ local->call_cnt = missing_dirs;
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == ENOENT || force) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "creating directory %s on subvol %s",
+ loc->path, layout->list[i].xlator->name);
+
+ STACK_WIND (frame, dht_selfheal_dir_mkdir_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->mkdir,
+ loc, local->stbuf.st_mode);
+ }
+ }
+
+ return 0;
+}
+
+void
+dht_selfheal_fix_this_virgin (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout)
+{
+ dht_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ uint32_t chunk = 0;
+ int i = 0;
+ uint32_t start = 0;
+ int cnt = 0;
+ int err = 0;
+
+ this = frame->this;
+ conf = this->private;
+
+ for (i = 0; i < layout->cnt; i++) {
+ err = layout->list[i].err;
+ if (err == -1) {
+ cnt++;
+ }
+ }
+
+ chunk = ((unsigned long) 0xffffffff) / cnt;
+
+ start = 0;
+ for (i = 0; i < layout->cnt; i++) {
+ err = layout->list[i].err;
+ if (err == -1) {
+ layout->list[i].start = start;
+ layout->list[i].stop = start + chunk - 1;
+
+ start = start + chunk;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "gave fix: %u - %u on %s for %s",
+ layout->list[i].start, layout->list[i].stop,
+ layout->list[i].xlator->name, loc->path);
+ if (--cnt == 0) {
+ layout->list[i].stop = 0xffffffff;
+ break;
+ }
+ }
+ }
+}
+
+
+int
+dht_selfheal_dir_getafix (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout)
+{
+ dht_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ dht_local_t *local = NULL;
+ int missing = -1;
+ int down = -1;
+ int holes = -1;
+ int ret = -1;
+ int i = -1;
+
+ this = frame->this;
+ conf = this->private;
+ local = frame->local;
+
+ missing = local->selfheal.missing;
+ down = local->selfheal.down;
+ holes = local->selfheal.hole_cnt;
+
+ if ((missing + down) == conf->subvolume_cnt) {
+ dht_selfheal_fix_this_virgin (frame, loc, layout);
+ ret = 0;
+ }
+
+ if (holes <= down) {
+ /* the down subvol might fill up the holes */
+ ret = 0;
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ /* directory not present */
+ if (layout->list[i].err == ENOENT) {
+ ret = 0;
+ break;
+ }
+ }
+
+ /* TODO: give a fix to these non-virgins */
+
+ return ret;
+}
+
+
+int
+dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ loc_t *loc, dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ uint32_t holes = 0;
+ uint32_t overlaps = 0;
+ uint32_t missing = 0;
+ uint32_t down = 0;
+ uint32_t misc = 0;
+ int ret = 0;
+ xlator_t *this = NULL;
+
+
+ local = frame->local;
+ this = frame->this;
+
+ ret = dht_layout_anomalies (this, loc, layout,
+ &local->selfheal.hole_cnt,
+ &local->selfheal.overlaps_cnt,
+ &local->selfheal.missing,
+ &local->selfheal.down,
+ &local->selfheal.misc);
+
+ holes = local->selfheal.hole_cnt;
+ overlaps = local->selfheal.overlaps_cnt;
+ missing = local->selfheal.missing;
+ down = local->selfheal.down;
+ misc = local->selfheal.misc;
+
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = layout;
+
+/*
+ if (down) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%d subvolumes down -- not fixing", down);
+ ret = 0;
+ goto sorry_no_fix;
+ }
+
+ if (overlaps) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "not fixing overlaps in %s", loc->path);
+ local->op_errno = EINVAL;
+ ret = -1;
+ goto sorry_no_fix;
+ }
+
+ if (misc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%d subvolumes have unrecoverable errors", misc);
+ ret = 0;
+ goto sorry_no_fix;
+ }
+
+ if (holes > missing) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%d holes and %d pigeons -- not fixing",
+ holes, missing);
+ ret = 0;
+ goto sorry_no_fix;
+ }
+*/
+ ret = dht_selfheal_dir_getafix (frame, loc, layout);
+
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "the directory is not a virgin");
+ goto sorry_no_fix;
+ }
+
+ dht_selfheal_dir_mkdir (frame, loc, layout, 0);
+
+ return 0;
+
+sorry_no_fix:
+ /* TODO: need to put appropriate local->op_errno */
+ dht_selfheal_dir_finish (frame, this, ret);
+
+ return 0;
+}
+
+
+int
+dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ loc_t *loc, dht_layout_t *layout)
+{
+ int ret = 0;
+ dht_local_t *local = NULL;
+
+
+ local = frame->local;
+
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = layout;
+
+ ret = dht_selfheal_dir_mkdir (frame, loc, layout, 1);
+
+ return 0;
+}
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
new file mode 100644
index 000000000..836e7a4e8
--- /dev/null
+++ b/xlators/cluster/dht/src/dht.c
@@ -0,0 +1,222 @@
+/*
+ Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+/* TODO: add NS locking */
+
+#include "dht-common.c"
+
+/* TODO:
+ - use volumename in xattr instead of "dht"
+ - use NS locks
+ - handle all cases in self heal layout reconstruction
+ - complete linkfile selfheal
+*/
+
+
+
+int
+notify (xlator_t *this, int event, void *data, ...)
+{
+ int ret = -1;
+
+ ret = dht_notify (this, event, data);
+
+ return ret;
+}
+
+void
+fini (xlator_t *this)
+{
+ int i = 0;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (conf) {
+ if (conf->file_layouts) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ FREE (conf->file_layouts[i]);
+ }
+ FREE (conf->file_layouts);
+ }
+
+ if (conf->default_dir_layout)
+ FREE (conf->default_dir_layout);
+
+ if (conf->subvolumes)
+ FREE (conf->subvolumes);
+
+ if (conf->subvolume_status)
+ FREE (conf->subvolume_status);
+
+ FREE (conf);
+ }
+
+ return;
+}
+
+int
+init (xlator_t *this)
+{
+ dht_conf_t *conf = NULL;
+ char *lookup_unhashed_str = NULL;
+ int ret = -1;
+ int i = 0;
+
+ if (!this->children) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "DHT needs more than one child defined");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ conf = CALLOC (1, sizeof (*conf));
+ if (!conf) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ conf->search_unhashed = 0;
+
+ if (dict_get_str (this->options, "lookup-unhashed",
+ &lookup_unhashed_str) == 0) {
+ gf_string2boolean (lookup_unhashed_str,
+ &conf->search_unhashed);
+ }
+
+ ret = dht_init_subvolumes (this, conf);
+ if (ret == -1) {
+ goto err;
+ }
+
+ ret = dht_layouts_init (this, conf);
+ if (ret == -1) {
+ goto err;
+ }
+
+ LOCK_INIT (&conf->subvolume_lock);
+
+ conf->gen = 1;
+
+ this->private = conf;
+
+ return 0;
+
+err:
+ if (conf) {
+ if (conf->file_layouts) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ FREE (conf->file_layouts[i]);
+ }
+ FREE (conf->file_layouts);
+ }
+
+ if (conf->default_dir_layout)
+ FREE (conf->default_dir_layout);
+
+ if (conf->subvolumes)
+ FREE (conf->subvolumes);
+
+ if (conf->subvolume_status)
+ FREE (conf->subvolume_status);
+
+ FREE (conf);
+ }
+
+ return -1;
+}
+
+
+struct xlator_fops fops = {
+ .lookup = dht_lookup,
+ .mknod = dht_mknod,
+ .create = dht_create,
+
+ .stat = dht_stat,
+ .chmod = dht_chmod,
+ .chown = dht_chown,
+ .fchown = dht_fchown,
+ .fchmod = dht_fchmod,
+ .fstat = dht_fstat,
+ .utimens = dht_utimens,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .setxattr = dht_setxattr,
+ .getxattr = dht_getxattr,
+ .removexattr = dht_removexattr,
+ .open = dht_open,
+ .readv = dht_readv,
+ .writev = dht_writev,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .statfs = dht_statfs,
+ .lk = dht_lk,
+ .opendir = dht_opendir,
+ .readdir = dht_readdir,
+ .fsyncdir = dht_fsyncdir,
+ .symlink = dht_symlink,
+ .unlink = dht_unlink,
+ .link = dht_link,
+ .mkdir = dht_mkdir,
+ .rmdir = dht_rmdir,
+ .rename = dht_rename,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .entrylk = dht_entrylk,
+ .fentrylk = dht_fentrylk,
+ .xattrop = dht_xattrop,
+ .fxattrop = dht_fxattrop,
+#if 0
+ .setdents = dht_setdents,
+ .getdents = dht_getdents,
+ .checksum = dht_checksum,
+#endif
+};
+
+
+struct xlator_mops mops = {
+};
+
+
+struct xlator_cbks cbks = {
+// .release = dht_release,
+// .releasedir = dht_releasedir,
+ .forget = dht_forget
+};
+
+
+struct volume_options options[] = {
+ { .key = {"lookup-unhashed"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c
new file mode 100644
index 000000000..6333e002f
--- /dev/null
+++ b/xlators/cluster/dht/src/nufa.c
@@ -0,0 +1,684 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "dht-common.c"
+
+/* TODO: all 'TODO's in dht.c holds good */
+
+int
+nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct stat *stbuf, dict_t *xattr)
+{
+ dht_layout_t *layout = NULL;
+ xlator_t *subvol = NULL;
+ char is_linkfile = 0;
+ char is_dir = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+ int i = 0;
+ call_frame_t *prev = NULL;
+ int call_cnt = 0;
+
+
+ conf = this->private;
+
+ prev = cookie;
+ local = frame->local;
+ loc = &local->loc;
+
+ if (ENTRY_MISSING (op_ret, op_errno)) {
+ if (conf->search_unhashed) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+ }
+
+ if (op_ret == -1)
+ goto out;
+
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ is_dir = check_is_dir (inode, stbuf, xattr);
+
+ if (!is_dir && !is_linkfile) {
+ /* non-directory and not a linkfile */
+
+ dht_itransform (this, prev->this, stbuf->st_ino,
+ &stbuf->st_ino);
+
+ layout = dht_layout_for_subvol (this, prev->this);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no pre-set layout for subvolume %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ inode_ctx_put (inode, this, (uint64_t)(long)layout);
+ goto out;
+ }
+
+ if (is_dir) {
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->inode = inode_ref (inode);
+ local->xattr = dict_ref (xattr);
+
+ local->op_ret = 0;
+ local->op_errno = 0;
+
+ local->layout = dht_layout_new (this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_lookup_dir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+ }
+
+ if (is_linkfile) {
+ subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
+
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "linkfile not having link subvolume. path=%s",
+ loc->path);
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+
+ STACK_WIND (frame, dht_lookup_linkfile_cbk,
+ subvol, subvol->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+
+ return 0;
+
+out:
+ if (!local->hashed_subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no subvolume in layout for path=%s",
+ local->loc.path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_lookup_cbk,
+ local->hashed_subvol, local->hashed_subvol->fops->lookup,
+ &local->loc, local->xattr_req);
+
+ return 0;
+
+ err:
+ DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, xattr);
+ return 0;
+}
+
+int
+nufa_lookup (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xattr_req)
+{
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int op_errno = -1;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ conf = this->private;
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ ret = loc_dup (loc, &local->loc);
+ if (ret == -1) {
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "copying location failed for path=%s",
+ loc->path);
+ goto err;
+ }
+
+ if (xattr_req) {
+ local->xattr_req = dict_ref (xattr_req);
+ } else {
+ local->xattr_req = dict_new ();
+ }
+
+ hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
+ cached_subvol = dht_subvol_get_cached (this, local->loc.inode);
+
+ local->cached_subvol = cached_subvol;
+ local->hashed_subvol = hashed_subvol;
+
+ if (is_revalidate (loc)) {
+ layout = dht_layout_get (this, loc->inode);
+
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "revalidate without cache. path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (layout->gen && (layout->gen < conf->gen)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "incomplete layout failure for path=%s",
+ loc->path);
+ op_errno = EAGAIN;
+ goto err;
+ }
+
+ local->inode = inode_ref (loc->inode);
+ local->st_ino = loc->inode->ino;
+
+ local->call_cnt = layout->cnt;
+ call_cnt = local->call_cnt;
+
+ /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
+ * revalidates directly go to the cached-subvolume.
+ */
+ ret = dict_set_uint32 (local->xattr_req,
+ "trusted.glusterfs.dht", 4 * 4);
+
+ for (i = 0; i < layout->cnt; i++) {
+ subvol = layout->list[i].xlator;
+
+ STACK_WIND (frame, dht_revalidate_cbk,
+ subvol, subvol->fops->lookup,
+ loc, local->xattr_req);
+
+ if (!--call_cnt)
+ break;
+ }
+ } else {
+ ret = dict_set_uint32 (local->xattr_req,
+ "trusted.glusterfs.dht", 4 * 4);
+
+ ret = dict_set_uint32 (local->xattr_req,
+ "trusted.glusterfs.dht.linkto", 256);
+
+ /* Send it to only local volume */
+ STACK_WIND (frame, nufa_local_lookup_cbk,
+ conf->local_volume,
+ conf->local_volume->fops->lookup,
+ loc, local->xattr_req);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+nufa_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct stat *stbuf)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ dht_conf_t *conf = NULL;
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ if (op_ret == -1)
+ goto err;
+
+ STACK_WIND (frame, dht_create_cbk,
+ conf->local_volume, conf->local_volume->fops->create,
+ &local->loc, local->flags, local->mode, local->fd);
+
+ return 0;
+
+ err:
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+nufa_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ int ret = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ conf = this->private;
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ if (subvol != conf->local_volume) {
+ /* create a link file instead of actual file */
+ ret = loc_copy (&local->loc, loc);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->fd = fd_ref (fd);
+ local->mode = mode;
+ local->flags = flags;
+
+ dht_linkfile_create (frame, nufa_create_linkfile_create_cbk,
+ conf->local_volume, subvol, loc);
+ return 0;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "creating %s on %s", loc->path, subvol->name);
+
+ STACK_WIND (frame, dht_create_cbk,
+ subvol, subvol->fops->create,
+ loc, flags, mode, fd);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int
+nufa_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct stat *stbuf)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ dht_conf_t *conf = NULL;
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ if (op_ret >= 0) {
+ STACK_WIND (frame, dht_newfile_cbk,
+ conf->local_volume,
+ conf->local_volume->fops->mknod,
+ &local->loc, local->mode, local->rdev);
+
+ return 0;
+ }
+
+ DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf);
+ return 0;
+}
+
+
+int
+nufa_mknod (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, dev_t rdev)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ int ret = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ conf = this->private;
+
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+
+ if (conf->local_volume != subvol) {
+ /* Create linkfile first */
+ ret = loc_copy (&local->loc, loc);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->mode = mode;
+ local->rdev = rdev;
+
+ dht_linkfile_create (frame, nufa_mknod_linkfile_cbk,
+ conf->local_volume, subvol, loc);
+ return 0;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "creating %s on %s", loc->path, subvol->name);
+
+ STACK_WIND (frame, dht_newfile_cbk,
+ subvol, subvol->fops->mknod,
+ loc, mode, rdev);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+notify (xlator_t *this, int event, void *data, ...)
+{
+ int ret = -1;
+
+ ret = dht_notify (this, event, data);
+
+ return ret;
+}
+
+void
+fini (xlator_t *this)
+{
+ int i = 0;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (conf) {
+ if (conf->file_layouts) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ FREE (conf->file_layouts[i]);
+ }
+ FREE (conf->file_layouts);
+ }
+
+ if (conf->default_dir_layout)
+ FREE (conf->default_dir_layout);
+
+ if (conf->subvolumes)
+ FREE (conf->subvolumes);
+
+ if (conf->subvolume_status)
+ FREE (conf->subvolume_status);
+
+ FREE (conf);
+ }
+
+ return;
+}
+
+int
+init (xlator_t *this)
+{
+ dht_conf_t *conf = NULL;
+ xlator_list_t *trav = NULL;
+ data_t *data = NULL;
+ char *local_volname = NULL;
+ char *lookup_unhashed_str = NULL;
+ int ret = -1;
+ int i = 0;
+ char my_hostname[256];
+
+ if (!this->children) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "DHT needs more than one child defined");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ conf = CALLOC (1, sizeof (*conf));
+ if (!conf) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ conf->search_unhashed = 0;
+
+ if (dict_get_str (this->options, "lookup-unhashed",
+ &lookup_unhashed_str) == 0) {
+ gf_string2boolean (lookup_unhashed_str,
+ &conf->search_unhashed);
+ }
+
+ ret = dht_init_subvolumes (this, conf);
+ if (ret == -1) {
+ goto err;
+ }
+
+ ret = dht_layouts_init (this, conf);
+ if (ret == -1) {
+ goto err;
+ }
+
+ LOCK_INIT (&conf->subvolume_lock);
+
+ conf->gen = 1;
+
+ local_volname = "localhost";
+ ret = gethostname (my_hostname, 256);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "could not find hostname (%s)",
+ strerror (errno));
+ }
+
+ if (ret == 0)
+ local_volname = my_hostname;
+
+ data = dict_get (this->options, "local-volume-name");
+ if (data) {
+ local_volname = data->data;
+ }
+
+ trav = this->children;
+ while (trav) {
+ if (strcmp (trav->xlator->name, local_volname) == 0)
+ break;
+ trav = trav->next;
+ }
+
+ if (!trav) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not find subvolume named '%s'. "
+ "Please define volume with the name as the hostname "
+ "or override it with 'option local-volume-name'",
+ local_volname);
+ goto err;
+ }
+ /* The volume specified exists */
+ conf->local_volume = trav->xlator;
+
+ this->private = conf;
+
+ return 0;
+
+err:
+ if (conf) {
+ if (conf->file_layouts) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ FREE (conf->file_layouts[i]);
+ }
+ FREE (conf->file_layouts);
+ }
+
+ if (conf->default_dir_layout)
+ FREE (conf->default_dir_layout);
+
+ if (conf->subvolumes)
+ FREE (conf->subvolumes);
+
+ if (conf->subvolume_status)
+ FREE (conf->subvolume_status);
+
+ FREE (conf);
+ }
+
+ return -1;
+}
+
+
+struct xlator_fops fops = {
+ .lookup = nufa_lookup,
+ .create = nufa_create,
+ .mknod = nufa_mknod,
+
+ .stat = dht_stat,
+ .chmod = dht_chmod,
+ .chown = dht_chown,
+ .fchown = dht_fchown,
+ .fchmod = dht_fchmod,
+ .fstat = dht_fstat,
+ .utimens = dht_utimens,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .setxattr = dht_setxattr,
+ .getxattr = dht_getxattr,
+ .removexattr = dht_removexattr,
+ .open = dht_open,
+ .readv = dht_readv,
+ .writev = dht_writev,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .statfs = dht_statfs,
+ .lk = dht_lk,
+ .opendir = dht_opendir,
+ .readdir = dht_readdir,
+ .fsyncdir = dht_fsyncdir,
+ .symlink = dht_symlink,
+ .unlink = dht_unlink,
+ .link = dht_link,
+ .mkdir = dht_mkdir,
+ .rmdir = dht_rmdir,
+ .rename = dht_rename,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .entrylk = dht_entrylk,
+ .fentrylk = dht_fentrylk,
+ .xattrop = dht_xattrop,
+ .fxattrop = dht_fxattrop,
+#if 0
+ .setdents = dht_setdents,
+ .getdents = dht_getdents,
+ .checksum = dht_checksum,
+#endif
+};
+
+
+struct xlator_mops mops = {
+};
+
+
+struct xlator_cbks cbks = {
+// .release = dht_release,
+// .releasedir = dht_releasedir,
+ .forget = dht_forget
+};
+
+
+struct volume_options options[] = {
+ { .key = {"local-volume-name"},
+ .type = GF_OPTION_TYPE_XLATOR
+ },
+ { .key = {"lookup-unhashed"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {NULL} },
+};