summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--libglusterfs/src/syncop.c130
-rw-r--r--libglusterfs/src/syncop.h7
-rw-r--r--xlators/cluster/dht/src/Makefile.am9
-rw-r--r--xlators/cluster/dht/src/dht-common.c1523
-rw-r--r--xlators/cluster/dht/src/dht-common.h413
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c9
-rw-r--r--xlators/cluster/dht/src/dht-helper.c416
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c1115
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c597
-rw-r--r--xlators/cluster/dht/src/dht-linkfile.c7
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c326
-rw-r--r--xlators/cluster/dht/src/dht-rename.c18
-rw-r--r--xlators/cluster/dht/src/dht.c40
-rw-r--r--xlators/cluster/dht/src/nufa.c39
-rw-r--r--xlators/cluster/dht/src/switch.c37
-rw-r--r--xlators/protocol/server/src/server3_1-fops.c7
-rw-r--r--xlators/storage/posix/src/posix.c4
17 files changed, 3040 insertions, 1657 deletions
diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c
index 76d2b5811fe..bbcf5201ebd 100644
--- a/libglusterfs/src/syncop.c
+++ b/libglusterfs/src/syncop.c
@@ -494,8 +494,8 @@ syncop_setxattr (xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags)
}
int
-syncop_listxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *dict)
+syncop_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *dict)
{
struct syncargs *args = NULL;
@@ -516,7 +516,7 @@ syncop_listxattr (xlator_t *subvol, loc_t *loc, dict_t **dict)
{
struct syncargs args = {0, };
- SYNCOP (subvol, (&args), syncop_listxattr_cbk, subvol->fops->getxattr,
+ SYNCOP (subvol, (&args), syncop_getxattr_cbk, subvol->fops->getxattr,
loc, NULL);
if (dict)
@@ -527,6 +527,36 @@ syncop_listxattr (xlator_t *subvol, loc_t *loc, dict_t **dict)
}
int
+syncop_getxattr (xlator_t *subvol, loc_t *loc, dict_t **dict, const char *key)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_getxattr_cbk, subvol->fops->getxattr,
+ loc, key);
+
+ if (dict)
+ *dict = args.xattr;
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_fgetxattr (xlator_t *subvol, fd_t *fd, dict_t **dict, const char *key)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_getxattr_cbk, subvol->fops->fgetxattr,
+ fd, key);
+
+ if (dict)
+ *dict = args.xattr;
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int
syncop_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct statvfs *buf)
@@ -856,3 +886,97 @@ syncop_ftruncate (xlator_t *subvol, fd_t *fd, off_t offset)
errno = args.op_errno;
return args.op_ret;
}
+
+int
+syncop_truncate (xlator_t *subvol, loc_t *loc, off_t offset)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_ftruncate_cbk, subvol->fops->truncate,
+ loc, offset);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+
+}
+
+int
+syncop_fsync (xlator_t *subvol, fd_t *fd)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_fsync_cbk, subvol->fops->fsync,
+ fd, 0);
+
+ errno = args.op_errno;
+ return args.op_ret;
+
+}
+
+int
+syncop_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret == 0)
+ args->iatt1 = *stbuf;
+
+ __wake (args);
+
+ return 0;
+
+}
+
+int
+syncop_fstat (xlator_t *subvol, fd_t *fd, struct iatt *stbuf)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_fstat_cbk, subvol->fops->fstat,
+ fd);
+
+ if (stbuf)
+ *stbuf = args.iatt1;
+
+ errno = args.op_errno;
+ return args.op_ret;
+
+}
+
+int
+syncop_stat (xlator_t *subvol, loc_t *loc, struct iatt *stbuf)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_fstat_cbk, subvol->fops->stat,
+ loc);
+
+ if (stbuf)
+ *stbuf = args.iatt1;
+
+ errno = args.op_errno;
+ return args.op_ret;
+
+}
diff --git a/libglusterfs/src/syncop.h b/libglusterfs/src/syncop.h
index bb898e11edb..1c3fe07b517 100644
--- a/libglusterfs/src/syncop.h
+++ b/libglusterfs/src/syncop.h
@@ -181,6 +181,8 @@ int syncop_statfs (xlator_t *subvol, loc_t *loc, struct statvfs *buf);
int syncop_setxattr (xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags);
int syncop_listxattr (xlator_t *subvol, loc_t *loc, dict_t **dict);
+int syncop_getxattr (xlator_t *xl, loc_t *loc, dict_t **dict, const char *key);
+int syncop_fgetxattr (xlator_t *xl, fd_t *fd, dict_t **dict, const char *key);
int syncop_removexattr (xlator_t *subvol, loc_t *loc, const char *name);
int syncop_create (xlator_t *subvol, loc_t *loc, int32_t flags, mode_t mode,
@@ -197,8 +199,13 @@ int syncop_readv (xlator_t *subvol, fd_t *fd, size_t size, off_t off,
struct iovec **vector, int *count, struct iobref **iobref);
int syncop_ftruncate (xlator_t *subvol, fd_t *fd, off_t offset);
+int syncop_truncate (xlator_t *subvol, loc_t *loc, off_t offset);
int syncop_unlink (xlator_t *subvol, loc_t *loc);
+int syncop_fsync (xlator_t *subvol, fd_t *fd);
+int syncop_fstat (xlator_t *subvol, fd_t *fd, struct iatt *stbuf);
+int syncop_stat (xlator_t *subvol, loc_t *loc, struct iatt *stbuf);
+
#endif /* _SYNCOP_H */
diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am
index cf883a974ca..e35058d65d4 100644
--- a/xlators/cluster/dht/src/Makefile.am
+++ b/xlators/cluster/dht/src/Makefile.am
@@ -2,10 +2,10 @@
xlator_LTLIBRARIES = dht.la nufa.la switch.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-
dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c dht-rebalance.c \
- dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c \
- $(top_builddir)/xlators/lib/src/libxlator.c
+ dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c \
+ dht-common.c dht-inode-write.c dht-inode-read.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
dht_la_SOURCES = $(dht_common_source) dht.c
@@ -21,7 +21,8 @@ nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
switch_la_LDFLAGS = -module -avoidversion
switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = dht-common.h dht-common.c dht-mem-types.h $(top_builddir)/xlators/lib/src/libxlator.h
+noinst_HEADERS = dht-common.h dht-mem-types.h \
+ $(top_builddir)/xlators/lib/src/libxlator.h
AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
-I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index b5168605bbb..6f8594e30b7 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -309,6 +309,14 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->return_estale = 1;
}
+ /* if it is ENOENT, we may have to do a
+ * 'lookup_everywhere()' to make sure
+ * the file is not migrated */
+ if (op_errno == ENOENT) {
+ if (IA_ISREG (local->loc.inode->ia_type)) {
+ local->need_lookup_everywhere = 1;
+ }
+ }
goto unlock;
}
@@ -386,6 +394,18 @@ out:
return 0;
}
+ if (local->need_lookup_everywhere) {
+ /* As the current layout gave ENOENT error, we would
+ need a new layout */
+ dht_layout_unref (this, local->layout);
+ local->layout = NULL;
+
+ /* We know that current cached subvol is no more
+ valid, get the new one */
+ local->cached_subvol = NULL;
+ dht_lookup_everywhere (frame, this, &local->loc);
+ return 0;
+ }
if (local->return_estale) {
local->op_ret = -1;
local->op_errno = ESTALE;
@@ -459,7 +479,7 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
dht_local_t *local = NULL;
xlator_t *hashed_subvol = NULL;
xlator_t *cached_subvol = NULL;
-
+ dht_layout_t *layout = NULL;
local = frame->local;
hashed_subvol = local->hashed_subvol;
@@ -487,6 +507,42 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
return 0;
}
+ if (local->need_lookup_everywhere) {
+ if (uuid_compare (local->gfid, local->inode->gfid)) {
+ /* GFID different, return error */
+ DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL,
+ NULL, NULL, NULL);
+ return 0;
+ }
+ local->op_ret = 0;
+ local->op_errno = 0;
+ layout = dht_layout_for_subvol (this, cached_subvol);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: no pre-set layout for subvolume %s",
+ local->loc.path, (cached_subvol ?
+ cached_subvol->name :
+ "<nil>"));
+ }
+
+ ret = dht_layout_set (this, local->inode, layout);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: failed to set layout for subvol %s",
+ local->loc.path, (cached_subvol ?
+ cached_subvol->name :
+ "<nil>"));
+ }
+
+ WIPE (&local->postparent);
+
+ DHT_STACK_UNWIND (lookup, frame, local->op_ret,
+ local->op_errno, local->inode,
+ &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
+ }
+
if (!hashed_subvol) {
gf_log (this->name, GF_LOG_INFO,
"cannot create linkfile file for %s on %s: "
@@ -560,6 +616,7 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
loc_t *loc = NULL;
xlator_t *link_subvol = NULL;
int ret = -1;
+ int32_t fd_count = 0;
GF_VALIDATE_OR_GOTO ("dht", frame, out);
GF_VALIDATE_OR_GOTO ("dht", this, out);
@@ -642,12 +699,17 @@ unlock:
UNLOCK (&frame->lock);
if (is_linkfile) {
- gf_log (this->name, GF_LOG_INFO,
- "deleting stale linkfile %s on %s",
- loc->path, subvol->name);
- STACK_WIND (frame, dht_lookup_unlink_cbk,
- subvol, subvol->fops->unlink, loc);
- return 0;
+ ret = dict_get_int32 (xattr, GLUSTERFS_OPEN_FD_COUNT, &fd_count);
+ /* Delete the linkfile only if there are no open fds on it.
+ if there is a open-fd, it may be in migration */
+ if (!ret && (fd_count == 0)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "deleting stale linkfile %s on %s",
+ loc->path, subvol->name);
+ STACK_WIND (frame, dht_lookup_unlink_cbk,
+ subvol, subvol->fops->unlink, loc);
+ return 0;
+ }
}
this_call_cnt = dht_frame_return (frame);
@@ -980,7 +1042,7 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
if (!conf)
goto err;
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -1003,16 +1065,14 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
local->xattr_req = dict_new ();
}
+
+ cached_subvol = local->cached_subvol;
if (!hashed_subvol)
hashed_subvol = dht_subvol_get_hashed (this, loc);
- cached_subvol = dht_subvol_get_cached (this, loc->inode);
-
- local->cached_subvol = cached_subvol;
local->hashed_subvol = hashed_subvol;
if (is_revalidate (loc)) {
- local->layout = layout = dht_layout_get (this, loc->inode);
-
+ layout = local->layout;
if (!layout) {
gf_log (this->name, GF_LOG_DEBUG,
"revalidate without cache. path=%s",
@@ -1032,10 +1092,9 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
goto do_fresh_lookup;
}
- local->inode = inode_ref (loc->inode);
+ local->inode = inode_ref (loc->inode);
- local->call_cnt = layout->cnt;
- call_cnt = local->call_cnt;
+ call_cnt = local->call_cnt = layout->cnt;
/* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
* revalidates directly go to the cached-subvolume.
@@ -1043,6 +1102,11 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
ret = dict_set_uint32 (local->xattr_req,
"trusted.glusterfs.dht", 4 * 4);
+ /* need it for self-healing linkfiles which is
+ 'in-migration' state */
+ ret = dict_set_uint32 (local->xattr_req,
+ GLUSTERFS_OPEN_FD_COUNT, 4);
+
for (i = 0; i < layout->cnt; i++) {
subvol = layout->list[i].xlator;
@@ -1060,7 +1124,12 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
"trusted.glusterfs.dht", 4 * 4);
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht.linkto", 256);
+ DHT_LINKFILE_KEY, 256);
+
+ /* need it for self-healing linkfiles which is
+ 'in-migration' state */
+ ret = dict_set_uint32 (local->xattr_req,
+ GLUSTERFS_OPEN_FD_COUNT, 4);
if (!hashed_subvol) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -1101,286 +1170,6 @@ err:
int
-dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
-
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- local->op_ret = -1;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
-
- dht_iatt_merge (this, &local->prebuf, prebuf, prev->this);
- dht_iatt_merge (this, &local->stbuf, postbuf, prev->this);
-
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
-out:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (truncate, frame, local->op_ret, local->op_errno,
- &local->prebuf, &local->stbuf);
-err:
- return 0;
-}
-
-
-
-int
-dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *stbuf)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
-
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
-
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
-
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
-out:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno,
- &local->stbuf);
-err:
- return 0;
-}
-
-
-int
-dht_stat (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->layout = layout = dht_layout_get (this, loc->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local->inode = inode_ref (loc->inode);
- local->call_cnt = layout->cnt;
-
- for (i = 0; i < layout->cnt; i++) {
- subvol = layout->list[i].xlator;
-
- STACK_WIND (frame, dht_attr_cbk,
- subvol, subvol->fops->stat,
- loc);
- }
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL);
-
- return 0;
-}
-
-
-int
-dht_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->layout = layout = dht_layout_get (this, fd->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- local->inode = inode_ref (fd->inode);
- local->call_cnt = layout->cnt;;
-
- for (i = 0; i < layout->cnt; i++) {
- subvol = layout->list[i].xlator;
- STACK_WIND (frame, dht_attr_cbk,
- subvol, subvol->fops->fstat,
- fd);
- }
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL);
-
- return 0;
-}
-
-
-int
-dht_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->inode = inode_ref (loc->inode);
- local->call_cnt = 1;
-
- STACK_WIND (frame, dht_truncate_cbk,
- subvol, subvol->fops->truncate,
- loc, offset);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int
-dht_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->inode = inode_ref (fd->inode);
- local->call_cnt = 1;
-
- STACK_WIND (frame, dht_truncate_cbk,
- subvol, subvol->fops->ftruncate,
- fd, offset);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int
dht_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
struct iatt *postparent)
@@ -1473,42 +1262,6 @@ err:
}
-int
-dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct iatt *prebuf, struct iatt *postbuf)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
-
-
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
-
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
-
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno,
- prebuf, postbuf);
-
- return 0;
-}
-
-
int
dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
@@ -1518,7 +1271,6 @@ dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int this_call_cnt = 0;
call_frame_t *prev = NULL;
-
local = frame->local;
prev = cookie;
@@ -1545,125 +1297,6 @@ unlock:
return 0;
}
-
-int
-dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
-{
- DHT_STACK_UNWIND (access, frame, op_ret, op_errno);
- return 0;
-}
-
-
-int
-dht_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->call_cnt = 1;
-
- STACK_WIND (frame, dht_access_cbk,
- subvol, subvol->fops->access,
- loc, mask);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (access, frame, -1, op_errno);
-
- return 0;
-}
-
-
-int
-dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, const char *path, struct iatt *sbuf)
-{
- dht_local_t *local = NULL;
-
- local = frame->local;
- if (op_ret == -1)
- goto err;
-
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
- }
-
-err:
- DHT_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, sbuf);
-
- return 0;
-}
-
-
-int
-dht_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- STACK_WIND (frame, dht_readlink_cbk,
- subvol, subvol->fops->readlink,
- loc, size);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
static void
fill_layout_info (dht_layout_t *layout, char *buf)
{
@@ -1869,7 +1502,6 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
dht_layout_t *layout = NULL;
xlator_t **sub_volumes = NULL;
int op_errno = -1;
- int ret = 0;
int i = 0;
int cnt = 0;
@@ -1881,38 +1513,33 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (this->private, err);
conf = this->private;
- layout = dht_layout_get (this, loc->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_ERROR,
- "layout is NULL");
- op_errno = ENOENT;
- goto err;
- }
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_GETXATTR);
if (!local) {
op_errno = ENOMEM;
goto err;
}
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "layout is NULL");
+ op_errno = ENOENT;
goto err;
}
- local->layout = layout;
-
- if (key && (strcmp (key, GF_XATTR_PATHINFO_KEY) == 0)) {
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- cached_subvol = dht_subvol_get_cached (this, loc->inode);
+ if (key) {
local->key = gf_strdup (key);
if (!local->key) {
op_errno = ENOMEM;
-
goto err;
}
+ }
+
+ if (key && (strcmp (key, GF_XATTR_PATHINFO_KEY) == 0)) {
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ cached_subvol = local->cached_subvol;
local->call_cnt = 1;
if (hashed_subvol != cached_subvol) {
@@ -1990,14 +1617,6 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
}
}
- if (key) {
- local->key = gf_strdup (key);
- if (!local->key) {
- op_errno = ENOMEM;
- goto err;
- }
- }
-
if (loc->inode-> ia_type == IA_IFDIR) {
cnt = local->call_cnt = layout->cnt;
} else {
@@ -2032,7 +1651,13 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (fd, err);
VALIDATE_OR_GOTO (fd->inode, err);
- subvol = dht_subvol_get_cached (this, fd->inode);
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FSETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
if (!subvol) {
gf_log (this->name, GF_LOG_DEBUG,
"no cached subvolume for fd=%p", fd);
@@ -2040,13 +1665,6 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this,
goto err;
}
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->inode = inode_ref (fd->inode);
local->call_cnt = 1;
STACK_WIND (frame, dht_err_cbk, subvol, subvol->fops->fsetxattr,
@@ -2095,7 +1713,13 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc->path, err);
conf = this->private;
- subvol = dht_subvol_get_cached (this, loc->inode);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_SETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
if (!subvol) {
gf_log (this->name, GF_LOG_DEBUG,
"no cached subvolume for path=%s", loc->path);
@@ -2103,13 +1727,7 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
goto err;
}
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->layout = layout = dht_layout_get (this, loc->inode);
+ layout = local->layout;
if (!layout) {
gf_log (this->name, GF_LOG_DEBUG,
"no layout for path=%s", loc->path);
@@ -2117,12 +1735,6 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
goto err;
}
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
- }
-
tmp = dict_get (xattr, "distribute.migrate-data");
if (tmp) {
if (!IA_ISREG (loc->inode->ia_type)) {
@@ -2136,13 +1748,14 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
if (strcmp (value, "force") == 0)
forced_rebalance = 1;
- local->to_subvol = dht_subvol_get_hashed (this, loc);
- local->from_subvol = dht_subvol_get_cached (this, loc->inode);
- if (local->to_subvol == local->from_subvol) {
- op_errno = ENOTSUP;
+ local->rebalance.target_node = dht_subvol_get_hashed (this, loc);
+ local->rebalance.from_subvol = local->cached_subvol;
+
+ if (local->rebalance.target_node == local->rebalance.from_subvol) {
+ op_errno = EEXIST;
goto err;
}
- if (local->to_subvol) {
+ if (local->rebalance.target_node) {
local->flags = forced_rebalance;
ret = dht_start_rebalance_task (this, frame);
@@ -2153,7 +1766,7 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
"%s: failed to create a new synctask",
loc->path);
}
- op_errno = ENOTSUP;
+ op_errno = EINVAL;
goto err;
}
@@ -2259,7 +1872,13 @@ dht_removexattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc->inode, err);
VALIDATE_OR_GOTO (loc->path, err);
- subvol = dht_subvol_get_cached (this, loc->inode);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_REMOVEXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
if (!subvol) {
gf_log (this->name, GF_LOG_DEBUG,
"no cached subvolume for path=%s", loc->path);
@@ -2267,13 +1886,7 @@ dht_removexattr (call_frame_t *frame, xlator_t *this,
goto err;
}
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->layout = layout = dht_layout_get (this, loc->inode);
+ layout = local->layout;
if (!local->layout) {
gf_log (this->name, GF_LOG_DEBUG,
"no layout for path=%s", loc->path);
@@ -2282,6 +1895,7 @@ dht_removexattr (call_frame_t *frame, xlator_t *this,
}
local->call_cnt = layout->cnt;
+ local->key = gf_strdup (key);
for (i = 0; i < layout->cnt; i++) {
STACK_WIND (frame, dht_removexattr_cbk,
@@ -2308,7 +1922,6 @@ dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int this_call_cnt = 0;
call_frame_t *prev = NULL;
-
local = frame->local;
prev = cookie;
@@ -2335,318 +1948,6 @@ unlock:
return 0;
}
-
-int
-dht_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, fd_t *fd, int wbflags)
-{
- xlator_t *subvol = NULL;
- int ret = -1;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
- local->fd = fd_ref (fd);
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
- local->call_cnt = 1;
-
- STACK_WIND (frame, dht_fd_cbk,
- subvol, subvol->fops->open,
- loc, flags, fd, wbflags);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL);
-
- return 0;
-}
-
-
-int
-dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- struct iovec *vector, int count, struct iatt *stbuf,
- struct iobref *iobref)
-{
- dht_local_t *local = frame->local;
-
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
-out:
- DHT_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf,
- iobref);
-
- return 0;
-}
-
-
-int
-dht_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- STACK_WIND (frame, dht_readv_cbk,
- subvol, subvol->fops->readv,
- fd, size, off);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL);
-
- return 0;
-}
-
-
-int
-dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- dht_local_t *local = NULL;
-
- if (op_ret == -1) {
- goto out;
- }
-
- local = frame->local;
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
-out:
- DHT_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf);
-
- return 0;
-}
-
-
-int
-dht_writev (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iovec *vector, int count, off_t off,
- struct iobref *iobref)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
-
- op_errno = ENOMEM;
- goto err;
- }
-
- STACK_WIND (frame, dht_writev_cbk,
- subvol, subvol->fops->writev,
- fd, vector, count, off, iobref);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int
-dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
- local->fd = fd_ref (fd);
- local->call_cnt = 1;
-
- STACK_WIND (frame, dht_err_cbk,
- subvol, subvol->fops->flush, fd);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (flush, frame, -1, op_errno);
-
- return 0;
-}
-
-
-int
-dht_fsync (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int datasync)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
- }
- local->call_cnt = 1;
-
- STACK_WIND (frame, dht_fsync_cbk,
- subvol, subvol->fops->fsync,
- fd, datasync);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int
-dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct gf_flock *flock)
-{
- DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock);
-
- return 0;
-}
-
-
-int
-dht_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int cmd, struct gf_flock *flock)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, dht_lk_cbk,
- subvol, subvol->fops->lk,
- fd, cmd, flock);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL);
-
- return 0;
-}
-
/*
* dht_normalize_stats -
*/
@@ -2742,7 +2043,7 @@ dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
conf = this->private;
- local = dht_local_init (frame);
+ local = dht_local_init (frame, NULL, NULL, GF_FOP_STATFS);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2787,11 +2088,9 @@ dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
- int ret = -1;
int op_errno = -1;
int i = -1;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
@@ -2799,21 +2098,13 @@ dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
conf = this->private;
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, fd, GF_FOP_OPENDIR);
if (!local) {
op_errno = ENOMEM;
goto err;
}
- local->fd = fd_ref (fd);
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
local->call_cnt = conf->subvolume_cnt;
for (i = 0; i < conf->subvolume_cnt; i++) {
@@ -2865,7 +2156,7 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
list_for_each_entry (orig_entry, (&orig_entries->list), list) {
next_offset = orig_entry->d_off;
- if (check_is_linkfile (NULL, (&orig_entry->d_stat), NULL)
+ if (check_is_linkfile_wo_dict (NULL, (&orig_entry->d_stat))
|| (check_is_dir (NULL, (&orig_entry->d_stat), NULL)
&& (prev->this != dht_first_up_subvol (this)))) {
continue;
@@ -3053,9 +2344,8 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
- local = dht_local_init (frame);
+ local = dht_local_init (frame, NULL, NULL, whichop);
if (!local) {
-
op_errno = ENOMEM;
goto err;
}
@@ -3156,7 +2446,6 @@ dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
int op_errno = -1;
int i = -1;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
@@ -3164,10 +2453,9 @@ dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
conf = this->private;
- local = dht_local_init (frame);
+ local = dht_local_init (frame, NULL, NULL, GF_FOP_FSYNCDIR);
if (!local) {
op_errno = ENOMEM;
-
goto err;
}
@@ -3275,7 +2563,6 @@ dht_mknod (call_frame_t *frame, xlator_t *this,
{
xlator_t *subvol = NULL;
int op_errno = -1;
- int ret = -1;
xlator_t *avail_subvol = NULL;
dht_local_t *local = NULL;
@@ -3285,10 +2572,9 @@ dht_mknod (call_frame_t *frame, xlator_t *this,
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
if (!local) {
op_errno = ENOMEM;
-
goto err;
}
@@ -3301,13 +2587,6 @@ dht_mknod (call_frame_t *frame, xlator_t *this,
goto err;
}
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
if (!dht_is_subvol_filled (this, subvol)) {
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
@@ -3357,17 +2636,14 @@ dht_symlink (call_frame_t *frame, xlator_t *this,
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
- int ret = -1;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_SYMLINK);
if (!local) {
op_errno = ENOMEM;
-
goto err;
}
@@ -3380,13 +2656,6 @@ dht_symlink (call_frame_t *frame, xlator_t *this,
goto err;
}
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "Failed to copy loc");
- op_errno = ENOMEM;
- goto err;
- }
-
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
@@ -3410,11 +2679,9 @@ dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
{
xlator_t *cached_subvol = NULL;
xlator_t *hashed_subvol = NULL;
- int ret = -1;
int op_errno = -1;
dht_local_t *local = NULL;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
@@ -3430,11 +2697,10 @@ dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
goto done;
}
- cached_subvol = dht_subvol_get_cached (this, loc->inode);
- if (!cached_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
+ local = dht_local_init (frame, loc, NULL, GF_FOP_UNLINK);
+ if (!local) {
+ op_errno = ENOMEM;
+
goto err;
}
@@ -3447,17 +2713,11 @@ dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
goto err;
}
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = ENOMEM;
-
+ cached_subvol = local->cached_subvol;
+ if (!cached_subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
goto err;
}
@@ -3522,15 +2782,13 @@ dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dht_local_t *local = NULL;
xlator_t *srcvol = NULL;
-
if (op_ret == -1)
goto err;
local = frame->local;
srcvol = local->linkfile.srcvol;
- STACK_WIND (frame, dht_link_cbk,
- srcvol, srcvol->fops->link,
+ STACK_WIND (frame, dht_link_cbk, srcvol, srcvol->fops->link,
&local->loc, &local->loc2);
return 0;
@@ -3553,13 +2811,19 @@ dht_link (call_frame_t *frame, xlator_t *this,
int ret = -1;
dht_local_t *local = NULL;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (oldloc, err);
VALIDATE_OR_GOTO (newloc, err);
- cached_subvol = dht_subvol_get_cached (this, oldloc->inode);
+ local = dht_local_init (frame, oldloc, NULL, GF_FOP_LINK);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
+ }
+
+ cached_subvol = local->cached_subvol;
if (!cached_subvol) {
gf_log (this->name, GF_LOG_DEBUG,
"no cached subvolume for path=%s", oldloc->path);
@@ -3576,29 +2840,14 @@ dht_link (call_frame_t *frame, xlator_t *this,
goto err;
}
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
- ret = loc_copy (&local->loc, oldloc);
- if (ret == -1) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
ret = loc_copy (&local->loc2, newloc);
if (ret == -1) {
op_errno = ENOMEM;
-
goto err;
}
if (hashed_subvol != cached_subvol) {
- memcpy (local->gfid, oldloc->inode->gfid, 16);
+ uuid_copy (local->gfid, oldloc->inode->gfid);
dht_linkfile_create (frame, dht_link_linkfile_cbk,
cached_subvol, hashed_subvol, newloc);
} else {
@@ -3694,7 +2943,6 @@ dht_create (call_frame_t *frame, xlator_t *this,
fd_t *fd, dict_t *params)
{
int op_errno = -1;
- int ret = -1;
xlator_t *subvol = NULL;
dht_local_t *local = NULL;
xlator_t *avail_subvol = NULL;
@@ -3705,9 +2953,8 @@ dht_create (call_frame_t *frame, xlator_t *this,
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
if (!local) {
-
op_errno = ENOMEM;
goto err;
}
@@ -3723,12 +2970,6 @@ dht_create (call_frame_t *frame, xlator_t *this,
goto done;
}
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
-
- goto err;
- }
subvol = dht_subvol_get_hashed (this, loc);
if (!subvol) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -3748,10 +2989,8 @@ dht_create (call_frame_t *frame, xlator_t *this,
}
/* Choose the minimum filled volume, and create the
files there */
- /* TODO */
avail_subvol = dht_free_disk_available_subvol (this, subvol);
if (avail_subvol != subvol) {
- local->fd = fd_ref (fd);
local->params = dict_ref (params);
local->flags = flags;
local->mode = mode;
@@ -3790,7 +3029,6 @@ dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie,
dht_local_t *local = NULL;
dht_layout_t *layout = NULL;
-
local = frame->local;
layout = local->selfheal.layout;
@@ -3928,7 +3166,6 @@ dht_mkdir (call_frame_t *frame, xlator_t *this,
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
int op_errno = -1;
- int ret = -1;
xlator_t *hashed_subvol = NULL;
@@ -3943,15 +3180,13 @@ dht_mkdir (call_frame_t *frame, xlator_t *this,
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_MKDIR);
if (!local) {
-
op_errno = ENOMEM;
goto err;
}
hashed_subvol = dht_subvol_get_hashed (this, loc);
-
if (hashed_subvol == NULL) {
gf_log (this->name, GF_LOG_DEBUG,
"hashed subvol not found for %s",
@@ -3961,21 +3196,12 @@ dht_mkdir (call_frame_t *frame, xlator_t *this,
}
local->hashed_subvol = hashed_subvol;
- local->inode = inode_ref (loc->inode);
- ret = loc_copy (&local->loc, loc);
local->mode = mode;
-
- if (ret == -1) {
-
- op_errno = ENOMEM;
- goto err;
- }
-
local->params = dict_ref (params);
+ local->inode = inode_ref (loc->inode);
local->layout = dht_layout_new (this, conf->subvolume_cnt);
if (!local->layout) {
-
op_errno = ENOMEM;
goto err;
}
@@ -4366,8 +3592,6 @@ dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
dht_conf_t *conf = NULL;
int op_errno = -1;
int i = -1;
- int ret = -1;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -4378,9 +3602,8 @@ dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
conf = this->private;
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_RMDIR);
if (!local) {
-
op_errno = ENOMEM;
goto err;
}
@@ -4388,13 +3611,6 @@ dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
local->call_cnt = conf->subvolume_cnt;
local->op_ret = 0;
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
-
- op_errno = ENOMEM;
- goto err;
- }
-
local->flags = flags;
local->fd = fd_create (local->loc.inode, frame->root->pid);
@@ -4421,209 +3637,6 @@ err:
return 0;
}
-
-int
-dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict);
- return 0;
-}
-
-
-int
-dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
- local->inode = inode_ref (loc->inode);
- local->call_cnt = 1;
-
- STACK_WIND (frame,
- dht_xattrop_cbk,
- subvol, subvol->fops->xattrop,
- loc, flags, dict);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL);
-
- return 0;
-}
-
-
-int
-dht_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict);
- return 0;
-}
-
-
-int
-dht_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame,
- dht_fxattrop_cbk,
- subvol, subvol->fops->fxattrop,
- fd, flags, dict);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL);
-
- return 0;
-}
-
-
-int
-dht_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno);
- return 0;
-}
-
-
-int32_t
-dht_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
- local->inode = inode_ref (loc->inode);
- local->call_cnt = 1;
-
- STACK_WIND (frame,
- dht_inodelk_cbk,
- subvol, subvol->fops->inodelk,
- volume, loc, cmd, lock);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (inodelk, frame, -1, op_errno);
-
- return 0;
-}
-
-
-int
-dht_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno);
- return 0;
-}
-
-
-int
-dht_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
-
- STACK_WIND (frame,
- dht_finodelk_cbk,
- subvol, subvol->fops->finodelk,
- volume, fd, cmd, lock);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (finodelk, frame, -1, op_errno);
-
- return 0;
-}
-
-
int
dht_entrylk_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno)
@@ -4649,7 +3662,13 @@ dht_entrylk (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc->inode, err);
VALIDATE_OR_GOTO (loc->path, err);
- subvol = dht_subvol_get_cached (this, loc->inode);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_ENTRYLK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
if (!subvol) {
gf_log (this->name, GF_LOG_DEBUG,
"no cached subvolume for path=%s", loc->path);
@@ -4657,14 +3676,6 @@ dht_entrylk (call_frame_t *frame, xlator_t *this,
goto err;
}
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
- local->inode = inode_ref (loc->inode);
local->call_cnt = 1;
STACK_WIND (frame, dht_entrylk_cbk,
@@ -4726,161 +3737,6 @@ err:
int
-dht_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *statpre,
- struct iatt *statpost)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
-
-
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
-
- dht_iatt_merge (this, &local->prebuf, statpre, prev->this);
- dht_iatt_merge (this, &local->stbuf, statpost, prev->this);
-
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
-
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno,
- &local->prebuf, &local->stbuf);
-
- return 0;
-}
-
-
-int
-dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
-{
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_DEBUG,
- "memory allocation failed :(");
- goto err;
- }
-
- local->layout = layout = dht_layout_get (this, loc->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- if (!layout_is_sane (layout)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout is not sane for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local->inode = inode_ref (loc->inode);
- local->call_cnt = layout->cnt;
-
- for (i = 0; i < layout->cnt; i++) {
- STACK_WIND (frame, dht_setattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->setattr,
- loc, stbuf, valid);
- }
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int
-dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
- int32_t valid)
-{
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
- local->layout = layout = dht_layout_get (this, fd->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- if (!layout_is_sane (layout)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout is not sane for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- local->inode = inode_ref (fd->inode);
- local->call_cnt = layout->cnt;
-
- for (i = 0; i < layout->cnt; i++) {
- STACK_WIND (frame, dht_setattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->fsetattr,
- fd, stbuf, valid);
- }
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int
dht_forget (xlator_t *this, inode_t *inode)
{
uint64_t tmp_layout = 0;
@@ -4898,53 +3754,6 @@ dht_forget (xlator_t *this, inode_t *inode)
}
-
-int
-dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)
-{
- xlator_list_t *subvols = NULL;
- int cnt = 0;
-
- if (!conf)
- return -1;
-
- for (subvols = this->children; subvols; subvols = subvols->next)
- cnt++;
-
- conf->subvolumes = GF_CALLOC (cnt, sizeof (xlator_t *),
- gf_dht_mt_xlator_t);
- if (!conf->subvolumes) {
-
- return -1;
- }
- conf->subvolume_cnt = cnt;
-
- cnt = 0;
- for (subvols = this->children; subvols; subvols = subvols->next)
- conf->subvolumes[cnt++] = subvols->xlator;
-
- conf->subvolume_status = GF_CALLOC (cnt, sizeof (char),
- gf_dht_mt_char);
- if (!conf->subvolume_status) {
-
- return -1;
- }
-
- conf->last_event = GF_CALLOC (cnt, sizeof (int),
- gf_dht_mt_char);
- if (!conf->last_event) {
-
- return -1;
- }
- conf->subvol_up_time = GF_CALLOC (cnt, sizeof (time_t),
- gf_dht_mt_subvol_time);
- if (!conf->subvol_up_time) {
- return -1;
- }
- return 0;
-}
-
-
int
dht_notify (xlator_t *this, int event, void *data, ...)
{
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 95d1b1d6a08..ab1b82af2a0 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -29,45 +29,61 @@
#ifndef _DHT_H
#define _DHT_H
-#define GF_XATTR_FIX_LAYOUT_KEY "trusted.distribute.fix.layout"
+#define GF_XATTR_FIX_LAYOUT_KEY "trusted.distribute.fix.layout"
#define GF_DHT_LOOKUP_UNHASHED_ON 1
#define GF_DHT_LOOKUP_UNHASHED_AUTO 2
-#define DHT_PATHINFO_HEADER "DISTRIBUTE:"
+#define DHT_PATHINFO_HEADER "DISTRIBUTE:"
#include <fnmatch.h>
typedef int (*dht_selfheal_dir_cbk_t) (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno);
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno);
+typedef int (*dht_defrag_cbk_fn_t) (xlator_t *this, call_frame_t *frame,
+ int ret);
struct dht_layout {
- int spread_cnt; /* layout spread count per directory,
- is controlled by 'setxattr()' with
- special key */
- int cnt;
- int preset;
- int gen;
- int type;
- int ref; /* use with dht_conf_t->layout_lock */
- int search_unhashed;
+ int spread_cnt; /* layout spread count per directory,
+ is controlled by 'setxattr()' with
+ special key */
+ int cnt;
+ int preset;
+ int gen;
+ int type;
+ int ref; /* use with dht_conf_t->layout_lock */
+ int search_unhashed;
struct {
- int err; /* 0 = normal
- -1 = dir exists and no xattr
- >0 = dir lookup failed with errno
- */
- uint32_t start;
- uint32_t stop;
- xlator_t *xlator;
+ int err; /* 0 = normal
+ -1 = dir exists and no xattr
+ >0 = dir lookup failed with errno
+ */
+ uint32_t start;
+ uint32_t stop;
+ xlator_t *xlator;
} list[0];
};
-typedef struct dht_layout dht_layout_t;
+typedef struct dht_layout dht_layout_t;
typedef enum {
DHT_HASH_TYPE_DM,
} dht_hashfn_type_t;
+/* rebalance related */
+struct dht_rebalance_ {
+ xlator_t *from_subvol;
+ xlator_t *target_node;
+ int32_t wbflags;
+ off_t offset;
+ size_t size;
+ int32_t flags;
+ int count;
+ struct iobref *iobref;
+ struct iovec *vector;
+ struct iatt stbuf;
+ dht_defrag_cbk_fn_t target_op_fn;
+};
struct dht_local {
int call_cnt;
@@ -140,11 +156,12 @@ struct dht_local {
/* flag used to make sure we need to return estale in
{lookup,revalidate}_cbk */
- char return_estale;
+ char return_estale;
+ char need_lookup_everywhere;
- /* rebalance related */
-#define to_subvol hashed_subvol
-#define from_subvol cached_subvol
+ glusterfs_fop_t fop;
+
+ struct dht_rebalance_ rebalance;
};
typedef struct dht_local dht_local_t;
@@ -209,11 +226,21 @@ typedef struct dht_disk_layout dht_disk_layout_t;
#define is_last_call(cnt) (cnt == 0)
-#define DHT_LINKFILE_MODE (S_ISVTX)
+#define DHT_MIGRATION_IN_PROGRESS 1
+#define DHT_MIGRATION_COMPLETED 2
+
+#define DHT_LINKFILE_KEY "trusted.glusterfs.dht.linkto"
+#define DHT_LINKFILE_MODE (S_ISVTX)
+
#define check_is_linkfile(i,s,x) ( \
((st_mode_from_ia (s->ia_prot, s->ia_type) & ~S_IFMT) \
== DHT_LINKFILE_MODE) && \
- (s->ia_size == 0))
+ dict_get (x, DHT_LINKFILE_KEY))
+
+#define check_is_linkfile_wo_dict(i,s) ( \
+ ((st_mode_from_ia (s->ia_prot, s->ia_type) & ~S_IFMT) \
+ == DHT_LINKFILE_MODE))
+
#define check_is_dir(i,s,x) (IA_ISDIR(s->ia_type))
@@ -221,85 +248,86 @@ typedef struct dht_disk_layout dht_disk_layout_t;
#define DHT_STACK_UNWIND(fop, frame, params ...) do { \
dht_local_t *__local = NULL; \
- xlator_t *__xl = NULL; \
+ xlator_t *__xl = NULL; \
if (frame) { \
- __xl = frame->this; \
- __local = frame->local; \
+ __xl = frame->this; \
+ __local = frame->local; \
frame->local = NULL; \
} \
STACK_UNWIND_STRICT (fop, frame, params); \
dht_local_wipe (__xl, __local); \
} while (0)
-#define DHT_STACK_DESTROY(frame) do { \
- dht_local_t *__local = NULL; \
- xlator_t *__xl = NULL; \
- __xl = frame->this; \
- __local = frame->local; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- dht_local_wipe (__xl, __local); \
+#define DHT_STACK_DESTROY(frame) do { \
+ dht_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
+ dht_local_wipe (__xl, __local); \
} while (0)
-dht_layout_t *dht_layout_new (xlator_t *this, int cnt);
-dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode);
-dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol);
-xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout,
+dht_layout_t *dht_layout_new (xlator_t *this, int cnt);
+dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode);
+dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol);
+xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout,
const char *name);
-int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout);
-int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
- uint32_t *holes_p, uint32_t *overlaps_p,
- uint32_t *missing_p, uint32_t *down_p,
- uint32_t *misc_p);
-int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout,
- xlator_t *subvol, loc_t *loc, dict_t *xattr);
+int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout);
+int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
+ uint32_t *holes_p, uint32_t *overlaps_p,
+ uint32_t *missing_p, uint32_t *down_p,
+ uint32_t *misc_p);
+int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout,
+ xlator_t *subvol, loc_t *loc, dict_t *xattr);
xlator_t *dht_linkfile_subvol (xlator_t *this, inode_t *inode,
struct iatt *buf, dict_t *xattr);
-int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
- xlator_t *subvol, loc_t *loc);
+int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
+ xlator_t *subvol, loc_t *loc);
int dht_layouts_init (xlator_t *this, dht_conf_t *conf);
int dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
- int op_ret, int op_errno, dict_t *xattr);
+ int op_ret, int op_errno, dict_t *xattr);
int dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
- int pos, int32_t **disk_layout_p);
-int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
- int pos, void *disk_layout_raw);
+ int pos, int32_t **disk_layout_p);
+int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
+ int pos, void *disk_layout_raw);
int dht_frame_return (call_frame_t *frame);
-int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y);
+int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y);
int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol,
uint64_t *x);
void dht_local_wipe (xlator_t *this, dht_local_t *local);
-dht_local_t *dht_local_init (call_frame_t *frame);
-int dht_iatt_merge (xlator_t *this, struct iatt *to, struct iatt *from,
- xlator_t *subvol);
+dht_local_t *dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd,
+ glusterfs_fop_t fop);
+int dht_iatt_merge (xlator_t *this, struct iatt *to, struct iatt *from,
+ xlator_t *subvol);
xlator_t *dht_subvol_get_hashed (xlator_t *this, loc_t *loc);
xlator_t *dht_subvol_get_cached (xlator_t *this, inode_t *inode);
xlator_t *dht_subvol_next (xlator_t *this, xlator_t *prev);
-int dht_subvol_cnt (xlator_t *this, xlator_t *subvol);
+int dht_subvol_cnt (xlator_t *this, xlator_t *subvol);
int dht_hash_compute (int type, const char *name, uint32_t *hash_p);
-int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
- xlator_t *tovol, xlator_t *fromvol, loc_t *loc);
-int dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc);
-int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc);
+int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
+ xlator_t *tovol, xlator_t *fromvol, loc_t *loc);
+int dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc);
+int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc);
int
-dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- loc_t *loc, dht_layout_t *layout);
+dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ loc_t *loc, dht_layout_t *layout);
int
dht_selfheal_new_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
dht_layout_t *layout);
int
-dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- loc_t *loc, dht_layout_t *layout);
+dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ loc_t *loc, dht_layout_t *layout);
int
dht_layout_sort_volname (dht_layout_t *layout);
@@ -308,31 +336,252 @@ int dht_rename (call_frame_t *frame, xlator_t *this,
int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc);
-int dht_is_subvol_filled (xlator_t *this, xlator_t *subvol);
+int dht_is_subvol_filled (xlator_t *this, xlator_t *subvol);
xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol);
-int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx);
+int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx);
int dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode);
-int dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout);
-void dht_layout_unref (xlator_t *this, dht_layout_t *layout);
+int dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout);
+void dht_layout_unref (xlator_t *this, dht_layout_t *layout);
dht_layout_t *dht_layout_ref (xlator_t *this, dht_layout_t *layout);
-xlator_t *dht_first_up_subvol (xlator_t *this);
-xlator_t *dht_last_up_subvol (xlator_t *this);
+xlator_t *dht_first_up_subvol (xlator_t *this);
+xlator_t *dht_last_up_subvol (xlator_t *this);
int dht_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name);
-int dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
+int dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
xlator_t **subvol);
-int dht_rename_cleanup (call_frame_t *frame);
+int dht_rename_cleanup (call_frame_t *frame);
int dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent);
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent);
int dht_fix_directory_layout (call_frame_t *frame,
- dht_selfheal_dir_cbk_t dir_cbk,
- dht_layout_t *layout);
+ dht_selfheal_dir_cbk_t dir_cbk,
+ dht_layout_t *layout);
+int dht_init_subvolumes (xlator_t *this, dht_conf_t *conf);
+
+/* migration/rebalance */
int dht_start_rebalance_task (xlator_t *this, call_frame_t *frame);
-#endif /* _DHT_H */
+
+int dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame);
+int dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame);
+
+
+/* FOPS */
+int32_t dht_lookup (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *xattr_req);
+
+int32_t dht_stat (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc);
+
+int32_t dht_fstat (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd);
+
+int32_t dht_truncate (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ off_t offset);
+
+int32_t dht_ftruncate (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset);
+
+int32_t dht_access (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t mask);
+
+int32_t dht_readlink (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ size_t size);
+
+int32_t dht_mknod (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, dev_t rdev, dict_t *params);
+
+int32_t dht_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, dict_t *params);
+
+int32_t dht_unlink (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc);
+
+int32_t dht_rmdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags);
+
+int32_t dht_symlink (call_frame_t *frame, xlator_t *this,
+ const char *linkpath, loc_t *loc, dict_t *params);
+
+int32_t dht_rename (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc);
+
+int32_t dht_link (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc);
+
+int32_t dht_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode,
+ fd_t *fd, dict_t *params);
+
+int32_t dht_open (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t flags, fd_t *fd,
+ int32_t wbflags);
+
+int32_t dht_readv (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size,
+ off_t offset);
+
+int32_t dht_writev (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iovec *vector,
+ int32_t count,
+ off_t offset,
+ struct iobref *iobref);
+
+int32_t dht_flush (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd);
+
+int32_t dht_fsync (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t datasync);
+
+int32_t dht_opendir (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, fd_t *fd);
+
+int32_t dht_fsyncdir (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t datasync);
+
+int32_t dht_statfs (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc);
+
+int32_t dht_setxattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *dict,
+ int32_t flags);
+
+int32_t dht_getxattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ const char *name);
+
+int32_t dht_fsetxattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ dict_t *dict,
+ int32_t flags);
+
+int32_t dht_fgetxattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ const char *name);
+
+int32_t dht_removexattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ const char *name);
+
+int32_t dht_lk (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t cmd,
+ struct gf_flock *flock);
+
+int32_t dht_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock);
+
+int32_t dht_finodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock);
+
+int32_t dht_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type);
+
+int32_t dht_fentrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type);
+
+int32_t dht_readdir (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size, off_t off);
+
+int32_t dht_readdirp (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size, off_t off);
+
+int32_t dht_xattrop (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ gf_xattrop_flags_t flags,
+ dict_t *dict);
+
+int32_t dht_fxattrop (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ gf_xattrop_flags_t flags,
+ dict_t *dict);
+
+int32_t dht_forget (xlator_t *this, inode_t *inode);
+int32_t dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid);
+int32_t dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid);
+
+int32_t dht_notify (xlator_t *this, int32_t event, void *data, ...);
+
+/* definitions for nufa/switch */
+int dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent);
+int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent);
+
+
+
+#endif/* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 9e2327bffeb..d27d8bf91b8 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -98,7 +98,9 @@ dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx)
goto err;
}
- statfs_local = dht_local_init (statfs_frame);
+ /* local->fop value is not used in this case */
+ statfs_local = dht_local_init (statfs_frame, NULL, NULL,
+ GF_FOP_MAXVALUE);
if (!statfs_local) {
goto err;
}
@@ -141,12 +143,13 @@ dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
goto err;
}
- statfs_local = dht_local_init (statfs_frame);
+ /* In this case, 'local->fop' is not used */
+ statfs_local = dht_local_init (statfs_frame, loc, NULL,
+ GF_FOP_MAXVALUE);
if (!statfs_local) {
goto err;
}
- loc_copy (&statfs_local->loc, loc);
loc_t tmp_loc = { .inode = NULL,
.path = "/",
};
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 44ed9c68237..99abe023b2a 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -224,27 +224,60 @@ dht_local_wipe (xlator_t *this, dht_local_t *local)
GF_FREE (local->key);
}
+ if (local->rebalance.vector)
+ GF_FREE (local->rebalance.vector);
+
+ if (local->rebalance.iobref)
+ iobref_unref (local->rebalance.iobref);
+
GF_FREE (local);
}
dht_local_t *
-dht_local_init (call_frame_t *frame)
+dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop)
{
dht_local_t *local = NULL;
+ inode_t *inode = NULL;
+ int ret = 0;
/* TODO: use mem-pool */
- local = GF_CALLOC (1, sizeof (*local),
- gf_dht_mt_dht_local_t);
-
+ local = GF_CALLOC (1, sizeof (*local), gf_dht_mt_dht_local_t);
if (!local)
- return NULL;
+ goto out;
+
+ if (loc) {
+ ret = loc_copy (&local->loc, loc);
+ if (ret)
+ goto out;
- local->op_ret = -1;
+ inode = loc->inode;
+ }
+
+ if (fd) {
+ local->fd = fd_ref (fd);
+ if (!inode)
+ inode = fd->inode;
+ }
+
+ local->op_ret = -1;
local->op_errno = EUCLEAN;
+ local->fop = fop;
+
+ if (inode) {
+ local->layout = dht_layout_get (frame->this, inode);
+ local->cached_subvol = dht_subvol_get_cached (frame->this,
+ inode);
+ }
frame->local = local;
+out:
+ if (ret) {
+ if (local)
+ GF_FREE (local);
+ local = NULL;
+ }
return local;
}
@@ -496,3 +529,374 @@ err:
loc_wipe (child);
return -1;
}
+
+
+
+int
+dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)
+{
+ xlator_list_t *subvols = NULL;
+ int cnt = 0;
+
+ if (!conf)
+ return -1;
+
+ for (subvols = this->children; subvols; subvols = subvols->next)
+ cnt++;
+
+ conf->subvolumes = GF_CALLOC (cnt, sizeof (xlator_t *),
+ gf_dht_mt_xlator_t);
+ if (!conf->subvolumes) {
+ return -1;
+ }
+ conf->subvolume_cnt = cnt;
+
+ cnt = 0;
+ for (subvols = this->children; subvols; subvols = subvols->next)
+ conf->subvolumes[cnt++] = subvols->xlator;
+
+ conf->subvolume_status = GF_CALLOC (cnt, sizeof (char),
+ gf_dht_mt_char);
+ if (!conf->subvolume_status) {
+ return -1;
+ }
+
+ conf->last_event = GF_CALLOC (cnt, sizeof (int),
+ gf_dht_mt_char);
+ if (!conf->last_event) {
+ return -1;
+ }
+
+ conf->subvol_up_time = GF_CALLOC (cnt, sizeof (time_t),
+ gf_dht_mt_subvol_time);
+ if (!conf->subvol_up_time) {
+ return -1;
+ }
+
+ conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t),
+ gf_dht_mt_dht_du_t);
+ if (!conf->du_stats) {
+ return -1;
+ }
+
+ return 0;
+}
+
+
+
+
+static int
+dht_migration_complete_check_done (int op_ret, call_frame_t *frame, void *data)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ local->rebalance.target_op_fn (THIS, frame, op_ret);
+
+ return 0;
+}
+
+
+int
+dht_migration_complete_check_task (void *data)
+{
+ int ret = -1;
+ xlator_t *src_node = NULL;
+ xlator_t *dst_node = NULL;
+ dht_local_t *local = NULL;
+ dict_t *dict = NULL;
+ dht_layout_t *layout = NULL;
+ struct iatt stbuf = {0,};
+ xlator_t *this = NULL;
+ call_frame_t *frame = NULL;
+ loc_t tmp_loc = {0,};
+ char *path = NULL;
+
+ this = THIS;
+ frame = data;
+ local = frame->local;
+
+ src_node = local->cached_subvol;
+
+ /* getxattr on cached_subvol for 'linkto' value */
+ if (!local->loc.inode)
+ ret = syncop_fgetxattr (src_node, local->fd, &dict,
+ DHT_LINKFILE_KEY);
+ else
+ ret = syncop_getxattr (src_node, &local->loc, &dict,
+ DHT_LINKFILE_KEY);
+
+ if (!ret)
+ dst_node = dht_linkfile_subvol (this, NULL, NULL, dict);
+
+ if (ret) {
+ if ((errno != ENOENT) || (!local->loc.inode)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to get the 'linkto' xattr %s",
+ local->loc.path, strerror (errno));
+ goto out;
+ }
+ /* Need to do lookup on hashed subvol, then get the file */
+ ret = syncop_lookup (this, &local->loc, NULL, &stbuf, NULL,
+ NULL);
+ if (ret)
+ goto out;
+ dst_node = dht_subvol_get_cached (this, local->loc.inode);
+ }
+
+ if (!dst_node) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to get the destination node",
+ local->loc.path);
+ ret = -1;
+ goto out;
+ }
+
+ /* lookup on dst */
+ if (local->loc.inode) {
+ ret = syncop_lookup (dst_node, &local->loc, NULL, &stbuf, NULL, NULL);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to lookup the file on %s",
+ local->loc.path, dst_node->name);
+ goto out;
+ }
+
+ if (uuid_compare (stbuf.ia_gfid, local->loc.inode->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: gfid different on the target file on %s",
+ local->loc.path, dst_node->name);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* update inode ctx (the layout) */
+ dht_layout_unref (this, local->layout);
+
+ if (!local->loc.inode)
+ ret = dht_layout_preset (this, dst_node, local->fd->inode);
+ else
+ ret = dht_layout_preset (this, dst_node, local->loc.inode);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: could not set preset layout for subvol %s",
+ local->loc.path, dst_node->name);
+ ret = -1;
+ goto out;
+ }
+
+ layout = dht_layout_for_subvol (this, dst_node);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: no pre-set layout for subvolume %s",
+ local->loc.path, dst_node ? dst_node->name : "<nil>");
+ ret = -1;
+ goto out;
+ }
+
+ if (!local->loc.inode)
+ ret = dht_layout_set (this, local->fd->inode, layout);
+ else
+ ret = dht_layout_set (this, local->loc.inode, layout);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set the new layout",
+ local->loc.path);
+ goto out;
+ }
+
+ local->cached_subvol = dst_node;
+ ret = 0;
+
+ if (!local->fd)
+ goto out;
+
+ /* once we detect the migration complete, the fd-ctx is no more
+ required.. delete the ctx, and do one extra 'fd_unref' for open fd */
+ ret = fd_ctx_del (local->fd, this, NULL);
+ if (!ret) {
+ fd_unref (local->fd);
+ ret = 0;
+ goto out;
+ }
+
+ /* if 'local->fd' (ie, fd based operation), send a 'open()' on
+ destination if not already done */
+ if (local->loc.inode) {
+ ret = syncop_open (dst_node, &local->loc,
+ local->fd->flags, local->fd);
+ } else {
+ tmp_loc.inode = local->fd->inode;
+ inode_path (local->fd->inode, NULL, &path);
+ if (path)
+ tmp_loc.path = path;
+ ret = syncop_open (dst_node, &tmp_loc,
+ local->fd->flags, local->fd);
+ if (path)
+ GF_FREE (path);
+
+ }
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to send open() on target file at %s",
+ local->loc.path, dst_node->name);
+ goto out;
+ }
+
+ /* need this unref for the fd on src_node */
+ fd_unref (local->fd);
+ ret = 0;
+out:
+
+ return ret;
+}
+
+int
+dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame)
+{
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ ret = synctask_new (conf->env, dht_migration_complete_check_task,
+ dht_migration_complete_check_done,
+ frame, frame);
+ return ret;
+}
+
+/* During 'in-progress' state, both nodes should have the file */
+static int
+dht_inprogress_check_done (int op_ret, call_frame_t *sync_frame, void *data)
+{
+ dht_local_t *local = NULL;
+
+ local = sync_frame->local;
+
+ local->rebalance.target_op_fn (THIS, sync_frame, op_ret);
+
+ return 0;
+}
+
+static int
+dht_rebalance_inprogress_task (void *data)
+{
+ int ret = -1;
+ xlator_t *src_node = NULL;
+ xlator_t *dst_node = NULL;
+ dht_local_t *local = NULL;
+ dict_t *dict = NULL;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ char *path = NULL;
+ struct iatt stbuf = {0,};
+ loc_t tmp_loc = {0,};
+
+ this = THIS;
+ frame = data;
+ local = frame->local;
+
+ src_node = local->cached_subvol;
+
+ /* getxattr on cached_subvol for 'linkto' value */
+ if (local->loc.inode)
+ ret = syncop_getxattr (src_node, &local->loc, &dict,
+ DHT_LINKFILE_KEY);
+ else
+ ret = syncop_fgetxattr (src_node, local->fd, &dict,
+ DHT_LINKFILE_KEY);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to get the 'linkto' xattr %s",
+ local->loc.path, strerror (errno));
+ goto out;
+ }
+
+ dst_node = dht_linkfile_subvol (this, NULL, NULL, dict);
+ if (!dst_node) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to get the 'linkto' xattr from dict",
+ local->loc.path);
+ ret = -1;
+ goto out;
+ }
+
+ local->rebalance.target_node = dst_node;
+
+ if (local->loc.inode) {
+ /* lookup on dst */
+ ret = syncop_lookup (dst_node, &local->loc, NULL,
+ &stbuf, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to lookup the file on %s",
+ local->loc.path, dst_node->name);
+ goto out;
+ }
+
+ if (uuid_compare (stbuf.ia_gfid, local->loc.inode->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: gfid different on the target file on %s",
+ local->loc.path, dst_node->name);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+ if (!local->fd)
+ goto out;
+
+ if (local->loc.inode) {
+ ret = syncop_open (dst_node, &local->loc,
+ local->fd->flags, local->fd);
+ } else {
+ tmp_loc.inode = local->fd->inode;
+ inode_path (local->fd->inode, NULL, &path);
+ if (path)
+ tmp_loc.path = path;
+ ret = syncop_open (dst_node, &tmp_loc,
+ local->fd->flags, local->fd);
+ if (path)
+ GF_FREE (path);
+ }
+
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to send open() on target file at %s",
+ local->loc.path, dst_node->name);
+ goto out;
+ }
+
+ ret = fd_ctx_set (local->fd, this, (uint64_t)(long)dst_node);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set fd-ctx target file at %s",
+ local->loc.path, dst_node->name);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame)
+{
+
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ ret = synctask_new (conf->env, dht_rebalance_inprogress_task,
+ dht_inprogress_check_done,
+ frame, frame);
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
new file mode 100644
index 00000000000..1e9f54bdaf1
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -0,0 +1,1115 @@
+/*
+ Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "dht-common.h"
+
+int dht_access2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_readv2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_attr2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_open2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_flush2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_lk2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_fsync2 (xlator_t *this, call_frame_t *frame, int ret);
+
+int
+dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ local->op_errno = op_errno;
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+
+ if (!op_ret || (local->call_cnt != 1))
+ goto out;
+
+ /* rebalance would have happened */
+ local->rebalance.target_op_fn = dht_open2;
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (open, frame, op_ret, op_errno, local->fd);
+
+ return 0;
+}
+
+int
+dht_open2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = ENOENT;
+ if (op_ret)
+ goto out;
+
+ local->call_cnt = 2;
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open,
+ &local->loc, local->rebalance.flags, local->fd,
+ local->rebalance.wbflags);
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+int
+dht_open (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, fd_t *fd, int wbflags)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, loc, fd, GF_FOP_OPEN);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->rebalance.wbflags = wbflags;
+ local->rebalance.flags = flags;
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open,
+ loc, flags, fd, wbflags);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+int
+dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *stbuf)
+{
+ uint64_t tmp_subvol = 0;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ /* Check if the rebalance phase2 is true */
+ if ((op_ret == -1) || (IA_ISREG (stbuf->ia_type) &&
+ ((st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type) &
+ ~S_IFMT) == DHT_LINKFILE_MODE))) {
+ if (local->fd)
+ ret = fd_ctx_get (local->fd, this, &tmp_subvol);
+ if (ret) {
+ /* Phase 2 of migration */
+ local->rebalance.target_op_fn = dht_attr2;
+ ret = dht_rebalance_complete_check (this, frame);
+ } else {
+ /* value is already set in fd_ctx, that means no need
+ to check for whether its complete or not. */
+ dht_attr2 (this, frame, 0);
+ ret = 0;
+ }
+ if (!ret)
+ goto err;
+ }
+out:
+ DHT_STACK_UNWIND (stat, frame, op_ret, op_errno, stbuf);
+err:
+ return 0;
+}
+
+int
+dht_attr2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = local->op_errno;
+ if (op_ret == -1)
+ goto out;
+
+ subvol = local->cached_subvol;
+ local->call_cnt = 2;
+
+ if (local->fop == GF_FOP_FSTAT) {
+ STACK_WIND (frame, dht_file_attr_cbk, subvol,
+ subvol->fops->fstat, local->fd);
+ } else {
+ STACK_WIND (frame, dht_file_attr_cbk, subvol,
+ subvol->fops->stat, &local->loc);
+ }
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+int
+dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *stbuf)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+
+ goto unlock;
+ }
+
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK (&frame->lock);
+out:
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno,
+ &local->stbuf);
+ }
+err:
+ return 0;
+}
+
+int
+dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_STAT);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (IA_ISREG (loc->inode->ia_type)) {
+ local->call_cnt = 1;
+
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_file_attr_cbk, subvol,
+ subvol->fops->stat, loc);
+
+ return 0;
+ }
+
+ local->call_cnt = layout->cnt;
+
+ for (i = 0; i < layout->cnt; i++) {
+ subvol = layout->list[i].xlator;
+
+ STACK_WIND (frame, dht_attr_cbk,
+ subvol, subvol->fops->stat,
+ loc);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FSTAT);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (IA_ISREG (fd->inode->ia_type)) {
+ local->call_cnt = 1;
+
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_file_attr_cbk, subvol,
+ subvol->fops->fstat, fd);
+
+ return 0;
+ }
+
+ local->call_cnt = layout->cnt;
+
+ for (i = 0; i < layout->cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_attr_cbk,
+ subvol, subvol->fops->fstat,
+ fd);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+int
+dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ struct iovec *vector, int count, struct iatt *stbuf,
+ struct iobref *iobref)
+{
+ dht_local_t *local = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ /* This is already second try, no need for re-check */
+ if (local->call_cnt != 1)
+ goto out;
+
+ if ((op_ret == -1) && (op_errno != ENOENT))
+ goto out;
+
+ if ((op_ret == -1) || ((st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type) &
+ ~S_IFMT) == DHT_LINKFILE_MODE)) {
+ /* File would be migrated to other node */
+ ret = fd_ctx_get (local->fd, this, NULL);
+ if (ret) {
+ local->rebalance.target_op_fn = dht_readv2;
+ ret = dht_rebalance_complete_check (this, frame);
+ } else {
+ /* value is already set in fd_ctx, that means no need
+ to check for whether its complete or not. */
+ dht_readv2 (this, frame, 0);
+ }
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf,
+ iobref);
+
+ return 0;
+}
+
+int
+dht_readv2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = local->op_errno;
+ if (op_ret == -1)
+ goto out;
+
+ local->call_cnt = 2;
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_readv_cbk, subvol, subvol->fops->readv,
+ local->fd, local->rebalance.size, local->rebalance.offset);
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL);
+ return 0;
+}
+
+int
+dht_readv (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t off)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_READ);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->rebalance.offset = off;
+ local->rebalance.size = size;
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_readv_cbk,
+ subvol, subvol->fops->readv,
+ fd, size, off);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL);
+
+ return 0;
+}
+
+int
+dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno)
+{
+ int ret = -1;
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ if ((op_ret == -1) && (op_errno == ENOENT)) {
+ /* File would be migrated to other node */
+ local->rebalance.target_op_fn = dht_access2;
+ ret = dht_rebalance_complete_check (frame->this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STACK_UNWIND (access, frame, op_ret, op_errno);
+ return 0;
+}
+
+int
+dht_access2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = local->op_errno;
+ if (op_ret == -1)
+ goto out;
+
+ local->call_cnt = 2;
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
+ &local->loc, local->rebalance.flags);
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (access, frame, -1, op_errno);
+ return 0;
+}
+
+
+int
+dht_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_ACCESS);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.flags = mask;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
+ loc, mask);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (access, frame, -1, op_errno);
+
+ return 0;
+}
+
+
+int
+dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+
+ local = frame->local;
+
+ local->op_errno = op_errno;
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ /* If context is set, then send flush() it to the destination */
+ ret = fd_ctx_get (local->fd, this, NULL);
+ if (!ret) {
+ dht_flush2 (this, frame, 0);
+ return 0;
+ }
+
+out:
+ DHT_STACK_UNWIND (flush, frame, op_ret, op_errno);
+
+ return 0;
+}
+
+int
+dht_flush2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ uint64_t tmp_subvol = 0;
+ int ret = -1;
+
+ local = frame->local;
+
+ ret = fd_ctx_get (local->fd, this, &tmp_subvol);
+ if (!ret)
+ subvol = (xlator_t *)(long)tmp_subvol;
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND (frame, dht_flush_cbk,
+ subvol, subvol->fops->flush, local->fd);
+
+ return 0;
+}
+
+
+int
+dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FLUSH);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_flush_cbk,
+ subvol, subvol->fops->flush, fd);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (flush, frame, -1, op_errno);
+
+ return 0;
+}
+
+
+int
+dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ prev = cookie;
+
+ local->op_errno = op_errno;
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ ret = fd_ctx_get (local->fd, this, NULL);
+ if (ret) {
+ local->rebalance.target_op_fn = dht_fsync2;
+
+ /* Check if the rebalance phase1 is true */
+ if (IA_ISREG (postbuf->ia_type) &&
+ (postbuf->ia_prot.sticky == 1) &&
+ (postbuf->ia_prot.sgid == 1)) {
+ ret = dht_rebalance_in_progress_check (this, frame);
+ }
+
+ /* Check if the rebalance phase2 is true */
+ if (IA_ISREG (postbuf->ia_type) &&
+ ((st_mode_from_ia (postbuf->ia_prot, postbuf->ia_type) &
+ ~S_IFMT) == DHT_LINKFILE_MODE)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ }
+ } else {
+ dht_fsync2 (this, frame, 0);
+ ret = 0;
+ }
+ if (!ret)
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (fsync, frame, op_ret, op_errno,
+ prebuf, postbuf);
+
+ return 0;
+}
+
+int
+dht_fsync2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ uint64_t tmp_subvol = 0;
+ int ret = -1;
+
+ local = frame->local;
+
+ ret = fd_ctx_get (local->fd, this, &tmp_subvol);
+ if (!ret)
+ subvol = (xlator_t *)(long)tmp_subvol;
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync,
+ local->fd, local->rebalance.flags);
+
+ return 0;
+}
+
+int
+dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FSYNC);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
+ }
+
+ local->call_cnt = 1;
+ local->rebalance.flags = datasync;
+
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync,
+ fd, datasync);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+
+/* TODO: for 'lk()' call, we need some other special error, may be ESTALE to
+ indicate that lock migration happened on the fd, so we can consider it as
+ phase 2 of migration */
+int
+dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct gf_flock *flock)
+{
+ DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock);
+
+ return 0;
+}
+
+
+int
+dht_lk (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int cmd, struct gf_flock *flock)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /* TODO: for rebalance, we need to preserve the fop arguments */
+ STACK_WIND (frame, dht_lk_cbk, subvol, subvol->fops->lk, fd,
+ cmd, flock);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+/* Symlinks are currently not migrated, so no need for any check here */
+int
+dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, const char *path, struct iatt *sbuf)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+ if (op_ret == -1)
+ goto err;
+
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+
+err:
+ DHT_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, sbuf);
+
+ return 0;
+}
+
+
+int
+dht_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_READLINK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_readlink_cbk,
+ subvol, subvol->fops->readlink,
+ loc, size);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+/* Currently no translators on top of 'distribute' will be using
+ * below fops, hence not implementing 'migration' related checks
+ */
+
+int
+dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict)
+{
+ DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict);
+ return 0;
+}
+
+
+int
+dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_XATTROP);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = 1;
+
+ STACK_WIND (frame,
+ dht_xattrop_cbk,
+ subvol, subvol->fops->xattrop,
+ loc, flags, dict);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict)
+{
+ DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict);
+ return 0;
+}
+
+
+int
+dht_fxattrop (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame,
+ dht_fxattrop_cbk,
+ subvol, subvol->fops->fxattrop,
+ fd, flags, dict);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_inodelk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno)
+
+{
+ DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno);
+ return 0;
+}
+
+
+int32_t
+dht_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_INODELK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = 1;
+
+ STACK_WIND (frame,
+ dht_inodelk_cbk,
+ subvol, subvol->fops->inodelk,
+ volume, loc, cmd, lock);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (inodelk, frame, -1, op_errno);
+
+ return 0;
+}
+
+
+int
+dht_finodelk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno)
+
+{
+ DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno);
+ return 0;
+}
+
+
+int
+dht_finodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+
+ STACK_WIND (frame,
+ dht_finodelk_cbk,
+ subvol, subvol->fops->finodelk,
+ volume, fd, cmd, lock);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (finodelk, frame, -1, op_errno);
+
+ return 0;
+}
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
new file mode 100644
index 00000000000..21eca611740
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -0,0 +1,597 @@
+/*
+ Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "dht-common.h"
+
+int dht_writev2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_truncate2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_setattr2 (xlator_t *this, call_frame_t *frame, int ret);
+
+int
+dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+
+ if (op_ret == -1) {
+ goto out;
+ }
+
+ local = frame->local;
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ local->rebalance.target_op_fn = dht_writev2;
+
+ /* Phase 2 of migration */
+ if (IA_ISREG (postbuf->ia_type) &&
+ ((st_mode_from_ia (postbuf->ia_prot, postbuf->ia_type) &
+ ~S_IFMT) == DHT_LINKFILE_MODE)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IA_ISREG (postbuf->ia_type) && (postbuf->ia_prot.sticky == 1) &&
+ (postbuf->ia_prot.sgid == 1)) {
+ ret = fd_ctx_get (local->fd, this, NULL);
+ if (!ret) {
+ dht_writev2 (this, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf);
+
+ return 0;
+}
+
+int
+dht_writev2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ uint64_t tmp_subvol = 0;
+ int ret = -1;
+
+ local = frame->local;
+
+ ret = fd_ctx_get (local->fd, this, &tmp_subvol);
+ if (!ret)
+ subvol = (xlator_t *)(long)tmp_subvol;
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND (frame, dht_writev_cbk,
+ subvol, subvol->fops->writev,
+ local->fd, local->rebalance.vector, local->rebalance.count,
+ local->rebalance.offset, local->rebalance.iobref);
+
+ return 0;
+}
+
+int
+dht_writev (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, struct iovec *vector, int count, off_t off,
+ struct iobref *iobref)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_WRITE);
+ if (!local) {
+
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+
+ local->rebalance.vector = iov_dup (vector, count);
+ local->rebalance.offset = off;
+ local->rebalance.count = count;
+ local->rebalance.iobref = iobref_ref (iobref);
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_writev_cbk,
+ subvol, subvol->fops->writev,
+ fd, vector, count, off, iobref);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+
+
+int
+dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+
+ goto out;
+ }
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ local->rebalance.target_op_fn = dht_truncate2;
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || (IA_ISREG (postbuf->ia_type) &&
+ ((st_mode_from_ia (postbuf->ia_prot, postbuf->ia_type) &
+ ~S_IFMT) == DHT_LINKFILE_MODE))) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ goto err;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IA_ISREG (postbuf->ia_type) && (postbuf->ia_prot.sticky == 1) &&
+ (postbuf->ia_prot.sgid == 1)) {
+ ret = fd_ctx_get (local->fd, this, NULL);
+ if (!ret) {
+ dht_truncate2 (this, frame, 0);
+ goto err;
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ goto err;
+ }
+
+out:
+ DHT_STACK_UNWIND (truncate, frame, op_ret, op_errno,
+ prebuf, postbuf);
+err:
+ return 0;
+}
+
+
+int
+dht_truncate2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ uint64_t tmp_subvol = 0;
+ int ret = -1;
+
+ local = frame->local;
+
+ if (local->fd)
+ ret = fd_ctx_get (local->fd, this, &tmp_subvol);
+ if (!ret)
+ subvol = (xlator_t *)(long)tmp_subvol;
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ if (local->fop == GF_FOP_TRUNCATE) {
+ STACK_WIND (frame, dht_truncate_cbk, subvol,
+ subvol->fops->truncate, &local->loc,
+ local->rebalance.offset);
+ } else {
+ STACK_WIND (frame, dht_truncate_cbk, subvol,
+ subvol->fops->ftruncate, local->fd,
+ local->rebalance.offset);
+ }
+
+ return 0;
+}
+
+int
+dht_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_TRUNCATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.offset = offset;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_truncate_cbk,
+ subvol, subvol->fops->truncate,
+ loc, offset);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+int
+dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FTRUNCATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.offset = offset;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_truncate_cbk,
+ subvol, subvol->fops->ftruncate,
+ fd, offset);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+/* handle cases of migration here for 'setattr()' calls */
+int
+dht_file_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ prev = cookie;
+
+ local->op_errno = op_errno;
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ local->rebalance.target_op_fn = dht_setattr2;
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || (IA_ISREG (postbuf->ia_type) &&
+ ((st_mode_from_ia (postbuf->ia_prot, postbuf->ia_type) &
+ ~S_IFMT) == DHT_LINKFILE_MODE))) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ goto out;
+ }
+
+ /* At the end of the migration process, whatever 'attr' we
+ have on source file will be migrated to destination file
+ in one shot, hence we don't need to check for in progress
+ state here */
+out:
+ DHT_STACK_UNWIND (setattr, frame, op_ret, op_errno,
+ prebuf, postbuf);
+
+ return 0;
+}
+
+int
+dht_setattr2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ uint64_t tmp_subvol = 0;
+ int ret = -1;
+
+ local = frame->local;
+
+ if (local->fd)
+ ret = fd_ctx_get (local->fd, this, &tmp_subvol);
+ if (!ret)
+ subvol = (xlator_t *)(long)tmp_subvol;
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ if (local->fop == GF_FOP_SETATTR) {
+ STACK_WIND (frame, dht_file_setattr_cbk, subvol,
+ subvol->fops->setattr, &local->loc,
+ &local->rebalance.stbuf, local->rebalance.flags);
+ } else {
+ STACK_WIND (frame, dht_file_setattr_cbk, subvol,
+ subvol->fops->fsetattr, local->fd,
+ &local->rebalance.stbuf, local->rebalance.flags);
+ }
+
+ return 0;
+}
+
+
+/* Keep the existing code same for all the cases other than regular file */
+int
+dht_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto unlock;
+ }
+
+ dht_iatt_merge (this, &local->prebuf, statpre, prev->this);
+ dht_iatt_merge (this, &local->stbuf, statpost, prev->this);
+
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno,
+ &local->prebuf, &local->stbuf);
+
+ return 0;
+}
+
+
+int
+dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid)
+{
+ xlator_t *subvol = NULL;
+ dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int i = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_SETATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (!layout_is_sane (layout)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "layout is not sane for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (IA_ISREG (loc->inode->ia_type)) {
+ /* in the regular file _cbk(), we need to check for
+ migration possibilities */
+ local->rebalance.stbuf = *stbuf;
+ local->rebalance.flags = valid;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_file_setattr_cbk, subvol,
+ subvol->fops->setattr,
+ loc, stbuf, valid);
+
+ return 0;
+ }
+
+ local->call_cnt = layout->cnt;
+
+ for (i = 0; i < layout->cnt; i++) {
+ STACK_WIND (frame, dht_setattr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->setattr,
+ loc, stbuf, valid);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid)
+{
+ xlator_t *subvol = NULL;
+ dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int i = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FSETATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (!layout_is_sane (layout)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "layout is not sane for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (IA_ISREG (fd->inode->ia_type)) {
+ /* in the regular file _cbk(), we need to check for
+ migration possibilities */
+ local->rebalance.stbuf = *stbuf;
+ local->rebalance.flags = valid;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_file_setattr_cbk, subvol,
+ subvol->fops->fsetattr,
+ fd, stbuf, valid);
+
+ return 0;
+ }
+
+ local->call_cnt = layout->cnt;
+
+ for (i = 0; i < layout->cnt; i++) {
+ STACK_WIND (frame, dht_setattr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->fsetattr,
+ fd, stbuf, valid);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c
index 1c90c61d676..2186b064a3d 100644
--- a/xlators/cluster/dht/src/dht-linkfile.c
+++ b/xlators/cluster/dht/src/dht-linkfile.c
@@ -140,13 +140,14 @@ dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
goto err;
}
- unlink_local = dht_local_init (unlink_frame);
+ /* Using non-fop value here, as anyways, 'local->fop' is not used in
+ this particular case */
+ unlink_local = dht_local_init (unlink_frame, loc, NULL,
+ GF_FOP_MAXVALUE);
if (!unlink_local) {
goto err;
}
- loc_copy (&unlink_local->loc, loc);
-
STACK_WIND (unlink_frame, dht_linkfile_unlink_cbk,
subvol, subvol->fops->unlink,
&unlink_local->loc);
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 60008f2bd64..7b04e8a2dc8 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -83,15 +83,6 @@ dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count,
break;
}
- /* do it regardless of all the above cases as we had to 'write' the
- given number of bytes */
- ret = syncop_ftruncate (to, fd, offset + size);
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to perform truncate on %s", to->name);
- goto out;
- }
-
ret = size;
out:
return ret;
@@ -102,8 +93,7 @@ static inline int
__is_file_migratable (xlator_t *this, loc_t *loc, dict_t *rsp_dict,
struct iatt *stbuf)
{
- int ret = -1;
- int open_fd_count = 0;
+ int ret = -1;
if (!IA_ISREG (stbuf->ia_type)) {
gf_log (this->name, GF_LOG_WARNING,
@@ -121,14 +111,6 @@ __is_file_migratable (xlator_t *this, loc_t *loc, dict_t *rsp_dict,
goto out;
}
- ret = dict_get_int32 (rsp_dict, GLUSTERFS_OPEN_FD_COUNT, &open_fd_count);
- if (!ret && (open_fd_count > 0)) {
- /* TODO: support migration of files with open fds */
- gf_log (this->name, GF_LOG_WARNING,
- "%s: file has open fds, not attempting migration",
- loc->path);
- goto out;
- }
ret = 0;
out:
@@ -137,11 +119,10 @@ out:
static inline int
__dht_rebalance_create_dst_file (xlator_t *to, loc_t *loc, struct iatt *stbuf,
- dict_t *dict, fd_t **dst_fd, int *need_unlink)
+ dict_t *dict, fd_t **dst_fd)
{
xlator_t *this = NULL;
int ret = -1;
- mode_t mode = 0;
fd_t *fd = NULL;
struct iatt new_stbuf = {0,};
@@ -154,6 +135,13 @@ __dht_rebalance_create_dst_file (xlator_t *to, loc_t *loc, struct iatt *stbuf,
goto out;
}
+ ret = dict_set_str (dict, DHT_LINKFILE_KEY, to->name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set gfid in dict for create", loc->path);
+ goto out;
+ }
+
fd = fd_create (loc->inode, DHT_REBALANCE_PID);
if (!fd) {
gf_log (this->name, GF_LOG_ERROR,
@@ -163,40 +151,27 @@ __dht_rebalance_create_dst_file (xlator_t *to, loc_t *loc, struct iatt *stbuf,
}
ret = syncop_lookup (to, loc, NULL, &new_stbuf, NULL, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG, "failed to lookup %s on %s",
- loc->path, to->name);
-
- mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type);
- ret = syncop_create (to, loc, O_WRONLY, mode, fd, dict);
- if (ret < 0) {
+ if (!ret) {
+ /* File exits in the destination, check if gfid matches */
+ if (uuid_compare (stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) {
gf_log (this->name, GF_LOG_ERROR,
- "failed to create %s on %s", loc->path, to->name);
+ "file %s exits in %s with different gfid",
+ loc->path, to->name);
+ fd_unref (fd);
goto out;
}
-
- *need_unlink = 1;
- goto done;
- }
-
- /* File exits in the destination, just do the open if gfid matches */
- if (uuid_compare (stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "file %s exits in %s with different gfid",
- loc->path, to->name);
- fd_unref (fd);
- goto out;
}
- ret = syncop_open (to, loc, O_WRONLY, fd);
+ /* Create the destination with LINKFILE mode, and linkto xattr,
+ if the linkfile already exists, it will just open the file */
+ ret = syncop_create (to, loc, O_RDWR, DHT_LINKFILE_MODE, fd,
+ dict);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
- "failed to open file %s on %s",
- loc->path, to->name);
- fd_unref (fd);
+ "failed to create %s on %s", loc->path, to->name);
goto out;
}
-done:
+
if (dst_fd)
*dst_fd = fd;
@@ -254,16 +229,21 @@ out:
static inline int
__dht_rebalane_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
- int hole_exists)
+ uint64_t ia_size, int hole_exists)
{
- int ret = -1;
+ int ret = 0;
int count = 0;
off_t offset = 0;
struct iovec *vector = NULL;
struct iobref *iobref = NULL;
-
- while (1) {
- ret = syncop_readv (from, src, DHT_REBALANCE_BLKSIZE,
+ uint64_t total = 0;
+ size_t read_size = 0;
+
+ /* if file size is '0', no need to enter this loop */
+ while (total < ia_size) {
+ read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) ?
+ DHT_REBALANCE_BLKSIZE : (ia_size - total));
+ ret = syncop_readv (from, src, read_size,
offset, &vector, &count, &iobref);
if (!ret || (ret < 0)) {
break;
@@ -279,6 +259,7 @@ __dht_rebalane_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
break;
}
offset += ret;
+ total += ret;
if (vector)
GF_FREE (vector);
@@ -298,6 +279,84 @@ __dht_rebalane_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
return ret;
}
+
+static inline int
+__dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc,
+ struct iatt *stbuf, fd_t **src_fd)
+{
+ int ret = 0;
+ fd_t *fd = NULL;
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+ struct iatt iatt = {0,};
+
+ this = THIS;
+
+ fd = fd_create (loc->inode, DHT_REBALANCE_PID);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: fd create failed (source)", loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_open (from, loc, O_RDWR, fd);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to open file %s on %s",
+ loc->path, from->name);
+ goto out;
+ }
+
+ ret = -1;
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_str (dict, DHT_LINKFILE_KEY, to->name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set xattr in dict for %s (linkto:%s)",
+ loc->path, to->name);
+ goto out;
+ }
+
+ /* Once the migration starts, the source should have 'linkto' key set
+ to show which is the target, so other clients can work around it */
+ ret = syncop_setxattr (from, loc, dict, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set xattr on %s in %s",
+ loc->path, from->name);
+ goto out;
+ }
+
+ /* mode should be (+S+T) to indicate migration is in progress */
+ iatt.ia_prot = stbuf->ia_prot;
+ iatt.ia_type = stbuf->ia_type;
+ iatt.ia_prot.sticky = 1;
+ iatt.ia_prot.sgid = 1;
+
+ ret = syncop_setattr (from, loc, &iatt, GF_SET_ATTR_MODE, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set mode on %s in %s",
+ loc->path, from->name);
+ goto out;
+ }
+
+ if (src_fd)
+ *src_fd = fd;
+
+ /* success */
+ ret = 0;
+out:
+ if (dict)
+ dict_unref (dict);
+
+ return ret;
+}
+
int
dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
int flag)
@@ -305,13 +364,13 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
int ret = -1;
struct iatt new_stbuf = {0,};
struct iatt stbuf = {0,};
+ struct iatt empty_iatt = {0,};
fd_t *src_fd = NULL;
fd_t *dst_fd = NULL;
dict_t *dict = NULL;
dict_t *xattr = NULL;
dict_t *rsp_dict = NULL;
int file_has_holes = 0;
- int need_unlink = 0;
gf_log (this->name, GF_LOG_INFO, "%s: attempting to move from %s to %s",
loc->path, from->name, to->name);
@@ -326,6 +385,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
"%s: failed to set fd-count key in dict, may attempt "
"migration of file which has open fds", loc->path);
+ /* Phase 1 - Data migration is in progress from now on */
ret = syncop_lookup (from, loc, dict, &stbuf, &rsp_dict, NULL);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "failed to lookup %s on %s",
@@ -338,9 +398,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
if (ret)
goto out;
- /* create the destination */
- ret = __dht_rebalance_create_dst_file (to, loc, &stbuf, dict, &dst_fd,
- &need_unlink);
+ /* create the destination, with required modes/xattr */
+ ret = __dht_rebalance_create_dst_file (to, loc, &stbuf, dict, &dst_fd);
if (ret)
goto out;
@@ -351,81 +410,142 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
- /* Try to preserve 'holes' while migrating data */
- if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
- file_has_holes = 1;
-
- src_fd = fd_create (loc->inode, DHT_REBALANCE_PID);
- if (!src_fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: fd create failed (source)", loc->path);
- ret = -1;
+ /* Open the source, and also update mode/xattr */
+ ret = __dht_rebalance_open_src_file (from, to, loc, &stbuf, &src_fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to open %s on %s",
+ loc->path, from->name);
goto out;
}
- ret = syncop_open (from, loc, O_RDONLY, src_fd);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to open file %s on %s",
+ ret = syncop_fstat (from, src_fd, &stbuf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to lookup %s on %s",
loc->path, from->name);
goto out;
}
+ /* Try to preserve 'holes' while migrating data */
+ if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
+ file_has_holes = 1;
+
/* All I/O happens in this function */
ret = __dht_rebalane_migrate_data (from, to, src_fd, dst_fd,
- file_has_holes);
+ stbuf.ia_size, file_has_holes);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "%s: failed to migrate data",
loc->path);
goto out;
}
- ret = syncop_lookup (from, loc, NULL, &new_stbuf, NULL, NULL);
+ /* TODO: move all xattr related operations to fd based operations */
+ ret = syncop_listxattr (from, loc, &xattr);
+ if (ret == -1)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to get xattr from %s", loc->path, from->name);
+
+ ret = syncop_setxattr (to, loc, xattr, 0);
+ if (ret == -1)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set xattr on %s", loc->path, to->name);
+
+ /* TODO: Sync the locks */
+
+ ret = syncop_fsync (to, dst_fd);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to fsync on %s", loc->path, to->name);
+
+
+ /* Phase 2 - Data-Migration Complete, Housekeeping updates pending */
+
+ ret = syncop_fstat (from, src_fd, &new_stbuf);
if (ret < 0) {
/* Failed to get the stat info */
gf_log (this->name, GF_LOG_ERROR,
- "failed to lookup file %s on %s",
+ "failed to fstat file %s on %s",
loc->path, from->name);
- need_unlink = 0;
goto out;
}
- /* No need to rebalance, if there is some
- activity on source file */
- if (new_stbuf.ia_mtime != stbuf.ia_mtime) {
+ /* source would have both sticky bit and sgid bit set, reset it to 0,
+ and set the source permission on destination */
+ new_stbuf.ia_prot.sticky = 0;
+ new_stbuf.ia_prot.sgid = 0;
+
+ /* TODO: if the source actually had sticky bit, or sgid bit set,
+ we are not handling it */
+
+ ret = syncop_fsetattr (to, dst_fd, &new_stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID |
+ GF_SET_ATTR_MODE), NULL, NULL);
+ if (ret) {
gf_log (this->name, GF_LOG_WARNING,
- "%s: ignoring destination file as source has "
- "undergone some changes while migration was happening",
- loc->path);
- ret = -1;
- goto out;
+ "%s: failed to perform setattr on %s",
+ loc->path, to->name);
}
+ /* Because 'futimes' is not portable */
ret = syncop_setattr (to, loc, &new_stbuf,
- (GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_MODE | GF_SET_ATTR_ATIME |
- GF_SET_ATTR_MTIME), NULL, NULL);
+ (GF_SET_ATTR_MTIME | GF_SET_ATTR_ATIME),
+ NULL, NULL);
if (ret) {
gf_log (this->name, GF_LOG_WARNING,
"%s: failed to perform setattr on %s",
loc->path, to->name);
}
- ret = syncop_listxattr (from, loc, &xattr);
- if (ret == -1)
+ /* Make the source as a linkfile first before deleting it */
+ empty_iatt.ia_prot.sticky = 1;
+ ret = syncop_fsetattr (from, src_fd, &empty_iatt,
+ GF_SET_ATTR_MODE, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, \
+ "%s: failed to perform setattr on %s",
+ loc->path, from->name);
+ }
+
+ /* Do a stat and check the gfid before unlink */
+ ret = syncop_stat (from, loc, &empty_iatt);
+ if (ret) {
gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to get xattr from %s", loc->path, from->name);
+ "%s: failed to do a stat on %s",
+ loc->path, from->name);
+ }
- ret = syncop_setxattr (to, loc, xattr, 0);
- if (ret == -1)
+ if (uuid_compare (empty_iatt.ia_gfid, loc->inode->gfid) == 0) {
+ /* take out the source from namespace */
+ ret = syncop_unlink (from, loc);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform unlink on %s",
+ loc->path, from->name);
+ }
+ }
+
+ /* Free up the data blocks on the source node, as the whole
+ file is migrated */
+ ret = syncop_ftruncate (from, src_fd, 0);
+ if (ret) {
gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set xattr on %s", loc->path, to->name);
+ "%s: failed to perform truncate on %s",
+ loc->path, from->name);
+ }
- /* rebalance complete */
- syncop_close (dst_fd);
- syncop_close (src_fd);
- syncop_unlink (from, loc);
- need_unlink = 0;
+ /* remove the 'linkto' xattr from the destination */
+ ret = syncop_removexattr (to, loc, DHT_LINKFILE_KEY);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform removexattr on %s",
+ loc->path, to->name);
+ }
+
+ ret = syncop_lookup (this, loc, NULL, NULL, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to lookup the file on subvolumes",
+ loc->path);
+ }
gf_log (this->name, GF_LOG_INFO,
"completed migration of %s from subvolume %s to %s",
@@ -436,14 +556,10 @@ out:
if (dict)
dict_unref (dict);
- if (ret) {
- if (dst_fd)
- syncop_close (dst_fd);
- if (src_fd)
- syncop_close (src_fd);
- if (need_unlink)
- syncop_unlink (to, loc);
- }
+ if (dst_fd)
+ syncop_close (dst_fd);
+ if (src_fd)
+ syncop_close (src_fd);
return ret;
}
@@ -461,8 +577,8 @@ rebalance_task (void *data)
/* This function is 'synchrounous', hence if it returns,
we are done with the task */
- ret = dht_migrate_file (THIS, &local->loc, local->from_subvol,
- local->to_subvol, local->flags);
+ ret = dht_migrate_file (THIS, &local->loc, local->rebalance.from_subvol,
+ local->rebalance.target_node, local->flags);
return ret;
}
@@ -488,7 +604,7 @@ rebalance_task_completion (int op_ret, call_frame_t *sync_frame, void *data)
dht_layout_unref (this, layout);
}
- ret = dht_layout_preset (this, local->to_subvol,
+ ret = dht_layout_preset (this, local->rebalance.target_node,
local->loc.inode);
if (ret)
gf_log (this->name, GF_LOG_WARNING,
diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c
index bdc4dd2b678..226ce280d8b 100644
--- a/xlators/cluster/dht/src/dht-rename.c
+++ b/xlators/cluster/dht/src/dht-rename.c
@@ -487,17 +487,18 @@ dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto err;
}
- link_local = dht_local_init (link_frame);
+ /* fop value sent as maxvalue because it is not used
+ anywhere in this case */
+ link_local = dht_local_init (link_frame, &local->loc2, NULL,
+ GF_FOP_MAXVALUE);
if (!link_local) {
goto err;
}
- loc_copy (&link_local->loc, &local->loc2);
if (link_local->loc.inode)
inode_unref (link_local->loc.inode);
link_local->loc.inode = inode_ref (local->loc.inode);
uuid_copy (link_local->gfid, local->loc.inode->gfid);
- link_frame->local = link_local;
dht_linkfile_create (link_frame, dht_rename_links_create_cbk,
src_cached, dst_hashed, &link_local->loc);
@@ -813,17 +814,14 @@ dht_rename (call_frame_t *frame, xlator_t *this,
if (newloc->inode)
dst_cached = dht_subvol_get_cached (this, newloc->inode);
- local = dht_local_init (frame);
+ local = dht_local_init (frame, oldloc, NULL, GF_FOP_RENAME);
if (!local) {
op_errno = ENOMEM;
goto err;
}
-
- ret = loc_copy (&local->loc, oldloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
- }
+ /* cached_subvol will be set from dht_local_init, reset it to NULL,
+ as the logic of handling rename is different */
+ local->cached_subvol = NULL;
ret = loc_copy (&local->loc2, newloc);
if (ret == -1) {
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index 6c4c1ffcdcd..87a5756546f 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -26,7 +26,7 @@
/* TODO: add NS locking */
#include "statedump.h"
-#include "dht-common.c"
+#include "dht-common.h"
/* TODO:
- use volumename in xattr instead of "dht"
@@ -419,23 +419,8 @@ struct xlator_fops fops = {
.mknod = dht_mknod,
.create = dht_create,
- .stat = dht_stat,
- .fstat = dht_fstat,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .access = dht_access,
- .readlink = dht_readlink,
- .setxattr = dht_setxattr,
- .fsetxattr = dht_fsetxattr,
- .getxattr = dht_getxattr,
- .removexattr = dht_removexattr,
.open = dht_open,
- .readv = dht_readv,
- .writev = dht_writev,
- .flush = dht_flush,
- .fsync = dht_fsync,
.statfs = dht_statfs,
- .lk = dht_lk,
.opendir = dht_opendir,
.readdir = dht_readdir,
.readdirp = dht_readdirp,
@@ -446,10 +431,29 @@ struct xlator_fops fops = {
.mkdir = dht_mkdir,
.rmdir = dht_rmdir,
.rename = dht_rename,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
.entrylk = dht_entrylk,
.fentrylk = dht_fentrylk,
+
+ /* Inode read operations */
+ .stat = dht_stat,
+ .fstat = dht_fstat,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .getxattr = dht_getxattr,
+ .readv = dht_readv,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .lk = dht_lk,
+
+ /* Inode write operations */
+ .removexattr = dht_removexattr,
+ .setxattr = dht_setxattr,
+ .fsetxattr = dht_fsetxattr,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .writev = dht_writev,
.xattrop = dht_xattrop,
.fxattrop = dht_fxattrop,
.setattr = dht_setattr,
diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c
index 845c6b74ee1..9dcf224d177 100644
--- a/xlators/cluster/dht/src/nufa.c
+++ b/xlators/cluster/dht/src/nufa.c
@@ -23,7 +23,7 @@
#include "config.h"
#endif
-#include "dht-common.c"
+#include "dht-common.h"
/* TODO: all 'TODO's in dht.c holds good */
@@ -169,21 +169,12 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
conf = this->private;
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
if (!local) {
op_errno = ENOMEM;
goto err;
}
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "copying location failed for path=%s",
- loc->path);
- goto err;
- }
-
if (xattr_req) {
local->xattr_req = dict_ref (xattr_req);
} else {
@@ -191,14 +182,12 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
}
hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
- cached_subvol = dht_subvol_get_cached (this, local->loc.inode);
+ cached_subvol = local->cached_subvol;
- local->cached_subvol = cached_subvol;
local->hashed_subvol = hashed_subvol;
if (is_revalidate (loc)) {
- local->layout = layout = dht_layout_get (this, loc->inode);
-
+ layout = local->layout;
if (!layout) {
gf_log (this->name, GF_LOG_DEBUG,
"revalidate without cache. path=%s",
@@ -215,7 +204,7 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
goto do_fresh_lookup;
}
- local->inode = inode_ref (loc->inode);
+ local->inode = inode_ref (loc->inode);
local->call_cnt = layout->cnt;
call_cnt = local->call_cnt;
@@ -314,7 +303,6 @@ nufa_create (call_frame_t *frame, xlator_t *this,
xlator_t *subvol = NULL;
xlator_t *avail_subvol = NULL;
int op_errno = -1;
- int ret = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -324,7 +312,7 @@ nufa_create (call_frame_t *frame, xlator_t *this,
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -348,13 +336,6 @@ nufa_create (call_frame_t *frame, xlator_t *this,
if (subvol != avail_subvol) {
/* create a link file instead of actual file */
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->fd = fd_ref (fd);
local->params = dict_ref (params);
local->mode = mode;
local->flags = flags;
@@ -421,7 +402,6 @@ nufa_mknod (call_frame_t *frame, xlator_t *this,
xlator_t *subvol = NULL;
xlator_t *avail_subvol = NULL;
int op_errno = -1;
- int ret = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -431,7 +411,7 @@ nufa_mknod (call_frame_t *frame, xlator_t *this,
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -456,11 +436,6 @@ nufa_mknod (call_frame_t *frame, xlator_t *this,
if (avail_subvol != subvol) {
/* Create linkfile first */
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
- }
local->params = dict_ref (params);
local->mode = mode;
diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c
index e6a2e5d5cb7..fd3f22ea053 100644
--- a/xlators/cluster/dht/src/switch.c
+++ b/xlators/cluster/dht/src/switch.c
@@ -23,7 +23,7 @@
#include "config.h"
#endif
-#include "dht-common.c"
+#include "dht-common.h"
#include "dht-mem-types.h"
#include <sys/time.h>
@@ -246,21 +246,12 @@ switch_lookup (call_frame_t *frame, xlator_t *this,
conf = this->private;
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
if (!local) {
op_errno = ENOMEM;
goto err;
}
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "copying location failed for path=%s",
- loc->path);
- goto err;
- }
-
if (xattr_req) {
local->xattr_req = dict_ref (xattr_req);
} else {
@@ -268,14 +259,12 @@ switch_lookup (call_frame_t *frame, xlator_t *this,
}
hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
- cached_subvol = dht_subvol_get_cached (this, local->loc.inode);
+ cached_subvol = local->cached_subvol;
- local->cached_subvol = cached_subvol;
local->hashed_subvol = hashed_subvol;
if (is_revalidate (loc)) {
- local->layout = layout = dht_layout_get (this, loc->inode);
-
+ layout = local->layout;
if (!layout) {
gf_log (this->name, GF_LOG_DEBUG,
"revalidate without cache. path=%s",
@@ -418,7 +407,6 @@ switch_create (call_frame_t *frame, xlator_t *this,
xlator_t *subvol = NULL;
xlator_t *avail_subvol = NULL;
int op_errno = -1;
- int ret = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -428,7 +416,7 @@ switch_create (call_frame_t *frame, xlator_t *this,
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -451,13 +439,6 @@ switch_create (call_frame_t *frame, xlator_t *this,
if (subvol != avail_subvol) {
/* create a link file instead of actual file */
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->fd = fd_ref (fd);
local->mode = mode;
local->flags = flags;
@@ -520,7 +501,6 @@ switch_mknod (call_frame_t *frame, xlator_t *this,
xlator_t *subvol = NULL;
xlator_t *avail_subvol = NULL;
int op_errno = -1;
- int ret = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -530,7 +510,7 @@ switch_mknod (call_frame_t *frame, xlator_t *this,
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -554,11 +534,6 @@ switch_mknod (call_frame_t *frame, xlator_t *this,
if (avail_subvol != subvol) {
/* Create linkfile first */
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
- }
local->params = dict_ref (params);
local->mode = mode;
diff --git a/xlators/protocol/server/src/server3_1-fops.c b/xlators/protocol/server/src/server3_1-fops.c
index 5b6bf29fc6e..e869cb48ea5 100644
--- a/xlators/protocol/server/src/server3_1-fops.c
+++ b/xlators/protocol/server/src/server3_1-fops.c
@@ -2920,13 +2920,18 @@ server_create (rpcsvc_request_t *req)
buf = NULL;
}
- state->resolve.type = RESOLVE_NOT;
state->resolve.path = gf_strdup (args.path);
state->resolve.bname = gf_strdup (args.bname);
state->mode = args.mode;
state->flags = gf_flags_to_flags (args.flags);
memcpy (state->resolve.pargfid, args.pargfid, 16);
+ if (state->flags & O_EXCL) {
+ state->resolve.type = RESOLVE_NOT;
+ } else {
+ state->resolve.type = RESOLVE_DONTCARE;
+ }
+
ret = 0;
resolve_and_resume (frame, server_create_resume);
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index 30f246c3e6a..09818ce8902 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -3218,8 +3218,8 @@ posix_ftruncate (call_frame_t *frame, xlator_t *this,
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "ftruncate failed on fd=%p: %s",
- fd, strerror (errno));
+ "ftruncate failed on fd=%p (%"PRId64": %s",
+ fd, offset, strerror (errno));
goto out;
}