diff options
Diffstat (limited to 'xlators/cluster/dht/src/dht-common.c')
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 6353 |
1 files changed, 3498 insertions, 2855 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index be9c2b64a..8f61339e6 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -1,20 +1,11 @@ /* - Copyright (c) 2009-2009 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ @@ -27,12 +18,88 @@ #include "glusterfs.h" #include "xlator.h" +#include "libxlator.h" #include "dht-common.h" #include "defaults.h" +#include "byte-order.h" +#include "glusterfs-acl.h" #include <sys/time.h> #include <libgen.h> +int +dht_aggregate (dict_t *this, char *key, data_t *value, void *data) +{ + dict_t *dst = NULL; + int64_t *ptr = 0, *size = NULL; + int32_t ret = -1; + data_t *dict_data = NULL; + + dst = data; + + if (strcmp (key, GF_XATTR_QUOTA_SIZE_KEY) == 0) { + ret = dict_get_bin (dst, key, (void **)&size); + if (ret < 0) { + size = GF_CALLOC (1, sizeof (int64_t), + gf_common_mt_char); + if (size == NULL) { + gf_log ("dht", GF_LOG_WARNING, + "memory allocation failed"); + return -1; + } + ret = dict_set_bin (dst, key, size, sizeof (int64_t)); + if (ret < 0) { + gf_log ("dht", GF_LOG_WARNING, + "dht aggregate dict set failed"); + GF_FREE (size); + return -1; + } + } + + ptr = data_to_bin (value); + if (ptr == NULL) { + gf_log ("dht", GF_LOG_WARNING, "data to bin failed"); + return -1; + } + + *size = hton64 (ntoh64 (*size) + ntoh64 (*ptr)); + + } else if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) { + ret = gf_get_min_stime (THIS, dst, key, value); + if (ret < 0) + return ret; + } else { + /* compare user xattrs only */ + if (!strncmp (key, "user.", strlen ("user."))) { + ret = dict_lookup (dst, key, &dict_data); + if (!ret && dict_data && value) { + ret = is_data_equal (dict_data, value); + if (!ret) + gf_log ("dht", GF_LOG_DEBUG, + "xattr mismatch for %s", key); + } + } + ret = dict_set (dst, key, value); + if (ret) + gf_log ("dht", GF_LOG_WARNING, "xattr dict set failed"); + } + + return 0; +} + + +void +dht_aggregate_xattr (dict_t *dst, dict_t *src) +{ + if ((dst == NULL) || (src == NULL)) { + goto out; + } + + dict_foreach (src, dht_aggregate, dst); +out: + return; +} + /* TODO: - use volumename in xattr instead of "dht" - use NS locks @@ -43,40 +110,289 @@ int dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, - int op_ret, int op_errno) + xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - dht_layout_t *layout = NULL; - int ret = 0; + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + int ret = -1; - local = frame->local; - ret = op_ret; + GF_VALIDATE_OR_GOTO ("dht", frame, out); + GF_VALIDATE_OR_GOTO ("dht", this, out); + GF_VALIDATE_OR_GOTO ("dht", frame->local, out); - dht_frame_su_undo (frame); + local = frame->local; + ret = op_ret; - if (ret == 0) { - layout = local->selfheal.layout; - ret = dht_layout_set (this, local->inode, layout); + FRAME_SU_UNDO (frame, dht_local_t); - if (local->ia_ino) { - local->stbuf.ia_ino = local->ia_ino; - } else { - gf_log (this->name, GF_LOG_DEBUG, - "could not find hashed subvolume for %s", - local->loc.path); - } + if (ret == 0) { + layout = local->selfheal.layout; + ret = dht_layout_set (this, local->inode, layout); + } - if (local->loc.parent) - local->postparent.ia_ino = local->loc.parent->ino; - } + if (local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, this, + &local->postparent, 1); + } - WIPE (&local->postparent); + DHT_STRIP_PHASE1_FLAGS (&local->stbuf); - DHT_STACK_UNWIND (lookup, frame, ret, local->op_errno, local->inode, - &local->stbuf, local->xattr, &local->postparent); + DHT_STACK_UNWIND (lookup, frame, ret, local->op_errno, local->inode, + &local->stbuf, local->xattr, &local->postparent); - return 0; +out: + return ret; +} + + +int +dht_discover_complete (xlator_t *this, call_frame_t *discover_frame) +{ + dht_local_t *local = NULL; + call_frame_t *main_frame = NULL; + int op_errno = 0; + int ret = -1; + dht_layout_t *layout = NULL; + dht_conf_t *conf = NULL; + + local = discover_frame->local; + layout = local->layout; + conf = this->private; + + LOCK(&discover_frame->lock); + { + main_frame = local->main_frame; + local->main_frame = NULL; + } + UNLOCK(&discover_frame->lock); + + if (!main_frame) + return 0; + + if (local->file_count && local->dir_count) { + gf_log (this->name, GF_LOG_ERROR, + "path %s exists as a file on one subvolume " + "and directory on another. " + "Please fix it manually", + local->loc.path); + op_errno = EIO; + goto out; + } + + if (local->cached_subvol) { + ret = dht_layout_preset (this, local->cached_subvol, + local->inode); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "failed to set layout for subvolume %s", + local->cached_subvol ? local->cached_subvol->name : "<nil>"); + op_errno = EINVAL; + goto out; + } + } else { + ret = dht_layout_normalize (this, &local->loc, layout); + if ((ret < 0) || ((ret > 0) && (local->op_ret != 0))) { + /* either the layout is incorrect or the directory is + * not found even in one subvolume. + */ + gf_log (this->name, GF_LOG_DEBUG, + "normalizing failed on %s " + "(overlaps/holes present: %s, " + "ENOENT errors: %d)", local->loc.path, + (ret < 0) ? "yes" : "no", (ret > 0) ? ret : 0); + if ((ret > 0) && (ret == conf->subvolume_cnt)) { + op_errno = ESTALE; + goto out; + } + } + + if (local->inode) + dht_layout_set (this, local->inode, layout); + } + + DHT_STACK_UNWIND (lookup, main_frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr, + &local->postparent); + return 0; +out: + DHT_STACK_UNWIND (lookup, main_frame, -1, op_errno, NULL, NULL, NULL, + NULL); + + return ret; +} + + +int +dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct iatt *stbuf, dict_t *xattr, + struct iatt *postparent) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + dht_layout_t *layout = NULL; + int ret = -1; + int is_dir = 0; + int is_linkfile = 0; + int attempt_unwind = 0; + dht_conf_t *conf = 0; + + GF_VALIDATE_OR_GOTO ("dht", frame, out); + GF_VALIDATE_OR_GOTO ("dht", this, out); + GF_VALIDATE_OR_GOTO ("dht", frame->local, out); + GF_VALIDATE_OR_GOTO ("dht", this->private, out); + GF_VALIDATE_OR_GOTO ("dht", cookie, out); + + local = frame->local; + prev = cookie; + conf = this->private; + + layout = local->layout; + + /* Check if the gfid is different for file from other node */ + if (!op_ret && uuid_compare (local->gfid, stbuf->ia_gfid)) { + gf_log (this->name, GF_LOG_WARNING, + "%s: gfid different on %s", + local->loc.path, prev->this->name); + } + + + LOCK (&frame->lock); + { + /* TODO: assert equal mode on stbuf->st_mode and + local->stbuf->st_mode + + else mkdir/chmod/chown and fix + */ + ret = dht_layout_merge (this, layout, prev->this, + op_ret, op_errno, xattr); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to merge layouts", local->loc.path); + + if (op_ret == -1) { + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_DEBUG, + "lookup of %s on %s returned error (%s)", + local->loc.path, prev->this->name, + strerror (op_errno)); + + goto unlock; + } + + is_linkfile = check_is_linkfile (inode, stbuf, xattr, + conf->link_xattr_name); + is_dir = check_is_dir (inode, stbuf, xattr); + + if (is_dir) { + local->dir_count ++; + } else { + local->file_count ++; + + if (!is_linkfile) { + /* real file */ + local->cached_subvol = prev->this; + attempt_unwind = 1; + } else { + goto unlock; + } + } + + local->op_ret = 0; + + if (local->xattr == NULL) { + local->xattr = dict_ref (xattr); + } else { + dht_aggregate_xattr (local->xattr, xattr); + } + + if (local->inode == NULL) + local->inode = inode_ref (inode); + + dht_iatt_merge (this, &local->stbuf, stbuf, prev->this); + dht_iatt_merge (this, &local->postparent, postparent, + prev->this); + } +unlock: + UNLOCK (&frame->lock); +out: + this_call_cnt = dht_frame_return (frame); + + if (is_last_call (this_call_cnt) || attempt_unwind) { + dht_discover_complete (this, frame); + } + + if (is_last_call (this_call_cnt)) + DHT_STACK_DESTROY (frame); + + return 0; +} + + +int +dht_discover (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + int ret; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int call_cnt = 0; + int op_errno = EINVAL; + int i = 0; + call_frame_t *discover_frame = NULL; + + conf = this->private; + local = frame->local; + + ret = dict_set_uint32 (local->xattr_req, conf->xattr_name, 4 * 4); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to set '%s' key", + loc->path, conf->xattr_name); + + ret = dict_set_uint32 (local->xattr_req, conf->link_xattr_name, 256); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to set '%s' key", + loc->path, conf->link_xattr_name); + + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; + + local->layout = dht_layout_new (this, conf->subvolume_cnt); + + if (!local->layout) { + op_errno = ENOMEM; + goto err; + } + + uuid_copy (local->gfid, loc->gfid); + + discover_frame = copy_frame (frame); + if (!discover_frame) { + op_errno = ENOMEM; + goto err; + } + + discover_frame->local = local; + frame->local = NULL; + local->main_frame = frame; + + for (i = 0; i < call_cnt; i++) { + STACK_WIND (discover_frame, dht_discover_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, + &local->loc, local->xattr_req); + } + + return 0; + +err: + DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, + NULL); + + return 0; } @@ -86,65 +402,77 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, inode_t *inode, struct iatt *stbuf, dict_t *xattr, struct iatt *postparent) { - dht_conf_t *conf = NULL; dht_local_t *local = NULL; int this_call_cnt = 0; call_frame_t *prev = NULL; - dht_layout_t *layout = NULL; - int ret = 0; - int is_dir = 0; + dht_layout_t *layout = NULL; + int ret = -1; + int is_dir = 0; + + GF_VALIDATE_OR_GOTO ("dht", frame, out); + GF_VALIDATE_OR_GOTO ("dht", this, out); + GF_VALIDATE_OR_GOTO ("dht", frame->local, out); + GF_VALIDATE_OR_GOTO ("dht", this->private, out); + GF_VALIDATE_OR_GOTO ("dht", cookie, out); - conf = this->private; local = frame->local; prev = cookie; - layout = local->layout; + layout = local->layout; + + if (!op_ret && uuid_is_null (local->gfid)) + memcpy (local->gfid, stbuf->ia_gfid, 16); + + /* Check if the gfid is different for file from other node */ + if (!op_ret && uuid_compare (local->gfid, stbuf->ia_gfid)) { + gf_log (this->name, GF_LOG_WARNING, + "%s: gfid different on %s", + local->loc.path, prev->this->name); + } LOCK (&frame->lock); { /* TODO: assert equal mode on stbuf->st_mode and - local->stbuf->st_mode + local->stbuf->st_mode - else mkdir/chmod/chown and fix - */ - ret = dht_layout_merge (this, layout, prev->this, - op_ret, op_errno, xattr); + else mkdir/chmod/chown and fix + */ + ret = dht_layout_merge (this, layout, prev->this, + op_ret, op_errno, xattr); - if (op_ret == -1) { - local->op_errno = ENOENT; - gf_log (this->name, GF_LOG_DEBUG, - "lookup of %s on %s returned error (%s)", - local->loc.path, prev->this->name, - strerror (op_errno)); + if (op_ret == -1) { + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_DEBUG, + "lookup of %s on %s returned error (%s)", + local->loc.path, prev->this->name, + strerror (op_errno)); - goto unlock; - } + goto unlock; + } - is_dir = check_is_dir (inode, stbuf, xattr); - if (!is_dir) { + is_dir = check_is_dir (inode, stbuf, xattr); + if (!is_dir) { gf_log (this->name, GF_LOG_DEBUG, "lookup of %s on %s returned non dir 0%o", local->loc.path, prev->this->name, stbuf->ia_type); local->need_selfheal = 1; - goto unlock; + goto unlock; } - local->op_ret = 0; - if (local->xattr == NULL) - local->xattr = dict_ref (xattr); - if (local->inode == NULL) - local->inode = inode_ref (inode); + local->op_ret = 0; + if (local->xattr == NULL) { + local->xattr = dict_ref (xattr); + } else { + dht_aggregate_xattr (local->xattr, xattr); + } - dht_iatt_merge (this, &local->stbuf, stbuf, prev->this); + if (local->inode == NULL) + local->inode = inode_ref (inode); + + dht_iatt_merge (this, &local->stbuf, stbuf, prev->this); dht_iatt_merge (this, &local->postparent, postparent, prev->this); - - if (prev->this == dht_first_up_subvol (this)) { - local->ia_ino = local->stbuf.ia_ino; - local->ia_gen = local->stbuf.ia_gen; - } - } unlock: UNLOCK (&frame->lock); @@ -159,45 +487,39 @@ unlock: return 0; } - if (local->op_ret == 0) { - ret = dht_layout_normalize (this, &local->loc, layout); + if (local->op_ret == 0) { + ret = dht_layout_normalize (this, &local->loc, layout); - if (ret != 0) { - gf_log (this->name, GF_LOG_DEBUG, - "fixing assignment on %s", - local->loc.path); - goto selfheal; - } - - dht_layout_set (this, local->inode, layout); - - if (local->ia_ino) { - local->stbuf.ia_ino = local->ia_ino; - local->stbuf.ia_gen = local->ia_gen; - } else { - gf_log (this->name, GF_LOG_DEBUG, - "could not find hashed subvol for %s", - local->loc.path); - } + if (ret != 0) { + gf_log (this->name, GF_LOG_DEBUG, + "fixing assignment on %s", + local->loc.path); + goto selfheal; + } - if (local->loc.parent) - local->postparent.ia_ino = - local->loc.parent->ino; - } + dht_layout_set (this, local->inode, layout); + } + + if (local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, this, + &local->postparent, 1); + } - DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf, local->xattr, + DHT_STRIP_PHASE1_FLAGS (&local->stbuf); + DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr, &local->postparent); } - return 0; + return 0; selfheal: - dht_frame_su_do (frame); - ret = dht_selfheal_directory (frame, dht_lookup_selfheal_cbk, - &local->loc, layout); - - return 0; + FRAME_SU_DO (frame, dht_local_t); + uuid_copy (local->loc.gfid, local->gfid); + ret = dht_selfheal_directory (frame, dht_lookup_selfheal_cbk, + &local->loc, layout); +out: + return ret; } int @@ -209,138 +531,224 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, dht_local_t *local = NULL; int this_call_cnt = 0; call_frame_t *prev = NULL; - dht_layout_t *layout = NULL; - dht_conf_t *conf = NULL; - int ret = -1; - int is_dir = 0; - int is_linkfile = 0; + dht_layout_t *layout = NULL; + dht_conf_t *conf = NULL; + int ret = -1; + int is_dir = 0; + int is_linkfile = 0; + call_frame_t *copy = NULL; + dht_local_t *copy_local = NULL; + + GF_VALIDATE_OR_GOTO ("dht", frame, err); + GF_VALIDATE_OR_GOTO ("dht", this, err); + GF_VALIDATE_OR_GOTO ("dht", frame->local, err); + GF_VALIDATE_OR_GOTO ("dht", cookie, err); local = frame->local; prev = cookie; - conf = this->private; + conf = this->private; + if (!conf) + goto out; LOCK (&frame->lock); { - if (op_ret == -1) { - local->op_errno = op_errno; + if (op_ret == -1) { + local->op_errno = op_errno; - if ((op_errno != ENOTCONN) + if ((op_errno != ENOTCONN) && (op_errno != ENOENT) && (op_errno != ESTALE)) { - gf_log (this->name, GF_LOG_DEBUG, - "subvolume %s returned -1 (%s)", - prev->this->name, strerror (op_errno)); + gf_log (this->name, GF_LOG_INFO, + "subvolume %s for %s returned -1 (%s)", + prev->this->name, local->loc.path, + strerror (op_errno)); } - if (op_errno == ESTALE) { - /* propogate the ESTALE to parent. - * setting local->layout_mismatch would send + /* propagate the ESTALE to parent. + * setting local->return_estale would send * ESTALE to parent. */ - local->layout_mismatch = 1; + local->return_estale = 1; } - goto unlock; - } + /* if it is ENOENT, we may have to do a + * 'lookup_everywhere()' to make sure + * the file is not migrated */ + if (op_errno == ENOENT) { + if (IA_ISREG (local->loc.inode->ia_type)) { + local->need_lookup_everywhere = 1; + } + } + goto unlock; + } - if (stbuf->ia_type != local->inode->ia_type) { - gf_log (this->name, GF_LOG_DEBUG, - "mismatching filetypes 0%o v/s 0%o for %s", - (stbuf->ia_type), (local->inode->ia_type), - local->loc.path); + if (stbuf->ia_type != local->inode->ia_type) { + gf_log (this->name, GF_LOG_INFO, + "mismatching filetypes 0%o v/s 0%o for %s", + (stbuf->ia_type), (local->inode->ia_type), + local->loc.path); - local->op_ret = -1; - local->op_errno = EINVAL; + local->op_ret = -1; + local->op_errno = EINVAL; - goto unlock; - } + goto unlock; + } - layout = local->layout; - - is_dir = check_is_dir (inode, stbuf, xattr); - is_linkfile = check_is_linkfile (inode, stbuf, xattr); - - if (is_linkfile) { - gf_log (this->name, GF_LOG_DEBUG, - "linkfile found in revalidate for %s", - local->loc.path); - local->layout_mismatch = 1; - - goto unlock; - } + layout = local->layout; - if (is_dir) { - ret = dht_layout_dir_mismatch (this, layout, - prev->this, &local->loc, - xattr); - if (ret != 0) { - gf_log (this->name, GF_LOG_DEBUG, - "mismatching layouts for %s", - local->loc.path); - - local->layout_mismatch = 1; - - goto unlock; - } - } - - dht_iatt_merge (this, &local->stbuf, stbuf, prev->this); + is_dir = check_is_dir (inode, stbuf, xattr); + is_linkfile = check_is_linkfile (inode, stbuf, xattr, + conf->link_xattr_name); + + if (is_linkfile) { + gf_log (this->name, GF_LOG_INFO, + "linkfile found in revalidate for %s", + local->loc.path); + local->return_estale = 1; + + goto unlock; + } + + if (is_dir) { + ret = dht_dir_has_layout (xattr, conf->xattr_name); + if (ret >= 0) { + if (is_greater_time(local->stbuf.ia_ctime, + local->stbuf.ia_ctime_nsec, + stbuf->ia_ctime, + stbuf->ia_ctime_nsec)) { + local->prebuf.ia_gid = stbuf->ia_gid; + local->prebuf.ia_uid = stbuf->ia_uid; + } + } + if (local->stbuf.ia_type != IA_INVAL) + { + if ((local->stbuf.ia_gid != stbuf->ia_gid) || + (local->stbuf.ia_uid != stbuf->ia_uid)) { + local->need_selfheal = 1; + } + } + ret = dht_layout_dir_mismatch (this, layout, + prev->this, &local->loc, + xattr); + if (ret != 0) { + gf_log (this->name, GF_LOG_INFO, + "mismatching layouts for %s", + local->loc.path); + + local->layout_mismatch = 1; + + goto unlock; + } + } + + dht_iatt_merge (this, &local->stbuf, stbuf, prev->this); dht_iatt_merge (this, &local->postparent, postparent, prev->this); - - local->op_ret = 0; - local->stbuf.ia_ino = local->ia_ino; - local->stbuf.ia_gen = local->loc.inode->generation; - if (local->loc.parent) - local->postparent.ia_ino = local->loc.parent->ino; + local->op_ret = 0; - if (!local->xattr) - local->xattr = dict_ref (xattr); - } + if (!local->xattr) { + local->xattr = dict_ref (xattr); + } else if (is_dir) { + dht_aggregate_xattr (local->xattr, xattr); + } + } unlock: - UNLOCK (&frame->lock); - + UNLOCK (&frame->lock); +out: this_call_cnt = dht_frame_return (frame); if (is_last_call (this_call_cnt)) { - if (!IA_ISDIR (local->stbuf.ia_type) - && (local->hashed_subvol != local->cached_subvol) - && (local->stbuf.ia_nlink == 1) - && (conf->unhashed_sticky_bit)) { - local->stbuf.ia_prot.sticky = 1; - } - + if (!IA_ISDIR (local->stbuf.ia_type) + && (local->hashed_subvol != local->cached_subvol) + && (local->stbuf.ia_nlink == 1) + && (conf && conf->unhashed_sticky_bit)) { + local->stbuf.ia_prot.sticky = 1; + } + if (local->need_selfheal) { + local->need_selfheal = 0; + uuid_copy (local->gfid, local->stbuf.ia_gfid); + local->stbuf.ia_gid = local->prebuf.ia_gid; + local->stbuf.ia_uid = local->prebuf.ia_uid; + copy = create_frame (this, this->ctx->pool); + if (copy) { + copy_local = dht_local_init (copy, &local->loc, + NULL, 0); + if (!copy_local) + goto cont; + copy_local->stbuf = local->stbuf; + copy->local = copy_local; + FRAME_SU_DO (copy, dht_local_t); + ret = synctask_new (this->ctx->env, + dht_dir_attr_heal, + dht_dir_attr_heal_done, + copy, copy); + } + } +cont: if (local->layout_mismatch) { - local->op_ret = -1; - local->op_errno = ESTALE; - } + /* Found layout mismatch in the directory, need to + fix this in the inode context */ + dht_layout_unref (this, local->layout); + local->layout = NULL; + dht_lookup_directory (frame, this, &local->loc); + return 0; + } - WIPE (&local->postparent); + if (local->need_lookup_everywhere) { + /* As the current layout gave ENOENT error, we would + need a new layout */ + dht_layout_unref (this, local->layout); + local->layout = NULL; + + /* We know that current cached subvol is no more + valid, get the new one */ + local->cached_subvol = NULL; + dht_lookup_everywhere (frame, this, &local->loc); + return 0; + } + if (local->return_estale) { + local->op_ret = -1; + local->op_errno = ESTALE; + } + + if (local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, this, + &local->postparent, 1); + } - DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf, local->xattr, + DHT_STRIP_PHASE1_FLAGS (&local->stbuf); + DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr, &local->postparent); - } + } - return 0; +err: + return ret; } int dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, + xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent) + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *cached_subvol = NULL; - dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + xlator_t *cached_subvol = NULL; + dht_conf_t *conf = NULL; int ret = -1; - local = frame->local; - cached_subvol = local->cached_subvol; - conf = this->private; + GF_VALIDATE_OR_GOTO ("dht", frame, out); + GF_VALIDATE_OR_GOTO ("dht", this, out); + GF_VALIDATE_OR_GOTO ("dht", frame->local, out); + GF_VALIDATE_OR_GOTO ("dht", this->private, out); + GF_VALIDATE_OR_GOTO ("dht", cookie, out); + + local = frame->local; + cached_subvol = local->cached_subvol; + conf = this->private; ret = dht_layout_preset (this, local->cached_subvol, inode); if (ret < 0) { @@ -352,72 +760,232 @@ dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie, goto unwind; } - local->op_ret = 0; - if ((local->stbuf.ia_nlink == 1) - && (conf->unhashed_sticky_bit)) { - local->stbuf.ia_prot.sticky = 1; - } + local->op_ret = 0; + if ((local->stbuf.ia_nlink == 1) + && (conf && conf->unhashed_sticky_bit)) { + local->stbuf.ia_prot.sticky = 1; + } - if (local->loc.parent) - local->postparent.ia_ino = local->loc.parent->ino; + if (local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, this, + postparent, 1); + } unwind: - WIPE (&local->postparent); + if (local->linked == _gf_true) + dht_linkfile_attr_heal (frame, this); - DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf, local->xattr, + DHT_STRIP_PHASE1_FLAGS (&local->stbuf); + DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr, &local->postparent); - return 0; +out: + return ret; +} + + +int +dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this) +{ + int ret = 0; + dht_local_t *local = NULL; + xlator_t *hashed_subvol = NULL; + xlator_t *cached_subvol = NULL; + dht_layout_t *layout = NULL; + + local = frame->local; + hashed_subvol = local->hashed_subvol; + cached_subvol = local->cached_subvol; + + if (local->file_count && local->dir_count) { + gf_log (this->name, GF_LOG_ERROR, + "path %s exists as a file on one subvolume " + "and directory on another. " + "Please fix it manually", + local->loc.path); + DHT_STACK_UNWIND (lookup, frame, -1, EIO, NULL, NULL, NULL, + NULL); + return 0; + } + + if (local->dir_count) { + dht_lookup_directory (frame, this, &local->loc); + return 0; + } + + if (!cached_subvol) { + DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL, NULL, NULL, + NULL); + return 0; + } + + if (local->need_lookup_everywhere) { + if (uuid_compare (local->gfid, local->inode->gfid)) { + /* GFID different, return error */ + DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL, + NULL, NULL, NULL); + return 0; + } + local->op_ret = 0; + local->op_errno = 0; + layout = dht_layout_for_subvol (this, cached_subvol); + if (!layout) { + gf_log (this->name, GF_LOG_INFO, + "%s: no pre-set layout for subvolume %s", + local->loc.path, (cached_subvol ? + cached_subvol->name : + "<nil>")); + } + + ret = dht_layout_set (this, local->inode, layout); + if (ret < 0) { + gf_log (this->name, GF_LOG_INFO, + "%s: failed to set layout for subvol %s", + local->loc.path, (cached_subvol ? + cached_subvol->name : + "<nil>")); + } + + if (local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, this, + &local->postparent, 1); + } + + DHT_STRIP_PHASE1_FLAGS (&local->stbuf); + DHT_STACK_UNWIND (lookup, frame, local->op_ret, + local->op_errno, local->inode, + &local->stbuf, local->xattr, + &local->postparent); + return 0; + } + + if (!hashed_subvol) { + gf_log (this->name, GF_LOG_INFO, + "cannot create linkfile file for %s on %s: " + "hashed subvolume cannot be found.", + local->loc.path, cached_subvol->name); + + local->op_ret = 0; + local->op_errno = 0; + + ret = dht_layout_preset (frame->this, cached_subvol, + local->inode); + if (ret < 0) { + gf_log (this->name, GF_LOG_INFO, + "failed to set layout for subvol %s", + cached_subvol ? cached_subvol->name : + "<nil>"); + local->op_ret = -1; + local->op_errno = EINVAL; + } + + if (local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, this, + &local->postparent, 1); + } + + DHT_STRIP_PHASE1_FLAGS (&local->stbuf); + DHT_STACK_UNWIND (lookup, frame, local->op_ret, + local->op_errno, local->inode, + &local->stbuf, local->xattr, + &local->postparent); + return 0; + } + + gf_log (this->name, GF_LOG_DEBUG, + "linking file %s existing on %s to %s (hash)", + local->loc.path, cached_subvol->name, + hashed_subvol->name); + + ret = dht_linkfile_create (frame, + dht_lookup_linkfile_create_cbk, this, + cached_subvol, hashed_subvol, &local->loc); + + return ret; +} + + +int +dht_lookup_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + int this_call_cnt = 0; + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + dht_lookup_everywhere_done (frame, this); + } + + return 0; } int dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, + int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xattr, struct iatt *postparent) { - dht_conf_t *conf = NULL; dht_local_t *local = NULL; int this_call_cnt = 0; call_frame_t *prev = NULL; - int is_linkfile = 0; - int is_dir = 0; - xlator_t *subvol = NULL; - loc_t *loc = NULL; - xlator_t *link_subvol = NULL; - xlator_t *hashed_subvol = NULL; - xlator_t *cached_subvol = NULL; - int ret = -1; + int is_linkfile = 0; + int is_dir = 0; + xlator_t *subvol = NULL; + loc_t *loc = NULL; + xlator_t *link_subvol = NULL; + int ret = -1; + int32_t fd_count = 0; + dht_conf_t *conf = NULL; - conf = this->private; + GF_VALIDATE_OR_GOTO ("dht", frame, out); + GF_VALIDATE_OR_GOTO ("dht", this, out); + GF_VALIDATE_OR_GOTO ("dht", frame->local, out); + GF_VALIDATE_OR_GOTO ("dht", cookie, out); + GF_VALIDATE_OR_GOTO ("dht", this->private, out); - local = frame->local; - loc = &local->loc; + local = frame->local; + loc = &local->loc; + conf = this->private; - prev = cookie; - subvol = prev->this; + prev = cookie; + subvol = prev->this; - LOCK (&frame->lock); - { - if (op_ret == -1) { - if (op_errno != ENOENT) - local->op_errno = op_errno; - goto unlock; - } + LOCK (&frame->lock); + { + if (op_ret == -1) { + if (op_errno != ENOENT) + local->op_errno = op_errno; + goto unlock; + } - is_linkfile = check_is_linkfile (inode, buf, xattr); - is_dir = check_is_dir (inode, buf, xattr); - - if (is_linkfile) { - link_subvol = dht_linkfile_subvol (this, inode, buf, - xattr); - gf_log (this->name, GF_LOG_DEBUG, - "found on %s linkfile %s (-> %s)", - subvol->name, loc->path, - link_subvol ? link_subvol->name : "''"); - goto unlock; - } + if (uuid_is_null (local->gfid)) + uuid_copy (local->gfid, buf->ia_gfid); + + if (uuid_compare (local->gfid, buf->ia_gfid)) { + gf_log (this->name, GF_LOG_WARNING, + "%s: gfid differs on subvolume %s", + loc->path, prev->this->name); + } + + is_linkfile = check_is_linkfile (inode, buf, xattr, + conf->link_xattr_name); + is_dir = check_is_dir (inode, buf, xattr); + + if (is_linkfile) { + link_subvol = dht_linkfile_subvol (this, inode, buf, + xattr); + gf_log (this->name, GF_LOG_DEBUG, + "found on %s linkfile %s (-> %s)", + subvol->name, loc->path, + link_subvol ? link_subvol->name : "''"); + goto unlock; + } + + /* non linkfile GFID takes precedence */ + uuid_copy (local->gfid, buf->ia_gfid); if (is_dir) { local->dir_count++; @@ -437,126 +1005,82 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this, gf_log (this->name, GF_LOG_DEBUG, "found on %s file %s", subvol->name, loc->path); - + dht_iatt_merge (this, &local->postparent, postparent, subvol); } else { - gf_log (this->name, GF_LOG_DEBUG, + /* This is where we need 'rename' both entries logic */ + gf_log (this->name, GF_LOG_WARNING, "multiple subvolumes (%s and %s) have " - "file %s", local->cached_subvol->name, + "file %s (preferably rename the file " + "in the backend, and do a fresh lookup)", + local->cached_subvol->name, subvol->name, local->loc.path); } } - } + } unlock: - UNLOCK (&frame->lock); - - if (is_linkfile) { - gf_log (this->name, GF_LOG_DEBUG, - "deleting stale linkfile %s on %s", - loc->path, subvol->name); - dht_linkfile_unlink (frame, this, subvol, loc); - } - - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - hashed_subvol = local->hashed_subvol; - cached_subvol = local->cached_subvol; - - if (local->file_count && local->dir_count) { - gf_log (this->name, GF_LOG_ERROR, - "path %s exists as a file on one subvolume " - "and directory on another. " - "Please fix it manually", - loc->path); - DHT_STACK_UNWIND (lookup, frame, -1, EIO, NULL, NULL, NULL, - NULL); - return 0; - } - - if (local->dir_count) { - dht_lookup_directory (frame, this, &local->loc); - return 0; - } - - if (!cached_subvol) { - DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL, NULL, NULL, - NULL); - return 0; - } - - if (!hashed_subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "cannot create linkfile file for %s on %s: " - "hashed subvolume cannot be found.", - loc->path, cached_subvol->name); - - local->op_ret = 0; - local->op_errno = 0; - - ret = dht_layout_preset (frame->this, cached_subvol, - local->inode); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "failed to set layout for subvol %s", - cached_subvol ? cached_subvol->name : - "<nil>"); - local->op_ret = -1; - local->op_errno = EINVAL; - } - - if (local->loc.parent) - local->postparent.ia_ino = - local->loc.parent->ino; - - WIPE (&local->postparent); + UNLOCK (&frame->lock); - DHT_STACK_UNWIND (lookup, frame, local->op_ret, - local->op_errno, local->inode, - &local->stbuf, local->xattr, - &local->postparent); + if (is_linkfile) { + ret = dict_get_int32 (xattr, GLUSTERFS_OPEN_FD_COUNT, &fd_count); + /* Delete the linkfile only if there are no open fds on it. + if there is a open-fd, it may be in migration */ + if (!ret && (fd_count == 0)) { + gf_log (this->name, GF_LOG_INFO, + "deleting stale linkfile %s on %s", + loc->path, subvol->name); + STACK_WIND (frame, dht_lookup_unlink_cbk, + subvol, subvol->fops->unlink, loc, 0, NULL); return 0; } + } - gf_log (this->name, GF_LOG_DEBUG, - "linking file %s existing on %s to %s (hash)", - loc->path, cached_subvol->name, - hashed_subvol->name); - - dht_linkfile_create (frame, - dht_lookup_linkfile_create_cbk, - cached_subvol, hashed_subvol, loc); - } + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + dht_lookup_everywhere_done (frame, this); + } - return 0; +out: + return ret; } int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc) { - dht_conf_t *conf = NULL; - dht_local_t *local = NULL; - int i = 0; - int call_cnt = 0; + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + int i = 0; + int call_cnt = 0; - conf = this->private; - local = frame->local; + GF_VALIDATE_OR_GOTO ("dht", frame, err); + GF_VALIDATE_OR_GOTO ("dht", this, out); + GF_VALIDATE_OR_GOTO ("dht", frame->local, out); + GF_VALIDATE_OR_GOTO ("dht", this->private, out); + GF_VALIDATE_OR_GOTO ("dht", loc, out); - call_cnt = conf->subvolume_cnt; - local->call_cnt = call_cnt; + conf = this->private; + local = frame->local; - if (!local->inode) - local->inode = inode_ref (loc->inode); + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; - for (i = 0; i < call_cnt; i++) { - STACK_WIND (frame, dht_lookup_everywhere_cbk, - conf->subvolumes[i], - conf->subvolumes[i]->fops->lookup, - loc, local->xattr_req); - } + if (!local->inode) + local->inode = inode_ref (loc->inode); - return 0; + for (i = 0; i < call_cnt; i++) { + STACK_WIND (frame, dht_lookup_everywhere_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, + loc, local->xattr_req); + } + + return 0; +out: + DHT_STACK_UNWIND (lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL); +err: + return -1; } @@ -567,60 +1091,82 @@ dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie, struct iatt *postparent) { call_frame_t *prev = NULL; - dht_local_t *local = NULL; - xlator_t *subvol = NULL; - loc_t *loc = NULL; - dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + xlator_t *subvol = NULL; + loc_t *loc = NULL; + dht_conf_t *conf = NULL; int ret = 0; + GF_VALIDATE_OR_GOTO ("dht", frame, out); + GF_VALIDATE_OR_GOTO ("dht", this, unwind); + GF_VALIDATE_OR_GOTO ("dht", frame->local, unwind); + GF_VALIDATE_OR_GOTO ("dht", this->private, unwind); + GF_VALIDATE_OR_GOTO ("dht", cookie, unwind); + prev = cookie; - subvol = prev->this; - conf = this->private; - local = frame->local; - loc = &local->loc; + subvol = prev->this; + conf = this->private; + local = frame->local; + loc = &local->loc; if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "lookup of %s on %s (following linkfile) failed (%s)", - local->loc.path, subvol->name, strerror (op_errno)); - goto err; - } + gf_log (this->name, GF_LOG_INFO, + "lookup of %s on %s (following linkfile) failed (%s)", + local->loc.path, subvol->name, strerror (op_errno)); + + /* If cached subvol returned ENOTCONN, do not do + lookup_everywhere. We need to make sure linkfile does not get + removed, which can take away the namespace, and subvol is + anyways down. */ + + if (op_errno != ENOTCONN) + goto err; + else + goto unwind; + } if (check_is_dir (inode, stbuf, xattr)) { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_INFO, "lookup of %s on %s (following linkfile) reached dir", local->loc.path, subvol->name); goto err; } - if (check_is_linkfile (inode, stbuf, xattr)) { - gf_log (this->name, GF_LOG_DEBUG, + if (check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) { + gf_log (this->name, GF_LOG_INFO, "lookup of %s on %s (following linkfile) reached link", local->loc.path, subvol->name); goto err; } - if ((stbuf->ia_nlink == 1) - && (conf->unhashed_sticky_bit)) { - stbuf->ia_prot.sticky = 1; - } - dht_itransform (this, prev->this, stbuf->ia_ino, &stbuf->ia_ino); - if (local->loc.parent) - postparent->ia_ino = local->loc.parent->ino; - - ret = dht_layout_preset (this, prev->this, inode); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, + if (uuid_compare (local->gfid, stbuf->ia_gfid)) { + gf_log (this->name, GF_LOG_WARNING, + "%s: gfid different on data file on %s", + local->loc.path, subvol->name); + goto err; + } + + if ((stbuf->ia_nlink == 1) + && (conf && conf->unhashed_sticky_bit)) { + stbuf->ia_prot.sticky = 1; + } + + ret = dht_layout_preset (this, prev->this, inode); + if (ret < 0) { + gf_log (this->name, GF_LOG_INFO, "failed to set layout for subvolume %s", prev->this->name); - op_ret = -1; - op_errno = EINVAL; - goto out; - } + op_ret = -1; + op_errno = EINVAL; + } -out: - WIPE (postparent); + if (local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, this, + postparent, 1); + } +unwind: + DHT_STRIP_PHASE1_FLAGS (stbuf); DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr, postparent); @@ -628,7 +1174,7 @@ out: err: dht_lookup_everywhere (frame, this, loc); - +out: return 0; } @@ -640,21 +1186,38 @@ dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc) int i = 0; dht_conf_t *conf = NULL; dht_local_t *local = NULL; + int ret = 0; + + GF_VALIDATE_OR_GOTO ("dht", frame, out); + GF_VALIDATE_OR_GOTO ("dht", this, unwind); + GF_VALIDATE_OR_GOTO ("dht", frame->local, unwind); + GF_VALIDATE_OR_GOTO ("dht", this->private, unwind); + GF_VALIDATE_OR_GOTO ("dht", loc, unwind); conf = this->private; local = frame->local; call_cnt = conf->subvolume_cnt; local->call_cnt = call_cnt; - + local->layout = dht_layout_new (this, conf->subvolume_cnt); if (!local->layout) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - DHT_STACK_UNWIND (lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL); - return 0; + goto unwind; } - + + if (local->xattr != NULL) { + dict_unref (local->xattr); + local->xattr = NULL; + } + + if (!uuid_is_null (local->gfid)) { + ret = dict_set_static_bin (local->xattr_req, "gfid-req", + local->gfid, 16); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to set gfid", local->loc.path); + } + for (i = 0; i < call_cnt; i++) { STACK_WIND (frame, dht_lookup_dir_cbk, conf->subvolumes[i], @@ -662,6 +1225,11 @@ dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc) &local->loc, local->xattr_req); } return 0; +unwind: + DHT_STACK_UNWIND (lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL); +out: + return 0; + } @@ -679,105 +1247,154 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, loc_t *loc = NULL; call_frame_t *prev = NULL; int ret = 0; - uint64_t tmp_layout = 0; dht_layout_t *parent_layout = NULL; + GF_VALIDATE_OR_GOTO ("dht", frame, err); + GF_VALIDATE_OR_GOTO ("dht", this, out); + GF_VALIDATE_OR_GOTO ("dht", frame->local, out); + GF_VALIDATE_OR_GOTO ("dht", cookie, out); + GF_VALIDATE_OR_GOTO ("dht", this->private, out); + conf = this->private; prev = cookie; local = frame->local; loc = &local->loc; - if (ENTRY_MISSING (op_ret, op_errno)) { + /* This is required for handling stale linkfile deletion, + * or any more call which happens from this 'loc'. + */ + if (!op_ret && uuid_is_null (local->gfid)) + memcpy (local->gfid, stbuf->ia_gfid, 16); + + if (ENTRY_MISSING (op_ret, op_errno)) { + gf_log (this->name, GF_LOG_TRACE, "Entry %s missing on subvol" + " %s", loc->path, prev->this->name); if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_ON) { - local->op_errno = ENOENT; - dht_lookup_everywhere (frame, this, loc); - return 0; - } + local->op_errno = ENOENT; + dht_lookup_everywhere (frame, this, loc); + return 0; + } if ((conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) && (loc->parent)) { - ret = inode_ctx_get (loc->parent, this, &tmp_layout); - parent_layout = (dht_layout_t *)(long)tmp_layout; + ret = dht_inode_ctx_layout_get (loc->parent, this, + &parent_layout); + if (ret || !parent_layout) + goto out; if (parent_layout->search_unhashed) { local->op_errno = ENOENT; dht_lookup_everywhere (frame, this, loc); return 0; } } - } + } - if (op_ret == 0) { - is_dir = check_is_dir (inode, stbuf, xattr); - if (is_dir) { - local->inode = inode_ref (inode); - local->xattr = dict_ref (xattr); - } - } + if (op_ret == 0) { + is_dir = check_is_dir (inode, stbuf, xattr); + if (is_dir) { + local->inode = inode_ref (inode); + local->xattr = dict_ref (xattr); + } + } - if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) { + if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) { dht_lookup_directory (frame, this, &local->loc); return 0; - } - - if (op_ret == -1) + } + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, "Lookup of %s for subvolume" + " %s failed with error %s", loc->path, prev->this->name, + strerror (op_errno)); goto out; + } - is_linkfile = check_is_linkfile (inode, stbuf, xattr); - is_dir = check_is_dir (inode, stbuf, xattr); + is_linkfile = check_is_linkfile (inode, stbuf, xattr, + conf->link_xattr_name); - if (!is_dir && !is_linkfile) { + if (!is_linkfile) { /* non-directory and not a linkfile */ - dht_itransform (this, prev->this, stbuf->ia_ino, - &stbuf->ia_ino); - if (loc->parent) - postparent->ia_ino = loc->parent->ino; - - ret = dht_layout_preset (this, prev->this, inode); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "could not set pre-set layout for subvolume %s", - prev->this->name); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - goto out; - } - - if (is_linkfile) { - subvol = dht_linkfile_subvol (this, inode, stbuf, xattr); - - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "linkfile not having link subvolume. path=%s", - loc->path); - dht_lookup_everywhere (frame, this, loc); - return 0; + ret = dht_layout_preset (this, prev->this, inode); + if (ret < 0) { + gf_log (this->name, GF_LOG_INFO, + "could not set pre-set layout for subvolume %s", + prev->this->name); + op_ret = -1; + op_errno = EINVAL; + goto out; } + goto out; + } - STACK_WIND (frame, dht_lookup_linkfile_cbk, - subvol, subvol->fops->lookup, - &local->loc, local->xattr_req); + subvol = dht_linkfile_subvol (this, inode, stbuf, xattr); + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "linkfile not having link subvolume. path=%s", + loc->path); + dht_lookup_everywhere (frame, this, loc); + return 0; } + STACK_WIND (frame, dht_lookup_linkfile_cbk, + subvol, subvol->fops->lookup, + &local->loc, local->xattr_req); + return 0; out: - /* - * FIXME: postparent->ia_size and postparent->st_blocks do not have - * correct values. since, postparent corresponds to a directory these + /* + * FIXME: postparent->ia_size and postparent->st_blocks do not have + * correct values. since, postparent corresponds to a directory these * two members should have values equal to sum of corresponding values * from each of the subvolume. See dht_iatt_merge for reference. */ - WIPE (postparent); + if (!op_ret && local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, this, + postparent, 1); + } + DHT_STRIP_PHASE1_FLAGS (stbuf); DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr, postparent); +err: return 0; } +/* For directories, check if acl xattrs have been requested (by the acl xlator), + * if not, request for them. These xattrs are needed for dht dir self-heal to + * perform proper self-healing of dirs + */ +void +dht_check_and_set_acl_xattr_req (inode_t *inode, dict_t *xattr_req) +{ + int ret = 0; + + GF_ASSERT (inode); + GF_ASSERT (xattr_req); + + if (inode->ia_type != IA_IFDIR) + return; + + if (!dict_get (xattr_req, POSIX_ACL_ACCESS_XATTR)) { + ret = dict_set_int8 (xattr_req, POSIX_ACL_ACCESS_XATTR, 0); + if (ret) + gf_log (THIS->name, GF_LOG_WARNING, + "failed to set key %s", + POSIX_ACL_ACCESS_XATTR); + } + + if (!dict_get (xattr_req, POSIX_ACL_DEFAULT_XATTR)) { + ret = dict_set_int8 (xattr_req, POSIX_ACL_DEFAULT_XATTR, 0); + if (ret) + gf_log (THIS->name, GF_LOG_WARNING, + "failed to set key %s", + POSIX_ACL_DEFAULT_XATTR); + } + + return; +} int dht_lookup (call_frame_t *frame, xlator_t *this, @@ -785,56 +1402,68 @@ dht_lookup (call_frame_t *frame, xlator_t *this, { xlator_t *subvol = NULL; xlator_t *hashed_subvol = NULL; - xlator_t *cached_subvol = NULL; dht_local_t *local = NULL; - dht_conf_t *conf = NULL; + dht_conf_t *conf = NULL; int ret = -1; int op_errno = -1; - dht_layout_t *layout = NULL; - int i = 0; - int call_cnt = 0; - + dht_layout_t *layout = NULL; + int i = 0; + int call_cnt = 0; + loc_t new_loc = {0,}; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - - conf = this->private; - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } + conf = this->private; + if (!conf) + goto err; - ret = loc_dup (loc, &local->loc); - if (ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "copying location failed for path=%s", - loc->path); + local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP); + if (!local) { + op_errno = ENOMEM; goto err; } - - if (xattr_req) { - local->xattr_req = dict_ref (xattr_req); - } else { - local->xattr_req = dict_new (); - } - hashed_subvol = dht_subvol_get_hashed (this, loc); - cached_subvol = dht_subvol_get_cached (this, loc->inode); + ret = dht_filter_loc_subvol_key (this, loc, &new_loc, + &hashed_subvol); + if (ret) { + loc_wipe (&local->loc); + ret = loc_dup (&new_loc, &local->loc); - local->cached_subvol = cached_subvol; - local->hashed_subvol = hashed_subvol; + /* we no more need 'new_loc' entries */ + loc_wipe (&new_loc); - if (is_revalidate (loc)) { - local->layout = layout = dht_layout_get (this, loc->inode); + /* check if loc_dup() is successful */ + if (ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_DEBUG, + "copying location failed for path=%s", + loc->path); + goto err; + } + } + + if (xattr_req) { + local->xattr_req = dict_ref (xattr_req); + } else { + local->xattr_req = dict_new (); + } + + if (uuid_is_null (loc->pargfid) && !uuid_is_null (loc->gfid) && + !__is_root_gfid (loc->inode->gfid)) { + local->cached_subvol = NULL; + dht_discover (frame, this, loc); + return 0; + } + if (!hashed_subvol) + hashed_subvol = dht_subvol_get_hashed (this, loc); + local->hashed_subvol = hashed_subvol; + + if (is_revalidate (loc)) { + layout = local->layout; if (!layout) { gf_log (this->name, GF_LOG_DEBUG, "revalidate without cache. path=%s", @@ -843,71 +1472,93 @@ dht_lookup (call_frame_t *frame, xlator_t *this, goto err; } - if (layout->gen && (layout->gen < conf->gen)) { - gf_log (this->name, GF_LOG_TRACE, - "incomplete layout failure for path=%s", - loc->path); + if (layout->gen && (layout->gen < conf->gen)) { + gf_log (this->name, GF_LOG_TRACE, + "incomplete layout failure for path=%s", + loc->path); dht_layout_unref (this, local->layout); local->layout = NULL; - goto do_fresh_lookup; - } + local->cached_subvol = NULL; + goto do_fresh_lookup; + } - local->inode = inode_ref (loc->inode); - local->ia_ino = loc->inode->ino; - - local->call_cnt = layout->cnt; - call_cnt = local->call_cnt; - - /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute, - * revalidates directly go to the cached-subvolume. - */ - ret = dict_set_uint32 (local->xattr_req, - "trusted.glusterfs.dht", 4 * 4); - - for (i = 0; i < layout->cnt; i++) { + local->inode = inode_ref (loc->inode); + + /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute, + * revalidates directly go to the cached-subvolume. + */ + ret = dict_set_uint32 (local->xattr_req, + conf->xattr_name, 4 * 4); + + if (IA_ISDIR (local->inode->ia_type)) { + local->call_cnt = call_cnt = conf->subvolume_cnt; + for (i = 0; i < call_cnt; i++) { + STACK_WIND (frame, dht_revalidate_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, + loc, local->xattr_req); + } + return 0; + } + + call_cnt = local->call_cnt = layout->cnt; + + /* need it for self-healing linkfiles which is + 'in-migration' state */ + ret = dict_set_uint32 (local->xattr_req, + GLUSTERFS_OPEN_FD_COUNT, 4); + + /* need it for dir self-heal */ + dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req); + + for (i = 0; i < call_cnt; i++) { subvol = layout->list[i].xlator; - + STACK_WIND (frame, dht_revalidate_cbk, subvol, subvol->fops->lookup, - loc, local->xattr_req); + &local->loc, local->xattr_req); - if (!--call_cnt) - break; } } else { do_fresh_lookup: - /* TODO: remove the hard-coding */ - ret = dict_set_uint32 (local->xattr_req, - "trusted.glusterfs.dht", 4 * 4); + /* TODO: remove the hard-coding */ + ret = dict_set_uint32 (local->xattr_req, + conf->xattr_name, 4 * 4); + + ret = dict_set_uint32 (local->xattr_req, + conf->link_xattr_name, 256); - ret = dict_set_uint32 (local->xattr_req, - "trusted.glusterfs.dht.linkto", 256); + /* need it for self-healing linkfiles which is + 'in-migration' state */ + ret = dict_set_uint32 (local->xattr_req, + GLUSTERFS_OPEN_FD_COUNT, 4); + + /* need it for dir self-heal */ + dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req); if (!hashed_subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no subvolume in layout for path=%s, " - "checking on all the subvols to see if " - "it is a directory", loc->path); - call_cnt = conf->subvolume_cnt; - local->call_cnt = call_cnt; - - local->layout = dht_layout_new (this, + gf_log (this->name, GF_LOG_DEBUG, + "no subvolume in layout for path=%s, " + "checking on all the subvols to see if " + "it is a directory", loc->path); + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; + + local->layout = dht_layout_new (this, conf->subvolume_cnt); - if (!local->layout) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } - - for (i = 0; i < call_cnt; i++) { - STACK_WIND (frame, dht_lookup_dir_cbk, - conf->subvolumes[i], - conf->subvolumes[i]->fops->lookup, - &local->loc, local->xattr_req); - } - return 0; + if (!local->layout) { + op_errno = ENOMEM; + goto err; + } + + for (i = 0; i < call_cnt; i++) { + STACK_WIND (frame, dht_lookup_dir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, + &local->loc, local->xattr_req); + } + return 0; } STACK_WIND (frame, dht_lookup_cbk, @@ -918,335 +1569,52 @@ dht_lookup (call_frame_t *frame, xlator_t *this, return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, + NULL); return 0; } int -dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *prebuf, - struct iatt *postbuf) -{ - dht_local_t *local = NULL; - int this_call_cnt = 0; - call_frame_t *prev = NULL; - - - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - gf_log (this->name, GF_LOG_DEBUG, - "subvolume %s returned -1 (%s)", - prev->this->name, strerror (op_errno)); - goto unlock; - } - - dht_iatt_merge (this, &local->prebuf, prebuf, prev->this); - dht_iatt_merge (this, &local->stbuf, postbuf, prev->this); - - if (local->inode) { - local->stbuf.ia_ino = local->inode->ino; - local->prebuf.ia_ino = local->inode->ino; - } - - local->op_ret = 0; - } -unlock: - UNLOCK (&frame->lock); - - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) - DHT_STACK_UNWIND (truncate, frame, local->op_ret, local->op_errno, - &local->prebuf, &local->stbuf); - - return 0; -} - - - -int -dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *stbuf) -{ - dht_local_t *local = NULL; - int this_call_cnt = 0; - call_frame_t *prev = NULL; - - - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - gf_log (this->name, GF_LOG_DEBUG, - "subvolume %s returned -1 (%s)", - prev->this->name, strerror (op_errno)); - goto unlock; - } - - dht_iatt_merge (this, &local->stbuf, stbuf, prev->this); - - if (local->inode) - local->stbuf.ia_ino = local->inode->ino; - local->op_ret = 0; - } -unlock: - UNLOCK (&frame->lock); - - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) - DHT_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno, - &local->stbuf); - - return 0; -} - - -int -dht_stat (call_frame_t *frame, xlator_t *this, - loc_t *loc) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - dht_layout_t *layout = NULL; - int i = 0; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - - - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } - - local->layout = layout = dht_layout_get (this, loc->inode); - if (!layout) { - gf_log (this->name, GF_LOG_DEBUG, - "no layout for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } - - local->inode = inode_ref (loc->inode); - local->call_cnt = layout->cnt; - - for (i = 0; i < layout->cnt; i++) { - subvol = layout->list[i].xlator; - - STACK_WIND (frame, dht_attr_cbk, - subvol, subvol->fops->stat, - loc); - } - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL); - - return 0; -} - - -int -dht_fstat (call_frame_t *frame, xlator_t *this, - fd_t *fd) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - dht_layout_t *layout = NULL; - int i = 0; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } - - local->layout = layout = dht_layout_get (this, fd->inode); - if (!layout) { - gf_log (this->name, GF_LOG_DEBUG, - "no layout for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - - local->inode = inode_ref (fd->inode); - local->call_cnt = layout->cnt;; - - for (i = 0; i < layout->cnt; i++) { - subvol = layout->list[i].xlator; - STACK_WIND (frame, dht_attr_cbk, - subvol, subvol->fops->fstat, - fd); - } - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL); - - return 0; -} - - -int -dht_truncate (call_frame_t *frame, xlator_t *this, - loc_t *loc, off_t offset) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - - subvol = dht_subvol_get_cached (this, loc->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } - - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } - - local->inode = inode_ref (loc->inode); - local->call_cnt = 1; - - STACK_WIND (frame, dht_truncate_cbk, - subvol, subvol->fops->truncate, - loc, offset); - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL); - - return 0; -} - - -int -dht_ftruncate (call_frame_t *frame, xlator_t *this, - fd_t *fd, off_t offset) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - subvol = dht_subvol_get_cached (this, fd->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } - - local->inode = inode_ref (fd->inode); - local->call_cnt = 1; - - STACK_WIND (frame, dht_truncate_cbk, - subvol, subvol->fops->ftruncate, - fd, offset); - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL); - - return 0; -} - - -int dht_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; - call_frame_t *prev = NULL; + dht_local_t *local = NULL; + call_frame_t *prev = NULL; - local = frame->local; - prev = cookie; + local = frame->local; + prev = cookie; - LOCK (&frame->lock); - { - if (op_ret == -1) { + LOCK (&frame->lock); + { + if (op_ret == -1) { local->op_ret = -1; - local->op_errno = op_errno; - gf_log (this->name, GF_LOG_DEBUG, - "subvolume %s returned -1 (%s)", - prev->this->name, strerror (op_errno)); - goto unlock; - } + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_DEBUG, + "subvolume %s returned -1 (%s)", + prev->this->name, strerror (op_errno)); + goto unlock; + } - preparent->ia_ino = local->loc.parent->ino; - postparent->ia_ino = local->loc.parent->ino; - local->op_ret = 0; + local->op_ret = 0; local->postparent = *postparent; local->preparent = *preparent; - WIPE (&local->postparent); - WIPE (&local->preparent); - } + if (local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, this, + &local->preparent, 0); + dht_inode_ctx_time_update (local->loc.parent, this, + &local->postparent, 1); + } + } unlock: - UNLOCK (&frame->lock); + UNLOCK (&frame->lock); DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno, - &local->preparent, &local->postparent); + &local->preparent, &local->postparent, NULL); return 0; } @@ -1255,32 +1623,33 @@ unlock: int dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; - call_frame_t *prev = NULL; + dht_local_t *local = NULL; + call_frame_t *prev = NULL; xlator_t *cached_subvol = NULL; - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - gf_log (this->name, GF_LOG_DEBUG, - "subvolume %s returned -1 (%s)", - prev->this->name, strerror (op_errno)); - goto unlock; - } + local = frame->local; + prev = cookie; - local->op_ret = 0; - } + LOCK (&frame->lock); + { + if ((op_ret == -1) && !((op_errno == ENOENT) || + (op_errno == ENOTCONN))) { + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_DEBUG, + "subvolume %s returned -1 (%s)", + prev->this->name, strerror (op_errno)); + goto unlock; + } + + local->op_ret = 0; + } unlock: - UNLOCK (&frame->lock); + UNLOCK (&frame->lock); - if (op_ret == -1) + if (local->op_ret == -1) goto err; cached_subvol = dht_subvol_get_cached (this, local->loc.inode); @@ -1294,314 +1663,311 @@ unlock: STACK_WIND (frame, dht_unlink_cbk, cached_subvol, cached_subvol->fops->unlink, - &local->loc); + &local->loc, local->flags, NULL); return 0; err: DHT_STACK_UNWIND (unlink, frame, -1, local->op_errno, - NULL, NULL); + NULL, NULL, NULL); return 0; } - int -dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, struct iatt *prebuf, struct iatt *postbuf) +dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - int this_call_cnt = 0; - call_frame_t *prev = NULL; + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + local = frame->local; + prev = cookie; - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - gf_log (this->name, GF_LOG_DEBUG, - "subvolume %s returned -1 (%s)", - prev->this->name, strerror (op_errno)); - goto unlock; - } + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_DEBUG, + "subvolume %s returned -1 (%s)", + prev->this->name, strerror (op_errno)); + goto unlock; + } - local->op_ret = 0; - } + local->op_ret = 0; + } unlock: - UNLOCK (&frame->lock); + UNLOCK (&frame->lock); - if (local && (op_ret == 0)) { - prebuf->ia_ino = local->ia_ino; - postbuf->ia_ino = local->ia_ino; + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + DHT_STACK_UNWIND (setxattr, frame, local->op_ret, + local->op_errno, NULL); } - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) - DHT_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno, - prebuf, postbuf); - return 0; } - - -int -dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno) +static void +fill_layout_info (dht_layout_t *layout, char *buf) { - dht_local_t *local = NULL; - int this_call_cnt = 0; - call_frame_t *prev = NULL; - - - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - gf_log (this->name, GF_LOG_DEBUG, - "subvolume %s returned -1 (%s)", - prev->this->name, strerror (op_errno)); - goto unlock; - } - - local->op_ret = 0; - } -unlock: - UNLOCK (&frame->lock); + int i = 0; + char tmp_buf[128] = {0,}; - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - DHT_STACK_UNWIND (setxattr, frame, local->op_ret, local->op_errno); + for (i = 0; i < layout->cnt; i++) { + snprintf (tmp_buf, 128, "(%s %u %u)", + layout->list[i].xlator->name, + layout->list[i].start, + layout->list[i].stop); + if (i) + strcat (buf, " "); + strcat (buf, tmp_buf); } - - return 0; } +void +dht_fill_pathinfo_xattr (xlator_t *this, dht_local_t *local, + char *xattr_buf, int32_t alloc_len, + int flag, char *layout_buf) +{ + if (flag && local->xattr_val) + snprintf (xattr_buf, alloc_len, + "((<"DHT_PATHINFO_HEADER"%s> %s) (%s-layout %s))", + this->name, local->xattr_val, this->name, + layout_buf); + else if (local->xattr_val) + snprintf (xattr_buf, alloc_len, + "(<"DHT_PATHINFO_HEADER"%s> %s)", + this->name, local->xattr_val); + else if (flag) + snprintf (xattr_buf, alloc_len, "(%s-layout %s)", + this->name, layout_buf); +} int -dht_access (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t mask) +dht_vgetxattr_alloc_and_fill (dht_local_t *local, dict_t *xattr, xlator_t *this, + int op_errno) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); + int ret = -1; + char *value = NULL; + int32_t plen = 0; - subvol = dht_subvol_get_cached (this, loc->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } + ret = dict_get_str (xattr, local->xsel, &value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Subvolume %s returned -1 (%s)", this->name, + strerror (op_errno)); + local->op_ret = -1; + local->op_errno = op_errno; + goto out; + } - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } + local->alloc_len += strlen(value); - local->call_cnt = 1; + if (!local->xattr_val) { + local->alloc_len += (strlen (DHT_PATHINFO_HEADER) + 10); + local->xattr_val = GF_CALLOC (local->alloc_len, sizeof (char), + gf_common_mt_char); + if (!local->xattr_val) { + ret = -1; + goto out; + } + } - STACK_WIND (frame, dht_err_cbk, - subvol, subvol->fops->access, - loc, mask); + if (local->xattr_val) { + plen = strlen (local->xattr_val); + if (plen) { + /* extra byte(s) for \0 to be safe */ + local->alloc_len += (plen + 2); + local->xattr_val = GF_REALLOC (local->xattr_val, + local->alloc_len); + if (!local->xattr_val) { + ret = -1; + goto out; + } + } - return 0; + (void) strcat (local->xattr_val, value); + (void) strcat (local->xattr_val, " "); + local->op_ret = 0; + } -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (access, frame, -1, op_errno); + ret = 0; - return 0; + out: + return ret; } - int -dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, const char *path, struct iatt *sbuf) +dht_vgetxattr_fill_and_set (dht_local_t *local, dict_t **dict, xlator_t *this, + gf_boolean_t flag) { - dht_local_t *local = NULL; + int ret = -1; + char *xattr_buf = NULL; + char layout_buf[8192] = {0,}; - local = frame->local; - if (op_ret == -1) - goto err; + if (flag) + fill_layout_info (local->layout, layout_buf); + + *dict = dict_new (); + if (!*dict) + goto out; - if (local) { - sbuf->ia_ino = local->ia_ino; + local->xattr_val[strlen (local->xattr_val) - 1] = '\0'; + + /* we would need max this many bytes to create xattr string + * extra 40 bytes is just an estimated amount of additional + * space required as we include translator name and some + * spaces, brackets etc. when forming the pathinfo string. + * + * For node-uuid we just don't have all the pretty formatting, + * but since this is a generic routine for pathinfo & node-uuid + * we dont have conditional space allocation and try to be + * generic + */ + local->alloc_len += (2 * strlen (this->name)) + + strlen (layout_buf) + + 40; + xattr_buf = GF_CALLOC (local->alloc_len, sizeof (char), + gf_common_mt_char); + if (!xattr_buf) + goto out; + + if (XATTR_IS_PATHINFO (local->xsel)) { + (void) dht_fill_pathinfo_xattr (this, local, xattr_buf, + local->alloc_len, flag, + layout_buf); + } else if (XATTR_IS_NODE_UUID (local->xsel)) { + (void) snprintf (xattr_buf, local->alloc_len, "%s", + local->xattr_val); } else { - op_ret = -1; - op_errno = EINVAL; + gf_log (this->name, GF_LOG_WARNING, + "Unknown local->xsel (%s)", local->xsel); + goto out; } -err: - DHT_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, sbuf); + ret = dict_set_dynstr (*dict, local->xsel, xattr_buf); + GF_FREE (local->xattr_val); - return 0; + out: + return ret; } - int -dht_readlink (call_frame_t *frame, xlator_t *this, - loc_t *loc, size_t size) +dht_vgetxattr_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; + int ret = 0; + dht_local_t *local = NULL; + int this_call_cnt = 0; + dict_t *dict = NULL; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (frame->local, out); - subvol = dht_subvol_get_cached (this, loc->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } - - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } + local = frame->local; - local->ia_ino = loc->inode->ino; - - STACK_WIND (frame, dht_readlink_cbk, - subvol, subvol->fops->readlink, - loc, size); + LOCK (&frame->lock); + { + this_call_cnt = --local->call_cnt; + if (op_ret < 0) { + if (op_errno != ENOTCONN) { + gf_log (this->name, GF_LOG_ERROR, + "getxattr err (%s) for dir", + strerror (op_errno)); + local->op_ret = -1; + local->op_errno = op_errno; + } - return 0; + goto unlock; + } -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL); + ret = dht_vgetxattr_alloc_and_fill (local, xattr, this, + op_errno); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "alloc or fill failure"); + } + unlock: + UNLOCK (&frame->lock); - return 0; -} + if (!is_last_call (this_call_cnt)) + goto out; + /* -- last call: do patch ups -- */ -int -dht_fix_layout_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno) -{ - DHT_STACK_UNWIND (getxattr, frame, -1, ENODATA, NULL); + if (local->op_ret == -1) { + goto unwind; + } - return 0; -} + ret = dht_vgetxattr_fill_and_set (local, &dict, this, _gf_true); + if (ret) + goto unwind; -static void -fill_layout_info (dht_layout_t *layout, char *buf) -{ - int i = 0; - char tmp_buf[128] = {0,}; + DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata); + goto cleanup; - for (i = 0; i < layout->cnt; i++) { - snprintf (tmp_buf, 128, "(%s %u %u)", - layout->list[i].xlator->name, - layout->list[i].start, - layout->list[i].stop); - if (i) - strcat (buf, " "); - strcat (buf, tmp_buf); - } + unwind: + DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, NULL, NULL); + cleanup: + if (dict) + dict_unref (dict); + out: + return 0; } int -dht_pathinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xattr) +dht_vgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) { - dht_local_t *local = NULL; - int ret = 0; - int flag = 0; - int this_call_cnt = 0; - char *value_got = NULL; - char layout_buf[8192] = {0,}; - char xattr_buf[8192 + 1024] = {0,}; - dict_t *dict = NULL; + dht_local_t *local = NULL; + int ret = 0; + dict_t *dict = NULL; + call_frame_t *prev = NULL; + gf_boolean_t flag = _gf_true; local = frame->local; + prev = cookie; - if (op_ret != -1) { - ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value_got); - if (!ret) { - if (!local->pathinfo) - local->pathinfo = GF_CALLOC (8192, sizeof (char), - gf_common_mt_char); - if (local->pathinfo) - strcat (local->pathinfo, value_got); - } + if (op_ret < 0) { + local->op_ret = -1; + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_ERROR, "Subvolume %s returned -1 " + "(%s)", prev->this->name, strerror (op_errno)); + goto unwind; } - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - if (local->layout->cnt > 1) { - /* Set it for directory */ - fill_layout_info (local->layout, layout_buf); - flag = 1; - } - - dict = dict_new (); - - if (flag && local->pathinfo) - snprintf (xattr_buf, 9216, "((%s %s) (%s-layout %s))", - this->name, local->pathinfo, this->name, - layout_buf); - else if (local->pathinfo) - snprintf (xattr_buf, 9216, "(%s %s)", - this->name, local->pathinfo); - else if (flag) - snprintf (xattr_buf, 9216, "(%s-layout %s)", - this->name, layout_buf); - - ret = dict_set_str (dict, GF_XATTR_PATHINFO_KEY, - xattr_buf); - - if (local->pathinfo) - GF_FREE (local->pathinfo); - GF_FREE (local->key); - - DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict); + ret = dht_vgetxattr_alloc_and_fill (local, xattr, this, + op_errno); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "alloc or fill failure"); + goto unwind; + } - if (dict) - dict_unref (dict); + flag = (local->layout->cnt > 1) ? _gf_true : _gf_false; - return 0; - } + ret = dht_vgetxattr_fill_and_set (local, &dict, this, flag); + if (ret) + goto unwind; - if (local->pathinfo) - strcat (local->pathinfo, " Link: "); + DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata); + goto cleanup; - /* This will happen if there pending */ - STACK_WIND (frame, dht_pathinfo_getxattr_cbk, local->hashed_subvol, - local->hashed_subvol->fops->getxattr, - &local->loc, local->key); + unwind: + DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, + NULL, NULL); + cleanup: + if (dict) + dict_unref (dict); return 0; } int dht_linkinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xattr) + int op_ret, int op_errno, dict_t *xattr, + dict_t *xdata) { int ret = 0; char *value = NULL; @@ -1616,90 +1982,234 @@ dht_linkinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } } - DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr); + DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata); return 0; } int dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xattr) + int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) { - if (op_ret != -1) { - if (dict_get (xattr, "trusted.glusterfs.dht")) { - dict_del (xattr, "trusted.glusterfs.dht"); - } + int this_call_cnt = 0; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (frame->local, out); + VALIDATE_OR_GOTO (this->private, out); + + conf = this->private; + local = frame->local; + + this_call_cnt = dht_frame_return (frame); + + if (!xattr || (op_ret == -1)) + goto out; + + if (dict_get (xattr, conf->xattr_name)) { + dict_del (xattr, conf->xattr_name); } + local->op_ret = 0; - DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr); + if (!local->xattr) { + local->xattr = dict_copy_with_ref (xattr, NULL); + } else { + /* first aggregate everything into xattr and then copy into + * local->xattr. This is required as we want to have + * 'local->xattr' as the proper final dictionary passed above + * distribute xlator. + */ + dht_aggregate_xattr (xattr, local->xattr); + local->xattr = dict_copy (xattr, local->xattr); + } +out: + if (is_last_call (this_call_cnt)) { + DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno, + local->xattr, NULL); + } + return 0; +} +int32_t +dht_getxattr_unwind (call_frame_t *frame, + int op_ret, int op_errno, dict_t *dict, dict_t *xdata) +{ + DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata); return 0; } int +dht_getxattr_get_real_filename_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + dict_t *xattr, dict_t *xdata) +{ + int this_call_cnt = 0; + dht_local_t *local = NULL; + + + local = frame->local; + + if (op_ret != -1) { + if (local->xattr) + dict_unref (local->xattr); + local->xattr = dict_ref (xattr); + + if (local->xattr_req) + dict_unref (local->xattr_req); + local->xattr_req = dict_ref (xdata); + } + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno, + local->xattr, local->xattr_req); + } + + return 0; +} + + +int +dht_getxattr_get_real_filename (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *key, dict_t *xdata) +{ + dht_local_t *local = NULL; + int i = 0; + dht_layout_t *layout = NULL; + int cnt = 0; + xlator_t *subvol = NULL; + + + local = frame->local; + layout = local->layout; + + cnt = local->call_cnt = layout->cnt; + + local->op_ret = -1; + local->op_errno = ENODATA; + + for (i = 0; i < cnt; i++) { + subvol = layout->list[i].xlator; + STACK_WIND (frame, dht_getxattr_get_real_filename_cbk, + subvol, subvol->fops->getxattr, + loc, key, xdata); + } + + return 0; +} + + +int dht_getxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *key) + loc_t *loc, const char *key, dict_t *xdata) +#define DHT_IS_DIR(layout) (layout->cnt > 1) { - xlator_t *subvol = NULL; - xlator_t *hashed_subvol = NULL; - xlator_t *cached_subvol = NULL; - dht_conf_t *conf = NULL; - dht_local_t *local = NULL; + + xlator_t *subvol = NULL; + xlator_t *hashed_subvol = NULL; + xlator_t *cached_subvol = NULL; + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; dht_layout_t *layout = NULL; + xlator_t **sub_volumes = NULL; int op_errno = -1; - int ret = 0; + int i = 0; + int cnt = 0; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); + VALIDATE_OR_GOTO (this->private, err); conf = this->private; - layout = dht_layout_get (this, loc->inode); - if (key && (strcmp (key, GF_XATTR_PATHINFO_KEY) == 0)) { - hashed_subvol = dht_subvol_get_hashed (this, loc); - cached_subvol = dht_subvol_get_cached (this, loc->inode); - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } + local = dht_local_init (frame, loc, NULL, GF_FOP_GETXATTR); + if (!local) { + op_errno = ENOMEM; - ret = loc_dup (loc, &local->loc); - if (ret == -1) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } + goto err; + } + + layout = local->layout; + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "layout is NULL"); + op_errno = ENOENT; + goto err; + } + + if (key) { local->key = gf_strdup (key); if (!local->key) { op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); goto err; } - local->layout = layout; + } - local->call_cnt = 1; - if (hashed_subvol != cached_subvol) { - local->call_cnt = 2; - local->hashed_subvol = hashed_subvol; + if (key && + (strncmp (key, GF_XATTR_GET_REAL_FILENAME_KEY, + strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0) + && DHT_IS_DIR(layout)) { + dht_getxattr_get_real_filename (frame, this, loc, key, xdata); + return 0; + } + + /* for file use cached subvolume (obviously!): see if {} + * below + * for directory: + * wind to all subvolumes and exclude subvolumes which + * return ENOTCONN (in callback) + * + * NOTE: Don't trust inode here, as that may not be valid + * (until inode_link() happens) + */ + if (key && DHT_IS_DIR(layout) && + ((strcmp (key, GF_XATTR_PATHINFO_KEY) == 0) + || (strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0))) { + (void) strncpy (local->xsel, key, 256); + cnt = local->call_cnt = layout->cnt; + for (i = 0; i < cnt; i++) { + subvol = layout->list[i].xlator; + STACK_WIND (frame, dht_vgetxattr_dir_cbk, + subvol, subvol->fops->getxattr, + loc, key, NULL); } + return 0; + } + + /* node-uuid or pathinfo for files */ + if (key && ((strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0) + || (strcmp (key, GF_XATTR_PATHINFO_KEY) == 0))) { + cached_subvol = local->cached_subvol; + (void) strncpy (local->xsel, key, 256); - STACK_WIND (frame, dht_pathinfo_getxattr_cbk, cached_subvol, - cached_subvol->fops->getxattr, loc, key); + local->call_cnt = 1; + STACK_WIND (frame, dht_vgetxattr_cbk, cached_subvol, + cached_subvol->fops->getxattr, loc, key, NULL); return 0; } + if (key && (strcmp (key, GF_XATTR_LINKINFO_KEY) == 0)) { hashed_subvol = dht_subvol_get_hashed (this, loc); + if (!hashed_subvol) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get" + "hashed subvol for %s", loc->path); + op_errno = EINVAL; + goto err; + } + cached_subvol = dht_subvol_get_cached (this, loc->inode); + if (!cached_subvol) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get" + "cached subvol for %s", loc->path); + op_errno = EINVAL; + goto err; + } + if (hashed_subvol == cached_subvol) { op_errno = ENODATA; goto err; @@ -1707,696 +2217,845 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, if (hashed_subvol) { STACK_WIND (frame, dht_linkinfo_getxattr_cbk, hashed_subvol, hashed_subvol->fops->getxattr, loc, - GF_XATTR_PATHINFO_KEY); + GF_XATTR_PATHINFO_KEY, NULL); return 0; } op_errno = ENODATA; goto err; } - if (key && (strcmp (key, GF_XATTR_FIX_LAYOUT_KEY) == 0)) { - if (layout->cnt < conf->subvolume_cnt) { - gf_log (this->name, GF_LOG_INFO, - "expanding layout of %s from %d to %d", - loc->path, layout->cnt, conf->subvolume_cnt); - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } - ret = loc_dup (loc, &local->loc); - if (ret == -1) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); + if (key && (!strcmp (GF_XATTR_MARKER_KEY, key)) + && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) { + if (DHT_IS_DIR(layout)) { + cnt = layout->cnt; + } else { + cnt = 1; + } + + sub_volumes = alloca ( cnt * sizeof (xlator_t *)); + for (i = 0; i < cnt; i++) + *(sub_volumes + i) = layout->list[i].xlator; + + if (cluster_getmarkerattr (frame, this, loc, key, + local, dht_getxattr_unwind, + sub_volumes, cnt, + MARKER_UUID_TYPE, marker_uuid_default_gauge, + conf->vol_uuid)) { + op_errno = EINVAL; + goto err; + } + + return 0; + } + + if (key && *conf->vol_uuid) { + if ((match_uuid_local (key, conf->vol_uuid) == 0) && + (GF_CLIENT_PID_GSYNCD == frame->root->pid)) { + if (DHT_IS_DIR(layout)) { + cnt = layout->cnt; + } else { + cnt = 1; + } + sub_volumes = alloca ( cnt * sizeof (xlator_t *)); + for (i = 0; i < cnt; i++) + sub_volumes[i] = layout->list[i].xlator; + + if (cluster_getmarkerattr (frame, this, loc, key, + local, dht_getxattr_unwind, + sub_volumes, cnt, + MARKER_XTIME_TYPE, + marker_xtime_default_gauge, + conf->vol_uuid)) { + op_errno = EINVAL; goto err; } - local->layout = layout; - dht_selfheal_new_directory (frame, dht_fix_layout_cbk, - layout); + return 0; } - op_errno = ENODATA; - goto err; } - subvol = dht_subvol_get_cached (this, loc->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } - STACK_WIND (frame, dht_getxattr_cbk, - subvol, subvol->fops->getxattr, - loc, key); + if (DHT_IS_DIR(layout)) { + cnt = local->call_cnt = layout->cnt; + } else { + cnt = local->call_cnt = 1; + } - return 0; + for (i = 0; i < cnt; i++) { + subvol = layout->list[i].xlator; + STACK_WIND (frame, dht_getxattr_cbk, + subvol, subvol->fops->getxattr, + loc, key, NULL); + } + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } - +#undef DHT_IS_DIR int -dht_setxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xattr, int flags) +dht_fgetxattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *key, dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - + xlator_t *subvol = NULL; + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + int op_errno = -1; + int i = 0; + int cnt = 0; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + VALIDATE_OR_GOTO (this->private, err); - subvol = dht_subvol_get_cached (this, loc->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } + local = dht_local_init (frame, NULL, fd, GF_FOP_FGETXATTR); + if (!local) { + op_errno = ENOMEM; - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } + goto err; + } + + layout = local->layout; + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "layout is NULL"); + op_errno = ENOENT; + goto err; + } - local->call_cnt = 1; + if (key) { + local->key = gf_strdup (key); + if (!local->key) { + op_errno = ENOMEM; + goto err; + } + } - STACK_WIND (frame, dht_err_cbk, - subvol, subvol->fops->setxattr, - loc, xattr, flags); + if ((fd->inode->ia_type == IA_IFDIR) + && (strncmp (key, GF_XATTR_LOCKINFO_KEY, + strlen (GF_XATTR_LOCKINFO_KEY) != 0))) { + cnt = local->call_cnt = layout->cnt; + } else { + cnt = local->call_cnt = 1; + } - return 0; + for (i = 0; i < cnt; i++) { + subvol = layout->list[i].xlator; + STACK_WIND (frame, dht_getxattr_cbk, + subvol, subvol->fops->fgetxattr, + fd, key, NULL); + } + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (setxattr, frame, -1, op_errno); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } - int -dht_removexattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *key) +dht_fsetxattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, dict_t *xattr, int flags, dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - + xlator_t *subvol = NULL; + dht_local_t *local = NULL; + int op_errno = EINVAL; + dht_conf_t *conf = NULL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + VALIDATE_OR_GOTO (this->private, err); - subvol = dht_subvol_get_cached (this, loc->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } + conf = this->private; - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } + GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr, + op_errno, err); + + local = dht_local_init (frame, NULL, fd, GF_FOP_FSETXATTR); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = local->cached_subvol; + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } - local->call_cnt = 1; + local->call_cnt = 1; - STACK_WIND (frame, dht_err_cbk, - subvol, subvol->fops->removexattr, - loc, key); + STACK_WIND (frame, dht_err_cbk, subvol, subvol->fops->fsetxattr, + fd, xattr, flags, NULL); - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (removexattr, frame, -1, op_errno); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); - return 0; + return 0; } +static int +dht_common_setxattr_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + dict_t *xdata) +{ + DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata); + + return 0; +} + int -dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, fd_t *fd) +dht_checking_pathinfo_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, + dict_t *xdata) { - dht_local_t *local = NULL; - int this_call_cnt = 0; - call_frame_t *prev = NULL; + int i = -1; + int ret = -1; + char *value = NULL; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + call_frame_t *prev = NULL; + int this_call_cnt = 0; + local = frame->local; + prev = cookie; + conf = this->private; - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - gf_log (this->name, GF_LOG_DEBUG, - "subvolume %s returned -1 (%s)", - prev->this->name, strerror (op_errno)); - goto unlock; - } + if (op_ret == -1) + goto out; - local->op_ret = 0; - } -unlock: - UNLOCK (&frame->lock); - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) - DHT_STACK_UNWIND (open, frame, local->op_ret, local->op_errno, - local->fd); + ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value); + if (ret) + goto out; + + if (!strcmp (value, local->key)) { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == prev->this) + conf->decommissioned_bricks[i] = prev->this; + } + } +out: + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + DHT_STACK_UNWIND (setxattr, frame, local->op_ret, ENOTSUP, NULL); + } return 0; -} +} int -dht_open (call_frame_t *frame, xlator_t *this, - loc_t *loc, int flags, fd_t *fd, int wbflags) +dht_setxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *xattr, int flags, dict_t *xdata) { - xlator_t *subvol = NULL; - int ret = -1; - int op_errno = -1; - dht_local_t *local = NULL; - + xlator_t *subvol = NULL; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + dht_layout_t *layout = NULL; + int i = 0; + int op_errno = EINVAL; + int ret = -1; + data_t *tmp = NULL; + uint32_t dir_spread = 0; + char value[4096] = {0,}; + gf_dht_migrate_data_type_t forced_rebalance = GF_DHT_MIGRATE_DATA; + int call_cnt = 0; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); - subvol = dht_subvol_get_cached (this, fd->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } + conf = this->private; - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } + GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr, + op_errno, err); - local->fd = fd_ref (fd); - ret = loc_dup (loc, &local->loc); - if (ret == -1) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } + local = dht_local_init (frame, loc, NULL, GF_FOP_SETXATTR); + if (!local) { + op_errno = ENOMEM; + goto err; + } - local->call_cnt = 1; + subvol = local->cached_subvol; + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } - STACK_WIND (frame, dht_fd_cbk, - subvol, subvol->fops->open, - loc, flags, fd, wbflags); + layout = local->layout; + if (!layout) { + gf_log (this->name, GF_LOG_DEBUG, + "no layout for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } - return 0; + local->call_cnt = call_cnt = layout->cnt; -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL); + tmp = dict_get (xattr, "distribute.migrate-data"); + if (tmp) { + if (IA_ISDIR (loc->inode->ia_type)) { + op_errno = ENOTSUP; + goto err; + } - return 0; -} + /* TODO: need to interpret the 'value' for more meaning + (ie, 'target' subvolume given there, etc) */ + memcpy (value, tmp->data, tmp->len); + if (strcmp (value, "force") == 0) + forced_rebalance = + GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS; + if (conf->decommission_in_progress) + forced_rebalance = GF_DHT_MIGRATE_HARDLINK; -int -dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - struct iovec *vector, int count, struct iatt *stbuf, - struct iobref *iobref) -{ - dht_local_t *local = frame->local; + local->rebalance.target_node = dht_subvol_get_hashed (this, loc); + if (!local->rebalance.target_node) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "hashed subvol for %s", loc->path); + op_errno = EINVAL; + goto err; + } - if (!local) { - op_ret = -1; + local->rebalance.from_subvol = local->cached_subvol; + + if (local->rebalance.target_node == local->rebalance.from_subvol) { + op_errno = EEXIST; + goto err; + } + if (local->rebalance.target_node) { + local->flags = forced_rebalance; + + ret = dht_start_rebalance_task (this, frame); + if (!ret) + return 0; + + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to create a new synctask", + loc->path); + } op_errno = EINVAL; - goto out; - } + goto err; - if (op_ret != -1) - stbuf->ia_ino = local->ia_ino; -out: - DHT_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf, - iobref); + } - return 0; -} + tmp = dict_get (xattr, "decommission-brick"); + if (tmp) { + /* This operation should happen only on '/' */ + if (!__is_root_gfid (loc->inode->gfid)) { + op_errno = ENOTSUP; + goto err; + } + memcpy (value, tmp->data, ((tmp->len < 4095) ? tmp->len : 4095)); + local->key = gf_strdup (value); + local->call_cnt = conf->subvolume_cnt; -int -dht_readv (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; + for (i = 0 ; i < conf->subvolume_cnt; i++) { + /* Get the pathinfo, and then compare */ + STACK_WIND (frame, dht_checking_pathinfo_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->getxattr, + loc, GF_XATTR_PATHINFO_KEY, NULL); + } + return 0; + } - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); + tmp = dict_get (xattr, GF_XATTR_FIX_LAYOUT_KEY); + if (tmp) { + gf_log (this->name, GF_LOG_INFO, + "fixing the layout of %s", loc->path); - subvol = dht_subvol_get_cached (this, fd->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } + ret = dht_fix_directory_layout (frame, dht_common_setxattr_cbk, + layout); + if (ret) { + op_errno = ENOTCONN; + goto err; + } + return ret; + } - local = dht_local_init (frame); - if (!local) { - gf_log (this->name, GF_LOG_ERROR, "Out of memory"); - op_errno = ENOMEM; + tmp = dict_get (xattr, "distribute.directory-spread-count"); + if (tmp) { + /* Setxattr value is packed as 'binary', not string */ + memcpy (value, tmp->data, ((tmp->len < 4095)?tmp->len:4095)); + ret = gf_string2uint32 (value, &dir_spread); + if (!ret && ((dir_spread <= conf->subvolume_cnt) && + (dir_spread > 0))) { + layout->spread_cnt = dir_spread; + + ret = dht_fix_directory_layout (frame, + dht_common_setxattr_cbk, + layout); + if (ret) { + op_errno = ENOTCONN; + goto err; + } + return ret; + } + gf_log (this->name, GF_LOG_ERROR, + "wrong 'directory-spread-count' value (%s)", value); + op_errno = ENOTSUP; goto err; } - local->ia_ino = fd->inode->ino; - STACK_WIND (frame, dht_readv_cbk, - subvol, subvol->fops->readv, - fd, size, off); + for (i = 0; i < call_cnt; i++) { + STACK_WIND (frame, dht_err_cbk, + layout->list[i].xlator, + layout->list[i].xlator->fops->setxattr, + loc, xattr, flags, xdata); + } - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); - return 0; + return 0; } int -dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *prebuf, - struct iatt *postbuf) +dht_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - - if (op_ret == -1) { - goto out; - } + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; local = frame->local; - if (!local) { - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - prebuf->ia_ino = local->ia_ino; - postbuf->ia_ino = local->ia_ino; + prev = cookie; -out: - DHT_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf); + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_DEBUG, + "subvolume %s returned -1 (%s)", + prev->this->name, strerror (op_errno)); + goto unlock; + } + + local->op_ret = 0; + } +unlock: + UNLOCK (&frame->lock); + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + DHT_STACK_UNWIND (removexattr, frame, local->op_ret, + local->op_errno, NULL); + } return 0; } int -dht_writev (call_frame_t *frame, xlator_t *this, - fd_t *fd, struct iovec *vector, int count, off_t off, - struct iobref *iobref) +dht_removexattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *key, dict_t *xdata) { - xlator_t *subvol = NULL; + xlator_t *subvol = NULL; int op_errno = -1; dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + int call_cnt = 0; + dht_conf_t *conf = NULL; + + int i; - VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (this->private, err); - subvol = dht_subvol_get_cached (this, fd->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } + conf = this->private; + + GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err); + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); - local = dht_local_init (frame); + local = dht_local_init (frame, loc, NULL, GF_FOP_REMOVEXATTR); if (!local) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); op_errno = ENOMEM; goto err; } - local->ia_ino = fd->inode->ino; + subvol = local->cached_subvol; + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + layout = local->layout; + if (!local->layout) { + gf_log (this->name, GF_LOG_DEBUG, + "no layout for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } - STACK_WIND (frame, dht_writev_cbk, - subvol, subvol->fops->writev, - fd, vector, count, off, iobref); + local->call_cnt = call_cnt = layout->cnt; + local->key = gf_strdup (key); - return 0; + for (i = 0; i < call_cnt; i++) { + STACK_WIND (frame, dht_removexattr_cbk, + layout->list[i].xlator, + layout->list[i].xlator->fops->removexattr, + loc, key, NULL); + } + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL); - return 0; + return 0; } - int -dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) +dht_fremovexattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *key, dict_t *xdata) { - xlator_t *subvol = NULL; + xlator_t *subvol = NULL; int op_errno = -1; - dht_local_t *local = NULL; + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + int call_cnt = 0; + dht_conf_t *conf = 0; + int i; - VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - subvol = dht_subvol_get_cached (this, fd->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } - - local->fd = fd_ref (fd); - local->call_cnt = 1; - - STACK_WIND (frame, dht_err_cbk, - subvol, subvol->fops->flush, fd); - - return 0; + VALIDATE_OR_GOTO (this->private, err); -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (flush, frame, -1, op_errno); - - return 0; -} - - -int -dht_fsync (call_frame_t *frame, xlator_t *this, - fd_t *fd, int datasync) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; + conf = this->private; + GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err); VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - subvol = dht_subvol_get_cached (this, fd->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } + local = dht_local_init (frame, NULL, fd, GF_FOP_FREMOVEXATTR); + if (!local) { + op_errno = ENOMEM; + goto err; + } - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } - local->call_cnt = 1; + subvol = local->cached_subvol; + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for inode=%s", + uuid_utoa (fd->inode->gfid)); + op_errno = EINVAL; + goto err; + } + + layout = local->layout; + if (!local->layout) { + gf_log (this->name, GF_LOG_DEBUG, + "no layout for inode=%s", uuid_utoa (fd->inode->gfid)); + op_errno = EINVAL; + goto err; + } - local->ia_ino = fd->inode->ino; + local->call_cnt = call_cnt = layout->cnt; + local->key = gf_strdup (key); - STACK_WIND (frame, dht_fsync_cbk, - subvol, subvol->fops->fsync, - fd, datasync); + for (i = 0; i < call_cnt; i++) { + STACK_WIND (frame, dht_removexattr_cbk, + layout->list[i].xlator, + layout->list[i].xlator->fops->fremovexattr, + fd, key, NULL); + } - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL); - return 0; + return 0; } int -dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct flock *flock) +dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, fd_t *fd, dict_t *xdata) { - DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock); - - return 0; -} + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + local = frame->local; + prev = cookie; -int -dht_lk (call_frame_t *frame, xlator_t *this, - fd_t *fd, int cmd, struct flock *flock) -{ - xlator_t *subvol = NULL; - int op_errno = -1; + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_DEBUG, + "subvolume %s returned -1 (%s)", + prev->this->name, strerror (op_errno)); + goto unlock; + } + local->op_ret = 0; + } +unlock: + UNLOCK (&frame->lock); - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) + DHT_STACK_UNWIND (open, frame, local->op_ret, local->op_errno, + local->fd, NULL); - subvol = dht_subvol_get_cached (this, fd->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } + return 0; +} - STACK_WIND (frame, dht_lk_cbk, - subvol, subvol->fops->lk, - fd, cmd, flock); +/* + * dht_normalize_stats - + */ +static void +dht_normalize_stats (struct statvfs *buf, unsigned long bsize, + unsigned long frsize) +{ + double factor = 0; - return 0; + if (buf->f_bsize != bsize) { + buf->f_bsize = bsize; + } -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL); + if (buf->f_frsize != frsize) { + factor = ((double) buf->f_frsize) / frsize; + buf->f_frsize = frsize; + buf->f_blocks = (fsblkcnt_t) (factor * buf->f_blocks); + buf->f_bfree = (fsblkcnt_t) (factor * buf->f_bfree); + buf->f_bavail = (fsblkcnt_t) (factor * buf->f_bavail); - return 0; + } } - int dht_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct statvfs *statvfs) + int op_ret, int op_errno, struct statvfs *statvfs, dict_t *xdata) { - dht_local_t *local = NULL; - int this_call_cnt = 0; + dht_local_t *local = NULL; + int this_call_cnt = 0; + int bsize = 0; + int frsize = 0; - local = frame->local; + local = frame->local; - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - goto unlock; - } - local->op_ret = 0; - - /* TODO: normalize sizes */ - local->statvfs.f_bsize = statvfs->f_bsize; - local->statvfs.f_frsize = statvfs->f_frsize; - - local->statvfs.f_blocks += statvfs->f_blocks; - local->statvfs.f_bfree += statvfs->f_bfree; - local->statvfs.f_bavail += statvfs->f_bavail; - local->statvfs.f_files += statvfs->f_files; - local->statvfs.f_ffree += statvfs->f_ffree; - local->statvfs.f_favail += statvfs->f_favail; - local->statvfs.f_fsid = statvfs->f_fsid; - local->statvfs.f_flag = statvfs->f_flag; - local->statvfs.f_namemax = statvfs->f_namemax; + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + goto unlock; + } + local->op_ret = 0; - } + if (local->statvfs.f_bsize != 0) { + bsize = max(local->statvfs.f_bsize, statvfs->f_bsize); + frsize = max(local->statvfs.f_frsize, statvfs->f_frsize); + dht_normalize_stats(&local->statvfs, bsize, frsize); + dht_normalize_stats(statvfs, bsize, frsize); + } else { + local->statvfs.f_bsize = statvfs->f_bsize; + local->statvfs.f_frsize = statvfs->f_frsize; + } + + local->statvfs.f_blocks += statvfs->f_blocks; + local->statvfs.f_bfree += statvfs->f_bfree; + local->statvfs.f_bavail += statvfs->f_bavail; + local->statvfs.f_files += statvfs->f_files; + local->statvfs.f_ffree += statvfs->f_ffree; + local->statvfs.f_favail += statvfs->f_favail; + local->statvfs.f_fsid = statvfs->f_fsid; + local->statvfs.f_flag = statvfs->f_flag; + local->statvfs.f_namemax = statvfs->f_namemax; + + } unlock: - UNLOCK (&frame->lock); + UNLOCK (&frame->lock); - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) - DHT_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno, - &local->statvfs); + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) + DHT_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno, + &local->statvfs, xdata); return 0; } int -dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc) +dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; + xlator_t *subvol = NULL; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; int op_errno = -1; - int i = -1; - + int i = -1; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); + VALIDATE_OR_GOTO (this->private, err); - conf = this->private; + conf = this->private; - local = dht_local_init (frame); - local->call_cnt = conf->subvolume_cnt; + local = dht_local_init (frame, NULL, NULL, GF_FOP_STATFS); + if (!local) { + op_errno = ENOMEM; + goto err; + } - for (i = 0; i < conf->subvolume_cnt; i++) { - STACK_WIND (frame, dht_statfs_cbk, - conf->subvolumes[i], - conf->subvolumes[i]->fops->statfs, loc); - } + if (IA_ISDIR (loc->inode->ia_type)) { + local->call_cnt = conf->subvolume_cnt; - return 0; + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (frame, dht_statfs_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->statfs, loc, + xdata); + } + return 0; + } + + subvol = dht_subvol_get_cached (this, loc->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local->call_cnt = 1; + + STACK_WIND (frame, dht_statfs_cbk, + subvol, subvol->fops->statfs, loc, xdata); + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (statfs, frame, -1, op_errno, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } int -dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) +dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - int ret = -1; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; int op_errno = -1; - int i = -1; - + int i = -1; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (this->private, err); - conf = this->private; + conf = this->private; - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } + local = dht_local_init (frame, loc, fd, GF_FOP_OPENDIR); + if (!local) { + op_errno = ENOMEM; - local->fd = fd_ref (fd); - ret = loc_dup (loc, &local->loc); - if (ret == -1) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } + goto err; + } - local->call_cnt = conf->subvolume_cnt; + local->call_cnt = conf->subvolume_cnt; - for (i = 0; i < conf->subvolume_cnt; i++) { - STACK_WIND (frame, dht_fd_cbk, - conf->subvolumes[i], - conf->subvolumes[i]->fops->opendir, - loc, fd); - } + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (frame, dht_fd_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->opendir, + loc, fd, xdata); + } - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (opendir, frame, -1, op_errno, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } int dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, gf_dirent_t *orig_entries) + int op_errno, gf_dirent_t *orig_entries, dict_t *xdata) { - dht_local_t *local = NULL; - gf_dirent_t entries; - gf_dirent_t *orig_entry = NULL; - gf_dirent_t *entry = NULL; - call_frame_t *prev = NULL; - xlator_t *next_subvol = NULL; + dht_local_t *local = NULL; + gf_dirent_t entries; + gf_dirent_t *orig_entry = NULL; + gf_dirent_t *entry = NULL; + call_frame_t *prev = NULL; + xlator_t *next_subvol = NULL; off_t next_offset = 0; - int count = 0; + int count = 0; dht_layout_t *layout = 0; dht_conf_t *conf = NULL; xlator_t *subvol = 0; + int ret = 0; - INIT_LIST_HEAD (&entries.list); - prev = cookie; - local = frame->local; - conf = this->private; + INIT_LIST_HEAD (&entries.list); + prev = cookie; + local = frame->local; + conf = this->private; - if (op_ret < 0) - goto done; + if (op_ret < 0) + goto done; if (!local->layout) local->layout = dht_layout_get (this, local->fd->inode); layout = local->layout; - list_for_each_entry (orig_entry, (&orig_entries->list), list) { + list_for_each_entry (orig_entry, (&orig_entries->list), list) { next_offset = orig_entry->d_off; - - if (check_is_linkfile (NULL, (&orig_entry->d_stat), NULL) - || (check_is_dir (NULL, (&orig_entry->d_stat), NULL) - && (prev->this != dht_first_up_subvol (this)))) { + if (check_is_dir (NULL, (&orig_entry->d_stat), NULL) && + (prev->this != local->first_up_subvol)) { + continue; + } + if (check_is_linkfile (NULL, (&orig_entry->d_stat), + orig_entry->dict, + conf->link_xattr_name)) { continue; } entry = gf_dirent_for_name (orig_entry->d_name); if (!entry) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); + goto unwind; } @@ -2406,25 +3065,41 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, orig_entry->d_name); if (!subvol || (subvol != prev->this)) { /* TODO: Count the number of entries which need - linkfile to prove its existance in fs */ + linkfile to prove its existence in fs */ layout->search_unhashed++; } } - entry->d_stat = orig_entry->d_stat; - dht_itransform (this, prev->this, orig_entry->d_ino, - &entry->d_ino); dht_itransform (this, prev->this, orig_entry->d_off, &entry->d_off); - entry->d_stat.ia_ino = entry->d_ino; + entry->d_stat = orig_entry->d_stat; + entry->d_ino = orig_entry->d_ino; entry->d_type = orig_entry->d_type; entry->d_len = orig_entry->d_len; + if (orig_entry->dict) + entry->dict = dict_ref (orig_entry->dict); + + /* making sure we set the inode ctx right with layout, + currently possible only for non-directories, so for + directories don't set entry inodes */ + if (!IA_ISDIR(entry->d_stat.ia_type)) { + ret = dht_layout_preset (this, prev->this, + orig_entry->inode); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "failed to link the layout in inode"); + entry->inode = inode_ref (orig_entry->inode); + } else if (orig_entry->inode) { + dht_inode_ctx_time_update (orig_entry->inode, this, + &entry->d_stat, 1); + } + list_add_tail (&entry->list, &entries.list); count++; - } - op_ret = count; + } + op_ret = count; /* We need to ensure that only the last subvolume's end-of-directory * notification is respected so that directory reading does not stop * before all subvolumes have been read. That could happen because the @@ -2436,7 +3111,7 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, op_errno = 0; done: - if (count == 0) { + if (count == 0) { /* non-zero next_offset means that EOF is not yet hit on the current subvol */ @@ -2446,23 +3121,37 @@ done: next_subvol = prev->this; } - if (!next_subvol) { - goto unwind; - } + if (!next_subvol) { + goto unwind; + } - STACK_WIND (frame, dht_readdirp_cbk, - next_subvol, next_subvol->fops->readdirp, - local->fd, local->size, next_offset); - return 0; - } + if (conf->readdir_optimize == _gf_true) { + if (next_subvol != local->first_up_subvol) { + ret = dict_set_int32 (local->xattr, + GF_READDIR_SKIP_DIRS, 1); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "dict set failed"); + } else { + dict_del (local->xattr, + GF_READDIR_SKIP_DIRS); + } + } + + STACK_WIND (frame, dht_readdirp_cbk, + next_subvol, next_subvol->fops->readdirp, + local->fd, local->size, next_offset, + local->xattr); + return 0; + } unwind: - if (op_ret < 0) - op_ret = 0; + if (op_ret < 0) + op_ret = 0; - DHT_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries); + DHT_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries, NULL); - gf_dirent_free (&entries); + gf_dirent_free (&entries); return 0; } @@ -2471,34 +3160,33 @@ unwind: int dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, gf_dirent_t *orig_entries) + int op_ret, int op_errno, gf_dirent_t *orig_entries, + dict_t *xdata) { - dht_local_t *local = NULL; - gf_dirent_t entries; - gf_dirent_t *orig_entry = NULL; - gf_dirent_t *entry = NULL; - call_frame_t *prev = NULL; - xlator_t *next_subvol = NULL; + dht_local_t *local = NULL; + gf_dirent_t entries; + gf_dirent_t *orig_entry = NULL; + gf_dirent_t *entry = NULL; + call_frame_t *prev = NULL; + xlator_t *next_subvol = NULL; off_t next_offset = 0; - int count = 0; + int count = 0; dht_layout_t *layout = 0; - dht_conf_t *conf = NULL; xlator_t *subvol = 0; - INIT_LIST_HEAD (&entries.list); - prev = cookie; - local = frame->local; - conf = this->private; + INIT_LIST_HEAD (&entries.list); + prev = cookie; + local = frame->local; - if (op_ret < 0) - goto done; + if (op_ret < 0) + goto done; if (!local->layout) local->layout = dht_layout_get (this, local->fd->inode); layout = local->layout; - list_for_each_entry (orig_entry, (&orig_entries->list), list) { + list_for_each_entry (orig_entry, (&orig_entries->list), list) { next_offset = orig_entry->d_off; subvol = dht_layout_search (this, layout, orig_entry->d_name); @@ -2511,11 +3199,10 @@ dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto unwind; } - dht_itransform (this, prev->this, orig_entry->d_ino, - &entry->d_ino); dht_itransform (this, prev->this, orig_entry->d_off, &entry->d_off); + entry->d_ino = orig_entry->d_ino; entry->d_type = orig_entry->d_type; entry->d_len = orig_entry->d_len; @@ -2523,7 +3210,7 @@ dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, count++; } } - op_ret = count; + op_ret = count; /* We need to ensure that only the last subvolume's end-of-directory * notification is respected so that directory reading does not stop * before all subvolumes have been read. That could happen because the @@ -2535,7 +3222,7 @@ dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, op_errno = 0; done: - if (count == 0) { + if (count == 0) { /* non-zero next_offset means that EOF is not yet hit on the current subvol */ @@ -2545,23 +3232,23 @@ done: next_subvol = prev->this; } - if (!next_subvol) { - goto unwind; - } + if (!next_subvol) { + goto unwind; + } - STACK_WIND (frame, dht_readdir_cbk, - next_subvol, next_subvol->fops->readdir, - local->fd, local->size, next_offset); - return 0; - } + STACK_WIND (frame, dht_readdir_cbk, + next_subvol, next_subvol->fops->readdir, + local->fd, local->size, next_offset, NULL); + return 0; + } unwind: - if (op_ret < 0) - op_ret = 0; + if (op_ret < 0) + op_ret = 0; - DHT_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries); + DHT_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries, NULL); - gf_dirent_free (&entries); + gf_dirent_free (&entries); return 0; } @@ -2569,61 +3256,92 @@ unwind: int dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t yoff, int whichop) + off_t yoff, int whichop, dict_t *dict) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; + dht_local_t *local = NULL; int op_errno = -1; - xlator_t *xvol = NULL; - off_t xoff = 0; - + xlator_t *xvol = NULL; + off_t xoff = 0; + int ret = 0; + dht_conf_t *conf = NULL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (this->private, err); - conf = this->private; + conf = this->private; - local = dht_local_init (frame); - if (!local) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - op_errno = ENOMEM; - goto err; - } + local = dht_local_init (frame, NULL, NULL, whichop); + if (!local) { + op_errno = ENOMEM; + goto err; + } - local->fd = fd_ref (fd); - local->size = size; + local->fd = fd_ref (fd); + local->size = size; + local->xattr_req = (dict)? dict_ref (dict) : NULL; + local->first_up_subvol = dht_first_up_subvol (this); - dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff); + dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff); + + /* TODO: do proper readdir */ + if (whichop == GF_FOP_READDIRP) { + if (dict) + local->xattr = dict_ref (dict); + else + local->xattr = dict_new (); + + if (local->xattr) { + ret = dict_set_uint32 (local->xattr, + conf->link_xattr_name, 256); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "failed to set '%s' key", + conf->link_xattr_name); + if (conf->readdir_optimize == _gf_true) { + if (xvol != local->first_up_subvol) { + ret = dict_set_int32 (local->xattr, + GF_READDIR_SKIP_DIRS, 1); + if (ret) + gf_log (this->name, + GF_LOG_ERROR, + "Dict set failed"); + } else { + dict_del (local->xattr, + GF_READDIR_SKIP_DIRS); + } + } + } - /* TODO: do proper readdir */ - if (whichop == GF_FOP_READDIR) - STACK_WIND (frame, dht_readdir_cbk, xvol, xvol->fops->readdir, - fd, size, xoff); - else STACK_WIND (frame, dht_readdirp_cbk, xvol, xvol->fops->readdirp, - fd, size, xoff); + fd, size, xoff, local->xattr); + } else { + STACK_WIND (frame, dht_readdir_cbk, xvol, xvol->fops->readdir, + fd, size, xoff, local->xattr); + } - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (readdir, frame, -1, op_errno, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } int dht_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t yoff) + off_t yoff, dict_t *xdata) { int op = GF_FOP_READDIR; dht_conf_t *conf = NULL; int i = 0; conf = this->private; + if (!conf) + goto out; for (i = 0; i < conf->subvolume_cnt; i++) { if (!conf->subvolume_status[i]) { @@ -2632,15 +3350,19 @@ dht_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, } } - dht_do_readdir (frame, this, fd, size, yoff, op); + if (conf->use_readdirp) + op = GF_FOP_READDIRP; + +out: + dht_do_readdir (frame, this, fd, size, yoff, op, 0); return 0; } int dht_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t yoff) + off_t yoff, dict_t *dict) { - dht_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIRP); + dht_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIRP, dict); return 0; } @@ -2648,88 +3370,88 @@ dht_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, int dht_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno) + int op_ret, int op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - int this_call_cnt = 0; + dht_local_t *local = NULL; + int this_call_cnt = 0; - local = frame->local; + local = frame->local; - LOCK (&frame->lock); - { - if (op_ret == -1) - local->op_errno = op_errno; + LOCK (&frame->lock); + { + if (op_ret == -1) + local->op_errno = op_errno; - if (op_ret == 0) - local->op_ret = 0; - } - UNLOCK (&frame->lock); + if (op_ret == 0) + local->op_ret = 0; + } + UNLOCK (&frame->lock); - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) - DHT_STACK_UNWIND (fsyncdir, frame, local->op_ret, local->op_errno); + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) + DHT_STACK_UNWIND (fsyncdir, frame, local->op_ret, + local->op_errno, xdata); return 0; } int -dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync) +dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, + int datasync, dict_t *xdata) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; int op_errno = -1; - int i = -1; - + int i = -1; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (this->private, err); - conf = this->private; + conf = this->private; - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } + local = dht_local_init (frame, NULL, NULL, GF_FOP_FSYNCDIR); + if (!local) { + op_errno = ENOMEM; + goto err; + } - local->fd = fd_ref (fd); - local->call_cnt = conf->subvolume_cnt; + local->fd = fd_ref (fd); + local->call_cnt = conf->subvolume_cnt; - for (i = 0; i < conf->subvolume_cnt; i++) { - STACK_WIND (frame, dht_fsyncdir_cbk, - conf->subvolumes[i], - conf->subvolumes[i]->fops->fsyncdir, - fd, datasync); - } + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (frame, dht_fsyncdir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->fsyncdir, + fd, datasync, xdata); + } - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (fsyncdir, frame, -1, op_errno); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL); - return 0; + return 0; } int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, + int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { - call_frame_t *prev = NULL; - int ret = -1; + xlator_t *prev = NULL; + int ret = -1; dht_local_t *local = NULL; - if (op_ret == -1) - goto out; + if (op_ret == -1) + goto out; local = frame->local; if (!local) { @@ -2738,38 +3460,39 @@ dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto out; } - prev = cookie; + prev = cookie; - dht_itransform (this, prev->this, stbuf->ia_ino, &stbuf->ia_ino); if (local->loc.parent) { - preparent->ia_ino = local->loc.parent->ino; - postparent->ia_ino = local->loc.parent->ino; - WIPE (preparent); - WIPE (postparent); + dht_inode_ctx_time_update (local->loc.parent, this, + preparent, 0); + dht_inode_ctx_time_update (local->loc.parent, this, + postparent, 1); } - ret = dht_layout_preset (this, prev->this, inode); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "could not set pre-set layout for subvolume %s", - prev->this->name); - op_ret = -1; - op_errno = EINVAL; - goto out; - } + ret = dht_layout_preset (this, prev, inode); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "could not set pre-set layout for subvolume %s", + prev? prev->name: NULL); + op_ret = -1; + op_errno = EINVAL; + goto out; + } + if (local->linked == _gf_true) + dht_linkfile_attr_heal (frame, this); out: - /* - * FIXME: ia_size and st_blocks of preparent and postparent do not have + /* + * FIXME: ia_size and st_blocks of preparent and postparent do not have * correct values. since, preparent and postparent buffers correspond * to a directory these two members should have values equal to sum of * corresponding values from each of the subvolume. * See dht_iatt_merge for reference. - */ - - DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, stbuf, preparent, - postparent); - return 0; + */ + DHT_STRIP_PHASE1_FLAGS (stbuf); + DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, stbuf, + preparent, postparent, xdata); + return 0; } int @@ -2777,392 +3500,376 @@ dht_mknod_linkfile_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent) + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *cached_subvol = NULL; + dht_local_t *local = NULL; + xlator_t *cached_subvol = NULL; if (op_ret == -1) goto err; - local = frame->local; - cached_subvol = local->cached_subvol; + local = frame->local; + if (!local || !local->cached_subvol) { + op_errno = EINVAL; + goto err; + } - STACK_WIND (frame, dht_newfile_cbk, - cached_subvol, cached_subvol->fops->mknod, - &local->loc, local->mode, local->rdev); + cached_subvol = local->cached_subvol; + STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)cached_subvol, + cached_subvol, cached_subvol->fops->mknod, + &local->loc, local->mode, local->rdev, local->umask, + local->params); + + return 0; +err: + DHT_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL); return 0; - err: - DHT_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL); - return 0; } int dht_mknod (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, dev_t rdev) + loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params) { - xlator_t *subvol = NULL; - int op_errno = -1; - int ret = -1; + xlator_t *subvol = NULL; + int op_errno = -1; xlator_t *avail_subvol = NULL; - dht_conf_t *conf = NULL; - dht_local_t *local = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); + dht_local_t *local = NULL; - conf = this->private; + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); dht_get_du_info (frame, this, loc); - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } - - subvol = dht_subvol_get_hashed (this, loc); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no subvolume in layout for path=%s", - loc->path); - op_errno = ENOENT; - goto err; - } - - ret = loc_dup (loc, &local->loc); - if (ret == -1) { + local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD); + if (!local) { op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); + goto err; + } + + subvol = dht_subvol_get_hashed (this, loc); + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no subvolume in layout for path=%s", + loc->path); + op_errno = ENOENT; goto err; } if (!dht_is_subvol_filled (this, subvol)) { gf_log (this->name, GF_LOG_TRACE, "creating %s on %s", loc->path, subvol->name); - - STACK_WIND (frame, dht_newfile_cbk, - subvol, subvol->fops->mknod, - loc, mode, rdev); + + STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, + subvol, subvol->fops->mknod, loc, mode, + rdev, umask, params); } else { - avail_subvol = dht_free_disk_available_subvol (this, subvol); + + avail_subvol = dht_free_disk_available_subvol (this, subvol, + local); if (avail_subvol != subvol) { - /* Choose the minimum filled volume, and create the + /* Choose the minimum filled volume, and create the files there */ + local->params = dict_ref (params); local->cached_subvol = avail_subvol; - local->mode = mode; + local->mode = mode; local->rdev = rdev; - - dht_linkfile_create (frame, + local->umask = umask; + dht_linkfile_create (frame, dht_mknod_linkfile_create_cbk, - avail_subvol, subvol, loc); + this, avail_subvol, subvol, loc); } else { gf_log (this->name, GF_LOG_TRACE, "creating %s on %s", loc->path, subvol->name); - - STACK_WIND (frame, dht_newfile_cbk, - subvol, subvol->fops->mknod, - loc, mode, rdev); + + STACK_WIND_COOKIE (frame, dht_newfile_cbk, + (void *)subvol, subvol, + subvol->fops->mknod, loc, mode, + rdev, umask, params); } } - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (mknod, frame, -1, op_errno, - NULL, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (mknod, frame, -1, op_errno, + NULL, NULL, NULL, NULL, NULL); - return 0; + return 0; } int dht_symlink (call_frame_t *frame, xlator_t *this, - const char *linkname, loc_t *loc) + const char *linkname, loc_t *loc, mode_t umask, dict_t *params) { - xlator_t *subvol = NULL; - int op_errno = -1; + xlator_t *subvol = NULL; + int op_errno = -1; dht_local_t *local = NULL; - int ret = -1; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } - - subvol = dht_subvol_get_hashed (this, loc); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no subvolume in layout for path=%s", - loc->path); - op_errno = ENOENT; - goto err; - } + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); - ret = loc_copy (&local->loc, loc); - if (ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "Failed to copy loc"); + local = dht_local_init (frame, loc, NULL, GF_FOP_SYMLINK); + if (!local) { op_errno = ENOMEM; goto err; } - gf_log (this->name, GF_LOG_TRACE, - "creating %s on %s", loc->path, subvol->name); + subvol = dht_subvol_get_hashed (this, loc); + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no subvolume in layout for path=%s", + loc->path); + op_errno = ENOENT; + goto err; + } - STACK_WIND (frame, dht_newfile_cbk, - subvol, subvol->fops->symlink, - linkname, loc); + gf_log (this->name, GF_LOG_TRACE, + "creating %s on %s", loc->path, subvol->name); - return 0; + STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol, + subvol->fops->symlink, linkname, loc, umask, + params); + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (link, frame, -1, op_errno, - NULL, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (link, frame, -1, op_errno, + NULL, NULL, NULL, NULL, NULL); - return 0; + return 0; } int -dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) +dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) { - xlator_t *cached_subvol = NULL; - xlator_t *hashed_subvol = NULL; - int ret = -1; - int op_errno = -1; - dht_local_t *local = NULL; + xlator_t *cached_subvol = NULL; + xlator_t *hashed_subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); + if (dht_filter_loc_subvol_key (this, loc, &local->loc, + &cached_subvol)) { + gf_log (this->name, GF_LOG_INFO, + "unlinking %s on %s (given path %s)", + local->loc.path, cached_subvol->name, loc->path); + STACK_WIND (frame, dht_unlink_cbk, + cached_subvol, cached_subvol->fops->unlink, + &local->loc, xflag, xdata); + goto done; + } - cached_subvol = dht_subvol_get_cached (this, loc->inode); - if (!cached_subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } + local = dht_local_init (frame, loc, NULL, GF_FOP_UNLINK); + if (!local) { + op_errno = ENOMEM; - hashed_subvol = dht_subvol_get_hashed (this, loc); - if (!hashed_subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no subvolume in layout for path=%s", - loc->path); - op_errno = EINVAL; - goto err; - } + goto err; + } - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } + hashed_subvol = dht_subvol_get_hashed (this, loc); + if (!hashed_subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no subvolume in layout for path=%s", + loc->path); + op_errno = EINVAL; + goto err; + } - ret = loc_copy (&local->loc, loc); - if (ret == -1) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } + cached_subvol = local->cached_subvol; + if (!cached_subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } - if (hashed_subvol != cached_subvol) { - STACK_WIND (frame, dht_unlink_linkfile_cbk, - hashed_subvol, hashed_subvol->fops->unlink, loc); + local->flags = xflag; + if (hashed_subvol != cached_subvol) { + STACK_WIND (frame, dht_unlink_linkfile_cbk, + hashed_subvol, hashed_subvol->fops->unlink, loc, + xflag, xdata); } else { STACK_WIND (frame, dht_unlink_cbk, - cached_subvol, cached_subvol->fops->unlink, loc); + cached_subvol, cached_subvol->fops->unlink, loc, + xflag, xdata); } - - return 0; +done: + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } int dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, + int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { call_frame_t *prev = NULL; - dht_layout_t *layout = NULL; - dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + dht_local_t *local = NULL; prev = cookie; - local = frame->local; + + local = frame->local; if (op_ret == -1) goto out; - layout = dht_layout_for_subvol (this, prev->this); - if (!layout) { - gf_log (this->name, GF_LOG_DEBUG, - "no pre-set layout for subvolume %s", - prev->this->name); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - stbuf->ia_ino = local->loc.inode->ino; - - preparent->ia_ino = local->loc2.parent->ino; - postparent->ia_ino = local->loc2.parent->ino; - - WIPE (preparent); - WIPE (postparent); + layout = dht_layout_for_subvol (this, prev->this); + if (!layout) { + gf_log (this->name, GF_LOG_DEBUG, + "no pre-set layout for subvolume %s", + prev->this->name); + op_ret = -1; + op_errno = EINVAL; + goto out; + } + if (local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, this, + preparent, 0); + dht_inode_ctx_time_update (local->loc.parent, this, + postparent, 1); + } + if (local->linked == _gf_true) { + local->stbuf = *stbuf; + dht_linkfile_attr_heal (frame, this); + } out: + DHT_STRIP_PHASE1_FLAGS (stbuf); DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent, - postparent); + postparent, NULL); - return 0; + return 0; } int dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, + int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent) + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *srcvol = NULL; - + dht_local_t *local = NULL; + xlator_t *srcvol = NULL; - if (op_ret == -1) - goto err; + if (op_ret == -1) + goto err; - local = frame->local; - srcvol = local->linkfile.srcvol; + local = frame->local; + srcvol = local->linkfile.srcvol; - STACK_WIND (frame, dht_link_cbk, - srcvol, srcvol->fops->link, - &local->loc, &local->loc2); + STACK_WIND (frame, dht_link_cbk, srcvol, srcvol->fops->link, + &local->loc, &local->loc2, xdata); - return 0; + return 0; err: - DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent, - postparent); + DHT_STRIP_PHASE1_FLAGS (stbuf); + DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent, + postparent, NULL); - return 0; + return 0; } int dht_link (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc) + loc_t *oldloc, loc_t *newloc, dict_t *xdata) { - xlator_t *cached_subvol = NULL; - xlator_t *hashed_subvol = NULL; - int op_errno = -1; - int ret = -1; - dht_local_t *local = NULL; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (oldloc, err); - VALIDATE_OR_GOTO (newloc, err); - - cached_subvol = dht_subvol_get_cached (this, oldloc->inode); - if (!cached_subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for path=%s", oldloc->path); - op_errno = EINVAL; - goto err; - } + xlator_t *cached_subvol = NULL; + xlator_t *hashed_subvol = NULL; + int op_errno = -1; + int ret = -1; + dht_local_t *local = NULL; - hashed_subvol = dht_subvol_get_hashed (this, newloc); - if (!hashed_subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no subvolume in layout for path=%s", - newloc->path); - op_errno = EINVAL; - goto err; - } + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (oldloc, err); + VALIDATE_OR_GOTO (newloc, err); - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } + local = dht_local_init (frame, oldloc, NULL, GF_FOP_LINK); + if (!local) { + op_errno = ENOMEM; - ret = loc_copy (&local->loc, oldloc); - if (ret == -1) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } + goto err; + } - ret = loc_copy (&local->loc2, newloc); - if (ret == -1) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } + cached_subvol = local->cached_subvol; + if (!cached_subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for path=%s", oldloc->path); + op_errno = EINVAL; + goto err; + } - if (hashed_subvol != cached_subvol) { - dht_linkfile_create (frame, dht_link_linkfile_cbk, - cached_subvol, hashed_subvol, newloc); - } else { - STACK_WIND (frame, dht_link_cbk, - cached_subvol, cached_subvol->fops->link, - oldloc, newloc); - } + hashed_subvol = dht_subvol_get_hashed (this, newloc); + if (!hashed_subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no subvolume in layout for path=%s", + newloc->path); + op_errno = EINVAL; + goto err; + } - return 0; + ret = loc_copy (&local->loc2, newloc); + if (ret == -1) { + op_errno = ENOMEM; + goto err; + } + + if (hashed_subvol != cached_subvol) { + uuid_copy (local->gfid, oldloc->inode->gfid); + dht_linkfile_create (frame, dht_link_linkfile_cbk, this, + cached_subvol, hashed_subvol, newloc); + } else { + STACK_WIND (frame, dht_link_cbk, + cached_subvol, cached_subvol->fops->link, + oldloc, newloc, xdata); + } + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); - return 0; + return 0; } int dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - fd_t *fd, inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent) + int op_ret, int op_errno, + fd_t *fd, inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - call_frame_t *prev = NULL; - int ret = -1; + call_frame_t *prev = NULL; + int ret = -1; dht_local_t *local = NULL; - if (op_ret == -1) - goto out; + if (op_ret == -1) + goto out; local = frame->local; if (!local) { @@ -3171,101 +3878,105 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto out; } - prev = cookie; + prev = cookie; - dht_itransform (this, prev->this, stbuf->ia_ino, &stbuf->ia_ino); if (local->loc.parent) { - preparent->ia_ino = local->loc.parent->ino; - postparent->ia_ino = local->loc.parent->ino; + dht_inode_ctx_time_update (local->loc.parent, this, + preparent, 0); - WIPE (preparent); - WIPE (postparent); + dht_inode_ctx_time_update (local->loc.parent, this, + postparent, 1); } ret = dht_layout_preset (this, prev->this, inode); - if (ret != 0) { - gf_log (this->name, GF_LOG_DEBUG, - "could not set preset layout for subvol %s", + if (ret != 0) { + gf_log (this->name, GF_LOG_DEBUG, + "could not set preset layout for subvol %s", prev->this->name); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - + op_ret = -1; + op_errno = EINVAL; + goto out; + } + if (local->linked == _gf_true) { + local->stbuf = *stbuf; + dht_linkfile_attr_heal (frame, this); + } out: - DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, stbuf, preparent, - postparent); - return 0; + DHT_STRIP_PHASE1_FLAGS (stbuf); + DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, stbuf, preparent, + postparent, NULL); + return 0; } int dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, + xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent) + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *cached_subvol = NULL; + dht_local_t *local = NULL; + xlator_t *cached_subvol = NULL; if (op_ret == -1) goto err; - local = frame->local; - cached_subvol = local->cached_subvol; + local = frame->local; + cached_subvol = local->cached_subvol; STACK_WIND (frame, dht_create_cbk, cached_subvol, cached_subvol->fops->create, - &local->loc, local->flags, local->mode, local->fd); + &local->loc, local->flags, local->mode, + local->umask, local->fd, local->params); return 0; - err: - DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); - return 0; +err: + DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, + NULL, NULL, NULL); + return 0; } int dht_create (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, mode_t mode, fd_t *fd) + loc_t *loc, int32_t flags, mode_t mode, + mode_t umask, fd_t *fd, dict_t *params) { - int op_errno = -1; - int ret = -1; - xlator_t *subvol = NULL; - dht_conf_t *conf = NULL; + int op_errno = -1; + xlator_t *subvol = NULL; dht_local_t *local = NULL; xlator_t *avail_subvol = NULL; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - - conf = this->private; + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); dht_get_du_info (frame, this, loc); - local = dht_local_init (frame); - if (!local) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - op_errno = ENOMEM; - goto err; - } + local = dht_local_init (frame, loc, fd, GF_FOP_CREATE); + if (!local) { + op_errno = ENOMEM; + goto err; + } - subvol = dht_subvol_get_hashed (this, loc); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no subvolume in layout for path=%s", - loc->path); - op_errno = ENOENT; - goto err; - } + if (dht_filter_loc_subvol_key (this, loc, &local->loc, + &subvol)) { + gf_log (this->name, GF_LOG_INFO, + "creating %s on %s (got create on %s)", + local->loc.path, subvol->name, loc->path); + STACK_WIND (frame, dht_create_cbk, + subvol, subvol->fops->create, + &local->loc, flags, mode, umask, fd, params); + goto done; + } - ret = loc_dup (loc, &local->loc); - if (ret == -1) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); + subvol = dht_subvol_get_hashed (this, loc); + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no subvolume in layout for path=%s", + loc->path); + op_errno = ENOENT; goto err; } @@ -3274,153 +3985,158 @@ dht_create (call_frame_t *frame, xlator_t *this, "creating %s on %s", loc->path, subvol->name); STACK_WIND (frame, dht_create_cbk, subvol, subvol->fops->create, - loc, flags, mode, fd); - } else { - /* Choose the minimum filled volume, and create the - files there */ - /* TODO */ - avail_subvol = dht_free_disk_available_subvol (this, subvol); - if (avail_subvol != subvol) { - local->fd = fd_ref (fd); - local->flags = flags; - local->mode = mode; - - local->cached_subvol = avail_subvol; - local->hashed_subvol = subvol; - gf_log (this->name, GF_LOG_TRACE, - "creating %s on %s (link at %s)", loc->path, - avail_subvol->name, subvol->name); - dht_linkfile_create (frame, - dht_create_linkfile_create_cbk, - avail_subvol, subvol, loc); - } else { - gf_log (this->name, GF_LOG_TRACE, - "creating %s on %s", loc->path, subvol->name); - STACK_WIND (frame, dht_create_cbk, - subvol, subvol->fops->create, - loc, flags, mode, fd); - - } + loc, flags, mode, umask, fd, params); + goto done; } - - return 0; + /* Choose the minimum filled volume, and create the + files there */ + avail_subvol = dht_free_disk_available_subvol (this, subvol, local); + if (avail_subvol != subvol) { + local->params = dict_ref (params); + local->flags = flags; + local->mode = mode; + local->umask = umask; + local->cached_subvol = avail_subvol; + local->hashed_subvol = subvol; + gf_log (this->name, GF_LOG_TRACE, + "creating %s on %s (link at %s)", loc->path, + avail_subvol->name, subvol->name); + dht_linkfile_create (frame, dht_create_linkfile_create_cbk, + this, avail_subvol, subvol, loc); + goto done; + } + gf_log (this->name, GF_LOG_TRACE, + "creating %s on %s", loc->path, subvol->name); + STACK_WIND (frame, dht_create_cbk, + subvol, subvol->fops->create, + loc, flags, mode, umask, fd, params); +done: + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, + NULL, NULL, NULL); - return 0; + return 0; } int dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno) + xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - dht_layout_t *layout = NULL; + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + local = frame->local; + layout = local->selfheal.layout; - local = frame->local; - layout = local->selfheal.layout; - - if (op_ret == 0) { + if (op_ret == 0) { dht_layout_set (this, local->inode, layout); - local->stbuf.ia_ino = local->ia_ino; - local->stbuf.ia_gen = local->ia_gen; if (local->loc.parent) { - local->preparent.ia_ino = local->loc.parent->ino; - local->postparent.ia_ino = local->loc.parent->ino; + dht_inode_ctx_time_update (local->loc.parent, this, + &local->preparent, 0); - WIPE (&local->preparent); - WIPE (&local->postparent); + dht_inode_ctx_time_update (local->loc.parent, this, + &local->postparent, 1); } - } + } - DHT_STACK_UNWIND (mkdir, frame, op_ret, op_errno, - local->inode, &local->stbuf, &local->preparent, - &local->postparent); + DHT_STACK_UNWIND (mkdir, frame, op_ret, op_errno, + local->inode, &local->stbuf, &local->preparent, + &local->postparent, NULL); - return 0; + return 0; } int dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent) + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; - int this_call_cnt = 0; - int ret = -1; - int subvol_filled = 0; - call_frame_t *prev = NULL; - dht_layout_t *layout = NULL; - dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + int this_call_cnt = 0; + int ret = -1; + gf_boolean_t subvol_filled = _gf_false; + call_frame_t *prev = NULL; + dht_layout_t *layout = NULL; - conf = this->private; - local = frame->local; - prev = cookie; - layout = local->layout; + local = frame->local; + prev = cookie; + layout = local->layout; subvol_filled = dht_is_subvol_filled (this, prev->this); - LOCK (&frame->lock); - { + LOCK (&frame->lock); + { if (subvol_filled && (op_ret != -1)) { ret = dht_layout_merge (this, layout, prev->this, -1, ENOSPC, NULL); } else { + if (op_ret == -1 && op_errno == EEXIST) + /* Very likely just a race between mkdir and + self-heal (from lookup of a concurrent mkdir + attempt). + Ignore error for now. layout setting will + anyways fail if this was a different (old) + pre-existing different directory. + */ + op_ret = 0; ret = dht_layout_merge (this, layout, prev->this, op_ret, op_errno, NULL); } + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to merge layouts", local->loc.path); - if (op_ret == -1) { - local->op_errno = op_errno; - goto unlock; - } - dht_iatt_merge (this, &local->stbuf, stbuf, prev->this); + if (op_ret == -1) { + local->op_errno = op_errno; + goto unlock; + } + dht_iatt_merge (this, &local->stbuf, stbuf, prev->this); dht_iatt_merge (this, &local->preparent, preparent, prev->this); dht_iatt_merge (this, &local->postparent, postparent, prev->this); - - if (prev->this == dht_first_up_subvol (this)) { - local->ia_ino = local->stbuf.ia_ino; - local->ia_gen = local->stbuf.ia_gen; - } - - } + } unlock: - UNLOCK (&frame->lock); + UNLOCK (&frame->lock); - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - dht_selfheal_new_directory (frame, dht_mkdir_selfheal_cbk, - layout); - } + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + dht_selfheal_new_directory (frame, dht_mkdir_selfheal_cbk, + layout); + } return 0; } int -dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, +dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent) + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - dht_local_t *local = NULL; - int ret = -1; - call_frame_t *prev = NULL; - dht_layout_t *layout = NULL; - dht_conf_t *conf = NULL; - int i = 0; - xlator_t *hashed_subvol = NULL; + dht_local_t *local = NULL; + int ret = -1; + call_frame_t *prev = NULL; + dht_layout_t *layout = NULL; + dht_conf_t *conf = NULL; + int i = 0; + xlator_t *hashed_subvol = NULL; - local = frame->local; - prev = cookie; - layout = local->layout; - conf = this->private; - hashed_subvol = local->hashed_subvol; + VALIDATE_OR_GOTO (this->private, err); + + local = frame->local; + prev = cookie; + layout = local->layout; + conf = this->private; + hashed_subvol = local->hashed_subvol; + + if (uuid_is_null (local->loc.gfid) && !op_ret) + uuid_copy (local->loc.gfid, stbuf->ia_gfid); if (dht_is_subvol_filled (this, hashed_subvol)) ret = dht_layout_merge (this, layout, prev->this, @@ -3428,49 +4144,55 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie, else ret = dht_layout_merge (this, layout, prev->this, op_ret, op_errno, NULL); - - if (op_ret == -1) { - local->op_errno = op_errno; - goto err; - } - local->op_ret = 0; - dht_iatt_merge (this, &local->stbuf, stbuf, prev->this); + /* TODO: we may have to return from the function + if layout merge fails. For now, lets just log an error */ + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to merge layouts", local->loc.path); + + if (op_ret == -1) { + local->op_errno = op_errno; + goto err; + } + local->op_ret = 0; + + dht_iatt_merge (this, &local->stbuf, stbuf, prev->this); dht_iatt_merge (this, &local->preparent, preparent, prev->this); dht_iatt_merge (this, &local->postparent, postparent, prev->this); - local->ia_ino = local->stbuf.ia_ino; - local->ia_gen = local->stbuf.ia_gen; + local->call_cnt = conf->subvolume_cnt - 1; - local->call_cnt = conf->subvolume_cnt - 1; - - if (local->call_cnt == 0) { - dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk, - &local->loc, layout); - } - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->subvolumes[i] == hashed_subvol) - continue; - STACK_WIND (frame, dht_mkdir_cbk, - conf->subvolumes[i], - conf->subvolumes[i]->fops->mkdir, - &local->loc, local->mode); - } - return 0; + if (uuid_is_null (local->loc.gfid)) + uuid_copy (local->loc.gfid, stbuf->ia_gfid); + if (local->call_cnt == 0) { + dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk, + &local->loc, layout); + } + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == hashed_subvol) + continue; + STACK_WIND (frame, dht_mkdir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->mkdir, &local->loc, + local->mode, local->umask, local->params); + } + return 0; err: - DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL); + DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, + NULL, NULL); return 0; } -int + + int dht_mkdir (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode) + loc_t *loc, mode_t mode, mode_t umask, dict_t *params) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; int op_errno = -1; - int ret = -1; - xlator_t *hashed_subvol = NULL; + xlator_t *hashed_subvol = NULL; VALIDATE_OR_GOTO (frame, err); @@ -3478,147 +4200,234 @@ dht_mkdir (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (loc, err); VALIDATE_OR_GOTO (loc->inode, err); VALIDATE_OR_GOTO (loc->path, err); + VALIDATE_OR_GOTO (this->private, err); - conf = this->private; + conf = this->private; dht_get_du_info (frame, this, loc); - local = dht_local_init (frame); - if (!local) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - op_errno = ENOMEM; - goto err; - } - - hashed_subvol = dht_subvol_get_hashed (this, loc); + local = dht_local_init (frame, loc, NULL, GF_FOP_MKDIR); + if (!local) { + op_errno = ENOMEM; + goto err; + } - if (hashed_subvol == NULL) { - gf_log (this->name, GF_LOG_DEBUG, - "hashed subvol not found for %s", + hashed_subvol = dht_subvol_get_hashed (this, loc); + if (hashed_subvol == NULL) { + gf_log (this->name, GF_LOG_DEBUG, + "hashed subvol not found for %s", loc->path); - op_errno = EINVAL; - goto err; - } + op_errno = EINVAL; + goto err; + } - local->hashed_subvol = hashed_subvol; - local->inode = inode_ref (loc->inode); - ret = loc_copy (&local->loc, loc); - local->mode = mode; + local->hashed_subvol = hashed_subvol; + local->mode = mode; + local->umask = umask; + local->params = dict_ref (params); + local->inode = inode_ref (loc->inode); - if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - op_errno = ENOMEM; - goto err; - } - - local->layout = dht_layout_new (this, conf->subvolume_cnt); - if (!local->layout) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - op_errno = ENOMEM; - goto err; - } + local->layout = dht_layout_new (this, conf->subvolume_cnt); + if (!local->layout) { + op_errno = ENOMEM; + goto err; + } - STACK_WIND (frame, dht_mkdir_hashed_cbk, - hashed_subvol, - hashed_subvol->fops->mkdir, - loc, mode); + STACK_WIND (frame, dht_mkdir_hashed_cbk, + hashed_subvol, + hashed_subvol->fops->mkdir, + loc, mode, umask, params); - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, + NULL, NULL); - return 0; + return 0; } int dht_rmdir_selfheal_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno) + int op_ret, int op_errno, dict_t *xdata) { - dht_local_t *local = NULL; + dht_local_t *local = NULL; - local = frame->local; + local = frame->local; + + DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno, + &local->preparent, &local->postparent, NULL); + + return 0; +} + + +int +dht_rmdir_hashed_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + + local = frame->local; + prev = cookie; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + local->op_ret = -1; + if (op_errno != ENOENT && op_errno != EACCES) { + local->need_selfheal = 1; + } + + + gf_log (this->name, GF_LOG_DEBUG, + "rmdir on %s for %s failed (%s)", + prev->this->name, local->loc.path, + strerror (op_errno)); + goto unlock; + } + + dht_iatt_merge (this, &local->preparent, preparent, prev->this); + dht_iatt_merge (this, &local->postparent, postparent, + prev->this); - if (local->loc.parent) { - local->preparent.ia_ino = local->loc.parent->ino; - local->postparent.ia_ino = local->loc.parent->ino; } +unlock: + UNLOCK (&frame->lock); + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + if (local->need_selfheal) { + local->layout = + dht_layout_get (this, local->loc.inode); - DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno, - &local->preparent, &local->postparent); + /* TODO: neater interface needed below */ + local->stbuf.ia_type = local->loc.inode->ia_type; - return 0; + uuid_copy (local->gfid, local->loc.inode->gfid); + dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk, + &local->loc, local->layout); + } else { + + if (local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, + this, + &local->preparent, + 0); + + dht_inode_ctx_time_update (local->loc.parent, + this, + &local->postparent, + 1); + } + + DHT_STACK_UNWIND (rmdir, frame, local->op_ret, + local->op_errno, &local->preparent, + &local->postparent, NULL); + } + } + + return 0; } int dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *preparent, - struct iatt *postparent) + int op_ret, int op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; - int this_call_cnt = 0; - call_frame_t *prev = NULL; + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + int done = 0; - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - local->op_ret = -1; - - if (op_errno != ENOENT) - local->need_selfheal = 1; - - gf_log (this->name, GF_LOG_DEBUG, - "rmdir on %s for %s failed (%s)", - prev->this->name, local->loc.path, - strerror (op_errno)); - goto unlock; - } + local = frame->local; + prev = cookie; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + local->op_ret = -1; + if (op_errno != ENOENT && op_errno != EACCES) { + local->need_selfheal = 1; + } + + gf_log (this->name, GF_LOG_DEBUG, + "rmdir on %s for %s failed (%s)", + prev->this->name, local->loc.path, + strerror (op_errno)); + goto unlock; + } + + /* Track if rmdir succeeded on atleast one subvol*/ + local->fop_succeeded = 1; dht_iatt_merge (this, &local->preparent, preparent, prev->this); dht_iatt_merge (this, &local->postparent, postparent, prev->this); - } + } unlock: - UNLOCK (&frame->lock); + UNLOCK (&frame->lock); - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - if (local->need_selfheal) { + this_call_cnt = dht_frame_return (frame); + + /* if local->hashed_subvol, we are yet to wind to hashed_subvol. */ + if (local->hashed_subvol && (this_call_cnt == 1)) { + done = 1; + } else if (!local->hashed_subvol && !this_call_cnt) { + done = 1; + } + + + if (done) { + if (local->need_selfheal && local->fop_succeeded) { local->layout = dht_layout_get (this, local->loc.inode); - /* TODO: neater interface needed below */ - local->stbuf.ia_type = local->loc.inode->ia_type; + /* TODO: neater interface needed below */ + local->stbuf.ia_type = local->loc.inode->ia_type; + + uuid_copy (local->gfid, local->loc.inode->gfid); + dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk, + &local->loc, local->layout); + } else if (this_call_cnt) { + /* If non-hashed subvol's have responded, proceed */ + + local->need_selfheal = 0; + STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk, + local->hashed_subvol, + local->hashed_subvol->fops->rmdir, + &local->loc, local->flags, NULL); + } else if (!this_call_cnt) { + /* All subvol's have responded, proceed */ - dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk, - &local->loc, local->layout); - } else { if (local->loc.parent) { - local->preparent.ia_ino = - local->loc.parent->ino; - local->postparent.ia_ino = - local->loc.parent->ino; - WIPE (&local->preparent); - WIPE (&local->postparent); + dht_inode_ctx_time_update (local->loc.parent, + this, + &local->preparent, + 0); + + dht_inode_ctx_time_update (local->loc.parent, + this, + &local->postparent, + 1); + } - DHT_STACK_UNWIND (rmdir, frame, local->op_ret, - local->op_errno, &local->preparent, - &local->postparent); - } - } + DHT_STACK_UNWIND (rmdir, frame, local->op_ret, + local->op_errno, &local->preparent, + &local->postparent, NULL); + } + } return 0; } @@ -3627,38 +4436,64 @@ unlock: int dht_rmdir_do (call_frame_t *frame, xlator_t *this) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - int i = 0; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int i = 0; + xlator_t *hashed_subvol = NULL; - conf = this->private; - local = frame->local; + VALIDATE_OR_GOTO (this->private, err); - if (local->op_ret == -1) - goto err; + conf = this->private; + local = frame->local; - local->call_cnt = conf->subvolume_cnt; + if (local->op_ret == -1) + goto err; - for (i = 0; i < conf->subvolume_cnt; i++) { - STACK_WIND (frame, dht_rmdir_cbk, - conf->subvolumes[i], - conf->subvolumes[i]->fops->rmdir, - &local->loc); - } + local->call_cnt = conf->subvolume_cnt; - return 0; + /* first remove from non-hashed_subvol */ + hashed_subvol = dht_subvol_get_hashed (this, &local->loc); + + if (!hashed_subvol) { + gf_log (this->name, GF_LOG_WARNING, "failed to get hashed " + "subvol for %s",local->loc.path); + } else { + local->hashed_subvol = hashed_subvol; + } + + /* When DHT has only 1 child */ + if (conf->subvolume_cnt == 1) { + STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk, + conf->subvolumes[0], + conf->subvolumes[0]->fops->rmdir, + &local->loc, local->flags, NULL); + return 0; + } + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (hashed_subvol && + (hashed_subvol == conf->subvolumes[i])) + continue; + + STACK_WIND (frame, dht_rmdir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->rmdir, + &local->loc, local->flags, NULL); + } + + return 0; err: - DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno, - &local->preparent, &local->postparent); - return 0; + DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno, + &local->preparent, &local->postparent, NULL); + return 0; } int dht_rmdir_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { dht_local_t *local = NULL; call_frame_t *prev = NULL; @@ -3706,6 +4541,7 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, call_frame_t *main_frame = NULL; dht_local_t *main_local = NULL; int this_call_cnt = 0; + dht_conf_t *conf = this->private; local = frame->local; prev = cookie; @@ -3717,7 +4553,7 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret != 0) goto err; - if (check_is_linkfile (inode, stbuf, xattr) == 0) { + if (!check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) { main_local->op_ret = -1; main_local->op_errno = ENOTEMPTY; @@ -3728,7 +4564,7 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } STACK_WIND (frame, dht_rmdir_linkfile_unlink_cbk, - src, src->fops->unlink, &local->loc); + src, src->fops->unlink, &local->loc, 0, NULL); return 0; err: @@ -3751,6 +4587,8 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, call_frame_t *lookup_frame = NULL; dht_local_t *lookup_local = NULL; dht_local_t *local = NULL; + dict_t *xattrs = NULL; + dht_conf_t *conf = this->private; local = frame->local; @@ -3759,7 +4597,8 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, continue; if (strcmp (trav->d_name, "..") == 0) continue; - if (check_is_linkfile (NULL, (&trav->d_stat), NULL) == 1) { + if (check_is_linkfile (NULL, (&trav->d_stat), trav->dict, + conf->link_xattr_name)) { ret++; continue; } @@ -3771,6 +4610,21 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, return 0; } + xattrs = dict_new (); + if (!xattrs) { + gf_log (this->name, GF_LOG_ERROR, "dict_new failed"); + return -1; + } + + ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to set linkto key" + " in dict"); + if (xattrs) + dict_unref (xattrs); + return -1; + } + list_for_each_entry (trav, &entries->list, list) { if (strcmp (trav->d_name, ".") == 0) continue; @@ -3782,18 +4636,13 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, lookup_frame = copy_frame (frame); if (!lookup_frame) { - gf_log (this->name, GF_LOG_ERROR, - "Out of Memory"); /* out of memory, let the rmdir fail (as non-empty, unfortunately) */ goto err; } - lookup_local = GF_CALLOC (sizeof (*local), 1, - gf_dht_mt_dht_local_t); + lookup_local = mem_get0 (this->local_pool); if (!lookup_local) { - gf_log (this->name, GF_LOG_ERROR, - "Out of Memory"); goto err; } @@ -3805,6 +4654,8 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, if (build_ret != 0) goto err; + uuid_copy (lookup_local->loc.gfid, trav->d_stat.ia_gfid); + gf_log (this->name, GF_LOG_TRACE, "looking up %s on %s", lookup_local->loc.path, src->name); @@ -3817,12 +4668,18 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, STACK_WIND (lookup_frame, dht_rmdir_lookup_cbk, src, src->fops->lookup, - &lookup_local->loc, NULL); + &lookup_local->loc, xattrs); ret++; } + if (xattrs) + dict_unref (xattrs); + return ret; err: + if (xattrs) + dict_unref (xattrs); + DHT_STACK_DESTROY (lookup_frame); return 0; } @@ -3830,19 +4687,20 @@ err: int dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, gf_dirent_t *entries) + int op_ret, int op_errno, gf_dirent_t *entries, + dict_t *xdata) { - dht_local_t *local = NULL; - int this_call_cnt = -1; - call_frame_t *prev = NULL; + dht_local_t *local = NULL; + int this_call_cnt = -1; + call_frame_t *prev = NULL; xlator_t *src = NULL; int ret = 0; - local = frame->local; - prev = cookie; + local = frame->local; + prev = cookie; src = prev->this; - if (op_ret > 2) { + if (op_ret > 2) { ret = dht_rmdir_is_subvol_empty (frame, this, entries, src); switch (ret) { @@ -3860,710 +4718,495 @@ dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, prev->this->name, local->loc.path, ret); break; } - } + } - this_call_cnt = dht_frame_return (frame); + this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - dht_rmdir_do (frame, this); - } + if (is_last_call (this_call_cnt)) { + dht_rmdir_do (frame, this); + } - return 0; + return 0; } int dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, fd_t *fd) -{ - dht_local_t *local = NULL; - int this_call_cnt = -1; - call_frame_t *prev = NULL; - - - local = frame->local; - prev = cookie; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "opendir on %s for %s failed (%s)", - prev->this->name, local->loc.path, - strerror (op_errno)); - goto err; - } - - STACK_WIND (frame, dht_rmdir_readdirp_cbk, - prev->this, prev->this->fops->readdirp, - local->fd, 4096, 0); - - return 0; - -err: - this_call_cnt = dht_frame_return (frame); - - if (is_last_call (this_call_cnt)) { - dht_rmdir_do (frame, this); - } - - return 0; -} - - -int -dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc) -{ - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - int op_errno = -1; - int i = -1; - int ret = -1; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - - conf = this->private; - - local = dht_local_init (frame); - if (!local) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - op_errno = ENOMEM; - goto err; - } - - local->call_cnt = conf->subvolume_cnt; - local->op_ret = 0; - - ret = loc_copy (&local->loc, loc); - if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - op_errno = ENOMEM; - goto err; - } - - local->fd = fd_create (local->loc.inode, frame->root->pid); - if (!local->fd) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - op_errno = ENOMEM; - goto err; - } - - for (i = 0; i < conf->subvolume_cnt; i++) { - STACK_WIND (frame, dht_rmdir_opendir_cbk, - conf->subvolumes[i], - conf->subvolumes[i]->fops->opendir, - loc, local->fd); - } - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (rmdir, frame, -1, op_errno, - NULL, NULL); - - return 0; -} - - -int -dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict) + int op_ret, int op_errno, fd_t *fd, dict_t *xdata) { - DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict); - return 0; -} - + dht_local_t *local = NULL; + int this_call_cnt = -1; + call_frame_t *prev = NULL; + dict_t *dict = NULL; + int ret = 0; + dht_conf_t *conf = this->private; -int -dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, - gf_xattrop_flags_t flags, dict_t *dict) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; + local = frame->local; + prev = cookie; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "opendir on %s for %s failed (%s)", + prev->this->name, local->loc.path, + strerror (op_errno)); + if (op_errno != ENOENT) { + local->op_ret = -1; + local->op_errno = op_errno; + } + goto err; + } - subvol = dht_subvol_get_cached (this, loc->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } + dict = dict_new (); + if (!dict) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } + ret = dict_set_uint32 (dict, conf->link_xattr_name, 256); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to set '%s' key", + local->loc.path, conf->link_xattr_name); - local->inode = inode_ref (loc->inode); - local->call_cnt = 1; + STACK_WIND (frame, dht_rmdir_readdirp_cbk, + prev->this, prev->this->fops->readdirp, + local->fd, 4096, 0, dict); - STACK_WIND (frame, - dht_xattrop_cbk, - subvol, subvol->fops->xattrop, - loc, flags, dict); + if (dict) + dict_unref (dict); - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL); - - return 0; -} - - -int -dht_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict) -{ - DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict); - return 0; -} - - -int -dht_fxattrop (call_frame_t *frame, xlator_t *this, - fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - subvol = dht_subvol_get_cached (this, fd->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - - STACK_WIND (frame, - dht_fxattrop_cbk, - subvol, subvol->fops->fxattrop, - fd, flags, dict); - - return 0; + this_call_cnt = dht_frame_return (frame); -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL); + if (is_last_call (this_call_cnt)) { + dht_rmdir_do (frame, this); + } - return 0; + return 0; } int -dht_inodelk_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno) - +dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) { - DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno); - return 0; -} - - -int32_t -dht_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, struct flock *lock) -{ - xlator_t *subvol = NULL; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; int op_errno = -1; - dht_local_t *local = NULL; - + int i = -1; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); VALIDATE_OR_GOTO (loc->inode, err); VALIDATE_OR_GOTO (loc->path, err); + VALIDATE_OR_GOTO (this->private, err); - subvol = dht_subvol_get_cached (this, loc->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } - - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } - - local->inode = inode_ref (loc->inode); - local->call_cnt = 1; - - STACK_WIND (frame, - dht_inodelk_cbk, - subvol, subvol->fops->inodelk, - volume, loc, cmd, lock); - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (inodelk, frame, -1, op_errno); - - return 0; -} - - -int -dht_finodelk_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno) - -{ - DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno); - return 0; -} + conf = this->private; + local = dht_local_init (frame, loc, NULL, GF_FOP_RMDIR); + if (!local) { + op_errno = ENOMEM; + goto err; + } -int -dht_finodelk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, int32_t cmd, struct flock *lock) -{ - xlator_t *subvol = NULL; - int op_errno = -1; + local->call_cnt = conf->subvolume_cnt; + local->op_ret = 0; + local->fop_succeeded = 0; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); + local->flags = flags; - subvol = dht_subvol_get_cached (this, fd->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } + local->fd = fd_create (local->loc.inode, frame->root->pid); + if (!local->fd) { + op_errno = ENOMEM; + goto err; + } - STACK_WIND (frame, - dht_finodelk_cbk, - subvol, subvol->fops->finodelk, - volume, fd, cmd, lock); + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (frame, dht_rmdir_opendir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->opendir, + loc, local->fd, NULL); + } - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (finodelk, frame, -1, op_errno); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (rmdir, frame, -1, op_errno, + NULL, NULL, NULL); - return 0; + return 0; } - int dht_entrylk_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno) + xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { - DHT_STACK_UNWIND (entrylk, frame, op_ret, op_errno); - return 0; + DHT_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata); + return 0; } int dht_entrylk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type) + const char *volume, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { - xlator_t *subvol = NULL; + xlator_t *subvol = NULL; int op_errno = -1; - dht_local_t *local = NULL; + dht_local_t *local = NULL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - subvol = dht_subvol_get_cached (this, loc->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } + local = dht_local_init (frame, loc, NULL, GF_FOP_ENTRYLK); + if (!local) { + op_errno = ENOMEM; + goto err; + } - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } + subvol = local->cached_subvol; + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } - local->inode = inode_ref (loc->inode); - local->call_cnt = 1; + local->call_cnt = 1; - STACK_WIND (frame, dht_entrylk_cbk, - subvol, subvol->fops->entrylk, - volume, loc, basename, cmd, type); + STACK_WIND (frame, dht_entrylk_cbk, + subvol, subvol->fops->entrylk, + volume, loc, basename, cmd, type, xdata); - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (entrylk, frame, -1, op_errno); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL); - return 0; + return 0; } int dht_fentrylk_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno) + xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { - DHT_STACK_UNWIND (fentrylk, frame, op_ret, op_errno); - return 0; + DHT_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, NULL); + return 0; } int dht_fentrylk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, const char *basename, - entrylk_cmd cmd, entrylk_type type) + const char *volume, fd_t *fd, const char *basename, + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { - xlator_t *subvol = NULL; + xlator_t *subvol = NULL; int op_errno = -1; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (fd, err); - subvol = dht_subvol_get_cached (this, fd->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } - STACK_WIND (frame, dht_fentrylk_cbk, - subvol, subvol->fops->fentrylk, - volume, fd, basename, cmd, type); + STACK_WIND (frame, dht_fentrylk_cbk, + subvol, subvol->fops->fentrylk, + volume, fd, basename, cmd, type, xdata); - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (fentrylk, frame, -1, op_errno); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL); - return 0; + return 0; } int -dht_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *statpre, - struct iatt *statpost) +dht_forget (xlator_t *this, inode_t *inode) { - dht_local_t *local = NULL; - int this_call_cnt = 0; - call_frame_t *prev = NULL; - + uint64_t ctx_int = 0; + dht_inode_ctx_t *ctx = NULL; + dht_layout_t *layout = NULL; - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - gf_log (this->name, GF_LOG_DEBUG, - "subvolume %s returned -1 (%s)", - prev->this->name, strerror (op_errno)); - goto unlock; - } + inode_ctx_del (inode, this, &ctx_int); - dht_iatt_merge (this, &local->prebuf, statpre, prev->this); - dht_iatt_merge (this, &local->stbuf, statpost, prev->this); - - if (local->inode) { - local->prebuf.ia_ino = local->inode->ino; - local->stbuf.ia_ino = local->inode->ino; - } + if (!ctx_int) + return 0; - local->op_ret = 0; - } -unlock: - UNLOCK (&frame->lock); + ctx = (dht_inode_ctx_t *) (long) ctx_int; - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) - DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno, - &local->prebuf, &local->stbuf); + layout = ctx->layout; + ctx->layout = NULL; + dht_layout_unref (this, layout); + GF_FREE (ctx); return 0; } int -dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid) +dht_notify (xlator_t *this, int event, void *data, ...) { - dht_layout_t *layout = NULL; - dht_local_t *local = NULL; - int op_errno = -1; - int i = -1; + xlator_t *subvol = NULL; + int cnt = -1; + int i = -1; + dht_conf_t *conf = NULL; + int ret = -1; + int propagate = 0; + + int had_heard_from_all = 0; + int have_heard_from_all = 0; + struct timeval time = {0,}; + gf_defrag_info_t *defrag = NULL; + dict_t *dict = NULL; + gf_defrag_type cmd = 0; + dict_t *output = NULL; + va_list ap; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); + conf = this->private; + if (!conf) + return ret; - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_DEBUG, - "memory allocation failed :("); - goto err; - } + /* had all subvolumes reported status once till now? */ + had_heard_from_all = 1; + for (i = 0; i < conf->subvolume_cnt; i++) { + if (!conf->last_event[i]) { + had_heard_from_all = 0; + } + } - local->layout = layout = dht_layout_get (this, loc->inode); - if (!layout) { - gf_log (this->name, GF_LOG_DEBUG, - "no layout for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } + switch (event) { + case GF_EVENT_CHILD_UP: + subvol = data; - if (!layout_is_sane (layout)) { - gf_log (this->name, GF_LOG_DEBUG, - "layout is not sane for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } + conf->gen++; - local->inode = inode_ref (loc->inode); - local->call_cnt = layout->cnt; + for (i = 0; i < conf->subvolume_cnt; i++) { + if (subvol == conf->subvolumes[i]) { + cnt = i; + break; + } + } - for (i = 0; i < layout->cnt; i++) { - STACK_WIND (frame, dht_setattr_cbk, - layout->list[i].xlator, - layout->list[i].xlator->fops->setattr, - loc, stbuf, valid); - } + if (cnt == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "got GF_EVENT_CHILD_UP bad subvolume %s", + subvol->name); + break; + } - return 0; + gettimeofday (&time, NULL); + LOCK (&conf->subvolume_lock); + { + conf->subvolume_status[cnt] = 1; + conf->last_event[cnt] = event; + conf->subvol_up_time[cnt] = time.tv_sec; + } + UNLOCK (&conf->subvolume_lock); -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL); + /* one of the node came back up, do a stat update */ + dht_get_du_info_for_subvol (this, cnt); - return 0; -} + break; + case GF_EVENT_CHILD_MODIFIED: + subvol = data; -int -dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, - int32_t valid) -{ - dht_layout_t *layout = NULL; - dht_local_t *local = NULL; - int op_errno = -1; - int i = -1; + conf->gen++; + propagate = 1; + break; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); + case GF_EVENT_CHILD_DOWN: + subvol = data; - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - goto err; - } + if (conf->assert_no_child_down) { + gf_log (this->name, GF_LOG_WARNING, + "Received CHILD_DOWN. Exiting"); + if (conf->defrag) { + gf_defrag_stop (conf->defrag, NULL); + } else { + kill (getpid(), SIGTERM); + } + } - local->layout = layout = dht_layout_get (this, fd->inode); - if (!layout) { - gf_log (this->name, GF_LOG_DEBUG, - "no layout for fd=%p", fd); - op_errno = EINVAL; - goto err; - } + for (i = 0; i < conf->subvolume_cnt; i++) { + if (subvol == conf->subvolumes[i]) { + cnt = i; + break; + } + } - if (!layout_is_sane (layout)) { - gf_log (this->name, GF_LOG_DEBUG, - "layout is not sane for fd=%p", fd); - op_errno = EINVAL; - goto err; - } + if (cnt == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "got GF_EVENT_CHILD_DOWN bad subvolume %s", + subvol->name); + break; + } - local->inode = inode_ref (fd->inode); - local->call_cnt = layout->cnt; + LOCK (&conf->subvolume_lock); + { + conf->subvolume_status[cnt] = 0; + conf->last_event[cnt] = event; + conf->subvol_up_time[cnt] = 0; + } + UNLOCK (&conf->subvolume_lock); - for (i = 0; i < layout->cnt; i++) { - STACK_WIND (frame, dht_setattr_cbk, - layout->list[i].xlator, - layout->list[i].xlator->fops->fsetattr, - fd, stbuf, valid); - } + break; - return 0; + case GF_EVENT_CHILD_CONNECTING: + subvol = data; -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL); + for (i = 0; i < conf->subvolume_cnt; i++) { + if (subvol == conf->subvolumes[i]) { + cnt = i; + break; + } + } - return 0; -} + if (cnt == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "got GF_EVENT_CHILD_CONNECTING bad subvolume %s", + subvol->name); + break; + } + LOCK (&conf->subvolume_lock); + { + conf->last_event[cnt] = event; + } + UNLOCK (&conf->subvolume_lock); -int -dht_forget (xlator_t *this, inode_t *inode) -{ - uint64_t tmp_layout = 0; - dht_layout_t *layout = NULL; + break; + case GF_EVENT_VOLUME_DEFRAG: + { + if (!conf->defrag) { + return ret; + } + defrag = conf->defrag; - inode_ctx_get (inode, this, &tmp_layout); + dict = data; + va_start (ap, data); + output = va_arg (ap, dict_t*); - if (!tmp_layout) - return 0; + ret = dict_get_int32 (dict, "rebalance-command", + (int32_t*)&cmd); + if (ret) + return ret; + LOCK (&defrag->lock); + { + if (defrag->is_exiting) + goto unlock; + if (cmd == GF_DEFRAG_CMD_STATUS) + gf_defrag_status_get (defrag, output); + else if (cmd == GF_DEFRAG_CMD_STOP) + gf_defrag_stop (defrag, output); + } +unlock: + UNLOCK (&defrag->lock); + return 0; + break; + } - layout = (dht_layout_t *)(long)tmp_layout; - dht_layout_unref (this, layout); + default: + propagate = 1; + break; + } - return 0; -} + /* have all subvolumes reported status once by now? */ + have_heard_from_all = 1; + for (i = 0; i < conf->subvolume_cnt; i++) { + if (!conf->last_event[i]) + have_heard_from_all = 0; + } + /* if all subvols have reported status, no need to hide anything + or wait for anything else. Just propagate blindly */ + if (have_heard_from_all) { + propagate = 1; -int -dht_init_subvolumes (xlator_t *this, dht_conf_t *conf) -{ - xlator_list_t *subvols = NULL; - int cnt = 0; + } - for (subvols = this->children; subvols; subvols = subvols->next) - cnt++; + if (!had_heard_from_all && have_heard_from_all) { + /* This is the first event which completes aggregation + of events from all subvolumes. If at least one subvol + had come up, propagate CHILD_UP, but only this time + */ + event = GF_EVENT_CHILD_DOWN; - conf->subvolumes = GF_CALLOC (cnt, sizeof (xlator_t *), - gf_dht_mt_xlator_t); - if (!conf->subvolumes) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - return -1; - } - conf->subvolume_cnt = cnt; + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->last_event[i] == GF_EVENT_CHILD_UP) { + event = GF_EVENT_CHILD_UP; + break; + } - cnt = 0; - for (subvols = this->children; subvols; subvols = subvols->next) - conf->subvolumes[cnt++] = subvols->xlator; + if (conf->last_event[i] == GF_EVENT_CHILD_CONNECTING) { + event = GF_EVENT_CHILD_CONNECTING; + /* continue to check other events for CHILD_UP */ + } + } - conf->subvolume_status = GF_CALLOC (cnt, sizeof (char), - gf_dht_mt_char); - if (!conf->subvolume_status) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - return -1; - } + /* rebalance is started with assert_no_child_down. So we do + * not need to handle CHILD_DOWN event here. + */ + if (conf->defrag) { + ret = gf_thread_create (&conf->defrag->th, NULL, + gf_defrag_start, this); + if (ret) { + conf->defrag = NULL; + GF_FREE (conf->defrag); + kill (getpid(), SIGTERM); + } + } + } - return 0; -} + ret = 0; + if (propagate) + ret = default_notify (this, event, data); + return ret; +} int -dht_notify (xlator_t *this, int event, void *data, ...) +dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this, dht_layout_t **layout) { - xlator_t *subvol = NULL; - int cnt = -1; - int i = -1; - dht_conf_t *conf = NULL; - int ret = -1; - - - conf = this->private; - - switch (event) { - case GF_EVENT_CHILD_UP: - subvol = data; - - conf->gen++; - - for (i = 0; i < conf->subvolume_cnt; i++) { - if (subvol == conf->subvolumes[i]) { - cnt = i; - break; - } - } - - if (cnt == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "got GF_EVENT_CHILD_UP bad subvolume %s", - subvol->name); - break; - } - - LOCK (&conf->subvolume_lock); - { - conf->subvolume_status[cnt] = 1; - } - UNLOCK (&conf->subvolume_lock); + dht_inode_ctx_t *ctx = NULL; + int ret = -1; - /* one of the node came back up, do a stat update */ - dht_get_du_info_for_subvol (this, cnt); - - break; - - case GF_EVENT_CHILD_DOWN: - subvol = data; - - for (i = 0; i < conf->subvolume_cnt; i++) { - if (subvol == conf->subvolumes[i]) { - cnt = i; - break; - } - } - - if (cnt == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "got GF_EVENT_CHILD_DOWN bad subvolume %s", - subvol->name); - break; - } + ret = dht_inode_ctx_get (inode, this, &ctx); - LOCK (&conf->subvolume_lock); - { - conf->subvolume_status[cnt] = 0; - } - UNLOCK (&conf->subvolume_lock); - - break; - } - - ret = default_notify (this, event, data); + if (!ret && ctx) { + if (ctx->layout) { + if (layout) + *layout = ctx->layout; + ret = 0; + } else { + ret = -1; + } + } - return ret; + return ret; } - |
