diff options
author | Susant Palai <spalai@redhat.com> | 2015-06-16 20:35:46 +0530 |
---|---|---|
committer | Raghavendra G <rgowdapp@redhat.com> | 2015-08-23 23:06:31 -0700 |
commit | 1626fb105c99ef623be3687f3f48d9247ab9b7c4 (patch) | |
tree | cda2fc88f521eaf69041ca7a8d343dad02f7c87c /xlators/cluster/dht/src/dht-common.c | |
parent | a586b30c1bd968d23562406cefbb76b82a0e236c (diff) |
dht: block/handle create op falling to decommissioned brick
Problem:
From remove-brick start until the commit phase, the client layout
may not be in sync with the on-disk layout because no lookup has refreshed it.
Hence, a create call may fall on the decommissioned brick.
Solution:
Acquire a lock on the hashed subvol so that a fix-layout or
self-heal cannot modify the layout while it is being read.
Even if the layout is read before the remove-brick fix-layout has run
and the file falls on the decommissioned brick, the file should be
migrated to a new brick as per the fix-layout.
Change-Id: If84a12ec34f981adb2b9b224e80f535cfe5bf9f2
BUG: 1232378
Signed-off-by: Susant Palai <spalai@redhat.com>
Reviewed-on: http://review.gluster.org/11260
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Diffstat (limited to 'xlators/cluster/dht/src/dht-common.c')
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 456 |
1 files changed, 408 insertions, 48 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index dbbb7e59bc0..1195c3bb49d 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -34,7 +34,6 @@ dht_removexattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame); int dht_setxattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame); - int dht_aggregate_quota_xattr (dict_t *dst, char *key, data_t *value) { @@ -3512,7 +3511,6 @@ err: return 0; } - static int dht_common_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, @@ -5473,9 +5471,6 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int ret = -1; dht_local_t *local = NULL; - if (op_ret == -1) - goto out; - local = frame->local; if (!local) { op_ret = -1; @@ -5483,6 +5478,9 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto out; } + if (op_ret == -1) + goto out; + prev = cookie; if (local->loc.parent) { @@ -5502,18 +5500,34 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, op_errno = EINVAL; goto out; } + + local->op_errno = op_errno; + if (local->linked == _gf_true) { local->stbuf = *stbuf; dht_linkfile_attr_heal (frame, this); } out: + DHT_STRIP_PHASE1_FLAGS (stbuf); - DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, stbuf, preparent, - postparent, xdata); + + if (local && local->lock.locks) { + /* store op_errno for failure case*/ + local->op_errno = op_errno; + local->refresh_layout_unlock (frame, this, op_ret); + + if (op_ret == 0) { + DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, + inode, stbuf, preparent, postparent, + xdata); + } + } else { + DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, + stbuf, preparent, postparent, xdata); + } return 0; } - int dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -5525,8 +5539,10 @@ dht_create_linkfile_create_cbk (call_frame_t *frame, void 
*cookie, dht_local_t *local = NULL; xlator_t *cached_subvol = NULL; - if (op_ret == -1) + if (op_ret == -1) { + local->op_errno = op_errno; goto err; + } local = frame->local; cached_subvol = local->cached_subvol; @@ -5538,25 +5554,327 @@ dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie, return 0; err: - DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, - NULL, NULL, NULL); + if (local->lock.locks) + local->refresh_layout_unlock (frame, this, -1); + + return 0; +} + +int +dht_create_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this, + xlator_t *subvol, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, + dict_t *params) +{ + dht_local_t *local = NULL; + xlator_t *avail_subvol = NULL; + + local = frame->local; + + if (!dht_is_subvol_filled (this, subvol)) { + gf_msg_debug (this->name, 0, + "creating %s on %s", loc->path, + subvol->name); + + STACK_WIND (frame, dht_create_cbk, + subvol, subvol->fops->create, + loc, flags, mode, umask, fd, params); + + } else { + avail_subvol = dht_free_disk_available_subvol (this, subvol, local); + + if (avail_subvol != subvol) { + local->params = dict_ref (params); + local->flags = flags; + local->mode = mode; + local->umask = umask; + local->cached_subvol = avail_subvol; + local->hashed_subvol = subvol; + + gf_msg_debug (this->name, 0, + "creating %s on %s (link at %s)", loc->path, + avail_subvol->name, subvol->name); + + dht_linkfile_create (frame, dht_create_linkfile_create_cbk, + this, avail_subvol, subvol, loc); + + goto out; + } + + gf_msg_debug (this->name, 0, + "creating %s on %s", loc->path, subvol->name); + + STACK_WIND (frame, dht_create_cbk, + subvol, subvol->fops->create, + loc, flags, mode, umask, fd, params); + } +out: return 0; } int +dht_build_parent_loc (xlator_t *this, loc_t *parent, loc_t *child, + int32_t *op_errno) +{ + inode_table_t *table = NULL; + int ret = -1; + + if (!parent || !child) { + if (op_errno) + *op_errno = EINVAL; + goto out; + } + + if 
(child->parent) { + parent->inode = inode_ref (child->parent); + if (!parent->inode) { + if (op_errno) + *op_errno = EINVAL; + goto out; + } + + gf_uuid_copy (parent->gfid, child->pargfid); + + ret = 0; + + goto out; + } else { + if (gf_uuid_is_null (child->pargfid)) { + if (op_errno) + *op_errno = EINVAL; + goto out; + } + + table = this->itable; + + if (!table) { + if (op_errno) { + *op_errno = EINVAL; + goto out; + } + } + + parent->inode = inode_find (table, child->pargfid); + + if (!parent->inode) { + if (op_errno) { + *op_errno = EINVAL; + goto out; + } + } + + gf_uuid_copy (parent->gfid, child->pargfid); + + ret = 0; + } + +out: + return ret; +} + + +int32_t +dht_create_do (call_frame_t *frame) +{ + dht_local_t *local = NULL; + dht_layout_t *refreshed = NULL; + xlator_t *subvol = NULL; + xlator_t *this = NULL; + dht_conf_t *conf = NULL; + dht_methods_t *methods = NULL; + + local = frame->local; + + this = THIS; + + conf = this->private; + + GF_VALIDATE_OR_GOTO (this->name, conf, err); + + methods = conf->methods; + + GF_VALIDATE_OR_GOTO (this->name, conf->methods, err); + + /* We don't need parent_loc anymore */ + loc_wipe (&local->loc); + + loc_copy (&local->loc, &local->loc2); + + loc_wipe (&local->loc2); + + refreshed = local->selfheal.refreshed_layout; + + subvol = methods->layout_search (this, refreshed, local->loc.name); + + if (!subvol) { + gf_msg (this->name, GF_LOG_ERROR, 0, + DHT_MSG_HASHED_SUBVOL_GET_FAILED, "no subvolume in " + "layout for path=%s", local->loc.path); + local->op_errno = ENOENT; + goto err; + } + + dht_create_wind_to_avail_subvol (frame, this, subvol, &local->loc, + local->flags, local->mode, + local->umask, local->fd, local->params); + return 0; +err: + local->refresh_layout_unlock (frame, this, -1); + + return 0; +} + +int32_t +dht_create_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + DHT_STACK_DESTROY (frame); + return 0; +} + +int32_t +dht_create_finish 
(call_frame_t *frame, xlator_t *this, int op_ret) +{ + dht_local_t *local = NULL, *lock_local = NULL; + call_frame_t *lock_frame = NULL; + int lock_count = 0; + + local = frame->local; + lock_count = dht_lock_count (local->lock.locks, local->lock.lk_count); + if (lock_count == 0) + goto done; + + lock_frame = copy_frame (frame); + if (lock_frame == NULL) { + goto done; + } + + lock_local = dht_local_init (lock_frame, &local->loc, NULL, + lock_frame->root->op); + if (lock_local == NULL) { + goto done; + } + + lock_local->lock.locks = local->lock.locks; + lock_local->lock.lk_count = local->lock.lk_count; + + local->lock.locks = NULL; + local->lock.lk_count = 0; + + dht_unlock_inodelk (lock_frame, lock_local->lock.locks, + lock_local->lock.lk_count, + dht_create_unlock_cbk); + lock_frame = NULL; + +done: + if (lock_frame != NULL) { + DHT_STACK_DESTROY (lock_frame); + } + + if (op_ret == 0) + return 0; + + DHT_STACK_UNWIND (create, frame, op_ret, local->op_errno, NULL, NULL, + NULL, NULL, NULL, NULL); + return 0; +} + +int32_t +dht_create_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + dht_local_t *local = NULL; + + local = frame->local; + + if (!local) { + goto err; + } + + if (op_ret < 0) { + gf_msg ("DHT", GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR, + "Create lock failed for file: %s", local->loc2.name); + + local->op_errno = op_errno; + + goto err; + } + + local->refresh_layout_unlock = dht_create_finish; + + local->refresh_layout_done = dht_create_do; + + dht_refresh_layout (frame); + + return 0; +err: + dht_create_finish (frame, this, -1); + return 0; +} + +int32_t +dht_create_lock (call_frame_t *frame, xlator_t *subvol) +{ + dht_local_t *local = NULL; + int count = 1, ret = -1; + dht_lock_t **lk_array = NULL; + + GF_VALIDATE_OR_GOTO ("dht", frame, err); + GF_VALIDATE_OR_GOTO (frame->this->name, frame->local, err); + + local = frame->local; + + lk_array = GF_CALLOC (count, sizeof (*lk_array), 
gf_common_mt_char); + + if (lk_array == NULL) + goto err; + + lk_array[0] = dht_lock_new (frame->this, subvol, &local->loc, F_RDLCK, + DHT_LAYOUT_HEAL_DOMAIN); + + if (lk_array[0] == NULL) + goto err; + + local->lock.locks = lk_array; + local->lock.lk_count = count; + + ret = dht_blocking_inodelk (frame, lk_array, count, + dht_create_lock_cbk); + + if (ret < 0) { + local->lock.locks = NULL; + local->lock.lk_count = 0; + goto err; + } + + return 0; +err: + if (lk_array != NULL) { + dht_lock_array_free (lk_array, count); + GF_FREE (lk_array); + } + + return -1; +} + +int dht_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *params) { - int op_errno = -1; - xlator_t *subvol = NULL; - dht_local_t *local = NULL; - xlator_t *avail_subvol = NULL; + int op_errno = -1; + xlator_t *subvol = NULL; + dht_local_t *local = NULL; + int i = 0; + dht_conf_t *conf = NULL; + int ret = 0; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); + conf = this->private; + dht_get_du_info (frame, this, loc); local = dht_local_init (frame, loc, fd, GF_FOP_CREATE); @@ -5579,48 +5897,90 @@ dht_create (call_frame_t *frame, xlator_t *this, subvol = dht_subvol_get_hashed (this, loc); if (!subvol) { - gf_msg_debug (this->name, 0, - "no subvolume in layout for path=%s", - loc->path); + gf_msg (this->name, GF_LOG_ERROR, 0, + DHT_MSG_HASHED_SUBVOL_GET_FAILED, + "no subvolume in layout for path=%s", + loc->path); + op_errno = ENOENT; goto err; } - if (!dht_is_subvol_filled (this, subvol)) { - gf_msg_trace (this->name, 0, - "creating %s on %s", loc->path, - subvol->name); - STACK_WIND (frame, dht_create_cbk, - subvol, subvol->fops->create, - loc, flags, mode, umask, fd, params); - goto done; - } - /* Choose the minimum filled volume, and create the - files there */ - avail_subvol = dht_free_disk_available_subvol (this, subvol, local); - if (avail_subvol != subvol) { - local->params = dict_ref 
(params); - local->flags = flags; - local->mode = mode; - local->umask = umask; - local->cached_subvol = avail_subvol; - local->hashed_subvol = subvol; - gf_msg_trace (this->name, 0, - "creating %s on %s (link at %s)", loc->path, - avail_subvol->name, subvol->name); - dht_linkfile_create (frame, dht_create_linkfile_create_cbk, - this, avail_subvol, subvol, loc); - goto done; + /* Post remove-brick, the client layout may not be in sync with + * disk layout because of lack of lookup. Hence,a create call + * may fall on the decommissioned brick. Hence, if the + * hashed_subvol is part of decommissioned bricks list, do a + * lookup on parent dir. If a fix-layout is already done by the + * remove-brick process, the parent directory layout will be in + * sync with that of the disk. If fix-layout is still ending + * on the parent directory, we can let the file get created on + * the decommissioned brick which will be eventually migrated to + * non-decommissioned brick based on the new layout. + */ + + if (conf->decommission_subvols_cnt) { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->decommissioned_bricks[i] && + conf->decommissioned_bricks[i] == subvol) { + + gf_msg_debug (this->name, 0, "hashed subvol:%s is " + "part of decommission brick list for " + "file: %s", subvol->name, loc->path); + + /* dht_refresh_layout needs directory info in + * local->loc. Hence, storing the parent_loc in + * local->loc and storing the create context in + * local->loc2. 
We will restore this information + * in dht_creation do */ + + ret = loc_copy (&local->loc2, &local->loc); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, ENOMEM, + DHT_MSG_NO_MEMORY, + "loc_copy failed %s", loc->path); + + goto err; + } + + local->params = dict_ref (params); + local->flags = flags; + local->mode = mode; + local->umask = umask; + + loc_wipe (&local->loc); + + ret = dht_build_parent_loc (this, &local->loc, loc, + &op_errno); + + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, ENOMEM, + DHT_MSG_NO_MEMORY, + "parent loc build failed"); + goto err; + } + + ret = dht_create_lock (frame, subvol); + + if (ret < 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, + DHT_MSG_INODE_LK_ERROR, + "locking parent failed"); + goto err; + } + + goto done; + } + } } - gf_msg_trace (this->name, 0, - "creating %s on %s", loc->path, subvol->name); - STACK_WIND (frame, dht_create_cbk, - subvol, subvol->fops->create, - loc, flags, mode, umask, fd, params); + + + dht_create_wind_to_avail_subvol (frame, this, subvol, loc, flags, mode, + umask, fd, params); done: return 0; err: + op_errno = (op_errno == -1) ? errno : op_errno; DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, NULL); |