From c87817495b3c5c36dcca9d157e9313b7d3195eed Mon Sep 17 00:00:00 2001 From: Susant Palai Date: Fri, 27 Dec 2019 12:06:19 +0530 Subject: dht: Fix stale-layout and create issue Problem: With lookup-optimize set to on by default, a client with stale-layout can create a new file on a wrong subvol. This will lead to possible duplicate files if two different clients attempt to create the same file with two different layouts. Solution: Send in-memory layout to be cross checked at posix before commiting a "create". In case of a mismatch, sync the client layout with that of the server and attempt the create fop one more time. test: Manual, testcase(attached) fixes: bz#1786679 Change-Id: Ife0941f105113f1c572f4363cbcee65e0dd9bd6a Signed-off-by: Susant Palai --- xlators/cluster/dht/src/dht-common.c | 149 ++++++++++++++++++++--- xlators/cluster/dht/src/dht-common.h | 6 + xlators/protocol/client/src/client-rpc-fops_v2.c | 9 +- xlators/storage/posix/src/posix-entry-ops.c | 29 ++++- xlators/storage/posix/src/posix-helpers.c | 76 ++++++++++++ xlators/storage/posix/src/posix.h | 4 + 6 files changed, 253 insertions(+), 20 deletions(-) (limited to 'xlators') diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index ce211e6c642..2231af647de 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -8341,6 +8341,11 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, xlator_t *prev = NULL; int ret = -1; dht_local_t *local = NULL; + gf_boolean_t parent_layout_changed = _gf_false; + char pgfid[GF_UUID_BUF_SIZE] = {0}; + xlator_t *subvol = NULL; + + local = frame->local; local = frame->local; if (!local) { @@ -8349,8 +8354,69 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, goto out; } - if (op_ret == -1) + if (op_ret == -1) { + local->op_errno = op_errno; + parent_layout_changed = (xdata && + dict_get(xdata, GF_PREOP_CHECK_FAILED)) + ? _gf_true + : _gf_false; + + if (parent_layout_changed) { + if (local && local->lock[0].layout.parent_layout.locks) { + /* Returning failure as the layout could not be fixed even under + * the lock */ + goto out; + } + + gf_uuid_unparse(local->loc.parent->gfid, pgfid); + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_PARENT_LAYOUT_CHANGED, + "create (%s/%s) (path: %s): parent layout " + "changed. Attempting a layout refresh and then a " + "retry", + pgfid, local->loc.name, local->loc.path); + + /* + dht_refresh_layout needs directory info in local->loc.Hence, + storing the parent_loc in local->loc and storing the create + context in local->loc2. We will restore this information in + dht_creation_do. + */ + + loc_wipe(&local->loc2); + + ret = loc_copy(&local->loc2, &local->loc); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "loc_copy failed %s", local->loc.path); + + goto out; + } + + loc_wipe(&local->loc); + + ret = dht_build_parent_loc(this, &local->loc, &local->loc2, + &op_errno); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED, + "parent loc build failed"); + goto out; + } + + subvol = dht_subvol_get_hashed(this, &local->loc2); + + ret = dht_create_lock(frame, subvol); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR, + "locking parent failed"); + goto out; + } + + return 0; + } + goto out; + } prev = cookie; @@ -8471,6 +8537,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, subvol->name); + dht_set_parent_layout_in_dict(loc, this, local); + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, subvol->fops->create, loc, flags, mode, umask, fd, params); @@ -8479,10 +8547,6 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, avail_subvol = dht_free_disk_available_subvol(this, subvol, local); if (avail_subvol != subvol) { - local->params = dict_ref(params); - local->flags = flags; - local->mode = mode; - local->umask = umask; local->cached_subvol = avail_subvol; local->hashed_subvol = subvol; @@ -8498,6 +8562,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, subvol->name); + dht_set_parent_layout_in_dict(loc, this, local); + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, subvol->fops->create, loc, flags, mode, umask, fd, params); @@ -8713,7 +8779,7 @@ err: return 0; } -static int32_t +int32_t dht_create_lock(call_frame_t *frame, xlator_t *subvol) { dht_local_t *local = NULL; @@ -8758,6 +8824,60 @@ err: return -1; } +int +dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local) +{ + dht_conf_t *conf = this->private; + dht_layout_t *parent_layout = NULL; + int *parent_disk_layout = NULL; + xlator_t *hashed_subvol = NULL; + char pgfid[GF_UUID_BUF_SIZE] = {0}; + int ret = 0; + + gf_uuid_unparse(loc->parent->gfid, pgfid); + + parent_layout = dht_layout_get(this, loc->parent); + hashed_subvol = dht_subvol_get_hashed(this, loc); + + ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol, + &parent_disk_layout); + if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "%s (%s/%s) (path: %s): " + "extracting in-memory layout of parent failed. ", + gf_fop_list[local->fop], pgfid, loc->name, loc->path); + goto err; + } + + ret = dict_set_str_sizen(local->params, GF_PREOP_PARENT_KEY, + conf->xattr_name); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "%s (%s/%s) (path: %s): " + "setting %s key in params dictionary failed. ", + gf_fop_list[local->fop], pgfid, loc->name, loc->path, + GF_PREOP_PARENT_KEY); + goto err; + } + + ret = dict_set_bin(local->params, conf->xattr_name, parent_disk_layout, + 4 * 4); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "%s (%s/%s) (path: %s): " + "setting parent-layout in params dictionary failed. ", + gf_fop_list[local->fop], pgfid, loc->name, loc->path); + goto err; + } + +err: + dht_layout_unref(this, parent_layout); + return ret; +} + int dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *params) @@ -8784,6 +8904,11 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, goto err; } + local->params = dict_ref(params); + local->flags = flags; + local->mode = mode; + local->umask = umask; + if (dht_filter_loc_subvol_key(this, loc, &local->loc, &subvol)) { gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO, "creating %s on %s (got create on %s)", local->loc.path, @@ -8799,10 +8924,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, if (hashed_subvol && (hashed_subvol != subvol)) { /* Create the linkto file and then the data file */ - local->params = dict_ref(params); - local->flags = flags; - local->mode = mode; - local->umask = umask; local->cached_subvol = subvol; local->hashed_subvol = hashed_subvol; @@ -8815,6 +8936,9 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, * file as we expect a lookup everywhere if there are problems * with the parent layout */ + + dht_set_parent_layout_in_dict(loc, this, local); + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, subvol->fops->create, &local->loc, flags, mode, umask, fd, params); @@ -8866,11 +8990,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, goto err; } - local->params = dict_ref(params); - local->flags = flags; - local->mode = mode; - local->umask = umask; - loc_wipe(&local->loc); ret = dht_build_parent_loc(this, &local->loc, loc, &op_errno); diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 6b76d9a0c79..d82087c0448 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -1502,4 +1502,10 @@ dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata); +int32_t +dht_create_lock(call_frame_t *frame, xlator_t *subvol); + +int +dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local); + #endif /* _DHT_H */ diff --git a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c index 7537f8adbdd..0d80d4e8efb 100644 --- a/xlators/protocol/client/src/client-rpc-fops_v2.c +++ b/xlators/protocol/client/src/client-rpc-fops_v2.c @@ -2076,11 +2076,12 @@ client4_0_create_cbk(struct rpc_req *req, struct iovec *iov, int count, goto out; } + ret = client_post_create_v2(this, &rsp, &stbuf, &preparent, &postparent, + local, &xdata); + if (ret < 0) + goto out; + if (-1 != rsp.op_ret) { - ret = client_post_create_v2(this, &rsp, &stbuf, &preparent, &postparent, - local, &xdata); - if (ret < 0) - goto out; ret = client_add_fd_to_saved_fds(frame->this, fd, &local->loc, local->flags, rsp.fd, 0); if (ret) { diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c index 1f1e05f1dc9..667fec7d62f 100644 --- a/xlators/storage/posix/src/posix-entry-ops.c +++ b/xlators/storage/posix/src/posix-entry-ops.c @@ -2145,6 +2145,8 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, 0, }; + dict_t *xdata_rsp = dict_ref(xdata); + DECLARE_OLD_FS_ID_VAR; VALIDATE_OR_GOTO(frame, out); @@ -2194,6 +2196,28 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, was_present = 0; } + if (!was_present) { + if (posix_is_layout_stale(xdata, par_path, this)) { + op_ret = -1; + op_errno = EIO; + if (!xdata_rsp) { + xdata_rsp = dict_new(); + if (!xdata_rsp) { + op_errno = ENOMEM; + goto out; + } + } + + if (dict_set_int32_sizen(xdata_rsp, GF_PREOP_CHECK_FAILED, 1) == + -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DICT_SET_FAILED, + "setting key %s in dict failed", GF_PREOP_CHECK_FAILED); + } + + goto out; + } + } + if (priv->o_direct) _flags |= O_DIRECT; @@ -2313,7 +2337,10 @@ out: STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, (loc) ? loc->inode : NULL, &stbuf, &preparent, - &postparent, xdata); + &postparent, xdata_rsp); + + if (xdata_rsp) + dict_unref(xdata_rsp); return 0; } diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index cbc271481a6..4919cbdf2e9 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -3586,3 +3586,79 @@ posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xattr_req) } } } + +gf_boolean_t +posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this) +{ + int op_ret = 0; + ssize_t size = 0; + char value_buf[4096] = { + 0, + }; + gf_boolean_t have_val = _gf_false; + data_t *arg_data = NULL; + char *xattr_name = NULL; + gf_boolean_t is_stale = _gf_false; + + op_ret = dict_get_str_sizen(xdata, GF_PREOP_PARENT_KEY, &xattr_name); + if (xattr_name == NULL) { + op_ret = 0; + goto out; + } + + arg_data = dict_get(xdata, xattr_name); + if (!arg_data) { + op_ret = 0; + goto out; + } + + size = sys_lgetxattr(par_path, xattr_name, value_buf, + sizeof(value_buf) - 1); + + if (size >= 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_PREOP_CHECK_FAILED, + "getxattr on key (%s) path (%s) failed due to" + " buffer overflow", + xattr_name, par_path); + size = sys_lgetxattr(par_path, xattr_name, NULL, 0); + } + if (size < 0) { + op_ret = -1; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED, + "getxattr on key (%s) failed, path : %s", xattr_name, + par_path); + goto out; + } + } + + if (!have_val) { + size = sys_lgetxattr(par_path, xattr_name, value_buf, size); + if (size < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED, + "getxattr on key (%s) failed (%s)", xattr_name, + strerror(errno)); + goto out; + } + } + + if ((arg_data->len != size) || (memcmp(arg_data->data, value_buf, size))) { + gf_msg(this->name, GF_LOG_INFO, EIO, P_MSG_PREOP_CHECK_FAILED, + "failing preop as on-disk xattr value differs from argument " + "value for key %s", + xattr_name); + op_ret = -1; + } + +out: + dict_del_sizen(xdata, xattr_name); + dict_del_sizen(xdata, GF_PREOP_PARENT_KEY); + + if (op_ret == -1) { + is_stale = _gf_true; + } + + return is_stale; +} diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index ce4e1193639..7893a9b3e35 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -658,4 +658,8 @@ posix_spawn_ctx_janitor_thread(xlator_t *this); void posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata); + +gf_boolean_t +posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this); + #endif /* _POSIX_H */ -- cgit