summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSusant Palai <spalai@redhat.com>2019-12-27 12:06:19 +0530
committerAmar Tumballi <amar@kadalu.io>2020-02-09 02:51:31 +0000
commitc87817495b3c5c36dcca9d157e9313b7d3195eed (patch)
tree45d7937077f34afc4c05f8cf752f0df19bef70ba
parent8fad76650bd85463708f59d2518f5b764ae4c702 (diff)
dht: Fix stale-layout and create issue
Problem: With lookup-optimize set to on by default, a client with stale-layout can create a new file on a wrong subvol. This will lead to possible duplicate files if two different clients attempt to create the same file with two different layouts. Solution: Send in-memory layout to be cross checked at posix before commiting a "create". In case of a mismatch, sync the client layout with that of the server and attempt the create fop one more time. test: Manual, testcase(attached) fixes: bz#1786679 Change-Id: Ife0941f105113f1c572f4363cbcee65e0dd9bd6a Signed-off-by: Susant Palai <spalai@redhat.com>
-rwxr-xr-xtests/bugs/distribute/bug-1786679.t69
-rw-r--r--xlators/cluster/dht/src/dht-common.c149
-rw-r--r--xlators/cluster/dht/src/dht-common.h6
-rw-r--r--xlators/protocol/client/src/client-rpc-fops_v2.c9
-rw-r--r--xlators/storage/posix/src/posix-entry-ops.c29
-rw-r--r--xlators/storage/posix/src/posix-helpers.c76
-rw-r--r--xlators/storage/posix/src/posix.h4
7 files changed, 322 insertions, 20 deletions
diff --git a/tests/bugs/distribute/bug-1786679.t b/tests/bugs/distribute/bug-1786679.t
new file mode 100755
index 0000000..219ce51
--- /dev/null
+++ b/tests/bugs/distribute/bug-1786679.t
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=250
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../dht.rc
+
+
+# create 2 subvols
+# create a dir
+# create a file
+# change layout
+# remove the file
+# execute create from a different mount
+# Without the patch, the file will be present on both of the bricks
+
+cleanup
+
+function get_layout () {
+
+layout=`getfattr -n trusted.glusterfs.dht -e hex $1 2>&1 | grep dht | gawk -F"=" '{print $2}'`
+
+echo $layout
+
+}
+
+function set_layout()
+{
+ setfattr -n "trusted.glusterfs.dht" -v $1 $2
+}
+
+TEST glusterd
+TEST pidof glusterd
+
+BRICK1=$B0/${V0}-0
+BRICK2=$B0/${V0}-1
+
+TEST $CLI volume create $V0 $H0:$BRICK1 $H0:$BRICK2
+TEST $CLI volume start $V0
+
+# Mount FUSE and create symlink
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST mkdir $M0/dir
+TEST touch $M0/dir/file
+TEST ! stat "$BRICK1/dir/file"
+TEST stat "$BRICK2/dir/file"
+
+layout1="$(get_layout "$BRICK1/dir")"
+layout2="$(get_layout "$BRICK2/dir")"
+
+TEST set_layout $layout1 "$BRICK2/dir"
+TEST set_layout $layout2 "$BRICK1/dir"
+
+TEST rm $M0/dir/file -f
+TEST gluster v set $V0 client-log-level DEBUG
+
+#Without the patch in place, this client will create the file in $BRICK2
+#which will lead to two files being on both the bricks when a new client
+#create the file with the same name
+TEST touch $M0/dir/file
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M1
+TEST touch $M1/dir/file
+
+TEST stat "$BRICK1/dir/file"
+TEST ! stat "$BRICK2/dir/file"
+
+cleanup
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index ce211e6..2231af6 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -8341,6 +8341,11 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
xlator_t *prev = NULL;
int ret = -1;
dht_local_t *local = NULL;
+ gf_boolean_t parent_layout_changed = _gf_false;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
local = frame->local;
if (!local) {
@@ -8349,8 +8354,69 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
goto out;
}
- if (op_ret == -1)
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ parent_layout_changed = (xdata &&
+ dict_get(xdata, GF_PREOP_CHECK_FAILED))
+ ? _gf_true
+ : _gf_false;
+
+ if (parent_layout_changed) {
+ if (local && local->lock[0].layout.parent_layout.locks) {
+ /* Returning failure as the layout could not be fixed even under
+ * the lock */
+ goto out;
+ }
+
+ gf_uuid_unparse(local->loc.parent->gfid, pgfid);
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "create (%s/%s) (path: %s): parent layout "
+ "changed. Attempting a layout refresh and then a "
+ "retry",
+ pgfid, local->loc.name, local->loc.path);
+
+ /*
+ dht_refresh_layout needs directory info in local->loc.Hence,
+ storing the parent_loc in local->loc and storing the create
+ context in local->loc2. We will restore this information in
+ dht_creation_do.
+ */
+
+ loc_wipe(&local->loc2);
+
+ ret = loc_copy(&local->loc2, &local->loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "loc_copy failed %s", local->loc.path);
+
+ goto out;
+ }
+
+ loc_wipe(&local->loc);
+
+ ret = dht_build_parent_loc(this, &local->loc, &local->loc2,
+ &op_errno);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED,
+ "parent loc build failed");
+ goto out;
+ }
+
+ subvol = dht_subvol_get_hashed(this, &local->loc2);
+
+ ret = dht_create_lock(frame, subvol);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "locking parent failed");
+ goto out;
+ }
+
+ return 0;
+ }
+
goto out;
+ }
prev = cookie;
@@ -8471,6 +8537,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
subvol->name);
+ dht_set_parent_layout_in_dict(loc, this, local);
+
STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
subvol->fops->create, loc, flags, mode, umask, fd,
params);
@@ -8479,10 +8547,6 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
avail_subvol = dht_free_disk_available_subvol(this, subvol, local);
if (avail_subvol != subvol) {
- local->params = dict_ref(params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
local->cached_subvol = avail_subvol;
local->hashed_subvol = subvol;
@@ -8498,6 +8562,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
subvol->name);
+ dht_set_parent_layout_in_dict(loc, this, local);
+
STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
subvol->fops->create, loc, flags, mode, umask, fd,
params);
@@ -8713,7 +8779,7 @@ err:
return 0;
}
-static int32_t
+int32_t
dht_create_lock(call_frame_t *frame, xlator_t *subvol)
{
dht_local_t *local = NULL;
@@ -8759,6 +8825,60 @@ err:
}
int
+dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local)
+{
+ dht_conf_t *conf = this->private;
+ dht_layout_t *parent_layout = NULL;
+ int *parent_disk_layout = NULL;
+ xlator_t *hashed_subvol = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ parent_layout = dht_layout_get(this, loc->parent);
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+
+ ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol,
+ &parent_disk_layout);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "extracting in-memory layout of parent failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ ret = dict_set_str_sizen(local->params, GF_PREOP_PARENT_KEY,
+ conf->xattr_name);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting %s key in params dictionary failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path,
+ GF_PREOP_PARENT_KEY);
+ goto err;
+ }
+
+ ret = dict_set_bin(local->params, conf->xattr_name, parent_disk_layout,
+ 4 * 4);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting parent-layout in params dictionary failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+err:
+ dht_layout_unref(this, parent_layout);
+ return ret;
+}
+
+int
dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
{
@@ -8784,6 +8904,11 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
goto err;
}
+ local->params = dict_ref(params);
+ local->flags = flags;
+ local->mode = mode;
+ local->umask = umask;
+
if (dht_filter_loc_subvol_key(this, loc, &local->loc, &subvol)) {
gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
"creating %s on %s (got create on %s)", local->loc.path,
@@ -8799,10 +8924,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
if (hashed_subvol && (hashed_subvol != subvol)) {
/* Create the linkto file and then the data file */
- local->params = dict_ref(params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
local->cached_subvol = subvol;
local->hashed_subvol = hashed_subvol;
@@ -8815,6 +8936,9 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
* file as we expect a lookup everywhere if there are problems
* with the parent layout
*/
+
+ dht_set_parent_layout_in_dict(loc, this, local);
+
STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
subvol->fops->create, &local->loc, flags, mode, umask,
fd, params);
@@ -8866,11 +8990,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
goto err;
}
- local->params = dict_ref(params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
-
loc_wipe(&local->loc);
ret = dht_build_parent_loc(this, &local->loc, loc, &op_errno);
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 6b76d9a..d82087c 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -1502,4 +1502,10 @@ dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *dict,
dict_t *xdata);
+int32_t
+dht_create_lock(call_frame_t *frame, xlator_t *subvol);
+
+int
+dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local);
+
#endif /* _DHT_H */
diff --git a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c
index 7537f8a..0d80d4e 100644
--- a/xlators/protocol/client/src/client-rpc-fops_v2.c
+++ b/xlators/protocol/client/src/client-rpc-fops_v2.c
@@ -2076,11 +2076,12 @@ client4_0_create_cbk(struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
+ ret = client_post_create_v2(this, &rsp, &stbuf, &preparent, &postparent,
+ local, &xdata);
+ if (ret < 0)
+ goto out;
+
if (-1 != rsp.op_ret) {
- ret = client_post_create_v2(this, &rsp, &stbuf, &preparent, &postparent,
- local, &xdata);
- if (ret < 0)
- goto out;
ret = client_add_fd_to_saved_fds(frame->this, fd, &local->loc,
local->flags, rsp.fd, 0);
if (ret) {
diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
index 1f1e05f..667fec7 100644
--- a/xlators/storage/posix/src/posix-entry-ops.c
+++ b/xlators/storage/posix/src/posix-entry-ops.c
@@ -2145,6 +2145,8 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
0,
};
+ dict_t *xdata_rsp = dict_ref(xdata);
+
DECLARE_OLD_FS_ID_VAR;
VALIDATE_OR_GOTO(frame, out);
@@ -2194,6 +2196,28 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
was_present = 0;
}
+ if (!was_present) {
+ if (posix_is_layout_stale(xdata, par_path, this)) {
+ op_ret = -1;
+ op_errno = EIO;
+ if (!xdata_rsp) {
+ xdata_rsp = dict_new();
+ if (!xdata_rsp) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ if (dict_set_int32_sizen(xdata_rsp, GF_PREOP_CHECK_FAILED, 1) ==
+ -1) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DICT_SET_FAILED,
+ "setting key %s in dict failed", GF_PREOP_CHECK_FAILED);
+ }
+
+ goto out;
+ }
+ }
+
if (priv->o_direct)
_flags |= O_DIRECT;
@@ -2313,7 +2337,10 @@ out:
STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd,
(loc) ? loc->inode : NULL, &stbuf, &preparent,
- &postparent, xdata);
+ &postparent, xdata_rsp);
+
+ if (xdata_rsp)
+ dict_unref(xdata_rsp);
return 0;
}
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index cbc2714..4919cbd 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -3586,3 +3586,79 @@ posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xattr_req)
}
}
}
+
+gf_boolean_t
+posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this)
+{
+ int op_ret = 0;
+ ssize_t size = 0;
+ char value_buf[4096] = {
+ 0,
+ };
+ gf_boolean_t have_val = _gf_false;
+ data_t *arg_data = NULL;
+ char *xattr_name = NULL;
+ gf_boolean_t is_stale = _gf_false;
+
+ op_ret = dict_get_str_sizen(xdata, GF_PREOP_PARENT_KEY, &xattr_name);
+ if (xattr_name == NULL) {
+ op_ret = 0;
+ goto out;
+ }
+
+ arg_data = dict_get(xdata, xattr_name);
+ if (!arg_data) {
+ op_ret = 0;
+ goto out;
+ }
+
+ size = sys_lgetxattr(par_path, xattr_name, value_buf,
+ sizeof(value_buf) - 1);
+
+ if (size >= 0) {
+ have_val = _gf_true;
+ } else {
+ if (errno == ERANGE) {
+ gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_PREOP_CHECK_FAILED,
+ "getxattr on key (%s) path (%s) failed due to"
+ " buffer overflow",
+ xattr_name, par_path);
+ size = sys_lgetxattr(par_path, xattr_name, NULL, 0);
+ }
+ if (size < 0) {
+ op_ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED,
+ "getxattr on key (%s) failed, path : %s", xattr_name,
+ par_path);
+ goto out;
+ }
+ }
+
+ if (!have_val) {
+ size = sys_lgetxattr(par_path, xattr_name, value_buf, size);
+ if (size < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED,
+ "getxattr on key (%s) failed (%s)", xattr_name,
+ strerror(errno));
+ goto out;
+ }
+ }
+
+ if ((arg_data->len != size) || (memcmp(arg_data->data, value_buf, size))) {
+ gf_msg(this->name, GF_LOG_INFO, EIO, P_MSG_PREOP_CHECK_FAILED,
+ "failing preop as on-disk xattr value differs from argument "
+ "value for key %s",
+ xattr_name);
+ op_ret = -1;
+ }
+
+out:
+ dict_del_sizen(xdata, xattr_name);
+ dict_del_sizen(xdata, GF_PREOP_PARENT_KEY);
+
+ if (op_ret == -1) {
+ is_stale = _gf_true;
+ }
+
+ return is_stale;
+}
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index ce4e119..7893a9b 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -658,4 +658,8 @@ posix_spawn_ctx_janitor_thread(xlator_t *this);
void
posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata);
+
+gf_boolean_t
+posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this);
+
#endif /* _POSIX_H */