summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSakshi <sabansal@redhat.com>2015-08-05 16:05:22 +0530
committerRaghavendra G <rgowdapp@redhat.com>2016-04-10 22:23:48 -0700
commit0a01154c68cb5eb884096fc67288a71c391d9160 (patch)
tree8251fa978901dbfc9db2e97e4b5a40e65d72210e
parent6a1d6da4588726ea0e1d0b0b6eb204a9d829db19 (diff)
dht: lock on subvols to prevent rename and lookup selfheal race
This patch addresses two races while renaming directories: 1) While renaming src to dst, if a lookup selfheal is triggered it can recreate src on those subvols where rename was successful. This leads to multiple directories (src and dst) having same gfid. To avoid this we must take locks on all subvols with src. 2) While renaming if the dst exists and a lookup selfheal is triggered it will find anomalies in the dst layout and try to heal the stale layout. To avoid this we must take lock on any one subvol with dst. Backport of http://review.gluster.org/#/c/11880/ > Change-Id: I637f637d3241d9065cd5be59a671c7e7ca3eed53 > BUG: 1252244 > Signed-off-by: Sakshi <sabansal@redhat.com> Change-Id: I637f637d3241d9065cd5be59a671c7e7ca3eed53 BUG: 1324381 Signed-off-by: Sakshi <sabansal@redhat.com> Reviewed-on: http://review.gluster.org/13917 Smoke: Gluster Build System <jenkins@build.gluster.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
-rw-r--r--xlators/cluster/dht/src/dht-rename.c201
1 files changed, 158 insertions, 43 deletions
diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c
index 3b636c529a2..c9a39b762a7 100644
--- a/xlators/cluster/dht/src/dht-rename.c
+++ b/xlators/cluster/dht/src/dht-rename.c
@@ -21,6 +21,7 @@
#include "dht-common.h"
#include "defaults.h"
+int dht_rename_unlock (call_frame_t *frame, xlator_t *this);
int
dht_rename_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
@@ -76,11 +77,7 @@ unwind:
WIPE (&local->preparent);
WIPE (&local->postparent);
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent,
- &local->preparent, &local->postparent, xdata);
+ dht_rename_unlock (frame, this);
}
return 0;
@@ -161,13 +158,7 @@ unwind:
WIPE (&local->preparent);
WIPE (&local->postparent);
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
-
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent,
- &local->preparent, &local->postparent, NULL);
-
+ dht_rename_unlock (frame, this);
return 0;
}
@@ -191,8 +182,7 @@ dht_rename_dir_do (call_frame_t *frame, xlator_t *this)
return 0;
err:
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, NULL, NULL,
- NULL, NULL, NULL, NULL);
+ dht_rename_unlock (frame, this);
return 0;
}
@@ -269,28 +259,35 @@ err:
int
-dht_rename_dir (call_frame_t *frame, xlator_t *this)
+dht_rename_dir_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int i = 0;
- int op_errno = -1;
-
+ dht_local_t *local = NULL;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+ dht_conf_t *conf = NULL;
+ int i = 0;
- conf = frame->this->private;
local = frame->local;
+ conf = this->private;
- local->call_cnt = conf->subvolume_cnt;
+ if (op_ret < 0) {
+ uuid_utoa_r (local->loc.inode->gfid, src_gfid);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!conf->subvolume_status[i]) {
- gf_msg (this->name, GF_LOG_INFO, 0,
- DHT_MSG_RENAME_FAILED,
- "Rename dir failed: subvolume down (%s)",
- conf->subvolumes[i]->name);
- op_errno = ENOTCONN;
- goto err;
- }
+ if (local->loc2.inode)
+ uuid_utoa_r (local->loc2.inode->gfid, dst_gfid);
+
+ gf_msg (this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_INODE_LK_ERROR,
+ "acquiring inodelk failed "
+ "rename (%s:%s:%s %s:%s:%s)",
+ local->loc.path, src_gfid, local->src_cached->name,
+ local->loc2.path, dst_gfid,
+ local->dst_cached ? local->dst_cached->name : NULL);
+
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto err;
}
local->fd = fd_create (local->loc.inode, frame->root->pid);
@@ -316,14 +313,120 @@ dht_rename_dir (call_frame_t *frame, xlator_t *this)
return 0;
err:
+ /* No harm in calling an extra unlock */
+ dht_rename_unlock (frame, this);
+ return 0;
+}
+
+int
+dht_rename_dir (call_frame_t *frame, xlator_t *this)
+{
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ dht_lock_t **lk_array = NULL;
+ dht_layout_t *dst_layout = NULL;
+ xlator_t *first_subvol = NULL;
+ int count = 1;
+ int i = 0;
+ int j = 0;
+ int ret = 0;
+ int op_errno = -1;
+
+ conf = frame->this->private;
+ local = frame->local;
+
+ /* We must take a lock on all the subvols with src gfid.
+ * Along with this if dst exists we must take lock on
+ * any one subvol with dst gfid.
+ */
+ count = local->call_cnt = conf->subvolume_cnt;
+ if (local->loc2.inode) {
+ dst_layout = dht_layout_get (this, local->loc2.inode);
+ if (dst_layout)
+ ++count;
+ }
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_RENAME_FAILED,
+ "Rename dir failed: subvolume down (%s)",
+ conf->subvolumes[i]->name);
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ }
+
+ lk_array = GF_CALLOC (count, sizeof (*lk_array), gf_common_mt_char);
+ if (lk_array == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ /* Rename must take locks on src to avoid lookup selfheal from
+ * recreating src on those subvols where the rename was successful.
+ * Rename must take locks on all subvols with src because selfheal
+ * in entry creation phase may not have acquired lock on all subvols.
+ */
+ for (i = 0; i < local->call_cnt; i++) {
+ lk_array[i] = dht_lock_new (frame->this,
+ conf->subvolumes[i],
+ &local->loc, F_WRLCK,
+ DHT_LAYOUT_HEAL_DOMAIN);
+ if (lk_array[i] == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ }
+
+ /* If the dst exists, we are going to replace dst layout range with
+ * that of src. This will lead to anomalies in dst layout until the
+ * rename completes. To avoid a lookup selfheal to change dst layout
+ * during this interval we take a lock on one subvol of dst.
+ */
+ if (dst_layout) {
+ for (j = 0; (j < dst_layout->cnt) &&
+ (dst_layout->list[j].err == 0); j++) {
+
+ first_subvol = dst_layout->list[j].xlator;
+ lk_array[i] = dht_lock_new (frame->this, first_subvol,
+ &local->loc2, F_WRLCK,
+ DHT_LAYOUT_HEAL_DOMAIN);
+ if (lk_array[i] == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ break;
+ }
+ }
+
+ local->lock.locks = lk_array;
+ local->lock.lk_count = count;
+
+ ret = dht_blocking_inodelk (frame, lk_array, count,
+ IGNORE_ENOENT_ESTALE,
+ dht_rename_dir_lock_cbk);
+ if (ret < 0) {
+ local->lock.locks = NULL;
+ local->lock.lk_count = 0;
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ return 0;
+
+err:
+ if (lk_array != NULL) {
+ dht_lock_array_free (lk_array, count);
+ GF_FREE (lk_array);
+ }
+
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
NULL, NULL);
return 0;
}
-
-
static int
dht_rename_track_for_changelog (xlator_t *this, dict_t *xattr,
loc_t *oldloc, loc_t *newloc)
@@ -426,12 +529,14 @@ dht_rename_unlock_cbk (call_frame_t *frame, void *cookie,
local = frame->local;
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
dht_set_fixed_dir_stat (&local->preoldparent);
dht_set_fixed_dir_stat (&local->postoldparent);
dht_set_fixed_dir_stat (&local->preparent);
dht_set_fixed_dir_stat (&local->postparent);
+ if (IA_ISREG (local->stbuf.ia_type))
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+
DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
&local->stbuf, &local->preoldparent,
&local->postoldparent, &local->preparent,
@@ -448,7 +553,6 @@ dht_rename_unlock (call_frame_t *frame, xlator_t *this)
char dst_gfid[GF_UUID_BUF_SIZE] = {0};
local = frame->local;
-
op_ret = dht_unlock_inodelk (frame, local->lock.locks,
local->lock.lk_count,
dht_rename_unlock_cbk);
@@ -458,14 +562,25 @@ dht_rename_unlock (call_frame_t *frame, xlator_t *this)
if (local->loc2.inode)
uuid_utoa_r (local->loc2.inode->gfid, dst_gfid);
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_UNLOCKING_FAILED,
- "winding unlock inodelk failed "
- "rename (%s:%s:%s %s:%s:%s), "
- "stale locks left on bricks",
- local->loc.path, src_gfid, local->src_cached->name,
- local->loc2.path, dst_gfid,
- local->dst_cached ? local->dst_cached->name : NULL);
+ if (IA_ISREG (local->stbuf.ia_type))
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_UNLOCKING_FAILED,
+ "winding unlock inodelk failed "
+ "rename (%s:%s:%s %s:%s:%s), "
+ "stale locks left on bricks",
+ local->loc.path, src_gfid,
+ local->src_cached->name,
+ local->loc2.path, dst_gfid,
+ local->dst_cached ?
+ local->dst_cached->name : NULL);
+ else
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_UNLOCKING_FAILED,
+ "winding unlock inodelk failed "
+ "rename (%s:%s %s:%s), "
+ "stale locks left on bricks",
+ local->loc.path, src_gfid,
+ local->loc2.path, dst_gfid);
dht_rename_unlock_cbk (frame, NULL, this, 0, 0, NULL);
}