summaryrefslogtreecommitdiffstats
path: root/xlators/cluster
diff options
context:
space:
mode:
authorMohammed Rafi KC <rkavunga@redhat.com>2016-05-03 14:43:20 +0530
committerRaghavendra G <rgowdapp@redhat.com>2016-06-20 04:56:16 -0700
commit6106aec748e8f1c328dda30cbbb5bdfa91c0a869 (patch)
tree0300b0e43a24d0b3728180dc5e80b39a68621953 /xlators/cluster
parentd1a4e534ba9283da9e75033f4a3a309d16176b24 (diff)
dht: remember locked subvol and send unlock to the same
During locking we send the lock request to the cached subvol, and normally we send the unlock to the cached subvol as well. But with a parallel fresh lookup on a directory, there is a race window where the cached subvol can change, and the unlock can go to a different subvol from the one on which we took the lock. This results in a stale lock held on one of the subvols. So we will store the details of the subvol on which we took the lock, and will unlock on that same subvol. Back port of: >Change-Id: I47df99491671b10624eb37d1d17e40bacf0b15eb >BUG: 1311002 >Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> >Reviewed-on: http://review.gluster.org/13492 >Reviewed-by: N Balachandran <nbalacha@redhat.com> >Smoke: Gluster Build System <jenkins@build.gluster.com> >NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> >Reviewed-by: Raghavendra G <rgowdapp@redhat.com> >CentOS-regression: Gluster Build System <jenkins@build.gluster.com> (cherry picked from commit ef0db52bc55a51fe5e3856235aed0230b6a188fe) Change-Id: Ib821e7355b4937b86d2f9f11e2c8311b7301b6c7 BUG: 1347524 Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> Reviewed-on: http://review.gluster.org/14750 Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: N Balachandran <nbalacha@redhat.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Diffstat (limited to 'xlators/cluster')
-rw-r--r--xlators/cluster/dht/src/dht-common.c5
-rw-r--r--xlators/cluster/dht/src/dht-common.h11
-rw-r--r--xlators/cluster/dht/src/dht-helper.c155
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c59
-rw-r--r--xlators/cluster/dht/src/dht-messages.h9
5 files changed, 218 insertions, 21 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 7e68be5553c..21962cd342c 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -8404,7 +8404,10 @@ dht_entrylk_cbk (call_frame_t *frame, void *cookie,
return 0;
}
-
+/* TODO
+ * Sending entrylk to cached subvol can result in stale lock
+ * as described in the bug 1311002.
+ */
int
dht_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index b2e9df68996..9a71c46c8e4 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -101,6 +101,7 @@ typedef struct dht_stat_time dht_stat_time_t;
struct dht_inode_ctx {
dht_layout_t *layout;
dht_stat_time_t time;
+ xlator_t *lock_subvol;
};
typedef struct dht_inode_ctx dht_inode_ctx_t;
@@ -285,6 +286,8 @@ struct dht_local {
int op_errno;
} lock;
+ short lock_type;
+
call_stub_t *stub;
int32_t parent_disk_layout[4];
};
@@ -1218,4 +1221,12 @@ dht_release (xlator_t *this, fd_t *fd);
int32_t
dht_set_fixed_dir_stat (struct iatt *stat);
+
+xlator_t*
+dht_get_lock_subvolume (xlator_t *this, struct gf_flock *lock,
+ dht_local_t *local);
+
+int
+dht_lk_inode_unref (call_frame_t *frame, int32_t op_ret);
+
#endif/* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 208fe92bd44..590d0043507 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -2352,3 +2352,158 @@ dht_heal_full_path_done (int op_ret, call_frame_t *heal_frame, void *data)
DHT_STACK_DESTROY (heal_frame);
return 0;
}
+
+/* This function must be called inside an inode lock */
+int
+__dht_lock_subvol_set (inode_t *inode, xlator_t *this,
+ xlator_t *lock_subvol)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+ uint64_t value = 0;
+
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+ ret = __inode_ctx_get0 (inode, this, &value);
+ if (ret || !value) {
+ return -1;
+ }
+
+ ctx = (dht_inode_ctx_t *) value;
+ ctx->lock_subvol = lock_subvol;
+out:
+ return ret;
+}
+
+xlator_t*
+dht_get_lock_subvolume (xlator_t *this, struct gf_flock *lock,
+ dht_local_t *local)
+{
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ int32_t ret = -1;
+ uint64_t value = 0;
+ xlator_t *cached_subvol = NULL;
+ dht_inode_ctx_t *ctx = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ GF_VALIDATE_OR_GOTO (this->name, lock, out);
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+ cached_subvol = local->cached_subvol;
+
+ if (local->loc.inode || local->fd) {
+ inode = local->loc.inode ? local->loc.inode : local->fd->inode;
+ }
+
+ if (!inode)
+ goto out;
+
+ if (!(IA_ISDIR (inode->ia_type) || IA_ISINVAL (inode->ia_type))) {
+ /*
+ * We may get non-linked inode for directories as part
+ * of the selfheal code path. So checking for IA_INVAL
+ * type also. This will only happen for directory.
+ */
+ subvol = local->cached_subvol;
+ goto out;
+ }
+
+ if (lock->l_type != F_UNLCK) {
+ /*
+ * inode purging might happen on NFS between a lk
+ * and unlk. Due to this lk and unlk might be sent
+ * to different subvols.
+ * So during a lock request, taking a ref on inode
+ * to prevent inode purging. inode unref will happen
+ * in unlock cbk code path.
+ */
+ inode_ref (inode);
+ }
+
+ LOCK (&inode->lock);
+ ret = __inode_ctx_get0 (inode, this, &value);
+ if (!ret && value) {
+ ctx = (dht_inode_ctx_t *) value;
+ subvol = ctx->lock_subvol;
+ }
+ if (!subvol && lock->l_type != F_UNLCK && cached_subvol) {
+ ret = __dht_lock_subvol_set (inode, this,
+ cached_subvol);
+ if (ret) {
+ gf_uuid_unparse(inode->gfid, gfid);
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set lock_subvol in "
+ "inode ctx for gfid %s",
+ gfid);
+ goto unlock;
+ }
+ subvol = cached_subvol;
+ }
+unlock:
+ UNLOCK (&inode->lock);
+ if (!subvol && inode && lock->l_type != F_UNLCK) {
+ inode_unref (inode);
+ }
+out:
+ return subvol;
+}
+
+int
+dht_lk_inode_unref (call_frame_t *frame, int32_t op_ret)
+{
+ int ret = -1;
+ dht_local_t *local = NULL;
+ inode_t *inode = NULL;
+ xlator_t *this = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ this = frame->this;
+
+ if (local->loc.inode || local->fd) {
+ inode = local->loc.inode ? local->loc.inode : local->fd->inode;
+ }
+ if (!inode) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCK_INODE_UNREF_FAILED,
+ "Found a NULL inode. Failed to unref the inode");
+ goto out;
+ }
+
+ if (!(IA_ISDIR (inode->ia_type) || IA_ISINVAL (inode->ia_type))) {
+ ret = 0;
+ goto out;
+ }
+
+ switch (local->lock_type) {
+ case F_RDLCK:
+ case F_WRLCK:
+ if (op_ret) {
+ gf_uuid_unparse(inode->gfid, gfid);
+ gf_msg_debug (this->name, 0,
+ "lock request failed for gfid %s", gfid);
+ inode_unref (inode);
+ goto out;
+ }
+ break;
+
+ case F_UNLCK:
+ if (!op_ret) {
+ inode_unref (inode);
+ } else {
+ gf_uuid_unparse(inode->gfid, gfid);
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCK_INODE_UNREF_FAILED,
+ "Unlock request failed for gfid %s."
+ "Failed to unref the inode", gfid);
+ goto out;
+ }
+ default:
+ break;
+ }
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
index 16a82cd5b98..549f1b9ea7e 100644
--- a/xlators/cluster/dht/src/dht-inode-read.c
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -997,6 +997,7 @@ dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
out:
+ dht_lk_inode_unref (frame, op_ret);
DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock, xdata);
return 0;
@@ -1035,7 +1036,7 @@ int
dht_lk (call_frame_t *frame, xlator_t *this,
fd_t *fd, int cmd, struct gf_flock *flock, dict_t *xdata)
{
- xlator_t *subvol = NULL;
+ xlator_t *lock_subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
@@ -1044,11 +1045,16 @@ dht_lk (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (fd, err);
local = dht_local_init (frame, NULL, fd, GF_FOP_LK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
+ local->lock_type = flock->l_type;
+ lock_subvol = dht_get_lock_subvolume (this, flock, local);
+ if (!lock_subvol) {
gf_msg_debug (this->name, 0,
- "no cached subvolume for fd=%p", fd);
+ "no lock subvolume for path=%p", fd);
op_errno = EINVAL;
goto err;
}
@@ -1058,7 +1064,7 @@ dht_lk (call_frame_t *frame, xlator_t *this,
local->call_cnt = 1;
- STACK_WIND (frame, dht_lk_cbk, subvol, subvol->fops->lk, fd,
+ STACK_WIND (frame, dht_lk_cbk, lock_subvol, lock_subvol->fops->lk, fd,
cmd, flock, xdata);
return 0;
@@ -1283,6 +1289,7 @@ dht_inodelk_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ dht_lk_inode_unref (frame, op_ret);
DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -1292,9 +1299,9 @@ int32_t
dht_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ xlator_t *lock_subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
VALIDATE_OR_GOTO (frame, err);
@@ -1308,10 +1315,11 @@ dht_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
goto err;
}
- subvol = local->cached_subvol;
- if (!subvol) {
+ local->lock_type = lock->l_type;
+ lock_subvol = dht_get_lock_subvolume (this, lock, local);
+ if (!lock_subvol) {
gf_msg_debug (this->name, 0,
- "no cached subvolume for path=%s", loc->path);
+ "no lock subvolume for path=%s", loc->path);
op_errno = EINVAL;
goto err;
}
@@ -1320,7 +1328,7 @@ dht_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
STACK_WIND (frame,
dht_inodelk_cbk,
- subvol, subvol->fops->inodelk,
+ lock_subvol, lock_subvol->fops->inodelk,
volume, loc, cmd, lock, xdata);
return 0;
@@ -1332,12 +1340,13 @@ err:
return 0;
}
-
int
dht_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+
+ dht_lk_inode_unref (frame, op_ret);
DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -1347,23 +1356,35 @@ int
dht_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
+ xlator_t *lock_subvol = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
+ local = dht_local_init (frame, NULL, fd, GF_FOP_INODELK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->call_cnt = 1;
+ local->lock_type = lock->l_type;
+
+ lock_subvol = dht_get_lock_subvolume (this, lock, local);
+ if (!lock_subvol) {
gf_msg_debug (this->name, 0,
- "no cached subvolume for fd=%p", fd);
+ "no lock subvolume for fd=%p", fd);
op_errno = EINVAL;
goto err;
}
- STACK_WIND (frame, dht_finodelk_cbk, subvol, subvol->fops->finodelk,
+ STACK_WIND (frame, dht_finodelk_cbk, lock_subvol,
+ lock_subvol->fops->finodelk,
volume, fd, cmd, lock, xdata);
return 0;
diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h
index 044292d86ca..8c0b9103df1 100644
--- a/xlators/cluster/dht/src/dht-messages.h
+++ b/xlators/cluster/dht/src/dht-messages.h
@@ -40,7 +40,7 @@
*/
#define GLFS_DHT_BASE GLFS_MSGID_COMP_DHT
-#define GLFS_DHT_NUM_MESSAGES 114
+#define GLFS_DHT_NUM_MESSAGES 116
#define GLFS_MSGID_END (GLFS_DHT_BASE + GLFS_DHT_NUM_MESSAGES + 1)
/* Messages with message IDs */
@@ -1064,5 +1064,12 @@
*/
#define DHT_MSG_LOCK_MIGRATION_FAILED (GLFS_DHT_BASE + 115)
+/*
+ * @messageid 109116
+ * @diagnosis
+ * @recommendedaction None
+ */
+#define DHT_MSG_LOCK_INODE_UNREF_FAILED (GLFS_DHT_BASE + 116)
+
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
#endif /* _DHT_MESSAGES_H_ */