From 7eff3f644b67fdb7a44174f836ffe1a84b6f479e Mon Sep 17 00:00:00 2001 From: N Balachandran Date: Mon, 10 Jul 2017 09:38:54 +0530 Subject: cluster/dht: Fix fd check race There is a another race between the cached subvol being updated in the inode_ctx and the fd being opened on the target. 1. fop1 -> fd1 -> subvol0 2. file migrated from subvol0 to subvol1 and cached_subvol changed to subvol1 in inode_ctx 3. fop2 -> fd1 -> subvol1 [takes new cached subvol] 4. fop2 -> checks fd ctx (fd not open on subvol1) -> opens fd1 on subvol1 5. fop1 -> checks fd ctx (fd not open on subvol0) -> tries to open fd1 on subvol0 -> fails with "No such file on directory". Fix: If dht_fd_open_on_dst fails with ENOENT or ESTALE, wind to old subvol and let the phase1/phase2 checks handle it. Change-Id: I34f8011574a8b72e3bcfe03b0cc4f024b352f225 BUG: 1467010 > BUG: 1465075 > Signed-off-by: N Balachandran > Reviewed-on: https://review.gluster.org/17731 > Smoke: Gluster Build System > CentOS-regression: Gluster Build System > Reviewed-by: Raghavendra G > Reviewed-by: Amar Tumballi (cherry picked from commit f7a450c17fee7e43c544473366220887f0534ed7) Signed-off-by: N Balachandran Reviewed-on: https://review.gluster.org/17829 Smoke: Gluster Build System CentOS-regression: Gluster Build System Reviewed-by: Raghavendra Talur --- xlators/cluster/dht/src/dht-helper.c | 26 +++++++++++++++++ xlators/cluster/dht/src/dht-messages.h | 51 +++++++++++++++++++++++++++++++++- 2 files changed, 76 insertions(+), 1 deletion(-) (limited to 'xlators') diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 3c392fe04be..c9bf60eb591 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -293,10 +293,12 @@ dht_check_and_open_fd_on_subvol_complete (int ret, call_frame_t *frame, glusterfs_fop_t fop = 0; dht_local_t *local = NULL; xlator_t *subvol = NULL; + xlator_t *this = NULL; fd_t *fd = NULL; int op_errno = -1; local = frame->local; + this = frame->this; fop = local->fop; subvol = local->cached_subvol; fd = local->fd; @@ -385,6 +387,11 @@ dht_check_and_open_fd_on_subvol_complete (int ret, call_frame_t *frame, break; default: + gf_msg (this->name, GF_LOG_ERROR, 0, + DHT_MSG_UNKNOWN_FOP, + "Unknown FOP on fd (%p) on file %s @ %s", + fd, uuid_utoa (fd->inode->gfid), + subvol->name); break; } @@ -445,6 +452,11 @@ handle_err: break; default: + gf_msg (this->name, GF_LOG_ERROR, 0, + DHT_MSG_UNKNOWN_FOP, + "Unknown FOP on fd (%p) on file %s @ %s", + fd, uuid_utoa (fd->inode->gfid), + subvol->name); break; } @@ -508,6 +520,20 @@ dht_check_and_open_fd_on_subvol_task (void *data) " (%p, flags=0%o) on file %s @ %s", fd, fd->flags, uuid_utoa (fd->inode->gfid), subvol->name); + /* This can happen if the cached subvol was updated in the + * inode_ctx and the fd was opened on the new cached suvol + * after this fop was wound on the old cached subvol. + * As we do not close the fd on the old subvol (a leak) + * don't treat ENOENT as an error and allow the phase1/phase2 + * checks to handle it. + */ + + if ((-ret != ENOENT) && (-ret != ESTALE)) { + local->op_errno = -ret; + ret = -1; + } else { + ret = 0; + } local->op_errno = -ret; ret = -1; diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h index 30b64eb5711..b6184ebe088 100644 --- a/xlators/cluster/dht/src/dht-messages.h +++ b/xlators/cluster/dht/src/dht-messages.h @@ -40,7 +40,7 @@ */ #define GLFS_DHT_BASE GLFS_MSGID_COMP_DHT -#define GLFS_DHT_NUM_MESSAGES 118 +#define GLFS_DHT_NUM_MESSAGES 125 #define GLFS_MSGID_END (GLFS_DHT_BASE + GLFS_DHT_NUM_MESSAGES + 1) /* Messages with message IDs */ @@ -1085,5 +1085,54 @@ */ #define DHT_MSG_DIR_LOOKUP_FAILED (GLFS_DHT_BASE + 118) +/* + * @messageid 109119 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_INODELK_FAILED (GLFS_DHT_BASE + 119) + +/* + * @messageid 109120 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_LOCK_FRAME_FAILED (GLFS_DHT_BASE + 120) + +/* + * @messageid 109121 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_LOCAL_LOCK_INIT_FAILED (GLFS_DHT_BASE + 121) + +/* + * @messageid 109122 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_ENTRYLK_ERROR (GLFS_DHT_BASE + 122) + +/* + * @messageid 109123 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_INODELK_ERROR (GLFS_DHT_BASE + 123) + +/* + * @messageid 109124 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_LOC_FAILED (GLFS_DHT_BASE + 124) + +/* + * @messageid 109125 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_UNKNOWN_FOP (GLFS_DHT_BASE + 125) + #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" #endif /* _DHT_MESSAGES_H_ */ -- cgit