diff options
| author | karthik-us <ksubrahm@redhat.com> | 2017-12-18 16:46:39 +0530 | 
|---|---|---|
| committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2018-01-13 02:55:44 +0000 | 
| commit | ba149bac92d169ae2256dbc75202dc9e5d06538e (patch) | |
| tree | 246c76c0dacfb4ef9813d2801a6e2c37cdf57c10 /xlators | |
| parent | e9358bc4d275602529a4a0167ebaa053db3a0e1b (diff) | |
cluster/afr: Fixing the flaws in arbiter becoming source patch
Problem:
Setting the write_subvol value to read_subvol in case of metadata
transaction during pre-op (commit 19f9bcff4aada589d4321356c2670ed283f02c03)
might lead to the original problem of arbiter becoming source.
Scenario:
1) All bricks are up and good
2) 2 writes w1 and w2 are in progress in parallel
3) ctx->read_subvol is good for all the subvolumes
4) w1 succeeds on brick0 and fails on brick1, but has yet to do its
   post-op on the disk
5) read/lookup comes on the same file and refreshes read_subvols back
   to all good
6) a metadata transaction happens, which assigns ctx->read_subvol
   (which is all good) to ctx->write_subvol
7) w2 succeeds on brick1 and fails on brick0 and this will update the
   brick in reverse order leading to arbiter becoming source
Fix:
Instead of setting ctx->write_subvol to ctx->read_subvol in the
pre-op stage when there is a metadata transaction, check the
transaction type in the function __afr_set_in_flight_sb_status().
Use the value of ctx->write_subvol if it is a data transaction,
and the value of ctx->read_subvol for other transactions.
With this patch we assign the value of ctx->write_subvol in
afr_transaction_perform_fop() with the on-disk value, instead of
assigning it in afr_changelog_pre_op() with the in-memory value.
Change-Id: Id2025a7e965f0578af35b1abaac793b019c43cc4
BUG: 1482064
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Diffstat (limited to 'xlators')
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 266 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-dir-write.c | 16 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-write.c | 57 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-lk-common.c | 42 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-messages.h | 3 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 45 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 22 | 
7 files changed, 271 insertions, 180 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index f02bc5a9e37..42a82f006b4 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -145,6 +145,7 @@ __afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx)                  tmp_ctx->spb_choice = -1;                  tmp_ctx->read_subvol = 0;                  tmp_ctx->write_subvol = 0; +                tmp_ctx->lock_count = 0;          } else {                  tmp_ctx = (afr_inode_ctx_t *) ctx_int;          } @@ -190,7 +191,6 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,                                 inode_t *inode)  {          int                 i               = 0; -        int                 ret             = -1;          int                 txn_type        = 0;          int                 count           = 0;          int                 index           = -1; @@ -203,16 +203,14 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,          uint32_t            event           = 0;          uint64_t            val             = 0;          afr_private_t      *priv            = NULL; -        afr_inode_ctx_t    *ctx             = NULL;          priv = this->private;          txn_type = local->transaction.type; -        ret = __afr_inode_ctx_get (this, inode, &ctx); -        if (ret < 0) -                return ret; - -        val = ctx->write_subvol; +        if (txn_type == AFR_DATA_TRANSACTION) +                val = local->inode_ctx->write_subvol; +        else +                val = local->inode_ctx->read_subvol;          metadatamap_old = metadatamap = (val & 0x000000000000ffff);          datamap_old = datamap = (val & 0x00000000ffff0000) >> 16; @@ -273,10 +271,11 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,                  (((uint64_t) datamap) << 16) |                  (((uint64_t) event) << 32); -        ctx->write_subvol = val; -       
 ctx->read_subvol = val; +        if (txn_type == AFR_DATA_TRANSACTION) +                local->inode_ctx->write_subvol = val; +        local->inode_ctx->read_subvol = val; -        return ret; +        return 0;  }  gf_boolean_t @@ -995,6 +994,81 @@ afr_accuse_smallfiles (xlator_t *this, struct afr_reply *replies,  }  int +afr_readables_fill (call_frame_t *frame, xlator_t *this, inode_t *inode, +                    unsigned char *data_accused, +                    unsigned char *metadata_accused, +                    unsigned char *data_readable, +                    unsigned char *metadata_readable, +                    struct afr_reply *replies) +{ +        afr_local_t *local = NULL; +        afr_private_t *priv = NULL; +        dict_t *xdata = NULL; +        int i = 0; +        int ret = 0; +        ia_type_t ia_type = IA_INVAL; + +        local = frame->local; +        priv = this->private; + +        for (i = 0; i < priv->child_count; i++) { +                data_readable[i] = 1; +                metadata_readable[i] = 1; +        } +        if (AFR_IS_ARBITER_BRICK (priv, ARBITER_BRICK_INDEX)) { +                data_readable[ARBITER_BRICK_INDEX] =  0; +                metadata_readable[ARBITER_BRICK_INDEX] = 0; +        } + +        for (i = 0; i < priv->child_count; i++) { +                if (replies) {/* Lookup */ +                        if (!replies[i].valid || replies[i].op_ret == -1 || +                            (replies[i].xdata && dict_get (replies[i].xdata, +                                                        GLUSTERFS_BAD_INODE))) { +                                data_readable[i] = 0; +                                metadata_readable[i] = 0; +                                continue; +                        } + +                        xdata = replies[i].xdata; +                        ia_type = replies[i].poststat.ia_type; +                } else {/* pre-op xattrop */ +                        xdata = local->transaction.pre_op_xdata[i]; 
+                        ia_type = inode->ia_type; +                } + +                afr_accused_fill (this, xdata, data_accused, +                                  (ia_type == IA_IFDIR) ? +                                  AFR_ENTRY_TRANSACTION : AFR_DATA_TRANSACTION); + +                afr_accused_fill (this, xdata, +                                  metadata_accused, AFR_METADATA_TRANSACTION); +        } + +        if (replies && ia_type != IA_INVAL && ia_type != IA_IFDIR && +            /* We want to accuse small files only when we know for +             * sure that there is no IO happening. Otherwise, the +             * ia_sizes obtained in post-refresh replies may +             * mismatch due to a race between inode-refresh and +             * ongoing writes, causing spurious heal launches*/ +            !afr_is_possibly_under_txn (AFR_DATA_TRANSACTION, local, this)) { +                afr_accuse_smallfiles (this, replies, data_accused); +        } + +        for (i = 0; i < priv->child_count; i++) { +                if (data_accused[i]) { +                        data_readable[i] = 0; +                        ret = 1; +                } +                if (metadata_accused[i]) { +                        metadata_readable[i] = 0; +                        ret = 1; +                } +        } +        return ret; +} + +int  afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode,                         gf_boolean_t *start_heal)  { @@ -1019,62 +1093,9 @@ afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode,  	metadata_accused = alloca0 (priv->child_count);  	metadata_readable = alloca0 (priv->child_count); -	for (i = 0; i < priv->child_count; i++) { -		data_readable[i] = 1; -		metadata_readable[i] = 1; -	} -        if (AFR_IS_ARBITER_BRICK (priv, ARBITER_BRICK_INDEX)) { -                data_readable[ARBITER_BRICK_INDEX] =  0; -                metadata_readable[ARBITER_BRICK_INDEX] = 0; -        } - -	for (i = 
0; i < priv->child_count; i++) { -		if (!replies[i].valid) { -			data_readable[i] = 0; -			metadata_readable[i] = 0; -			continue; -		} - -		if (replies[i].op_ret == -1) { -			data_readable[i] = 0; -			metadata_readable[i] = 0; -			continue; -		} - -                if (replies[i].xdata && -                    dict_get (replies[i].xdata, GLUSTERFS_BAD_INODE)) { -			data_readable[i] = 0; -			metadata_readable[i] = 0; -			continue; -                } - -		afr_accused_fill (this, replies[i].xdata, data_accused, -				  (replies[i].poststat.ia_type == IA_IFDIR) ? -				   AFR_ENTRY_TRANSACTION : AFR_DATA_TRANSACTION); - -		afr_accused_fill (this, replies[i].xdata, -				  metadata_accused, AFR_METADATA_TRANSACTION); - -	} - -	if ((inode->ia_type != IA_IFDIR) && -            /* We want to accuse small files only when we know for sure that -             * there is no IO happening. Otherwise, the ia_sizes obtained in -             * post-refresh replies may  mismatch due to a race between inode- -             * refresh and ongoing writes, causing spurious heal launches*/ -            !afr_is_possibly_under_txn (AFR_DATA_TRANSACTION, local, this)) -		afr_accuse_smallfiles (this, replies, data_accused); - -	for (i = 0; i < priv->child_count; i++) { -		if (data_accused[i]) { -			data_readable[i] = 0; -			ret = 1; -		} -		if (metadata_accused[i]) { -			metadata_readable[i] = 0; -			ret = 1; -		} -	} +        ret = afr_readables_fill (frame, this, inode, data_accused, +                                  metadata_accused, data_readable, +                                  metadata_readable, replies);  	for (i = 0; i < priv->child_count; i++) {                  if (start_heal && priv->child_up[i] && @@ -5583,13 +5604,13 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)          if (!local->transaction.pre_op)                  goto out; -        if (priv->arbiter_count == 1) { -                local->transaction.pre_op_xdata = -                        GF_CALLOC (sizeof 
(*local->transaction.pre_op_xdata), -                                   priv->child_count, gf_afr_mt_dict_t); -                if (!local->transaction.pre_op_xdata) -                        goto out; +        local->transaction.pre_op_xdata = +                GF_CALLOC (sizeof (*local->transaction.pre_op_xdata), +                           priv->child_count, gf_afr_mt_dict_t); +        if (!local->transaction.pre_op_xdata) +                goto out; +        if (priv->arbiter_count == 1) {                  local->transaction.pre_op_sources =                          GF_CALLOC (sizeof (*local->transaction.pre_op_sources),                                     priv->child_count, gf_afr_mt_char); @@ -6567,42 +6588,45 @@ int  afr_write_subvol_set (call_frame_t *frame, xlator_t *this)  {          afr_local_t      *local = NULL; -        afr_inode_ctx_t  *ctx   = NULL; +        afr_private_t    *priv  = NULL; +        unsigned char    *data_accused = NULL; +        unsigned char    *metadata_accused = NULL; +        unsigned char    *data_readable = NULL; +        unsigned char    *metadata_readable = NULL; +        uint16_t          datamap = 0; +        uint16_t          metadatamap = 0;          uint64_t          val   = 0; -        uint64_t          val1  = 0; -        int               ret   = -1; +        int               event = 0; +        int               i     = 0;          local = frame->local; +        priv = this->private; +        data_accused = alloca0 (priv->child_count); +        metadata_accused = alloca0 (priv->child_count); +        data_readable = alloca0 (priv->child_count); +        metadata_readable = alloca0 (priv->child_count); +        event = local->event_generation; + +        afr_readables_fill (frame, this, local->inode, data_accused, +                            metadata_accused, data_readable, metadata_readable, +                            NULL); + +        for (i = 0; i < priv->child_count; i++) { +                if (data_readable[i]) 
+                        datamap |= (1 << i); +                if (metadata_readable[i]) +                        metadatamap |= (1 << i); +        } + +        val = ((uint64_t) metadatamap) | +              (((uint64_t) datamap) << 16) | +              (((uint64_t) event) << 32); +          LOCK(&local->inode->lock);          { -                ret = __afr_inode_ctx_get (this, local->inode, &ctx); -                if (ret < 0) { -                        gf_msg (this->name, GF_LOG_ERROR, 0, -                                AFR_MSG_DICT_GET_FAILED, -                                "ERROR GETTING INODE CTX"); -                        UNLOCK(&local->inode->lock); -                        return ret; -                } - -                val = ctx->write_subvol; -                /* -                 * We need to set the value of write_subvol to read_subvol in 2 -                 * cases: -                 * 1. Initially when the value is 0. i.e., it's the first lock -                 * request. -                 * 2. If it's a metadata transaction. If metadata transactions -                 * comes in between data transactions and we have a brick -                 * disconnect, the next metadata transaction won't get the -                 * latest value of readables, since we do resetting of -                 * write_subvol in unlock code path only if it's a data -                 * transaction. To handle those scenarios we need to set the -                 * value of write_subvol to read_subvol in case of metadata -                 * transactions. 
-                */ -                if (val == 0 || -                    local->transaction.type == AFR_METADATA_TRANSACTION) { -                        val1 = ctx->read_subvol; -                        ctx->write_subvol = val1; +                if (local->inode_ctx->write_subvol == 0 && +                    local->transaction.type == AFR_DATA_TRANSACTION) { +                        local->inode_ctx->write_subvol = val;                  }          }          UNLOCK (&local->inode->lock); @@ -6614,23 +6638,37 @@ int  afr_write_subvol_reset (call_frame_t *frame, xlator_t *this)  {          afr_local_t      *local = NULL; -        afr_inode_ctx_t  *ctx   = NULL; -        int               ret   = -1;          local = frame->local;          LOCK(&local->inode->lock);          { -                ret = __afr_inode_ctx_get (this, local->inode, &ctx); -                if (ret < 0) { -                        gf_msg (this->name, GF_LOG_ERROR, 0, -                                AFR_MSG_DICT_GET_FAILED, -                                "ERROR GETTING INODE CTX"); -                        UNLOCK(&local->inode->lock); -                        return ret; -                } -                ctx->write_subvol = 0; +                local->inode_ctx->lock_count--; + +                if (!local->inode_ctx->lock_count) +                        local->inode_ctx->write_subvol = 0;          }          UNLOCK(&local->inode->lock);          return 0;  } + +int +afr_set_inode_local (xlator_t *this, afr_local_t *local, inode_t *inode) +{ +        int ret = 0; + +        local->inode = inode_ref (inode); +        LOCK(&local->inode->lock); +        { +                ret = __afr_inode_ctx_get (this, local->inode, +                                           &local->inode_ctx); +        } +        UNLOCK (&local->inode->lock); +        if (ret < 0) { +                gf_msg_callingfn (this->name, GF_LOG_ERROR, ENOMEM, +                                  AFR_MSG_INODE_CTX_GET_FAILED, +          
                        "Error getting inode ctx %s", +                                  uuid_utoa (local->inode->gfid)); +        } +        return ret; +} diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 1afd5d39670..92876930e73 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -476,7 +476,7 @@ afr_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,  	if (!local->fd_ctx)  		goto out; -	local->inode = inode_ref (loc->inode); +        local->inode = inode_ref (loc->inode);  	local->parent = inode_ref (loc->parent);          local->op                = GF_FOP_CREATE; @@ -608,7 +608,7 @@ afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,  		goto out;          loc_copy (&local->loc, loc); -	local->inode = inode_ref (loc->inode); +        local->inode = inode_ref (loc->inode);  	local->parent = inode_ref (loc->parent);          local->op               = GF_FOP_MKNOD; @@ -739,7 +739,7 @@ afr_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,  		goto out;          loc_copy (&local->loc, loc); -	local->inode = inode_ref (loc->inode); +        local->inode = inode_ref (loc->inode);  	local->parent = inode_ref (loc->parent);          local->cont.mkdir.mode  = mode; @@ -876,7 +876,7 @@ afr_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,          loc_copy (&local->loc,    oldloc);          loc_copy (&local->newloc, newloc); -	local->inode = inode_ref (oldloc->inode); +        local->inode = inode_ref (oldloc->inode);  	local->parent = inode_ref (newloc->parent);          if (xdata) @@ -1004,7 +1004,7 @@ afr_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,  		goto out;          loc_copy (&local->loc, loc); -	local->inode = inode_ref (loc->inode); +        local->inode = inode_ref (loc->inode);  	local->parent = inode_ref (loc->parent);          local->cont.symlink.linkpath 
= gf_strdup (linkpath); @@ -1141,7 +1141,7 @@ afr_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,          loc_copy (&local->loc,    oldloc);          loc_copy (&local->newloc, newloc); -	local->inode = inode_ref (oldloc->inode); +        local->inode = inode_ref (oldloc->inode);  	local->parent = inode_ref (oldloc->parent);  	local->parent2 = inode_ref (newloc->parent); @@ -1294,7 +1294,7 @@ afr_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,          loc_copy (&local->loc, loc);          local->xflag = xflag; -	local->inode = inode_ref (loc->inode); +        local->inode = inode_ref (loc->inode);  	local->parent = inode_ref (loc->parent);          if (xdata) @@ -1420,7 +1420,7 @@ afr_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,          loc_copy (&local->loc, loc); -	local->inode = inode_ref (loc->inode); +        local->inode = inode_ref (loc->inode);  	local->parent = inode_ref (loc->parent);          local->cont.rmdir.flags = flags; diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 8078f4403c0..89ec6ee4d61 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -510,6 +510,7 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,  {          afr_local_t *local = NULL;          int op_errno = ENOMEM; +        int ret = -1;  	local = AFR_FRAME_INIT (frame, op_errno);  	if (!local) @@ -532,7 +533,9 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,  		goto out;          local->fd = fd_ref (fd); -	local->inode = inode_ref (fd->inode); +        ret = afr_set_inode_local (this, local, fd->inode); +        if (ret) +                goto out;  	if (dict_set_uint32 (local->xdata_req, GLUSTERFS_OPEN_FD_COUNT, 4)) {  		op_errno = ENOMEM; @@ -657,7 +660,9 @@ afr_truncate (call_frame_t *frame, xlator_t *this,          local->transaction.unwind = afr_truncate_unwind;          loc_copy 
(&local->loc, loc); -	local->inode = inode_ref (loc->inode); +        ret = afr_set_inode_local (this, local, loc->inode); +        if (ret) +                goto out;          local->op = GF_FOP_TRUNCATE; @@ -771,7 +776,9 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,  		goto out;          local->fd = fd_ref (fd); -	local->inode = inode_ref (fd->inode); +        ret = afr_set_inode_local (this, local, fd->inode); +        if (ret) +                goto out;          local->op = GF_FOP_FTRUNCATE; @@ -889,7 +896,9 @@ afr_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf,          local->transaction.unwind = afr_setattr_unwind;          loc_copy (&local->loc, loc); -	local->inode = inode_ref (loc->inode); +        ret = afr_set_inode_local (this, local, loc->inode); +        if (ret) +                goto out;  	local->op = GF_FOP_SETATTR; @@ -994,7 +1003,9 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this,          local->transaction.unwind = afr_fsetattr_unwind;          local->fd                 = fd_ref (fd); -	local->inode = inode_ref (fd->inode); +        ret = afr_set_inode_local (this, local, fd->inode); +        if (ret) +                goto out;  	local->op = GF_FOP_FSETATTR; @@ -1636,7 +1647,9 @@ afr_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,          local->transaction.unwind = afr_setxattr_unwind;          loc_copy (&local->loc, loc); -	local->inode = inode_ref (loc->inode); +        ret = afr_set_inode_local (this, local, loc->inode); +        if (ret) +                goto out;          local->transaction.main_frame = frame;          local->transaction.start   = LLONG_MAX - 1; @@ -1748,7 +1761,9 @@ afr_fsetxattr (call_frame_t *frame, xlator_t *this,          local->transaction.unwind = afr_fsetxattr_unwind;          local->fd                 = fd_ref (fd); -	local->inode = inode_ref (fd->inode); +        ret = afr_set_inode_local (this, local, fd->inode); +       
 if (ret) +                goto out;  	local->op = GF_FOP_FSETXATTR; @@ -1861,7 +1876,9 @@ afr_removexattr (call_frame_t *frame, xlator_t *this,          local->transaction.unwind = afr_removexattr_unwind;          loc_copy (&local->loc, loc); -	local->inode = inode_ref (loc->inode); +        ret = afr_set_inode_local (this, local, loc->inode); +        if (ret) +                goto out;  	local->op = GF_FOP_REMOVEXATTR; @@ -1968,7 +1985,9 @@ afr_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,          local->transaction.unwind = afr_fremovexattr_unwind;          local->fd = fd_ref (fd); -	local->inode = inode_ref (fd->inode); +        ret = afr_set_inode_local (this, local, fd->inode); +        if (ret) +                goto out;  	local->op = GF_FOP_FREMOVEXATTR; @@ -2063,7 +2082,9 @@ afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,          local->cont.fallocate.len = len;          local->fd = fd_ref (fd); -	local->inode = inode_ref (fd->inode); +        ret = afr_set_inode_local (this, local, fd->inode); +        if (ret) +                goto out;  	if (xdata)  		local->xdata_req = dict_copy_with_ref (xdata, NULL); @@ -2175,7 +2196,9 @@ afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,          local->cont.discard.len = len;          local->fd = fd_ref (fd); -	local->inode = inode_ref (fd->inode); +        ret = afr_set_inode_local (this, local, fd->inode); +        if (ret) +                goto out;  	if (xdata)  		local->xdata_req = dict_copy_with_ref (xdata, NULL); @@ -2284,7 +2307,9 @@ afr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,          local->cont.zerofill.len = len;          local->fd = fd_ref (fd); -	local->inode = inode_ref (fd->inode); +        ret = afr_set_inode_local (this, local, fd->inode); +        if (ret) +                goto out;  	if (xdata)  		local->xdata_req = dict_copy_with_ref (xdata, NULL); @@ -2396,7 +2421,9 @@ afr_xattrop (call_frame_t 
*frame, xlator_t *this, loc_t *loc,          local->transaction.unwind = afr_xattrop_unwind;          loc_copy (&local->loc, loc); -	local->inode = inode_ref (loc->inode); +        ret = afr_set_inode_local (this, local, loc->inode); +        if (ret) +                goto out;  	local->op = GF_FOP_XATTROP; @@ -2490,7 +2517,9 @@ afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,          local->transaction.unwind = afr_fxattrop_unwind;  	local->fd = fd_ref (fd); -	local->inode = inode_ref (fd->inode); +        ret = afr_set_inode_local (this, local, fd->inode); +        if (ret) +                goto out;  	local->op = GF_FOP_FXATTROP; diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index c17f60f62c4..f50c7b6464a 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -615,14 +615,14 @@ afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,  {          afr_local_t             *local          = NULL;          afr_internal_lock_t     *int_lock       = NULL; -        afr_fd_ctx_t            *fd_ctx         = NULL; -        afr_private_t           *priv           = NULL;          int                      call_count     = 0;          int                      ret            = 0;          local    = frame->local;          int_lock = &local->internal_lock; -        priv = this->private; + +        if (local->transaction.type == AFR_DATA_TRANSACTION && op_ret != 1) +                ret = afr_write_subvol_reset (frame, this);          LOCK (&frame->lock);          { @@ -633,11 +633,6 @@ afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          if (call_count == 0) {                  gf_msg_trace (this->name, 0,                                "All internal locks unlocked"); -                if (local->fd) { -                        fd_ctx = afr_fd_ctx_get (local->fd, this); -                        if (0 == AFR_COUNT 
(fd_ctx->lock_acquired, priv->child_count)) -                                ret = afr_write_subvol_reset (frame, this); -                }                  int_lock->lock_cbk (frame, this);          } @@ -947,6 +942,15 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          } else {                                  int_lock->locked_nodes[child_index] |= LOCKED_YES;                                  int_lock->lock_count++; + +                                if (local->transaction.type == +                                    AFR_DATA_TRANSACTION) { +                                        LOCK(&local->inode->lock); +                                        { +                                                local->inode_ctx->lock_count++; +                                        } +                                        UNLOCK (&local->inode->lock); +                                }                          }                  }                  afr_lock_blocking (frame, this, cky + 1); @@ -1502,13 +1506,12 @@ int32_t  afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                               int32_t op_ret, int32_t op_errno, dict_t *xdata)  { -        afr_internal_lock_t *int_lock = NULL; -        afr_inodelk_t       *inodelk  = NULL; -        afr_local_t         *local    = NULL; -        int call_count  = 0; -        int child_index = (long) cookie; -        afr_fd_ctx_t        *fd_ctx = NULL; - +        afr_internal_lock_t *int_lock    = NULL; +        afr_inodelk_t       *inodelk     = NULL; +        afr_local_t         *local       = NULL; +        afr_fd_ctx_t        *fd_ctx      = NULL; +        int                  call_count  = 0; +        int                  child_index = (long) cookie;          local    = frame->local;          int_lock = &local->internal_lock; @@ -1553,6 +1556,15 @@ afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                        
                  fd_ctx->lock_acquired[child_index]++;  				}  			} + +                        if (local->transaction.type == AFR_DATA_TRANSACTION && +                            op_ret == 0) { +                                LOCK(&local->inode->lock); +                                { +                                        local->inode_ctx->lock_count++; +                                } +                                UNLOCK (&local->inode->lock); +                        }  		}                  call_count = --int_lock->lk_call_count; diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h index ad1021de2b4..743bd945821 100644 --- a/xlators/cluster/afr/src/afr-messages.h +++ b/xlators/cluster/afr/src/afr-messages.h @@ -65,7 +65,8 @@ GLFS_MSGID(AFR,          AFR_MSG_ADD_BRICK_STATUS,          AFR_MSG_NO_CHANGELOG,          AFR_MSG_TIMER_CREATE_FAIL, -        AFR_MSG_SBRAIN_FAV_CHILD_POLICY +        AFR_MSG_SBRAIN_FAV_CHILD_POLICY, +        AFR_MSG_INODE_CTX_GET_FAILED  );  #endif /* !_AFR_MESSAGES_H_ */ diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 440e8d1ed97..639f48687df 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -372,14 +372,27 @@ afr_txn_arbitrate_fop (call_frame_t *frame, xlator_t *this)  int  afr_transaction_perform_fop (call_frame_t *frame, xlator_t *this)  { -        afr_local_t     *local = NULL; -        afr_private_t   *priv = NULL; -        fd_t            *fd   = NULL; +        afr_local_t   *local = NULL; +        afr_private_t *priv  = NULL; +        fd_t          *fd    = NULL; +        int           i      = 0; +        int           ret    = 0;          local = frame->local;          priv = this->private;          fd    = local->fd; +        if (local->transaction.type == AFR_DATA_TRANSACTION && +            !local->transaction.inherited) { +                ret = afr_write_subvol_set 
(frame, this); +                if (ret) { +                        /*act as if operation failed on all subvols*/ +                        local->op_ret = -1; +                        local->op_errno = -ret; +                        for (i = 0; i < priv->child_count; i++) +                                local->transaction.failed_subvols[i] = 1; +                } +        }          /*  Perform fops with the lk-owner from top xlator.           *  Eg: lk-owner of posix-lk and flush should be same,           *  flush cant clear the  posix-lks without that lk-owner. @@ -1116,32 +1129,28 @@ unlock:  int  afr_changelog_cbk (call_frame_t *frame, void *cookie, xlator_t *this, -		   int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +                   int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)  {          afr_local_t *local = NULL; -        afr_private_t *priv = NULL;          int call_count = -1;          int child_index = -1;          local = frame->local; -        priv = this->private;          child_index = (long) cookie; -	if (op_ret == -1) { +        if (op_ret == -1) {                  local->op_errno = op_errno; -		afr_transaction_fop_failed (frame, this, child_index); +                afr_transaction_fop_failed (frame, this, child_index);          } -        if (priv->arbiter_count == 1 && !op_ret) { -                if (xattr) -                        local->transaction.pre_op_xdata[child_index] = -                                                               dict_ref (xattr); -        } +        if (xattr) +                local->transaction.pre_op_xdata[child_index] = dict_ref (xattr); -	call_count = afr_frame_return (frame); +        call_count = afr_frame_return (frame); -        if (call_count == 0) -		local->transaction.changelog_resume (frame, this); +        if (call_count == 0) { +                local->transaction.changelog_resume (frame, this); +        }          return 0;  } @@ -1748,10 +1757,6 @@ afr_changelog_pre_op 
(call_frame_t *frame, xlator_t *this)  	if (pre_nop)  		goto next; -        ret = afr_write_subvol_set (frame, this); -        if (ret) -                goto err; -  	if (!local->pre_op_compat) {  		dict_copy (xdata_req, local->xdata_req);  		goto next; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 7a7cb2e656f..fb8b3b1d136 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -376,6 +376,16 @@ typedef enum {          AFR_FOP_LOCK_QUORUM_FAILED,  } afr_fop_lock_state_t; +typedef struct _afr_inode_ctx { +        uint64_t        read_subvol; +        uint64_t        write_subvol; +        int             lock_count; +        int             spb_choice; +        gf_timer_t      *timer; +        gf_boolean_t    need_refresh; +} afr_inode_ctx_t; + +  typedef struct _afr_local {  	glusterfs_fop_t  op;          unsigned int call_count; @@ -832,17 +842,10 @@ typedef struct _afr_local {          compound_args_t *c_args;          gf_boolean_t is_read_txn; +        afr_inode_ctx_t *inode_ctx;  } afr_local_t; -typedef struct _afr_inode_ctx { -        uint64_t        read_subvol; -        uint64_t        write_subvol; -        int             spb_choice; -        gf_timer_t      *timer; -        gf_boolean_t    need_refresh; -} afr_inode_ctx_t; -  typedef struct afr_spbc_timeout {          call_frame_t *frame;          gf_boolean_t d_spb; @@ -1274,4 +1277,7 @@ afr_write_subvol_set (call_frame_t *frame, xlator_t *this);  int  afr_write_subvol_reset (call_frame_t *frame, xlator_t *this); + +int +afr_set_inode_local (xlator_t *this, afr_local_t *local, inode_t *inode);  #endif /* __AFR_H__ */  | 
