| author | Ravishankar N <ravishankar@redhat.com> | 2016-05-18 14:37:46 +0530 |
|---|---|---|
| committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2016-05-24 01:23:46 -0700 |
| commit | 86a87a2ec0984f450b36ae6414c2d6d66870af73 | |
| tree | d70f8dc5f37f252c9f28eba63ac58646e34ca1fe | |
| parent | b01fb8d3bb9772d94073aaa52b2d8210ac4fabb8 | |
cluster/afr: Check for required number of entrylks
Problem:
Parallel rmdir operations on the same directory result in ENOTCONN messages
even though there was no network disconnect.
While taking blocking entry locks for rmdir, AFR takes two sets of locks on
all its children: one on (parent dir, name of the dir to be deleted), the
other a full lock on the dir being deleted. We proceed to the pre-op stage
even if only a single lock (but not all the needed locks) was obtained, only
to fail it with ENOTCONN because afr_locked_nodes_get() returns zero nodes
in afr_changelog_pre_op().
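
To make the race concrete, here is a minimal standalone sketch, not the afr xlator code: the brick count, the granted[][] table and the printed message are illustrative assumptions. It models an rmdir that won its (parent, name) entrylk on one brick but lost the lock on the directory itself to the parallel rmdir, so no brick holds every lock set; that is the state the old code carried into pre-op and then reported as ENOTCONN.

```c
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define BRICKS    2
#define LOCK_SETS 2   /* set 0: (parent dir, name), set 1: lock on the dir itself */

/* granted[set][brick]: which blocking entry locks this rmdir actually won.
 * The racing rmdir holds the rest, so no brick ends up with both sets. */
static const bool granted[LOCK_SETS][BRICKS] = {
        { true,  false },   /* (parent, name) lock: won only on brick 0 */
        { false, false },   /* lock on the dir being deleted: lost everywhere */
};

/* Count bricks on which every lock set was obtained. */
static int fully_locked_bricks(void)
{
        int count = 0;

        for (int brick = 0; brick < BRICKS; brick++) {
                bool all = true;

                for (int set = 0; set < LOCK_SETS; set++)
                        if (!granted[set][brick])
                                all = false;
                if (all)
                        count++;
        }
        return count;
}

int main(void)
{
        /* Zero fully locked bricks: proceeding to pre-op anyway is what
         * produced the spurious ENOTCONN, since no usable child is found. */
        printf("fully locked bricks = %d -> errno %d (ENOTCONN)\n",
               fully_locked_bricks(), ENOTCONN);
        return 0;
}
```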
Fix:
After we get replies for all blocking lock requests, if we don't have
the minimum number of locks to carry out the FOP, unlock and fail the
FOP. The op_errno will be that of the last failed reply we got, i.e.
whatever is set in afr_lock_cbk().
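
Below is a minimal sketch of that decision under assumed names: lock_phase_t, granted[][] and last_op_errno are hypothetical stand-ins for the real afr_internal_lock_t bookkeeping, and the brick/set counts are arbitrary. It shows only the rule the fix adds for entry locks: proceed if at least one brick holds every lock set, otherwise fail the FOP with the errno recorded from the last failed lock reply.

```c
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define BRICKS    3
#define LOCK_SETS 2

/* Hypothetical summary of the blocking-lock phase: which entrylk of each
 * set was granted on which brick, and the op_errno captured from the last
 * failed reply (the value afr_lock_cbk() would have stored). */
typedef struct {
        bool granted[LOCK_SETS][BRICKS];
        int  last_op_errno;
} lock_phase_t;

/* Return 0 if the FOP may proceed, or -errno to unlock and fail it with. */
static int check_entry_locks(const lock_phase_t *lp)
{
        for (int brick = 0; brick < BRICKS; brick++) {
                bool all = true;

                for (int set = 0; set < LOCK_SETS; set++)
                        if (!lp->granted[set][brick])
                                all = false;
                if (all)
                        return 0;   /* at least one brick holds every set */
        }
        /* Not enough locks: the caller releases what it holds and fails the
         * FOP with the errno of the last failed lock reply. */
        return -lp->last_op_errno;
}

int main(void)
{
        lock_phase_t lp = {
                .granted = {
                        { true,  true,  false },  /* (parent, name) set */
                        { false, true,  true  },  /* lock on the dir itself */
                },
                .last_op_errno = EAGAIN,
        };
        int ret = check_entry_locks(&lp);

        if (ret == 0)
                printf("proceed to pre-op\n");   /* brick 1 holds both sets */
        else
                printf("fail FOP, op_errno = %d\n", -ret);
        return 0;
}
```

In the actual patch this rule is implemented by is_blocking_locks_count_sufficient() in the diff below, which also keeps the existing inodelk behaviour of requiring at least one granted lock.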
Change-Id: Ibef25e65b468ebb5ea6ae1f5121a5f1201072293
BUG: 1336381
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: http://review.gluster.org/14358
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
| -rw-r--r-- | xlators/cluster/afr/src/afr-lk-common.c | 88 |
|---|---|---|

1 file changed, 83 insertions(+), 5 deletions(-)
```diff
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 53bb7920089..c2a5f526c08 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -1029,6 +1029,88 @@ _is_lock_wind_needed (afr_local_t *local, int child_index)
         return _gf_true;
 }
 
+static void
+afr_log_entry_locks_failure(xlator_t *this, afr_local_t *local,
+                            afr_internal_lock_t *int_lock)
+{
+        const char *fop = NULL;
+        char *pargfid = NULL;
+        const char *name = NULL;
+
+        fop = gf_fop_list[local->op];
+
+        switch (local->op) {
+        case GF_FOP_LINK:
+                pargfid = uuid_utoa(local->newloc.pargfid);
+                name = local->newloc.name;
+                break;
+        default:
+                pargfid = uuid_utoa(local->loc.pargfid);
+                name = local->loc.name;
+                break;
+        }
+
+        gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_BLOCKING_LKS_FAILED,
+                "Unable to obtain sufficient blocking entry locks on at least "
+                "one child while attempting %s on {pgfid:%s, name:%s}.", fop,
+                pargfid, name);
+}
+
+static gf_boolean_t
+is_blocking_locks_count_sufficient (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local = NULL;
+        afr_private_t *priv = NULL;
+        afr_internal_lock_t *int_lock = NULL;
+        gf_boolean_t is_entrylk = _gf_false;
+        int child = 0;
+        int nlockee = 0;
+        int lockee_count = 0;
+        gf_boolean_t ret = _gf_true;
+
+        local = frame->local;
+        priv = this->private;
+        int_lock = &local->internal_lock;
+        lockee_count = int_lock->lockee_count;
+        is_entrylk = afr_is_entrylk (int_lock, local->transaction.type);
+
+        if (!is_entrylk) {
+                if (int_lock->lock_count == 0) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                AFR_MSG_BLOCKING_LKS_FAILED, "Unable to obtain "
+                                "blocking inode lock on even one child for "
+                                "gfid:%s.", uuid_utoa (local->inode->gfid));
+                        return _gf_false;
+                } else {
+                        /*inodelk succeded on atleast one child. */
+                        return _gf_true;
+                }
+
+        } else {
+                if (int_lock->entrylk_lock_count == 0) {
+                        afr_log_entry_locks_failure (this, local, int_lock);
+                        return _gf_false;
+                }
+                /* For FOPS that take multiple sets of locks (mkdir, rename),
+                 * there must be atleast one brick on which the locks from
+                 * all lock sets were successful. */
+                for (child = 0; child < priv->child_count; child++) {
+                        ret = _gf_true;
+                        for (nlockee = 0; nlockee < lockee_count; nlockee++) {
+                                if (!(int_lock->lockee[nlockee].locked_nodes[child] & LOCKED_YES))
+                                        ret = _gf_false;
+                        }
+                        if (ret)
+                                return ret;
+                }
+                if (!ret)
+                        afr_log_entry_locks_failure (this, local, int_lock);
+        }
+
+        return ret;
+
+}
+
 int
 afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
 {
@@ -1079,11 +1161,7 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
         }
 
         if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
-                if ((is_entrylk && int_lock->entrylk_lock_count == 0) ||
-                    (!is_entrylk && int_lock->lock_count == 0)) {
-                        gf_msg (this->name, GF_LOG_INFO, 0,
-                                AFR_MSG_BLOCKING_LKS_FAILED,
-                                "unable to lock on even one child");
+                if (!is_blocking_locks_count_sufficient (frame, this)) {
                         local->op_ret           = -1;
                         int_lock->lock_op_ret   = -1;
```
