author      Pranith Kumar K <pkarampu@redhat.com>           2016-03-31 14:40:09 +0530
committer   Pranith Kumar Karampuri <pkarampu@redhat.com>   2016-04-20 09:46:52 -0700
commit      c55da44b49f0183948f464dae4a5e11d9ed63a24 (patch)
tree        093594e45f732f703387eb8d6812a33a901bd601 /xlators
parent      e4aef8290e8aac8d7fa345db8703a9c3f95a9f66 (diff)
cluster/afr: Fix spurious entries in heal info
Problem: The locking scheme in afr-v1 locked the directory/file completely
during self-heal. Newer locking schemes don't require full directory/file
locking, but afr-v2 still carries compatibility code to work well with older
clients: entry self-heal takes a lock on a special 256-character name which
can't be created on the fs, and data self-heal used to take a lock on the
range (LLONG_MAX-2, 1). The old locking scheme requires heal-info to take
sh-domain locks before examining heal state; if it doesn't take sh-domain
locks, heal-info may hang until self-heal completes because of the
compatibility locks. But the problem with heal-info taking sh-domain locks
is that when two heal-info processes, or shd and heal-info, inspect heal
state in parallel using trylocks on the sh-domain, both of them may conclude
that a heal is in progress. This was leading to spurious entries being shown
in heal-info.

Fix: As long as the afr-v1 way of locking exists, this problem has no simple
solution. But if we know the cluster is running the newer locking scheme,
heal-info can give accurate information. So introduce a new option called
'locking-scheme'; when it is set to 'granular', heal-info reports correct
information. In addition, the extra network hops for taking compatibility
locks and sh-domain locks in heal-info are no longer necessary, which
improves performance.

>BUG: 1322850
>Change-Id: Ia563c5f096b5922009ff0ec1c42d969d55d827a3
>Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
>Reviewed-on: http://review.gluster.org/13873
>Smoke: Gluster Build System <jenkins@build.gluster.com>
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
>CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
>Reviewed-by: Ashish Pandey <aspandey@redhat.com>
>Reviewed-by: Anuradha Talur <atalur@redhat.com>
>Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
>(cherry picked from commit b6a0780d86e7c6afe7ae0d9a87e6fe5c62b4d792)

Change-Id: If7eee18843b48bbeff4c1355c102aa572b2c155a
BUG: 1294675
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/14039
Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
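(For illustration: a minimal, self-contained sketch of the trylock race
described above, using pthread mutexes as stand-ins for the per-brick
sh-domain locks. This is a hypothetical analogy, not the actual afr code;
names such as "brick" and "inspect" are invented for the example.)

#include <pthread.h>
#include <stdio.h>

/* Two stand-in per-brick sh-domain locks. */
static pthread_mutex_t brick[2] = { PTHREAD_MUTEX_INITIALIZER,
                                    PTHREAD_MUTEX_INITIALIZER };

/* Each inspector trylocks every brick; any failed trylock is read as
 * "self-heal holds the lock", i.e. a heal in progress. */
static void *inspect (void *name)
{
        int i, nlocked = 0, held[2] = {0, 0};

        for (i = 0; i < 2; i++) {
                if (pthread_mutex_trylock (&brick[i]) == 0) {
                        held[i] = 1;
                        nlocked++;
                }
        }

        if (nlocked < 2)
                /* Two parallel inspectors can each win one brick and
                 * lose the other, so BOTH report a heal in progress:
                 * the spurious heal-info entry. */
                printf ("%s: possibly undergoing heal\n", (char *)name);
        else
                printf ("%s: clean\n", (char *)name);

        for (i = 0; i < 2; i++)
                if (held[i])
                        pthread_mutex_unlock (&brick[i]);
        return NULL;
}

int main (void)
{
        pthread_t a, b;

        pthread_create (&a, NULL, inspect, "heal-info-1");
        pthread_create (&b, NULL, inspect, "heal-info-2");
        pthread_join (a, NULL);
        pthread_join (b, NULL);
        return 0;   /* output is timing-dependent, just like the race */
}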
Diffstat (limited to 'xlators')
-rw-r--r--   xlators/cluster/afr/src/afr-common.c             32
-rw-r--r--   xlators/cluster/afr/src/afr-self-heal-entry.c    17
-rw-r--r--   xlators/cluster/afr/src/afr.c                    14
-rw-r--r--   xlators/cluster/afr/src/afr.h                     1
-rw-r--r--   xlators/mgmt/glusterd/src/glusterd-volume-set.c   7
5 files changed, 54 insertions(+), 17 deletions(-)
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index b799bffe697..c79ec06972a 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -4554,8 +4554,11 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
afr_private_t *priv = NULL;
fd_t *fd = NULL;
struct afr_reply *locked_replies = NULL;
+ gf_boolean_t granular_locks = _gf_false;
priv = this->private;
+ if (strcmp ("granular", priv->locking_scheme) == 0)
+ granular_locks = _gf_true;
locked_on = alloca0 (priv->child_count);
data_lock = alloca0 (priv->child_count);
sources = alloca0 (priv->child_count);
@@ -4576,10 +4579,12 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
- ret = afr_selfheal_tryinodelk (frame, this, inode, priv->sh_domain,
- 0, 0, locked_on);
+ if (!granular_locks) {
+ ret = afr_selfheal_tryinodelk (frame, this, inode,
+ priv->sh_domain, 0, 0, locked_on);
+ }
{
- if (ret == 0) {
+ if (!granular_locks && (ret == 0)) {
ret = -afr_final_errno (frame->local, priv);
if (ret == 0)
ret = -ENOTCONN;/* all invalid responses */
@@ -4606,8 +4611,9 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
data_lock);
}
unlock:
- afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0,
- locked_on);
+ if (!granular_locks)
+ afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0,
+ 0, locked_on);
out:
if (locked_replies)
afr_replies_wipe (locked_replies, priv->child_count);
@@ -4630,8 +4636,11 @@ afr_selfheal_locked_entry_inspect (call_frame_t *frame, xlator_t *this,
unsigned char *sinks = NULL;
unsigned char *healed_sinks = NULL;
struct afr_reply *locked_replies = NULL;
+ gf_boolean_t granular_locks = _gf_false;
priv = this->private;
+ if (strcmp ("granular", priv->locking_scheme) == 0)
+ granular_locks = _gf_true;
locked_on = alloca0 (priv->child_count);
data_lock = alloca0 (priv->child_count);
sources = alloca0 (priv->child_count);
@@ -4640,10 +4649,12 @@ afr_selfheal_locked_entry_inspect (call_frame_t *frame, xlator_t *this,
locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
- ret = afr_selfheal_tryentrylk (frame, this, inode, priv->sh_domain,
- NULL, locked_on);
+ if (!granular_locks) {
+ ret = afr_selfheal_tryentrylk (frame, this, inode,
+ priv->sh_domain, NULL, locked_on);
+ }
{
- if (ret == 0) {
+ if (!granular_locks && ret == 0) {
ret = -afr_final_errno (frame->local, priv);
if (ret == 0)
ret = -ENOTCONN;/* all invalid responses */
@@ -4673,8 +4684,9 @@ afr_selfheal_locked_entry_inspect (call_frame_t *frame, xlator_t *this,
data_lock);
}
unlock:
- afr_selfheal_unentrylk (frame, this, inode, priv->sh_domain, NULL,
- locked_on);
+ if (!granular_locks)
+ afr_selfheal_unentrylk (frame, this, inode, priv->sh_domain,
+ NULL, locked_on);
out:
if (locked_replies)
afr_replies_wipe (locked_replies, priv->child_count);
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index a93c6400e0a..30db66b5fe9 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -749,8 +749,11 @@ afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode)
unsigned char *long_name_locked = NULL;
fd_t *fd = NULL;
int ret = 0;
+ gf_boolean_t granular_locks = _gf_false;
priv = this->private;
+ if (strcmp ("granular", priv->locking_scheme) == 0)
+ granular_locks = _gf_true;
fd = afr_selfheal_data_opendir (this, inode);
if (!fd)
@@ -777,10 +780,13 @@ afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode)
goto unlock;
}
- ret = afr_selfheal_tryentrylk (frame, this, inode, this->name,
- LONG_FILENAME, long_name_locked);
+ if (!granular_locks) {
+ ret = afr_selfheal_tryentrylk (frame, this, inode,
+ this->name, LONG_FILENAME,
+ long_name_locked);
+ }
{
- if (ret < 1) {
+ if (!granular_locks && ret < 1) {
gf_msg_debug (this->name, 0, "%s: Skipping"
" entry self-heal as only %d "
"sub-volumes could be "
@@ -793,8 +799,9 @@ afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode)
}
ret = __afr_selfheal_entry (frame, this, fd, locked_on);
}
- afr_selfheal_unentrylk (frame, this, inode, this->name,
- LONG_FILENAME, long_name_locked);
+ if (!granular_locks)
+ afr_selfheal_unentrylk (frame, this, inode, this->name,
+ LONG_FILENAME, long_name_locked);
}
unlock:
afr_selfheal_unentrylk (frame, this, inode, priv->sh_domain, NULL, locked_on);
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index c47e6377a4c..ad92eaf0c3e 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -186,7 +186,10 @@ reconfigure (xlator_t *this, dict_t *options)
priv->read_child = index;
}
- GF_OPTION_RECONF ("pre-op-compat", priv->pre_op_compat, options, bool, out);
+ GF_OPTION_RECONF ("pre-op-compat", priv->pre_op_compat, options, bool,
+ out);
+ GF_OPTION_RECONF ("locking-scheme", priv->locking_scheme, options, str,
+ out);
GF_OPTION_RECONF ("eager-lock", priv->eager_lock, options, bool, out);
GF_OPTION_RECONF ("quorum-type", qtype, options, str, out);
@@ -377,6 +380,7 @@ init (xlator_t *this)
GF_OPTION_INIT ("entrylk-trace", priv->entrylk_trace, bool, out);
GF_OPTION_INIT ("pre-op-compat", priv->pre_op_compat, bool, out);
+ GF_OPTION_INIT ("locking-scheme", priv->locking_scheme, str, out);
GF_OPTION_INIT ("eager-lock", priv->eager_lock, bool, out);
GF_OPTION_INIT ("quorum-type", qtype, str, out);
@@ -862,5 +866,13 @@ struct volume_options options[] = {
.description = "This option can be used to control number of heals"
" that can wait in SHD per subvolume",
},
+ { .key = {"locking-scheme"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = { "full", "granular"},
+ .default_value = "full",
+ .description = "If this option is set to granular, self-heal will "
+ "stop being compatible with afr-v1, which helps afr "
+ "be more granular while self-healing",
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 1a08ff52dfd..d3c1e1066b6 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -144,6 +144,7 @@ typedef struct _afr_private {
/* pump dependencies */
void *pump_private;
gf_boolean_t use_afr_in_pump;
+ char *locking_scheme;
} afr_private_t;
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index d15ee835a4e..b3fdf1ad5f9 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -2717,7 +2717,12 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.op_version = GD_OP_VERSION_3_7_12,
.flags = OPT_FLAG_CLIENT_OPT
},
-
+ { .key = "cluster.locking-scheme",
+ .voltype = "cluster/replicate",
+ .type = DOC,
+ .op_version = GD_OP_VERSION_3_7_12,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
{ .key = NULL
}
};
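(Usage note: once the option is registered in the volopt map as above, it
would be toggled per volume through the standard volume-set interface; a
hypothetical example, assuming a replicated volume named "testvol":

    gluster volume set testvol cluster.locking-scheme granular

With the default value "full", the compatibility locks shown in the diff
stay in effect.)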