summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPranith Kumar K <pkarampu@redhat.com>2016-03-17 19:42:00 +0530
committerPranith Kumar Karampuri <pkarampu@redhat.com>2016-04-16 22:46:21 -0700
commit6b88d97c4a9e999180d77463e38ad14fc9d944cf (patch)
treeeba5134f30864dc9352770bbfc34e1577e250914
parentbd8ab0567017fd4dddfea738eae0bccc40b85527 (diff)
cluster/afr: Fix partial heals in 3-way replication
Problem: When there are 2 sources and one sink, and two self-heal daemons try to acquire locks at the same time, there is a chance that one of them gets a lock on only one source and the sink, leading to a partial heal. This will need one more heal from the remaining source to the sink to complete the self-heal. This is not optimal. Fix: Upgrade the non-blocking locks to blocking locks on all the subvolumes if the number of locks acquired is a majority and there were EAGAINs. >BUG: 1318751 >Change-Id: Iae10b8d3402756c4164b98cc49876056ff7a61e5 >Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> >Reviewed-on: http://review.gluster.org/13766 >Smoke: Gluster Build System <jenkins@build.gluster.com> >NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> >CentOS-regression: Gluster Build System <jenkins@build.gluster.com> >Reviewed-by: Ravishankar N <ravishankar@redhat.com> >(cherry picked from commit 8deedef565df49def75083678f8d1558c7b1f7d3) Change-Id: Ia164360dc1474a717f63633f5deb2c39cc15017c BUG: 1327863 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> Reviewed-on: http://review.gluster.org/14008 Smoke: Gluster Build System <jenkins@build.gluster.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c121
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c10
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c7
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c2
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h13
5 files changed, 138 insertions, 15 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index b3dbc95218c..130388dd75e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -727,11 +727,14 @@ afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this,
void
afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type,
- int source, unsigned char *healed_sinks)
+ int source, unsigned char *sources,
+ unsigned char *healed_sinks)
{
char *status = NULL;
char *sinks_str = NULL;
char *p = NULL;
+ char *sources_str = NULL;
+ char *q = NULL;
afr_private_t *priv = NULL;
gf_loglevel_t loglevel = GF_LOG_NONE;
int i = 0;
@@ -739,10 +742,18 @@ afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type,
priv = this->private;
sinks_str = alloca0 (priv->child_count * 8);
p = sinks_str;
+ sources_str = alloca0 (priv->child_count * 8);
+ q = sources_str;
for (i = 0; i < priv->child_count; i++) {
- if (!healed_sinks[i])
- continue;
- p += sprintf (p, "%d ", i);
+ if (healed_sinks[i])
+ p += sprintf (p, "%d ", i);
+ if (sources[i]) {
+ if (source == i) {
+ q += sprintf (q, "[%d] ", i);
+ } else {
+ q += sprintf (q, "%d ", i);
+ }
+ }
}
if (ret < 0) {
@@ -755,8 +766,8 @@ afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type,
gf_msg (this->name, loglevel, 0,
AFR_MSG_SELF_HEAL_INFO, "%s %s selfheal on %s. "
- "source=%d sinks=%s", status, type, uuid_utoa (gfid),
- source, sinks_str);
+ "sources=%s sinks=%s", status, type, uuid_utoa (gfid),
+ sources_str, sinks_str);
}
int
@@ -1012,6 +1023,67 @@ afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
return afr_locked_fill (frame, this, locked_on);
}
+static void
+afr_get_lock_and_eagain_counts (afr_private_t *priv, struct afr_reply *replies,
+ int *lock_count, int *eagain_count)
+{
+ int i = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (replies[i].op_ret == 0) {
+ (*lock_count)++;
+ } else if (replies[i].op_ret == -1 &&
+ replies[i].op_errno == EAGAIN) {
+ (*eagain_count)++;
+ }
+ }
+}
+
+/*Do blocking locks if number of locks acquired is majority and there were some
+ * EAGAINs. Useful for odd-way replication*/
+int
+afr_selfheal_tie_breaker_inodelk (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, char *dom, off_t off,
+ size_t size, unsigned char *locked_on)
+{
+ loc_t loc = {0,};
+ struct gf_flock flock = {0, };
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int lock_count = 0;
+ int eagain_count = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.gfid, inode->gfid);
+
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
+
+ AFR_ONALL (frame, afr_selfheal_lock_cbk, inodelk, dom,
+ &loc, F_SETLK, &flock, NULL);
+
+ afr_get_lock_and_eagain_counts (priv, local->replies, &lock_count,
+ &eagain_count);
+
+ if (lock_count > priv->child_count/2 && eagain_count) {
+ afr_locked_fill (frame, this, locked_on);
+ afr_selfheal_uninodelk (frame, this, inode, dom, off,
+ size, locked_on);
+
+ AFR_SEQ (frame, afr_selfheal_lock_cbk, inodelk, dom,
+ &loc, F_SETLKW, &flock, NULL);
+ }
+
+ loc_wipe (&loc);
+
+ return afr_locked_fill (frame, this, locked_on);
+}
int
afr_selfheal_uninodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
@@ -1092,6 +1164,43 @@ afr_selfheal_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
return afr_locked_fill (frame, this, locked_on);
}
+int
+afr_selfheal_tie_breaker_entrylk (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, char *dom, const char *name,
+ unsigned char *locked_on)
+{
+ loc_t loc = {0,};
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int lock_count = 0;
+ int eagain_count = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.gfid, inode->gfid);
+
+ AFR_ONALL (frame, afr_selfheal_lock_cbk, entrylk, dom, &loc,
+ name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
+
+ afr_get_lock_and_eagain_counts (priv, local->replies, &lock_count,
+ &eagain_count);
+
+ if (lock_count > priv->child_count/2 && eagain_count) {
+ afr_locked_fill (frame, this, locked_on);
+ afr_selfheal_unentrylk (frame, this, inode, dom, name,
+ locked_on);
+
+ AFR_SEQ (frame, afr_selfheal_lock_cbk, entrylk, dom,
+ &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+ }
+
+ loc_wipe (&loc);
+
+ return afr_locked_fill (frame, this, locked_on);
+}
+
int
afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 332471c87e6..67bb8e6d3c9 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -786,7 +786,7 @@ out:
if (did_sh)
afr_log_selfheal (fd->inode->gfid, this, ret, "data", source,
- healed_sinks);
+ sources, healed_sinks);
else
ret = 1;
@@ -844,8 +844,9 @@ afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode)
locked_on = alloca0 (priv->child_count);
- ret = afr_selfheal_tryinodelk (frame, this, inode, priv->sh_domain, 0, 0,
- locked_on);
+ ret = afr_selfheal_tie_breaker_inodelk (frame, this, inode,
+ priv->sh_domain, 0, 0,
+ locked_on);
{
if (ret < AFR_SH_MIN_PARTICIPANTS) {
gf_msg_debug (this->name, 0, "%s: Skipping "
@@ -864,7 +865,8 @@ afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode)
ret = __afr_selfheal_data (frame, this, fd, locked_on);
}
unlock:
- afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0, locked_on);
+ afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0,
+ locked_on);
if (fd)
fd_unref (fd);
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 0837e5a30fa..a93c6400e0a 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -704,7 +704,7 @@ postop_unlock:
out:
if (did_sh)
afr_log_selfheal (fd->inode->gfid, this, ret, "entry", source,
- healed_sinks);
+ sources, healed_sinks);
else
ret = 1;
@@ -759,8 +759,9 @@ afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode)
locked_on = alloca0 (priv->child_count);
long_name_locked = alloca0 (priv->child_count);
- ret = afr_selfheal_tryentrylk (frame, this, inode, priv->sh_domain, NULL,
- locked_on);
+ ret = afr_selfheal_tie_breaker_entrylk (frame, this, inode,
+ priv->sh_domain, NULL,
+ locked_on);
{
if (ret < AFR_SH_MIN_PARTICIPANTS) {
gf_msg_debug (this->name, 0, "%s: Skipping "
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 778f2a1e0da..d6daadc2ba6 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -427,7 +427,7 @@ unlock:
if (did_sh)
afr_log_selfheal (inode->gfid, this, ret, "metadata", source,
- healed_sinks);
+ sources, healed_sinks);
else
ret = 1;
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index b0f545f2816..e0a33418ae7 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -113,6 +113,11 @@ afr_selfheal_tryinodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
unsigned char *locked_on);
int
+afr_selfheal_tie_breaker_inodelk (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, char *dom, off_t off,
+ size_t size, unsigned char *locked_on);
+
+int
afr_selfheal_uninodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
char *dom, off_t off, size_t size,
const unsigned char *locked_on);
@@ -126,6 +131,11 @@ afr_selfheal_tryentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
char *dom, const char *name, unsigned char *locked_on);
int
+afr_selfheal_tie_breaker_entrylk (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, char *dom, const char *name,
+ unsigned char *locked_on);
+
+int
afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
char *dom, const char *name, unsigned char *locked_on);
@@ -197,7 +207,8 @@ afr_success_count (struct afr_reply *replies, unsigned int count);
void
afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type,
- int source, unsigned char *healed_sinks);
+ int source, unsigned char *sources,
+ unsigned char *healed_sinks);
void
afr_mark_largest_file_as_source (xlator_t *this, unsigned char *sources,