diff options
| author | Pranith Kumar K <pkarampu@redhat.com> | 2016-03-17 19:42:00 +0530 | 
|---|---|---|
| committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2016-04-16 22:46:21 -0700 | 
| commit | 6b88d97c4a9e999180d77463e38ad14fc9d944cf (patch) | |
| tree | eba5134f30864dc9352770bbfc34e1577e250914 /xlators/cluster/afr/src | |
| parent | bd8ab0567017fd4dddfea738eae0bccc40b85527 (diff) | |
cluster/afr: Fix partial heals in 3-way replication
Problem:
When there are 2 sources and one sink and if two self-heal daemons
try to acquire locks at the same time, there is a chance that it
gets a lock on one source and sink leading partial to heal. This will
need one more heal from the remaining source to sink for the complete
self-heal. This is not optimal.
Fix:
Upgrade non-blocking locks to blocking lock on all the subvolumes, if
the number of locks acquired is majority and there were eagains.
 >BUG: 1318751
 >Change-Id: Iae10b8d3402756c4164b98cc49876056ff7a61e5
 >Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
 >Reviewed-on: http://review.gluster.org/13766
 >Smoke: Gluster Build System <jenkins@build.gluster.com>
 >NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
 >CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
 >Reviewed-by: Ravishankar N <ravishankar@redhat.com>
 >(cherry picked from commit 8deedef565df49def75083678f8d1558c7b1f7d3)
Change-Id: Ia164360dc1474a717f63633f5deb2c39cc15017c
BUG: 1327863
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/14008
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Diffstat (limited to 'xlators/cluster/afr/src')
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 121 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 10 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 7 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 13 | 
5 files changed, 138 insertions, 15 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index b3dbc95218c..130388dd75e 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -727,11 +727,14 @@ afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this,  void  afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type, -                  int source, unsigned char *healed_sinks) +                  int source, unsigned char *sources, +                  unsigned char *healed_sinks)  {          char *status = NULL;          char *sinks_str = NULL;          char *p = NULL; +        char *sources_str = NULL; +        char *q = NULL;          afr_private_t *priv = NULL;          gf_loglevel_t loglevel = GF_LOG_NONE;          int i = 0; @@ -739,10 +742,18 @@ afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type,          priv = this->private;          sinks_str = alloca0 (priv->child_count * 8);          p = sinks_str; +        sources_str = alloca0 (priv->child_count * 8); +        q = sources_str;          for (i = 0; i < priv->child_count; i++) { -                if (!healed_sinks[i]) -                        continue; -                p += sprintf (p, "%d ", i); +                if (healed_sinks[i]) +                        p += sprintf (p, "%d ", i); +                if (sources[i]) { +                        if (source == i) { +                                q += sprintf (q, "[%d] ", i); +                        } else { +                                q += sprintf (q, "%d ", i); +                        } +                }          }          if (ret < 0) { @@ -755,8 +766,8 @@ afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type,          gf_msg (this->name, loglevel, 0,                  AFR_MSG_SELF_HEAL_INFO, "%s %s selfheal on %s. " -                "source=%d sinks=%s", status, type, uuid_utoa (gfid), -                source, sinks_str); +                "sources=%s sinks=%s", status, type, uuid_utoa (gfid), +                sources_str, sinks_str);  }  int @@ -1012,6 +1023,67 @@ afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,  	return afr_locked_fill (frame, this, locked_on);  } +static void +afr_get_lock_and_eagain_counts (afr_private_t *priv, struct afr_reply *replies, +                                int *lock_count, int *eagain_count) +{ +	int i = 0; + +	for (i = 0; i < priv->child_count; i++) { +	        if (!replies[i].valid) +	                continue; +	        if (replies[i].op_ret == 0) { +	                (*lock_count)++; +                } else if (replies[i].op_ret == -1 && +		         replies[i].op_errno == EAGAIN) { +		        (*eagain_count)++; +                } +	} +} + +/*Do blocking locks if number of locks acquired is majority and there were some + * EAGAINs. Useful for odd-way replication*/ +int +afr_selfheal_tie_breaker_inodelk (call_frame_t *frame, xlator_t *this, +                                  inode_t *inode, char *dom, off_t off, +                                  size_t size, unsigned char *locked_on) +{ +	loc_t loc = {0,}; +	struct gf_flock flock = {0, }; +	afr_local_t *local = NULL; +	afr_private_t *priv = NULL; +	int lock_count = 0; +	int eagain_count = 0; + +	priv = this->private; +	local = frame->local; + +	loc.inode = inode_ref (inode); +	gf_uuid_copy (loc.gfid, inode->gfid); + +	flock.l_type = F_WRLCK; +	flock.l_start = off; +	flock.l_len = size; + +	AFR_ONALL (frame, afr_selfheal_lock_cbk, inodelk, dom, +		   &loc, F_SETLK, &flock, NULL); + +        afr_get_lock_and_eagain_counts (priv, local->replies, &lock_count, +                                        &eagain_count); + +	if (lock_count > priv->child_count/2 && eagain_count) { +                afr_locked_fill (frame, this, locked_on); +                afr_selfheal_uninodelk (frame, this, inode, dom, off, +                                        size, locked_on); + +                AFR_SEQ (frame, afr_selfheal_lock_cbk, inodelk, dom, +                         &loc, F_SETLKW, &flock, NULL); +        } + +	loc_wipe (&loc); + +	return afr_locked_fill (frame, this, locked_on); +}  int  afr_selfheal_uninodelk (call_frame_t *frame, xlator_t *this, inode_t *inode, @@ -1092,6 +1164,43 @@ afr_selfheal_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,  	return afr_locked_fill (frame, this, locked_on);  } +int +afr_selfheal_tie_breaker_entrylk (call_frame_t *frame, xlator_t *this, +                                  inode_t *inode, char *dom, const char *name, +                                  unsigned char *locked_on) +{ +	loc_t loc = {0,}; +	afr_local_t *local = NULL; +	afr_private_t *priv = NULL; +	int lock_count = 0; +	int eagain_count = 0; + +	priv = this->private; +	local = frame->local; + +	loc.inode = inode_ref (inode); +	gf_uuid_copy (loc.gfid, inode->gfid); + +	AFR_ONALL (frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, +		   name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL); + +        afr_get_lock_and_eagain_counts (priv, local->replies, &lock_count, +                                        &eagain_count); + +	if (lock_count > priv->child_count/2 && eagain_count) { +                afr_locked_fill (frame, this, locked_on); +                afr_selfheal_unentrylk (frame, this, inode, dom, name, +                                        locked_on); + +                AFR_SEQ (frame, afr_selfheal_lock_cbk, entrylk, dom, +                         &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); +	} + +	loc_wipe (&loc); + +	return afr_locked_fill (frame, this, locked_on); +} +  int  afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode, diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 332471c87e6..67bb8e6d3c9 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -786,7 +786,7 @@ out:          if (did_sh)                  afr_log_selfheal (fd->inode->gfid, this, ret, "data", source, -                                  healed_sinks); +                                  sources, healed_sinks);          else                  ret = 1; @@ -844,8 +844,9 @@ afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode)  	locked_on = alloca0 (priv->child_count); -	ret = afr_selfheal_tryinodelk (frame, this, inode, priv->sh_domain, 0, 0, -				       locked_on); +	ret = afr_selfheal_tie_breaker_inodelk (frame, this, inode, +	                                        priv->sh_domain, 0, 0, +				                locked_on);  	{  		if (ret < AFR_SH_MIN_PARTICIPANTS) {                          gf_msg_debug (this->name, 0, "%s: Skipping " @@ -864,7 +865,8 @@ afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode)  		ret = __afr_selfheal_data (frame, this, fd, locked_on);  	}  unlock: -	afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0, locked_on); +	afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0, +	                        locked_on);  	if (fd)  		fd_unref (fd); diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 0837e5a30fa..a93c6400e0a 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -704,7 +704,7 @@ postop_unlock:  out:          if (did_sh)                  afr_log_selfheal (fd->inode->gfid, this, ret, "entry", source, -                                  healed_sinks); +                                  sources, healed_sinks);          else                  ret = 1; @@ -759,8 +759,9 @@ afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode)  	locked_on = alloca0 (priv->child_count);  	long_name_locked = alloca0 (priv->child_count); -	ret = afr_selfheal_tryentrylk (frame, this, inode, priv->sh_domain, NULL, -				       locked_on); +	ret = afr_selfheal_tie_breaker_entrylk (frame, this, inode, +	                                        priv->sh_domain, NULL, +                                                locked_on);  	{  		if (ret < AFR_SH_MIN_PARTICIPANTS) {                          gf_msg_debug (this->name, 0, "%s: Skipping " diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 778f2a1e0da..d6daadc2ba6 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -427,7 +427,7 @@ unlock:          if (did_sh)                  afr_log_selfheal (inode->gfid, this, ret, "metadata", source, -                                  healed_sinks); +                                  sources, healed_sinks);          else                  ret = 1; diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index b0f545f2816..e0a33418ae7 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -113,6 +113,11 @@ afr_selfheal_tryinodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,  			 unsigned char *locked_on);  int +afr_selfheal_tie_breaker_inodelk (call_frame_t *frame, xlator_t *this, +                                  inode_t *inode, char *dom, off_t off, +                                  size_t size, unsigned char *locked_on); + +int  afr_selfheal_uninodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,  			char *dom, off_t off, size_t size,  			const unsigned char *locked_on); @@ -126,6 +131,11 @@ afr_selfheal_tryentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,  			 char *dom, const char *name, unsigned char *locked_on);  int +afr_selfheal_tie_breaker_entrylk (call_frame_t *frame, xlator_t *this, +                                  inode_t *inode, char *dom, const char *name, +                                  unsigned char *locked_on); + +int  afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,  			char *dom, const char *name, unsigned char *locked_on); @@ -197,7 +207,8 @@ afr_success_count (struct afr_reply *replies, unsigned int count);  void  afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type, -                  int source, unsigned char *healed_sinks); +                  int source, unsigned char *sources, +                  unsigned char *healed_sinks);  void  afr_mark_largest_file_as_source (xlator_t *this, unsigned char *sources,  | 
