summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPranith K <pranithk@gluster.com>2011-02-08 02:22:52 +0000
committerAnand V. Avati <avati@dev.gluster.com>2011-02-17 23:32:56 -0800
commit52cf9e992749a7cd5b2411581eff4c052d055ea9 (patch)
tree932da5f539a2c8d7c54ae86d991e63088fdf56d0
parente77eae0f04b90ad9722ce82d59fd552cae63c347 (diff)
cluster/afr: stop spawning self-heal loops when the self-heal fails
With the current model of self-heal, a loop won't resume once a self-heal failure happens. This change stops spawning the initial loops when self-heal has failed. It also fixes the invalid read reported by valgrind in diff self-heal. Signed-off-by: Pranith Kumar K <pranithk@gluster.com> Signed-off-by: Anand V. Avati <avati@dev.gluster.com> BUG: 1174 (Replicate spawns read loops even after destination fails) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=1174
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.c36
1 files changed, 24 insertions, 12 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
index a65fae0ac..f72da7741 100644
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c
+++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
@@ -219,7 +219,6 @@ sh_full_read_cbk (call_frame_t *rw_frame, void *cookie,
if (op_ret <= 0) {
sh->op_failed = 1;
-
sh_full_loop_return (rw_frame, this, offset);
return 0;
}
@@ -353,8 +352,13 @@ sh_full_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_
UNLOCK (&sh_priv->lock);
while (loop--) {
- sh_full_read_write (frame, this, offset);
- offset += block_size;
+ if (sh->op_failed) {
+ // op failed in other loop, stop spawning more loops
+ sh_full_loop_driver (frame, this, _gf_false);
+ } else {
+ sh_full_read_write (frame, this, offset);
+ offset += block_size;
+ }
}
if (is_driver_done) {
@@ -492,12 +496,16 @@ sh_diff_loop_driver_done (call_frame_t *frame, xlator_t *this)
afr_local_t * local = NULL;
afr_self_heal_t * sh = NULL;
afr_sh_algo_diff_private_t *sh_priv = NULL;
+ int32_t total_blocks = 0;
+ int32_t diff_blocks = 0;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ sh_priv = sh->private;
+ total_blocks = sh_priv->total_blocks;
+ diff_blocks = sh_priv->diff_blocks;
sh_diff_private_cleanup (frame, this);
if (sh->op_failed) {
@@ -514,9 +522,8 @@ sh_diff_loop_driver_done (call_frame_t *frame, xlator_t *this)
gf_log (this->name, GF_LOG_NORMAL,
"diff self-heal on %s: %d blocks of %d were different (%.2f%%)",
- local->loc.path, sh_priv->diff_blocks,
- sh_priv->total_blocks,
- ((sh_priv->diff_blocks * 1.0)/sh_priv->total_blocks) * 100);
+ local->loc.path, diff_blocks, total_blocks,
+ ((diff_blocks * 1.0)/total_blocks) * 100);
local->self_heal.algo_completion_cbk (frame, this);
}
@@ -1014,8 +1021,13 @@ sh_diff_loop_driver (call_frame_t *frame, xlator_t *this,
UNLOCK (&sh_priv->lock);
while (loop--) {
- sh_diff_checksum (frame, this, offset);
- offset += block_size;
+ if (sh->op_failed) {
+ // op failed in other loop, stop spawning more loops
+ sh_diff_loop_driver (frame, this, _gf_false, NULL);
+ } else {
+ sh_diff_checksum (frame, this, offset);
+ offset += block_size;
+ }
}
if (is_driver_done) {