From 2b1bf891f550487d2c05fc9e377f0d15d4a08d0a Mon Sep 17 00:00:00 2001 From: Venkatesh Somyajula Date: Fri, 23 Nov 2012 18:12:52 +0530 Subject: Cluster/afr: Fix output for gluster volume heal vn info healed Problem: Whenever gluster volume heal vol full command is executed, the entries stored in the circular buffer for sh->healed are added in the dictionary in the _crawl_post_sh_action function irrespective of whether actual self heal (due to non-zero values in change log) takes place or not. Fix: Value of key (actual-sh-done) will be set to 1 whenever self heal takes place due to non-zero change log values and if for some FOP self heal daemon finds that no self heal is required after examining the pending matrix, the value will be 0. Change-Id: I11fd0b9ee76759af17c5bca6bfafbaf66bcaacbc BUG: 863068 Signed-off-by: Venkatesh Somyajula Reviewed-on: http://review.gluster.org/4181 Reviewed-by: Pranith Kumar Karampuri Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- tests/bugs/bug-863068.t | 76 ++++++++++++++++++++++++ xlators/cluster/afr/src/afr-common.c | 11 ++++ xlators/cluster/afr/src/afr-self-heal-common.c | 5 ++ xlators/cluster/afr/src/afr-self-heal-data.c | 2 + xlators/cluster/afr/src/afr-self-heal-entry.c | 1 + xlators/cluster/afr/src/afr-self-heal-metadata.c | 1 + xlators/cluster/afr/src/afr-self-heald.c | 48 ++++++++++----- xlators/cluster/afr/src/afr.h | 2 +- 8 files changed, 129 insertions(+), 17 deletions(-) create mode 100644 tests/bugs/bug-863068.t diff --git a/tests/bugs/bug-863068.t b/tests/bugs/bug-863068.t new file mode 100644 index 00000000000..56041b15dd3 --- /dev/null +++ b/tests/bugs/bug-863068.t @@ -0,0 +1,76 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc + +cleanup; + +## This function get the No. of entries for +## gluster volume heal volnmae info healed command for brick1 and brick2 +## and compare the initial value (Before volume heal full) and final value +## (After gluster volume heal vol full) and compare. 
+ +function getdiff() +{ + val=10 + if [ "$1" == "$3" ] + then + if [ "$2" == "$4" ] + then + val=0 + else + val=20 + fi + fi + + echo $val +} + + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info; +TEST $CLI volume create $V0 replica 2 $H0:$B0/brick1 $H0:$B0/brick2; +TEST $CLI volume start $V0; +mount -t glusterfs $H0:/$V0 $M0; +B0_hiphenated=`echo $B0 | tr '/' '-'` +kill -9 `cat /var/lib/glusterd/vols/$V0/run/$H0$B0_hiphenated-brick1.pid` ; + +mkdir $M0/{a,b,c}; +echo "GLUSTERFS" >> $M0/a/file; + +TEST $CLI volume start $V0 force; +sleep 5 +TEST $CLI volume heal $V0 full; +sleep 5 + +##First Brick Initial(Before full type self heal) value +FBI=`gluster volume heal $V0 info healed | grep entries | awk '{print $4}' | head -n 1` + +##Second Brick Initial Value +SBI=`gluster volume heal $V0 info healed | grep entries | awk '{print $4}' | tail -n 1` +TEST $CLI volume heal $V0 full; + +sleep 5 + +##First Brick Final value +##Number of entries from output of + +FBF=`gluster volume heal $V0 info healed | grep entries | awk '{print $4}' | head -n 1` + +##Second Brick Final Value +SBF=`gluster volume heal $V0 info healed | grep entries | awk '{print $4}' | tail -n 1` + +##get the difference of values +EXPECT "0" getdiff $FBI $SBI $FBF $SBF; + +## Tests after this comment checks for the background self heal + +TEST mkdir $M0/d +kill -9 `cat /var/lib/glusterd/vols/$V0/run/$H0$B0_hiphenated-brick1.pid` ; +TEST $CLI volume set $V0 self-heal-daemon off +dd if=/dev/random of=$M0/d/file1 bs=100M count=1 2>/dev/null; +TEST $CLI volume start $V0 force +sleep 3 +TEST ls -l $M0/d + +cleanup; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 21272c0d728..693c3a070a2 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1624,6 +1624,17 @@ afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this, if (ret) gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set " "sh-failed to %d", 
local->loc.path, sh_failed); + + if (local->self_heal.actual_sh_started == _gf_true && + sh_failed == 0) { + ret = dict_set_int32 (xattr, "actual-sh-done", 1); + if (ret) + gf_log(this->name, GF_LOG_ERROR, "%s: Failed to" + " set actual-sh-done to %d", + local->loc.path, + local->self_heal.actual_sh_started); + } + } out: AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index b6593a7ec6f..49484bf7b7d 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -2156,6 +2156,8 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this) afr_private_t * priv = NULL; afr_local_t * local = NULL; afr_self_heal_t * sh = NULL; + afr_local_t * orig_frame_local = NULL; + afr_self_heal_t * orig_frame_sh = NULL; char sh_type_str[256] = {0,}; gf_boolean_t split_brain = _gf_false; @@ -2189,6 +2191,9 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this) FRAME_SU_UNDO (bgsh_frame, afr_local_t); if (!sh->unwound && sh->unwind) { + orig_frame_local = sh->orig_frame->local; + orig_frame_sh = &orig_frame_local->self_heal; + orig_frame_sh->actual_sh_started = _gf_true; sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno, sh->op_failed); } diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 6501e596c32..42bc0afc18d 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -666,6 +666,8 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this) "self-healing file %s from subvolume %s to %d other", local->loc.path, priv->children[sh->source]->name, sh->active_sinks); + + sh->actual_sh_started = _gf_true; afr_sh_data_trim_sinks (frame, this); } diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 
60c140c1a66..0c70d4b6e60 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -2234,6 +2234,7 @@ afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this) "merging all entries as a conservative decision", local->loc.path); + sh->actual_sh_started = _gf_true; afr_sh_entry_open (frame, this); return 0; diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 6e98f2a6375..647ee47a178 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -341,6 +341,7 @@ afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this) local->loc.path, priv->children[source]->name, sh->active_sinks); + sh->actual_sh_started = _gf_true; STACK_WIND (frame, afr_sh_metadata_getxattr_cbk, priv->children[source], priv->children[source]->fops->getxattr, diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index b3dee3f592c..214c0fff44e 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -307,7 +307,7 @@ _crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child, shd_event_t *event = NULL; int32_t sh_failed = 0; gf_boolean_t split_brain = 0; - + int32_t actual_sh_done = 0; priv = this->private; shd = &priv->shd; if (crawl_data->crawl == INDEX) { @@ -328,26 +328,42 @@ _crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child, } } - if (xattr_rsp) + if (xattr_rsp) { ret = dict_get_int32 (xattr_rsp, "sh-failed", &sh_failed); + ret = dict_get_int32 (xattr_rsp, "actual-sh-done", &actual_sh_done); + } + split_brain = afr_is_split_brain (this, child->inode); - if ((op_ret < 0 && op_errno == EIO) || split_brain) + + if ((op_ret < 0 && op_errno == EIO) || split_brain) { eh = shd->split_brain; - else if ((op_ret < 0) || sh_failed) + } else if ((op_ret < 0) || sh_failed) { eh = shd->heal_failed; - 
else - eh = shd->healed; + } else if (actual_sh_done == 1) { + eh = shd->healed; + } + ret = -1; - event = GF_CALLOC (1, sizeof (*event), gf_afr_mt_shd_event_t); - if (!event) - goto out; - event->child = crawl_data->child; - event->path = path; - ret = eh_save_history (eh, event); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "%s:Failed to save to " - "event history, (%d, %s)", path, op_ret, strerror (op_errno)); - goto out; + + if (eh != NULL) { + event = GF_CALLOC (1, sizeof (*event), gf_afr_mt_shd_event_t); + if (!event) + goto out; + event->child = crawl_data->child; + event->path = path; + + ret = eh_save_history (eh, event); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "%s:Failed to save " + "to event history, (%d, %s)", path, op_ret, + strerror (op_errno)); + + goto out; + } + } else { + gf_log (this->name, GF_LOG_DEBUG, "%s:Self heal already done ", + path); + } ret = 0; out: diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 3f2bbbebc24..a0d1f3a7466 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -241,7 +241,7 @@ typedef struct { gf_boolean_t entries_skipped; int op_failed; - + gf_boolean_t actual_sh_started; gf_boolean_t sync_done; gf_boolean_t data_lock_held; gf_boolean_t eof_reached; -- cgit