diff options
| -rw-r--r-- | tests/afr.rc | 12 | ||||
| -rwxr-xr-x | tests/bugs/bug-1015990-rep.t | 14 | ||||
| -rwxr-xr-x | tests/bugs/bug-1015990.t | 30 | ||||
| -rw-r--r-- | tests/bugs/bug-1101647.t | 8 | ||||
| -rw-r--r-- | tests/bugs/bug-1190069-afr-stale-index-entries.t | 57 | ||||
| -rw-r--r-- | tests/bugs/bug-874498.t | 2 | ||||
| -rw-r--r-- | tests/bugs/bug-957877.t | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 14 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.c | 55 | 
9 files changed, 137 insertions, 57 deletions
diff --git a/tests/afr.rc b/tests/afr.rc index 938f9c07ab2..50067274d89 100644 --- a/tests/afr.rc +++ b/tests/afr.rc @@ -73,11 +73,9 @@ function is_file_heal_done {  function count_sh_entries()  { -    val1=0 -    for g in `ls $1/.glusterfs/indices/xattrop` -    do -	val1=$(( val1 + 1 )) -    done - -    echo $val1; +        ls $1/.glusterfs/indices/xattrop | grep -v "xattrop-" | wc -l +} +function count_index_entries() +{ +    ls $1/.glusterfs/indices/xattrop | wc -l  } diff --git a/tests/bugs/bug-1015990-rep.t b/tests/bugs/bug-1015990-rep.t index bca0d7aff07..5a6564d9f7a 100755 --- a/tests/bugs/bug-1015990-rep.t +++ b/tests/bugs/bug-1015990-rep.t @@ -32,8 +32,6 @@ sleep 5  for  i in  {1..100}; do echo "STRING" > $M0/File$i; done -brick_2_sh_entries=$(count_sh_entries $B0/$V0"2") -brick_4_sh_entries=$(count_sh_entries $B0/$V0"4")  command_output=$(gluster volume heal $V0 statistics heal-count replica $H0:$B0/$V0"1") @@ -58,19 +56,9 @@ brick_2_entries_count=$count  xattrop_count_brick_2=$(count_sh_entries $B0/$V0"2") -##Remove the count of the xattrop-gfid entry count as it does not contribute -##to the number of files to be healed -sub_val=1 -xattrop_count_brick_2=$(($xattrop_count_brick_2-$sub_val)) +EXPECT $brick_2_entries_count echo $xattrop_count_brick_2 -ret=0 -if [ "$xattrop_count_brick_2" -eq "$brick_2_entries_count" ] -        then -                ret=$(($ret + $sub_val)) -fi - -EXPECT "1" echo $ret  ## Finish up  TEST $CLI volume stop $V0;  EXPECT 'Stopped' volinfo_field $V0 'Status'; diff --git a/tests/bugs/bug-1015990.t b/tests/bugs/bug-1015990.t index 165af5168a0..43b7d6c585c 100755 --- a/tests/bugs/bug-1015990.t +++ b/tests/bugs/bug-1015990.t @@ -54,36 +54,10 @@ done <<< "$command_output"  brick_2_entries_count=$(($count-$value))  brick_4_entries_count=$value -  xattrop_count_brick_2=$(count_sh_entries $B0/$V0"2") -##Remove the count of the xattrop-gfid entry count as it does not contribute -##to the number of files to be healed - -sub_val=1 
-xattrop_count_brick_2=$(($xattrop_count_brick_2-$sub_val)) -  xattrop_count_brick_4=$(count_sh_entries $B0/$V0"4") -##Remove xattrop-gfid entry count - -xattrop_count_brick_4=$(($xattrop_count_brick_4-$sub_val)) - - -ret=0 -if [ "$xattrop_count_brick_2" -eq "$brick_2_entries_count" ] -        then -                ret=$(($ret + $sub_val)) -fi - -EXPECT "1" echo $ret - - -ret=0 -if [ "$xattrop_count_brick_4" -eq "$brick_4_entries_count" ] -        then -                ret=$(($ret + $sub_val)) -fi - -EXPECT "1" echo $ret +EXPECT $brick_2_entries_count echo $xattrop_count_brick_2 +EXPECT $brick_4_entries_count echo $xattrop_count_brick_4  ## Finish up  TEST $CLI volume stop $V0; diff --git a/tests/bugs/bug-1101647.t b/tests/bugs/bug-1101647.t index ccfa7e2138b..2337dac6feb 100644 --- a/tests/bugs/bug-1101647.t +++ b/tests/bugs/bug-1101647.t @@ -13,13 +13,13 @@ TEST $CLI volume start $V0;  TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0  EXPECT_WITHIN 20 "Y" glustershd_up_status -#Create base entry in indices/xattrop and indices/base_indices_holder +#Create base entry in indices/xattrop  echo "Data">$M0/file  TEST $CLI volume heal $V0 -#Entries from indices/xattrop and indices/base_indices_holder should not be cleared after a heal. -EXPECT 1 count_sh_entries  $B0/$V0"1" -EXPECT 1 count_sh_entries  $B0/$V0"2" +#Entries from indices/xattrop should not be cleared after a heal. +EXPECT 1 count_index_entries  $B0/$V0"1" +EXPECT 1 count_index_entries  $B0/$V0"2"  TEST kill_brick $V0 $H0 $B0/${V0}2  echo "More data">>$M0/file diff --git a/tests/bugs/bug-1190069-afr-stale-index-entries.t b/tests/bugs/bug-1190069-afr-stale-index-entries.t new file mode 100644 index 00000000000..8d2871e8270 --- /dev/null +++ b/tests/bugs/bug-1190069-afr-stale-index-entries.t @@ -0,0 +1,57 @@ +#!/bin/bash +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc +. 
$(dirname $0)/../afr.rc +cleanup; + +#Stale entries in xattrop folder for files which do not need heal must be removed during the next index heal crawl. + +TEST glusterd; +TEST pidof glusterd; +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}; +TEST $CLI volume set $V0 cluster.self-heal-daemon off +TEST $CLI volume start $V0; +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 +TEST `echo hello>$M0/datafile` +TEST touch $M0/mdatafile + +#Create split-brain and reset the afr xattrs, so that we have only the entry inside xattrop folder. +#This is to simulate the case where the brick crashed just before pre-op happened, but index xlator created the entry inside xattrop folder. + +#Create data, metadata SB. +TEST kill_brick $V0 $H0 $B0/$V0"1" +TEST stat $M0/datafile +TEST `echo append>>$M0/datafile` +TEST chmod +x $M0/mdatafile +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 1 +TEST kill_brick $V0 $H0 $B0/$V0"0" +TEST stat $M0/datafile +TEST `echo append>>$M0/datafile` +TEST chmod +x $M0/mdatafile +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 0 +TEST ! cat $M0/datafile + +TEST $CLI volume set $V0 cluster.self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_in_shd $V0 1 + +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT '2' echo $(count_sh_entries $B0/$V0"0") +EXPECT_WITHIN $HEAL_TIMEOUT '2' echo $(count_sh_entries $B0/$V0"1") + +#Reset xattrs and trigger heal. 
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}0/datafile +TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000000 $B0/${V0}1/datafile + +TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}0/mdatafile +TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000000 $B0/${V0}1/mdatafile + +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "0" afr_get_pending_heal_count $V0 +EXPECT_WITHIN $HEAL_TIMEOUT '0' echo $(count_sh_entries $B0/$V0"0") +EXPECT_WITHIN $HEAL_TIMEOUT '0' echo $(count_sh_entries $B0/$V0"1") + +cleanup diff --git a/tests/bugs/bug-874498.t b/tests/bugs/bug-874498.t index 843698d2a61..19c09e78cad 100644 --- a/tests/bugs/bug-874498.t +++ b/tests/bugs/bug-874498.t @@ -56,7 +56,7 @@ TEST $CLI volume heal $V0  ##Expected number of entries are 0 in the .glusterfs/indices/xattrop directory -EXPECT_WITHIN $HEAL_TIMEOUT '1' count_sh_entries $FILEN; +EXPECT_WITHIN $HEAL_TIMEOUT '0' count_sh_entries $FILEN;  TEST $CLI volume stop $V0;  TEST $CLI volume delete $V0; diff --git a/tests/bugs/bug-957877.t b/tests/bugs/bug-957877.t index 52bbd62ec2a..90836679de9 100644 --- a/tests/bugs/bug-957877.t +++ b/tests/bugs/bug-957877.t @@ -23,7 +23,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1  TEST $CLI volume heal $V0  # Wait for self-heal to complete -EXPECT_WITHIN $HEAL_TIMEOUT '1' count_sh_entries $BRICK; +EXPECT_WITHIN $HEAL_TIMEOUT '0' count_sh_entries $BRICK;  TEST getfattr -n "user.foo" $B0/${V0}0/f1; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index c6b14e1def7..7fb6e2b9dc4 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1141,6 +1141,11 @@ afr_selfheal_do (call_frame_t *frame, xlator_t *this, uuid_t gfid)  	if (ret)  		goto out; +        if (!(data_selfheal || metadata_selfheal || entry_selfheal)) { +              
  ret = 2; +                goto out; +        } +  	if (data_selfheal)                  data_ret = afr_selfheal_data (frame, this, inode); @@ -1169,9 +1174,12 @@ out:          return ret;  }  /* - * This is the entry point for healing a given GFID - * The function returns 0 if self-heal was successful, appropriate errno - * in case of a failure and 1 in case self-heal was never needed on the gfid. + * This is the entry point for healing a given GFID. The return values for this + * function are as follows: + * '0' if the self-heal is successful + * '1' if the afr-xattrs are non-zero (due to on-going IO) and no heal is needed + * '2' if the afr-xattrs are all-zero and no heal is needed + * $errno if the heal on the gfid failed.   */  int diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index 992ed28b994..7c235aca429 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -313,6 +313,53 @@ afr_shd_index_purge (xlator_t *subvol, inode_t *inode, char *name)  	return ret;  } +void +afr_shd_zero_xattrop (xlator_t *this, uuid_t gfid) +{ + +        call_frame_t *frame = NULL; +        inode_t *inode = NULL; +        afr_private_t *priv = NULL; +        dict_t  *xattr = NULL; +        int ret = 0; +        int i = 0; +        int raw[AFR_NUM_CHANGE_LOGS] = {0}; + +        priv = this->private; +        frame = afr_frame_create (this); +        if (!frame) +                goto out; +        inode = afr_inode_find (this, gfid); +        if (!inode) +                goto out; +        xattr = dict_new(); +        if (!xattr) +                goto out; +        ret = dict_set_static_bin (xattr, AFR_DIRTY, raw, +                                   sizeof(int) * AFR_NUM_CHANGE_LOGS); +        if (ret) +                goto out; +        for (i = 0; i < priv->child_count; i++) { +                ret = dict_set_static_bin (xattr, priv->pending_key[i], raw, +                               
            sizeof(int) * AFR_NUM_CHANGE_LOGS); +                if (ret) +                        goto out; +        } + +        /*Send xattrop to all bricks. Doing a lookup to see if bricks are up or +        * have valid replies for this gfid seems a bit of an overkill.*/ +        for (i = 0; i < priv->child_count; i++) +                afr_selfheal_post_op (frame, this, inode, i, xattr); + +out: +        if (frame) +                AFR_STACK_DESTROY (frame); +        if (inode) +                inode_unref (inode); +        if (xattr) +                dict_unref (xattr); +        return; +}  int  afr_shd_selfheal_name (struct subvol_healer *healer, int child, uuid_t parent, @@ -480,6 +527,14 @@ afr_shd_index_sweep (struct subvol_healer *healer)  				afr_shd_index_purge (subvol, fd->inode,  						     entry->d_name);  			} +                        if (ret == 2) +                        /* If bricks crashed in pre-op after creating +                         * indices/xattrop link but before setting afr +                         * changelogs, we end up with stale xattrop links but +                         * zero changelogs. Remove such entries by sending a +                         * post-op with zero changelogs. +                         */ +                                afr_shd_zero_xattrop (healer->this, gfid);                          ret = 0;  		}  | 
