diff options
| author | Richard Wareing <rwareing@fb.com> | 2015-12-10 22:27:27 -0800 | 
|---|---|---|
| committer | Jeff Darcy <jeff@pl.atyp.us> | 2017-08-01 11:48:43 +0000 | 
| commit | 72d9e7144855b56e23c7a1f160eb5bb956287eaa (patch) | |
| tree | eec3ddd56d78a1e5662e760e518d5f3e51c18963 | |
| parent | eac58993014a23276adf036da91f14efbfe6c84f (diff) | |
cluster/afr: Fix case in PGFID healing where NOOP was not being honored
Summary:
- PGFID healing should not be triggered in the case where there is
  nothing to do (ret = 2).  Instead, this return code should be passed
  back to the heal daemon to trigger the reap of the entry.
- Reworked shd-pgfid-heal.t to queue up heal naturally instead of
  synthetically
Test Plan: - Run tests/basic/afr/shd-pgfid-heal.t
Differential Revision: https://phabricator.fb.com/D2748578
Change-Id: I74300de2b4dce23867f4111548de35f58bf77453
Signed-off-by: Jeff Darcy <jdarcy@fb.com>
Reviewed-on: https://review.gluster.org/17936
Smoke: Gluster Build System <jenkins@build.gluster.org>
Tested-by: Jeff Darcy <jeff@pl.atyp.us>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Jeff Darcy <jeff@pl.atyp.us>
| -rw-r--r-- | tests/basic/afr/shd-pgfid-heal.t | 8 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 9 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.c | 9 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 1 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 7 | 
5 files changed, 27 insertions, 7 deletions
diff --git a/tests/basic/afr/shd-pgfid-heal.t b/tests/basic/afr/shd-pgfid-heal.t index d12d29e13ba..6213e4c6374 100644 --- a/tests/basic/afr/shd-pgfid-heal.t +++ b/tests/basic/afr/shd-pgfid-heal.t @@ -17,6 +17,7 @@ TEST $CLI volume set $V0 nfs.disable on  TEST $CLI volume set $V0 cluster.quorum-type none  #EST $CLI volume set $V0 cluster.favorite-child-by-majority on  #EST $CLI volume set $V0 cluster.favorite-child-by-mtime on +TEST $CLI volume set $V0 cluster.pgfid-self-heal on  TEST $CLI volume set $V0 cluster.favorite-child-policy majority  TEST $CLI volume set $V0 storage.build-pgfid on  TEST $CLI volume set $V0 cluster.metadata-self-heal off @@ -31,13 +32,15 @@ TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 \  cd $M0  mkdir -p a/b/c  dd if=/dev/urandom of=a/b/c/testfile bs=128k count=5 2>/dev/null -MD5=$(md5sum a/b/c/testfile | cut -d\  -f1)  # Kill the SHD while we setup the test  pkill -f gluster/glustershd  # Kill the brick as well such that   TEST kill_brick $V0 $H0 $B0/${V0}1 +echo stuff >> $M0/a/b/c/testfile +MD5=$(md5sum a/b/c/testfile | cut -d\  -f1) +  # Grab the GFID of the file and parent dir  GFID_PARENT_B_RAW=$(getfattr -n trusted.gfid -e hex $B0/${V0}1/a/b 2>/dev/null | grep trusted.gfid | cut -d= -f2)  GFID_PARENT_B_FORMATTED=$(echo "$GFID_PARENT_B_RAW" | awk '{print substr($1,3,8)"-"substr($1,11,4)"-"substr($1,15,4)"-"substr($1,19,4)"-"substr($1,23,12)}') @@ -67,9 +70,6 @@ rm -f $GFID_PARENT_C_LINK_B1  rmdir $B0/${V0}1/a/b  rm -f $GFID_PARENT_B_LINK_B1 -# Now manually queue up the parent directory for healing -touch $B0/${V0}3/.glusterfs/indices/xattrop/$GFID_FORMATTED -  # Kick off the SHD and wait 30 seconds for healing to take place  TEST gluster vol start patchy force  EXPECT_WITHIN 30 "0" get_pending_heal_count $V0 diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 98ad65f29fd..94ecf454651 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ 
b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1310,7 +1310,6 @@ afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type,                          }                  }          } -          if (ret < 0) {                  status = "Failed";                  loglevel = GF_LOG_DEBUG; @@ -2193,6 +2192,9 @@ afr_selfheal (xlator_t *this, uuid_t gfid)          char *ancestry_path = "Unknown";          char *pgfid_str = NULL;          char *gfid_str = NULL; +        afr_private_t *priv = NULL; + +        priv = this->private;  heal_gfid:   	frame = afr_frame_create (this); @@ -2205,8 +2207,9 @@ heal_gfid:          ret = afr_selfheal_do (frame, this, gfid); -        if (tried_parent == _gf_false && ret && -                        !gf_uuid_is_null (local->heal_pgfid)) { +        if (priv->pgfid_self_heal == _gf_true && +            tried_parent == _gf_false && (ret != 0 || ret != 2) && +            !gf_uuid_is_null (local->heal_pgfid)) {                  tried_parent = _gf_true;                  pgfid_str = alloca (strlen (UUID0_STR) + 1);                  gfid_str = alloca (strlen (UUID0_STR) + 1); diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 31949bdcea4..fc5fda6844f 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -164,6 +164,9 @@ reconfigure (xlator_t *this, dict_t *options)          GF_OPTION_RECONF ("entry-self-heal", priv->entry_self_heal, options,                            bool, out); +        GF_OPTION_RECONF ("pgfid-self-heal", priv->pgfid_self_heal, +                          options, bool, out); +          GF_OPTION_RECONF ("data-self-heal-window-size",                            priv->data_self_heal_window_size, options,                            uint32, out); @@ -421,6 +424,8 @@ init (xlator_t *this)          GF_OPTION_INIT ("shd-wait-qlength", priv->shd.wait_qlength,                           uint32, out); +        GF_OPTION_INIT ("pgfid-self-heal", priv->pgfid_self_heal, 
bool, out); +          GF_OPTION_INIT ("background-self-heal-count",                          priv->background_self_heal_count, uint32, out); @@ -1102,5 +1107,9 @@ struct volume_options options[] = {                           " with identical mtime and size in more than half the "                           "number of bricks in the replica.",          }, +        { .key  = {"pgfid-self-heal"}, +          .type = GF_OPTION_TYPE_BOOL, +          .default_value = "off", +        },          { .key  = {NULL} },  }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index abd2f470131..1d6a0dc5157 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -98,6 +98,7 @@ typedef struct _afr_private {          unsigned char *child_up;          int64_t *child_latency; +        gf_boolean_t pgfid_self_heal;          unsigned char *local;          char **pending_key; diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 20697aff6d4..d29f32d1963 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -1313,6 +1313,13 @@ struct volopt_map_entry glusterd_volopt_map[] = {            .op_version = GD_OP_VERSION_3_7_12,            .flags      = OPT_FLAG_CLIENT_OPT          }, +        { .key         = "cluster.pgfid-self-heal", +          .voltype     = "cluster/replicate", +          .op_version  = 2, +          .flags       = OPT_FLAG_CLIENT_OPT, +          .description = "Use PGFID attribute if available to remediate " +                         "failed heals." +        },          /* stripe xlator options */          { .key         = "cluster.stripe-block-size",  | 
