diff options
| -rw-r--r-- | tests/bugs/replicate/bug-1637249-gfid-heal.t | 149 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 52 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 6 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-name.c | 46 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 2 | 
5 files changed, 204 insertions, 51 deletions
diff --git a/tests/bugs/replicate/bug-1637249-gfid-heal.t b/tests/bugs/replicate/bug-1637249-gfid-heal.t new file mode 100644 index 00000000000..e824f14531e --- /dev/null +++ b/tests/bugs/replicate/bug-1637249-gfid-heal.t @@ -0,0 +1,149 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +TEST glusterd; +TEST pidof glusterd; +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}; +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume set $V0 entry-self-heal off +TEST $CLI volume start $V0; +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 + +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +############################################################################### + +# Test for gfid + name heal when there is no 'source' brick, i.e. parent dir +# xattrs are in split-brain or have dirty xattrs. + +TEST mkdir $M0/dir_pending +TEST dd if=/dev/urandom of=$M0/dir_pending/file1 bs=1024 count=1024 +TEST mkdir $M0/dir_pending/dir11 +TEST mkdir $M0/dir_dirty +TEST touch $M0/dir_dirty/file2 + +# Set pending entry xattrs on dir_pending and remove gfid of entries under it on one brick. +TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir_pending +TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/dir_pending + +gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/dir_pending/file1) +gfid_str_f1=$(gf_gfid_xattr_to_str $gfid_f1) +TEST setfattr -x trusted.gfid $B0/${V0}1/dir_pending/file1 +TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1 + +gfid_d11=$(gf_get_gfid_xattr $B0/${V0}0/dir_pending/dir11) +gfid_str_d11=$(gf_gfid_xattr_to_str $gfid_d11) +TEST setfattr -x trusted.gfid $B0/${V0}1/dir_pending/dir11 +TEST rm $B0/${V0}1/.glusterfs/${gfid_str_d11:0:2}/${gfid_str_d11:2:2}/$gfid_str_d11 + + +# Set dirty entry xattrs on dir_dirty and remove gfid of entries under it on one brick. +TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/dir_dirty +gfid_f2=$(gf_get_gfid_xattr $B0/${V0}0/dir_dirty/file2) +gfid_str_f2=$(gf_gfid_xattr_to_str $gfid_f2) +TEST setfattr -x trusted.gfid $B0/${V0}1/dir_dirty/file2 +TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2 + +# Create a file under dir_pending directly on the backend only on 1 brick +TEST touch $B0/${V0}1/dir_pending/file3 + +# Create a file under dir_pending directly on the backend on all bricks +TEST touch $B0/${V0}0/dir_pending/file4 +TEST touch $B0/${V0}1/dir_pending/file4 + +# Stop  & start the volume and mount client again. +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $CLI volume stop  $V0 +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +TEST stat $M0/dir_pending/file1 +EXPECT "$gfid_f1" gf_get_gfid_xattr $B0/${V0}1/dir_pending/file1 +TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1 + +TEST stat $M0/dir_pending/dir11 +EXPECT "$gfid_d11" gf_get_gfid_xattr $B0/${V0}1/dir_pending/dir11 +TEST stat $B0/${V0}1/.glusterfs/${gfid_str_d11:0:2}/${gfid_str_d11:2:2}/$gfid_str_d11 + + +TEST stat $M0/dir_dirty/file2 +EXPECT "$gfid_f2" gf_get_gfid_xattr $B0/${V0}1/dir_dirty/file2 +TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2 + +TEST stat $M0/dir_pending/file3 # This assigns gfid on 2nd brick and heals the entry on to the 1st brick. +gfid_f3=$(gf_get_gfid_xattr $B0/${V0}1/dir_pending/file3) +TEST [ ! -z "$gfid_f3" ] +EXPECT "$gfid_f3" gf_get_gfid_xattr $B0/${V0}0/dir_pending/file3 + +TEST stat $M0/dir_pending/file4 +gfid_f4=$(gf_get_gfid_xattr $B0/${V0}0/dir_pending/file4) +TEST [ ! -z "$gfid_f4" ] +EXPECT "$gfid_f4" gf_get_gfid_xattr $B0/${V0}1/dir_pending/file4 +############################################################################### + +# Test for gfid + name heal when all bricks are 'source', i.e. parent dir +# does not have any pending or dirty xattrs. + +TEST mkdir $M0/dir_clean +TEST dd if=/dev/urandom of=$M0/dir_clean/file1 bs=1024 count=1024 +TEST mkdir $M0/dir_clean/dir11 + +gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/dir_clean/file1) +gfid_str_f1=$(gf_gfid_xattr_to_str $gfid_f1) +TEST setfattr -x trusted.gfid $B0/${V0}1/dir_clean/file1 +TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1 + +gfid_d11=$(gf_get_gfid_xattr $B0/${V0}0/dir_clean/dir11) +gfid_str_d11=$(gf_gfid_xattr_to_str $gfid_d11) +TEST setfattr -x trusted.gfid $B0/${V0}1/dir_clean/dir11 +TEST rm $B0/${V0}1/.glusterfs/${gfid_str_d11:0:2}/${gfid_str_d11:2:2}/$gfid_str_d11 + +# Create a file under dir_clean directly on the backend only on 1 brick +TEST touch $B0/${V0}1/dir_clean/file3 + +# Create a file under dir_clean directly on the backend on all bricks +TEST touch $B0/${V0}0/dir_clean/file4 +TEST touch $B0/${V0}1/dir_clean/file4 + +# Stop  & start the volume and mount client again. +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $CLI volume stop  $V0 +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +TEST stat $M0/dir_clean/file1 +EXPECT "$gfid_f1" gf_get_gfid_xattr $B0/${V0}1/dir_clean/file1 +TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1 + +TEST stat $M0/dir_clean/dir11 +EXPECT "$gfid_d11" gf_get_gfid_xattr $B0/${V0}1/dir_clean/dir11 +TEST stat $B0/${V0}1/.glusterfs/${gfid_str_d11:0:2}/${gfid_str_d11:2:2}/$gfid_str_d11 + +TEST stat $M0/dir_clean/file3 # This assigns gfid on 2nd brick and heals the entry on to the 1st brick. +gfid_f3=$(gf_get_gfid_xattr $B0/${V0}1/dir_clean/file3) +TEST [ ! -z "$gfid_f3" ] +EXPECT "$gfid_f3" gf_get_gfid_xattr $B0/${V0}0/dir_clean/file3 + +TEST stat $M0/dir_clean/file4 +gfid_f4=$(gf_get_gfid_xattr $B0/${V0}0/dir_clean/file4) +TEST [ ! -z "$gfid_f4" ] +EXPECT "$gfid_f4" gf_get_gfid_xattr $B0/${V0}1/dir_clean/file4 +############################################################################### + +cleanup; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 44d724112a9..a05c90f1b33 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -22,7 +22,8 @@ afr_heal_synctask (xlator_t *this, afr_local_t *local);  int  afr_lookup_and_heal_gfid (xlator_t *this, inode_t *parent, const char *name,                            inode_t *inode, struct afr_reply *replies, -                          int source,  unsigned char *sources, void *gfid) +                          int source,  unsigned char *sources, void *gfid, +                          int *gfid_idx)  {          afr_private_t *priv = NULL;          call_frame_t   *frame    = NULL; @@ -36,15 +37,30 @@ afr_lookup_and_heal_gfid (xlator_t *this, inode_t *parent, const char *name,          priv = this->private;          wind_on = alloca0 (priv->child_count); -        ia_type = replies[source].poststat.ia_type; -        if ((ia_type == IA_INVAL) && -            (AFR_COUNT(sources, priv->child_count) == priv->child_count)) { -                /* If a file is present on some bricks of the replica but parent -                 * dir does not have pending xattrs, all bricks are sources and -                 * the 'source' we  selected earlier might be one where the file -                 * is not actually present. Hence check if file is present in -                 * any of the sources.*/ -                for (i = 0; i < priv->child_count; i++) { +        if (source >= 0 && replies[source].valid && replies[source].op_ret == 0) +                ia_type = replies[source].poststat.ia_type; + +        if (ia_type != IA_INVAL) +                goto heal; + +        /* If ia_type is still invalid, it means either +         * (a)'source' was -1, i.e. parent dir pending xattrs are in split-brain +         * (or) (b) The parent dir pending xattrs are all zeroes (i.e. all bricks +         * are sources) and the 'source' we selected earlier might be the one where +         * the file is not actually present. +         * +         * In both cases, let us pick a brick with a successful reply and use its +         * ia_type. +         */ +        for (i = 0; i < priv->child_count; i++) { +                if (source == -1) { +                        /* case (a) above. */ +                        if (replies[i].valid && replies[i].op_ret == 0) { +                                ia_type = replies[i].poststat.ia_type; +                                break; +                        } +                } else { +                        /* case (b) above. */                          if (i == source)                                  continue;                          if (sources[i] && replies[i].valid && @@ -55,6 +71,7 @@ afr_lookup_and_heal_gfid (xlator_t *this, inode_t *parent, const char *name,                  }          } +heal:          /* gfid heal on those subvolumes that do not have gfid associated           * with the inode and update those replies.           */ @@ -104,7 +121,22 @@ afr_lookup_and_heal_gfid (xlator_t *this, inode_t *parent, const char *name,                  afr_reply_wipe (&replies[i]);                  afr_reply_copy (&replies[i], &local->replies[i]);          } +        if (gfid_idx && (*gfid_idx == -1)) { +                /*Pick a brick where the gifd heal was successful.*/ +                for (i = 0; i < priv->child_count; i++) { +                        if (!wind_on[i]) +                                continue; +                        if (replies[i].valid && replies[i].op_ret == 0 && +                            !gf_uuid_is_null(replies[i].poststat.ia_gfid)) { +                                *gfid_idx = i; +                                break; +                        } +                } +        }  out: +        if (gfid_idx && (*gfid_idx == -1) && (ret == 0)) { +                ret = -afr_final_errno(local, priv); +        }          loc_wipe (&loc);          if (frame)                  AFR_STACK_DESTROY (frame); diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 8514f0df595..cb03a3f9717 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -188,7 +188,8 @@ __afr_selfheal_heal_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,          if (replies[source].op_ret == 0) {                  ret = afr_lookup_and_heal_gfid (this, fd->inode, name,                                                  inode, replies, source, sources, -                                             &replies[source].poststat.ia_gfid); +                                              &replies[source].poststat.ia_gfid, +                                              NULL);                  if (ret)                          return ret;          } @@ -321,7 +322,8 @@ __afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,          ret = afr_lookup_and_heal_gfid (this, fd->inode, name, inode, replies,                                          source, sources, -                                        &replies[source].poststat.ia_gfid); +                                        &replies[source].poststat.ia_gfid, +                                        NULL);          if (ret)                  return ret; diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index 0a5be29d5ee..869bdbd49c6 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -19,7 +19,8 @@ __afr_selfheal_assign_gfid (xlator_t *this, inode_t *parent, uuid_t pargfid,                              const char *bname, inode_t *inode,                              struct afr_reply *replies, void *gfid,                              unsigned char *locked_on, int source, -                            unsigned char *sources, gf_boolean_t is_gfid_absent) +                            unsigned char *sources, gf_boolean_t is_gfid_absent, +                            int *gfid_idx)  {  	int             ret          = 0;          int             up_count     = 0; @@ -48,7 +49,7 @@ __afr_selfheal_assign_gfid (xlator_t *this, inode_t *parent, uuid_t pargfid,          }          afr_lookup_and_heal_gfid (this, parent, bname, inode, replies, source, -                                  sources, gfid); +                                  sources, gfid, gfid_idx);  out:  	return ret; @@ -152,35 +153,6 @@ __afr_selfheal_name_expunge (xlator_t *this, inode_t *parent, uuid_t pargfid,  } -/* This function is to be called after ensuring that there is no gfid mismatch - * for the inode across multiple sources - */ -static int -afr_selfheal_gfid_idx_get (xlator_t *this, struct afr_reply *replies, -                           unsigned char *sources) -{ -        int             i        =  0; -        int             gfid_idx = -1; -        afr_private_t  *priv     =  NULL; - -        priv = this->private; - -        for (i = 0; i < priv->child_count; i++) { -                if (!replies[i].valid || replies[i].op_ret != 0) -                        continue; - -                if (!sources[i]) -                        continue; - -                if (gf_uuid_is_null (replies[i].poststat.ia_gfid)) -                        continue; - -                gfid_idx = i; -                break; -        } -        return gfid_idx; -} -  static gf_boolean_t  afr_selfheal_name_need_heal_check (xlator_t *this, struct afr_reply *replies)  { @@ -421,21 +393,19 @@ __afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent,                  gfid = gfid_req;          } else {                  gfid = &replies[gfid_idx].poststat.ia_gfid; +                if (source == -1) +                        /* Either entry split-brain or dirty xattrs are +                        * present on parent.*/ +                        source = gfid_idx;          }          is_gfid_absent = (gfid_idx == -1) ? _gf_true : _gf_false;  	ret = __afr_selfheal_assign_gfid (this, parent, pargfid, bname, inode,                                            replies, gfid, locked_on, source, -                                          sources, is_gfid_absent); +                                          sources, is_gfid_absent, &gfid_idx);          if (ret)                  return ret; -        if (gfid_idx == -1) { -                gfid_idx = afr_selfheal_gfid_idx_get (this, replies, sources); -                if (gfid_idx == -1) -                        return -1; -        } -  	ret = __afr_selfheal_name_impunge (frame, this, parent, pargfid,                                             bname, inode, replies, gfid_idx);          if (ret == -EIO) diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index bac5e51341e..d7491e1522d 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -113,7 +113,7 @@ afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode);  int  afr_lookup_and_heal_gfid (xlator_t *this, inode_t *parent, const char *name,                            inode_t *inode, struct afr_reply *replies, int source, -                          unsigned char *sources, void *gfid); +                          unsigned char *sources, void *gfid, int *gfid_idx);  int  afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,  | 
