diff options
| author | karthik-us <ksubrahm@redhat.com> | 2017-06-07 15:56:13 +0530 | 
|---|---|---|
| committer | Jeff Darcy <jeff@pl.atyp.us> | 2017-07-18 15:24:54 +0000 | 
| commit | 657d78dbad118e511e1fca8b1badb9f8ae7a6f60 (patch) | |
| tree | 5c2001d6dab7536d8ffeccb1dafc6e5585d7d07c | |
| parent | ae14513eb82929662b11e4c304877030a7d685cd (diff) | |
cluster/afr: GFID split-brain resolution with existing CLI
Problem:
Currently there is no way for an admin to resolve a gfid split-brain
from the CLI based on a policy such as choice of brick, mtime,
or size.
Fix:
With the existing CLI options based on size, mtime, and choice of
brick, we do a lookup on the parent for the specified file. As
part of the lookup, if we find a gfid mismatch, we resolve it
based on the policy and return. If the file is not in gfid split-
brain, then we check for data and metadata split-brain in the
getxattr code path, and resolve them if any are found.
This works only when the CLI is given the absolute path of the file,
not the gfid of the file. Hence the source-brick policy
without any file path will also not resolve gfid split-brain,
since it uses the gfids of the files. It can still resolve any other
type of split-brain, and it skips the gfid mismatch resolution with
the usual error message.
Reverting the change https://review.gluster.org/17290. This patch
resolves the issue.
Fixes gluster/glusterfs#135
Change-Id: Iaeba6fc32f184a34255d03be87cda02773130a09
BUG: 1459530
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://review.gluster.org/17485
Reviewed-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
| -rw-r--r-- | heal/src/glfs-heal.c | 114 | ||||
| -rw-r--r-- | tests/basic/afr/gfid-mismatch-resolution-with-cli.t | 168 | ||||
| -rw-r--r-- | tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t | 3 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 18 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 250 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 107 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-name.c | 65 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 8 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.c | 2 | 
9 files changed, 584 insertions, 151 deletions
diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c index fb997948f57..27115f3ca6c 100644 --- a/heal/src/glfs-heal.c +++ b/heal/src/glfs-heal.c @@ -21,6 +21,7 @@  #include <string.h>  #include <time.h>  #include "glusterfs.h" +#include <libgen.h>  #if (HAVE_LIB_XML)  #include <libxml/encoding.h> @@ -1031,26 +1032,43 @@ _validate_directory (dict_t *xattr_req, char *file)  int  glfsh_heal_splitbrain_file (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, -                           char *file, dict_t *xattr_req) +                            char *file, dict_t *xattr_req)  { -        int          ret        = -1; -        int          reval      = 0; -        loc_t        loc        = {0, }; -        char        *path       = NULL; -        char        *filename   = NULL; -        struct iatt  iatt       = {0, }; -        xlator_t    *xl         = top_subvol; -        dict_t      *xattr_rsp  = NULL; -        char        *sh_fail_msg = NULL; -        int32_t      op_errno   = 0; +        int          ret           = -1; +        int          reval         = 0; +        loc_t        loc           = {0, }; +        char        *path          = NULL; +        char        *path1         = NULL; +        char        *path2         = NULL; +        char        *filename      = NULL; +        char        *filename1     = NULL; +        struct iatt  iatt          = {0, }; +        xlator_t    *xl            = top_subvol; +        dict_t      *xattr_rsp     = NULL; +        char        *sh_fail_msg   = NULL; +        char        *gfid_heal_msg = NULL; +        int32_t      op_errno      = 0; +        gf_boolean_t flag          = _gf_false;          if (!strncmp (file, "gfid:", 5)) {                  filename = gf_strdup(file); +                if (!filename) { +                        printf ("Error allocating memory to filename\n"); +                        goto out; +                }                  path = strtok (filename, ":");                  path = strtok (NULL, 
";");                  gf_uuid_parse (path, loc.gfid);                  loc.path = gf_strdup (uuid_utoa (loc.gfid)); +                if (!loc.path) { +                        printf ("Error allocating memory to path\n"); +                        goto out; +                }                  loc.inode = inode_new (rootloc->inode->table); +                if (!loc.inode) { +                        printf ("Error getting inode\n"); +                        goto out; +                }                  ret = syncop_lookup (xl, &loc, &iatt, 0, xattr_req, &xattr_rsp);                  if (ret) {                          op_errno = -ret; @@ -1065,9 +1083,72 @@ glfsh_heal_splitbrain_file (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,                          ret = -1;                          goto out;                  } -retry: +                path1 = gf_strdup (file); +                if (!path1) { +                        printf ("Error allocating memory to path\n"); +                        ret = -1; +                        goto out; +                } +                path2 = gf_strdup (file); +                if (!path2) { +                        printf ("Error allocating memory to path\n"); +                        ret = -1; +                        goto out; +                } +                path = dirname (path1); +                filename1 = basename (path2); +retry1: +                ret = glfs_resolve (fs, xl, path, &loc, &iatt, reval); +                ESTALE_RETRY (ret, errno, reval, &loc, retry1); +                if (ret) { +                        printf("Lookup failed on %s:%s\n", +                               path, strerror (errno)); +                        goto out; +                } +                GF_FREE ((char *)loc.path); +                loc.path = gf_strdup (file); +                if (!loc.path) { +                        printf ("Error allocating memory for path\n"); +                        ret = -1; +                        
goto out; +                } +                loc.parent = inode_unref (loc.parent); +                loc.parent = inode_ref (loc.inode); +                loc.inode = inode_unref (loc.inode); +                loc.inode = inode_new (rootloc->inode->table); +                if (!loc.inode) { +                        printf ("Error getting inode\n"); +                        ret = -1; +                        goto out; +                } +                loc.name = filename1; +                gf_uuid_copy (loc.pargfid, loc.gfid); +                gf_uuid_clear (loc.gfid); + +                ret = syncop_lookup (xl, &loc, &iatt, 0, xattr_req, &xattr_rsp); +                if (ret) { +                        op_errno = -ret; +                        printf ("Lookup failed on %s:%s.\n", file, +                                strerror(op_errno)); +                        flag = _gf_true; +                } + +                ret = dict_get_str (xattr_rsp, "gfid-heal-msg", &gfid_heal_msg); +                if (!ret) { +                        printf ("%s for file %s\n", gfid_heal_msg, file); +                        loc_wipe (&loc); +                        goto out; +                } +                if (flag) +                        goto out; + +                reval = 0; +                loc_wipe (&loc); +                memset (&iatt, 0, sizeof(iatt)); + +retry2:                  ret = glfs_resolve (fs, xl, file, &loc, &iatt, reval); -                ESTALE_RETRY (ret, errno, reval, &loc, retry); +                ESTALE_RETRY (ret, errno, reval, &loc, retry2);                  if (ret) {                          printf("Lookup failed on %s:%s\n",                                 file, strerror (errno)); @@ -1098,6 +1179,13 @@ retry:  out:          if (xattr_rsp)                  dict_unref (xattr_rsp); +        if (path1) +                GF_FREE (path1); +        if (path2) +                GF_FREE (path2); +        if (filename) +                GF_FREE (filename); 
+        loc_wipe (&loc);          return ret;  } diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-cli.t b/tests/basic/afr/gfid-mismatch-resolution-with-cli.t new file mode 100644 index 00000000000..b739ddc49cc --- /dev/null +++ b/tests/basic/afr/gfid-mismatch-resolution-with-cli.t @@ -0,0 +1,168 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume start $V0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $CLI volume set $V0 cluster.metadata-self-heal off +TEST $CLI volume set $V0 cluster.data-self-heal off +cd $M0 + +##### Healing from latest mtime ###### + +TEST kill_brick $V0 $H0 $B0/${V0}0 +echo "Sink based on mtime" > f1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +echo "Source based on mtime" > f1 + +gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f1) +gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f1) +TEST [ "$gfid_0" != "$gfid_1" ] + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +#We know that first brick has the latest mtime +LATEST_MTIME_MD5=$(md5sum $B0/${V0}0/f1 | awk '{print $1}') + +TEST $CLI volume heal $V0 split-brain latest-mtime /f1 + +#gfid split-brain should be resolved +gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f1) +TEST [ "$gfid_0" == "$gfid_1" ] + +#Heal the data and check the md5sum +TEST $CLI volume set $V0 self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" 
afr_child_up_status_in_shd $V0 1 +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +HEALED_MD5=$(md5sum $B0/${V0}1/f1 | awk '{print $1}') +TEST [ "$LATEST_MTIME_MD5" == "$HEALED_MD5" ] + + +##### Healing from bigger file ###### + +TEST mkdir test +TEST $CLI volume set $V0 self-heal-daemon off +TEST kill_brick $V0 $H0 $B0/${V0}0 +echo "Bigger file" > test/f2 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +echo "Small file" > test/f2 + +gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/test/f2) +gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/test/f2) +TEST [ "$gfid_0" != "$gfid_1" ] + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +#We know that second brick has the bigger file +BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/test/f2 | awk '{print $1}') + +TEST $CLI volume heal $V0 split-brain bigger-file /test/f2 + +#gfid split-brain should be resolved +gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/test/f2) +TEST [ "$gfid_0" == "$gfid_1" ] + +#Heal the data and check the md5sum +TEST $CLI volume set $V0 self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +HEALED_MD5=$(md5sum $B0/${V0}0/test/f2 | awk '{print $1}') +TEST [ "$BIGGER_FILE_MD5" == "$HEALED_MD5" ] + + +#Add one more brick, and heal. 
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 + +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + + +##### Healing from source brick ###### + +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume set $V0 cluster.quorum-type none +TEST kill_brick $V0 $H0 $B0/${V0}0 +echo "We will consider these as sinks" > test/f3 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST kill_brick $V0 $H0 $B0/${V0}2 +echo "We will take this as source" > test/f3 + +gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/test/f3) +gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/test/f3) +gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/test/f3) +TEST [ "$gfid_0" != "$gfid_1" ] +TEST [ "$gfid_1" == "$gfid_2" ] + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +#We will try to heal the split-brain with bigger file option. +#It should fail, since we have same file size in bricks 1 & 2. 
+EXPECT "No bigger file for file /test/f3" $CLI volume heal $V0 split-brain bigger-file /test/f3 + +#Now heal from taking the brick 0 as the source +SOURCE_MD5=$(md5sum $B0/${V0}0/test/f3 | awk '{print $1}') + +TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}0 /test/f3 + +#gfid split-brain should be resolved +gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/test/f3) +gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/test/f3) +TEST [ "$gfid_0" == "$gfid_1" ] +TEST [ "$gfid_0" == "$gfid_2" ] + +#Heal the data and check the md5sum +TEST $CLI volume set $V0 self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +HEALED_MD5_1=$(md5sum $B0/${V0}1/test/f3 | awk '{print $1}') +HEALED_MD5_2=$(md5sum $B0/${V0}2/test/f3 | awk '{print $1}') +TEST [ "$SOURCE_MD5" == "$HEALED_MD5_1" ] +TEST [ "$SOURCE_MD5" == "$HEALED_MD5_2" ] + +cd - +cleanup; diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t index 9f1347bbb44..2f14f838e49 100644 --- a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t +++ b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t @@ -226,6 +226,3 @@ HEALED_MD5=$(md5sum $B0/${V0}2/f4 | cut -d\  -f1)  TEST [ "$MAJORITY_MD5" == "$HEALED_MD5" ]  cleanup; - -#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1450730 -#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1450730 diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 064320441b7..cba18b2ff8f 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -2114,6 +2114,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t 
*this)  	int                 op_errno = 0;  	int                 read_subvol = 0;          int                 par_read_subvol = 0; +        int                 ret         = -1;  	unsigned char      *readable = NULL;  	int                 event = 0;  	struct afr_reply   *replies = NULL; @@ -2124,6 +2125,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)          int                 spb_choice = -1;          ia_type_t           ia_type = IA_INVAL;          afr_read_subvol_args_t args = {0,}; +        char               *gfid_heal_msg = NULL;          priv  = this->private;          local = frame->local; @@ -2258,6 +2260,19 @@ unwind:                          local->op_errno = ENOTCONN;          } +        ret = dict_get_str (local->xattr_req, "gfid-heal-msg", &gfid_heal_msg); +        if (!ret) { +                ret = dict_set_str (local->replies[read_subvol].xdata, +                                    "gfid-heal-msg", gfid_heal_msg); +                if (ret) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                AFR_MSG_DICT_SET_FAILED, +                                "Error setting gfid-heal-msg dict"); +                        local->op_ret = -1; +                        local->op_errno = ENOMEM; +                } +        } +  	AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,  			  local->inode, &local->replies[read_subvol].poststat,  			  local->replies[read_subvol].xdata, @@ -2520,7 +2535,7 @@ afr_lookup_selfheal_wrap (void *opaque)          loc_pargfid (&local->loc, pargfid);  	ret = afr_selfheal_name (frame->this, pargfid, local->loc.name, -                                 &local->cont.lookup.gfid_req); +                                 &local->cont.lookup.gfid_req, local->xattr_req);          if (ret == -EIO)                  goto unwind; @@ -2581,6 +2596,7 @@ afr_lookup_entry_heal (call_frame_t *frame, xlator_t *this)  	}  	if (need_heal) { +  		heal = copy_frame (frame);  		if (heal)  	
		heal->root->pid = GF_CLIENT_PID_SELF_HEALD; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 6b5e50d6c56..9ecd63ce10c 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -20,6 +20,256 @@ void  afr_heal_synctask (xlator_t *this, afr_local_t *local);  int +afr_gfid_sbrain_source_from_src_brick (xlator_t *this, +                                       struct afr_reply *replies, +                                       char *src_brick) +{ +        int             i        = 0; +        afr_private_t  *priv     = NULL; + +        priv = this->private; +        for (i = 0; i < priv->child_count; i++) { +                if (!replies[i].valid || replies[i].op_ret == -1) +                        continue; +                if (strcmp (priv->children[i]->name, src_brick) == 0) +                        return i; +        } +        return -1; +} + +int +afr_selfheal_gfid_mismatch_by_majority (struct afr_reply *replies, +                                        int child_count) +{ +        int             j                  = 0; +        int             i                  = 0; +        int             src                = -1; +        int             votes[child_count]; + +        for (i = 0; i < child_count; i++) { +                if (!replies[i].valid || replies[i].op_ret == -1) +                        continue; + +                votes[i] = 1; +                for (j = i+1; j < child_count; j++) { +                        if ((!gf_uuid_compare (replies[i].poststat.ia_gfid, +                            replies[j].poststat.ia_gfid))) +                                votes[i]++; +                        if (votes[i] > child_count / 2) { +                                src = i; +                                goto out; +                        } +                } +        } + +out: +        return src; +} + +int 
afr_gfid_sbrain_source_from_bigger_file (struct afr_reply *replies, +                                             int child_count) +{ +        int       i       = 0; +        int       src     = -1; +        uint64_t  size    = 0; + +        for (i = 0; i < child_count; i++) { +                if (!replies[i].valid || replies[i].op_ret == -1) +                        continue; +                if (size < replies[i].poststat.ia_size) { +                        src = i; +                        size = replies[i].poststat.ia_size; +                } else if (replies[i].poststat.ia_size == size) { +                        src = -1; +                } +        } +        return src; +} + +int afr_gfid_sbrain_source_from_latest_mtime (struct afr_reply *replies, +                                              int child_count) +{ +        int       i             = 0; +        int       src           = -1; +        uint32_t  mtime         = 0; +        uint32_t  mtime_nsec    = 0; + +        for (i = 0; i < child_count; i++) { +                if (!replies[i].valid || replies[i].op_ret != 0) +                        continue; +                if ((mtime < replies[i].poststat.ia_mtime) || +                    ((mtime == replies[i].poststat.ia_mtime) && +                     (mtime_nsec < replies[i].poststat.ia_mtime_nsec))) { +                        src = i; +                        mtime = replies[i].poststat.ia_mtime; +                        mtime_nsec = replies[i].poststat.ia_mtime_nsec; +                } else if ((mtime == replies[i].poststat.ia_mtime) && +                           (mtime_nsec == replies[i].poststat.ia_mtime_nsec)) { +                        src = -1; +                } +        } +        return src; +} + +int +afr_gfid_split_brain_source (xlator_t *this, struct afr_reply *replies, +                             inode_t *inode, uuid_t pargfid, const char *bname, +                             int src_idx, int child_idx, +                             
unsigned char *locked_on, int *src, dict_t *xdata) +{ +        afr_private_t   *priv      = NULL; +        char             g1[64]    = {0,}; +        char             g2[64]    = {0,}; +        int              up_count  = 0; +        int              heal_op   = -1; +        int              ret       = -1; +        char            *src_brick = NULL; + +        *src = -1; +        priv = this->private; +        up_count = AFR_COUNT (locked_on, priv->child_count); +        if (up_count != priv->child_count) { +                gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, +                        "All the bricks should be up to resolve the gfid split " +                        "barin"); +                if (xdata) { +                        ret = dict_set_str (xdata, "gfid-heal-msg", "All the " +                                            "bricks should be up to resolve the" +                                            " gfid split barin"); +                        if (ret) +                                gf_msg (this->name, GF_LOG_ERROR, 0, +                                        AFR_MSG_DICT_SET_FAILED, "Error setting" +                                        " gfid-heal-msg dict"); +                } +                goto out; +        } + +        if (xdata) { +                ret = dict_get_int32 (xdata, "heal-op", &heal_op); +                if (ret) +                        goto fav_child; +        } else { +                goto fav_child; +        } + +        switch (heal_op) { +        case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE: +                *src = afr_gfid_sbrain_source_from_bigger_file (replies, +                                                                priv->child_count); +                if (*src == -1) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                AFR_MSG_SPLIT_BRAIN, "No bigger file"); +                        if (xdata) { +                                ret = dict_set_str 
(xdata, "gfid-heal-msg", +                                                    "No bigger file"); +                                if (ret) +                                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                                AFR_MSG_DICT_SET_FAILED, "Error" +                                                " setting gfid-heal-msg dict"); +                        } +                } +                break; + +        case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME: +                *src = afr_gfid_sbrain_source_from_latest_mtime (replies, +                                                                 priv->child_count); +                if (*src == -1) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                AFR_MSG_SPLIT_BRAIN, "No difference in mtime"); +                        if (xdata) { +                                ret = dict_set_str (xdata, "gfid-heal-msg", +                                                    "No difference in mtime"); +                                if (ret) +                                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                                AFR_MSG_DICT_SET_FAILED, "Error" +                                                "setting gfid-heal-msg dict"); +                        } +                } +                break; + +        case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK: +                ret = dict_get_str (xdata, "child-name", &src_brick); +                if (ret) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                AFR_MSG_SPLIT_BRAIN, "Error getting the source " +                                "brick"); +                        break; +                } +                *src = afr_gfid_sbrain_source_from_src_brick (this, replies, +                                                              src_brick); +                if (*src == -1) { +                        gf_msg 
(this->name, GF_LOG_ERROR, 0, +                                AFR_MSG_SPLIT_BRAIN, "Error getting the source " +                                "brick"); +                        if (xdata) { +                                ret = dict_set_str (xdata, "gfid-heal-msg", +                                                    "Error getting the source " +                                                    "brick"); +                                if (ret) +                                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                                AFR_MSG_DICT_SET_FAILED, "Error" +                                                " setting gfid-heal-msg dict"); +                        } +                } +                break; + +        default: +                break; +        } +        goto out; + +fav_child: +        switch (priv->fav_child_policy) { +        case AFR_FAV_CHILD_BY_SIZE: +                *src = afr_sh_fav_by_size (this, replies, inode); +                break; +        case AFR_FAV_CHILD_BY_MTIME: +                *src = afr_sh_fav_by_mtime (this, replies, inode); +                break; +        case AFR_FAV_CHILD_BY_CTIME: +                *src = afr_sh_fav_by_ctime(this, replies, inode); +                break; +        case AFR_FAV_CHILD_BY_MAJORITY: +                if (priv->child_count != 2) +                        *src = afr_selfheal_gfid_mismatch_by_majority (replies, +                                                                       priv->child_count); +                else +                        *src = -1; + +                if (*src == -1) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                AFR_MSG_SPLIT_BRAIN, "No majority to resolve " +                                "gfid split brain"); +                } +                break; +        default: +                break; +        } + +out: +        if (*src == -1) { +                gf_msg 
(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, +                        "Gfid mismatch detected for <gfid:%s>/%s>, %s on %s and" +                        " %s on %s.", uuid_utoa (pargfid), bname, +                        uuid_utoa_r (replies[child_idx].poststat.ia_gfid, g1), +                        priv->children[child_idx]->name, +                        uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2), +                        priv->children[src_idx]->name); +                gf_event (EVENT_AFR_SPLIT_BRAIN, "subvol=%s;type=gfid;file=" +                          "<gfid:%s>/%s>;count=2;child-%d=%s;gfid-%d=%s;" +                          "child-%d=%s;gfid-%d=%s", this->name, +                          uuid_utoa (pargfid), bname, child_idx, +                          priv->children[child_idx]->name, child_idx, +                          uuid_utoa_r (replies[child_idx].poststat.ia_gfid, g1), +                          src_idx, priv->children[src_idx]->name, src_idx, +                          uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2)); +                return -1; +        } +        return 0; +} + + +int  afr_selfheal_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,  			  int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)  { diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 82ae6432d7d..d7e9e60a7bf 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -17,105 +17,6 @@  #include "syncop-utils.h"  #include "events.h" -int -afr_selfheal_gfid_mismatch_by_majority (struct afr_reply *replies, -                                        int child_count) -{ -        int             j                  = 0; -        int             i                  = 0; -        int             src                = -1; -        int             votes[child_count]; - -        for (i = 0; i < child_count; i++) { -                if 
(!replies[i].valid || replies[i].op_ret == -1) -                        continue; - -                votes[i] = 1; -                for (j = i+1; j < child_count; j++) { -                        if ((!gf_uuid_compare (replies[i].poststat.ia_gfid, -                            replies[j].poststat.ia_gfid))) -                                votes[i]++; -                        if (votes[i] > child_count / 2) { -                                src = i; -                                goto out; -                        } -                } -        } - -out: -        return src; -} - -int -afr_gfid_split_brain_source (xlator_t *this, struct afr_reply *replies, -                             inode_t *inode, uuid_t pargfid, char *bname, -                             int src_idx, int child_idx, -                             unsigned char *locked_on, int *src) -{ -        afr_private_t   *priv     = NULL; -        char             g1[64]   = {0,}; -        char             g2[64]   = {0,}; -        int              up_count = 0; - -        priv = this->private; -        up_count = AFR_COUNT (locked_on, priv->child_count); -        if (up_count != priv->child_count) { -                gf_msg (this->name, GF_LOG_ERROR, 0, -                        AFR_MSG_SPLIT_BRAIN, -                        "All the bricks should be up to resolve the gfid split " -                        "brain"); -                goto out; -        } -        switch (priv->fav_child_policy) { -        case AFR_FAV_CHILD_BY_SIZE: -                *src = afr_sh_fav_by_size (this, replies, inode); -                break; -        case AFR_FAV_CHILD_BY_MTIME: -                *src = afr_sh_fav_by_mtime (this, replies, inode); -                break; -        case AFR_FAV_CHILD_BY_CTIME: -                *src = afr_sh_fav_by_ctime(this, replies, inode); -                break; -        case AFR_FAV_CHILD_BY_MAJORITY: -                if (priv->child_count != 2) -                        *src = 
afr_selfheal_gfid_mismatch_by_majority (replies, -                                                                       priv->child_count); -                else -                        *src = -1; - -                if (*src == -1) { -                        gf_msg (this->name, GF_LOG_ERROR, 0, -                                AFR_MSG_SPLIT_BRAIN, "No majority to resolve " -                                "gfid split brain"); -                } -                break; -        default: -                break; -        } - -out: -        if (*src == -1) { -                gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, -                        "Gfid mismatch detected for <gfid:%s>/%s>, %s on %s and" -                        " %s on %s. Skipping conservative merge on the file.", -                        uuid_utoa (pargfid), bname, -                        uuid_utoa_r (replies[child_idx].poststat.ia_gfid, g1), -                        priv->children[child_idx]->name, -                        uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2), -                        priv->children[src_idx]->name); -                gf_event (EVENT_AFR_SPLIT_BRAIN, "subvol=%s;type=gfid;file=" -                          "<gfid:%s>/%s>;count=2;child-%d=%s;gfid-%d=%s;" -                          "child-%d=%s;gfid-%d=%s", this->name, -                          uuid_utoa (pargfid), bname, child_idx, -                          priv->children[child_idx]->name, child_idx, -                          uuid_utoa_r (replies[child_idx].poststat.ia_gfid, g1), -                          src_idx, priv->children[src_idx]->name, src_idx, -                          uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2)); -                return -1; -        } -        return 0; -} -  static int  afr_selfheal_entry_delete (xlator_t *this, inode_t *dir, const char *name,                             inode_t *inode, int child, struct afr_reply *replies) @@ -332,7 +233,13 @@ 
afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this,                          ret = afr_gfid_split_brain_source (this, replies, inode,                                                             pargfid, bname,                                                             src_idx, i, -                                                           locked_on, src); +                                                           locked_on, src, +                                                           NULL); +                        if (ret) +                                gf_msg (this->name, GF_LOG_ERROR, 0, +                                        AFR_MSG_SPLIT_BRAIN, +                                        "Skipping conservative merge on the " +                                        "file.");                          return ret;                  } diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index 8372cb6e376..1d198a8883e 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -330,14 +330,15 @@ static int  afr_selfheal_name_gfid_mismatch_check (xlator_t *this, struct afr_reply *replies,                                         int source, unsigned char *sources,                                         int *gfid_idx, uuid_t pargfid, -                                       const char *bname) +                                       const char *bname, inode_t *inode, +                                       unsigned char *locked_on, dict_t *xdata)  {          int             i             = 0;  	int             gfid_idx_iter = -1; +        int             ret           = -1;          void           *gfid          = NULL;          void           *gfid1         = NULL;          afr_private_t  *priv          = NULL; -	char g1[64], g2[64];          priv = this->private; @@ -358,31 +359,29 @@ afr_selfheal_name_gfid_mismatch_check (xlator_t *this, struct 
afr_reply *replies  		if (sources[i] || source == -1) {  			if ((sources[gfid_idx_iter] || source == -1) &&  			    gf_uuid_compare (gfid, gfid1)) { -			        gf_msg (this->name, GF_LOG_WARNING, 0, -                                        AFR_MSG_SPLIT_BRAIN, -					"GFID mismatch for <gfid:%s>/%s " -					"%s on %s and %s on %s", -					uuid_utoa (pargfid), bname, -					uuid_utoa_r (gfid1, g1), -					priv->children[i]->name, -					uuid_utoa_r (gfid, g2), -					priv->children[gfid_idx_iter]->name); -                                gf_event (EVENT_AFR_SPLIT_BRAIN, -                                        "subvol=%s;type=gfid;" -                                        "file=<gfid:%s>/%s;count=2;" -                                        "child-%d=%s;gfid-%d=%s;child-%d=%s;" -                                        "gfid-%d=%s", this->name, -                                        uuid_utoa (pargfid), bname, i, -                                        priv->children[i]->name, i, -                                        uuid_utoa_r (gfid1, g1), -                                        gfid_idx_iter, -                                        priv->children[gfid_idx_iter]->name, -                                        gfid_idx_iter, -                                        uuid_utoa_r (gfid, g2)); - -				return -EIO; +                                ret = afr_gfid_split_brain_source (this, +                                                                   replies, +                                                                   inode, +                                                                   pargfid, +                                                                   bname, +                                                                   gfid_idx_iter, +                                                                   i, locked_on, +                                                                   gfid_idx, +                                              
                     xdata); +                                if (!ret && *gfid_idx >= 0) { +                                        ret = dict_set_str (xdata, +                                                             "gfid-heal-msg", +                                                             "GFID split-brain " +                                                             "resolved"); +                                        if (ret) +                                                gf_msg (this->name, +                                                        GF_LOG_ERROR, 0, +                                                        AFR_MSG_DICT_SET_FAILED, +                                                        "Error setting gfid-" +                                                        "heal-msg dict"); +                                } +                                return ret;  			} -                          gfid = &replies[i].poststat.ia_gfid;  			gfid_idx_iter = i;  		} @@ -427,7 +426,7 @@ __afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent,                          unsigned char *sources, unsigned char *sinks,  			unsigned char *healed_sinks, int source,  			unsigned char *locked_on, struct afr_reply *replies, -                        void *gfid_req) +                        void *gfid_req, dict_t *xdata)  {  	int             gfid_idx        = -1;          int             ret             = -1; @@ -458,7 +457,8 @@ __afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent,          ret = afr_selfheal_name_gfid_mismatch_check (this, replies, source,                                                       sources, &gfid_idx, -                                                     pargfid, bname); +                                                     pargfid, bname, inode, +                                                     locked_on, xdata);          if (ret)                  return ret; @@ -583,7 +583,8 @@ 
out:  int  afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, -		      uuid_t pargfid, const char *bname, void *gfid_req) +		      uuid_t pargfid, const char *bname, void *gfid_req, +                      dict_t *xdata)  {  	afr_private_t *priv = NULL;  	unsigned char *sources = NULL; @@ -640,7 +641,7 @@ afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent,  		ret = __afr_selfheal_name_do (frame, this, parent, pargfid,                                                bname, inode, sources, sinks,                                                healed_sinks, source, locked_on, -                                              replies, gfid_req); +                                              replies, gfid_req, xdata);  	}  unlock:  	afr_selfheal_unentrylk (frame, this, parent, this->name, bname, @@ -707,7 +708,7 @@ afr_selfheal_name_unlocked_inspect (call_frame_t *frame, xlator_t *this,  int  afr_selfheal_name (xlator_t *this, uuid_t pargfid, const char *bname, -                   void *gfid_req) +                   void *gfid_req, dict_t *xdata)  {  	inode_t *parent = NULL;  	call_frame_t *frame = NULL; @@ -729,7 +730,7 @@ afr_selfheal_name (xlator_t *this, uuid_t pargfid, const char *bname,  	if (need_heal) {  		ret = afr_selfheal_name_do (frame, this, parent, pargfid, bname, -                                            gfid_req); +                                            gfid_req, xdata);                  if (ret)                          goto out;          } diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 2e22ac2d7a1..36f081ec354 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -99,7 +99,7 @@ afr_throttled_selfheal (call_frame_t *frame, xlator_t *this);  int  afr_selfheal_name (xlator_t *this, uuid_t gfid, const char *name, -                   void *gfid_req); +                   void *gfid_req, dict_t *xdata);  
int  afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode); @@ -330,4 +330,10 @@ int  afr_sh_fav_by_ctime (xlator_t *this, struct afr_reply *replies,                       inode_t *inode); +int +afr_gfid_split_brain_source (xlator_t *this, struct afr_reply *replies, +                             inode_t *inode, uuid_t pargfid, const char *bname, +                             int src_idx, int child_idx, +                             unsigned char *locked_on, int *src, dict_t *xdata); +  #endif /* !_AFR_SELFHEAL_H */ diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index e1a40521709..08817202b33 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -301,7 +301,7 @@ afr_shd_selfheal_name (struct subvol_healer *healer, int child, uuid_t parent,  {  	int ret = -1; -	ret = afr_selfheal_name (THIS, parent, bname, NULL); +	ret = afr_selfheal_name (THIS, parent, bname, NULL, NULL);  	return ret;  }  | 
