summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- cli/src/cli-cmd-volume.c 8
-rw-r--r-- heal/src/glfs-heal.c 79
-rw-r--r-- tests/basic/afr/split-brain-heal-info.t 60
-rw-r--r-- tests/volume.rc 5
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 13
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-metadata.c 4
6 files changed, 149 insertions, 20 deletions
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index 3035ad4d566..68755630d87 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -1881,7 +1881,8 @@ cli_print_brick_status (cli_volume_status_t *status)
#define NEEDS_GLFS_HEAL(op) ((op == GF_AFR_OP_SBRAIN_HEAL_FROM_BIGGER_FILE) || \
(op == GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK) || \
- (op == GF_AFR_OP_INDEX_SUMMARY))
+ (op == GF_AFR_OP_INDEX_SUMMARY) || \
+ (op == GF_AFR_OP_SPLIT_BRAIN_FILES))
int
cli_launch_glfs_heal (int heal_op, dict_t *options)
@@ -1907,7 +1908,7 @@ cli_launch_glfs_heal (int heal_op, dict_t *options)
ret = dict_get_str (options, "file", &filename);
runner_add_args (&runner, "bigger-file", filename, NULL);
break;
- case GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK:
+ case GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK:
ret = dict_get_str (options, "heal-source-hostname",
&hostname);
ret = dict_get_str (options, "heal-source-brickpath",
@@ -1917,6 +1918,9 @@ cli_launch_glfs_heal (int heal_op, dict_t *options)
if (dict_get_str (options, "file", &filename) == 0)
runner_argprintf (&runner, filename);
break;
+ case GF_AFR_OP_SPLIT_BRAIN_FILES:
+ runner_add_args (&runner, "split-brain-info", NULL);
+ break;
default:
ret = -1;
}
diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c
index f49f3a58afc..a6208fa052f 100644
--- a/heal/src/glfs-heal.c
+++ b/heal/src/glfs-heal.c
@@ -21,7 +21,10 @@
#define DEFAULT_HEAL_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
#define USAGE_STR "Usage: %s <VOLNAME> [bigger-file <FILE> | "\
- "source-brick <HOSTNAME:BRICKNAME> [<FILE>]]\n"
+ "source-brick <HOSTNAME:BRICKNAME> [<FILE>] | "\
+ "split-brain-info]\n"
+
+typedef void (*print_status) (dict_t *, char *, uuid_t, uint64_t *);
int glfsh_heal_splitbrain_file (glfs_t *fs, xlator_t *top_subvol,
loc_t *rootloc, char *file, dict_t *xattr_req);
@@ -190,6 +193,25 @@ out:
}
void
+glfsh_print_spb_status (dict_t *dict, char *path, uuid_t gfid,
+ uint64_t *num_entries)
+{
+ char *value = NULL;
+ int ret = 0;
+ /* Print and count the entry only when its "heal-info" is "split-brain". */
+ ret = dict_get_str (dict, "heal-info", &value);
+ if (ret) /* lookup of "heal-info" failed: nothing to report */
+ return;
+
+ if (!strcmp (value, "split-brain")) {
+ (*num_entries)++; /* only printed entries are counted */
+ printf ("%s\n",
+ path ? path : uuid_utoa (gfid)); /* no path: print the gfid string */
+ }
+ return;
+}
+
+void
glfsh_print_heal_status (dict_t *dict, char *path, uuid_t gfid,
uint64_t *num_entries)
{
@@ -250,7 +272,8 @@ glfsh_heal_entries (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
static int
glfsh_process_entries (xlator_t *xl, fd_t *fd, gf_dirent_t *entries,
- uint64_t *offset, uint64_t *num_entries)
+ uint64_t *offset, uint64_t *num_entries,
+ print_status glfsh_print_status)
{
gf_dirent_t *entry = NULL;
gf_dirent_t *tmp = NULL;
@@ -291,8 +314,8 @@ glfsh_process_entries (xlator_t *xl, fd_t *fd, gf_dirent_t *entries,
continue;
}
if (dict)
- glfsh_print_heal_status (dict, path, gfid,
- num_entries);
+ glfsh_print_status (dict, path, gfid,
+ num_entries);
}
ret = 0;
GF_FREE (path);
@@ -331,8 +354,17 @@ glfsh_crawl_directory (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
goto out;
if (heal_op == GF_AFR_OP_INDEX_SUMMARY) {
- ret = glfsh_process_entries (readdir_xl, fd, &entries,
- &offset, &num_entries);
+ ret = glfsh_process_entries (readdir_xl, fd,
+ &entries, &offset,
+ &num_entries,
+ glfsh_print_heal_status);
+ if (ret < 0)
+ goto out;
+ } else if (heal_op == GF_AFR_OP_SPLIT_BRAIN_FILES) {
+ ret = glfsh_process_entries (readdir_xl, fd,
+ &entries, &offset,
+ &num_entries,
+ glfsh_print_spb_status);
if (ret < 0)
goto out;
} else if (heal_op == GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK) {
@@ -353,6 +385,9 @@ out:
} else {
if (heal_op == GF_AFR_OP_INDEX_SUMMARY)
printf ("Number of entries: %"PRIu64"\n", num_entries);
+ else if (heal_op == GF_AFR_OP_SPLIT_BRAIN_FILES)
+ printf ("Number of entries in split-brain: %"PRIu64"\n"
+ , num_entries);
else if (heal_op == GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK)
printf ("Number of healed entries: %"PRIu64"\n",
num_entries);
@@ -412,7 +447,7 @@ out:
void
glfsh_print_pending_heals (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
- xlator_t *xl)
+ xlator_t *xl, gf_xl_afr_op_t heal_op)
{
int ret = 0;
loc_t dirloc = {0};
@@ -424,7 +459,7 @@ glfsh_print_pending_heals (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
if (!xattr_req)
goto out;
- ret = dict_set_int32 (xattr_req, "heal-op", GF_AFR_OP_INDEX_SUMMARY);
+ ret = dict_set_int32 (xattr_req, "heal-op", heal_op);
if (ret)
goto out;
ret = glfsh_print_brick (xl, rootloc);
@@ -453,8 +488,13 @@ glfsh_print_pending_heals (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
fd_unref (fd);
if (xattr_req)
dict_unref (xattr_req);
- if (ret < 0)
- printf ("Failed to find entries with pending self-heal\n");
+ if (ret < 0) {
+ if (heal_op == GF_AFR_OP_INDEX_SUMMARY)
+ printf ("Failed to find entries with pending"
+ " self-heal\n");
+ if (heal_op == GF_AFR_OP_SPLIT_BRAIN_FILES)
+ printf ("Failed to find entries in split-brain\n");
+ }
out:
loc_wipe (&dirloc);
return;
@@ -521,7 +561,8 @@ out:
int
-glfsh_gather_heal_info (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc)
+glfsh_gather_heal_info (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ gf_xl_afr_op_t heal_op)
{
xlator_t *xl = NULL;
xlator_t *afr_xl = NULL;
@@ -537,7 +578,8 @@ glfsh_gather_heal_info (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc)
old_THIS = THIS;
THIS = afr_xl;
glfsh_print_pending_heals (fs, top_subvol,
- rootloc, xl);
+ rootloc, xl,
+ heal_op);
THIS = old_THIS;
printf ("\n");
}
@@ -711,6 +753,15 @@ main (int argc, char **argv)
case 2:
heal_op = GF_AFR_OP_INDEX_SUMMARY;
break;
+ case 3:
+ if (!strcmp (argv[2], "split-brain-info")) {
+ heal_op = GF_AFR_OP_SPLIT_BRAIN_FILES;
+ } else {
+ printf (USAGE_STR, argv[0]);
+ ret = -1;
+ goto out;
+ }
+ break;
case 4:
if (!strcmp (argv[2], "bigger-file")) {
heal_op = GF_AFR_OP_SBRAIN_HEAL_FROM_BIGGER_FILE;
@@ -799,7 +850,9 @@ main (int argc, char **argv)
switch (heal_op) {
case GF_AFR_OP_INDEX_SUMMARY:
- ret = glfsh_gather_heal_info (fs, top_subvol, &rootloc);
+ case GF_AFR_OP_SPLIT_BRAIN_FILES:
+ ret = glfsh_gather_heal_info (fs, top_subvol, &rootloc,
+ heal_op);
break;
case GF_AFR_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
ret = glfsh_heal_from_bigger_file (fs, top_subvol,
diff --git a/tests/basic/afr/split-brain-heal-info.t b/tests/basic/afr/split-brain-heal-info.t
new file mode 100644
index 00000000000..eabfbd0880a
--- /dev/null
+++ b/tests/basic/afr/split-brain-heal-info.t
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function volume_start_force()
+{
+ local vol=$1 # volume name, first argument
+ TEST $CLI volume start $vol force
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $vol 0 # expects "1" for child 0; presumably polls until the timeout - confirm in include.rc
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $vol 1 # same check for child 1
+}
+
+TESTS_EXPECTED_IN_LOOP=15
+SPB_FILES=0
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+TEST mkdir $M0/dspb
+TEST mkdir $M0/mspb
+TEST mkdir $M0/espb
+TEST touch $M0/dspb/file
+
+#### Simulate data-split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST `echo "abc" > $M0/dspb/file`
+volume_start_force $V0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST `echo "def" > $M0/dspb/file`
+volume_start_force $V0
+SPB_FILES=$(($SPB_FILES + 1))
+
+### Simulate metadata-split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST chmod 757 $M0/mspb
+volume_start_force $V0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST chmod 747 $M0/mspb
+volume_start_force $V0
+SPB_FILES=$(($SPB_FILES + 1))
+
+#### Simulate entry-split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST touch $M0/espb/a
+volume_start_force $V0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST mkdir $M0/espb/a
+volume_start_force $V0
+SPB_FILES=$(($SPB_FILES + 1))
+
+#Multiply by 2, for each brick in replica pair
+SPB_FILES=$(($SPB_FILES * 2))
+EXPECT "$SPB_FILES" afr_get_split_brain_count $V0
+cleanup;
diff --git a/tests/volume.rc b/tests/volume.rc
index 2fd07cd8745..36f1350b9bc 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -210,6 +210,11 @@ function afr_get_pending_heal_count {
gluster volume heal $vol info | grep "Number of entries" | awk '{ sum+=$4} END {print sum}'
}
+function afr_get_split_brain_count {
+ local vol=$1 # volume name, first argument
+ gluster volume heal $vol info split-brain | grep "Number of entries in split-brain" | awk '{ sum+=$6} END {print sum}' # adds up field 6 (the number after "split-brain:") over every matching line
+}
+
function afr_get_index_path {
local brick_path=$1
echo "$brick_path/.glusterfs/indices/xattrop"
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index e6d45add4e8..533a7b5d5a1 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -4452,7 +4452,11 @@ afr_get_heal_info (call_frame_t *frame, xlator_t *this, loc_t *loc,
dict = afr_set_heal_info ("split-brain");
} else if (ret == -EAGAIN) {
dict = afr_set_heal_info ("possibly-healing");
- } else if (ret == 0) {
+ } else if (ret >= 0) {
+ /* value of ret = source index
+ * so ret >= 0 and at least one of the 3 booleans set to
+ * true means a source is identified; heal is required.
+ */
if (!data_selfheal && !entry_selfheal &&
!metadata_selfheal) {
dict = afr_set_heal_info ("no-heal");
@@ -4460,6 +4464,13 @@ afr_get_heal_info (call_frame_t *frame, xlator_t *this, loc_t *loc,
dict = afr_set_heal_info ("heal");
}
} else if (ret < 0) {
+ /* Apart from the negative ret values checked above,
+ * there are other possible ret values such as ENOTCONN
+ * (returned when the number of valid replies received
+ * is less than 2),
+ * in which case heal is required when one of the
+ * selfheal booleans is set.
+ */
if (data_selfheal || entry_selfheal ||
metadata_selfheal) {
dict = afr_set_heal_info ("heal");
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 05d9f2b4917..cd8bb688a11 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -210,12 +210,8 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
struct iatt first = {0, };
int source = -1;
int sources_count = 0;
- dict_t *xdata_req = NULL;
- afr_local_t *local = NULL;
priv = this->private;
- local = frame->local;
- xdata_req = local->xdata_req;
sources_count = AFR_COUNT (sources, priv->child_count);