summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- heal/src/Makefile.am | 5
-rw-r--r-- heal/src/glfs-heal.c | 290
-rw-r--r-- libglusterfs/src/glusterfs.h | 2
-rw-r--r-- tests/basic/afr/self-heald.t | 71
-rw-r--r-- xlators/cluster/afr/src/afr-common.c | 313
-rw-r--r-- xlators/cluster/afr/src/afr-inode-read.c | 5
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-data.c | 21
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-entry.c | 24
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-metadata.c | 6
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal.h | 22
-rw-r--r-- xlators/cluster/afr/src/afr.h | 4
11 files changed, 498 insertions, 265 deletions
diff --git a/heal/src/Makefile.am b/heal/src/Makefile.am
index b4089278607..80be3d443cf 100644
--- a/heal/src/Makefile.am
+++ b/heal/src/Makefile.am
@@ -6,14 +6,12 @@ glfsheal_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(GF_LDADD)\
$(RLLIBS) $(top_builddir)/rpc/xdr/src/libgfxdr.la \
$(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
$(top_builddir)/api/src/libgfapi.la \
- $(top_builddir)/xlators/cluster/afr/src/afr.la \
$(GF_GLUSTERFS_LIBS) $(XML_LIBS)
glfsheal_LDFLAGS = $(GF_LDFLAGS)
AM_CPPFLAGS = $(GF_CPPFLAGS) \
-I$(top_srcdir)/xlators/lib/src\
- -I$(top_srcdir)/xlators/cluster/afr/src\
-I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src\
-I$(top_srcdir)/rpc/xdr/src\
-I$(top_srcdir)/api/src\
@@ -30,6 +28,3 @@ CLEANFILES =
$(top_builddir)/libglusterfs/src/libglusterfs.la:
$(MAKE) -C $(top_builddir)/libglusterfs/src/ all
-
-$(top_builddir)/xlators/cluster/afr/src/afr.la:
- $(MAKE) -C $(top_builddir)/xlators/cluster/afr/src/ all
diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c
index 9416316458d..f1249ec6670 100644
--- a/heal/src/glfs-heal.c
+++ b/heal/src/glfs-heal.c
@@ -17,9 +17,6 @@
#include "syncop.h"
#include <string.h>
#include <time.h>
-#include "afr.h"
-#include "afr-self-heal.h"
-#include "afr-self-heald.h"
#define DEFAULT_HEAL_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
@@ -88,198 +85,105 @@ out:
return ret;
}
-int
-afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this,
- inode_t *inode,
- gf_boolean_t *metadata_selfheal)
+static xlator_t*
+_get_afr_ancestor (xlator_t *xl)
{
- int ret = -1;
- unsigned char *locked_on = NULL;
- afr_private_t *priv = this->private;
-
- locked_on = alloca0 (priv->child_count);
-
- ret = afr_selfheal_inodelk (frame, this, inode, this->name,
- LLONG_MAX - 1, 0, locked_on);
- {
- if (ret == 0) {
- /* Not a single lock */
- ret = -afr_final_errno (frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN;/* all invalid responses */
- goto out;
- }
- ret = afr_selfheal_unlocked_inspect (frame, this, inode->gfid,
- NULL, NULL,
- metadata_selfheal, NULL);
- }
- afr_selfheal_uninodelk (frame, this, inode, this->name,
- LLONG_MAX - 1, 0, locked_on);
-out:
- return ret;
-}
+ if (!xl || !xl->parents)
+ return NULL;
-int
-afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
- inode_t *inode, gf_boolean_t *data_selfheal)
-{
- int ret = -1;
- afr_private_t *priv = NULL;
- unsigned char *locked_on = NULL;
- unsigned char *data_lock = NULL;
-
- priv = this->private;
- locked_on = alloca0 (priv->child_count);
- data_lock = alloca0 (priv->child_count);
-
- ret = afr_selfheal_tryinodelk (frame, this, inode, priv->sh_domain,
- 0, 0, locked_on);
- {
- if (ret == 0) {
- ret = -afr_final_errno (frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN;/* all invalid responses */
- goto out;
- }
- ret = afr_selfheal_inodelk (frame, this, inode, this->name,
- 0, 0, data_lock);
- {
- if (ret == 0) {
- ret = -afr_final_errno (frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN;
- /* all invalid responses */
- goto unlock;
- }
- ret = afr_selfheal_unlocked_inspect (frame, this,
- inode->gfid, NULL,
- data_selfheal,
- NULL, NULL);
- }
- afr_selfheal_uninodelk (frame, this, inode, this->name, 0, 0,
- data_lock);
+ while (xl->parents) {
+ xl = xl->parents->xlator;
+ if (!xl)
+ break;
+ if (strcmp (xl->type, "cluster/replicate") == 0)
+ return xl;
}
-unlock:
- afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0,
- locked_on);
-out:
- return ret;
+
+ return NULL;
}
int
-afr_selfheal_locked_entry_inspect (call_frame_t *frame, xlator_t *this,
- inode_t *inode, gf_boolean_t *entry_selfheal)
+glfsh_index_purge (xlator_t *subvol, inode_t *inode, char *name)
{
- int ret = -1;
- afr_private_t *priv = NULL;
- unsigned char *locked_on = NULL;
- unsigned char *data_lock = NULL;
-
- priv = this->private;
- locked_on = alloca0 (priv->child_count);
- data_lock = alloca0 (priv->child_count);
-
- ret = afr_selfheal_tryentrylk (frame, this, inode, priv->sh_domain,
- NULL, locked_on);
- {
- if (ret == 0) {
- ret = -afr_final_errno (frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN;/* all invalid responses */
- goto out;
- }
+ loc_t loc = {0, };
+ int ret = 0;
- ret = afr_selfheal_entrylk (frame, this, inode, this->name,
- NULL, data_lock);
- {
- if (ret == 0) {
- ret = -afr_final_errno (frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN;
- /* all invalid responses */
- goto unlock;
- }
- ret = afr_selfheal_unlocked_inspect (frame, this,
- inode->gfid,
- NULL, NULL, NULL,
- entry_selfheal);
- }
- afr_selfheal_unentrylk (frame, this, inode, this->name, NULL,
- data_lock);
- }
-unlock:
- afr_selfheal_unentrylk (frame, this, inode, priv->sh_domain, NULL,
- locked_on);
-out:
+ loc.parent = inode_ref (inode);
+ loc.name = name;
+
+ ret = syncop_unlink (subvol, &loc);
+
+ loc_wipe (&loc);
return ret;
}
int
-afr_selfheal_locked_inspect (call_frame_t *frame, xlator_t *this, uuid_t gfid,
- inode_t **inode,
- gf_boolean_t *data_selfheal,
- gf_boolean_t *metadata_selfheal,
- gf_boolean_t *entry_selfheal)
+glfsh_gfid_to_path (xlator_t *this, xlator_t *subvol, uuid_t gfid, char **path_p)
{
- int ret = -1;
- gf_boolean_t dsh = _gf_false;
- gf_boolean_t msh = _gf_false;
- gf_boolean_t esh = _gf_false;
+ int ret = 0;
+ char *path = NULL;
+ loc_t loc = {0,};
+ dict_t *xattr = NULL;
- ret = afr_selfheal_unlocked_inspect (frame, this, gfid, inode,
- &dsh, &msh, &esh);
+ uuid_copy (loc.gfid, gfid);
+ loc.inode = inode_new (this->itable);
+
+ ret = syncop_getxattr (subvol, &loc, &xattr, GFID_TO_PATH_KEY);
if (ret)
goto out;
- /* For every heal type hold locks and check if it indeed needs heal */
-
- if (msh) {
- msh = _gf_false;
- ret = afr_selfheal_locked_metadata_inspect (frame, this,
- *inode, &msh);
- if (msh || ret < 0)
- goto out;
+ ret = dict_get_str (xattr, GFID_TO_PATH_KEY, &path);
+ if (ret || !path) {
+ ret = -EINVAL;
+ goto out;
}
- if (dsh) {
- dsh = _gf_false;
- ret = afr_selfheal_locked_data_inspect (frame, this, *inode,
- &dsh);
- if (dsh || ret < 0)
- goto out;
+ *path_p = gf_strdup (path);
+ if (!*path_p) {
+ ret = -ENOMEM;
+ goto out;
}
- if (esh) {
- esh = _gf_false;
- ret = afr_selfheal_locked_entry_inspect (frame, this, *inode,
- &esh);
- }
+ ret = 0;
out:
- if (entry_selfheal)
- *entry_selfheal = esh;
- if (data_selfheal)
- *data_selfheal = dsh;
- if (metadata_selfheal)
- *metadata_selfheal = msh;
+ if (xattr)
+ dict_unref (xattr);
+ loc_wipe (&loc);
+
return ret;
}
-static xlator_t*
-_get_afr_ancestor (xlator_t *xl)
+/*
+ * Print one line of heal-info output for an index entry.
+ *
+ * dict        - reply dictionary; its "heal-info" string selects the
+ *               suffix that is printed.
+ * path        - path to print, or NULL to fall back to the textual gfid.
+ * gfid        - gfid of the entry, printed when path is NULL.
+ * num_entries - incremented once for every entry that is printed.
+ *
+ * Nothing is printed (and the counter is left alone) when the key is
+ * missing or its value is "no-heal".  The suffix is always a valid
+ * string: an unrecognised value prints the bare path instead of handing
+ * a NULL pointer to printf's %s, which is undefined behaviour.
+ */
+void
+glfsh_print_heal_status (dict_t *dict, char *path, uuid_t gfid,
+                         uint64_t *num_entries)
{
- if (!xl || !xl->parents)
- return NULL;
-
- while (xl->parents) {
- xl = xl->parents->xlator;
- if (!xl)
- break;
- if (strcmp (xl->type, "cluster/replicate") == 0)
- return xl;
- }
-
- return NULL;
+        char       *value  = NULL;
+        const char *status = "";
+        int         ret    = 0;
+
+        ret = dict_get_str (dict, "heal-info", &value);
+        if (ret || !value || (!strcmp (value, "no-heal")))
+                return;
+
+        (*num_entries)++;
+
+        /* The suffixes are constants, so no allocation (and no
+         * allocation failure path) is needed. */
+        if (!strcmp (value, "heal")) {
+                status = " ";
+        } else if (!strcmp (value, "possibly-healing")) {
+                status = " - Possibly undergoing heal\n";
+        } else if (!strcmp (value, "split-brain")) {
+                status = " - Is in split-brain\n";
+        }
+
+        printf ("%s%s\n",
+                path ? path : uuid_utoa (gfid),
+                status);
}
static int
@@ -291,19 +195,10 @@ glfsh_process_entries (xlator_t *xl, fd_t *fd, gf_dirent_t *entries,
int ret = 0;
char *path = NULL;
uuid_t gfid = {0};
- inode_t *inode = NULL;
- call_frame_t *frame = NULL;
xlator_t *this = NULL;
- gf_boolean_t data_selfheal = _gf_false;
- gf_boolean_t metadata_selfheal = _gf_false;
- gf_boolean_t entry_selfheal = _gf_false;
-
+ dict_t *dict = NULL;
+ loc_t loc = {0,};
this = THIS;
- frame = afr_frame_create (this);
- if (!frame) {
- ret = -1;
- goto out;
- }
list_for_each_entry_safe (entry, tmp, &entries->list, list) {
*offset = entry->d_off;
@@ -314,44 +209,25 @@ glfsh_process_entries (xlator_t *xl, fd_t *fd, gf_dirent_t *entries,
uuid_clear (gfid);
GF_FREE (path);
path = NULL;
- if (inode) {
- inode_forget (inode, 1);
- inode_unref (inode);
- inode = NULL;
- }
- AFR_STACK_RESET (frame);
uuid_parse (entry->d_name, gfid);
- ret = afr_selfheal_locked_inspect (frame, this, gfid, &inode,
- &data_selfheal,
- &metadata_selfheal,
- &entry_selfheal);
- if (ret == 0) {
- if (!entry_selfheal && !metadata_selfheal &&
- !data_selfheal)
- continue;
- }
+ uuid_copy (loc.gfid, gfid);
+ ret = syncop_getxattr (this, &loc, &dict, GF_AFR_HEAL_INFO);
+ if (ret)
+ continue;
- ret = afr_shd_gfid_to_path (this, xl, gfid, &path);
+ ret = glfsh_gfid_to_path (this, xl, gfid, &path);
if (ret == -ENOENT || ret == -ESTALE) {
- afr_shd_index_purge (xl, fd->inode, entry->d_name);
+ glfsh_index_purge (xl, fd->inode, entry->d_name);
ret = 0;
continue;
}
-
- (*num_entries)++;
- printf ("%s\n", path ? path : uuid_utoa (inode->gfid));
+ if (dict)
+ glfsh_print_heal_status (dict, path, gfid,
+ num_entries);
}
ret = 0;
-out:
- if (inode) {
- inode_forget (inode, 1);
- inode_unref (inode);
- }
-
- if (frame)
- AFR_STACK_DESTROY (frame);
GF_FREE (path);
return ret;
}
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index b0bf3efce03..a0a8befc447 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -136,6 +136,8 @@
#define GF_XATTROP_INDEX_GFID "glusterfs.xattrop_index_gfid"
#define GF_XATTROP_INDEX_COUNT "glusterfs.xattrop_index_count"
+#define GF_AFR_HEAL_INFO "glusterfs.heal-info"
+
#define GF_GFIDLESS_LOOKUP "gfidless-lookup"
/* replace-brick and pump related internal xattrs */
#define RB_PUMP_CMD_START "glusterfs.pump.start"
diff --git a/tests/basic/afr/self-heald.t b/tests/basic/afr/self-heald.t
index 6937cf227d7..966972e5443 100644
--- a/tests/basic/afr/self-heald.t
+++ b/tests/basic/afr/self-heald.t
@@ -6,12 +6,43 @@
cleanup;
START_TIMESTAMP=`date +%s`
+function kill_multiple_bricks { # kill three of the six bricks of a volume, chosen by the global decide_kill
+ local vol=$1 # volume name
+ local host=$2 # host passed through to kill_brick
+ local brickpath=$3 # parent directory; brick N is addressed as $brickpath/${vol}N
+
+ if [ $decide_kill == 0 ] # decide_kill is set by the script body, not passed in
+ then
+ for ((i=0; i<=4; i=i+2)) do # bricks 0, 2, 4
+ TEST kill_brick $vol $host $brickpath/${vol}$i
+ done
+ else
+ for ((i=1; i<=5; i=i+2)) do # bricks 1, 3, 5
+ TEST kill_brick $vol $host $brickpath/${vol}$i
+ done
+ fi
+}
+function check_bricks_up { # wait for the bricks selected by decide_kill to be seen as up again
+ local vol=$1 # volume name
+ if [ $decide_kill == 0 ] # same global selector that kill_multiple_bricks reads
+ then
+ for ((i=0; i<=4; i=i+2)) do # children 0, 2, 4
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_in_shd $vol $i
+ done
+ else
+ for ((i=1; i<=5; i=i+2)) do # children 1, 3, 5
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_in_shd $vol $i
+ done
+ fi
+}
+
function disconnected_brick_count {
local vol=$1
$CLI volume heal $vol info | \
egrep -i '(transport|Socket is not connected)' | wc -l
}
+TESTS_EXPECTED_IN_LOOP=20
TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3,4,5}
@@ -19,9 +50,10 @@ TEST $CLI volume set $V0 cluster.background-self-heal-count 0
TEST $CLI volume set $V0 cluster.eager-lock off
TEST $CLI volume start $V0
TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
-TEST kill_brick $V0 $H0 $B0/${V0}0
-TEST kill_brick $V0 $H0 $B0/${V0}2
-TEST kill_brick $V0 $H0 $B0/${V0}4
+
+# Pick the even (0) or odd (1) brick set from the parity of the day of year.
+# The modulo must sit inside $(( )), and 10# forces base 10 so zero-padded
+# days such as 008 or 089 are not rejected as invalid octal.
+decide_kill=$(( 10#$(date +"%j") % 2 ))
+
+kill_multiple_bricks $V0 $H0 $B0
cd $M0
HEAL_FILES=0
for i in {1..10}
@@ -61,9 +93,9 @@ TEST ! $CLI volume heal $V0 full
TEST $CLI volume start $V0 force
TEST $CLI volume set $V0 cluster.self-heal-daemon on
EXPECT_WITHIN 20 "Y" glustershd_up_status
-EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 0
-EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 2
-EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 4
+
+check_bricks_up $V0
+
TEST $CLI volume heal $V0
sleep 5 #Until the heal-statistics command implementation
#check that this heals the contents partially
@@ -98,16 +130,13 @@ TEST mkdir $M0/d
#DATA
TEST $CLI volume set $V0 cluster.data-self-heal off
EXPECT "off" volume_option $V0 cluster.data-self-heal
-TEST kill_brick $V0 $H0 $B0/${V0}0
-TEST kill_brick $V0 $H0 $B0/${V0}2
-TEST kill_brick $V0 $H0 $B0/${V0}4
+kill_multiple_bricks $V0 $H0 $B0
echo abc > $M0/f
EXPECT 1 afr_get_pending_heal_count $V0
TEST $CLI volume start $V0 force
EXPECT_WITHIN 20 "Y" glustershd_up_status
-EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 0
-EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 2
-EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 4
+check_bricks_up $V0
+
TEST $CLI volume heal $V0
EXPECT_WITHIN 30 "0" afr_get_pending_heal_count $V0
TEST $CLI volume set $V0 cluster.data-self-heal on
@@ -115,16 +144,14 @@ TEST $CLI volume set $V0 cluster.data-self-heal on
#METADATA
TEST $CLI volume set $V0 cluster.metadata-self-heal off
EXPECT "off" volume_option $V0 cluster.metadata-self-heal
-TEST kill_brick $V0 $H0 $B0/${V0}0
-TEST kill_brick $V0 $H0 $B0/${V0}2
-TEST kill_brick $V0 $H0 $B0/${V0}4
+kill_multiple_bricks $V0 $H0 $B0
+
TEST chmod 777 $M0/f
EXPECT 1 afr_get_pending_heal_count $V0
TEST $CLI volume start $V0 force
EXPECT_WITHIN 20 "Y" glustershd_up_status
-EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 0
-EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 2
-EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 4
+check_bricks_up $V0
+
TEST $CLI volume heal $V0
EXPECT_WITHIN 30 "0" afr_get_pending_heal_count $V0
TEST $CLI volume set $V0 cluster.metadata-self-heal on
@@ -132,9 +159,7 @@ TEST $CLI volume set $V0 cluster.metadata-self-heal on
#ENTRY
TEST $CLI volume set $V0 cluster.entry-self-heal off
EXPECT "off" volume_option $V0 cluster.entry-self-heal
-TEST kill_brick $V0 $H0 $B0/${V0}0
-TEST kill_brick $V0 $H0 $B0/${V0}2
-TEST kill_brick $V0 $H0 $B0/${V0}4
+kill_multiple_bricks $V0 $H0 $B0
TEST touch $M0/d/a
# 4 if mtime/ctime is modified for d in bricks without a
# 2 otherwise
@@ -142,9 +167,7 @@ PENDING=$( afr_get_pending_heal_count $V0 )
TEST test $PENDING -eq 2 -o $PENDING -eq 4
TEST $CLI volume start $V0 force
EXPECT_WITHIN 20 "Y" glustershd_up_status
-EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 0
-EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 2
-EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 4
+check_bricks_up $V0
TEST $CLI volume heal $V0
EXPECT_WITHIN 30 "0" afr_get_pending_heal_count $V0
TEST $CLI volume set $V0 cluster.entry-self-heal on
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 1527a47f716..aefad8be959 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -494,7 +494,6 @@ afr_selfheal_enabled (xlator_t *this)
}
-
int
afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
{
@@ -4032,3 +4031,315 @@ afr_mark_pending_changelog (afr_private_t *priv, unsigned char *pending,
out:
return changelog;
}
+
+/*
+ * Decide whether an inspected inode should be reported as needing heal.
+ *
+ * priv    - AFR private data; supplies child_count.
+ * sources - per-child flags filled in by a prepare step.
+ * ret     - result of that prepare step.
+ *
+ * Returns _gf_false only when ret is zero and every child is flagged as
+ * a source; returns _gf_true in every other case.
+ */
+gf_boolean_t
+afr_decide_heal_info (afr_private_t *priv, unsigned char *sources, int ret)
+{
+        int nr_sources = 0;
+
+        /* Any non-zero prepare result is reported as "needs heal". */
+        if (ret != 0)
+                return _gf_true;
+
+        nr_sources = AFR_COUNT (sources, priv->child_count);
+
+        return (nr_sources == priv->child_count) ? _gf_false : _gf_true;
+}
+
+/*
+ * Re-check, under an inode lock, whether metadata heal is needed.
+ *
+ * Takes the inodelk in this->name's domain, runs the metadata prepare
+ * step and stores the verdict from afr_decide_heal_info() in *msh.
+ * *msh is left untouched when no lock could be obtained.
+ *
+ * Returns the prepare step's result, or a negative errno (-ENOTCONN
+ * when afr_final_errno() reports none) if not a single lock was taken.
+ */
+int
+afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this,
+                                      inode_t *inode, gf_boolean_t *msh)
+{
+        afr_private_t    *priv    = this->private;
+        unsigned char    *locks   = alloca0 (priv->child_count);
+        unsigned char    *sources = alloca0 (priv->child_count);
+        unsigned char    *sinks   = alloca0 (priv->child_count);
+        unsigned char    *healed  = alloca0 (priv->child_count);
+        struct afr_reply *replies = alloca0 (sizeof (*replies) *
+                                             priv->child_count);
+        int               ret     = -1;
+
+        ret = afr_selfheal_inodelk (frame, this, inode, this->name,
+                                    LLONG_MAX - 1, 0, locks);
+        if (ret == 0) {
+                /* Not a single lock: nothing to inspect, nothing to unlock */
+                ret = -afr_final_errno (frame->local, priv);
+                if (ret == 0)
+                        ret = -ENOTCONN; /* all invalid responses */
+        } else {
+                ret = __afr_selfheal_metadata_prepare (frame, this, inode,
+                                                       locks, sources, sinks,
+                                                       healed, replies);
+                *msh = afr_decide_heal_info (priv, sources, ret);
+
+                afr_selfheal_uninodelk (frame, this, inode, this->name,
+                                        LLONG_MAX - 1, 0, locks);
+        }
+
+        afr_replies_wipe (replies, priv->child_count);
+        return ret;
+}
+
+/*
+ * Re-check, under locks, whether data heal is needed.
+ *
+ * First try-locks the inode in priv->sh_domain, then locks it in
+ * this->name's domain, runs the data prepare step and stores the verdict
+ * from afr_decide_heal_info() in *dsh.  Locks are dropped in reverse
+ * order.  *dsh is left untouched when either lock step obtains nothing.
+ *
+ * Returns the prepare step's result, or a negative errno (-ENOTCONN
+ * when afr_final_errno() reports none) if a lock step took no lock.
+ */
+int
+afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
+                                  inode_t *inode, gf_boolean_t *dsh)
+{
+        afr_private_t    *priv        = this->private;
+        unsigned char    *domain_lock = alloca0 (priv->child_count);
+        unsigned char    *data_lock   = alloca0 (priv->child_count);
+        unsigned char    *sources     = alloca0 (priv->child_count);
+        unsigned char    *sinks       = alloca0 (priv->child_count);
+        unsigned char    *healed      = alloca0 (priv->child_count);
+        struct afr_reply *replies     = alloca0 (sizeof (*replies) *
+                                                 priv->child_count);
+        int               ret         = -1;
+
+        ret = afr_selfheal_tryinodelk (frame, this, inode, priv->sh_domain,
+                                       0, 0, domain_lock);
+        if (ret == 0) {
+                ret = -afr_final_errno (frame->local, priv);
+                if (ret == 0)
+                        ret = -ENOTCONN; /* all invalid responses */
+        } else {
+                ret = afr_selfheal_inodelk (frame, this, inode, this->name,
+                                            0, 0, data_lock);
+                if (ret == 0) {
+                        ret = -afr_final_errno (frame->local, priv);
+                        if (ret == 0)
+                                ret = -ENOTCONN; /* all invalid responses */
+                } else {
+                        ret = __afr_selfheal_data_prepare (frame, this, inode,
+                                                           data_lock, sources,
+                                                           sinks, healed,
+                                                           replies);
+                        *dsh = afr_decide_heal_info (priv, sources, ret);
+
+                        afr_selfheal_uninodelk (frame, this, inode,
+                                                this->name, 0, 0, data_lock);
+                }
+
+                /* The sh_domain lock was taken, so always give it back. */
+                afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain,
+                                        0, 0, domain_lock);
+        }
+
+        afr_replies_wipe (replies, priv->child_count);
+        return ret;
+}
+
+/*
+ * Re-check, under locks, whether entry heal is needed.
+ *
+ * First try-locks the directory in priv->sh_domain, then takes the
+ * entrylk in this->name's domain, runs the entry prepare step and stores
+ * the verdict from afr_decide_heal_info() in *esh.  A successful prepare
+ * that yields no source (source < 0) is turned into -EIO.  *esh is left
+ * untouched when either lock step obtains nothing.
+ *
+ * Returns the (possibly adjusted) prepare result, or a negative errno
+ * (-ENOTCONN when afr_final_errno() reports none) if a lock step took
+ * no lock.
+ */
+int
+afr_selfheal_locked_entry_inspect (call_frame_t *frame, xlator_t *this,
+                                   inode_t *inode,
+                                   gf_boolean_t *esh)
+{
+        afr_private_t    *priv        = this->private;
+        unsigned char    *domain_lock = alloca0 (priv->child_count);
+        unsigned char    *entry_lock  = alloca0 (priv->child_count);
+        unsigned char    *sources     = alloca0 (priv->child_count);
+        unsigned char    *sinks       = alloca0 (priv->child_count);
+        unsigned char    *healed      = alloca0 (priv->child_count);
+        struct afr_reply *replies     = alloca0 (sizeof (*replies) *
+                                                 priv->child_count);
+        int               src         = -1;
+        int               ret         = -1;
+
+        ret = afr_selfheal_tryentrylk (frame, this, inode, priv->sh_domain,
+                                       NULL, domain_lock);
+        if (ret == 0) {
+                ret = -afr_final_errno (frame->local, priv);
+                if (ret == 0)
+                        ret = -ENOTCONN; /* all invalid responses */
+        } else {
+                ret = afr_selfheal_entrylk (frame, this, inode, this->name,
+                                            NULL, entry_lock);
+                if (ret == 0) {
+                        ret = -afr_final_errno (frame->local, priv);
+                        if (ret == 0)
+                                ret = -ENOTCONN; /* all invalid responses */
+                } else {
+                        ret = __afr_selfheal_entry_prepare (frame, this, inode,
+                                                            entry_lock, sources,
+                                                            sinks, healed,
+                                                            replies, &src);
+                        if (ret == 0 && src < 0)
+                                ret = -EIO;
+                        *esh = afr_decide_heal_info (priv, sources, ret);
+
+                        afr_selfheal_unentrylk (frame, this, inode, this->name,
+                                                NULL, entry_lock);
+                }
+
+                /* The sh_domain lock was taken, so always give it back. */
+                afr_selfheal_unentrylk (frame, this, inode, priv->sh_domain,
+                                        NULL, domain_lock);
+        }
+
+        afr_replies_wipe (replies, priv->child_count);
+        return ret;
+}
+
+/*
+ * Inspect a gfid without locks, then confirm each suspected heal type
+ * (metadata, data, entry - in that order) while holding the matching
+ * locks.
+ *
+ * inode             - receives the inode from the unlocked inspection.
+ * entry_selfheal,
+ * data_selfheal,
+ * metadata_selfheal - receive the final verdicts; each may be NULL if
+ *                     the caller is not interested (the version this
+ *                     function replaces allowed that as well).
+ *
+ * A flag raised by the unlocked pass stays raised when its locked
+ * re-check could not take a lock.  The walk stops early on -EIO from
+ * the metadata or data check and on -EAGAIN from the data check.
+ *
+ * Returns the result of the last inspection step that ran.
+ */
+int
+afr_selfheal_locked_inspect (call_frame_t *frame, xlator_t *this, uuid_t gfid,
+                             inode_t **inode,
+                             gf_boolean_t *entry_selfheal,
+                             gf_boolean_t *data_selfheal,
+                             gf_boolean_t *metadata_selfheal)
+
+{
+        int             ret = -1;
+        gf_boolean_t    dsh = _gf_false;
+        gf_boolean_t    msh = _gf_false;
+        gf_boolean_t    esh = _gf_false;
+
+        ret = afr_selfheal_unlocked_inspect (frame, this, gfid, inode,
+                                             &dsh, &msh, &esh);
+        if (ret)
+                goto out;
+
+        /* For every heal type hold locks and check if it indeed needs heal */
+
+        if (msh) {
+                ret = afr_selfheal_locked_metadata_inspect (frame, this,
+                                                            *inode, &msh);
+                if (ret == -EIO)
+                        goto out;
+        }
+
+        if (dsh) {
+                ret = afr_selfheal_locked_data_inspect (frame, this, *inode,
+                                                        &dsh);
+                if (ret == -EIO || (ret == -EAGAIN))
+                        goto out;
+        }
+
+        if (esh) {
+                ret = afr_selfheal_locked_entry_inspect (frame, this, *inode,
+                                                         &esh);
+        }
+
+out:
+        /* Do not write through output pointers the caller left NULL. */
+        if (data_selfheal)
+                *data_selfheal = dsh;
+        if (entry_selfheal)
+                *entry_selfheal = esh;
+        if (metadata_selfheal)
+                *metadata_selfheal = msh;
+        return ret;
+}
+
+/*
+ * Build the reply dictionary for a heal-info query.
+ *
+ * status - one of "heal", "split-brain", "possibly-healing" or
+ *          "no-heal".
+ *
+ * Returns a new dict holding the key "heal-info" with the matching
+ * value, or NULL when the dict cannot be allocated.  An unrecognised
+ * status, or a failure to store the key (logged as a warning), yields
+ * an empty dict.  The caller owns the returned reference.
+ */
+dict_t*
+afr_set_heal_info (char *status)
+{
+        /* The stored value is always one of these literals, never the
+         * caller's pointer, exactly as in the per-status branches this
+         * table replaces. */
+        static char *known[] = {
+                "heal", "split-brain", "possibly-healing", "no-heal",
+        };
+        dict_t      *dict = NULL;
+        size_t       i    = 0;
+
+        dict = dict_new ();
+        if (!dict)
+                goto out;
+
+        for (i = 0; i < sizeof (known) / sizeof (known[0]); i++) {
+                if (strcmp (status, known[i]) != 0)
+                        continue;
+
+                if (dict_set_str (dict, "heal-info", known[i]))
+                        gf_log ("", GF_LOG_WARNING,
+                                "Failed to set heal-info key to %s",
+                                known[i]);
+                break;
+        }
+out:
+        return dict;
+}
+
+/*
+ * Serve a getxattr request for the heal-info key.
+ *
+ * Runs the locked inspection for loc->gfid and unwinds the getxattr
+ * with a dict whose "heal-info" value describes the result:
+ *   -EIO     -> "split-brain"
+ *   -EAGAIN  -> "possibly-healing"
+ *   >= 0     -> "heal" if any heal type is flagged, else "no-heal"
+ *   other <0 -> "heal" if any heal type is flagged, else no dict
+ * -ENOMEM is the only result reported to the caller as a failure.
+ *
+ * xdata is accepted for interface symmetry and is not used here.
+ *
+ * Returns 0, or -1 after unwinding with ENOMEM.
+ */
+int
+afr_get_heal_info (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                   dict_t *xdata)
+{
+        gf_boolean_t    data_selfheal     = _gf_false;
+        gf_boolean_t    metadata_selfheal = _gf_false;
+        gf_boolean_t    entry_selfheal    = _gf_false;
+        dict_t         *dict              = NULL;
+        int             ret               = -1;
+        int             op_errno          = 0;
+        inode_t        *inode             = NULL;
+
+        ret = afr_selfheal_locked_inspect (frame, this, loc->gfid, &inode,
+                                           &entry_selfheal,
+                                           &data_selfheal, &metadata_selfheal);
+
+        if (ret == -ENOMEM) {
+                op_errno = -ret;
+                ret = -1;
+                goto out;
+        }
+
+        if (ret == -EIO) {
+                dict = afr_set_heal_info ("split-brain");
+        } else if (ret == -EAGAIN) {
+                dict = afr_set_heal_info ("possibly-healing");
+        } else if (ret >= 0) {
+                /* The data prepare step hands back the index of the
+                 * chosen source, so any non-negative value is a
+                 * successful inspection, not only zero. */
+                if (!data_selfheal && !entry_selfheal &&
+                    !metadata_selfheal) {
+                        dict = afr_set_heal_info ("no-heal");
+                } else {
+                        dict = afr_set_heal_info ("heal");
+                }
+        } else {
+                /* Other failures (e.g. no lock obtained): trust the
+                 * flags raised by the unlocked inspection. */
+                if (data_selfheal || entry_selfheal ||
+                    metadata_selfheal) {
+                        dict = afr_set_heal_info ("heal");
+                }
+        }
+        ret = 0;
+
+out:
+        AFR_STACK_UNWIND (getxattr, frame, ret, op_errno, dict, NULL);
+        if (dict)
+                dict_unref (dict);
+        /* Release the inode handed back by the inspection, as the
+         * previous caller of afr_selfheal_locked_inspect() did. */
+        if (inode) {
+                inode_forget (inode, 1);
+                inode_unref (inode);
+        }
+        return ret;
+}
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 4cb219246f7..210d710a2b3 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -1373,6 +1373,11 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
return 0;
}
+ if (!strcmp (name, GF_AFR_HEAL_INFO)) {
+ afr_get_heal_info (frame, this, loc, xdata);
+ return 0;
+ }
+
/*
* if we are doing getxattr with pathinfo as the key then we
* collect information from all childs
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index f7503faa719..0a43d128634 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -569,10 +569,11 @@ out:
* The return value is the index of the subvolume to be used as the source
* for self-healing, or -1 if no healing is necessary/split brain.
*/
-static int
-__afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd,
- unsigned char *locked_on, unsigned char *sources,
- unsigned char *sinks, unsigned char *healed_sinks,
+int
+__afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
struct afr_reply *replies)
{
int ret = -1;
@@ -582,10 +583,8 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
- ret = afr_selfheal_unlocked_discover (frame, fd->inode, fd->inode->gfid,
+ ret = afr_selfheal_unlocked_discover (frame, inode, inode->gfid,
replies);
- if (ret)
- return ret;
witness = alloca0(priv->child_count * sizeof (*witness));
ret = afr_selfheal_find_direction (frame, this, replies,
@@ -650,8 +649,9 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,
goto unlock;
}
- ret = __afr_selfheal_data_prepare (frame, this, fd, data_lock,
- sources, sinks, healed_sinks,
+ ret = __afr_selfheal_data_prepare (frame, this, fd->inode,
+ data_lock, sources, sinks,
+ healed_sinks,
locked_replies);
if (ret < 0)
goto unlock;
@@ -678,7 +678,7 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,
unlock:
afr_selfheal_uninodelk (frame, this, fd->inode, this->name, 0, 0,
data_lock);
- if (ret < 0)
+ if (ret < 0)
goto out;
ret = afr_selfheal_data_do (frame, this, fd, source, healed_sinks,
@@ -731,7 +731,6 @@ afr_selfheal_data_open (xlator_t *this, inode_t *inode)
return fd;
}
-
int
afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode)
{
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 3f753251e7c..63ac61bce31 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -357,11 +357,11 @@ __afr_selfheal_entry_finalize_source (xlator_t *this, unsigned char *sources,
return source;
}
-
-static int
-__afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd,
- unsigned char *locked_on, unsigned char *sources,
- unsigned char *sinks, unsigned char *healed_sinks,
+int
+__afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
struct afr_reply *replies, int *source_p)
{
int ret = -1;
@@ -371,11 +371,8 @@ __afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
- ret = afr_selfheal_unlocked_discover (frame, fd->inode, fd->inode->gfid,
+ ret = afr_selfheal_unlocked_discover (frame, inode, inode->gfid,
replies);
- if (ret)
- return ret;
-
witness = alloca0 (sizeof (*witness) * priv->child_count);
ret = afr_selfheal_find_direction (frame, this, replies,
AFR_ENTRY_TRANSACTION,
@@ -407,7 +404,6 @@ __afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd,
return ret;
}
-
static int
afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this,
fd_t *fd, char *name)
@@ -445,7 +441,8 @@ afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this,
goto unlock;
}
- ret = __afr_selfheal_entry_prepare (frame, this, fd, locked_on,
+ ret = __afr_selfheal_entry_prepare (frame, this, fd->inode,
+ locked_on,
sources, sinks,
healed_sinks, par_replies,
&source);
@@ -593,8 +590,9 @@ __afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd,
goto unlock;
}
- ret = __afr_selfheal_entry_prepare (frame, this, fd, data_lock,
- sources, sinks, healed_sinks,
+ ret = __afr_selfheal_entry_prepare (frame, this, fd->inode,
+ data_lock, sources, sinks,
+ healed_sinks,
locked_replies, &source);
}
unlock:
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 87600df3bad..c09f19ac5fd 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -294,7 +294,8 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
return source;
}
-static int
+
+int
__afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode,
unsigned char *locked_on, unsigned char *sources,
unsigned char *sinks, unsigned char *healed_sinks,
@@ -310,9 +311,6 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *i
ret = afr_selfheal_unlocked_discover (frame, inode, inode->gfid,
replies);
- if (ret)
- return ret;
-
witness = alloca0 (sizeof (*witness) * priv->child_count);
ret = afr_selfheal_find_direction (frame, this, replies,
AFR_METADATA_TRANSACTION,
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index bb3f0e7029f..50cff91ccb3 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -200,6 +200,28 @@ gf_boolean_t
afr_does_witness_exist (xlator_t *this, uint64_t *witness);
int
+__afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *locked_on,
+ unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks,
+ struct afr_reply *replies);
+
+int
+__afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *locked_on,
+ unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ struct afr_reply *replies);
+int
+__afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *locked_on,
+ unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ struct afr_reply *replies, int *source_p);
+
+int
afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this,
uuid_t gfid, inode_t **link_inode,
gf_boolean_t *data_selfheal,
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 51e57e8207f..7e138c54ec0 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -1011,4 +1011,8 @@ afr_xattrs_are_equal (dict_t *dict1, dict_t *dict2);
gf_boolean_t
afr_is_xattr_ignorable (char *key);
+
+int
+afr_get_heal_info (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
#endif /* __AFR_H__ */