summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- cli/src/cli-cmd-parser.c 72
-rw-r--r-- cli/src/cli-cmd-volume.c 25
-rw-r--r-- cli/src/cli-rpc-ops.c 110
-rw-r--r-- cli/src/cli.h 4
-rw-r--r-- glusterfsd/src/glusterfsd-mgmt.c 101
-rw-r--r-- libglusterfs/src/globals.c 2
-rw-r--r-- libglusterfs/src/glusterfs.h 3
-rw-r--r-- libglusterfs/src/xlator.h 12
-rw-r--r-- rpc/rpc-lib/src/protocol-common.h 11
-rw-r--r-- xlators/cluster/afr/src/Makefile.am 2
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 32
-rw-r--r-- xlators/cluster/afr/src/afr-mem-types.h 10
-rw-r--r-- xlators/cluster/afr/src/afr-self-heald.c 776
-rw-r--r-- xlators/cluster/afr/src/afr-self-heald.h 24
-rw-r--r-- xlators/cluster/afr/src/afr.c 79
-rw-r--r-- xlators/cluster/afr/src/afr.h 23
-rw-r--r-- xlators/cluster/afr/src/pump.c 2
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-handler.c 1
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-op-sm.c 263
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-op-sm.h 5
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-rpc-ops.c 57
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-utils.c 30
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-utils.h 3
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-volume-ops.c 14
24 files changed, 1274 insertions, 387 deletions
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
index 703a06e84a3..00c2f3618e6 100644
--- a/cli/src/cli-cmd-parser.c
+++ b/cli/src/cli-cmd-parser.c
@@ -2108,3 +2108,75 @@ out:
return ret;
}
+
+/*
+ * Build the option dictionary for
+ *   volume heal <VOLNAME> [{full | info {healed | heal-failed | split-brain}}]
+ *
+ * The dictionary carries two keys:
+ *   "volname" - words[2]
+ *   "heal-op" - the GF_AFR_OP_* value selected by the trailing words
+ *
+ * @words:     command words; words[2] is the volume name
+ * @wordcount: number of entries in @words (3, 4 and 5 are accepted)
+ * @options:   receives the new dictionary on success, NULL on failure
+ *
+ * Returns 0 on success and a non-zero value on any failure, including
+ * failure to allocate the dictionary.
+ */
+int
+cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
+                                   dict_t **options)
+{
+        /* Start pessimistic so that every early exit reports failure. */
+        int     ret = -1;
+        dict_t  *dict = NULL;
+
+        /* words[2] is read below; refuse shorter command lines. */
+        if (wordcount < 3)
+                goto out;
+
+        dict = dict_new ();
+        if (!dict)
+                goto out;
+
+        ret = dict_set_str (dict, "volname", (char *) words[2]);
+        if (ret) {
+                gf_log (THIS->name, GF_LOG_ERROR, "failed to set volname");
+                goto out;
+        }
+
+        /* No extra word: heal what the index lists. */
+        if (wordcount == 3) {
+                ret = dict_set_int32 (dict, "heal-op", GF_AFR_OP_HEAL_INDEX);
+                goto done;
+        }
+
+        if (wordcount == 4) {
+                if (!strcmp (words[3], "full")) {
+                        ret = dict_set_int32 (dict, "heal-op",
+                                              GF_AFR_OP_HEAL_FULL);
+                        goto done;
+                } else if (!strcmp (words[3], "info")) {
+                        ret = dict_set_int32 (dict, "heal-op",
+                                              GF_AFR_OP_INDEX_SUMMARY);
+                        goto done;
+                } else {
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        /* Five words are only valid as "info <healed|heal-failed|split-brain>". */
+        if (wordcount == 5) {
+                if (strcmp (words[3], "info")) {
+                        ret = -1;
+                        goto out;
+                }
+                if (!strcmp (words[4], "healed")) {
+                        ret = dict_set_int32 (dict, "heal-op",
+                                              GF_AFR_OP_HEALED_FILES);
+                        goto done;
+                }
+                if (!strcmp (words[4], "heal-failed")) {
+                        ret = dict_set_int32 (dict, "heal-op",
+                                              GF_AFR_OP_HEAL_FAILED_FILES);
+                        goto done;
+                }
+                if (!strcmp (words[4], "split-brain")) {
+                        ret = dict_set_int32 (dict, "heal-op",
+                                              GF_AFR_OP_SPLIT_BRAIN_FILES);
+                        goto done;
+                }
+                ret = -1;
+                goto out;
+        }
+
+        /* Any other word count is a usage error. */
+        ret = -1;
+        goto out;
+done:
+        *options = dict;
+out:
+        if (ret) {
+                /* Never hand a half-built dictionary back to the caller. */
+                if (dict)
+                        dict_unref (dict);
+                *options = NULL;
+        }
+
+        return ret;
+}
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index 37b41c81e80..0906f3387ce 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -1560,32 +1560,29 @@ cli_cmd_volume_heal_cbk (struct cli_state *state, struct cli_cmd_word *word,
call_frame_t *frame = NULL;
int sent = 0;
int parse_error = 0;
- dict_t *dict = NULL;
+ dict_t *options = NULL;
frame = create_frame (THIS, THIS->ctx->pool);
if (!frame)
goto out;
- if (wordcount != 3) {
+ if (wordcount < 3) {
cli_usage_out (word->pattern);
- parse_error = 1;
+ parse_error = 1;
goto out;
}
- dict = dict_new ();
- if (!dict)
- goto out;
-
- ret = dict_set_str (dict, "volname", (char *) words[2]);
+ ret = cli_cmd_volume_heal_options_parse (words, wordcount, &options);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to set volname");
+ cli_usage_out (word->pattern);
+ parse_error = 1;
goto out;
}
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_HEAL_VOLUME];
if (proc->fn) {
- ret = proc->fn (frame, THIS, dict);
+ ret = proc->fn (frame, THIS, options);
}
out:
@@ -1595,8 +1592,8 @@ out:
cli_out ("Volume heal failed");
}
- if (dict)
- dict_unref (dict);
+ if (options)
+ dict_unref (options);
return ret;
}
@@ -1826,9 +1823,9 @@ struct cli_cmd volume_cmds[] = {
cli_cmd_volume_status_cbk,
"display status of all or specified volume(s)/brick"},
- { "volume heal <VOLNAME>",
+ { "volume heal <VOLNAME> [{full | info {healed | heal-failed | split-brain}}]",
cli_cmd_volume_heal_cbk,
- "Start healing of volume specified by <VOLNAME>"},
+ "self-heal commands on volume specified by <VOLNAME>"},
{"volume statedump <VOLNAME> [nfs] [all|mem|iobuf|callpool|priv|fd|"
"inode|history]...",
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index d1888415cef..9537c977d12 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -5320,6 +5320,38 @@ gf_cli3_1_umount (call_frame_t *frame, xlator_t *this, void *data)
return ret;
}
+/*
+ * Print the heal entries that the response dictionary holds for one brick.
+ *
+ * Keys read from @dict, all prefixed with the brick index:
+ *   "<brick>-hostname", "<brick>-path" - brick identity (both required)
+ *   "<brick>-count"                    - number of entries (0 if absent)
+ *   "<brick>-<i>"                      - i-th entry, 0 <= i < count
+ *
+ * Nothing is printed when the hostname or path key is missing; entries
+ * whose key is missing are skipped.
+ */
+void
+cmd_heal_volume_brick_out (dict_t *dict, int brick)
+{
+        uint64_t        num_entries = 0;
+        int             ret = 0;
+        char            key[256] = {0};
+        char            *hostname = NULL;
+        char            *path = NULL;
+        char            *entry = NULL;
+        uint64_t        i = 0;
+
+        snprintf (key, sizeof (key), "%d-hostname", brick);
+        ret = dict_get_str (dict, key, &hostname);
+        if (ret)
+                goto out;
+        snprintf (key, sizeof (key), "%d-path", brick);
+        ret = dict_get_str (dict, key, &path);
+        if (ret)
+                goto out;
+        snprintf (key, sizeof (key), "%d-count", brick);
+        ret = dict_get_uint64 (dict, key, &num_entries);
+        /* A missing count means no entries: only the header line is shown. */
+        if (ret)
+                num_entries = 0;
+        cli_out ("\nEntries on %s:%s %"PRIu64, hostname, path, num_entries);
+        for (i = 0; i < num_entries; i++) {
+                snprintf (key, sizeof (key), "%d-%"PRIu64, brick, i);
+                ret = dict_get_str (dict, key, &entry);
+                if (ret)
+                        continue;
+                /*
+                 * Entries are file names and may contain '%'; never pass
+                 * them as the format string.
+                 */
+                cli_out ("%s", entry);
+        }
+out:
+        return;
+}
+
int
gf_cli3_1_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
@@ -5329,7 +5361,11 @@ gf_cli3_1_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
cli_local_t *local = NULL;
char *volname = NULL;
call_frame_t *frame = NULL;
+ dict_t *input_dict = NULL;
dict_t *dict = NULL;
+ int brick_count = 0;
+ int i = 0;
+ gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID;
if (-1 == req->rpc_status) {
goto out;
@@ -5348,21 +5384,24 @@ gf_cli3_1_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
frame->local = NULL;
}
- if (local)
- dict = local->dict;
-
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_dict ("volHeal", dict, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
-
- ret = dict_get_str (dict, "volname", &volname);
+ if (local) {
+ input_dict = local->dict;
+ ret = dict_get_int32 (input_dict, "heal-op",
+ (int32_t*)&heal_op);
+ }
+
+//#if (HAVE_LIB_XML)
+// if (global_state->mode & GLUSTER_MODE_XML) {
+// ret = cli_xml_output_dict ("volHeal", dict, rsp.op_ret,
+// rsp.op_errno, rsp.op_errstr);
+// if (ret)
+// gf_log ("cli", GF_LOG_ERROR,
+// "Error outputting to xml");
+// goto out;
+// }
+//#endif
+
+ ret = dict_get_str (input_dict, "volname", &volname);
if (ret) {
gf_log (THIS->name, GF_LOG_ERROR, "failed to get volname");
goto out;
@@ -5376,14 +5415,51 @@ gf_cli3_1_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
cli_out ("Starting heal on volume %s has been %s", volname,
(rsp.op_ret) ? "unsuccessful": "successful");
+ if (rsp.op_ret) {
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ if ((heal_op == GF_AFR_OP_HEAL_FULL) ||
+ (heal_op == GF_AFR_OP_HEAL_INDEX)) {
+ ret = 0;
+ goto out;
+ }
+ dict = dict_new ();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &dict);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to allocate memory");
+ goto out;
+ } else {
+ dict->extra_stdfree = rsp.dict.dict_val;
+ }
+ ret = dict_get_int32 (dict, "count", &brick_count);
+ if (ret)
+ goto out;
+
+ if (!brick_count) {
+ cli_out ("All bricks of volume %s are down.", volname);
+ goto out;
+ }
+
+ for (i = 0; i < brick_count; i++)
+ cmd_heal_volume_brick_out (dict, i);
ret = rsp.op_ret;
out:
cli_cmd_broadcast_response (ret);
if (local)
cli_local_wipe (local);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
if (rsp.op_errstr)
free (rsp.op_errstr);
if (dict)
diff --git a/cli/src/cli.h b/cli/src/cli.h
index 74e1423f5db..1f78da1fba0 100644
--- a/cli/src/cli.h
+++ b/cli/src/cli.h
@@ -259,6 +259,10 @@ cli_cmd_volume_status_parse (const char **words, int wordcount,
dict_t **options);
int
+cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
+ dict_t **options);
+
+int
cli_print_brick_status (cli_volume_status_t *status);
void
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 08f8a05f9de..de6e2d60c18 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -46,6 +46,7 @@
#include "rpcsvc.h"
#include "cli1-xdr.h"
#include "statedump.h"
+#include "syncop.h"
static char is_mgmt_rpc_reconnect;
@@ -317,8 +318,8 @@ out:
}
int
-glusterfs_translator_heal_response_send (rpcsvc_request_t *req, int op_ret,
- char *msg, dict_t *output)
+glusterfs_xlator_op_response_send (rpcsvc_request_t *req, int op_ret,
+ char *msg, dict_t *output)
{
gd1_mgmt_brick_op_rsp rsp = {0,};
int ret = -1;
@@ -651,15 +652,14 @@ out:
}
int
-glusterfs_handle_translator_heal (rpcsvc_request_t *req)
+glusterfs_handle_translator_op (void *data)
{
int32_t ret = -1;
gd1_mgmt_brick_op_req xlator_req = {0,};
- dict_t *dict = NULL;
+ dict_t *input = NULL;
xlator_t *xlator = NULL;
xlator_t *any = NULL;
dict_t *output = NULL;
- char msg[2048] = {0};
char key[2048] = {0};
char *xname = NULL;
glusterfs_ctx_t *ctx = NULL;
@@ -667,73 +667,76 @@ glusterfs_handle_translator_heal (rpcsvc_request_t *req)
xlator_t *this = NULL;
int i = 0;
int count = 0;
+ rpcsvc_request_t *req = data;
GF_ASSERT (req);
this = THIS;
GF_ASSERT (this);
- ctx = glusterfs_ctx_get ();
- GF_ASSERT (ctx);
-
- active = ctx->active;
- any = active->first;
if (!xdr_to_generic (req->msg[0], &xlator_req,
(xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
//failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
goto out;
}
- dict = dict_new ();
+ ctx = glusterfs_ctx_get ();
+ active = ctx->active;
+ any = active->first;
+ input = dict_new ();
ret = dict_unserialize (xlator_req.input.input_val,
xlator_req.input.input_len,
- &dict);
+ &input);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"failed to "
"unserialize req-buffer to dictionary");
goto out;
+ } else {
+ input->extra_stdfree = xlator_req.input.input_val;
}
- ret = dict_get_int32 (dict, "count", &count);
- i = 0;
- while (i < count) {
- snprintf (key, sizeof (key), "heal-%d", i);
- ret = dict_get_str (dict, key, &xname);
+ ret = dict_get_int32 (input, "count", &count);
+
+ output = dict_new ();
+ if (!output) {
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < count; i++) {
+ snprintf (key, sizeof (key), "xl-%d", i);
+ ret = dict_get_str (input, key, &xname);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Couldn't get "
- "replicate xlator %s to trigger "
- "self-heal", xname);
+ "xlator %s ", key);
goto out;
}
xlator = xlator_search_by_name (any, xname);
if (!xlator) {
- snprintf (msg, sizeof (msg), "xlator %s is not loaded",
- xlator_req.name);
- ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "xlator %s is not "
+ "loaded", xname);
goto out;
}
-
- ret = xlator_notify (xlator, GF_EVENT_TRIGGER_HEAL, dict, NULL);
- i++;
}
- output = dict_new ();
- if (!output)
- goto out;
-
- /* output dict is not used currently, could be used later. */
- ret = glusterfs_translator_heal_response_send (req, ret, msg, output);
+ for (i = 0; i < count; i++) {
+ snprintf (key, sizeof (key), "xl-%d", i);
+ ret = dict_get_str (input, key, &xname);
+ xlator = xlator_search_by_name (any, xname);
+ XLATOR_NOTIFY (xlator, GF_EVENT_TRANSLATOR_OP, input, output);
+ if (ret)
+ break;
+ }
out:
- if (dict)
- dict_unref (dict);
- if (xlator_req.input.input_val)
- free (xlator_req.input.input_val); // malloced by xdr
+ glusterfs_xlator_op_response_send (req, ret, "", output);
+ if (input)
+ dict_unref (input);
if (output)
dict_unref (output);
if (xlator_req.name)
free (xlator_req.name); //malloced by xdr
- return ret;
+ return 0;
}
@@ -941,11 +944,20 @@ out:
return ret;
}
+/*
+ * Completion callback handed to synctask_new() for brick xlator ops.
+ * It only tears down the call stack of the frame created for the task.
+ * Always returns 0.
+ */
+static int
+glusterfs_command_done (int ret, call_frame_t *sync_frame, void *data)
+{
+        /* Neither the task result nor the opaque data is consulted here. */
+        (void) ret;
+        (void) data;
+
+        STACK_DESTROY (sync_frame->root);
+
+        return 0;
+}
+
int
glusterfs_handle_rpc_msg (rpcsvc_request_t *req)
{
- int ret = -1;
- xlator_t *this = THIS;
+ int ret = -1;
+ xlator_t *this = THIS;
+ call_frame_t *frame = NULL;
+
GF_ASSERT (this);
switch (req->procnum) {
case GLUSTERD_BRICK_TERMINATE:
@@ -954,8 +966,13 @@ glusterfs_handle_rpc_msg (rpcsvc_request_t *req)
case GLUSTERD_BRICK_XLATOR_INFO:
ret = glusterfs_handle_translator_info_get (req);
break;
- case GLUSTERD_BRICK_XLATOR_HEAL:
- ret = glusterfs_handle_translator_heal (req);
+ case GLUSTERD_BRICK_XLATOR_OP:
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+ ret = synctask_new (this->ctx->env,
+ glusterfs_handle_translator_op,
+ glusterfs_command_done, frame, req);
break;
case GLUSTERD_BRICK_STATUS:
ret = glusterfs_handle_brick_status (req);
@@ -966,7 +983,7 @@ glusterfs_handle_rpc_msg (rpcsvc_request_t *req)
default:
break;
}
-
+out:
return ret;
}
@@ -1018,7 +1035,7 @@ rpcsvc_actor_t glusterfs_actors[] = {
[GLUSTERD_BRICK_NULL] = { "NULL", GLUSTERD_BRICK_NULL, glusterfs_handle_rpc_msg, NULL, NULL, 0},
[GLUSTERD_BRICK_TERMINATE] = { "TERMINATE", GLUSTERD_BRICK_TERMINATE, glusterfs_handle_rpc_msg, NULL, NULL, 0},
[GLUSTERD_BRICK_XLATOR_INFO] = { "TRANSLATOR INFO", GLUSTERD_BRICK_XLATOR_INFO, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_BRICK_XLATOR_HEAL] = { "TRANSLATOR HEAL", GLUSTERD_BRICK_XLATOR_HEAL, glusterfs_handle_rpc_msg, NULL, NULL, 0},
+ [GLUSTERD_BRICK_XLATOR_OP] = { "TRANSLATOR OP", GLUSTERD_BRICK_XLATOR_OP, glusterfs_handle_rpc_msg, NULL, NULL, 0},
[GLUSTERD_BRICK_STATUS] = {"STATUS", GLUSTERD_BRICK_STATUS, glusterfs_handle_rpc_msg, NULL, NULL, 0},
[GLUSTERD_BRICK_XLATOR_DEFRAG] = { "TRANSLATOR DEFRAG", GLUSTERD_BRICK_XLATOR_DEFRAG, glusterfs_handle_rpc_msg, NULL, NULL, 0}
};
diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c
index 9355a3a4672..e041ce5d6b4 100644
--- a/libglusterfs/src/globals.c
+++ b/libglusterfs/src/globals.c
@@ -395,7 +395,7 @@ char eventstring[GF_EVENT_MAXVAL+1][64] = {
"Volfile Modified",
"New Volfile",
"Translator Info",
- "Trigger Volume Heal",
+ "Xlator Op",
"Authentication Failed",
"Invalid event",
};
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 076833d2352..63c28b8cf81 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -385,7 +385,7 @@ typedef enum {
GF_EVENT_VOLFILE_MODIFIED,
GF_EVENT_GRAPH_NEW,
GF_EVENT_TRANSLATOR_INFO,
- GF_EVENT_TRIGGER_HEAL,
+ GF_EVENT_TRANSLATOR_OP,
GF_EVENT_AUTH_FAILED,
GF_EVENT_VOLUME_DEFRAG,
GF_EVENT_MAXVAL,
@@ -403,7 +403,6 @@ struct gf_flock {
gf_lkowner_t l_owner;
};
-
extern char *glusterfs_strevent (glusterfs_event_t ev);
#define GF_MUST_CHECK __attribute__((warn_unused_result))
diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h
index 7604f8c21e9..5e4216deca5 100644
--- a/libglusterfs/src/xlator.h
+++ b/libglusterfs/src/xlator.h
@@ -832,6 +832,18 @@ struct _xlator {
#define xlator_has_parent(xl) (xl->parents != NULL)
+/*
+ * Call the notify() method of xlator _xl with THIS temporarily switched
+ * to _xl; the previous THIS is restored afterwards.
+ *
+ * The result of notify() is assigned to a variable named `ret', which
+ * must already be declared in the calling scope.  _xl is evaluated more
+ * than once, so pass an expression without side effects.
+ *
+ * The expansion deliberately has no trailing semicolon: the caller
+ * supplies it, which keeps the macro a single statement inside if/else.
+ */
+#define XLATOR_NOTIFY(_xl, params ...)                  \
+        do {                                            \
+                xlator_t *_old_THIS = NULL;             \
+                                                        \
+                _old_THIS = THIS;                       \
+                THIS = (_xl);                           \
+                                                        \
+                ret = (_xl)->notify ((_xl), params);    \
+                                                        \
+                THIS = _old_THIS;                       \
+        } while (0)
+
int32_t xlator_set_type_virtual (xlator_t *xl, const char *type);
int32_t xlator_set_type (xlator_t *xl, const char *type);
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
index 827201e2dad..cd7adde4e19 100644
--- a/rpc/rpc-lib/src/protocol-common.h
+++ b/rpc/rpc-lib/src/protocol-common.h
@@ -182,13 +182,22 @@ enum glusterd_brick_procnum {
GLUSTERD_BRICK_NULL, /* 0 */
GLUSTERD_BRICK_TERMINATE,
GLUSTERD_BRICK_XLATOR_INFO,
- GLUSTERD_BRICK_XLATOR_HEAL,
+ GLUSTERD_BRICK_XLATOR_OP,
GLUSTERD_BRICK_STATUS,
GLUSTERD_BRICK_OP,
GLUSTERD_BRICK_XLATOR_DEFRAG,
GLUSTERD_BRICK_MAXVALUE,
};
+/*
+ * Operations carried in the "heal-op" / "xl-op" dictionary keys for the
+ * "volume heal" command.  The CLI words that select each value are
+ * given next to it.
+ */
+typedef enum {
+ /* Initial value used before an op has been read from a dictionary. */
+ GF_AFR_OP_INVALID,
+ /* "volume heal <VOLNAME>": heal using an INDEX crawl. */
+ GF_AFR_OP_HEAL_INDEX,
+ /* "volume heal <VOLNAME> full": heal using a FULL crawl. */
+ GF_AFR_OP_HEAL_FULL,
+ /* "volume heal <VOLNAME> info": list the entries found by an INDEX crawl. */
+ GF_AFR_OP_INDEX_SUMMARY,
+ /* "volume heal <VOLNAME> info healed": list the healed-entries history. */
+ GF_AFR_OP_HEALED_FILES,
+ /* "volume heal <VOLNAME> info heal-failed": list the heal-failed history. */
+ GF_AFR_OP_HEAL_FAILED_FILES,
+ /* "volume heal <VOLNAME> info split-brain": list the split-brain history. */
+ GF_AFR_OP_SPLIT_BRAIN_FILES
+} gf_xl_afr_op_t ;
#define GLUSTER_HNDSK_PROGRAM 14398633 /* Completely random */
#define GLUSTER_HNDSK_VERSION 1 /* 0.0.1 */
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am
index 16ed25af10b..ed090181316 100644
--- a/xlators/cluster/afr/src/Makefile.am
+++ b/xlators/cluster/afr/src/Makefile.am
@@ -15,7 +15,7 @@ noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-
AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
-I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/contrib/md5 -shared -nostartfiles $(GF_CFLAGS) \
- -I$(top_srcdir)/xlators/lib/src
+ -I$(top_srcdir)/xlators/lib/src -I$(top_srcdir)/rpc/rpc-lib/src
CLEANFILES =
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 1895150cd1d..9a78f6d3d4d 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -3399,7 +3399,7 @@ find_child_index (xlator_t *this, xlator_t *child)
int32_t
afr_notify (xlator_t *this, int32_t event,
- void *data, ...)
+ void *data, void *data2)
{
afr_private_t *priv = NULL;
int i = -1;
@@ -3412,6 +3412,8 @@ afr_notify (xlator_t *this, int32_t event,
int ret = -1;
int call_psh = 0;
int up_child = AFR_ALL_CHILDREN;
+ dict_t *input = NULL;
+ dict_t *output = NULL;
priv = this->private;
@@ -3499,10 +3501,11 @@ afr_notify (xlator_t *this, int32_t event,
break;
- case GF_EVENT_TRIGGER_HEAL:
- gf_log (this->name, GF_LOG_INFO, "Self-heal was triggered"
- " manually. Start crawling");
- call_psh = 1;
+ case GF_EVENT_TRANSLATOR_OP:
+ input = data;
+ output = data2;
+ ret = afr_xl_op (this, input, output);
+ goto out;
break;
default:
@@ -3552,7 +3555,7 @@ afr_notify (xlator_t *this, int32_t event,
ret = 0;
if (propagate)
ret = default_notify (this, event, data);
- if (call_psh) {
+ if (call_psh && priv->shd.enabled) {
gf_log (this->name, GF_LOG_DEBUG, "start crawl: %d", up_child);
afr_do_poll_self_heal ((void*) (long) up_child);
}
@@ -3925,6 +3928,23 @@ afr_priv_destroy (afr_private_t *priv)
goto out;
inode_unref (priv->root_inode);
GF_FREE (priv->shd.pos);
+ GF_FREE (priv->shd.pending);
+ GF_FREE (priv->shd.inprogress);
+ GF_FREE (priv->shd.sh_times);
+// for (i = 0; i < priv->child_count; i++)
+// if (priv->shd.timer && priv->shd.timer[i])
+// gf_timer_call_cancel (this->ctx, priv->shd.timer[i]);
+ GF_FREE (priv->shd.timer);
+
+ if (priv->shd.healed)
+ eh_destroy (priv->shd.healed);
+
+ if (priv->shd.heal_failed)
+ eh_destroy (priv->shd.heal_failed);
+
+ if (priv->shd.split_brain)
+ eh_destroy (priv->shd.split_brain);
+
GF_FREE (priv->last_event);
if (priv->pending_key) {
for (i = 0; i < priv->child_count; i++)
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index 22813940867..a138c967676 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -44,10 +44,12 @@ enum gf_afr_mem_types_ {
gf_afr_mt_locked_fd,
gf_afr_mt_inode_ctx_t,
gf_afr_fd_paused_call_t,
- gf_afr_mt_afr_crawl_data_t,
- gf_afr_mt_afr_brick_pos_t,
- gf_afr_mt_afr_shd_bool_t,
- gf_afr_mt_afr_shd_timer_t,
+ gf_afr_mt_crawl_data_t,
+ gf_afr_mt_brick_pos_t,
+ gf_afr_mt_shd_bool_t,
+ gf_afr_mt_shd_timer_t,
+ gf_afr_mt_shd_event_t,
+ gf_afr_mt_time_t,
gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 1f071b87150..fa7e61e49e8 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -25,11 +25,453 @@
#include "syncop.h"
#include "afr-self-heald.h"
#include "afr-self-heal-common.h"
+#include "protocol-common.h"
+#include "event-history.h"
#define AFR_POLL_TIMEOUT 600
+/* Bit flags passed as crawl_flags to afr_start_crawl() and _crawl_proceed(). */
+typedef enum {
+ /* When set, _crawl_proceed() stops the crawl if fewer than 2 children are up. */
+ STOP_CRAWL_ON_SINGLE_SUBVOL = 1
+} afr_crawl_flags_t;
+
+/* Context handed to _add_event_to_dict() while an event history is dumped. */
+typedef struct shd_dump {
+ /* Dictionary that matching event paths are added to. */
+ dict_t *dict;
+ /* Cut-off: only events stamped at or after this time are added. */
+ time_t sh_time;
+ /* Xlator on whose behalf the dump runs (used for logging and key lookup). */
+ xlator_t *this;
+ /* Only events recorded for this child index are added. */
+ int child;
+} shd_dump_t;
+
+/* One entry stored in the healed / heal-failed / split-brain histories. */
+typedef struct shd_event_ {
+ /* Index of the child the crawl that produced this event ran on. */
+ int child;
+ /* Path of the entry; released with GF_FREE by shd_cleanup_event(). */
+ char *path;
+} shd_event_t;
+
+typedef int
+(*afr_crawl_done_cbk_t) (int ret, call_frame_t *sync_frame, void *crawl_data);
+
+void
+afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl,
+ process_entry_cbk_t process_entry, void *op_data,
+ gf_boolean_t exclusive, int crawl_flags,
+ afr_crawl_done_cbk_t crawl_done);
+
+static int
+_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data);
+
+/*
+ * Release a shd_event_t together with the path string it carries.
+ * A NULL event is accepted and ignored.
+ */
+void
+shd_cleanup_event (void *event)
+{
+        shd_event_t *ev = event;
+
+        if (ev == NULL)
+                return;
+
+        if (ev->path != NULL)
+                GF_FREE (ev->path);
+        GF_FREE (ev);
+}
+
+/*
+ * Return the index of the first child whose position is AFR_POS_LOCAL,
+ * or -1 when none of the child_count children is local.
+ */
+int
+afr_get_local_child (afr_self_heald_t *shd, unsigned int child_count)
+{
+        int idx = 0;
+
+        for (idx = 0; idx < child_count; idx++) {
+                if (shd->pos[idx] == AFR_POS_LOCAL)
+                        return idx;
+        }
+
+        return -1;
+}
+
+/*
+ * Fill @loc so that it names entry @name inside the directory described
+ * by @parent: pargfid is copied from the parent inode, path is the empty
+ * string and a reference on the parent inode is taken.
+ *
+ * Returns 0 on success, -1 (with @loc wiped) if the reference could not
+ * be taken.
+ */
+static int
+_build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent)
+{
+        uuid_copy (loc->pargfid, parent->inode->gfid);
+        loc->path = "";
+        loc->name = name;
+        loc->parent = inode_ref (parent->inode);
+        if (loc->parent)
+                return 0;
+
+        /*
+         * path points at a string literal; it is cleared before the wipe,
+         * presumably so that loc_wipe() does not try to release it.
+         */
+        loc->path = NULL;
+        loc_wipe (loc);
+
+        return -1;
+}
+
+/*
+ * Append @str to the per-child list kept in @output.
+ *
+ * The id of this xlator is looked up in @output under this->name; the
+ * string is stored as "<xl_id>-<child>-<n>", where n is the current
+ * value of "<xl_id>-<child>-count" (0 when absent), and the counter is
+ * then stored back incremented by one.
+ *
+ * @dyn selects dict_set_dynstr() over dict_set_str() for storing @str.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+_add_str_to_dict (xlator_t *this, dict_t *output, int child, char *str,
+                  gf_boolean_t dyn)
+{
+        int             ret = -1;
+        int             xl_id = 0;
+        uint64_t        nentries = 0;
+        char            count_key[256] = {0};
+        char            entry_key[256] = {0};
+
+        ret = dict_get_int32 (output, this->name, &xl_id);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "xl does not have id");
+                return ret;
+        }
+
+        snprintf (count_key, sizeof (count_key), "%d-%d-count", xl_id, child);
+        /* The result is deliberately ignored: no counter yet means 0. */
+        ret = dict_get_uint64 (output, count_key, &nentries);
+
+        snprintf (entry_key, sizeof (entry_key), "%d-%d-%"PRIu64, xl_id,
+                  child, nentries);
+        ret = dyn ? dict_set_dynstr (output, entry_key, str)
+                  : dict_set_str (output, entry_key, str);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "%s: Could not add to output",
+                        str);
+                return ret;
+        }
+
+        ret = dict_set_uint64 (output, count_key, nentries + 1);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Could not increment count");
+                return ret;
+        }
+
+        return 0;
+}
+
+/*
+ * Resolve the path of @child by reading the GFID_TO_PATH_KEY xattr
+ * through @readdir_xl.
+ *
+ * On success *fpath receives a gf_strdup()'d copy of the path, which the
+ * caller must release; on failure *fpath is left untouched.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+_get_path_from_gfid_loc (xlator_t *this, xlator_t *readdir_xl, loc_t *child,
+                         char **fpath)
+{
+        dict_t  *xattr = NULL;
+        char    *xattr_path = NULL;
+        char    *copy = NULL;
+        int     ret = -1;
+
+        ret = syncop_getxattr (readdir_xl, child, &xattr,
+                               GFID_TO_PATH_KEY);
+        if (ret)
+                goto out;
+
+        ret = dict_get_str (xattr, GFID_TO_PATH_KEY, &xattr_path);
+        if (ret) {
+                gf_log (this->name, GF_LOG_DEBUG, "Failed to get path for "
+                        "gfid %s", uuid_utoa (child->gfid));
+                goto out;
+        }
+
+        /* The xattr dictionary is dropped below, so hand out a copy. */
+        copy = gf_strdup (xattr_path);
+        if (!copy) {
+                ret = -1;
+                goto out;
+        }
+
+        *fpath = copy;
+        ret = 0;
+out:
+        if (xattr)
+                dict_unref (xattr);
+        return ret;
+}
+
+/*
+ * eh_dump() callback: add the path of one history event to the dump
+ * dictionary.  Events of other children, and events stamped before the
+ * cut-off time in the dump context, are skipped and reported as 0.
+ *
+ * @cb:   history buffer entry; cb->data is a shd_event_t
+ * @data: the shd_dump_t describing the dump
+ */
+int
+_add_event_to_dict (circular_buffer_t *cb, void *data)
+{
+        shd_dump_t      *dump = data;
+        shd_event_t     *event = cb->data;
+
+        if (event->child != dump->child)
+                return 0;
+
+        if (cb->tv.tv_sec < dump->sh_time)
+                return 0;
+
+        return _add_str_to_dict (dump->this, dump->dict, dump->child,
+                                 event->path, _gf_false);
+}
+
+/*
+ * Dump history @eh into @dict, keeping only the events that belong to
+ * @child and are stamped at or after @sh_time.  Always returns 0.
+ */
+int
+_add_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict, time_t sh_time,
+                 int child)
+{
+        shd_dump_t dump_data = {
+                .dict    = dict,
+                .sh_time = sh_time,
+                .this    = this,
+                .child   = child,
+        };
+
+        eh_dump (eh, &dump_data, _add_event_to_dict);
+
+        return 0;
+}
+
+/*
+ * Per-entry crawl callback for the index summary: resolve the path of
+ * @childloc from its gfid and append it to the output dictionary held
+ * in crawl_data->op_data, under the child the crawl runs on.
+ *
+ * Returns 0 on success; -1 when the gfid is null; otherwise the error
+ * of the failing step.
+ */
+int
+_add_summary_to_dict (xlator_t *this, afr_crawl_data_t *crawl_data,
+                      gf_dirent_t *entry,
+                      loc_t *childloc, loc_t *parentloc, struct iatt *iattr)
+{
+        dict_t  *output = NULL;
+        char    *path = NULL;
+        int     ret = -1;
+
+        if (uuid_is_null (childloc->gfid))
+                return -1;
+
+        ret = _get_path_from_gfid_loc (this, crawl_data->readdir_xl, childloc,
+                                       &path);
+        if (ret)
+                return ret;
+
+        output = crawl_data->op_data;
+        ret = _add_str_to_dict (this, output, crawl_data->child, path,
+                                _gf_true);
+        /* The path is only released here when it was not stored. */
+        if (ret)
+                GF_FREE (path);
+
+        return ret;
+}
+
+/*
+ * Unlink the index entry @fname under @parent through @readdir_xl.
+ * A failed unlink is logged; nothing is reported to the caller.
+ */
+void
+_remove_stale_index (xlator_t *this, xlator_t *readdir_xl,
+                     loc_t *parent, char *fname)
+{
+        loc_t   stale = {0};
+
+        if (_build_index_loc (this, &stale, fname, parent))
+                return;
+
+        gf_log (this->name, GF_LOG_INFO, "Removing stale index "
+                "for %s on %s", stale.name, readdir_xl->name);
+        if (syncop_unlink (readdir_xl, &stale)) {
+                gf_log (this->name, GF_LOG_ERROR, "%s: Failed to remove"
+                        " index on %s - %s", stale.name,
+                        readdir_xl->name, strerror (errno));
+        }
+
+        /* path was set to a string literal by _build_index_loc(). */
+        stale.path = NULL;
+        loc_wipe (&stale);
+}
+
+/*
+ * Record the outcome of a self-heal attempt on @child.
+ *
+ * For INDEX crawls an ENOENT failure means the index entry is stale: it
+ * is removed and nothing is recorded.  Otherwise the path of the entry
+ * (resolved from the gfid for INDEX crawls, copied from child->path for
+ * other crawls) is saved in one of the histories:
+ *   op_ret < 0 with EIO  -> split_brain
+ *   any other op_ret < 0 -> heal_failed
+ *   op_ret >= 0          -> healed
+ *
+ * @op_ret / @op_errno: result of the lookup that triggered the heal
+ */
+void
+_crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child,
+                       int32_t op_ret, int32_t op_errno,
+                       afr_crawl_data_t *crawl_data)
+{
+        int              ret = 0;
+        afr_private_t    *priv = NULL;
+        afr_self_heald_t *shd = NULL;
+        eh_t             *eh = NULL;
+        char             *path = NULL;
+        shd_event_t      *event = NULL;
+
+        priv = this->private;
+        shd = &priv->shd;
+        if (crawl_data->crawl == INDEX) {
+                if ((op_ret < 0) && (op_errno == ENOENT)) {
+                        _remove_stale_index (this, crawl_data->readdir_xl,
+                                             parent, uuid_utoa (child->gfid));
+                        goto out;
+                }
+                ret = _get_path_from_gfid_loc (this, crawl_data->readdir_xl,
+                                               child, &path);
+                if (ret)
+                        goto out;
+        } else {
+                path = gf_strdup (child->path);
+                if (!path) {
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        if (op_ret < 0 && op_errno == EIO)
+                eh = shd->split_brain;
+        else if (op_ret < 0)
+                eh = shd->heal_failed;
+        else
+                eh = shd->healed;
+        ret = -1;
+        event = GF_CALLOC (1, sizeof (*event), gf_afr_mt_shd_event_t);
+        if (!event)
+                goto out;
+        event->child = crawl_data->child;
+        event->path = path;
+        ret = eh_save_history (eh, event);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "%s:Failed to save to "
+                        "eh, (%d, %s)", path, op_ret, strerror (op_errno));
+                goto out;
+        }
+        ret = 0;
+out:
+        if (ret) {
+                /*
+                 * On failure the history did not keep the event (the path
+                 * was already released on this route), so release the
+                 * wrapper as well instead of leaking it.
+                 */
+                if (event)
+                        GF_FREE (event);
+                if (path)
+                        GF_FREE (path);
+        }
+        return;
+}
+
+/*
+ * Per-entry crawl callback used for healing: look @child up through
+ * this xlator and pass the result, together with errno, to
+ * _crawl_post_sh_action().
+ *
+ * Returns the result of the lookup.
+ */
+int
+_self_heal_entry (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *entry,
+                  loc_t *child, loc_t *parent, struct iatt *iattr)
+{
+        struct iatt     parentbuf = {0};
+        int             lookup_ret = 0;
+
+        /* Log the path when there is no gfid to identify the entry by. */
+        if (!uuid_is_null (child->gfid))
+                gf_log (this->name, GF_LOG_DEBUG, "lookup %s",
+                        uuid_utoa (child->gfid));
+        else
+                gf_log (this->name, GF_LOG_DEBUG, "lookup %s", child->path);
+
+        lookup_ret = syncop_lookup (this, child, NULL,
+                                    iattr, NULL, &parentbuf);
+        _crawl_post_sh_action (this, parent, child, lookup_ret, errno,
+                               crawl_data);
+
+        return lookup_ret;
+}
+
+/*
+ * Crawl completion callback: releases @data with GF_FREE and destroys
+ * the call stack of the sync frame.  Always returns 0.
+ */
+static int
+afr_crawl_done (int ret, call_frame_t *sync_frame, void *data)
+{
+        /* The crawl result is not consulted. */
+        (void) ret;
+
+        GF_FREE (data);
+        STACK_DESTROY (sync_frame->root);
+
+        return 0;
+}
+
void
-afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl);
+_do_self_heal_on_subvol (xlator_t *this, int child, afr_crawl_type_t crawl)
+{
+ /* Start a healing crawl of type `crawl' on child index `child'. */
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ /*
+  * Stamp the start time for this child; _add_all_subvols_eh_to_dict()
+  * passes it on as the cut-off for the events it reports.
+  */
+ time (&shd->sh_times[child]);
+ /*
+  * _self_heal_entry is the per-entry callback and there is no op_data;
+  * the crawl is exclusive, stops when fewer than 2 children are up, and
+  * afr_crawl_done runs on completion.
+  */
+ afr_start_crawl (this, child, crawl, _self_heal_entry,
+ NULL, _gf_true, STOP_CRAWL_ON_SINGLE_SUBVOL,
+ afr_crawl_done);
+}
+
+/*
+ * Start a healing crawl of type @crawl on every child of this xlator,
+ * one _do_self_heal_on_subvol() call per child index.
+ *
+ * NOTE(review): despite the name, no AFR_POS_LOCAL filtering happens
+ * here; confirm whether afr_start_crawl() skips non-local children.
+ */
+void
+_do_self_heal_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl)
+{
+        int             i = 0;
+        afr_private_t   *priv = NULL;
+
+        priv = this->private;
+        for (i = 0; i < priv->child_count; i++) {
+                /* Honour the requested crawl type instead of forcing INDEX. */
+                _do_self_heal_on_subvol (this, i, crawl);
+        }
+}
+
+/*
+ * Start a healing crawl of type @crawl on the first local child, as
+ * reported by afr_get_local_child().  When no child is local a message
+ * is logged and no crawl is started.
+ */
+void
+_do_self_heal_on_local_subvol (xlator_t *this, afr_crawl_type_t crawl)
+{
+        int             local_child = -1;
+        afr_private_t   *priv = NULL;
+
+        priv = this->private;
+        local_child = afr_get_local_child (&priv->shd,
+                                           priv->child_count);
+        if (local_child < 0) {
+                /*
+                 * afr_get_local_child() returns -1 for "none"; going on
+                 * would index the per-child arrays with -1.
+                 */
+                gf_log (this->name, GF_LOG_INFO,
+                        "No local bricks found");
+                return;
+        }
+        _do_self_heal_on_subvol (this, local_child, crawl);
+}
+
+/*
+ * Run an INDEX crawl on every child with _add_summary_to_dict as the
+ * per-entry callback, so the entries found are added to @output.
+ * The crawls are started non-exclusive, without crawl flags and without
+ * a completion callback.  Always returns 0.
+ */
+int
+_get_index_summary_on_local_subvols (xlator_t *this, dict_t *output)
+{
+        afr_private_t   *priv = this->private;
+        int             child = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                afr_start_crawl (this, child, INDEX, _add_summary_to_dict,
+                                 output, _gf_false, 0, NULL);
+        }
+
+        return 0;
+}
+
+/*
+ * Dump history @eh into @dict for every child whose position is
+ * AFR_POS_LOCAL, using that child's recorded sh_times value as the
+ * cut-off time.  Always returns 0.
+ */
+int
+_add_all_subvols_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict)
+{
+        afr_private_t           *priv = this->private;
+        afr_self_heald_t        *shd = &priv->shd;
+        int                     child = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (shd->pos[child] == AFR_POS_LOCAL)
+                        _add_eh_to_dict (this, eh, dict,
+                                         shd->sh_times[child], child);
+        }
+
+        return 0;
+}
+
+/*
+ * Execute the operation requested through a GF_EVENT_TRANSLATOR_OP
+ * notification.
+ *
+ * @input:  must hold "xl-op" (a gf_xl_afr_op_t) and, under this->name,
+ *          the id assigned to this xlator
+ * @output: receives the results; the id is stored in it under
+ *          this->name while the op runs and removed again before
+ *          returning
+ *
+ * Returns 0 on success and non-zero on failure, including an op value
+ * that is not handled here.
+ */
+int
+afr_xl_op (xlator_t *this, dict_t *input, dict_t *output)
+{
+        gf_xl_afr_op_t   op = GF_AFR_OP_INVALID;
+        int              ret = 0;
+        afr_private_t    *priv = NULL;
+        afr_self_heald_t *shd = NULL;
+        int              xl_id = 0;
+
+        priv = this->private;
+        shd = &priv->shd;
+
+        ret = dict_get_int32 (input, "xl-op", (int32_t*)&op);
+        if (ret)
+                goto out;
+        ret = dict_get_int32 (input, this->name, &xl_id);
+        if (ret)
+                goto out;
+        /* The helpers below look the id up in the output dictionary. */
+        ret = dict_set_int32 (output, this->name, xl_id);
+        if (ret)
+                goto out;
+        switch (op) {
+        case GF_AFR_OP_HEAL_INDEX:
+                _do_self_heal_on_local_subvols (this, INDEX);
+                ret = 0;
+                break;
+        case GF_AFR_OP_HEAL_FULL:
+                _do_self_heal_on_local_subvol (this, FULL);
+                ret = 0;
+                break;
+        case GF_AFR_OP_INDEX_SUMMARY:
+                ret = _get_index_summary_on_local_subvols (this, output);
+                break;
+        case GF_AFR_OP_HEALED_FILES:
+                ret = _add_all_subvols_eh_to_dict (this, shd->healed, output);
+                break;
+        case GF_AFR_OP_HEAL_FAILED_FILES:
+                ret = _add_all_subvols_eh_to_dict (this, shd->heal_failed,
+                                                   output);
+                break;
+        case GF_AFR_OP_SPLIT_BRAIN_FILES:
+                ret = _add_all_subvols_eh_to_dict (this, shd->split_brain,
+                                                   output);
+                break;
+        default:
+                /* An op that is not understood must not look successful. */
+                gf_log (this->name, GF_LOG_ERROR, "Unknown set op %d", op);
+                ret = -1;
+                break;
+        }
+out:
+        dict_del (output, this->name);
+        return ret;
+}
void
afr_do_poll_self_heal (void *data)
@@ -39,21 +481,14 @@ afr_do_poll_self_heal (void *data)
struct timeval timeout = {0};
xlator_t *this = NULL;
long child = (long)data;
- int i = 0;
this = THIS;
priv = this->private;
shd = &priv->shd;
- if (child == AFR_ALL_CHILDREN) { //done by command
- for (i = 0; i < priv->child_count; i++)
- afr_start_crawl (this, i, INDEX);
+ _do_self_heal_on_subvol (this, child, INDEX);
+ if (shd->pos[child] == AFR_POS_REMOTE)
goto out;
- } else {
- afr_start_crawl (this, child, INDEX);
- if (shd->pos[child] == AFR_POS_REMOTE)
- goto out;
- }
timeout.tv_sec = AFR_POLL_TIMEOUT;
timeout.tv_usec = 0;
if (shd->timer[child])
@@ -71,9 +506,6 @@ out:
}
static int
-_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data,
- xlator_t *readdir_xl);
-static int
get_pathinfo_host (char *pathinfo, char *hostname, size_t size)
{
char *start = NULL;
@@ -132,15 +564,16 @@ out:
int
afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data,
- loc_t *dirloc, xlator_t *readdir_xl)
+ loc_t *dirloc)
{
afr_private_t *priv = NULL;
dict_t *xattr = NULL;
void *index_gfid = NULL;
loc_t rootloc = {0};
- struct iatt iatt = {0};
+ struct iatt iattr = {0};
struct iatt parent = {0};
int ret = 0;
+ xlator_t *readdir_xl = crawl_data->readdir_xl;
priv = this->private;
if (crawl_data->crawl == FULL) {
@@ -167,13 +600,13 @@ afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data,
dirloc->path = "";
dirloc->inode = inode_new (priv->root_inode->table);
ret = syncop_lookup (readdir_xl, dirloc, NULL,
- &iatt, NULL, &parent);
+ &iattr, NULL, &parent);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR, "lookup failed on "
"index dir on %s", readdir_xl->name);
goto out;
}
- inode_link (dirloc->inode, NULL, NULL, &iatt);
+ inode_link (dirloc->inode, NULL, NULL, &iattr);
}
ret = 0;
out:
@@ -185,7 +618,7 @@ out:
int
afr_crawl_opendir (xlator_t *this, afr_crawl_data_t *crawl_data, fd_t **dirfd,
- loc_t *dirloc, xlator_t *readdir_xl)
+ loc_t *dirloc)
{
fd_t *fd = NULL;
int ret = 0;
@@ -199,7 +632,7 @@ afr_crawl_opendir (xlator_t *this, afr_crawl_data_t *crawl_data, fd_t **dirfd,
goto out;
}
- ret = syncop_opendir (readdir_xl, dirloc, fd);
+ ret = syncop_opendir (crawl_data->readdir_xl, dirloc, fd);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"opendir failed on %s", dirloc->path);
@@ -247,7 +680,7 @@ afr_crawl_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent,
}
gf_boolean_t
-_crawl_proceed (xlator_t *this, int child)
+_crawl_proceed (xlator_t *this, int child, int crawl_flags)
{
afr_private_t *priv = this->private;
gf_boolean_t proceed = _gf_false;
@@ -258,77 +691,33 @@ _crawl_proceed (xlator_t *this, int child)
goto out;
}
- if (afr_up_children_count (priv->child_up,
- priv->child_count) < 2) {
- gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as "
- "< 2 children are up");
- goto out;
- }
- proceed = _gf_true;
-out:
- return proceed;
-}
-
-static int
-_build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent)
-{
- int ret = 0;
-
- uuid_copy (loc->pargfid, parent->inode->gfid);
- loc->path = "";
- loc->name = name;
- loc->parent = inode_ref (parent->inode);
- if (!loc->parent) {
- loc->path = NULL;
- loc_wipe (loc);
- ret = -1;
- }
- return ret;
-}
-
-void
-_index_crawl_post_lookup_fop (xlator_t *this, loc_t *parentloc,
- gf_dirent_t *entry, int op_ret, int op_errno,
- xlator_t *readdir_xl)
-{
- loc_t index_loc = {0};
- int ret = 0;
-
- if (op_ret && (op_errno == ENOENT)) {
- ret = _build_index_loc (this, &index_loc, entry->d_name,
- parentloc);
- if (ret)
+ if (crawl_flags & STOP_CRAWL_ON_SINGLE_SUBVOL) {
+ if (afr_up_children_count (priv->child_up,
+ priv->child_count) < 2) {
+ gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as "
+ "< 2 children are up");
goto out;
- gf_log (this->name, GF_LOG_INFO, "Removing stale index "
- "for %s on %s", index_loc.name, readdir_xl->name);
- ret = syncop_unlink (readdir_xl, &index_loc);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "%s: Failed to remove"
- " index on %s - %s", index_loc.name,
- readdir_xl->name, strerror (errno));
}
- index_loc.path = NULL;
- loc_wipe (&index_loc);
}
+ proceed = _gf_true;
out:
- return;
+ return proceed;
}
static int
-_perform_self_heal (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,
- off_t *offset, afr_crawl_data_t *crawl_data,
- xlator_t *readdir_xl)
+_process_entries (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,
+ off_t *offset, afr_crawl_data_t *crawl_data)
{
gf_dirent_t *entry = NULL;
gf_dirent_t *tmp = NULL;
- struct iatt iatt = {0};
- struct iatt parent = {0};
int ret = 0;
loc_t entry_loc = {0};
fd_t *fd = NULL;
+ struct iatt iattr = {0};
list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- if (!_crawl_proceed (this, crawl_data->child)) {
+ if (!_crawl_proceed (this, crawl_data->child,
+ crawl_data->crawl_flags)) {
ret = -1;
goto out;
}
@@ -344,62 +733,51 @@ _perform_self_heal (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,
continue;
}
+ if (crawl_data->crawl == INDEX)
+ entry_loc.path = NULL;//HACK
loc_wipe (&entry_loc);
ret = afr_crawl_build_child_loc (this, &entry_loc, parentloc,
entry, crawl_data);
if (ret)
goto out;
- if (uuid_is_null (entry_loc.gfid)) {
- gf_log (this->name, GF_LOG_WARNING, "failed to build "
- "location for %s", entry->d_name);
- continue;
- }
- if (entry_loc.path)
- gf_log (this->name, GF_LOG_DEBUG, "lookup %s",
- entry_loc.path);
- else
- gf_log (this->name, GF_LOG_DEBUG, "lookup %s",
- uuid_utoa (entry_loc.gfid));
-
- ret = syncop_lookup (this, &entry_loc, NULL,
- &iatt, NULL, &parent);
- if (crawl_data->crawl == INDEX) {
- _index_crawl_post_lookup_fop (this, parentloc, entry,
- ret, errno, readdir_xl);
- entry_loc.path = NULL;
- loc_wipe (&entry_loc);
+ ret = crawl_data->process_entry (this, crawl_data, entry,
+ &entry_loc, parentloc, &iattr);
+
+ if (crawl_data->crawl == INDEX)
continue;
- }
- //Don't fail the crawl if lookup fails as it
- //could be because of split-brain
- if (ret || (!IA_ISDIR (iatt.ia_type)))
+ if (ret || !IA_ISDIR (iattr.ia_type))
continue;
- inode_link (entry_loc.inode, parentloc->inode, NULL, &iatt);
- ret = afr_crawl_opendir (this, crawl_data, &fd, &entry_loc,
- readdir_xl);
+
+ inode_link (entry_loc.inode, parentloc->inode, NULL, &iattr);
+
+ fd = NULL;
+ ret = afr_crawl_opendir (this, crawl_data, &fd, &entry_loc);
if (ret)
continue;
- ret = _crawl_directory (fd, &entry_loc, crawl_data, readdir_xl);
- fd_unref (fd);
+ ret = _crawl_directory (fd, &entry_loc, crawl_data);
+ if (fd)
+ fd_unref (fd);
}
ret = 0;
out:
+ if (crawl_data->crawl == INDEX)
+ entry_loc.path = NULL;
if (entry_loc.path)
loc_wipe (&entry_loc);
return ret;
}
static int
-_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data,
- xlator_t *readdir_xl)
+_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data)
{
xlator_t *this = NULL;
off_t offset = 0;
gf_dirent_t entries;
int ret = 0;
gf_boolean_t free_entries = _gf_false;
+ xlator_t *readdir_xl = crawl_data->readdir_xl;
INIT_LIST_HEAD (&entries.list);
this = THIS;
@@ -424,15 +802,16 @@ _crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data,
ret = 0;
free_entries = _gf_true;
- if (!_crawl_proceed (this, crawl_data->child)) {
+ if (!_crawl_proceed (this, crawl_data->child,
+ crawl_data->crawl_flags)) {
ret = -1;
goto out;
}
if (list_empty (&entries.list))
goto out;
- ret = _perform_self_heal (this, loc, &entries, &offset,
- crawl_data, readdir_xl);
+ ret = _process_entries (this, loc, &entries, &offset,
+ crawl_data);
gf_dirent_free (&entries);
free_entries = _gf_false;
}
@@ -515,14 +894,6 @@ out:
return ret;
}
-static int
-afr_crawl_done (int ret, call_frame_t *sync_frame, void *data)
-{
- GF_FREE (data);
- STACK_DESTROY (sync_frame->root);
- return 0;
-}
-
static inline int
afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count)
{
@@ -530,17 +901,74 @@ afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count)
}
static int
-afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data)
+afr_dir_crawl (void *data)
+{
+        /*
+         * One crawl pass for the child named in the afr_crawl_data_t passed
+         * as @data: pick the readdir xlator, build and open the start
+         * directory, then walk it with _crawl_directory().
+         *
+         * Returns -1 when _crawl_proceed() refuses the crawl, otherwise the
+         * status of the last step attempted (0 when the child is not local
+         * or no readdir xlator is available).
+         */
+        afr_crawl_data_t *crawl_data = data;
+        xlator_t         *this       = THIS;
+        afr_private_t    *priv       = this->private;
+        afr_self_heald_t *shd        = &priv->shd;
+        xlator_t         *readdir_xl = NULL;
+        loc_t             dirloc     = {0};
+        fd_t             *fd         = NULL;
+        int               ret        = -1;
+
+        if (!_crawl_proceed (this, crawl_data->child, crawl_data->crawl_flags))
+                goto out;
+
+        ret = afr_find_child_position (this, crawl_data->child);
+        if (ret)
+                goto out;
+
+        if (!afr_is_local_child (shd, crawl_data->child, priv->child_count))
+                goto out;
+
+        readdir_xl = afr_crawl_readdir_xl_get (this, crawl_data);
+        if (!readdir_xl)
+                goto out;
+        /* later stages read the xlator from crawl_data, not an argument */
+        crawl_data->readdir_xl = readdir_xl;
+
+        ret = afr_crawl_build_start_loc (this, crawl_data, &dirloc);
+        if (ret)
+                goto out;
+
+        ret = afr_crawl_opendir (this, crawl_data, &fd, &dirloc);
+        if (ret)
+                goto out;
+
+        ret = _crawl_directory (fd, &dirloc, crawl_data);
+        if (ret == 0)
+                gf_log (this->name, GF_LOG_INFO, "Crawl completed "
+                        "on %s", readdir_xl->name);
+        else
+                gf_log (this->name, GF_LOG_ERROR, "Crawl failed on %s",
+                        readdir_xl->name);
+
+out:
+        if (fd)
+                fd_unref (fd);
+        /* for INDEX crawls the path is cleared before loc_wipe() sees it */
+        if (crawl_data->crawl == INDEX)
+                dirloc.path = NULL;
+        loc_wipe (&dirloc);
+        return ret;
+}
+
+static int
+afr_dir_exclusive_crawl (void *data)
{
afr_private_t *priv = NULL;
afr_self_heald_t *shd = NULL;
- loc_t dirloc = {0};
gf_boolean_t crawl = _gf_false;
int ret = 0;
- xlator_t *readdir_xl = NULL;
- fd_t *fd = NULL;
int child = -1;
+ xlator_t *this = NULL;
+ afr_crawl_data_t *crawl_data = data;
+ this = THIS;
priv = this->private;
shd = &priv->shd;
child = crawl_data->child;
@@ -548,7 +976,8 @@ afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data)
LOCK (&priv->lock);
{
if (shd->inprogress[child]) {
- shd->pending[child] = _gf_true;
+ if (shd->pending[child] != FULL)
+ shd->pending[child] = crawl_data->crawl;
} else {
shd->inprogress[child] = _gf_true;
crawl = _gf_true;
@@ -556,11 +985,6 @@ afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data)
}
UNLOCK (&priv->lock);
- if (!priv->root_inode) {
- ret = -1;
- goto out;
- }
-
if (!crawl) {
gf_log (this->name, GF_LOG_INFO, "Another crawl is in progress "
"for %s", priv->children[child]->name);
@@ -568,87 +992,35 @@ afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data)
}
do {
- readdir_xl = afr_crawl_readdir_xl_get (this, crawl_data);
- if (!readdir_xl)
- goto done;
- ret = afr_crawl_build_start_loc (this, crawl_data, &dirloc,
- readdir_xl);
- if (ret)
- goto done;
- ret = afr_crawl_opendir (this, crawl_data, &fd, &dirloc,
- readdir_xl);
- if (ret)
- goto done;
- ret = _crawl_directory (fd, &dirloc, crawl_data, readdir_xl);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Crawl failed on %s",
- readdir_xl->name);
- else
- gf_log (this->name, GF_LOG_INFO, "Crawl completed "
- "on %s", readdir_xl->name);
- fd_unref (fd);
- fd = NULL;
-done:
+ afr_dir_crawl (data);
LOCK (&priv->lock);
{
- if (shd->pending[child]) {
- shd->pending[child] = _gf_false;
+ if (shd->pending[child] != NONE) {
+ crawl_data->crawl = shd->pending[child];
+ shd->pending[child] = NONE;
} else {
shd->inprogress[child] = _gf_false;
crawl = _gf_false;
}
}
UNLOCK (&priv->lock);
- if (crawl_data->crawl == INDEX) {
- dirloc.path = NULL;
- loc_wipe (&dirloc);
- }
} while (crawl);
out:
- if (fd)
- fd_unref (fd);
- if (crawl_data->crawl == INDEX) {
- dirloc.path = NULL;
- loc_wipe (&dirloc);
- }
- return ret;
-}
-
-static int
-afr_crawl (void *data)
-{
- xlator_t *this = NULL;
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- int ret = -1;
- afr_crawl_data_t *crawl_data = data;
-
- this = THIS;
- priv = this->private;
- shd = &priv->shd;
-
- if (!_crawl_proceed (this, crawl_data->child))
- goto out;
- ret = afr_find_child_position (this, crawl_data->child);
- if (ret)
- goto out;
-
- if (!afr_is_local_child (shd, crawl_data->child, priv->child_count))
- goto out;
-
- ret = afr_crawl_directory (this, crawl_data);
-out:
return ret;
}
void
-afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl)
+afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl,
+ process_entry_cbk_t process_entry, void *op_data,
+ gf_boolean_t exclusive, int crawl_flags,
+ afr_crawl_done_cbk_t crawl_done)
{
afr_private_t *priv = NULL;
afr_self_heald_t *shd = NULL;
call_frame_t *frame = NULL;
afr_crawl_data_t *crawl_data = NULL;
int ret = 0;
+ int (*crawler) (void*) = NULL;
priv = this->private;
shd = &priv->shd;
@@ -662,16 +1034,24 @@ afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl)
afr_set_lk_owner (frame, this);
afr_set_low_priority (frame);
crawl_data = GF_CALLOC (1, sizeof (*crawl_data),
- gf_afr_mt_afr_crawl_data_t);
+ gf_afr_mt_crawl_data_t);
if (!crawl_data)
goto out;
+ crawl_data->process_entry = process_entry;
crawl_data->child = idx;
crawl_data->pid = frame->root->pid;
crawl_data->crawl = crawl;
- gf_log (this->name, GF_LOG_INFO, "starting crawl for %s",
- priv->children[idx]->name);
- ret = synctask_new (this->ctx->env, afr_crawl,
- afr_crawl_done, frame, crawl_data);
+ crawl_data->op_data = op_data;
+ crawl_data->crawl_flags = crawl_flags;
+ gf_log (this->name, GF_LOG_INFO, "starting crawl %d for %s",
+ crawl_data->crawl, priv->children[idx]->name);
+
+ if (exclusive)
+ crawler = afr_dir_exclusive_crawl;
+ else
+ crawler = afr_dir_crawl;
+ ret = synctask_new (this->ctx->env, crawler,
+ crawl_done, frame, crawl_data);
if (ret)
gf_log (this->name, GF_LOG_ERROR, "Could not create the "
"task for %d ret %d", idx, ret);
@@ -679,16 +1059,6 @@ out:
return;
}
-//void
-//afr_full_self_heal (xlator_t *this)
-//{
-// int i = 0;
-// afr_private_t *priv = this->private;
-//
-// for (i = 0; i < priv->child_count; i++)
-// afr_start_crawl (this, i, FULL);
-//}
-
void
afr_build_root_loc (xlator_t *this, loc_t *loc)
{
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index eb10219957d..44fd9f38566 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -26,17 +26,22 @@
#define IS_ENTRY_PARENT(entry) (!strcmp (entry, ".."))
#define AFR_ALL_CHILDREN -1
-typedef enum {
- INDEX,
- FULL,
-} afr_crawl_type_t;
typedef struct afr_crawl_data_ {
- int child;
- pid_t pid;
- afr_crawl_type_t crawl;
- xlator_t *readdir_xl;
+ int child;
+ pid_t pid;
+ afr_crawl_type_t crawl;
+ xlator_t *readdir_xl;
+ void *op_data;
+ int crawl_flags;
+ int (*process_entry) (xlator_t *this, struct afr_crawl_data_ *crawl_data,
+ gf_dirent_t *entry, loc_t *child, loc_t *parent,
+ struct iatt *iattr);
} afr_crawl_data_t;
+typedef int (*process_entry_cbk_t) (xlator_t *this, afr_crawl_data_t *crawl_data,
+ gf_dirent_t *entry, loc_t *child, loc_t *parent,
+ struct iatt *iattr);
+
void afr_proactive_self_heal (xlator_t *this, int idx);
void afr_build_root_loc (xlator_t *this, loc_t *loc);
@@ -48,4 +53,7 @@ afr_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent);
void
afr_do_poll_self_heal (void *data);
+
+int
+afr_xl_op (xlator_t *this, dict_t *input, dict_t *output);
#endif /* __AFR_SELF_HEALD_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index abc6aa3e567..8e2ef10080e 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -30,7 +30,10 @@
#endif
#include "afr-common.c"
-#define SHD_INODE_LRU_LIMIT 100
+#define SHD_INODE_LRU_LIMIT 100
+#define AFR_EH_HEALED_LIMIT 1024
+#define AFR_EH_HEAL_FAIL_LIMIT 1024
+#define AFR_EH_SPLIT_BRAIN_LIMIT 1024
struct volume_options options[];
@@ -39,8 +42,13 @@ notify (xlator_t *this, int32_t event,
void *data, ...)
{
int ret = -1;
+ va_list ap;
+ void *data2 = NULL;
- ret = afr_notify (this, event, data);
+ va_start (ap, data);
+ data2 = va_arg (ap, dict_t*);
+ va_end (ap);
+ ret = afr_notify (this, event, data, data2);
return ret;
}
@@ -342,42 +350,55 @@ init (xlator_t *this)
goto out;
}
- priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count,
- gf_afr_mt_afr_brick_pos_t);
- if (!priv->shd.pos) {
- ret = -ENOMEM;
+ priv->first_lookup = 1;
+ priv->root_inode = NULL;
+
+ if (!priv->shd.enabled) {
+ ret = 0;
goto out;
}
+ ret = -ENOMEM;
+ priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count,
+ gf_afr_mt_brick_pos_t);
+ if (!priv->shd.pos)
+ goto out;
+
priv->shd.pending = GF_CALLOC (sizeof (*priv->shd.pending), child_count,
- gf_afr_mt_afr_shd_bool_t);
- if (!priv->shd.pending) {
- ret = -ENOMEM;
+ gf_afr_mt_int32_t);
+ if (!priv->shd.pending)
goto out;
- }
priv->shd.inprogress = GF_CALLOC (sizeof (*priv->shd.inprogress),
- child_count,
- gf_afr_mt_afr_shd_bool_t);
- if (!priv->shd.inprogress) {
- ret = -ENOMEM;
+ child_count, gf_afr_mt_shd_bool_t);
+ if (!priv->shd.inprogress)
goto out;
- }
priv->shd.timer = GF_CALLOC (sizeof (*priv->shd.timer), child_count,
- gf_afr_mt_afr_shd_timer_t);
- if (!priv->shd.timer) {
- ret = -ENOMEM;
+ gf_afr_mt_shd_timer_t);
+ if (!priv->shd.timer)
+ goto out;
+
+ priv->shd.healed = eh_new (AFR_EH_HEALED_LIMIT, _gf_false);
+ if (!priv->shd.healed)
+ goto out;
+
+ priv->shd.heal_failed = eh_new (AFR_EH_HEAL_FAIL_LIMIT, _gf_false);
+ if (!priv->shd.heal_failed)
+ goto out;
+
+ priv->shd.split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false);
+ if (!priv->shd.split_brain)
+ goto out;
+
+ priv->shd.sh_times = GF_CALLOC (priv->child_count,
+ sizeof (*priv->shd.sh_times),
+ gf_afr_mt_time_t);
+ if (!priv->shd.sh_times)
+ goto out;
+
+ this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable)
goto out;
- }
- if (priv->shd.enabled) {
- this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this);
- if (!this->itable) {
- ret = -ENOMEM;
- goto out;
- }
- }
- priv->first_lookup = 1;
- priv->root_inode = NULL;
ret = 0;
out:
@@ -393,6 +414,8 @@ fini (xlator_t *this)
priv = this->private;
this->private = NULL;
afr_priv_destroy (priv);
+ if (this->itable);//I dont see any destroy func
+
return 0;
}
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index f3d372de5a2..0f4a6d90a72 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -88,12 +88,22 @@ typedef struct afr_inode_ctx_ {
int32_t *fresh_children;//increasing order of latency
} afr_inode_ctx_t;
+typedef enum {
+        NONE = 0,       /* no crawl queued; shd.pending[] is zero-allocated
+                         * and compared against NONE, so this must stay 0 */
+        INDEX,          /* crawl that starts from the index directory */
+        FULL,           /* full crawl; a queued FULL is never replaced by
+                         * a later request of another type */
+} afr_crawl_type_t;
+
typedef struct afr_self_heald_ {
- gf_boolean_t enabled;
- gf_boolean_t *pending;
- gf_boolean_t *inprogress;
- afr_child_pos_t *pos;
- gf_timer_t **timer;
+ gf_boolean_t enabled;
+ afr_crawl_type_t *pending;
+ gf_boolean_t *inprogress;
+ afr_child_pos_t *pos;
+ time_t *sh_times;
+ gf_timer_t **timer;
+ eh_t *healed;
+ eh_t *heal_failed;
+ eh_t *split_brain;
} afr_self_heald_t;
typedef struct _afr_private {
@@ -747,8 +757,7 @@ int
pump_command_reply (call_frame_t *frame, xlator_t *this);
int32_t
-afr_notify (xlator_t *this, int32_t event,
- void *data, ...);
+afr_notify (xlator_t *this, int32_t event, void *data, void *data2);
int
afr_attempt_lock_recovery (xlator_t *this, int32_t child_index);
diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c
index 281bfd722fa..eae7899e9e8 100644
--- a/xlators/cluster/afr/src/pump.c
+++ b/xlators/cluster/afr/src/pump.c
@@ -2334,7 +2334,7 @@ notify (xlator_t *this, int32_t event,
child_xl = (xlator_t *) data;
- ret = afr_notify (this, event, data);
+ ret = afr_notify (this, event, data, NULL);
switch (event) {
case GF_EVENT_CHILD_DOWN:
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index b06dd28cf3f..79439535fed 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -651,6 +651,7 @@ out:
glusterd_op_sm ();
return ret;
}
+
int
glusterd_handle_cli_probe (rpcsvc_request_t *req)
{
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 77ed83f8bbf..da1299de066 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -152,6 +152,7 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin
gd1_mgmt_brick_op_req *brick_req = NULL;
char *volname = NULL;
char name[1024] = {0,};
+ gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID;
GF_ASSERT (op < GD_OP_MAX);
GF_ASSERT (op > GD_OP_NONE);
@@ -190,8 +191,12 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin
if (!brick_req)
goto out;
- brick_req->op = GLUSTERD_BRICK_XLATOR_HEAL;
+ brick_req->op = GLUSTERD_BRICK_XLATOR_OP;
brick_req->name = "";
+ ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op);
+ if (ret)
+ goto out;
+ ret = dict_set_int32 (dict, "xl-op", heal_op);
}
break;
case GD_OP_STATUS_VOLUME:
@@ -2190,6 +2195,7 @@ glusterd_need_brick_op (glusterd_op_t op)
case GD_OP_PROFILE_VOLUME:
case GD_OP_STATUS_VOLUME:
case GD_OP_DEFRAG_BRICK_VOLUME:
+ case GD_OP_HEAL_VOLUME:
ret = _gf_true;
break;
default:
@@ -2578,6 +2584,94 @@ _status_volume_add_brick_rsp (dict_t *this, char *key, data_t *value,
return;
}
+/*
+ * dict_foreach callback that re-keys one entry of a self-heal daemon
+ * response into the per-brick form stored in rsp_ctx->dict.
+ *
+ * input-key (as parsed here): <replica-id>-<child-id>-<suffix>
+ * output-key:                 <brick-id>-<suffix>
+ *   where brick-id = replica-id * volinfo->replica_count + child-id
+ *
+ * Entries whose key does not match the input form (missing separator,
+ * non-numeric or over-long id field) are skipped. "count" in the
+ * destination dict is raised to brick-id + 1 when it is not already larger.
+ *
+ * @this:  dictionary being iterated (not used)
+ * @key:   key of the current entry
+ * @value: value of the current entry; a copy is stored in the destination
+ * @data:  glusterd_heal_rsp_conv_t carrying the destination dict and volinfo
+ */
+void
+_heal_volume_add_shd_rsp (dict_t *this, char *key, data_t *value, void *data)
+{
+        char                      new_key[256]  = {0,};
+        char                      int_str[16]   = {0};
+        data_t                   *new_value     = NULL;
+        char                     *rxl_end       = NULL;
+        char                     *rxl_child_end = NULL;
+        glusterd_volinfo_t       *volinfo       = NULL;
+        glusterd_heal_rsp_conv_t *rsp_ctx       = data;
+        int                       rxl_id        = 0;
+        int                       rxl_child_id  = 0;
+        int                       brick_id      = 0;
+        int                       brick_count   = 0;
+        size_t                    int_len       = 0;
+        int                       ret           = 0;
+
+        rxl_end = strchr (key, '-');
+        if (!rxl_end)
+                goto out;
+
+        /* the id field comes from the key; never copy more than fits */
+        int_len = (size_t)(rxl_end - key);
+        if (int_len >= sizeof (int_str))
+                goto out;
+        strncpy (int_str, key, int_len);
+        int_str[int_len] = '\0';
+        ret = gf_string2int (int_str, &rxl_id);
+        if (ret)
+                goto out;
+
+        rxl_child_end = strchr (rxl_end + 1, '-');
+        if (!rxl_child_end)
+                goto out;
+
+        int_len = (size_t)(rxl_child_end - (rxl_end + 1));
+        if (int_len >= sizeof (int_str))
+                goto out;
+        strncpy (int_str, rxl_end + 1, int_len);
+        int_str[int_len] = '\0';
+        ret = gf_string2int (int_str, &rxl_child_id);
+        if (ret)
+                goto out;
+
+        volinfo = rsp_ctx->volinfo;
+        brick_id = rxl_id * volinfo->replica_count + rxl_child_id;
+
+        new_value = data_copy (value);
+        if (!new_value)
+                goto out;
+
+        /* rxl_child_end still starts with '-', so it supplies the separator */
+        snprintf (new_key, sizeof (new_key), "%d%s", brick_id, rxl_child_end);
+        ret = dict_set (rsp_ctx->dict, new_key, new_value);
+        if (ret) {
+                /* the dict did not take the copy; release it here */
+                data_destroy (new_value);
+                goto out;
+        }
+
+        /* a missing "count" leaves brick_count at 0 */
+        ret = dict_get_int32 (rsp_ctx->dict, "count", &brick_count);
+        if (brick_id >= brick_count)
+                ret = dict_set_int32 (rsp_ctx->dict, "count", brick_id + 1);
+out:
+        return;
+}
+
+/*
+ * Fold a self-heal daemon response (rsp_dict) into the operation context
+ * (op_ctx) for the volume named by "volname" in req_dict. Each response
+ * entry is re-keyed by _heal_volume_add_shd_rsp().
+ *
+ * Returns 0 on success, or the error from the volname lookup or
+ * glusterd_volinfo_find().
+ */
+int
+glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict,
+                                dict_t *op_ctx, char **op_errstr)
+{
+        glusterd_heal_rsp_conv_t  rsp_ctx = {0};
+        glusterd_volinfo_t       *volinfo = NULL;
+        char                     *volname = NULL;
+        int                       ret     = 0;
+
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (op_ctx);
+        GF_ASSERT (op_errstr);
+
+        ret = dict_get_str (req_dict, "volname", &volname);
+        if (ret) {
+                gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+                return ret;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret)
+                return ret;
+
+        rsp_ctx.volinfo = volinfo;
+        rsp_ctx.dict = op_ctx;
+        dict_foreach (rsp_dict, _heal_volume_add_shd_rsp, &rsp_ctx);
+
+        return ret;
+}
+
int
glusterd_status_volume_brick_rsp (glusterd_brickinfo_t *brickinfo,
dict_t *rsp_dict, dict_t *op_ctx,
@@ -2607,27 +2701,29 @@ glusterd_status_volume_brick_rsp (glusterd_brickinfo_t *brickinfo,
rsp_ctx.count = index;
rsp_ctx.dict = op_ctx;
dict_foreach (rsp_dict, _status_volume_add_brick_rsp, &rsp_ctx);
- ret = dict_set_int32 (op_ctx, "count", count);
out:
return ret;
}
int32_t
-glusterd_handle_brick_rsp (glusterd_brickinfo_t *brickinfo,
- glusterd_op_t op, dict_t *rsp_dict, dict_t *op_ctx,
- char **op_errstr)
+glusterd_handle_node_rsp (glusterd_req_ctx_t *req_ctx, void *pending_entry,
+ glusterd_op_t op, dict_t *rsp_dict, dict_t *op_ctx,
+ char **op_errstr)
{
- int ret = 0;
+ int ret = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
GF_ASSERT (op_errstr);
switch (op) {
case GD_OP_PROFILE_VOLUME:
+ brickinfo = pending_entry;
ret = glusterd_profile_volume_brick_rsp (brickinfo, rsp_dict,
op_ctx, op_errstr);
break;
case GD_OP_STATUS_VOLUME:
+ brickinfo = pending_entry;
ret = glusterd_status_volume_brick_rsp (brickinfo, rsp_dict,
op_ctx, op_errstr);
break;
@@ -2636,6 +2732,10 @@ glusterd_handle_brick_rsp (glusterd_brickinfo_t *brickinfo,
dict_copy (rsp_dict, op_ctx);
break;
+ case GD_OP_HEAL_VOLUME:
+ ret = glusterd_heal_volume_brick_rsp (req_ctx->dict, rsp_dict,
+ op_ctx, op_errstr);
+ break;
default:
break;
}
@@ -2892,16 +2992,91 @@ _add_rxlator_to_dict (dict_t *dict, char *volname, int index, int count)
char key[128] = {0,};
char *xname = NULL;
- snprintf (key, sizeof (key), "heal-%d", count);
+ snprintf (key, sizeof (key), "xl-%d", count);
ret = gf_asprintf (&xname, "%s-replicate-%d", volname, index);
if (ret == -1)
goto out;
ret = dict_set_dynstr (dict, key, xname);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32 (dict, xname, index);
out:
return ret;
}
+/*
+ * Add to @dict every replicate xlator of @volinfo that has at least one
+ * brick hosted by this glusterd (brick uuid equal to priv->uuid).
+ *
+ * Bricks are walked in list order and grouped replica_count at a time;
+ * group N maps to "<volname>-replicate-N".
+ *
+ * Returns the number of xlators actually stored in @dict. A group whose
+ * insertion fails is logged and not counted, so the result always matches
+ * the "xl-<n>" keys present.
+ */
+int
+_select_rxlators_with_local_bricks (xlator_t *this, glusterd_volinfo_t *volinfo,
+                                    dict_t *dict)
+{
+        glusterd_brickinfo_t *brickinfo     = NULL;
+        glusterd_conf_t      *priv          = NULL;
+        int                   index         = 1;
+        int                   rxlator_count = 0;
+        int                   replica_count = 0;
+        int                   ret           = 0;
+        gf_boolean_t          add           = _gf_false;
+
+        priv = this->private;
+        replica_count = volinfo->replica_count;
+        /* index % replica_count below must not divide by zero */
+        if (replica_count <= 0)
+                return 0;
+
+        list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                if (uuid_is_null (brickinfo->uuid))
+                        (void)glusterd_resolve_brick (brickinfo);
+
+                if (!uuid_compare (priv->uuid, brickinfo->uuid))
+                        add = _gf_true;
+
+                /* last brick of the current replica group */
+                if (index % replica_count == 0) {
+                        if (add) {
+                                ret = _add_rxlator_to_dict (dict,
+                                                      volinfo->volname,
+                                                      (index-1)/replica_count,
+                                                      rxlator_count);
+                                if (ret)
+                                        gf_log (this->name, GF_LOG_ERROR,
+                                                "Failed to add "
+                                                "%s-replicate-%d to dict",
+                                                volinfo->volname,
+                                                (index-1)/replica_count);
+                                else
+                                        rxlator_count++;
+                        }
+                        add = _gf_false;
+                }
+
+                index++;
+        }
+        return rxlator_count;
+}
+
+/*
+ * Add to @dict every replicate xlator of @volinfo for which this glusterd
+ * is the elected node for a full self-heal: within each group of
+ * replica_count bricks the brick with the largest uuid is the candidate,
+ * and the group is selected when that uuid equals priv->uuid.
+ *
+ * Returns the number of xlators actually stored in @dict. A group whose
+ * insertion fails is logged and not counted, so the result always matches
+ * the "xl-<n>" keys present.
+ */
+int
+_select_rxlators_for_full_self_heal (xlator_t *this,
+                                     glusterd_volinfo_t *volinfo,
+                                     dict_t *dict)
+{
+        glusterd_brickinfo_t *brickinfo     = NULL;
+        glusterd_conf_t      *priv          = NULL;
+        int                   index         = 1;
+        int                   rxlator_count = 0;
+        int                   replica_count = 0;
+        int                   ret           = 0;
+        uuid_t                candidate     = {0};
+
+        priv = this->private;
+        replica_count = volinfo->replica_count;
+        /* index % replica_count below must not divide by zero */
+        if (replica_count <= 0)
+                return 0;
+
+        list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                if (uuid_is_null (brickinfo->uuid))
+                        (void)glusterd_resolve_brick (brickinfo);
+
+                /* keep the largest uuid seen in the current group */
+                if (uuid_compare (brickinfo->uuid, candidate) > 0)
+                        uuid_copy (candidate, brickinfo->uuid);
+
+                /* last brick of the current replica group */
+                if (index % replica_count == 0) {
+                        if (!uuid_compare (priv->uuid, candidate)) {
+                                ret = _add_rxlator_to_dict (dict,
+                                                      volinfo->volname,
+                                                      (index-1)/replica_count,
+                                                      rxlator_count);
+                                if (ret)
+                                        gf_log (this->name, GF_LOG_ERROR,
+                                                "Failed to add "
+                                                "%s-replicate-%d to dict",
+                                                volinfo->volname,
+                                                (index-1)/replica_count);
+                                else
+                                        rxlator_count++;
+                        }
+                        uuid_clear (candidate);
+                }
+
+                index++;
+        }
+        return rxlator_count;
+}
+
static int
glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr)
{
@@ -2909,14 +3084,11 @@ glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr)
char *volname = NULL;
glusterd_conf_t *priv = NULL;
glusterd_volinfo_t *volinfo = NULL;
- glusterd_brickinfo_t *brickinfo = NULL;
xlator_t *this = NULL;
char msg[2048] = {0,};
- int replica_count = 0;
- int index = 1;
- int rxlator_count = 0;
- uuid_t candidate = {0};
glusterd_pending_node_t *pending_node = NULL;
+ gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID;
+ int rxlator_count = 0;
this = THIS;
GF_ASSERT (this);
@@ -2939,48 +3111,43 @@ glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr)
goto out;
}
- replica_count = volinfo->replica_count;
-
- index = 1;
- list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
- if (uuid_is_null (brickinfo->uuid))
- (void)glusterd_resolve_brick (brickinfo);
-
- if (uuid_compare (brickinfo->uuid, candidate) > 0)
- uuid_copy (candidate, brickinfo->uuid);
-
- if (index % replica_count == 0) {
- if (!uuid_compare (priv->uuid, candidate)) {
- _add_rxlator_to_dict (dict, volname,
- (index-1)/replica_count,
- rxlator_count);
- rxlator_count++;
- }
- uuid_clear (candidate);
- }
+ ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op);
+ if (ret || (heal_op == GF_AFR_OP_INVALID)) {
+ gf_log ("glusterd", GF_LOG_ERROR, "heal op invalid");
+ goto out;
+ }
- index++;
+ switch (heal_op) {
+ case GF_AFR_OP_HEAL_FULL:
+ rxlator_count = _select_rxlators_for_full_self_heal (this,
+ volinfo,
+ dict);
+ break;
+ default:
+ rxlator_count = _select_rxlators_with_local_bricks (this,
+ volinfo,
+ dict);
+ break;
}
+ if (!rxlator_count)
+ goto out;
ret = dict_set_int32 (dict, "count", rxlator_count);
if (ret)
goto out;
- if (rxlator_count) {
- pending_node = GF_CALLOC (1, sizeof (*pending_node),
- gf_gld_mt_pending_node_t);
- if (!pending_node) {
- ret = -1;
- goto out;
- } else {
- pending_node->node = priv->shd;
- pending_node->type = GD_NODE_SHD;
- list_add_tail (&pending_node->list,
- &opinfo.pending_bricks);
- pending_node = NULL;
- }
+ pending_node = GF_CALLOC (1, sizeof (*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ } else {
+ pending_node->node = priv->shd;
+ pending_node->type = GD_NODE_SHD;
+ list_add_tail (&pending_node->list,
+ &opinfo.pending_bricks);
+ pending_node = NULL;
}
-
out:
gf_log (THIS->name, GF_LOG_DEBUG, "Returning ret %d", ret);
return ret;
@@ -3222,8 +3389,8 @@ glusterd_op_ac_rcvd_brick_op_acc (glusterd_op_sm_event_t *event, void *ctx)
if (opinfo.brick_pending_count > 0)
opinfo.brick_pending_count--;
- glusterd_handle_brick_rsp (pending_entry, op, ev_ctx->rsp_dict,
- op_ctx, &op_errstr);
+ glusterd_handle_node_rsp (req_ctx, pending_entry, op, ev_ctx->rsp_dict,
+ op_ctx, &op_errstr);
if (opinfo.brick_pending_count > 0)
goto out;
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
index 12aa139f591..b4df8201769 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
@@ -154,6 +154,11 @@ typedef struct glusterd_pr_brick_rsp_conv_t {
dict_t *dict;
} glusterd_pr_brick_rsp_conv_t;
+typedef struct glusterd_heal_rsp_conv_ { /* context handed to _heal_volume_add_shd_rsp via dict_foreach */
+ dict_t *dict; /* destination dict that receives the re-keyed entries and "count" */
+ glusterd_volinfo_t *volinfo; /* volume whose replica_count converts replica/child ids to a brick id */
+} glusterd_heal_rsp_conv_t;
+
typedef struct glusterd_status_rsp_conv_ {
int count;
dict_t *dict;
diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
index 537496f0835..39a9c6161a9 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
@@ -111,6 +111,11 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret,
}
break;
}
+ case GD_OP_HEAL_VOLUME:
+ {
+ glusterd_add_bricks_hname_path_to_dict (ctx);
+ break;
+ }
case GD_OP_PROFILE_VOLUME:
{
if (ctx && dict_get_int32 (ctx, "count", &count)) {
@@ -142,7 +147,6 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret,
case GD_OP_ADD_BRICK:
case GD_OP_LOG_ROTATE:
case GD_OP_SYNC_VOLUME:
- case GD_OP_HEAL_VOLUME:
case GD_OP_STATEDUMP_VOLUME:
case GD_OP_REPLACE_BRICK:
case GD_OP_STATUS_VOLUME:
@@ -1107,6 +1111,48 @@ out:
return ret;
}
+/*
+ * dict_foreach callback that merges one entry of a peer's heal response
+ * into the operation context dict passed as @data.
+ *
+ * "count" is merged as a maximum: the context keeps the larger of its own
+ * value and the peer's. Every other entry is stored in the context dict
+ * under the same key.
+ *
+ * Only the entry handed to this call is touched. The peer dict is never
+ * modified, because it is the dictionary currently being iterated.
+ *
+ * @peer_dict: dictionary being iterated (the peer's response)
+ * @key:       key of the current entry
+ * @value:     value of the current entry
+ * @data:      dict_t * of the operation context
+ */
+void
+_heal_volume_add_peer_rsp (dict_t *peer_dict, char *key, data_t *value,
+                           void *data)
+{
+        int     max_brick      = 0;
+        int     peer_max_brick = 0;
+        int     ret            = 0;
+        dict_t *ctx_dict       = data;
+
+        if (strcmp (key, "count") != 0) {
+                ret = dict_set (ctx_dict, key, value);
+                if (ret)
+                        gf_log ("", GF_LOG_ERROR, "Failed to copy key %s "
+                                "from peer response", key);
+                return;
+        }
+
+        /* a missing or unreadable count leaves the corresponding value at 0 */
+        ret = dict_get_int32 (ctx_dict, "count", &max_brick);
+        ret = dict_get_int32 (peer_dict, "count", &peer_max_brick);
+        if (peer_max_brick > max_brick)
+                max_brick = peer_max_brick;
+
+        ret = dict_set_int32 (ctx_dict, "count", max_brick);
+        if (ret)
+                gf_log ("", GF_LOG_ERROR, "Failed to set count");
+        return;
+}
+
+/*
+ * Merge a peer's heal-volume commit response into the context dict of the
+ * operation in progress, one entry at a time through
+ * _heal_volume_add_peer_rsp(). Does nothing when the operation has no
+ * context dict. Always returns 0.
+ */
+int
+glusterd_volume_heal_use_rsp_dict (dict_t *rsp_dict)
+{
+        glusterd_op_t  op       = GD_OP_NONE;
+        dict_t        *ctx_dict = NULL;
+
+        GF_ASSERT (rsp_dict);
+
+        op = glusterd_op_get_op ();
+        GF_ASSERT (GD_OP_HEAL_VOLUME == op);
+
+        ctx_dict = glusterd_op_get_ctx (op);
+        if (ctx_dict)
+                dict_foreach (rsp_dict, _heal_volume_add_peer_rsp, ctx_dict);
+
+        return 0;
+}
int32_t
glusterd3_1_commit_op_cbk (struct rpc_req *req, struct iovec *iov,
@@ -1229,6 +1275,13 @@ glusterd3_1_commit_op_cbk (struct rpc_req *req, struct iovec *iov,
case GD_OP_DEFRAG_BRICK_VOLUME:
break;
+ case GD_OP_HEAL_VOLUME:
+ ret = glusterd_volume_heal_use_rsp_dict (dict);
+ if (ret)
+ goto out;
+
+ break;
+
default:
break;
}
@@ -1723,7 +1776,7 @@ glusterd3_1_brick_op (call_frame_t *frame, xlator_t *this,
char *op_errstr = NULL;
int pending_bricks = 0;
glusterd_pending_node_t *pending_node;
- glusterd_req_ctx_t *req_ctx = NULL;
+ glusterd_req_ctx_t *req_ctx = NULL;
struct rpc_clnt *rpc = NULL;
if (!this) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 4ec8ae5dc2a..117e5e8f3a8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -1476,6 +1476,36 @@ _add_volinfo_dict_to_prdict (dict_t *this, char *key, data_t *value, void *data)
}
int32_t
+glusterd_add_bricks_hname_path_to_dict (dict_t *dict)
+{
+        /*
+         * For the volume named by "volname" in @dict, store each brick's
+         * hostname and path in @dict as "<n>-hostname" and "<n>-path",
+         * where <n> is the brick's zero-based position in the volume's
+         * brick list.
+         *
+         * The walk is best effort: a failed insertion is logged and the
+         * remaining bricks are still processed. Returns 0 when everything
+         * was stored, otherwise the first error seen (volname lookup,
+         * volinfo lookup or dict insertion).
+         */
+        char                 *volname   = NULL;
+        glusterd_volinfo_t   *volinfo   = NULL;
+        glusterd_brickinfo_t *brickinfo = NULL;
+        int                   ret       = 0;
+        int                   set_ret   = 0;
+        char                  key[256]  = {0};
+        int                   index     = 0;
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret)
+                goto out;
+
+        list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                snprintf (key, sizeof (key), "%d-hostname", index);
+                set_ret = dict_set_str (dict, key, brickinfo->hostname);
+                if (set_ret) {
+                        gf_log ("", GF_LOG_ERROR, "Failed to set %s", key);
+                        /* keep only the first failure for the caller */
+                        if (!ret)
+                                ret = set_ret;
+                }
+
+                snprintf (key, sizeof (key), "%d-path", index);
+                set_ret = dict_set_str (dict, key, brickinfo->path);
+                if (set_ret) {
+                        gf_log ("", GF_LOG_ERROR, "Failed to set %s", key);
+                        if (!ret)
+                                ret = set_ret;
+                }
+
+                index++;
+        }
+out:
+        return ret;
+}
+
+int32_t
glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
dict_t *dict, int32_t count)
{
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index f71ecc404b6..de6185753a1 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -410,4 +410,7 @@ glusterd_get_trusted_client_filepath (char *filepath,
gf_transport_type type);
int
glusterd_restart_rebalance (glusterd_conf_t *conf);
+
+int32_t
+glusterd_add_bricks_hname_path_to_dict (dict_t *dict);
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 9df9d4219a7..caafa9fd094 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -470,6 +470,8 @@ glusterd_handle_cli_heal_volume (rpcsvc_request_t *req)
"failed to "
"unserialize req-buffer to dictionary");
goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
}
}
@@ -489,8 +491,6 @@ glusterd_handle_cli_heal_volume (rpcsvc_request_t *req)
out:
if (ret && dict)
dict_unref (dict);
- if (cli_req.dict.dict_val)
- free (cli_req.dict.dict_val); //its malloced by xdr
glusterd_friend_sm ();
glusterd_op_sm ();
@@ -999,6 +999,7 @@ glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr)
char msg[2048];
glusterd_conf_t *priv = NULL;
dict_t *opt_dict = NULL;
+ gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID;
priv = THIS->private;
if (!priv) {
@@ -1068,6 +1069,15 @@ glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr)
goto out;
}
+ ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op);
+ if (ret || (heal_op == GF_AFR_OP_INVALID)) {
+ ret = -1;
+ snprintf (msg, sizeof (msg), "Invalid heal-op");
+ *op_errstr = gf_strdup (msg);
+ gf_log (THIS->name, GF_LOG_WARNING, "%s", msg);
+ goto out;
+ }
+
ret = 0;
out:
gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);