author    Krishnan Parthasarathi <kp@gluster.com>    2011-09-16 10:40:32 +0530
committer Vijay Bellur <vijay@gluster.com>           2011-09-22 09:43:25 -0700
commit    4765dd1a1c51c67ab86687fbd871c89156680c34 (patch)
tree      d1d3890457cbcb01131d21f66e40ec8a1c537cf1
parent    53b5da6dfab2e6b11ab2e40119e92ff7d4527b2c (diff)
glusterd: Implemented cmd to trigger self-heal on a replicate volume. (tag: v3.3.0qa10)

This cmd is used in the context of proactive self-heal for replicated
volumes. The user invokes the following cmd when (s)he suspects that
self-heal needs to be done on a particular volume:

    gluster volume heal <VOLNAME>

Change-Id: I3954353b53488c28b70406e261808239b44997f3
BUG: 3602
Reviewed-on: http://review.gluster.com/454
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vijay@gluster.com>
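
For reference, a minimal usage sketch of the new command ("test-vol" is a
hypothetical volume name); the success message is the one printed by
gf_cli3_1_heal_volume_cbk in the diff below:

    # gluster volume heal test-vol
    Starting heal on volume test-vol has been successful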
-rw-r--r--cli/src/cli-cmd-volume.c45
-rw-r--r--cli/src/cli-rpc-ops.c88
-rw-r--r--cli/src/cli.h4
-rw-r--r--glusterfsd/src/glusterfsd-mgmt.c127
-rw-r--r--libglusterfs/src/globals.c1
-rw-r--r--libglusterfs/src/glusterfs.h1
-rw-r--r--rpc/rpc-lib/src/protocol-common.h2
-rw-r--r--rpc/xdr/src/cli1-xdr.c30
-rw-r--r--rpc/xdr/src/cli1-xdr.h21
-rw-r--r--rpc/xdr/src/cli1-xdr.x12
-rw-r--r--xlators/cluster/afr/src/afr-common.c8
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c37
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h3
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c166
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c52
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c138
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h22
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c160
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c5
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h23
21 files changed, 905 insertions, 42 deletions
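
For orientation, a sketch of the control flow this patch introduces,
reconstructed from the hunks below (all names appear in the diff):

    cli_cmd_volume_heal_cbk (cli-cmd-volume.c)
      -> gf_cli3_1_heal_volume, proc GLUSTER_CLI_HEAL_VOLUME (cli-rpc-ops.c)
        -> glusterd_handle_cli_heal_volume (glusterd-volume-ops.c)
          -> op-sm: glusterd_op_stage_heal_volume / glusterd_op_heal_volume
            -> glusterd_bricks_select_heal_volume picks the self-heal daemon
               (GD_NODE_SHD) and the <VOLNAME>-replicate-N xlators to heal
            -> brick op GF_BRICK_XLATOR_HEAL over the shd's unix socket
              -> glusterfs_handle_translator_heal (glusterfsd-mgmt.c)
                -> xlator_notify (GF_EVENT_TRIGGER_HEAL, ...)
                  -> afr_notify starts the self-heal crawl (afr-common.c)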
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index de3166e8d04..a2ec5086317 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -1469,6 +1469,47 @@ cli_print_brick_status (char *brick, int port, int online, int pid)
return 0;
}
+int
+cli_cmd_volume_heal_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ gf1_cli_heal_vol_req req = {0,};
+ int sent = 0;
+ int parse_error = 0;
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ if (wordcount != 3) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ req.volname = (char *)words[2];
+ if (!req.volname)
+ goto out;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_HEAL_VOLUME];
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, &req);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Volume heal failed");
+ }
+
+ return ret;
+}
+
struct cli_cmd volume_cmds[] = {
{ "volume info [all|<VOLNAME>]",
cli_cmd_volume_info_cbk,
@@ -1571,6 +1612,10 @@ struct cli_cmd volume_cmds[] = {
cli_cmd_volume_status_cbk,
"display status of specified volume"},
+ { "volume heal <VOLNAME>",
+ cli_cmd_volume_heal_cbk,
+ "Start healing of volume specified by <VOLNAME>"},
+
{ NULL, NULL, NULL }
};
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index ccd76d57043..78d27b62424 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -3912,6 +3912,91 @@ gf_cli3_1_umount (call_frame_t *frame, xlator_t *this, void *data)
return ret;
}
+int
+gf_cli3_1_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf1_cli_heal_vol_rsp rsp = {0,};
+ int ret = 0;
+ cli_local_t *local = NULL;
+ char *volname = NULL;
+ call_frame_t *frame = NULL;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_heal_vol_rsp);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_ERROR, "error");
+ goto out;
+ }
+
+ frame = myframe;
+
+ if (frame) {
+ local = frame->local;
+ frame->local = NULL;
+ }
+
+ if (local)
+ volname = local->u.heal_vol.volname;
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to heal volume");
+
+ if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
+ cli_out ("%s", rsp.op_errstr);
+ else
+ cli_out ("Starting heal on volume %s has been %s", volname,
+ (rsp.op_ret) ? "unsuccessful": "successful");
+
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ if (local)
+ cli_local_wipe (local);
+ if (rsp.volname)
+ free (rsp.volname);
+ if (rsp.op_errstr)
+ free (rsp.op_errstr);
+ return ret;
+}
+
+int32_t
+gf_cli3_1_heal_volume (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf1_cli_heal_vol_req *req = NULL;
+ int ret = 0;
+ cli_local_t *local = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ req = data;
+ local = cli_local_get ();
+
+ if (local) {
+ local->u.heal_vol.volname = req->volname;
+ frame->local = local;
+ }
+
+ ret = cli_cmd_submit (req, frame, cli_rpc_prog,
+ GLUSTER_CLI_HEAL_VOLUME, NULL,
+ this, gf_cli3_1_heal_volume_cbk,
+ (xdrproc_t) xdr_gf1_cli_heal_vol_req);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+
+
struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = {
[GLUSTER_CLI_NULL] = {"NULL", NULL },
[GLUSTER_CLI_PROBE] = {"PROBE_QUERY", gf_cli3_1_probe},
@@ -3945,7 +4030,8 @@ struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = {
[GLUSTER_CLI_GETWD] = {"GETWD", gf_cli3_1_getwd},
[GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", gf_cli3_1_status_volume},
[GLUSTER_CLI_MOUNT] = {"MOUNT", gf_cli3_1_mount},
- [GLUSTER_CLI_UMOUNT] = {"UMOUNT", gf_cli3_1_umount}
+ [GLUSTER_CLI_UMOUNT] = {"UMOUNT", gf_cli3_1_umount},
+ [GLUSTER_CLI_HEAL_VOLUME] = {"HEAL_VOLUME", gf_cli3_1_heal_volume}
};
struct rpc_clnt_program cli_prog = {
diff --git a/cli/src/cli.h b/cli/src/cli.h
index bf3437827ec..4ef1dbe06f6 100644
--- a/cli/src/cli.h
+++ b/cli/src/cli.h
@@ -149,6 +149,10 @@ struct cli_local {
char *volname;
int flags;
} get_vol;
+
+ struct {
+ char *volname;
+ } heal_vol;
} u;
};
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 43d49b3f4e8..adce060a6d2 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -316,6 +316,41 @@ out:
}
int
+glusterfs_translator_heal_response_send (rpcsvc_request_t *req, int op_ret,
+ char *msg, dict_t *output)
+{
+ gd1_mgmt_brick_op_rsp rsp = {0,};
+ int ret = -1;
+ GF_ASSERT (msg);
+ GF_ASSERT (req);
+ GF_ASSERT (output);
+
+ rsp.op_ret = op_ret;
+ rsp.op_errno = 0;
+ if (ret && msg[0])
+ rsp.op_errstr = msg;
+ else
+ rsp.op_errstr = "";
+
+ ret = dict_allocate_and_serialize (output, &rsp.output.output_val,
+ (size_t *)&rsp.output.output_len);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Couldn't serialize "
+ "output dict.");
+ goto out;
+ }
+
+ ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+
+out:
+ if (rsp.output.output_val)
+ GF_FREE (rsp.output.output_val);
+
+ return ret;
+}
+
+int
glusterfs_handle_translator_info_get (rpcsvc_request_t *req)
{
int32_t ret = -1;
@@ -615,6 +650,92 @@ out:
}
int
+glusterfs_handle_translator_heal (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {0,};
+ dict_t *dict = NULL;
+ xlator_t *xlator = NULL;
+ xlator_t *any = NULL;
+ dict_t *output = NULL;
+ char msg[2048] = {0};
+ char key[2048] = {0};
+ char *xname = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *this = NULL;
+ int i = 0;
+ int count = 0;
+
+ GF_ASSERT (req);
+ this = THIS;
+ GF_ASSERT (this);
+
+ ctx = glusterfs_ctx_get ();
+ GF_ASSERT (ctx);
+
+ active = ctx->active;
+ any = active->first;
+ if (!xdr_to_generic (req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+ dict = dict_new ();
+
+ ret = dict_unserialize (xlator_req.input.input_val,
+ xlator_req.input.input_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "count", &count);
+ i = 0;
+ while (i < count) {
+ snprintf (key, sizeof (key), "heal-%d", i);
+ ret = dict_get_str (dict, key, &xname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Couldn't get "
+ "replicate xlator %s to trigger "
+ "self-heal", xname);
+ goto out;
+ }
+ xlator = xlator_search_by_name (any, xname);
+ if (!xlator) {
+ snprintf (msg, sizeof (msg), "xlator %s is not loaded",
+ xlator_req.name);
+ ret = -1;
+ goto out;
+ }
+
+ ret = xlator_notify (xlator, GF_EVENT_TRIGGER_HEAL, dict, NULL);
+ i++;
+ }
+ output = dict_new ();
+ if (!output)
+ goto out;
+
+ /* output dict is not used currently, could be used later. */
+ ret = glusterfs_translator_heal_response_send (req, ret, msg, output);
+out:
+ if (dict)
+ dict_unref (dict);
+ if (xlator_req.input.input_val)
+ free (xlator_req.input.input_val); // malloced by xdr
+ if (output)
+ dict_unref (output);
+ if (xlator_req.name)
+ free (xlator_req.name); //malloced by xdr
+
+ return ret;
+}
+
+int
glusterfs_handle_rpc_msg (rpcsvc_request_t *req)
{
int ret = -1;
@@ -627,6 +748,9 @@ glusterfs_handle_rpc_msg (rpcsvc_request_t *req)
case GF_BRICK_XLATOR_INFO:
ret = glusterfs_handle_translator_info_get (req);
break;
+ case GF_BRICK_XLATOR_HEAL:
+ ret = glusterfs_handle_translator_heal (req);
+ break;
default:
break;
}
@@ -681,7 +805,8 @@ rpc_clnt_prog_t clnt_handshake_prog = {
rpcsvc_actor_t glusterfs_actors[] = {
[GF_BRICK_NULL] = { "NULL", GF_BRICK_NULL, glusterfs_handle_rpc_msg, NULL, NULL},
[GF_BRICK_TERMINATE] = { "TERMINATE", GF_BRICK_TERMINATE, glusterfs_handle_rpc_msg, NULL, NULL},
- [GF_BRICK_XLATOR_INFO] = { "TRANSLATOR INFO", GF_BRICK_XLATOR_INFO, glusterfs_handle_rpc_msg, NULL, NULL}
+ [GF_BRICK_XLATOR_INFO] = { "TRANSLATOR INFO", GF_BRICK_XLATOR_INFO, glusterfs_handle_rpc_msg, NULL, NULL},
+ [GF_BRICK_XLATOR_HEAL] = { "TRANSLATOR HEAL", GF_BRICK_XLATOR_HEAL, glusterfs_handle_rpc_msg, NULL, NULL}
};
struct rpcsvc_program glusterfs_mop_prog = {
diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c
index fbae75dffba..473a4604e1f 100644
--- a/libglusterfs/src/globals.c
+++ b/libglusterfs/src/globals.c
@@ -348,6 +348,7 @@ char eventstring[GF_EVENT_MAXVAL][64] = {
"Transport Cleanup",
"Transport Connected",
"Volfile Modified",
+ "Volume Heal Triggered",
};
/* Copy the string ptr contents if needed for yourself */
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 25f32bd5b88..8247c60fbf6 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -373,6 +373,7 @@ typedef enum {
GF_EVENT_VOLFILE_MODIFIED,
GF_EVENT_GRAPH_NEW,
GF_EVENT_TRANSLATOR_INFO,
+ GF_EVENT_TRIGGER_HEAL,
GF_EVENT_MAXVAL,
} glusterfs_event_t;
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
index 3c4c8fc444f..41197044b66 100644
--- a/rpc/rpc-lib/src/protocol-common.h
+++ b/rpc/rpc-lib/src/protocol-common.h
@@ -205,6 +205,7 @@ enum gluster_cli_procnum {
GLUSTER_CLI_STATUS_VOLUME,
GLUSTER_CLI_MOUNT,
GLUSTER_CLI_UMOUNT,
+ GLUSTER_CLI_HEAL_VOLUME,
GLUSTER_CLI_MAXVALUE,
};
@@ -212,6 +213,7 @@ enum gf_brick_procnum {
GF_BRICK_NULL = 0,
GF_BRICK_TERMINATE = 1,
GF_BRICK_XLATOR_INFO = 2,
+ GF_BRICK_XLATOR_HEAL = 3,
GF_BRICK_MAX_VALUE
};
diff --git a/rpc/xdr/src/cli1-xdr.c b/rpc/xdr/src/cli1-xdr.c
index 25ab32a8fe9..1240dda8bf5 100644
--- a/rpc/xdr/src/cli1-xdr.c
+++ b/rpc/xdr/src/cli1-xdr.c
@@ -1068,3 +1068,33 @@ xdr_gf1_cli_umount_rsp (XDR *xdrs, gf1_cli_umount_rsp *objp)
return FALSE;
return TRUE;
}
+
+bool_t
+xdr_gf1_cli_heal_vol_req (XDR *xdrs, gf1_cli_heal_vol_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_string (xdrs, &objp->volname, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cli_heal_vol_rsp (XDR *xdrs, gf1_cli_heal_vol_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->volname, ~0))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+}
diff --git a/rpc/xdr/src/cli1-xdr.h b/rpc/xdr/src/cli1-xdr.h
index 4077ff3c11b..f22c635f176 100644
--- a/rpc/xdr/src/cli1-xdr.h
+++ b/rpc/xdr/src/cli1-xdr.h
@@ -614,6 +614,23 @@ struct gf1_cli_umount_rsp {
};
typedef struct gf1_cli_umount_rsp gf1_cli_umount_rsp;
+struct gf1_cli_heal_vol_req {
+ char *volname;
+};
+typedef struct gf1_cli_heal_vol_req gf1_cli_heal_vol_req;
+
+struct gf1_cli_heal_vol_rsp {
+ int op_ret;
+ int op_errno;
+ char *volname;
+ char *op_errstr;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+};
+typedef struct gf1_cli_heal_vol_rsp gf1_cli_heal_vol_rsp;
+
/* the xdr functions */
#if defined(__STDC__) || defined(__cplusplus)
@@ -687,6 +704,8 @@ extern bool_t xdr_gf1_cli_mount_req (XDR *, gf1_cli_mount_req*);
extern bool_t xdr_gf1_cli_mount_rsp (XDR *, gf1_cli_mount_rsp*);
extern bool_t xdr_gf1_cli_umount_req (XDR *, gf1_cli_umount_req*);
extern bool_t xdr_gf1_cli_umount_rsp (XDR *, gf1_cli_umount_rsp*);
+extern bool_t xdr_gf1_cli_heal_vol_req (XDR *, gf1_cli_heal_vol_req*);
+extern bool_t xdr_gf1_cli_heal_vol_rsp (XDR *, gf1_cli_heal_vol_rsp*);
#else /* K&R C */
extern bool_t xdr_gf_cli_defrag_type ();
@@ -759,6 +778,8 @@ extern bool_t xdr_gf1_cli_mount_req ();
extern bool_t xdr_gf1_cli_mount_rsp ();
extern bool_t xdr_gf1_cli_umount_req ();
extern bool_t xdr_gf1_cli_umount_rsp ();
+extern bool_t xdr_gf1_cli_heal_vol_req ();
+extern bool_t xdr_gf1_cli_heal_vol_rsp ();
#endif /* K&R C */
diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x
index 2a6168c0456..9a1f77c0ef7 100644
--- a/rpc/xdr/src/cli1-xdr.x
+++ b/rpc/xdr/src/cli1-xdr.x
@@ -455,3 +455,15 @@ struct gf1_cli_umount_rsp {
int op_ret;
int op_errno;
};
+
+struct gf1_cli_heal_vol_req {
+ string volname<>;
+} ;
+
+struct gf1_cli_heal_vol_rsp {
+ int op_ret;
+ int op_errno;
+ string volname<>;
+ string op_errstr<>;
+ opaque dict<>;
+} ;
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 2e5ca71b219..c23e329dfcb 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -3498,7 +3498,15 @@ afr_notify (xlator_t *this, int32_t event,
priv->last_event[idx] = event;
}
UNLOCK (&priv->lock);
+
+ break;
+
+ case GF_EVENT_TRIGGER_HEAL:
+ gf_log (this->name, GF_LOG_INFO, "Self-heal was triggered"
+ " manually. Start crawling");
+ call_psh = 1;
break;
+
default:
propagate = 1;
break;
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index df0aa202cc6..2ab80c2ff63 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -2562,6 +2562,42 @@ glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
}
int
+glusterd_shd_rpc_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event,
+ void *data)
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ int ret = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ switch (event) {
+ case RPC_CLNT_CONNECT:
+ gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_CONNECT");
+ (void) glusterd_shd_set_running (_gf_true);
+ ret = default_notify (this, GF_EVENT_CHILD_UP, NULL);
+
+ break;
+
+ case RPC_CLNT_DISCONNECT:
+ gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT");
+ (void) glusterd_shd_set_running (_gf_false);
+ break;
+
+ default:
+ gf_log (this->name, GF_LOG_TRACE,
+ "got some other RPC event %d", event);
+ break;
+ }
+
+ return ret;
+}
+
+int
glusterd_friend_remove_notify (glusterd_peerinfo_t *peerinfo, rpcsvc_request_t *req)
{
int ret = -1;
@@ -2742,6 +2778,7 @@ rpcsvc_actor_t gd_svc_cli_actors[] = {
[GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", GLUSTER_CLI_STATUS_VOLUME, glusterd_handle_status_volume, NULL, NULL},
[GLUSTER_CLI_MOUNT] = { "MOUNT", GLUSTER_CLI_MOUNT, glusterd_handle_mount, NULL, NULL},
[GLUSTER_CLI_UMOUNT] = { "UMOUNT", GLUSTER_CLI_UMOUNT, glusterd_handle_umount, NULL, NULL},
+ [GLUSTER_CLI_HEAL_VOLUME] = { "HEAL_VOLUME", GLUSTER_CLI_HEAL_VOLUME, glusterd_handle_cli_heal_volume, NULL, NULL}
};
diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
index e10cf1aca19..196f5f50bd8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h
+++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
@@ -70,7 +70,8 @@ typedef enum gf_gld_mem_types_ {
gf_gld_mt_mount_comp_container = gf_common_mt_end + 44,
gf_gld_mt_mount_component = gf_common_mt_end + 45,
gf_gld_mt_mount_spec = gf_common_mt_end + 46,
- gf_gld_mt_end = gf_common_mt_end + 47,
+ gf_gld_mt_nodesrv_t = gf_common_mt_end + 47,
+ gf_gld_mt_end = gf_common_mt_end + 48,
} gf_gld_mem_types_t;
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index c9d1c99a1e1..84280498e8b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -202,6 +202,17 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin
brick_req->name = brickinfo->path;
break;
+ case GD_OP_HEAL_VOLUME:
+ {
+ brick_req = GF_CALLOC (1, sizeof (*brick_req),
+ gf_gld_mt_mop_brick_req_t);
+ if (!brick_req)
+ goto out;
+
+ brick_req->op = GF_BRICK_XLATOR_HEAL;
+ brick_req->name = "";
+ }
+ break;
default:
goto out;
break;
@@ -1483,6 +1494,7 @@ glusterd_op_build_payload (dict_t **req)
case GD_OP_LOG_LEVEL:
case GD_OP_STATUS_VOLUME:
case GD_OP_REBALANCE:
+ case GD_OP_HEAL_VOLUME:
{
dict_t *dict = ctx;
dict_copy (dict, req_dict);
@@ -1777,19 +1789,15 @@ glusterd_op_ac_brick_op_failed (glusterd_op_sm_event_t *event, void *ctx)
{
int ret = 0;
glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL;
- glusterd_brickinfo_t *brickinfo = NULL;
gf_boolean_t free_errstr = _gf_false;
GF_ASSERT (event);
GF_ASSERT (ctx);
ev_ctx = ctx;
- brickinfo = ev_ctx->brickinfo;
- GF_ASSERT (brickinfo);
- ret = glusterd_remove_pending_entry (&opinfo.pending_bricks, brickinfo);
+ ret = glusterd_remove_pending_entry (&opinfo.pending_bricks, ev_ctx->pending_node->node);
if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "unknown response received "
- "from %s:%s", brickinfo->hostname, brickinfo->path);
+ gf_log ("glusterd", GF_LOG_ERROR, "unknown response received ");
ret = -1;
free_errstr = _gf_true;
goto out;
@@ -1828,7 +1836,7 @@ glusterd_op_brick_disconnect (void *data)
ev_ctx = data;
GF_ASSERT (ev_ctx);
- brickinfo = ev_ctx->brickinfo;
+ brickinfo = ev_ctx->pending_node->node;
GF_ASSERT (brickinfo);
if (brickinfo->timer) {
@@ -2260,6 +2268,10 @@ glusterd_op_stage_validate (glusterd_op_t op, dict_t *dict, char **op_errstr,
ret = glusterd_op_stage_rebalance (dict, op_errstr);
break;
+ case GD_OP_HEAL_VOLUME:
+ ret = glusterd_op_stage_heal_volume (dict, op_errstr);
+ break;
+
default:
gf_log ("", GF_LOG_ERROR, "Unknown op %d",
op);
@@ -2351,6 +2363,10 @@ glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr,
ret = glusterd_op_rebalance (dict, op_errstr, rsp_dict);
break;
+ case GD_OP_HEAL_VOLUME:
+ ret = glusterd_op_heal_volume (dict, op_errstr);
+ break;
+
default:
gf_log ("", GF_LOG_ERROR, "Unknown op %d",
op);
@@ -2467,6 +2483,7 @@ glusterd_bricks_select_stop_volume (dict_t *dict, char **op_errstr)
goto out;
} else {
pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
list_add_tail (&pending_node->list, &opinfo.pending_bricks);
pending_node = NULL;
}
@@ -2539,6 +2556,7 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr)
goto out;
} else {
pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
list_add_tail (&pending_node->list, &opinfo.pending_bricks);
pending_node = NULL;
}
@@ -2606,6 +2624,7 @@ glusterd_bricks_select_profile_volume (dict_t *dict, char **op_errstr)
goto out;
} else {
pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
list_add_tail (&pending_node->list,
&opinfo.pending_bricks);
pending_node = NULL;
@@ -2674,6 +2693,117 @@ out:
}
static int
+_add_rxlator_to_dict (dict_t *dict, char *volname, int index, int count)
+{
+ int ret = -1;
+ char key[128] = {0,};
+ char *xname = NULL;
+
+ snprintf (key, sizeof (key), "heal-%d", count);
+ ret = gf_asprintf (&xname, "%s-replicate-%d", volname, index);
+ if (ret == -1)
+ goto out;
+
+ ret = dict_set_dynstr (dict, key, xname);
+out:
+ return ret;
+}
+
+static int
+glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ xlator_t *this = NULL;
+ char msg[2048] = {0,};
+ int replica_count = 0;
+ int index = 1;
+ int rxlator_count = 0;
+ uuid_t candidate = {0};
+ glusterd_pending_node_t *pending_node = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Volume %s does not exist",
+ volname);
+
+ *op_errstr = gf_strdup (msg);
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+
+ if (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) {
+ replica_count = volinfo->replica_count;
+
+ } else if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
+ replica_count = volinfo->sub_count;
+
+ } else {
+ GF_ASSERT (0);
+ goto out;
+ }
+
+ index = 1;
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ if (uuid_is_null (brickinfo->uuid))
+ (void)glusterd_resolve_brick (brickinfo);
+
+ if (uuid_compare (brickinfo->uuid, candidate) > 0)
+ uuid_copy (candidate, brickinfo->uuid);
+
+ if (index % replica_count == 0) {
+ if (!uuid_compare (priv->uuid, candidate)) {
+ _add_rxlator_to_dict (dict, volname,
+ (index-1)/replica_count,
+ rxlator_count);
+ rxlator_count++;
+ }
+ uuid_clear (candidate);
+ }
+
+ index++;
+ }
+ ret = dict_set_int32 (dict, "count", rxlator_count);
+ if (ret)
+ goto out;
+
+ if (rxlator_count) {
+ pending_node = GF_CALLOC (1, sizeof (*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ } else {
+ pending_node->node = priv->shd;
+ pending_node->type = GD_NODE_SHD;
+ list_add_tail (&pending_node->list,
+ &opinfo.pending_bricks);
+ pending_node = NULL;
+ }
+ }
+
+
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning ret %d", ret);
+ return ret;
+
+}
+
+static int
glusterd_op_ac_send_brick_op (glusterd_op_sm_event_t *event, void *ctx)
{
int ret = 0;
@@ -2723,7 +2853,6 @@ glusterd_op_ac_rcvd_brick_op_acc (glusterd_op_sm_event_t *event, void *ctx)
{
int ret = 0;
glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL;
- glusterd_brickinfo_t *brickinfo = NULL;
char *op_errstr = NULL;
glusterd_op_t op = GD_OP_NONE;
dict_t *op_ctx = NULL;
@@ -2736,24 +2865,22 @@ glusterd_op_ac_rcvd_brick_op_acc (glusterd_op_sm_event_t *event, void *ctx)
req_ctx = ev_ctx->commit_ctx;
GF_ASSERT (req_ctx);
- brickinfo = ev_ctx->brickinfo;
- GF_ASSERT (brickinfo);
+ op = req_ctx->op;
+ op_ctx = glusterd_op_get_ctx ();
- ret = glusterd_remove_pending_entry (&opinfo.pending_bricks, brickinfo);
+ ret = glusterd_remove_pending_entry (&opinfo.pending_bricks,
+ ev_ctx->pending_node->node);
if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "unknown response received "
- "from %s:%s", brickinfo->hostname, brickinfo->path);
+ gf_log ("glusterd", GF_LOG_ERROR, "unknown response received ");
ret = -1;
goto out;
}
if (opinfo.brick_pending_count > 0)
opinfo.brick_pending_count--;
- op = req_ctx->op;
- op_ctx = glusterd_op_get_ctx ();
- glusterd_handle_brick_rsp (brickinfo, op, ev_ctx->rsp_dict,
- op_ctx, &op_errstr);
+ glusterd_handle_brick_rsp (ev_ctx->pending_node->node, op, ev_ctx->rsp_dict,
+ op_ctx, &op_errstr);
if (opinfo.brick_pending_count > 0)
goto out;
@@ -2791,6 +2918,10 @@ glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr)
ret = glusterd_bricks_select_profile_volume (dict, op_errstr);
break;
+ case GD_OP_HEAL_VOLUME:
+ ret = glusterd_bricks_select_heal_volume (dict, op_errstr);
+ break;
+
default:
break;
}
@@ -3344,6 +3475,7 @@ glusterd_op_free_ctx (glusterd_op_t op, void *ctx)
case GD_OP_LOG_LEVEL:
case GD_OP_STATUS_VOLUME:
case GD_OP_REBALANCE:
+ case GD_OP_HEAL_VOLUME:
dict_unref (ctx);
break;
case GD_OP_DELETE_VOLUME:
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
index 0a75d4c8474..97385e6a49b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
@@ -152,7 +152,7 @@ typedef struct glusterd_op_brick_rsp_ctx_ {
char *op_errstr;
dict_t *rsp_dict;
glusterd_req_ctx_t *commit_ctx;
- glusterd_brickinfo_t *brickinfo;
+ glusterd_pending_node_t *pending_node;
} glusterd_op_brick_rsp_ctx_t;
typedef struct glusterd_pr_brick_rsp_conv_t {
diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
index c9f41405247..9cdab97df8b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
@@ -455,6 +455,21 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret,
xdrproc = (xdrproc_t)xdr_gf2_cli_defrag_vol_rsp;
break;
}
+ case GD_OP_HEAL_VOLUME:
+ {
+ gf1_cli_heal_vol_rsp rsp = {0,};
+ rsp.op_ret = op_ret;
+ rsp.op_errno = op_errno;
+ rsp.volname = "";
+ if (op_errstr)
+ rsp.op_errstr = op_errstr;
+ else
+ rsp.op_errstr = "";
+ cli_rsp = &rsp;
+ xdrproc = (xdrproc_t) xdr_gf1_cli_heal_vol_rsp;
+ break;
+
+ }
case GD_OP_NONE:
case GD_OP_MAX:
{
@@ -1922,7 +1937,7 @@ glusterd_start_brick_disconnect_timer (glusterd_op_brick_rsp_ctx_t *ev_ctx)
timeout.tv_sec = 5;
timeout.tv_usec = 0;
- brickinfo = ev_ctx->brickinfo;
+ brickinfo = ev_ctx->pending_node->node;
GF_ASSERT (brickinfo);
this = THIS;
GF_ASSERT (this);
@@ -2000,7 +2015,7 @@ out:
} else {
event_type = GD_OP_EVENT_RCVD_ACC;
}
- ev_ctx->brickinfo = frame->cookie;
+ ev_ctx->pending_node = frame->cookie;
ev_ctx->rsp_dict = dict;
ev_ctx->commit_ctx = frame->local;
op = glusterd_op_get_op ();
@@ -2087,9 +2102,9 @@ glusterd3_1_brick_op (call_frame_t *frame, xlator_t *this,
call_frame_t *dummy_frame = NULL;
char *op_errstr = NULL;
int pending_bricks = 0;
- glusterd_pending_node_t *pending_brick;
- glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_pending_node_t *pending_node;
glusterd_req_ctx_t *req_ctx = NULL;
+ struct rpc_clnt *rpc = NULL;
if (!this) {
ret = -1;
@@ -2109,25 +2124,30 @@ glusterd3_1_brick_op (call_frame_t *frame, xlator_t *this,
goto out;
}
- list_for_each_entry (pending_brick, &opinfo.pending_bricks, list) {
+ list_for_each_entry (pending_node, &opinfo.pending_bricks, list) {
dummy_frame = create_frame (this, this->ctx->pool);
- brickinfo = pending_brick->node;
-
if (!dummy_frame)
continue;
- if (_gf_false == glusterd_is_brick_started (brickinfo))
- continue;
-
- ret = glusterd_brick_op_build_payload (req_ctx->op, brickinfo,
- (gd1_mgmt_brick_op_req **)&req,
- req_ctx->dict);
+ ret = glusterd_brick_op_build_payload (req_ctx->op,
+ pending_node->node,
+ (gd1_mgmt_brick_op_req **)&req,
+ req_ctx->dict);
if (ret)
goto out;
dummy_frame->local = data;
- dummy_frame->cookie = brickinfo;
- ret = glusterd_submit_request (brickinfo->rpc, req, dummy_frame,
+ dummy_frame->cookie = pending_node;
+
+ rpc = glusterd_pending_node_get_rpc (pending_node);
+ if (!rpc) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "Brick Op failed "
+ "due to rpc failure.");
+ goto out;
+ }
+
+ ret = glusterd_submit_request (rpc, req, dummy_frame,
&glusterd_glusterfs_3_1_mgmt_prog,
req->op, NULL,
this, glusterd3_1_brick_op_cbk,
@@ -2143,7 +2163,7 @@ glusterd3_1_brick_op (call_frame_t *frame, xlator_t *this,
}
gf_log ("glusterd", GF_LOG_DEBUG, "Sent op req to %d bricks",
- pending_bricks);
+ pending_bricks);
opinfo.brick_pending_count = pending_bricks;
out:
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index e6c23e8337e..59609971bd8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -2366,6 +2366,120 @@ glusterd_get_nodesvc_volfile (char *server, char *workdir,
snprintf (volfile, len, "%s/%s-server.vol", dir, server);
}
+void
+glusterd_shd_set_running (gf_boolean_t status)
+{
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (priv->shd);
+
+ priv->shd->running = status;
+}
+
+gf_boolean_t
+glusterd_shd_is_running ()
+{
+ glusterd_conf_t *conf = NULL;
+
+ conf = THIS->private;
+ GF_ASSERT (conf);
+ GF_ASSERT (conf->shd);
+
+ return conf->shd->running;
+}
+
+int32_t
+glusterd_shd_set_socket_filepath (char *rundir, uuid_t uuid,
+ char *socketpath, int len)
+{
+ char sockfilepath[PATH_MAX] = {0,};
+ char md5_str[PATH_MAX] = {0,};
+
+ snprintf (sockfilepath, sizeof (sockfilepath), "%s/run-%s",
+ rundir, uuid_utoa (uuid));
+ _get_md5_str (md5_str, sizeof (md5_str),
+ (uint8_t *)sockfilepath, sizeof (sockfilepath));
+ snprintf (socketpath, len, "%s/%s.socket", glusterd_sock_dir,
+ md5_str);
+ return 0;
+}
+
+struct rpc_clnt*
+glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node)
+{
+ struct rpc_clnt *rpc = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ nodesrv_t *shd = NULL;
+ GF_VALIDATE_OR_GOTO (THIS->name, pending_node, out);
+ GF_VALIDATE_OR_GOTO (THIS->name, pending_node->node, out);
+
+ if (pending_node->type == GD_NODE_BRICK) {
+ brickinfo = pending_node->node;
+ rpc = brickinfo->rpc;
+
+ } else if (pending_node->type == GD_NODE_SHD) {
+ shd = pending_node->node;
+ rpc = shd->rpc;
+
+ } else {
+ GF_ASSERT (0);
+ }
+
+out:
+ return rpc;
+}
+
+struct rpc_clnt*
+glusterd_shd_get_rpc (void)
+{
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (priv->shd);
+
+ return priv->shd->rpc;
+}
+
+int32_t
+glusterd_shd_set_rpc (struct rpc_clnt *rpc)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (priv->shd);
+
+ priv->shd->rpc = rpc;
+
+ return ret;
+}
+
+int32_t
+glusterd_shd_connect (char *socketpath) {
+ int ret = 0;
+ dict_t *options = NULL;
+ struct rpc_clnt *rpc = NULL;
+
+ ret = rpc_clnt_transport_unix_options_build (&options, socketpath);
+ if (ret)
+ goto out;
+ ret = glusterd_rpc_create (&rpc, options,
+ glusterd_shd_rpc_notify,
+ NULL);
+ if (ret)
+ goto out;
+ (void) glusterd_shd_set_rpc (rpc);
+out:
+ return ret;
+}
+
int32_t
glusterd_nodesvc_start (char *server, gf_boolean_t pmap_signin)
{
@@ -2376,6 +2490,7 @@ glusterd_nodesvc_start (char *server, gf_boolean_t pmap_signin)
char logfile[PATH_MAX] = {0,};
char volfile[PATH_MAX] = {0,};
char rundir[PATH_MAX] = {0,};
+ char shd_sockfpath[PATH_MAX] = {0,};
char volfileid[256] = {0};
this = THIS;
@@ -2408,13 +2523,28 @@ glusterd_nodesvc_start (char *server, gf_boolean_t pmap_signin)
server);
snprintf (volfileid, sizeof (volfileid), "gluster/%s", server);
- if (pmap_signin)
+ if (!strcmp (server, "glustershd")) {
+ glusterd_shd_set_socket_filepath (rundir,
+ priv->uuid,
+ shd_sockfpath,
+ sizeof (shd_sockfpath));
+ }
+
+ //TODO: kp:change the assumption that shd is the one which signs in
+ // use runner_add_args?
+ if (pmap_signin) {
ret = runcmd (SBIN_DIR"/glusterfs", "-s", "localhost",
"--volfile-id", volfileid,
- "-p", pidfile, "-l", logfile, NULL);
- else
+ "-p", pidfile, "-l", logfile,
+ "-S", shd_sockfpath, NULL);
+ if (!ret)
+ glusterd_shd_connect (shd_sockfpath);
+
+ }
+ else {
ret = runcmd (SBIN_DIR"/glusterfs", "-f", volfile,
"-p", pidfile, "-l", logfile, NULL);
+ }
out:
return ret;
@@ -3742,7 +3872,7 @@ glusterd_remove_pending_entry (struct list_head *list, void *elem)
{
glusterd_pending_node_t *pending_node = NULL;
glusterd_pending_node_t *tmp = NULL;
- int ret = -1;
+ int ret = 0;
list_for_each_entry_safe (pending_node, tmp, list, list) {
if (elem == pending_node->node) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 93fa763bd51..aca46eae120 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -174,6 +174,28 @@ glusterd_shd_start ();
int32_t
glusterd_shd_stop ();
+int32_t
+glusterd_shd_set_socket_filepath (char *rundir, uuid_t uuid,
+ char *socketpath, int len);
+
+struct rpc_clnt*
+glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node);
+
+struct rpc_clnt*
+glusterd_shd_get_rpc (void);
+
+int32_t
+glusterd_shd_set_rpc (struct rpc_clnt *rpc);
+
+int32_t
+glusterd_shd_connect (char *socketpath);
+
+void
+glusterd_shd_set_running (gf_boolean_t status);
+
+gf_boolean_t
+glusterd_shd_is_running ();
+
int
glusterd_remote_hostname_get (rpcsvc_request_t *req,
char *remote_host, int len);
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 560968defe7..39cc02c8ee0 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -392,6 +392,62 @@ out:
return ret;
}
+int
+glusterd_handle_cli_heal_volume (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf1_cli_heal_vol_req cli_req = {0,};
+ char *dup_volname = NULL;
+ dict_t *dict = NULL;
+ glusterd_op_t cli_op = GD_OP_HEAL_VOLUME;
+
+ GF_ASSERT (req);
+
+ if (!xdr_to_generic (req->msg[0], &cli_req,
+ (xdrproc_t)xdr_gf1_cli_heal_vol_req)) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_log ("glusterd", GF_LOG_INFO, "Received heal vol req"
+ "for volume %s", cli_req.volname);
+
+ dict = dict_new ();
+
+ if (!dict)
+ goto out;
+
+ dup_volname = gf_strdup (cli_req.volname);
+ if (!dup_volname)
+ goto out;
+
+ ret = dict_set_dynstr (dict, "volname", dup_volname);
+ if (ret)
+ goto out;
+
+ ret = glusterd_op_begin (req, GD_OP_HEAL_VOLUME, dict);
+
+ gf_cmd_log ("volume heal","on volname: %s %s", cli_req.volname,
+ ((ret == 0) ? "SUCCESS": "FAILED"));
+
+out:
+ if (ret && dict)
+ dict_unref (dict);
+ if (cli_req.volname)
+ free (cli_req.volname); //it's malloced by xdr
+
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ if (ret)
+ ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+ NULL, "operation failed");
+
+ return ret;
+}
+
+
/* op-sm */
int
glusterd_op_stage_create_volume (dict_t *dict, char **op_errstr)
@@ -754,6 +810,101 @@ out:
}
int
+glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
+ gf_boolean_t exists = _gf_false;
+ gf_boolean_t enabled = _gf_false;
+ glusterd_volinfo_t *volinfo = NULL;
+ char msg[2048];
+ glusterd_conf_t *priv = NULL;
+ dict_t *opt_dict = NULL;
+
+ priv = THIS->private;
+ if (!priv) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "priv is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ if (!glusterd_shd_is_running ()) {
+ ret = -1;
+ snprintf (msg, sizeof (msg), "Self-heal daemon is not "
+ "running.");
+ *op_errstr = gf_strdup (msg);
+ gf_log (THIS->name, GF_LOG_WARNING, "%s", msg);
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ exists = glusterd_check_volume_exists (volname);
+
+ if (!exists) {
+ snprintf (msg, sizeof (msg), "Volume %s does not exist", volname);
+ gf_log ("", GF_LOG_ERROR, "%s",
+ msg);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ } else {
+ ret = 0;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+
+ if (ret)
+ goto out;
+
+ if (!glusterd_is_volume_started (volinfo)) {
+ snprintf (msg, sizeof (msg), "Volume %s is not started.",
+ volname);
+ gf_log (THIS->name, GF_LOG_WARNING, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+
+ if (!glusterd_is_volume_replicate (volinfo)) {
+ snprintf (msg, sizeof (msg), "Volume %s is not of type."
+ "replicate", volname);
+ gf_log (THIS->name, GF_LOG_WARNING, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+
+ opt_dict = volinfo->dict;
+ if (!opt_dict) {
+ ret = 0;
+ goto out;
+ }
+
+ enabled = dict_get_str_boolean (opt_dict, "cluster.self-heal-daemon",
+ 1);
+ if (!enabled) {
+ snprintf (msg, sizeof (msg), "Self-heal-daemon is "
+ "disabled. Heal will not be triggered on volume %s",
+ volname);
+ gf_log (THIS->name, GF_LOG_WARNING, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+int
glusterd_op_create_volume (dict_t *dict, char **op_errstr)
{
int ret = 0;
@@ -1034,3 +1185,12 @@ out:
gf_log ("", GF_LOG_DEBUG, "returning %d", ret);
return ret;
}
+
+int
+glusterd_op_heal_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ /* Necessary subtasks of heal are completed in brick op */
+
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
index d1582eae38a..83bbd1b2238 100644
--- a/xlators/mgmt/glusterd/src/glusterd.c
+++ b/xlators/mgmt/glusterd/src/glusterd.c
@@ -930,6 +930,10 @@ init (xlator_t *this)
conf = GF_CALLOC (1, sizeof (glusterd_conf_t),
gf_gld_mt_glusterd_conf_t);
GF_VALIDATE_OR_GOTO(this->name, conf, out);
+ conf->shd = GF_CALLOC (1, sizeof (nodesrv_t),
+ gf_gld_mt_nodesrv_t);
+ GF_VALIDATE_OR_GOTO(this->name, conf->shd, out);
+
INIT_LIST_HEAD (&conf->peers);
INIT_LIST_HEAD (&conf->volumes);
pthread_mutex_init (&conf->mutex, NULL);
@@ -961,6 +965,7 @@ init (xlator_t *this)
}
#endif
this->private = conf;
+ (void) glusterd_shd_set_running (_gf_false);
/* this->ctx->top = this;*/
ret = glusterd_uuid_init (first_time);
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index ab521af0f4b..b49e7d6751f 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -79,6 +79,7 @@ typedef enum glusterd_op_ {
GD_OP_LOG_LEVEL,
GD_OP_STATUS_VOLUME,
GD_OP_REBALANCE,
+ GD_OP_HEAL_VOLUME,
GD_OP_MAX,
} glusterd_op_t;
@@ -95,6 +96,11 @@ struct glusterd_volgen {
dict_t *dict;
};
typedef struct {
+ struct rpc_clnt *rpc;
+ gf_boolean_t running;
+} nodesrv_t;
+
+typedef struct {
struct _volfile_ctx *volfile;
pthread_mutex_t mutex;
struct list_head peers;
@@ -104,6 +110,7 @@ typedef struct {
uuid_t uuid;
char workdir[PATH_MAX];
rpcsvc_t *rpc;
+ nodesrv_t *shd;
struct pmap_registry *pmap;
struct list_head volumes;
struct list_head xprt_list;
@@ -225,9 +232,16 @@ struct glusterd_volinfo_ {
xlator_t *xl;
};
+typedef enum gd_node_type_ {
+ GD_NODE_NONE,
+ GD_NODE_BRICK,
+ GD_NODE_SHD
+} gd_node_type;
+
typedef struct glusterd_pending_node_ {
- void *node;
struct list_head list;
+ void *node;
+ gd_node_type type;
} glusterd_pending_node_t;
enum glusterd_op_ret {
@@ -511,6 +525,10 @@ glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
rpc_clnt_event_t event, void *data);
int
+glusterd_shd_rpc_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data);
+
+int
glusterd_rpc_create (struct rpc_clnt **rpc, dict_t *options,
rpc_clnt_notify_t notify_fn, void *notify_data);
@@ -535,8 +553,11 @@ int glusterd_handle_cli_delete_volume (rpcsvc_request_t *req);
int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
size_t len, int cmd, defrag_cbk_fn_t cbk);
+int glusterd_handle_cli_heal_volume (rpcsvc_request_t *req);
/* op-sm functions */
+int glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr);
+int glusterd_op_heal_volume (dict_t *dict, char **op_errstr);
int glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr);
int glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict);
int glusterd_op_quota (dict_t *dict, char **op_errstr);