| author    | Krishnan Parthasarathi <kp@gluster.com>          | 2011-09-16 10:40:32 +0530 |
| committer | Vijay Bellur <vijay@gluster.com>                 | 2011-09-22 09:43:25 -0700 |
| commit    | 4765dd1a1c51c67ab86687fbd871c89156680c34 (patch) | |
| tree      | d1d3890457cbcb01131d21f66e40ec8a1c537cf1         | |
| parent    | 53b5da6dfab2e6b11ab2e40119e92ff7d4527b2c (diff)  | |
glusterd: Implemented cmd to trigger self-heal on a replicate volume. (tag: v3.3.0qa10)
This command is used in the context of proactive self-heal for replicated
volumes. The user invokes the following command when they suspect that
self-heal needs to be done on a particular volume:
        gluster volume heal <VOLNAME>
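
For illustration, a hedged usage sketch; the volume name "r2" is hypothetical,
and the messages shown are the ones printed by cli_cmd_volume_heal_cbk and
gf_cli3_1_heal_volume_cbk in this patch:

        # trigger self-heal on a started replicate volume (hypothetical name "r2")
        gluster volume heal r2
        Starting heal on volume r2 has been successful

        # if the request could not be sent, the CLI instead reports:
        # Volume heal failed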
Change-Id: I3954353b53488c28b70406e261808239b44997f3
BUG: 3602
Reviewed-on: http://review.gluster.com/454
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vijay@gluster.com>
| -rw-r--r-- | cli/src/cli-cmd-volume.c | 45 |
| -rw-r--r-- | cli/src/cli-rpc-ops.c | 88 |
| -rw-r--r-- | cli/src/cli.h | 4 |
| -rw-r--r-- | glusterfsd/src/glusterfsd-mgmt.c | 127 |
| -rw-r--r-- | libglusterfs/src/globals.c | 1 |
| -rw-r--r-- | libglusterfs/src/glusterfs.h | 1 |
| -rw-r--r-- | rpc/rpc-lib/src/protocol-common.h | 2 |
| -rw-r--r-- | rpc/xdr/src/cli1-xdr.c | 30 |
| -rw-r--r-- | rpc/xdr/src/cli1-xdr.h | 21 |
| -rw-r--r-- | rpc/xdr/src/cli1-xdr.x | 12 |
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 8 |
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-handler.c | 37 |
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-mem-types.h | 3 |
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-op-sm.c | 166 |
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-op-sm.h | 2 |
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-rpc-ops.c | 52 |
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 138 |
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.h | 22 |
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 160 |
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.c | 5 |
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 23 |
21 files changed, 905 insertions, 42 deletions
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index de3166e8d..a2ec50863 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -1469,6 +1469,47 @@ cli_print_brick_status (char *brick, int port, int online, int pid)          return 0;  } +int +cli_cmd_volume_heal_cbk (struct cli_state *state, struct cli_cmd_word *word, +                          const char **words, int wordcount) +{ +        int                     ret = -1; +        rpc_clnt_procedure_t    *proc = NULL; +        call_frame_t            *frame = NULL; +        gf1_cli_heal_vol_req    req = {0,}; +        int                     sent = 0; +        int                     parse_error = 0; + +        frame = create_frame (THIS, THIS->ctx->pool); +        if (!frame) +                goto out; + +        if (wordcount != 3) { +               cli_usage_out (word->pattern); +                parse_error = 1; +               goto out; +        } + +        req.volname = (char *)words[2]; +        if (!req.volname) +                goto out; + +        proc = &cli_rpc_prog->proctable[GLUSTER_CLI_HEAL_VOLUME]; + +        if (proc->fn) { +                ret = proc->fn (frame, THIS, &req); +        } + +out: +        if (ret) { +                cli_cmd_sent_status_get (&sent); +                if ((sent == 0) && (parse_error == 0)) +                        cli_out ("Volume heal failed"); +        } + +        return ret; +} +  struct cli_cmd volume_cmds[] = {          { "volume info [all|<VOLNAME>]",            cli_cmd_volume_info_cbk, @@ -1571,6 +1612,10 @@ struct cli_cmd volume_cmds[] = {            cli_cmd_volume_status_cbk,           "display status of specified volume"}, +        { "volume heal <VOLNAME>", +          cli_cmd_volume_heal_cbk, +          "Start healing of volume specified by <VOLNAME>"}, +          { NULL, NULL, NULL }  }; diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index ccd76d570..78d27b624 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -3912,6 +3912,91 @@ gf_cli3_1_umount (call_frame_t *frame, xlator_t *this, void *data)          return ret;  } +int +gf_cli3_1_heal_volume_cbk (struct rpc_req *req, struct iovec *iov, +                             int count, void *myframe) +{ +        gf1_cli_heal_vol_rsp    rsp   = {0,}; +        int                     ret   = 0; +        cli_local_t             *local = NULL; +        char                    *volname = NULL; +        call_frame_t            *frame = NULL; + +        if (-1 == req->rpc_status) { +                goto out; +        } + +        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_heal_vol_rsp); +        if (ret < 0) { +                gf_log ("", GF_LOG_ERROR, "error"); +                goto out; +        } + +        frame = myframe; + +        if (frame) { +                local = frame->local; +                frame->local = NULL; +        } + +        if (local) +                volname = local->u.heal_vol.volname; + +        gf_log ("cli", GF_LOG_INFO, "Received resp to heal volume"); + +        if (rsp.op_ret && strcmp (rsp.op_errstr, "")) +                cli_out ("%s", rsp.op_errstr); +        else +                cli_out ("Starting heal on volume %s has been %s", volname, +                        (rsp.op_ret) ? 
"unsuccessful": "successful"); + +        ret = rsp.op_ret; + +out: +        cli_cmd_broadcast_response (ret); +        if (local) +                cli_local_wipe (local); +        if (rsp.volname) +                free (rsp.volname); +        if (rsp.op_errstr) +                free (rsp.op_errstr); +        return ret; +} + +int32_t +gf_cli3_1_heal_volume (call_frame_t *frame, xlator_t *this, +                         void *data) +{ +        gf1_cli_heal_vol_req   *req = NULL; +        int                     ret = 0; +        cli_local_t             *local = NULL; + +        if (!frame || !this ||  !data) { +                ret = -1; +                goto out; +        } + +        req = data; +        local = cli_local_get (); + +        if (local) { +                local->u.heal_vol.volname = req->volname; +                frame->local = local; +        } + +        ret = cli_cmd_submit (req, frame, cli_rpc_prog, +                              GLUSTER_CLI_HEAL_VOLUME, NULL, +                              this, gf_cli3_1_heal_volume_cbk, +                              (xdrproc_t) xdr_gf1_cli_heal_vol_req); + +out: +        gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret); + +        return ret; +} + + +  struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = {          [GLUSTER_CLI_NULL]             = {"NULL", NULL },          [GLUSTER_CLI_PROBE]            = {"PROBE_QUERY", gf_cli3_1_probe}, @@ -3945,7 +4030,8 @@ struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = {          [GLUSTER_CLI_GETWD]            = {"GETWD", gf_cli3_1_getwd},          [GLUSTER_CLI_STATUS_VOLUME]    = {"STATUS_VOLUME", gf_cli3_1_status_volume},          [GLUSTER_CLI_MOUNT]            = {"MOUNT", gf_cli3_1_mount}, -        [GLUSTER_CLI_UMOUNT]           = {"UMOUNT", gf_cli3_1_umount} +        [GLUSTER_CLI_UMOUNT]           = {"UMOUNT", gf_cli3_1_umount}, +        [GLUSTER_CLI_HEAL_VOLUME]      = {"HEAL_VOLUME", gf_cli3_1_heal_volume}  };  struct rpc_clnt_program cli_prog = { diff --git a/cli/src/cli.h b/cli/src/cli.h index bf3437827..4ef1dbe06 100644 --- a/cli/src/cli.h +++ b/cli/src/cli.h @@ -149,6 +149,10 @@ struct cli_local {                          char    *volname;                          int     flags;                  } get_vol; + +                struct { +                        char    *volname; +                }heal_vol;          } u;  }; diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c index 43d49b3f4..adce060a6 100644 --- a/glusterfsd/src/glusterfsd-mgmt.c +++ b/glusterfsd/src/glusterfsd-mgmt.c @@ -316,6 +316,41 @@ out:  }  int +glusterfs_translator_heal_response_send (rpcsvc_request_t *req, int op_ret, +                                         char *msg, dict_t *output) +{ +        gd1_mgmt_brick_op_rsp    rsp = {0,}; +        int                      ret = -1; +        GF_ASSERT (msg); +        GF_ASSERT (req); +        GF_ASSERT (output); + +        rsp.op_ret = op_ret; +        rsp.op_errno = 0; +        if (ret && msg[0]) +                rsp.op_errstr = msg; +        else +                rsp.op_errstr = ""; + +        ret = dict_allocate_and_serialize (output, &rsp.output.output_val, +                                        (size_t *)&rsp.output.output_len); +        if (ret) { +                gf_log (THIS->name, GF_LOG_ERROR, "Couldn't serialize " +                        "output dict."); +                goto out; +        } + +        ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL, +                               
      (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp); + +out: +        if (rsp.output.output_val) +                GF_FREE (rsp.output.output_val); + +        return ret; +} + +int  glusterfs_handle_translator_info_get (rpcsvc_request_t *req)  {          int32_t                  ret     = -1; @@ -615,6 +650,92 @@ out:  }  int +glusterfs_handle_translator_heal (rpcsvc_request_t *req) +{ +        int32_t                  ret     = -1; +        gd1_mgmt_brick_op_req    xlator_req = {0,}; +        dict_t                   *dict    = NULL; +        xlator_t                 *xlator = NULL; +        xlator_t                 *any = NULL; +        dict_t                   *output = NULL; +        char                     msg[2048] = {0}; +        char                     key[2048] = {0}; +        char                    *xname = NULL; +        glusterfs_ctx_t          *ctx = NULL; +        glusterfs_graph_t        *active = NULL; +        xlator_t                 *this = NULL; +        int                      i = 0; +        int                      count = 0; + +        GF_ASSERT (req); +        this = THIS; +        GF_ASSERT (this); + +        ctx = glusterfs_ctx_get (); +        GF_ASSERT (ctx); + +        active = ctx->active; +        any = active->first; +        if (!xdr_to_generic (req->msg[0], &xlator_req, +                             (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) { +                //failed to decode msg; +                req->rpc_err = GARBAGE_ARGS; +                goto out; +        } +        dict = dict_new (); + +        ret = dict_unserialize (xlator_req.input.input_val, +                                xlator_req.input.input_len, +                                &dict); +        if (ret < 0) { +                gf_log (this->name, GF_LOG_ERROR, +                        "failed to " +                        "unserialize req-buffer to dictionary"); +                goto out; +        } + +        ret = dict_get_int32 (dict, "count", &count); +        i = 0; +        while (i < count)  { +                snprintf (key, sizeof (key), "heal-%d", i); +                ret = dict_get_str (dict, key, &xname); +                if (ret) { +                        gf_log (this->name, GF_LOG_ERROR, "Couldn't get " +                                "replicate xlator %s to trigger " +                                "self-heal", xname); +                        goto out; +                } +                xlator = xlator_search_by_name (any, xname); +                if (!xlator) { +                        snprintf (msg, sizeof (msg), "xlator %s is not loaded", +                                  xlator_req.name); +                        ret = -1; +                        goto out; +                } + +                ret = xlator_notify (xlator, GF_EVENT_TRIGGER_HEAL, dict, NULL); +                i++; +        } +        output = dict_new (); +        if (!output) +                goto out; + +        /* output dict is not used currently, could be used later. 
*/ +        ret = glusterfs_translator_heal_response_send (req, ret, msg, output); +out: +        if (dict) +                dict_unref (dict); +        if (xlator_req.input.input_val) +                free (xlator_req.input.input_val); // malloced by xdr +        if (output) +                dict_unref (output); +        if (xlator_req.name) +                free (xlator_req.name); //malloced by xdr + +        return ret; +} + +int  glusterfs_handle_rpc_msg (rpcsvc_request_t *req)  {          int     ret = -1; @@ -627,6 +748,9 @@ glusterfs_handle_rpc_msg (rpcsvc_request_t *req)          case GF_BRICK_XLATOR_INFO:                  ret = glusterfs_handle_translator_info_get (req);                  break; +        case GF_BRICK_XLATOR_HEAL: +                ret = glusterfs_handle_translator_heal (req); +                break;          default:                  break;          } @@ -681,7 +805,8 @@ rpc_clnt_prog_t clnt_handshake_prog = {  rpcsvc_actor_t glusterfs_actors[] = {          [GF_BRICK_NULL]        = { "NULL",    GF_BRICK_NULL, glusterfs_handle_rpc_msg, NULL, NULL},          [GF_BRICK_TERMINATE] = { "TERMINATE", GF_BRICK_TERMINATE, glusterfs_handle_rpc_msg, NULL, NULL}, -        [GF_BRICK_XLATOR_INFO] = { "TRANSLATOR INFO", GF_BRICK_XLATOR_INFO, glusterfs_handle_rpc_msg, NULL, NULL} +        [GF_BRICK_XLATOR_INFO] = { "TRANSLATOR INFO", GF_BRICK_XLATOR_INFO, glusterfs_handle_rpc_msg, NULL, NULL}, +        [GF_BRICK_XLATOR_HEAL] = { "TRANSLATOR HEAL", GF_BRICK_XLATOR_HEAL, glusterfs_handle_rpc_msg, NULL, NULL}  };  struct rpcsvc_program glusterfs_mop_prog = { diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c index fbae75dff..473a4604e 100644 --- a/libglusterfs/src/globals.c +++ b/libglusterfs/src/globals.c @@ -348,6 +348,7 @@ char eventstring[GF_EVENT_MAXVAL][64] = {          "Transport Cleanup",          "Transport Connected",          "Volfile Modified", +        "Volume Heal Triggered",  };  /* Copy the string ptr contents if needed for yourself */ diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 25f32bd5b..8247c60fb 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -373,6 +373,7 @@ typedef enum {          GF_EVENT_VOLFILE_MODIFIED,          GF_EVENT_GRAPH_NEW,          GF_EVENT_TRANSLATOR_INFO, +        GF_EVENT_TRIGGER_HEAL,          GF_EVENT_MAXVAL,  } glusterfs_event_t; diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index 3c4c8fc44..41197044b 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -205,6 +205,7 @@ enum gluster_cli_procnum {          GLUSTER_CLI_STATUS_VOLUME,          GLUSTER_CLI_MOUNT,          GLUSTER_CLI_UMOUNT, +        GLUSTER_CLI_HEAL_VOLUME,          GLUSTER_CLI_MAXVALUE,  }; @@ -212,6 +213,7 @@ enum gf_brick_procnum {          GF_BRICK_NULL = 0,          GF_BRICK_TERMINATE = 1,          GF_BRICK_XLATOR_INFO = 2, +        GF_BRICK_XLATOR_HEAL = 3,          GF_BRICK_MAX_VALUE  }; diff --git a/rpc/xdr/src/cli1-xdr.c b/rpc/xdr/src/cli1-xdr.c index 25ab32a8f..1240dda8b 100644 --- a/rpc/xdr/src/cli1-xdr.c +++ b/rpc/xdr/src/cli1-xdr.c @@ -1068,3 +1068,33 @@ xdr_gf1_cli_umount_rsp (XDR *xdrs, gf1_cli_umount_rsp *objp)  		 return FALSE;  	return TRUE;  } + +bool_t +xdr_gf1_cli_heal_vol_req (XDR *xdrs, gf1_cli_heal_vol_req *objp) +{ +	register int32_t *buf; +        buf = NULL; + +	 if (!xdr_string (xdrs, &objp->volname, ~0)) +		 return FALSE; +	return TRUE; +} + +bool_t +xdr_gf1_cli_heal_vol_rsp (XDR *xdrs, 
gf1_cli_heal_vol_rsp *objp) +{ +	register int32_t *buf; +        buf = NULL; + +	 if (!xdr_int (xdrs, &objp->op_ret)) +		 return FALSE; +	 if (!xdr_int (xdrs, &objp->op_errno)) +		 return FALSE; +	 if (!xdr_string (xdrs, &objp->volname, ~0)) +		 return FALSE; +	 if (!xdr_string (xdrs, &objp->op_errstr, ~0)) +		 return FALSE; +	 if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0)) +		 return FALSE; +	return TRUE; +} diff --git a/rpc/xdr/src/cli1-xdr.h b/rpc/xdr/src/cli1-xdr.h index 4077ff3c1..f22c635f1 100644 --- a/rpc/xdr/src/cli1-xdr.h +++ b/rpc/xdr/src/cli1-xdr.h @@ -614,6 +614,23 @@ struct gf1_cli_umount_rsp {  };  typedef struct gf1_cli_umount_rsp gf1_cli_umount_rsp; +struct gf1_cli_heal_vol_req { +	char *volname; +}; +typedef struct gf1_cli_heal_vol_req gf1_cli_heal_vol_req; + +struct gf1_cli_heal_vol_rsp { +	int op_ret; +	int op_errno; +	char *volname; +	char *op_errstr; +	struct { +		u_int dict_len; +		char *dict_val; +	} dict; +}; +typedef struct gf1_cli_heal_vol_rsp gf1_cli_heal_vol_rsp; +  /* the xdr functions */  #if defined(__STDC__) || defined(__cplusplus) @@ -687,6 +704,8 @@ extern  bool_t xdr_gf1_cli_mount_req (XDR *, gf1_cli_mount_req*);  extern  bool_t xdr_gf1_cli_mount_rsp (XDR *, gf1_cli_mount_rsp*);  extern  bool_t xdr_gf1_cli_umount_req (XDR *, gf1_cli_umount_req*);  extern  bool_t xdr_gf1_cli_umount_rsp (XDR *, gf1_cli_umount_rsp*); +extern  bool_t xdr_gf1_cli_heal_vol_req (XDR *, gf1_cli_heal_vol_req*); +extern  bool_t xdr_gf1_cli_heal_vol_rsp (XDR *, gf1_cli_heal_vol_rsp*);  #else /* K&R C */  extern bool_t xdr_gf_cli_defrag_type (); @@ -759,6 +778,8 @@ extern bool_t xdr_gf1_cli_mount_req ();  extern bool_t xdr_gf1_cli_mount_rsp ();  extern bool_t xdr_gf1_cli_umount_req ();  extern bool_t xdr_gf1_cli_umount_rsp (); +extern bool_t xdr_gf1_cli_heal_vol_req (); +extern bool_t xdr_gf1_cli_heal_vol_rsp ();  #endif /* K&R C */ diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x index 2a6168c04..9a1f77c0e 100644 --- a/rpc/xdr/src/cli1-xdr.x +++ b/rpc/xdr/src/cli1-xdr.x @@ -455,3 +455,15 @@ struct gf1_cli_umount_rsp {         int op_ret;         int op_errno;  }; + +struct gf1_cli_heal_vol_req { +       string volname<>; +}  ; + +struct gf1_cli_heal_vol_rsp { +       int     op_ret; +       int     op_errno; +       string  volname<>; +       string op_errstr<>; +       opaque  dict<>; +}  ; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 2e5ca71b2..c23e329df 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -3498,7 +3498,15 @@ afr_notify (xlator_t *this, int32_t event,                          priv->last_event[idx] = event;                  }                  UNLOCK (&priv->lock); + +                break; + +        case GF_EVENT_TRIGGER_HEAL: +                gf_log (this->name, GF_LOG_INFO, "Self-heal was triggered" +                        " manually. 
Start crawling"); +                call_psh = 1;                  break; +          default:                  propagate = 1;                  break; diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index df0aa202c..2ab80c2ff 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -2562,6 +2562,42 @@ glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,  }  int +glusterd_shd_rpc_notify (struct rpc_clnt *rpc, void *mydata, +                         rpc_clnt_event_t event, +                         void *data) +{ +        xlator_t                *this = NULL; +        glusterd_conf_t         *conf = NULL; +        int                     ret = 0; + +        this = THIS; +        GF_ASSERT (this); +        conf = this->private; +        GF_ASSERT (conf); + +        switch (event) { +        case RPC_CLNT_CONNECT: +                gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_CONNECT"); +                (void) glusterd_shd_set_running (_gf_true); +                ret = default_notify (this, GF_EVENT_CHILD_UP, NULL); + +                break; + +        case RPC_CLNT_DISCONNECT: +                gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT"); +                (void) glusterd_shd_set_running (_gf_false); +                break; + +        default: +                gf_log (this->name, GF_LOG_TRACE, +                        "got some other RPC event %d", event); +                break; +        } + +        return ret; +} + +int  glusterd_friend_remove_notify (glusterd_peerinfo_t *peerinfo, rpcsvc_request_t *req)  {          int ret = -1; @@ -2742,6 +2778,7 @@ rpcsvc_actor_t gd_svc_cli_actors[] = {          [GLUSTER_CLI_STATUS_VOLUME]  = {"STATUS_VOLUME", GLUSTER_CLI_STATUS_VOLUME, glusterd_handle_status_volume, NULL, NULL},          [GLUSTER_CLI_MOUNT]         = { "MOUNT", GLUSTER_CLI_MOUNT, glusterd_handle_mount, NULL, NULL},          [GLUSTER_CLI_UMOUNT]        = { "UMOUNT", GLUSTER_CLI_UMOUNT, glusterd_handle_umount, NULL, NULL}, +        [GLUSTER_CLI_HEAL_VOLUME]  = { "HEAL_VOLUME", GLUSTER_CLI_HEAL_VOLUME, glusterd_handle_cli_heal_volume, NULL, NULL}  }; diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h index e10cf1aca..196f5f50b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h +++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h @@ -70,7 +70,8 @@ typedef enum gf_gld_mem_types_ {          gf_gld_mt_mount_comp_container          = gf_common_mt_end + 44,          gf_gld_mt_mount_component               = gf_common_mt_end + 45,          gf_gld_mt_mount_spec                    = gf_common_mt_end + 46, -        gf_gld_mt_end                           = gf_common_mt_end + 47, +        gf_gld_mt_nodesrv_t                     = gf_common_mt_end + 47, +        gf_gld_mt_end                           = gf_common_mt_end + 48,  } gf_gld_mem_types_t;  #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index c9d1c99a1..84280498e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -202,6 +202,17 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin                  brick_req->name = brickinfo->path;                  break; +        case GD_OP_HEAL_VOLUME: +        { +                brick_req = GF_CALLOC (1, sizeof (*brick_req), +                                   
    gf_gld_mt_mop_brick_req_t); +                if (!brick_req) +                        goto out; + +                brick_req->op = GF_BRICK_XLATOR_HEAL; +                brick_req->name = ""; +        } +                break;          default:                  goto out;          break; @@ -1483,6 +1494,7 @@ glusterd_op_build_payload (dict_t **req)                  case GD_OP_LOG_LEVEL:                  case GD_OP_STATUS_VOLUME:                  case GD_OP_REBALANCE: +                case GD_OP_HEAL_VOLUME:                          {                                  dict_t  *dict = ctx;                                  dict_copy (dict, req_dict); @@ -1777,19 +1789,15 @@ glusterd_op_ac_brick_op_failed (glusterd_op_sm_event_t *event, void *ctx)  {          int                     ret = 0;          glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL; -        glusterd_brickinfo_t        *brickinfo = NULL;          gf_boolean_t                free_errstr = _gf_false;          GF_ASSERT (event);          GF_ASSERT (ctx);          ev_ctx = ctx; -        brickinfo = ev_ctx->brickinfo; -        GF_ASSERT (brickinfo); -        ret = glusterd_remove_pending_entry (&opinfo.pending_bricks, brickinfo); +        ret = glusterd_remove_pending_entry (&opinfo.pending_bricks, ev_ctx->pending_node->node);          if (ret) { -                gf_log ("glusterd", GF_LOG_ERROR, "unknown response received " -                        "from %s:%s", brickinfo->hostname, brickinfo->path); +                gf_log ("glusterd", GF_LOG_ERROR, "unknown response received ");                  ret = -1;                  free_errstr = _gf_true;                  goto out; @@ -1828,7 +1836,7 @@ glusterd_op_brick_disconnect (void *data)          ev_ctx = data;          GF_ASSERT (ev_ctx); -        brickinfo = ev_ctx->brickinfo; +        brickinfo = ev_ctx->pending_node->node;          GF_ASSERT (brickinfo);          if (brickinfo->timer) { @@ -2260,6 +2268,10 @@ glusterd_op_stage_validate (glusterd_op_t op, dict_t *dict, char **op_errstr,                          ret = glusterd_op_stage_rebalance (dict, op_errstr);                          break; +                case GD_OP_HEAL_VOLUME: +                        ret = glusterd_op_stage_heal_volume (dict, op_errstr); +                        break; +                  default:                          gf_log ("", GF_LOG_ERROR, "Unknown op %d",                                  op); @@ -2351,6 +2363,10 @@ glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr,                         ret = glusterd_op_rebalance (dict, op_errstr, rsp_dict);                         break; +               case GD_OP_HEAL_VOLUME: +                       ret = glusterd_op_heal_volume (dict, op_errstr); +                       break; +                  default:                          gf_log ("", GF_LOG_ERROR, "Unknown op %d",                                  op); @@ -2467,6 +2483,7 @@ glusterd_bricks_select_stop_volume (dict_t *dict, char **op_errstr)                                  goto out;                          } else {                                  pending_node->node = brickinfo; +                                pending_node->type = GD_NODE_BRICK;                                  list_add_tail (&pending_node->list, &opinfo.pending_bricks);                                  pending_node = NULL;                          } @@ -2539,6 +2556,7 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr)                                  goto out;                          } 
else {                                  pending_node->node = brickinfo; +                                pending_node->type = GD_NODE_BRICK;                                  list_add_tail (&pending_node->list, &opinfo.pending_bricks);                                  pending_node = NULL;                          } @@ -2606,6 +2624,7 @@ glusterd_bricks_select_profile_volume (dict_t *dict, char **op_errstr)                                          goto out;                                  } else {                                          pending_node->node = brickinfo; +                                        pending_node->type = GD_NODE_BRICK;                                          list_add_tail (&pending_node->list,                                                         &opinfo.pending_bricks);                                          pending_node = NULL; @@ -2674,6 +2693,117 @@ out:  }  static int +_add_rxlator_to_dict (dict_t *dict, char *volname, int index, int count) +{ +        int     ret             = -1; +        char    key[128]        = {0,}; +        char    *xname          = NULL; + +        snprintf (key, sizeof (key), "heal-%d", count); +        ret = gf_asprintf (&xname, "%s-replicate-%d", volname, index); +        if (ret == -1) +                goto out; + +        ret = dict_set_dynstr (dict, key, xname); +out: +        return ret; +} + +static int +glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr) +{ +        int                                     ret = -1; +        char                                    *volname = NULL; +        glusterd_conf_t                         *priv = NULL; +        glusterd_volinfo_t                      *volinfo = NULL; +        glusterd_brickinfo_t                    *brickinfo = NULL; +        xlator_t                                *this = NULL; +        char                                    msg[2048] = {0,}; +        int                                     replica_count = 0; +        int                                     index = 1; +        int                                     rxlator_count = 0; +        uuid_t                                  candidate = {0}; +        glusterd_pending_node_t                 *pending_node = NULL; + +        this = THIS; +        GF_ASSERT (this); +        priv = this->private; +        GF_ASSERT (priv); + +        ret = dict_get_str (dict, "volname", &volname); +        if (ret) { +                gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed"); +                goto out; +        } + +        ret = glusterd_volinfo_find (volname, &volinfo); +        if (ret) { +                snprintf (msg, sizeof (msg), "Volume %s does not exist", +                          volname); + +                *op_errstr = gf_strdup (msg); +                gf_log ("", GF_LOG_ERROR, "%s", msg); +                goto out; +        } + +        if (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) { +                replica_count = volinfo->replica_count; + +        } else if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) { +                replica_count = volinfo->sub_count; + +        } else { +                GF_ASSERT (0); +                goto out; +        } + +        index = 1; +        list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { +                if (uuid_is_null (brickinfo->uuid)) +                        (void)glusterd_resolve_brick (brickinfo); + +                if (uuid_compare (brickinfo->uuid, candidate) > 0) +                        uuid_copy (candidate, 
brickinfo->uuid); + +                if (index % replica_count == 0) { +                        if (!uuid_compare (priv->uuid, candidate)) { +                                _add_rxlator_to_dict (dict, volname, +                                                      (index-1)/replica_count, +                                                      rxlator_count); +                                rxlator_count++; +                        } +                        uuid_clear (candidate); +                } + +                index++; +        } +        ret = dict_set_int32 (dict, "count", rxlator_count); +        if (ret) +                goto out; + +        if (rxlator_count) { +                pending_node = GF_CALLOC (1, sizeof (*pending_node), +                                          gf_gld_mt_pending_node_t); +                if (!pending_node) { +                        ret = -1; +                        goto out; +                } else { +                        pending_node->node = priv->shd; +                        pending_node->type = GD_NODE_SHD; +                        list_add_tail (&pending_node->list, +                                       &opinfo.pending_bricks); +                        pending_node = NULL; +                } +        } + + +out: +        gf_log (THIS->name, GF_LOG_DEBUG, "Returning ret %d", ret); +        return ret; + +} + +static int  glusterd_op_ac_send_brick_op (glusterd_op_sm_event_t *event, void *ctx)  {          int                             ret = 0; @@ -2723,7 +2853,6 @@ glusterd_op_ac_rcvd_brick_op_acc (glusterd_op_sm_event_t *event, void *ctx)  {          int                     ret = 0;          glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL; -        glusterd_brickinfo_t        *brickinfo = NULL;          char                        *op_errstr = NULL;          glusterd_op_t               op = GD_OP_NONE;          dict_t                      *op_ctx = NULL; @@ -2736,24 +2865,22 @@ glusterd_op_ac_rcvd_brick_op_acc (glusterd_op_sm_event_t *event, void *ctx)          req_ctx = ev_ctx->commit_ctx;          GF_ASSERT (req_ctx); -        brickinfo = ev_ctx->brickinfo; -        GF_ASSERT (brickinfo); +        op = req_ctx->op; +        op_ctx = glusterd_op_get_ctx (); -        ret = glusterd_remove_pending_entry (&opinfo.pending_bricks, brickinfo); +        ret = glusterd_remove_pending_entry (&opinfo.pending_bricks, +                                             ev_ctx->pending_node->node);          if (ret) { -                gf_log ("glusterd", GF_LOG_ERROR, "unknown response received " -                        "from %s:%s", brickinfo->hostname, brickinfo->path); +                gf_log ("glusterd", GF_LOG_ERROR, "unknown response received ");                  ret = -1;                  goto out;          }          if (opinfo.brick_pending_count > 0)                  opinfo.brick_pending_count--; -        op = req_ctx->op; -        op_ctx = glusterd_op_get_ctx (); -        glusterd_handle_brick_rsp (brickinfo, op, ev_ctx->rsp_dict, -                                   op_ctx, &op_errstr); +        glusterd_handle_brick_rsp (ev_ctx->pending_node->node, op, ev_ctx->rsp_dict, +                                           op_ctx, &op_errstr);          if (opinfo.brick_pending_count > 0)                  goto out; @@ -2791,6 +2918,10 @@ glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr)                  ret = glusterd_bricks_select_profile_volume (dict, op_errstr);                  break; +        case GD_OP_HEAL_VOLUME: +        
        ret = glusterd_bricks_select_heal_volume (dict, op_errstr); +                break; +          default:                  break;           } @@ -3344,6 +3475,7 @@ glusterd_op_free_ctx (glusterd_op_t op, void *ctx)                  case GD_OP_LOG_LEVEL:                  case GD_OP_STATUS_VOLUME:                  case GD_OP_REBALANCE: +                case GD_OP_HEAL_VOLUME:                          dict_unref (ctx);                          break;                  case GD_OP_DELETE_VOLUME: diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h index 0a75d4c84..97385e6a4 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h @@ -152,7 +152,7 @@ typedef struct glusterd_op_brick_rsp_ctx_ {          char *op_errstr;          dict_t *rsp_dict;          glusterd_req_ctx_t *commit_ctx; -        glusterd_brickinfo_t *brickinfo; +        glusterd_pending_node_t *pending_node;  } glusterd_op_brick_rsp_ctx_t;  typedef struct glusterd_pr_brick_rsp_conv_t { diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c index c9f414052..9cdab97df 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c @@ -455,6 +455,21 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret,                  xdrproc = (xdrproc_t)xdr_gf2_cli_defrag_vol_rsp;                  break;          } +        case GD_OP_HEAL_VOLUME: +        { +                gf1_cli_heal_vol_rsp rsp = {0,}; +                rsp.op_ret = op_ret; +                rsp.op_errno = op_errno; +                rsp.volname = ""; +                if (op_errstr) +                        rsp.op_errstr = op_errstr; +                else +                        rsp.op_errstr = ""; +                cli_rsp = &rsp; +                xdrproc = (xdrproc_t) xdr_gf1_cli_heal_vol_rsp; +                break; + +        }          case GD_OP_NONE:          case GD_OP_MAX:          { @@ -1922,7 +1937,7 @@ glusterd_start_brick_disconnect_timer (glusterd_op_brick_rsp_ctx_t *ev_ctx)          timeout.tv_sec  = 5;          timeout.tv_usec = 0; -        brickinfo = ev_ctx->brickinfo; +        brickinfo = ev_ctx->pending_node->node;          GF_ASSERT (brickinfo);          this = THIS;          GF_ASSERT (this); @@ -2000,7 +2015,7 @@ out:          } else {                  event_type = GD_OP_EVENT_RCVD_ACC;          } -        ev_ctx->brickinfo = frame->cookie; +        ev_ctx->pending_node = frame->cookie;          ev_ctx->rsp_dict  = dict;          ev_ctx->commit_ctx = frame->local;          op = glusterd_op_get_op (); @@ -2087,9 +2102,9 @@ glusterd3_1_brick_op (call_frame_t *frame, xlator_t *this,          call_frame_t                    *dummy_frame = NULL;          char                            *op_errstr = NULL;          int                             pending_bricks = 0; -        glusterd_pending_node_t         *pending_brick; -        glusterd_brickinfo_t            *brickinfo = NULL; +        glusterd_pending_node_t         *pending_node;          glusterd_req_ctx_t               *req_ctx = NULL; +        struct rpc_clnt                 *rpc = NULL;          if (!this) {                  ret = -1; @@ -2109,25 +2124,30 @@ glusterd3_1_brick_op (call_frame_t *frame, xlator_t *this,                  goto out;          } -        list_for_each_entry (pending_brick, &opinfo.pending_bricks, list) { +        list_for_each_entry (pending_node, &opinfo.pending_bricks, 
list) {                  dummy_frame = create_frame (this, this->ctx->pool); -                brickinfo = pending_brick->node; -                  if (!dummy_frame)                          continue; -                if (_gf_false == glusterd_is_brick_started (brickinfo)) -                        continue; - -                ret = glusterd_brick_op_build_payload (req_ctx->op, brickinfo, -                                                (gd1_mgmt_brick_op_req **)&req, -                                                 req_ctx->dict); +                ret = glusterd_brick_op_build_payload (req_ctx->op, +                                                       pending_node->node, +                                                       (gd1_mgmt_brick_op_req **)&req, +                                                       req_ctx->dict);                  if (ret)                          goto out;                  dummy_frame->local = data; -                dummy_frame->cookie = brickinfo; -                ret = glusterd_submit_request (brickinfo->rpc, req, dummy_frame, +                dummy_frame->cookie = pending_node; + +                rpc = glusterd_pending_node_get_rpc (pending_node); +                if (!rpc) { +                        ret = -1; +                        gf_log (this->name, GF_LOG_ERROR, "Brick Op failed " +                                "due to rpc failure."); +                        goto out; +                } + +                ret = glusterd_submit_request (rpc, req, dummy_frame,                                                 &glusterd_glusterfs_3_1_mgmt_prog,                                                 req->op, NULL,                                                 this, glusterd3_1_brick_op_cbk, @@ -2143,7 +2163,7 @@ glusterd3_1_brick_op (call_frame_t *frame, xlator_t *this,          }          gf_log ("glusterd", GF_LOG_DEBUG, "Sent op req to %d bricks", -                                            pending_bricks); +                pending_bricks);          opinfo.brick_pending_count = pending_bricks;  out: diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index e6c23e833..59609971b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -2366,6 +2366,120 @@ glusterd_get_nodesvc_volfile (char *server, char *workdir,          snprintf (volfile, len, "%s/%s-server.vol", dir, server);  } +void +glusterd_shd_set_running (gf_boolean_t status) +{ +        glusterd_conf_t *priv = NULL; + +        priv = THIS->private; +        GF_ASSERT (priv); +        GF_ASSERT (priv->shd); + +        priv->shd->running = status; +} + +gf_boolean_t +glusterd_shd_is_running () +{ +        glusterd_conf_t *conf = NULL; + +        conf = THIS->private; +        GF_ASSERT (conf); +        GF_ASSERT (conf->shd); + +        return conf->shd->running; +} + +int32_t +glusterd_shd_set_socket_filepath (char *rundir, uuid_t uuid, +                                  char *socketpath, int len) +{ +        char                    sockfilepath[PATH_MAX] = {0,}; +        char                    md5_str[PATH_MAX] = {0,}; + +        snprintf (sockfilepath, sizeof (sockfilepath), "%s/run-%s", +                  rundir, uuid_utoa (uuid)); +        _get_md5_str (md5_str, sizeof (md5_str), +                      (uint8_t *)sockfilepath, sizeof (sockfilepath)); +        snprintf (socketpath, len, "%s/%s.socket", glusterd_sock_dir, +                  md5_str); +        return 0; +} + +struct rpc_clnt* 
+glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node) +{ +        struct rpc_clnt *rpc = NULL; +        glusterd_brickinfo_t    *brickinfo = NULL; +        nodesrv_t               *shd       = NULL; +        GF_VALIDATE_OR_GOTO (THIS->name, pending_node, out); +        GF_VALIDATE_OR_GOTO (THIS->name, pending_node->node, out); + +        if (pending_node->type == GD_NODE_BRICK) { +                brickinfo = pending_node->node; +                rpc       = brickinfo->rpc; + +        } else if (pending_node->type == GD_NODE_SHD) { +                shd       = pending_node->node; +                rpc       = shd->rpc; + +        } else { +                GF_ASSERT (0); +        } + +out: +        return rpc; +} + +struct rpc_clnt* +glusterd_shd_get_rpc (void) +{ +        glusterd_conf_t *priv   = NULL; + +        priv = THIS->private; +        GF_ASSERT (priv); +        GF_ASSERT (priv->shd); + +        return priv->shd->rpc; +} + +int32_t +glusterd_shd_set_rpc (struct rpc_clnt *rpc) +{ +        int             ret   = 0; +        xlator_t        *this = NULL; +        glusterd_conf_t *priv = NULL; + +        this = THIS; +        GF_ASSERT (this); +        priv = this->private; +        GF_ASSERT (priv); +        GF_ASSERT (priv->shd); + +        priv->shd->rpc = rpc; + +        return ret; +} + +int32_t +glusterd_shd_connect (char *socketpath) { +        int                     ret = 0; +        dict_t                  *options = NULL; +        struct rpc_clnt         *rpc = NULL; + +        ret = rpc_clnt_transport_unix_options_build (&options, socketpath); +        if (ret) +                goto out; +        ret = glusterd_rpc_create (&rpc, options, +                                   glusterd_shd_rpc_notify, +                                   NULL); +        if (ret) +                goto out; +        (void) glusterd_shd_set_rpc (rpc); +out: +        return ret; +} +  int32_t  glusterd_nodesvc_start (char *server, gf_boolean_t pmap_signin)  { @@ -2376,6 +2490,7 @@ glusterd_nodesvc_start (char *server, gf_boolean_t pmap_signin)          char                    logfile[PATH_MAX] = {0,};          char                    volfile[PATH_MAX] = {0,};          char                    rundir[PATH_MAX] = {0,}; +        char                    shd_sockfpath[PATH_MAX] = {0,};          char                    volfileid[256]   = {0};          this = THIS; @@ -2408,13 +2523,28 @@ glusterd_nodesvc_start (char *server, gf_boolean_t pmap_signin)                    server);          snprintf (volfileid, sizeof (volfileid), "gluster/%s", server); -        if (pmap_signin) +        if (!strcmp (server, "glustershd")) { +                glusterd_shd_set_socket_filepath (rundir, +                                                  priv->uuid, +                                                  shd_sockfpath, +                                                  sizeof (shd_sockfpath)); +        } + +        //TODO: kp:change the assumption that shd is the one which signs in +        // use runner_add_args? 
+        if (pmap_signin) {                  ret = runcmd (SBIN_DIR"/glusterfs", "-s", "localhost",                                "--volfile-id", volfileid, -                              "-p", pidfile, "-l", logfile, NULL); -        else +                              "-p", pidfile, "-l", logfile, +                              "-S", shd_sockfpath, NULL); +                if (!ret) +                        glusterd_shd_connect (shd_sockfpath); + +        } +        else {                  ret = runcmd (SBIN_DIR"/glusterfs", "-f", volfile,                                "-p", pidfile, "-l", logfile, NULL); +        }  out:          return ret; @@ -3742,7 +3872,7 @@ glusterd_remove_pending_entry (struct list_head *list, void *elem)  {          glusterd_pending_node_t *pending_node = NULL;          glusterd_pending_node_t *tmp = NULL; -        int                     ret = -1; +        int                     ret = 0;          list_for_each_entry_safe (pending_node, tmp, list, list) {                  if (elem == pending_node->node) { diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 93fa763bd..aca46eae1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -174,6 +174,28 @@ glusterd_shd_start ();  int32_t  glusterd_shd_stop (); +int32_t +glusterd_shd_set_socket_filepath (char *rundir, uuid_t uuid, +                                  char *socketpath, int len); + +struct rpc_clnt* +glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node); + +struct rpc_clnt* +glusterd_shd_get_rpc (void); + +int32_t +glusterd_shd_set_rpc (struct rpc_clnt *rpc); + +int32_t +glusterd_shd_connect (char *socketpath); + +void +glusterd_shd_set_running (gf_boolean_t status); + +gf_boolean_t +glusterd_shd_is_running (); +  int  glusterd_remote_hostname_get (rpcsvc_request_t *req,                                char *remote_host, int len); diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 560968def..39cc02c8e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -392,6 +392,62 @@ out:          return ret;  } +int +glusterd_handle_cli_heal_volume (rpcsvc_request_t *req) +{ +        int32_t                         ret = -1; +        gf1_cli_heal_vol_req           cli_req = {0,}; +        char                            *dup_volname = NULL; +        dict_t                          *dict = NULL; +        glusterd_op_t                   cli_op = GD_OP_HEAL_VOLUME; + +        GF_ASSERT (req); + +        if (!xdr_to_generic (req->msg[0], &cli_req, +                             (xdrproc_t)xdr_gf1_cli_heal_vol_req)) { +                //failed to decode msg; +                req->rpc_err = GARBAGE_ARGS; +                goto out; +        } + +        gf_log ("glusterd", GF_LOG_INFO, "Received heal vol req" +                "for volume %s", cli_req.volname); + +        dict = dict_new (); + +        if (!dict) +                goto out; + +        dup_volname = gf_strdup (cli_req.volname); +        if (!dup_volname) +                goto out; + +        ret = dict_set_dynstr (dict, "volname", dup_volname); +        if (ret) +                goto out; + +        ret = glusterd_op_begin (req, GD_OP_HEAL_VOLUME, dict); + +        gf_cmd_log ("volume heal","on volname: %s %s", cli_req.volname, +                    ((ret == 0) ? 
"SUCCESS": "FAILED")); + +out: +        if (ret && dict) +                dict_unref (dict); +        if (cli_req.volname) +                free (cli_req.volname); //its malloced by xdr + +        glusterd_friend_sm (); +        glusterd_op_sm (); + +        if (ret) +                ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, +                                                     NULL, "operation failed"); + +        return ret; +} + +  /* op-sm */  int  glusterd_op_stage_create_volume (dict_t *dict, char **op_errstr) @@ -754,6 +810,101 @@ out:  }  int +glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr) +{ +        int                                     ret = 0; +        char                                    *volname = NULL; +        gf_boolean_t                            exists  = _gf_false; +        gf_boolean_t                            enabled = _gf_false; +        glusterd_volinfo_t                      *volinfo = NULL; +        char                                    msg[2048]; +        glusterd_conf_t                         *priv = NULL; +        dict_t                                  *opt_dict = NULL; + +        priv = THIS->private; +        if (!priv) { +                gf_log (THIS->name, GF_LOG_ERROR, +                        "priv is NULL"); +                ret = -1; +                goto out; +        } + +        if (!glusterd_shd_is_running ()) { +                ret = -1; +                snprintf (msg, sizeof (msg), "Self-heal daemon is not " +                          "running."); +                *op_errstr = gf_strdup (msg); +                gf_log (THIS->name, GF_LOG_WARNING, "%s", msg); +                goto out; +        } + +        ret = dict_get_str (dict, "volname", &volname); +        if (ret) { +                gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); +                goto out; +        } + +        exists = glusterd_check_volume_exists (volname); + +        if (!exists) { +                snprintf (msg, sizeof (msg), "Volume %s does not exist", volname); +                gf_log ("", GF_LOG_ERROR, "%s", +                        msg); +                *op_errstr = gf_strdup (msg); +                ret = -1; +        } else { +                ret = 0; +        } + +        ret  = glusterd_volinfo_find (volname, &volinfo); + +        if (ret) +                goto out; + +        if (!glusterd_is_volume_started (volinfo)) { +                snprintf (msg, sizeof (msg), "Volume %s is not started.", +                          volname); +                gf_log (THIS->name, GF_LOG_WARNING, "%s", msg); +                *op_errstr = gf_strdup (msg); +                ret = -1; +                goto out; +        } + +        if (!glusterd_is_volume_replicate (volinfo)) { +                snprintf (msg, sizeof (msg), "Volume %s is not of type." +                          "replicate", volname); +                gf_log (THIS->name, GF_LOG_WARNING, "%s", msg); +                *op_errstr = gf_strdup (msg); +                ret = -1; +                goto out; +        } + +        opt_dict = volinfo->dict; +        if (!opt_dict) { +                ret = 0; +                goto out; +        } + +        enabled = dict_get_str_boolean (opt_dict, "cluster.self-heal-daemon", +                                        1); +        if (!enabled) { +                snprintf (msg, sizeof (msg), "Self-heal-daemon is " +                          "disabled. 
Heal will not be triggered on volume %s", +                          volname); +                gf_log (THIS->name, GF_LOG_WARNING, "%s", msg); +                *op_errstr = gf_strdup (msg); +                ret = -1; +                goto out; +        } + +        ret = 0; +out: +        gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + +        return ret; +} + +int  glusterd_op_create_volume (dict_t *dict, char **op_errstr)  {          int                   ret        = 0; @@ -1034,3 +1185,12 @@ out:          gf_log ("", GF_LOG_DEBUG, "returning %d", ret);          return ret;  } + +int +glusterd_op_heal_volume (dict_t *dict, char **op_errstr) +{ +        int                                     ret = 0; +        /* Necessary subtasks of heal are completed in brick op */ + +        return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index d1582eae3..83bbd1b22 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -930,6 +930,10 @@ init (xlator_t *this)          conf = GF_CALLOC (1, sizeof (glusterd_conf_t),                            gf_gld_mt_glusterd_conf_t);          GF_VALIDATE_OR_GOTO(this->name, conf, out); +        conf->shd = GF_CALLOC (1, sizeof (nodesrv_t), +                               gf_gld_mt_nodesrv_t); +        GF_VALIDATE_OR_GOTO(this->name, conf->shd, out); +          INIT_LIST_HEAD (&conf->peers);          INIT_LIST_HEAD (&conf->volumes);          pthread_mutex_init (&conf->mutex, NULL); @@ -961,6 +965,7 @@ init (xlator_t *this)          }  #endif          this->private = conf; +        (void) glusterd_shd_set_running (_gf_false);          /* this->ctx->top = this;*/          ret = glusterd_uuid_init (first_time); diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index ab521af0f..b49e7d675 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -79,6 +79,7 @@ typedef enum glusterd_op_ {          GD_OP_LOG_LEVEL,          GD_OP_STATUS_VOLUME,          GD_OP_REBALANCE, +        GD_OP_HEAL_VOLUME,          GD_OP_MAX,  } glusterd_op_t; @@ -95,6 +96,11 @@ struct glusterd_volgen {          dict_t *dict;  };  typedef struct { +        struct rpc_clnt         *rpc; +        gf_boolean_t            running; +} nodesrv_t; + +typedef struct {          struct _volfile_ctx *volfile;  	pthread_mutex_t   mutex;  	struct list_head  peers; @@ -104,6 +110,7 @@ typedef struct {          uuid_t            uuid;          char              workdir[PATH_MAX];          rpcsvc_t          *rpc; +        nodesrv_t         *shd;          struct pmap_registry *pmap;          struct list_head  volumes;          struct list_head  xprt_list; @@ -225,9 +232,16 @@ struct glusterd_volinfo_ {          xlator_t                *xl;  }; +typedef enum gd_node_type_ { +        GD_NODE_NONE, +        GD_NODE_BRICK, +        GD_NODE_SHD +} gd_node_type; +  typedef struct glusterd_pending_node_ { -        void   *node;          struct list_head list; +        void   *node; +        gd_node_type type;  } glusterd_pending_node_t;  enum glusterd_op_ret { @@ -511,6 +525,10 @@ glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,                            rpc_clnt_event_t event, void *data);  int +glusterd_shd_rpc_notify (struct rpc_clnt *rpc, void *mydata, +                          rpc_clnt_event_t event, void *data); + +int  glusterd_rpc_create (struct rpc_clnt **rpc, dict_t *options,                       rpc_clnt_notify_t 
notify_fn, void *notify_data); @@ -535,8 +553,11 @@ int glusterd_handle_cli_delete_volume (rpcsvc_request_t *req);  int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,                                    size_t len, int cmd, defrag_cbk_fn_t cbk); +int glusterd_handle_cli_heal_volume (rpcsvc_request_t *req);  /* op-sm functions */ +int glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr); +int glusterd_op_heal_volume (dict_t *dict, char **op_errstr);  int glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr);  int glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict);  int glusterd_op_quota (dict_t *dict, char **op_errstr);  | 
