diff options
| author | Pranith Kumar K <pranithk@gluster.com> | 2012-02-16 21:30:47 +0530 | 
|---|---|---|
| committer | Vijay Bellur <vijay@gluster.com> | 2012-02-20 21:23:37 -0800 | 
| commit | 81ab6622d403558cd6f31efeb535fe886d3beeaa (patch) | |
| tree | 7e30ec6d7ee51e957a50c98741f0a3a7118b5dfa | |
| parent | 5f117a4a1fca3ec2d163fe77615cf6859c0450e4 (diff) | |
cluster/afr: Add commands to see self-heald ops
Change-Id: Id92d3276e65a6c0fe61ab328b58b3954ae116c74
BUG: 763820
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Reviewed-on: http://review.gluster.com/2775
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vijay@gluster.com>
24 files changed, 1274 insertions, 387 deletions
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c index 703a06e84..00c2f3618 100644 --- a/cli/src/cli-cmd-parser.c +++ b/cli/src/cli-cmd-parser.c @@ -2108,3 +2108,75 @@ out:         return ret;  } + +int +cli_cmd_volume_heal_options_parse (const char **words, int wordcount, +                                   dict_t **options) +{ +        int     ret = 0; +        dict_t  *dict = NULL; + +        dict = dict_new (); +        if (!dict) +                goto out; + +        ret = dict_set_str (dict, "volname", (char *) words[2]); +        if (ret) { +                gf_log (THIS->name, GF_LOG_ERROR, "failed to set volname"); +                goto out; +        } + +        if (wordcount == 3) { +                ret = dict_set_int32 (dict, "heal-op", GF_AFR_OP_HEAL_INDEX); +                goto done; +        } + +        if (wordcount == 4) { +                if (!strcmp (words[3], "full")) { +                        ret = dict_set_int32 (dict, "heal-op", +                                              GF_AFR_OP_HEAL_FULL); +                        goto done; +                } else if (!strcmp (words[3], "info")) { +                        ret = dict_set_int32 (dict, "heal-op", +                                              GF_AFR_OP_INDEX_SUMMARY); +                        goto done; +                } else { +                        ret = -1; +                        goto out; +                } +        } +        if (wordcount == 5) { +                if (strcmp (words[3], "info")) { +                        ret = -1; +                        goto out; +                } +                if (!strcmp (words[4], "healed")) { +                        ret = dict_set_int32 (dict, "heal-op", +                                              GF_AFR_OP_HEALED_FILES); +                        goto done; +                } +                if (!strcmp (words[4], "heal-failed")) { +                        ret = dict_set_int32 (dict, "heal-op", +                                              GF_AFR_OP_HEAL_FAILED_FILES); +                        goto done; +                } +                if (!strcmp (words[4], "split-brain")) { +                        ret = dict_set_int32 (dict, "heal-op", +                                              GF_AFR_OP_SPLIT_BRAIN_FILES); +                        goto done; +                } +                ret = -1; +                goto out; +        } +        ret = -1; +        goto out; +done: +        *options = dict; +out: +        if (ret && dict) { +                dict_unref (dict); +                *options = NULL; +        } + +        return ret; +} diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index 37b41c81e..0906f3387 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -1560,32 +1560,29 @@ cli_cmd_volume_heal_cbk (struct cli_state *state, struct cli_cmd_word *word,          call_frame_t            *frame = NULL;          int                     sent = 0;          int                     parse_error = 0; -        dict_t                  *dict = NULL; +        dict_t                  *options = NULL;          frame = create_frame (THIS, THIS->ctx->pool);          if (!frame)                  goto out; -        if (wordcount != 3) { +        if (wordcount < 3) {                 cli_usage_out (word->pattern); -                parse_error = 1; +               parse_error = 1;                 goto out;          } -        dict = dict_new (); -        if (!dict) -                goto out; - -        ret = dict_set_str (dict, "volname", (char *) words[2]); +        ret = cli_cmd_volume_heal_options_parse (words, wordcount, &options);          if (ret) { -                gf_log (THIS->name, GF_LOG_ERROR, "failed to set volname"); +                cli_usage_out (word->pattern); +                parse_error = 1;                  goto out;          }          proc = &cli_rpc_prog->proctable[GLUSTER_CLI_HEAL_VOLUME];          if (proc->fn) { -                ret = proc->fn (frame, THIS, dict); +                ret = proc->fn (frame, THIS, options);          }  out: @@ -1595,8 +1592,8 @@ out:                          cli_out ("Volume heal failed");          } -        if (dict) -                dict_unref (dict); +        if (options) +                dict_unref (options);          return ret;  } @@ -1826,9 +1823,9 @@ struct cli_cmd volume_cmds[] = {            cli_cmd_volume_status_cbk,            "display status of all or specified volume(s)/brick"}, -        { "volume heal <VOLNAME>", +        { "volume heal <VOLNAME> [{full | info {healed | heal-failed | split-brain}}]",            cli_cmd_volume_heal_cbk, -          "Start healing of volume specified by <VOLNAME>"}, +          "self-heal commands on volume specified by <VOLNAME>"},          {"volume statedump <VOLNAME> [nfs] [all|mem|iobuf|callpool|priv|fd|"           "inode|history]...", diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index d1888415c..9537c977d 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -5320,6 +5320,38 @@ gf_cli3_1_umount (call_frame_t *frame, xlator_t *this, void *data)          return ret;  } +void +cmd_heal_volume_brick_out (dict_t *dict, int brick) +{ +        uint64_t        num_entries = 0; +        int             ret = 0; +        char            key[256] = {0}; +        char            *hostname = NULL; +        char            *path = NULL; +        uint64_t        i = 0; + +        snprintf (key, sizeof (key), "%d-hostname", brick); +        ret = dict_get_str (dict, key, &hostname); +        if (ret) +                goto out; +        snprintf (key, sizeof (key), "%d-path", brick); +        ret = dict_get_str (dict, key, &path); +        if (ret) +                goto out; +        snprintf (key, sizeof (key), "%d-count", brick); +        ret = dict_get_uint64 (dict, key, &num_entries); +        cli_out ("\nEntries on %s:%s %"PRIu64, hostname, path, num_entries); +        for (i = 0; i < num_entries; i++) { +                snprintf (key, sizeof (key), "%d-%"PRIu64, brick, i); +                ret = dict_get_str (dict, key, &path); +                if (ret) +                        continue; +                cli_out (path); +        } +out: +        return; +} +  int  gf_cli3_1_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,                               int count, void *myframe) @@ -5329,7 +5361,11 @@ gf_cli3_1_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,          cli_local_t             *local = NULL;          char                    *volname = NULL;          call_frame_t            *frame = NULL; +        dict_t                  *input_dict = NULL;          dict_t                  *dict = NULL; +        int                     brick_count = 0; +        int                     i = 0; +        gf_xl_afr_op_t          heal_op = GF_AFR_OP_INVALID;          if (-1 == req->rpc_status) {                  goto out; @@ -5348,21 +5384,24 @@ gf_cli3_1_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,                  frame->local = NULL;          } -        if (local) -                dict = local->dict; - -#if (HAVE_LIB_XML) -        if (global_state->mode & GLUSTER_MODE_XML) { -                ret = cli_xml_output_dict ("volHeal", dict, rsp.op_ret, -                                           rsp.op_errno, rsp.op_errstr); -                if (ret) -                        gf_log ("cli", GF_LOG_ERROR, -                                "Error outputting to xml"); -                goto out; -        } -#endif - -        ret = dict_get_str (dict, "volname", &volname); +        if (local) { +                input_dict = local->dict; +                ret = dict_get_int32 (input_dict, "heal-op", +                                      (int32_t*)&heal_op); +        } + +//#if (HAVE_LIB_XML) +//        if (global_state->mode & GLUSTER_MODE_XML) { +//                ret = cli_xml_output_dict ("volHeal", dict, rsp.op_ret, +//                                           rsp.op_errno, rsp.op_errstr); +//                if (ret) +//                        gf_log ("cli", GF_LOG_ERROR, +//                                "Error outputting to xml"); +//                goto out; +//        } +//#endif + +        ret = dict_get_str (input_dict, "volname", &volname);          if (ret) {                  gf_log (THIS->name, GF_LOG_ERROR, "failed to get volname");                  goto out; @@ -5376,14 +5415,51 @@ gf_cli3_1_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,                  cli_out ("Starting heal on volume %s has been %s", volname,                          (rsp.op_ret) ? "unsuccessful": "successful"); +        if (rsp.op_ret) { +                ret = rsp.op_ret; +                goto out; +        } + +        if ((heal_op == GF_AFR_OP_HEAL_FULL) || +            (heal_op == GF_AFR_OP_HEAL_INDEX)) { +                ret = 0; +                goto out; +        } +        dict = dict_new (); + +        if (!dict) { +                ret = -1; +                goto out; +        } + +        ret = dict_unserialize (rsp.dict.dict_val, +                                rsp.dict.dict_len, +                                &dict); + +        if (ret) { +                gf_log ("", GF_LOG_ERROR, +                                "Unable to allocate memory"); +                goto out; +        } else { +                dict->extra_stdfree = rsp.dict.dict_val; +        } +        ret = dict_get_int32 (dict, "count", &brick_count); +        if (ret) +                goto out; + +        if (!brick_count) { +                cli_out ("All bricks of volume %s are down.", volname); +                goto out; +        } + +        for (i = 0; i < brick_count; i++) +                cmd_heal_volume_brick_out (dict, i);          ret = rsp.op_ret;  out:          cli_cmd_broadcast_response (ret);          if (local)                  cli_local_wipe (local); -        if (rsp.dict.dict_val) -                free (rsp.dict.dict_val);          if (rsp.op_errstr)                  free (rsp.op_errstr);          if (dict) diff --git a/cli/src/cli.h b/cli/src/cli.h index 74e1423f5..1f78da1fb 100644 --- a/cli/src/cli.h +++ b/cli/src/cli.h @@ -259,6 +259,10 @@ cli_cmd_volume_status_parse (const char **words, int wordcount,                               dict_t **options);  int +cli_cmd_volume_heal_options_parse (const char **words, int wordcount, +                                   dict_t **options); + +int  cli_print_brick_status (cli_volume_status_t *status);  void diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c index 08f8a05f9..de6e2d60c 100644 --- a/glusterfsd/src/glusterfsd-mgmt.c +++ b/glusterfsd/src/glusterfsd-mgmt.c @@ -46,6 +46,7 @@  #include "rpcsvc.h"  #include "cli1-xdr.h"  #include "statedump.h" +#include "syncop.h"  static char is_mgmt_rpc_reconnect; @@ -317,8 +318,8 @@ out:  }  int -glusterfs_translator_heal_response_send (rpcsvc_request_t *req, int op_ret, -                                         char *msg, dict_t *output) +glusterfs_xlator_op_response_send (rpcsvc_request_t *req, int op_ret, +                                   char *msg, dict_t *output)  {          gd1_mgmt_brick_op_rsp    rsp = {0,};          int                      ret = -1; @@ -651,15 +652,14 @@ out:  }  int -glusterfs_handle_translator_heal (rpcsvc_request_t *req) +glusterfs_handle_translator_op (void *data)  {          int32_t                  ret     = -1;          gd1_mgmt_brick_op_req    xlator_req = {0,}; -        dict_t                   *dict    = NULL; +        dict_t                   *input    = NULL;          xlator_t                 *xlator = NULL;          xlator_t                 *any = NULL;          dict_t                   *output = NULL; -        char                     msg[2048] = {0};          char                     key[2048] = {0};          char                    *xname = NULL;          glusterfs_ctx_t          *ctx = NULL; @@ -667,73 +667,76 @@ glusterfs_handle_translator_heal (rpcsvc_request_t *req)          xlator_t                 *this = NULL;          int                      i = 0;          int                      count = 0; +        rpcsvc_request_t         *req = data;          GF_ASSERT (req);          this = THIS;          GF_ASSERT (this); -        ctx = glusterfs_ctx_get (); -        GF_ASSERT (ctx); - -        active = ctx->active; -        any = active->first;          if (!xdr_to_generic (req->msg[0], &xlator_req,                               (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {                  //failed to decode msg;                  req->rpc_err = GARBAGE_ARGS;                  goto out;          } -        dict = dict_new (); +        ctx = glusterfs_ctx_get (); +        active = ctx->active; +        any = active->first; +        input = dict_new ();          ret = dict_unserialize (xlator_req.input.input_val,                                  xlator_req.input.input_len, -                                &dict); +                                &input);          if (ret < 0) {                  gf_log (this->name, GF_LOG_ERROR,                          "failed to "                          "unserialize req-buffer to dictionary");                  goto out; +        } else { +                input->extra_stdfree = xlator_req.input.input_val;          } -        ret = dict_get_int32 (dict, "count", &count); -        i = 0; -        while (i < count)  { -                snprintf (key, sizeof (key), "heal-%d", i); -                ret = dict_get_str (dict, key, &xname); +        ret = dict_get_int32 (input, "count", &count); + +        output = dict_new (); +        if (!output) { +                ret = -1; +                goto out; +        } + +        for (i = 0; i < count; i++)  { +                snprintf (key, sizeof (key), "xl-%d", i); +                ret = dict_get_str (input, key, &xname);                  if (ret) {                          gf_log (this->name, GF_LOG_ERROR, "Couldn't get " -                                "replicate xlator %s to trigger " -                                "self-heal", xname); +                                "xlator %s ", key);                          goto out;                  }                  xlator = xlator_search_by_name (any, xname);                  if (!xlator) { -                        snprintf (msg, sizeof (msg), "xlator %s is not loaded", -                                  xlator_req.name); -                        ret = -1; +                        gf_log (this->name, GF_LOG_ERROR, "xlator %s is not " +                                "loaded", xname);                          goto out;                  } - -                ret = xlator_notify (xlator, GF_EVENT_TRIGGER_HEAL, dict, NULL); -                i++;          } -        output = dict_new (); -        if (!output) -                goto out; - -        /* output dict is not used currently, could be used later. */ -        ret = glusterfs_translator_heal_response_send (req, ret, msg, output); +        for (i = 0; i < count; i++)  { +                snprintf (key, sizeof (key), "xl-%d", i); +                ret = dict_get_str (input, key, &xname); +                xlator = xlator_search_by_name (any, xname); +                XLATOR_NOTIFY (xlator, GF_EVENT_TRANSLATOR_OP, input, output); +                if (ret) +                        break; +        }  out: -        if (dict) -                dict_unref (dict); -        if (xlator_req.input.input_val) -                free (xlator_req.input.input_val); // malloced by xdr +        glusterfs_xlator_op_response_send (req, ret, "", output); +        if (input) +                dict_unref (input);          if (output)                  dict_unref (output);          if (xlator_req.name)                  free (xlator_req.name); //malloced by xdr -        return ret; +        return 0;  } @@ -941,11 +944,20 @@ out:          return ret;  } +static int +glusterfs_command_done  (int ret, call_frame_t *sync_frame, void *data) +{ +        STACK_DESTROY (sync_frame->root); +        return 0; +} +  int  glusterfs_handle_rpc_msg (rpcsvc_request_t *req)  { -        int     ret = -1; -        xlator_t *this = THIS; +        int             ret = -1; +        xlator_t        *this = THIS; +        call_frame_t    *frame = NULL; +          GF_ASSERT (this);          switch (req->procnum) {          case GLUSTERD_BRICK_TERMINATE: @@ -954,8 +966,13 @@ glusterfs_handle_rpc_msg (rpcsvc_request_t *req)          case GLUSTERD_BRICK_XLATOR_INFO:                  ret = glusterfs_handle_translator_info_get (req);                  break; -        case GLUSTERD_BRICK_XLATOR_HEAL: -                ret = glusterfs_handle_translator_heal (req); +        case GLUSTERD_BRICK_XLATOR_OP: +                frame = create_frame (this, this->ctx->pool); +                if (!frame) +                        goto out; +                ret = synctask_new (this->ctx->env, +                                    glusterfs_handle_translator_op, +                                    glusterfs_command_done, frame, req);                  break;          case GLUSTERD_BRICK_STATUS:                  ret = glusterfs_handle_brick_status (req); @@ -966,7 +983,7 @@ glusterfs_handle_rpc_msg (rpcsvc_request_t *req)          default:                  break;          } - +out:          return ret;  } @@ -1018,7 +1035,7 @@ rpcsvc_actor_t glusterfs_actors[] = {          [GLUSTERD_BRICK_NULL]        = { "NULL",    GLUSTERD_BRICK_NULL, glusterfs_handle_rpc_msg, NULL, NULL, 0},          [GLUSTERD_BRICK_TERMINATE] = { "TERMINATE", GLUSTERD_BRICK_TERMINATE, glusterfs_handle_rpc_msg, NULL, NULL, 0},          [GLUSTERD_BRICK_XLATOR_INFO] = { "TRANSLATOR INFO", GLUSTERD_BRICK_XLATOR_INFO, glusterfs_handle_rpc_msg, NULL, NULL, 0}, -        [GLUSTERD_BRICK_XLATOR_HEAL] = { "TRANSLATOR HEAL", GLUSTERD_BRICK_XLATOR_HEAL, glusterfs_handle_rpc_msg, NULL, NULL, 0}, +        [GLUSTERD_BRICK_XLATOR_OP] = { "TRANSLATOR OP", GLUSTERD_BRICK_XLATOR_OP, glusterfs_handle_rpc_msg, NULL, NULL, 0},          [GLUSTERD_BRICK_STATUS] = {"STATUS", GLUSTERD_BRICK_STATUS, glusterfs_handle_rpc_msg, NULL, NULL, 0},          [GLUSTERD_BRICK_XLATOR_DEFRAG] = { "TRANSLATOR DEFRAG", GLUSTERD_BRICK_XLATOR_DEFRAG, glusterfs_handle_rpc_msg, NULL, NULL, 0}  }; diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c index 9355a3a46..e041ce5d6 100644 --- a/libglusterfs/src/globals.c +++ b/libglusterfs/src/globals.c @@ -395,7 +395,7 @@ char eventstring[GF_EVENT_MAXVAL+1][64] = {          "Volfile Modified",          "New Volfile",          "Translator Info", -        "Trigger Volume Heal", +        "Xlator Op",          "Authentication Failed",          "Invalid event",  }; diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 076833d23..63c28b8cf 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -385,7 +385,7 @@ typedef enum {          GF_EVENT_VOLFILE_MODIFIED,          GF_EVENT_GRAPH_NEW,          GF_EVENT_TRANSLATOR_INFO, -        GF_EVENT_TRIGGER_HEAL, +        GF_EVENT_TRANSLATOR_OP,          GF_EVENT_AUTH_FAILED,          GF_EVENT_VOLUME_DEFRAG,          GF_EVENT_MAXVAL, @@ -403,7 +403,6 @@ struct gf_flock {          gf_lkowner_t l_owner;  }; -  extern char *glusterfs_strevent (glusterfs_event_t ev);  #define GF_MUST_CHECK __attribute__((warn_unused_result)) diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h index 7604f8c21..5e4216dec 100644 --- a/libglusterfs/src/xlator.h +++ b/libglusterfs/src/xlator.h @@ -832,6 +832,18 @@ struct _xlator {  #define xlator_has_parent(xl) (xl->parents != NULL) +#define XLATOR_NOTIFY(_xl, params ...)          \ +        do {                                    \ +                xlator_t *_old_THIS = NULL;     \ +                                                \ +                _old_THIS = THIS;               \ +                THIS = _xl;                     \ +                                                \ +                ret = _xl->notify (_xl, params);\ +                                                \ +                THIS = _old_THIS;               \ +        } while (0); +  int32_t xlator_set_type_virtual (xlator_t *xl, const char *type);  int32_t xlator_set_type (xlator_t *xl, const char *type); diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index 827201e2d..cd7adde4e 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -182,13 +182,22 @@ enum glusterd_brick_procnum {          GLUSTERD_BRICK_NULL,    /* 0 */          GLUSTERD_BRICK_TERMINATE,          GLUSTERD_BRICK_XLATOR_INFO, -        GLUSTERD_BRICK_XLATOR_HEAL, +        GLUSTERD_BRICK_XLATOR_OP,          GLUSTERD_BRICK_STATUS,          GLUSTERD_BRICK_OP,          GLUSTERD_BRICK_XLATOR_DEFRAG,          GLUSTERD_BRICK_MAXVALUE,  }; +typedef enum { +        GF_AFR_OP_INVALID, +        GF_AFR_OP_HEAL_INDEX, +        GF_AFR_OP_HEAL_FULL, +        GF_AFR_OP_INDEX_SUMMARY, +        GF_AFR_OP_HEALED_FILES, +        GF_AFR_OP_HEAL_FAILED_FILES, +        GF_AFR_OP_SPLIT_BRAIN_FILES +} gf_xl_afr_op_t ;  #define GLUSTER_HNDSK_PROGRAM    14398633 /* Completely random */  #define GLUSTER_HNDSK_VERSION    1   /* 0.0.1 */ diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am index 16ed25af1..ed0901813 100644 --- a/xlators/cluster/afr/src/Makefile.am +++ b/xlators/cluster/afr/src/Makefile.am @@ -15,7 +15,7 @@ noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-  AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \  	    -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/contrib/md5 -shared -nostartfiles $(GF_CFLAGS) \ -	    -I$(top_srcdir)/xlators/lib/src +	    -I$(top_srcdir)/xlators/lib/src -I$(top_srcdir)/rpc/rpc-lib/src  CLEANFILES = diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 1895150cd..9a78f6d3d 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -3399,7 +3399,7 @@ find_child_index (xlator_t *this, xlator_t *child)  int32_t  afr_notify (xlator_t *this, int32_t event, -            void *data, ...) +            void *data, void *data2)  {          afr_private_t   *priv               = NULL;          int             i                   = -1; @@ -3412,6 +3412,8 @@ afr_notify (xlator_t *this, int32_t event,          int             ret                 = -1;          int             call_psh            = 0;          int             up_child            = AFR_ALL_CHILDREN; +        dict_t          *input              = NULL; +        dict_t          *output             = NULL;          priv = this->private; @@ -3499,10 +3501,11 @@ afr_notify (xlator_t *this, int32_t event,                  break; -        case GF_EVENT_TRIGGER_HEAL: -                gf_log (this->name, GF_LOG_INFO, "Self-heal was triggered" -                        " manually. Start crawling"); -                call_psh = 1; +        case GF_EVENT_TRANSLATOR_OP: +                input = data; +                output = data2; +                ret = afr_xl_op (this, input, output); +                goto out;                  break;          default: @@ -3552,7 +3555,7 @@ afr_notify (xlator_t *this, int32_t event,          ret = 0;          if (propagate)                  ret = default_notify (this, event, data); -        if (call_psh) { +        if (call_psh && priv->shd.enabled) {                  gf_log (this->name, GF_LOG_DEBUG, "start crawl: %d", up_child);                  afr_do_poll_self_heal ((void*) (long) up_child);          } @@ -3925,6 +3928,23 @@ afr_priv_destroy (afr_private_t *priv)                  goto out;          inode_unref (priv->root_inode);          GF_FREE (priv->shd.pos); +        GF_FREE (priv->shd.pending); +        GF_FREE (priv->shd.inprogress); +        GF_FREE (priv->shd.sh_times); +//        for (i = 0; i < priv->child_count; i++) +//                if (priv->shd.timer && priv->shd.timer[i]) +//                        gf_timer_call_cancel (this->ctx, priv->shd.timer[i]); +        GF_FREE (priv->shd.timer); + +        if (priv->shd.healed) +                eh_destroy (priv->shd.healed); + +        if (priv->shd.heal_failed) +                eh_destroy (priv->shd.heal_failed); + +        if (priv->shd.split_brain) +                eh_destroy (priv->shd.split_brain); +          GF_FREE (priv->last_event);          if (priv->pending_key) {                  for (i = 0; i < priv->child_count; i++) diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h index 228139408..a138c9676 100644 --- a/xlators/cluster/afr/src/afr-mem-types.h +++ b/xlators/cluster/afr/src/afr-mem-types.h @@ -44,10 +44,12 @@ enum gf_afr_mem_types_ {          gf_afr_mt_locked_fd,          gf_afr_mt_inode_ctx_t,          gf_afr_fd_paused_call_t, -        gf_afr_mt_afr_crawl_data_t, -        gf_afr_mt_afr_brick_pos_t, -        gf_afr_mt_afr_shd_bool_t, -        gf_afr_mt_afr_shd_timer_t, +        gf_afr_mt_crawl_data_t, +        gf_afr_mt_brick_pos_t, +        gf_afr_mt_shd_bool_t, +        gf_afr_mt_shd_timer_t, +        gf_afr_mt_shd_event_t, +        gf_afr_mt_time_t,          gf_afr_mt_end  };  #endif diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index 1f071b871..fa7e61e49 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -25,11 +25,453 @@  #include "syncop.h"  #include "afr-self-heald.h"  #include "afr-self-heal-common.h" +#include "protocol-common.h" +#include "event-history.h"  #define AFR_POLL_TIMEOUT 600 +typedef enum { +        STOP_CRAWL_ON_SINGLE_SUBVOL = 1 +} afr_crawl_flags_t; + +typedef struct shd_dump { +        dict_t   *dict; +        time_t   sh_time; +        xlator_t *this; +        int      child; +} shd_dump_t; + +typedef struct shd_event_ { +        int     child; +        char    *path; +} shd_event_t; + +typedef int +(*afr_crawl_done_cbk_t)  (int ret, call_frame_t *sync_frame, void *crawl_data); + +void +afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl, +                 process_entry_cbk_t process_entry, void *op_data, +                 gf_boolean_t exclusive, int crawl_flags, +                 afr_crawl_done_cbk_t crawl_done); + +static int +_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data); + +void +shd_cleanup_event (void *event) +{ +        shd_event_t *shd_event = event; + +        if (!shd_event) +                goto out; +        if (shd_event->path) +                GF_FREE (shd_event->path); +        GF_FREE (shd_event); +out: +        return; +} + +int +afr_get_local_child (afr_self_heald_t *shd, unsigned int child_count) +{ +        int i = 0; +        int ret = -1; +        for (i = 0; i < child_count; i++) { +                if (shd->pos[i] == AFR_POS_LOCAL) { +                        ret = i; +                        break; +                } +        } +        return ret; +} + +static int +_build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent) +{ +        int             ret = 0; + +        uuid_copy (loc->pargfid, parent->inode->gfid); +        loc->path = ""; +        loc->name = name; +        loc->parent = inode_ref (parent->inode); +        if (!loc->parent) { +                loc->path = NULL; +                loc_wipe (loc); +                ret = -1; +        } +        return ret; +} + +int +_add_str_to_dict (xlator_t *this, dict_t *output, int child, char *str, +                  gf_boolean_t dyn) +{ +        //subkey not used for now +        int             ret = -1; +        uint64_t        count = 0; +        char            key[256] = {0}; +        int             xl_id = 0; + +        ret = dict_get_int32 (output, this->name, &xl_id); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "xl does not have id"); +                goto out; +        } + +        snprintf (key, sizeof (key), "%d-%d-count", xl_id, child); +        ret = dict_get_uint64 (output, key, &count); + +        snprintf (key, sizeof (key), "%d-%d-%"PRIu64, xl_id, child, count); +        if (dyn) +                ret = dict_set_dynstr (output, key, str); +        else +                ret = dict_set_str (output, key, str); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "%s: Could not add to output", +                        str); +                goto out; +        } + +        snprintf (key, sizeof (key), "%d-%d-count", xl_id, child); +        ret = dict_set_uint64 (output, key, count + 1); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "Could not increment count"); +                goto out; +        } +        ret = 0; +out: +        return ret; +} + +int +_get_path_from_gfid_loc (xlator_t *this, xlator_t *readdir_xl, loc_t *child, +                         char **fpath) +{ +        dict_t          *xattr = NULL; +        char            *path = NULL; +        int             ret = -1; + +        ret = syncop_getxattr (readdir_xl, child, &xattr, +                               GFID_TO_PATH_KEY); +        if (ret) +                goto out; +        ret = dict_get_str (xattr, GFID_TO_PATH_KEY, &path); +        if (ret) { +                gf_log (this->name, GF_LOG_DEBUG, "Failed to get path for " +                        "gfid %s", uuid_utoa (child->gfid)); +                goto out; +        } +        path = gf_strdup (path); +        if (!path) { +                ret = -1; +                goto out; +        } +        ret = 0; +out: +        if (!ret) +                *fpath = path; +        if (xattr) +                dict_unref (xattr); +        return ret; +} + +int +_add_event_to_dict (circular_buffer_t *cb, void *data) +{ +        int               ret = 0; +        shd_dump_t        *dump_data = NULL; +        shd_event_t       *shd_event = NULL; + +        dump_data = data; +        shd_event = cb->data; +        if (shd_event->child != dump_data->child) +                goto out; +        if (cb->tv.tv_sec >= dump_data->sh_time) +                ret = _add_str_to_dict (dump_data->this, dump_data->dict, +                                        dump_data->child, shd_event->path, +                                        _gf_false); +out: +        return ret; +} + +int +_add_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict, time_t sh_time, +                 int child) +{ +        shd_dump_t dump_data = {0}; + +        dump_data.this = this; +        dump_data.dict = dict; +        dump_data.sh_time = sh_time; +        dump_data.child = child; +        eh_dump (eh, &dump_data, _add_event_to_dict); +        return 0; +} + +int +_add_summary_to_dict (xlator_t *this, afr_crawl_data_t *crawl_data, +                      gf_dirent_t *entry, +                      loc_t *childloc, loc_t *parentloc, struct iatt *iattr) +{ +        dict_t          *output = NULL; +        xlator_t        *readdir_xl = NULL; +        int             ret = -1; +        char            *path = NULL; + +        if (uuid_is_null (childloc->gfid)) +                goto out; + +        output = crawl_data->op_data; +        readdir_xl = crawl_data->readdir_xl; + +        ret = _get_path_from_gfid_loc (this, readdir_xl, childloc, &path); +        if (ret) +                goto out; + +        ret = _add_str_to_dict (this, output, crawl_data->child, path, +                                _gf_true); +out: +        if (ret && path) +                GF_FREE (path); +        return ret; +} + +void +_remove_stale_index (xlator_t *this, xlator_t *readdir_xl, +                     loc_t *parent, char *fname) +{ +        int              ret = 0; +        loc_t            index_loc = {0}; + +        ret = _build_index_loc (this, &index_loc, fname, parent); +        if (ret) +                goto out; +        gf_log (this->name, GF_LOG_INFO, "Removing stale index " +                "for %s on %s", index_loc.name, readdir_xl->name); +        ret = syncop_unlink (readdir_xl, &index_loc); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "%s: Failed to remove" +                        " index on %s - %s", index_loc.name, +                        readdir_xl->name, strerror (errno)); +        } +        index_loc.path = NULL; +        loc_wipe (&index_loc); +out: +        return; +} + +void +_crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child, +                       int32_t op_ret, int32_t op_errno, +                       afr_crawl_data_t *crawl_data) +{ +        int              ret = 0; +        afr_private_t    *priv = NULL; +        afr_self_heald_t *shd = NULL; +        eh_t             *eh = NULL; +        char             *path = NULL; +        shd_event_t      *event = NULL; + +        priv = this->private; +        shd  = &priv->shd; +        if (crawl_data->crawl == INDEX) { +                if ((op_ret < 0) && (op_errno == ENOENT)) { +                        _remove_stale_index (this, crawl_data->readdir_xl, +                                             parent, uuid_utoa (child->gfid)); +                        goto out; +                } +                ret = _get_path_from_gfid_loc (this, crawl_data->readdir_xl, +                                               child, &path); +                if (ret) +                        goto out; +        } else { +                path = gf_strdup (child->path); +                if (!path) { +                        ret = -1; +                        goto out; +                } +        } + +        if (op_ret < 0 && op_errno == EIO) +                eh = shd->split_brain; +        else if (op_ret < 0) +                eh = shd->heal_failed; +        else +                eh = shd->healed; +        ret = -1; +        event = GF_CALLOC (1, sizeof (*event), gf_afr_mt_shd_event_t); +        if (!event) +                goto out; +        event->child = crawl_data->child; +        event->path = path; +        ret = eh_save_history (eh, event); +        if (ret < 0) { +                gf_log (this->name, GF_LOG_ERROR, "%s:Failed to save to " +                        "eh, (%d, %s)", path, op_ret, strerror (op_errno)); +                goto out; +        } +        ret = 0; +out: +        if (ret && path) +                GF_FREE (path); +        return; +} + +int +_self_heal_entry (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *entry, +                  loc_t *child, loc_t *parent, struct iatt *iattr) +{ +        struct iatt      parentbuf = {0}; +        int              ret = 0; + +        if (uuid_is_null (child->gfid)) +                gf_log (this->name, GF_LOG_DEBUG, "lookup %s", child->path); +        else +                gf_log (this->name, GF_LOG_DEBUG, "lookup %s", +                        uuid_utoa (child->gfid)); + +        ret = syncop_lookup (this, child, NULL, +                             iattr, NULL, &parentbuf); +        _crawl_post_sh_action (this, parent, child, ret, errno, crawl_data); +        return ret; +} + +static int +afr_crawl_done  (int ret, call_frame_t *sync_frame, void *data) +{ +        GF_FREE (data); +        STACK_DESTROY (sync_frame->root); +        return 0; +} +  void -afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl); +_do_self_heal_on_subvol (xlator_t *this, int child, afr_crawl_type_t crawl) +{ +        afr_private_t   *priv = NULL; +        afr_self_heald_t *shd = NULL; + +        priv = this->private; +        shd = &priv->shd; + +        time (&shd->sh_times[child]); +        afr_start_crawl (this, child, crawl, _self_heal_entry, +                         NULL, _gf_true, STOP_CRAWL_ON_SINGLE_SUBVOL, +                         afr_crawl_done); +} + +void +_do_self_heal_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl) +{ +        int             i = 0; +        afr_private_t   *priv = NULL; + +        priv = this->private; +        for (i = 0; i < priv->child_count; i++) +                _do_self_heal_on_subvol (this, i, INDEX); +} + +void +_do_self_heal_on_local_subvol (xlator_t *this, afr_crawl_type_t crawl) +{ +        int             local_child = -1; +        afr_private_t   *priv = NULL; + +        priv = this->private; +        local_child = afr_get_local_child (&priv->shd, +                                           priv->child_count); +        if (local_child < -1) { +               gf_log (this->name, GF_LOG_INFO, +                       "No local bricks found"); +        } +        _do_self_heal_on_subvol (this, local_child, FULL); +} + +int +_get_index_summary_on_local_subvols (xlator_t *this, dict_t *output) +{ +        int             i = 0; +        afr_private_t   *priv = NULL; + +        priv = this->private; +        for (i = 0; i < priv->child_count; i++) +                afr_start_crawl (this, i, INDEX, _add_summary_to_dict, +                                 output, _gf_false, 0, NULL); +        return 0; +} + +int +_add_all_subvols_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict) +{ +        afr_private_t           *priv = NULL; +        afr_self_heald_t        *shd = NULL; +        int                     i = 0; + +        priv = this->private; +        shd = &priv->shd; + +        for (i = 0; i < priv->child_count; i++) { +                if (shd->pos[i] != AFR_POS_LOCAL) +                        continue; +                _add_eh_to_dict (this, eh, dict, shd->sh_times[i], i); +        } +        return 0; +} + +int +afr_xl_op (xlator_t *this, dict_t *input, dict_t *output) +{ +        gf_xl_afr_op_t   op = GF_AFR_OP_INVALID; +        int              ret = 0; +        afr_private_t    *priv = NULL; +        afr_self_heald_t *shd = NULL; +        int              xl_id = 0; + +        priv = this->private; +        shd = &priv->shd; + +        ret = dict_get_int32 (input, "xl-op", (int32_t*)&op); +        if (ret) +                goto out; +        ret = dict_get_int32 (input, this->name, &xl_id); +        if (ret) +                goto out; +        ret = dict_set_int32 (output, this->name, xl_id); +        if (ret) +                goto out; +        switch (op) { +        case GF_AFR_OP_HEAL_INDEX: +                _do_self_heal_on_local_subvols (this, INDEX); +                ret = 0; +                break; +        case GF_AFR_OP_HEAL_FULL: +                _do_self_heal_on_local_subvol (this, FULL); +                ret = 0; +                break; +        case GF_AFR_OP_INDEX_SUMMARY: +                ret = _get_index_summary_on_local_subvols (this, output); +                if (ret) +                        goto out; +                break; +        case GF_AFR_OP_HEALED_FILES: +                ret = _add_all_subvols_eh_to_dict (this, shd->healed, output); +                break; +        case GF_AFR_OP_HEAL_FAILED_FILES: +                ret = _add_all_subvols_eh_to_dict (this, shd->heal_failed, +                                                   output); +                break; +        case GF_AFR_OP_SPLIT_BRAIN_FILES: +                ret = _add_all_subvols_eh_to_dict (this, shd->split_brain, +                                                   output); +                break; +        default: +                gf_log (this->name, GF_LOG_ERROR, "Unknown set op %d", op); +                break; +        } +out: +        dict_del (output, this->name); +        return ret; +}  void  afr_do_poll_self_heal (void *data) @@ -39,21 +481,14 @@ afr_do_poll_self_heal (void *data)          struct timeval   timeout = {0};          xlator_t         *this = NULL;          long             child = (long)data; -        int              i = 0;          this = THIS;          priv = this->private;          shd = &priv->shd; -        if (child == AFR_ALL_CHILDREN) { //done by command -                for (i = 0; i < priv->child_count; i++) -                        afr_start_crawl (this, i, INDEX); +        _do_self_heal_on_subvol (this, child, INDEX); +        if (shd->pos[child] == AFR_POS_REMOTE)                  goto out; -        } else { -                afr_start_crawl (this, child, INDEX); -                if (shd->pos[child] == AFR_POS_REMOTE) -                        goto out; -        }          timeout.tv_sec = AFR_POLL_TIMEOUT;          timeout.tv_usec = 0;          if (shd->timer[child]) @@ -71,9 +506,6 @@ out:  }  static int -_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data, -                  xlator_t *readdir_xl); -static int  get_pathinfo_host (char *pathinfo, char *hostname, size_t size)  {          char    *start = NULL; @@ -132,15 +564,16 @@ out:  int  afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data, -                           loc_t *dirloc, xlator_t *readdir_xl) +                           loc_t *dirloc)  {          afr_private_t *priv = NULL;          dict_t        *xattr = NULL;          void          *index_gfid = NULL;          loc_t         rootloc = {0}; -        struct iatt   iatt = {0}; +        struct iatt   iattr = {0};          struct iatt   parent = {0};          int           ret = 0; +        xlator_t      *readdir_xl = crawl_data->readdir_xl;          priv = this->private;          if (crawl_data->crawl == FULL) { @@ -167,13 +600,13 @@ afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data,                  dirloc->path = "";                  dirloc->inode = inode_new (priv->root_inode->table);                  ret = syncop_lookup (readdir_xl, dirloc, NULL, -                                     &iatt, NULL, &parent); +                                     &iattr, NULL, &parent);                  if (ret < 0) {                          gf_log (this->name, GF_LOG_ERROR, "lookup failed on "                                  "index dir on %s", readdir_xl->name);                          goto out;                  } -                inode_link (dirloc->inode, NULL, NULL, &iatt); +                inode_link (dirloc->inode, NULL, NULL, &iattr);          }          ret = 0;  out: @@ -185,7 +618,7 @@ out:  int  afr_crawl_opendir (xlator_t *this, afr_crawl_data_t *crawl_data, fd_t **dirfd, -                   loc_t *dirloc, xlator_t *readdir_xl) +                   loc_t *dirloc)  {          fd_t          *fd   = NULL;          int           ret = 0; @@ -199,7 +632,7 @@ afr_crawl_opendir (xlator_t *this, afr_crawl_data_t *crawl_data, fd_t **dirfd,                          goto out;                  } -                ret = syncop_opendir (readdir_xl, dirloc, fd); +                ret = syncop_opendir (crawl_data->readdir_xl, dirloc, fd);                  if (ret < 0) {                          gf_log (this->name, GF_LOG_ERROR,                                  "opendir failed on %s", dirloc->path); @@ -247,7 +680,7 @@ afr_crawl_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent,  }  gf_boolean_t -_crawl_proceed (xlator_t *this, int child) +_crawl_proceed (xlator_t *this, int child, int crawl_flags)  {          afr_private_t *priv = this->private;          gf_boolean_t proceed = _gf_false; @@ -258,77 +691,33 @@ _crawl_proceed (xlator_t *this, int child)                  goto out;          } -        if (afr_up_children_count (priv->child_up, -                                   priv->child_count) < 2) { -                gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as " -                        "< 2 children are up"); -                goto out; -        } -        proceed = _gf_true; -out: -        return proceed; -} - -static int -_build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent) -{ -        int             ret = 0; - -        uuid_copy (loc->pargfid, parent->inode->gfid); -        loc->path = ""; -        loc->name = name; -        loc->parent = inode_ref (parent->inode); -        if (!loc->parent) { -                loc->path = NULL; -                loc_wipe (loc); -                ret = -1; -        } -        return ret; -} - -void -_index_crawl_post_lookup_fop (xlator_t *this, loc_t *parentloc, -                              gf_dirent_t *entry, int op_ret, int op_errno, -                              xlator_t *readdir_xl) -{ -        loc_t            index_loc = {0}; -        int              ret = 0; - -        if (op_ret && (op_errno == ENOENT)) { -                ret = _build_index_loc (this, &index_loc, entry->d_name, -                                        parentloc); -                if (ret) +        if (crawl_flags & STOP_CRAWL_ON_SINGLE_SUBVOL) { +                if (afr_up_children_count (priv->child_up, +                                           priv->child_count) < 2) { +                        gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as " +                                "< 2 children are up");                          goto out; -                gf_log (this->name, GF_LOG_INFO, "Removing stale index " -                        "for %s on %s", index_loc.name, readdir_xl->name); -                ret = syncop_unlink (readdir_xl, &index_loc); -                if (ret) { -                        gf_log (this->name, GF_LOG_ERROR, "%s: Failed to remove" -                                " index on %s - %s", index_loc.name, -                                readdir_xl->name, strerror (errno));                  } -                index_loc.path = NULL; -                loc_wipe (&index_loc);          } +        proceed = _gf_true;  out: -        return; +        return proceed;  }  static int -_perform_self_heal (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries, -                    off_t *offset, afr_crawl_data_t *crawl_data, -                    xlator_t *readdir_xl) +_process_entries (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries, +                  off_t *offset, afr_crawl_data_t *crawl_data)  {          gf_dirent_t      *entry = NULL;          gf_dirent_t      *tmp = NULL; -        struct iatt      iatt = {0}; -        struct iatt      parent = {0};          int              ret = 0;          loc_t            entry_loc = {0};          fd_t             *fd = NULL; +        struct iatt      iattr = {0};          list_for_each_entry_safe (entry, tmp, &entries->list, list) { -                if (!_crawl_proceed (this, crawl_data->child)) { +                if (!_crawl_proceed (this, crawl_data->child, +                                     crawl_data->crawl_flags)) {                          ret = -1;                          goto out;                  } @@ -344,62 +733,51 @@ _perform_self_heal (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,                          continue;                  } +                if (crawl_data->crawl == INDEX) +                        entry_loc.path = NULL;//HACK                  loc_wipe (&entry_loc);                  ret = afr_crawl_build_child_loc (this, &entry_loc, parentloc,                                                   entry, crawl_data);                  if (ret)                          goto out; -                if (uuid_is_null (entry_loc.gfid)) { -                        gf_log (this->name, GF_LOG_WARNING, "failed to build " -                                "location for %s", entry->d_name); -                        continue; -                } -                if (entry_loc.path) -                        gf_log (this->name, GF_LOG_DEBUG, "lookup %s", -                                entry_loc.path); -                else -                        gf_log (this->name, GF_LOG_DEBUG, "lookup %s", -                                uuid_utoa (entry_loc.gfid)); - -                ret = syncop_lookup (this, &entry_loc, NULL, -                                     &iatt, NULL, &parent); -                if (crawl_data->crawl == INDEX) { -                        _index_crawl_post_lookup_fop (this, parentloc, entry, -                                                      ret, errno, readdir_xl); -                        entry_loc.path = NULL; -                        loc_wipe (&entry_loc); +                ret = crawl_data->process_entry (this, crawl_data, entry, +                                                 &entry_loc, parentloc, &iattr); + +                if (crawl_data->crawl == INDEX)                          continue; -                } -                //Don't fail the crawl if lookup fails as it -                //could be because of split-brain -                if (ret || (!IA_ISDIR (iatt.ia_type))) +                if (ret || !IA_ISDIR (iattr.ia_type))                          continue; -                inode_link (entry_loc.inode, parentloc->inode, NULL, &iatt); -                ret = afr_crawl_opendir (this, crawl_data, &fd, &entry_loc, -                                         readdir_xl); + +                inode_link (entry_loc.inode, parentloc->inode, NULL, &iattr); + +                fd = NULL; +                ret = afr_crawl_opendir (this, crawl_data, &fd, &entry_loc);                  if (ret)                          continue; -                ret = _crawl_directory (fd, &entry_loc, crawl_data, readdir_xl); -                fd_unref (fd); +                ret = _crawl_directory (fd, &entry_loc, crawl_data); +                if (fd) +                        fd_unref (fd);          }          ret = 0;  out: +        if (crawl_data->crawl == INDEX) +                entry_loc.path = NULL;          if (entry_loc.path)                  loc_wipe (&entry_loc);          return ret;  }  static int -_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data, -                  xlator_t *readdir_xl) +_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data)  {          xlator_t        *this = NULL;          off_t           offset   = 0;          gf_dirent_t     entries;          int             ret = 0;          gf_boolean_t    free_entries = _gf_false; +        xlator_t        *readdir_xl = crawl_data->readdir_xl;          INIT_LIST_HEAD (&entries.list);          this = THIS; @@ -424,15 +802,16 @@ _crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data,                  ret = 0;                  free_entries = _gf_true; -                if (!_crawl_proceed (this, crawl_data->child)) { +                if (!_crawl_proceed (this, crawl_data->child, +                                     crawl_data->crawl_flags)) {                          ret = -1;                          goto out;                  }                  if (list_empty (&entries.list))                          goto out; -                ret = _perform_self_heal (this, loc, &entries, &offset, -                                          crawl_data, readdir_xl); +                ret = _process_entries (this, loc, &entries, &offset, +                                        crawl_data);                  gf_dirent_free (&entries);                  free_entries = _gf_false;          } @@ -515,14 +894,6 @@ out:          return ret;  } -static int -afr_crawl_done  (int ret, call_frame_t *sync_frame, void *data) -{ -        GF_FREE (data); -        STACK_DESTROY (sync_frame->root); -        return 0; -} -  static inline int  afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count)  { @@ -530,17 +901,74 @@ afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count)  }  static int -afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data) +afr_dir_crawl (void *data) +{ +        xlator_t            *this = NULL; +        afr_private_t       *priv = NULL; +        afr_self_heald_t    *shd = NULL; +        int                 ret = -1; +        xlator_t            *readdir_xl = NULL; +        fd_t                *fd = NULL; +        loc_t               dirloc = {0}; +        afr_crawl_data_t    *crawl_data = data; + +        this = THIS; +        priv = this->private; +        shd = &priv->shd; + +        if (!_crawl_proceed (this, crawl_data->child, crawl_data->crawl_flags)) +                goto out; + +        ret = afr_find_child_position (this, crawl_data->child); +        if (ret) +                goto out; + +        if (!afr_is_local_child (shd, crawl_data->child, priv->child_count)) +                goto out; + +        readdir_xl = afr_crawl_readdir_xl_get (this, crawl_data); +        if (!readdir_xl) +                goto out; +        crawl_data->readdir_xl = readdir_xl; + +        ret = afr_crawl_build_start_loc (this, crawl_data, &dirloc); +        if (ret) +                goto out; + +        ret = afr_crawl_opendir (this, crawl_data, &fd, &dirloc); +        if (ret) +                goto out; + +        ret = _crawl_directory (fd, &dirloc, crawl_data); +        if (ret) +                gf_log (this->name, GF_LOG_ERROR, "Crawl failed on %s", +                        readdir_xl->name); +        else +                gf_log (this->name, GF_LOG_INFO, "Crawl completed " +                        "on %s", readdir_xl->name); +        if (crawl_data->crawl == INDEX) +                dirloc.path = NULL; +out: +        if (fd) +                fd_unref (fd); +        if (crawl_data->crawl == INDEX) +                dirloc.path = NULL; +        loc_wipe (&dirloc); +        return ret; +} + +static int +afr_dir_exclusive_crawl (void *data)  {          afr_private_t    *priv = NULL;          afr_self_heald_t *shd = NULL; -        loc_t            dirloc = {0};          gf_boolean_t     crawl = _gf_false;          int              ret = 0; -        xlator_t         *readdir_xl = NULL; -        fd_t             *fd = NULL;          int              child = -1; +        xlator_t         *this = NULL; +        afr_crawl_data_t *crawl_data = data; +        this = THIS;          priv = this->private;          shd = &priv->shd;          child = crawl_data->child; @@ -548,7 +976,8 @@ afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data)          LOCK (&priv->lock);          {                  if (shd->inprogress[child]) { -                        shd->pending[child] = _gf_true; +                        if (shd->pending[child] != FULL) +                                shd->pending[child] = crawl_data->crawl;                  } else {                          shd->inprogress[child] = _gf_true;                          crawl = _gf_true; @@ -556,11 +985,6 @@ afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data)          }          UNLOCK (&priv->lock); -        if (!priv->root_inode) { -                ret = -1; -                goto out; -        } -          if (!crawl) {                  gf_log (this->name, GF_LOG_INFO, "Another crawl is in progress "                          "for %s", priv->children[child]->name); @@ -568,87 +992,35 @@ afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data)          }          do { -                readdir_xl = afr_crawl_readdir_xl_get (this, crawl_data); -                if (!readdir_xl) -                        goto done; -                ret = afr_crawl_build_start_loc (this, crawl_data, &dirloc, -                                                 readdir_xl); -                if (ret) -                        goto done; -                ret = afr_crawl_opendir (this, crawl_data, &fd, &dirloc, -                                         readdir_xl); -                if (ret) -                        goto done; -                ret = _crawl_directory (fd, &dirloc, crawl_data, readdir_xl); -                if (ret) -                        gf_log (this->name, GF_LOG_ERROR, "Crawl failed on %s", -                                readdir_xl->name); -                else -                        gf_log (this->name, GF_LOG_INFO, "Crawl completed " -                                "on %s", readdir_xl->name); -                fd_unref (fd); -                fd = NULL; -done: +                afr_dir_crawl (data);                  LOCK (&priv->lock);                  { -                        if (shd->pending[child]) { -                                shd->pending[child] = _gf_false; +                        if (shd->pending[child] != NONE) { +                                crawl_data->crawl = shd->pending[child]; +                                shd->pending[child] = NONE;                          } else {                                  shd->inprogress[child] = _gf_false;                                  crawl = _gf_false;                          }                  }                  UNLOCK (&priv->lock); -                if (crawl_data->crawl == INDEX) { -                        dirloc.path = NULL; -                        loc_wipe (&dirloc); -                }          } while (crawl);  out: -        if (fd) -                fd_unref (fd); -        if (crawl_data->crawl == INDEX) { -                dirloc.path = NULL; -                loc_wipe (&dirloc); -        } -        return ret; -} - -static int -afr_crawl (void *data) -{ -        xlator_t         *this = NULL; -        afr_private_t    *priv = NULL; -        afr_self_heald_t *shd = NULL; -        int              ret = -1; -        afr_crawl_data_t *crawl_data = data; - -        this = THIS; -        priv = this->private; -        shd = &priv->shd; - -        if (!_crawl_proceed (this, crawl_data->child)) -                goto out; -        ret = afr_find_child_position (this, crawl_data->child); -        if (ret) -                goto out; - -        if (!afr_is_local_child (shd, crawl_data->child, priv->child_count)) -                goto out; - -        ret = afr_crawl_directory (this, crawl_data); -out:          return ret;  }  void -afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl) +afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl, +                 process_entry_cbk_t process_entry, void *op_data, +                 gf_boolean_t exclusive, int crawl_flags, +                 afr_crawl_done_cbk_t crawl_done)  {          afr_private_t              *priv = NULL;          afr_self_heald_t           *shd = NULL;          call_frame_t               *frame = NULL;          afr_crawl_data_t           *crawl_data = NULL;          int                        ret = 0; +        int (*crawler) (void*) = NULL;          priv = this->private;          shd = &priv->shd; @@ -662,16 +1034,24 @@ afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl)          afr_set_lk_owner (frame, this);          afr_set_low_priority (frame);          crawl_data = GF_CALLOC (1, sizeof (*crawl_data), -                                gf_afr_mt_afr_crawl_data_t); +                                gf_afr_mt_crawl_data_t);          if (!crawl_data)                  goto out; +        crawl_data->process_entry = process_entry;          crawl_data->child = idx;          crawl_data->pid = frame->root->pid;          crawl_data->crawl = crawl; -        gf_log (this->name, GF_LOG_INFO, "starting crawl for %s", -                priv->children[idx]->name); -        ret = synctask_new (this->ctx->env, afr_crawl, -                            afr_crawl_done, frame, crawl_data); +        crawl_data->op_data = op_data; +        crawl_data->crawl_flags = crawl_flags; +        gf_log (this->name, GF_LOG_INFO, "starting crawl %d for %s", +                crawl_data->crawl, priv->children[idx]->name); + +        if (exclusive) +                crawler = afr_dir_exclusive_crawl; +        else +                crawler = afr_dir_crawl; +        ret = synctask_new (this->ctx->env, crawler, +                            crawl_done, frame, crawl_data);          if (ret)                  gf_log (this->name, GF_LOG_ERROR, "Could not create the "                          "task for %d ret %d", idx, ret); @@ -679,16 +1059,6 @@ out:          return;  } -//void -//afr_full_self_heal (xlator_t *this) -//{ -//        int     i = 0; -//        afr_private_t *priv = this->private; -// -//        for (i = 0; i < priv->child_count; i++) -//                afr_start_crawl (this, i, FULL); -//} -  void  afr_build_root_loc (xlator_t *this, loc_t *loc)  { diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h index eb1021995..44fd9f385 100644 --- a/xlators/cluster/afr/src/afr-self-heald.h +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -26,17 +26,22 @@  #define IS_ENTRY_PARENT(entry) (!strcmp (entry, ".."))  #define AFR_ALL_CHILDREN -1 -typedef enum { -        INDEX, -        FULL, -} afr_crawl_type_t;  typedef struct afr_crawl_data_ { -        int              child; -        pid_t            pid; -        afr_crawl_type_t crawl; -        xlator_t         *readdir_xl; +        int                 child; +        pid_t               pid; +        afr_crawl_type_t    crawl; +        xlator_t            *readdir_xl; +        void                *op_data; +        int                 crawl_flags; +        int (*process_entry) (xlator_t *this, struct afr_crawl_data_ *crawl_data, +                              gf_dirent_t *entry, loc_t *child, loc_t *parent, +                              struct iatt *iattr);  } afr_crawl_data_t; +typedef int (*process_entry_cbk_t) (xlator_t *this, afr_crawl_data_t *crawl_data, +                              gf_dirent_t *entry, loc_t *child, loc_t *parent, +                              struct iatt *iattr); +  void afr_proactive_self_heal (xlator_t *this, int idx);  void afr_build_root_loc (xlator_t *this, loc_t *loc); @@ -48,4 +53,7 @@ afr_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent);  void  afr_do_poll_self_heal (void *data); + +int +afr_xl_op (xlator_t *this, dict_t *input, dict_t *output);  #endif /* __AFR_SELF_HEALD_H__ */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index abc6aa3e5..8e2ef1008 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -30,7 +30,10 @@  #endif  #include "afr-common.c" -#define SHD_INODE_LRU_LIMIT     100 +#define SHD_INODE_LRU_LIMIT          100 +#define AFR_EH_HEALED_LIMIT          1024 +#define AFR_EH_HEAL_FAIL_LIMIT       1024 +#define AFR_EH_SPLIT_BRAIN_LIMIT     1024  struct volume_options options[]; @@ -39,8 +42,13 @@ notify (xlator_t *this, int32_t event,          void *data, ...)  {          int ret = -1; +        va_list         ap; +        void *data2 = NULL; -        ret = afr_notify (this, event, data); +        va_start (ap, data); +        data2 = va_arg (ap, dict_t*); +        va_end (ap); +        ret = afr_notify (this, event, data, data2);          return ret;  } @@ -342,42 +350,55 @@ init (xlator_t *this)                  goto out;          } -        priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count, -                                   gf_afr_mt_afr_brick_pos_t); -        if (!priv->shd.pos) { -                ret = -ENOMEM; +        priv->first_lookup = 1; +        priv->root_inode = NULL; + +        if (!priv->shd.enabled) { +                ret = 0;                  goto out;          } +        ret = -ENOMEM; +        priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count, +                                   gf_afr_mt_brick_pos_t); +        if (!priv->shd.pos) +                goto out; +          priv->shd.pending = GF_CALLOC (sizeof (*priv->shd.pending), child_count, -                                       gf_afr_mt_afr_shd_bool_t); -        if (!priv->shd.pending) { -                ret = -ENOMEM; +                                       gf_afr_mt_int32_t); +        if (!priv->shd.pending)                  goto out; -        }          priv->shd.inprogress = GF_CALLOC (sizeof (*priv->shd.inprogress), -                                          child_count, -                                          gf_afr_mt_afr_shd_bool_t); -        if (!priv->shd.inprogress) { -                ret = -ENOMEM; +                                          child_count, gf_afr_mt_shd_bool_t); +        if (!priv->shd.inprogress)                  goto out; -        }          priv->shd.timer = GF_CALLOC (sizeof (*priv->shd.timer), child_count, -                                     gf_afr_mt_afr_shd_timer_t); -        if (!priv->shd.timer) { -                ret = -ENOMEM; +                                     gf_afr_mt_shd_timer_t); +        if (!priv->shd.timer) +                goto out; + +        priv->shd.healed = eh_new (AFR_EH_HEALED_LIMIT, _gf_false); +        if (!priv->shd.healed) +                goto out; + +        priv->shd.heal_failed = eh_new (AFR_EH_HEAL_FAIL_LIMIT, _gf_false); +        if (!priv->shd.heal_failed) +                goto out; + +        priv->shd.split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false); +        if (!priv->shd.split_brain) +                goto out; + +        priv->shd.sh_times = GF_CALLOC (priv->child_count, +                                        sizeof (*priv->shd.sh_times), +                                        gf_afr_mt_time_t); +        if (!priv->shd.sh_times) +                goto out; + +        this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this); +        if (!this->itable)                  goto out; -        } -        if (priv->shd.enabled) { -                this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this); -                if (!this->itable) { -                        ret = -ENOMEM; -                        goto out; -                } -        } -        priv->first_lookup = 1; -        priv->root_inode = NULL;          ret = 0;  out: @@ -393,6 +414,8 @@ fini (xlator_t *this)          priv = this->private;          this->private = NULL;          afr_priv_destroy (priv); +        if (this->itable);//I dont see any destroy func +          return 0;  } diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index f3d372de5..0f4a6d90a 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -88,12 +88,22 @@ typedef struct afr_inode_ctx_ {          int32_t  *fresh_children;//increasing order of latency  } afr_inode_ctx_t; +typedef enum { +        NONE, +        INDEX, +        FULL, +} afr_crawl_type_t; +  typedef struct afr_self_heald_ { -        gf_boolean_t    enabled; -        gf_boolean_t    *pending; -        gf_boolean_t    *inprogress; -        afr_child_pos_t *pos; -        gf_timer_t      **timer; +        gf_boolean_t     enabled; +        afr_crawl_type_t *pending; +        gf_boolean_t     *inprogress; +        afr_child_pos_t  *pos; +        time_t           *sh_times; +        gf_timer_t       **timer; +        eh_t             *healed; +        eh_t             *heal_failed; +        eh_t             *split_brain;  } afr_self_heald_t;  typedef struct _afr_private { @@ -747,8 +757,7 @@ int  pump_command_reply (call_frame_t *frame, xlator_t *this);  int32_t -afr_notify (xlator_t *this, int32_t event, -            void *data, ...); +afr_notify (xlator_t *this, int32_t event, void *data, void *data2);  int  afr_attempt_lock_recovery (xlator_t *this, int32_t child_index); diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c index 281bfd722..eae7899e9 100644 --- a/xlators/cluster/afr/src/pump.c +++ b/xlators/cluster/afr/src/pump.c @@ -2334,7 +2334,7 @@ notify (xlator_t *this, int32_t event,          child_xl = (xlator_t *) data; -        ret = afr_notify (this, event, data); +        ret = afr_notify (this, event, data, NULL);  	switch (event) {  	case GF_EVENT_CHILD_DOWN: diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index b06dd28cf..79439535f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -651,6 +651,7 @@ out:          glusterd_op_sm ();          return ret;  } +  int  glusterd_handle_cli_probe (rpcsvc_request_t *req)  { diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 77ed83f8b..da1299de0 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -152,6 +152,7 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin          gd1_mgmt_brick_op_req   *brick_req = NULL;          char                    *volname = NULL;          char                    name[1024] = {0,}; +        gf_xl_afr_op_t          heal_op = GF_AFR_OP_INVALID;          GF_ASSERT (op < GD_OP_MAX);          GF_ASSERT (op > GD_OP_NONE); @@ -190,8 +191,12 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin                  if (!brick_req)                          goto out; -                brick_req->op = GLUSTERD_BRICK_XLATOR_HEAL; +                brick_req->op = GLUSTERD_BRICK_XLATOR_OP;                  brick_req->name = ""; +                ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op); +                if (ret) +                        goto out; +                ret = dict_set_int32 (dict, "xl-op", heal_op);          }                  break;          case GD_OP_STATUS_VOLUME: @@ -2190,6 +2195,7 @@ glusterd_need_brick_op (glusterd_op_t op)          case GD_OP_PROFILE_VOLUME:          case GD_OP_STATUS_VOLUME:          case GD_OP_DEFRAG_BRICK_VOLUME: +        case GD_OP_HEAL_VOLUME:                  ret = _gf_true;                  break;          default: @@ -2578,6 +2584,94 @@ _status_volume_add_brick_rsp (dict_t *this, char *key, data_t *value,          return;  } +//input-key: <replica-id>:<child-id>-* +//output-key: <brick-id>-* +void +_heal_volume_add_shd_rsp (dict_t *this, char *key, data_t *value, void *data) +{ +        char                            new_key[256] = {0,}; +        char                            int_str[16] = {0}; +        data_t                          *new_value = NULL; +        char                            *rxl_end = NULL; +        char                            *rxl_child_end = NULL; +        glusterd_volinfo_t              *volinfo = NULL; +        int                             rxl_id = 0; +        int                             rxl_child_id = 0; +        int                             brick_id = 0; +        int                             int_len = 0; +        int                             brick_count = 0; +        int                             ret = 0; +        glusterd_heal_rsp_conv_t        *rsp_ctx = NULL; + +        rsp_ctx = data; +        rxl_end = strchr (key, '-'); +        if (!rxl_end) +                goto out; + +        int_len = strlen (key) - strlen (rxl_end); +        strncpy (int_str, key, int_len); +        int_str[int_len] = '\0'; +        ret = gf_string2int (int_str, &rxl_id); +        if (ret) +                goto out; + +        rxl_child_end = strchr (rxl_end + 1, '-'); +        if (!rxl_child_end) +                goto out; + +        int_len = strlen (rxl_end) - strlen (rxl_child_end) - 1; +        strncpy (int_str, rxl_end + 1, int_len); +        int_str[int_len] = '\0'; +        ret = gf_string2int (int_str, &rxl_child_id); +        if (ret) +                goto out; + +        volinfo = rsp_ctx->volinfo; +        brick_id = rxl_id * volinfo->replica_count + rxl_child_id; + +        new_value = data_copy (value); +        snprintf (new_key, sizeof (new_key), "%d%s", brick_id, rxl_child_end); +        dict_set (rsp_ctx->dict, new_key, new_value); + +        ret = dict_get_int32 (rsp_ctx->dict, "count", &brick_count); +        if (brick_id >= brick_count) +                ret = dict_set_int32 (rsp_ctx->dict, "count", brick_id + 1); +out: +        return; +} + +int +glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict, +                                dict_t *op_ctx, char **op_errstr) +{ +        int                             ret = 0; +        glusterd_heal_rsp_conv_t        rsp_ctx = {0}; +        char                            *volname = NULL; +        glusterd_volinfo_t              *volinfo = NULL; + +        GF_ASSERT (rsp_dict); +        GF_ASSERT (op_ctx); +        GF_ASSERT (op_errstr); + +        ret = dict_get_str (req_dict, "volname", &volname); +        if (ret) { +                gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); +                goto out; +        } + +        ret  = glusterd_volinfo_find (volname, &volinfo); + +        if (ret) +                goto out; + +        rsp_ctx.dict = op_ctx; +        rsp_ctx.volinfo = volinfo; +        dict_foreach (rsp_dict, _heal_volume_add_shd_rsp, &rsp_ctx); + +out: +        return ret; +} +  int  glusterd_status_volume_brick_rsp (glusterd_brickinfo_t *brickinfo,                                    dict_t *rsp_dict, dict_t *op_ctx, @@ -2607,27 +2701,29 @@ glusterd_status_volume_brick_rsp (glusterd_brickinfo_t *brickinfo,          rsp_ctx.count = index;          rsp_ctx.dict = op_ctx;          dict_foreach (rsp_dict, _status_volume_add_brick_rsp, &rsp_ctx); -        ret = dict_set_int32 (op_ctx, "count", count);  out:          return ret;  }  int32_t -glusterd_handle_brick_rsp (glusterd_brickinfo_t *brickinfo, -                           glusterd_op_t op, dict_t *rsp_dict, dict_t *op_ctx, -                           char **op_errstr) +glusterd_handle_node_rsp (glusterd_req_ctx_t *req_ctx, void *pending_entry, +                          glusterd_op_t op, dict_t *rsp_dict, dict_t *op_ctx, +                          char **op_errstr)  { -        int     ret = 0; +        int                     ret = 0; +        glusterd_brickinfo_t    *brickinfo = NULL;          GF_ASSERT (op_errstr);          switch (op) {          case GD_OP_PROFILE_VOLUME: +                brickinfo = pending_entry;                  ret = glusterd_profile_volume_brick_rsp (brickinfo, rsp_dict,                                                           op_ctx, op_errstr);                  break;          case GD_OP_STATUS_VOLUME: +                brickinfo = pending_entry;                  ret = glusterd_status_volume_brick_rsp (brickinfo, rsp_dict,                                                          op_ctx, op_errstr);                  break; @@ -2636,6 +2732,10 @@ glusterd_handle_brick_rsp (glusterd_brickinfo_t *brickinfo,                  dict_copy (rsp_dict, op_ctx);          break; +        case GD_OP_HEAL_VOLUME: +                ret = glusterd_heal_volume_brick_rsp (req_ctx->dict, rsp_dict, +                                                      op_ctx, op_errstr); +                break;          default:                  break;          } @@ -2892,16 +2992,91 @@ _add_rxlator_to_dict (dict_t *dict, char *volname, int index, int count)          char    key[128]        = {0,};          char    *xname          = NULL; -        snprintf (key, sizeof (key), "heal-%d", count); +        snprintf (key, sizeof (key), "xl-%d", count);          ret = gf_asprintf (&xname, "%s-replicate-%d", volname, index);          if (ret == -1)                  goto out;          ret = dict_set_dynstr (dict, key, xname); +        if (ret) +                goto out; + +        ret = dict_set_int32 (dict, xname, index);  out:          return ret;  } +int +_select_rxlators_with_local_bricks (xlator_t *this, glusterd_volinfo_t *volinfo, +                                    dict_t *dict) +{ +        glusterd_brickinfo_t    *brickinfo = NULL; +        glusterd_conf_t         *priv   = NULL; +        int                     index = 1; +        int                     rxlator_count = 0; +        int                     replica_count = 0; +        gf_boolean_t            add     = _gf_false; + +        priv = this->private; +        replica_count = volinfo->replica_count; +        list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { +                if (uuid_is_null (brickinfo->uuid)) +                        (void)glusterd_resolve_brick (brickinfo); + +                if (!uuid_compare (priv->uuid, brickinfo->uuid)) +                        add = _gf_true; +                if (index % replica_count == 0) { +                        if (add) { +                                _add_rxlator_to_dict (dict, volinfo->volname, +                                                      (index-1)/replica_count, +                                                      rxlator_count); +                                rxlator_count++; +                        } +                        add = _gf_false; +                } + +                index++; +        } +        return rxlator_count; +} + +int +_select_rxlators_for_full_self_heal (xlator_t *this, +                                     glusterd_volinfo_t *volinfo, +                                     dict_t *dict) +{ +        glusterd_brickinfo_t    *brickinfo = NULL; +        glusterd_conf_t         *priv   = NULL; +        int                     index = 1; +        int                     rxlator_count = 0; +        int                     replica_count = 0; +        uuid_t                  candidate = {0}; + +        priv = this->private; +        replica_count = volinfo->replica_count; + +        list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { +                if (uuid_is_null (brickinfo->uuid)) +                        (void)glusterd_resolve_brick (brickinfo); + +                if (uuid_compare (brickinfo->uuid, candidate) > 0) +                        uuid_copy (candidate, brickinfo->uuid); + +                if (index % replica_count == 0) { +                        if (!uuid_compare (priv->uuid, candidate)) { +                                _add_rxlator_to_dict (dict, volinfo->volname, +                                                      (index-1)/replica_count, +                                                      rxlator_count); +                                rxlator_count++; +                        } +                        uuid_clear (candidate); +                } + +                index++; +        } +        return rxlator_count; +} +  static int  glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr)  { @@ -2909,14 +3084,11 @@ glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr)          char                                    *volname = NULL;          glusterd_conf_t                         *priv = NULL;          glusterd_volinfo_t                      *volinfo = NULL; -        glusterd_brickinfo_t                    *brickinfo = NULL;          xlator_t                                *this = NULL;          char                                    msg[2048] = {0,}; -        int                                     replica_count = 0; -        int                                     index = 1; -        int                                     rxlator_count = 0; -        uuid_t                                  candidate = {0};          glusterd_pending_node_t                 *pending_node = NULL; +        gf_xl_afr_op_t                          heal_op = GF_AFR_OP_INVALID; +        int                                     rxlator_count = 0;          this = THIS;          GF_ASSERT (this); @@ -2939,48 +3111,43 @@ glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr)                  goto out;          } -        replica_count = volinfo->replica_count; - -        index = 1; -        list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { -                if (uuid_is_null (brickinfo->uuid)) -                        (void)glusterd_resolve_brick (brickinfo); - -                if (uuid_compare (brickinfo->uuid, candidate) > 0) -                        uuid_copy (candidate, brickinfo->uuid); - -                if (index % replica_count == 0) { -                        if (!uuid_compare (priv->uuid, candidate)) { -                                _add_rxlator_to_dict (dict, volname, -                                                      (index-1)/replica_count, -                                                      rxlator_count); -                                rxlator_count++; -                        } -                        uuid_clear (candidate); -                } +        ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op); +        if (ret || (heal_op == GF_AFR_OP_INVALID)) { +                gf_log ("glusterd", GF_LOG_ERROR, "heal op invalid"); +                goto out; +        } -                index++; +        switch (heal_op) { +        case GF_AFR_OP_HEAL_FULL: +                rxlator_count = _select_rxlators_for_full_self_heal (this, +                                                                     volinfo, +                                                                     dict); +                break; +        default: +                rxlator_count = _select_rxlators_with_local_bricks (this, +                                                                    volinfo, +                                                                    dict); +                break;          } +        if (!rxlator_count) +                goto out;          ret = dict_set_int32 (dict, "count", rxlator_count);          if (ret)                  goto out; -        if (rxlator_count) { -                pending_node = GF_CALLOC (1, sizeof (*pending_node), -                                          gf_gld_mt_pending_node_t); -                if (!pending_node) { -                        ret = -1; -                        goto out; -                } else { -                        pending_node->node = priv->shd; -                        pending_node->type = GD_NODE_SHD; -                        list_add_tail (&pending_node->list, -                                       &opinfo.pending_bricks); -                        pending_node = NULL; -                } +        pending_node = GF_CALLOC (1, sizeof (*pending_node), +                                  gf_gld_mt_pending_node_t); +        if (!pending_node) { +                ret = -1; +                goto out; +        } else { +                pending_node->node = priv->shd; +                pending_node->type = GD_NODE_SHD; +                list_add_tail (&pending_node->list, +                               &opinfo.pending_bricks); +                pending_node = NULL;          } -  out:          gf_log (THIS->name, GF_LOG_DEBUG, "Returning ret %d", ret);          return ret; @@ -3222,8 +3389,8 @@ glusterd_op_ac_rcvd_brick_op_acc (glusterd_op_sm_event_t *event, void *ctx)          if (opinfo.brick_pending_count > 0)                  opinfo.brick_pending_count--; -        glusterd_handle_brick_rsp (pending_entry, op, ev_ctx->rsp_dict, -                                   op_ctx, &op_errstr); +        glusterd_handle_node_rsp (req_ctx, pending_entry, op, ev_ctx->rsp_dict, +                                  op_ctx, &op_errstr);          if (opinfo.brick_pending_count > 0)                  goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h index 12aa139f5..b4df82017 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h @@ -154,6 +154,11 @@ typedef struct glusterd_pr_brick_rsp_conv_t {          dict_t *dict;  } glusterd_pr_brick_rsp_conv_t; +typedef struct glusterd_heal_rsp_conv_ { +        dict_t *dict; +        glusterd_volinfo_t *volinfo; +} glusterd_heal_rsp_conv_t; +  typedef struct glusterd_status_rsp_conv_ {          int count;          dict_t *dict; diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c index 537496f08..39a9c6161 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c @@ -111,6 +111,11 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret,                  }                  break;          } +        case GD_OP_HEAL_VOLUME: +        { +                glusterd_add_bricks_hname_path_to_dict (ctx); +                break; +        }          case GD_OP_PROFILE_VOLUME:          {                  if (ctx && dict_get_int32 (ctx, "count", &count)) { @@ -142,7 +147,6 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret,          case GD_OP_ADD_BRICK:          case GD_OP_LOG_ROTATE:          case GD_OP_SYNC_VOLUME: -        case GD_OP_HEAL_VOLUME:          case GD_OP_STATEDUMP_VOLUME:          case GD_OP_REPLACE_BRICK:          case GD_OP_STATUS_VOLUME: @@ -1107,6 +1111,48 @@ out:          return ret;  } +void +_heal_volume_add_peer_rsp (dict_t *peer_dict, char *key, data_t *value, +                           void *data) +{ +        int                             max_brick = 0; +        int                             peer_max_brick = 0; +        int                             ret = 0; +        dict_t                          *ctx_dict = data; + + + +        ret = dict_get_int32 (ctx_dict, "count", &max_brick); +        ret = dict_get_int32 (peer_dict, "count", &peer_max_brick); +        if (peer_max_brick > max_brick) +                ret = dict_set_int32 (ctx_dict, "count", peer_max_brick); +        else +                ret = dict_set_int32 (ctx_dict, "count", max_brick); +        dict_del (peer_dict, "count"); +        dict_copy (peer_dict, ctx_dict); +        return; +} + +int +glusterd_volume_heal_use_rsp_dict (dict_t *rsp_dict) +{ +        int            ret      = 0; +        dict_t        *ctx_dict = NULL; +        glusterd_op_t  op       = GD_OP_NONE; + +        GF_ASSERT (rsp_dict); + +        op = glusterd_op_get_op (); +        GF_ASSERT (GD_OP_HEAL_VOLUME == op); + +        ctx_dict = glusterd_op_get_ctx (op); + +        if (!ctx_dict) +                goto out; +        dict_foreach (rsp_dict, _heal_volume_add_peer_rsp, ctx_dict); +out: +        return ret; +}  int32_t  glusterd3_1_commit_op_cbk (struct rpc_req *req, struct iovec *iov, @@ -1229,6 +1275,13 @@ glusterd3_1_commit_op_cbk (struct rpc_req *req, struct iovec *iov,                  case GD_OP_DEFRAG_BRICK_VOLUME:                  break; +                case GD_OP_HEAL_VOLUME: +                        ret = glusterd_volume_heal_use_rsp_dict (dict); +                        if (ret) +                                goto out; + +                break; +                  default:                  break;                  } @@ -1723,7 +1776,7 @@ glusterd3_1_brick_op (call_frame_t *frame, xlator_t *this,          char                            *op_errstr = NULL;          int                             pending_bricks = 0;          glusterd_pending_node_t         *pending_node; -        glusterd_req_ctx_t               *req_ctx = NULL; +        glusterd_req_ctx_t              *req_ctx = NULL;          struct rpc_clnt                 *rpc = NULL;          if (!this) { diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 4ec8ae5dc..117e5e8f3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -1476,6 +1476,36 @@ _add_volinfo_dict_to_prdict (dict_t *this, char *key, data_t *value, void *data)  }  int32_t +glusterd_add_bricks_hname_path_to_dict (dict_t *dict) +{ +        char                    *volname = NULL; +        glusterd_volinfo_t      *volinfo = NULL; +        glusterd_brickinfo_t    *brickinfo = NULL; +        int                     ret = 0; +        char                    key[256] = {0}; +        int                     index = 0; + +        ret = dict_get_str (dict, "volname", &volname); +        if (ret) { +                gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); +                goto out; +        } + +        ret  = glusterd_volinfo_find (volname, &volinfo); +        if (ret) +                goto out; +        list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { +                snprintf (key, sizeof (key), "%d-hostname", index); +                ret = dict_set_str (dict, key, brickinfo->hostname); +                snprintf (key, sizeof (key), "%d-path", index); +                ret = dict_set_str (dict, key, brickinfo->path); +                index++; +        } +out: +        return ret; +} + +int32_t  glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,                               dict_t  *dict, int32_t count)  { diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index f71ecc404..de6185753 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -410,4 +410,7 @@ glusterd_get_trusted_client_filepath (char *filepath,                                        gf_transport_type type);  int  glusterd_restart_rebalance (glusterd_conf_t *conf); + +int32_t +glusterd_add_bricks_hname_path_to_dict (dict_t *dict);  #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 9df9d4219..caafa9fd0 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -470,6 +470,8 @@ glusterd_handle_cli_heal_volume (rpcsvc_request_t *req)                                  "failed to "                                  "unserialize req-buffer to dictionary");                          goto out; +                } else { +                        dict->extra_stdfree = cli_req.dict.dict_val;                  }          } @@ -489,8 +491,6 @@ glusterd_handle_cli_heal_volume (rpcsvc_request_t *req)  out:          if (ret && dict)                  dict_unref (dict); -        if (cli_req.dict.dict_val) -                free (cli_req.dict.dict_val); //its malloced by xdr          glusterd_friend_sm ();          glusterd_op_sm (); @@ -999,6 +999,7 @@ glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr)          char                                    msg[2048];          glusterd_conf_t                         *priv = NULL;          dict_t                                  *opt_dict = NULL; +        gf_xl_afr_op_t                          heal_op = GF_AFR_OP_INVALID;          priv = THIS->private;          if (!priv) { @@ -1068,6 +1069,15 @@ glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr)                  goto out;          } +        ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op); +        if (ret || (heal_op == GF_AFR_OP_INVALID)) { +                ret = -1; +                snprintf (msg, sizeof (msg), "Invalid heal-op"); +                *op_errstr = gf_strdup (msg); +                gf_log (THIS->name, GF_LOG_WARNING, "%s", msg); +                goto out; +        } +          ret = 0;  out:          gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);  | 
