From a113d936216298a4ea8812b8843cac4837b53cd1 Mon Sep 17 00:00:00 2001
From: Sheetal Pamecha
Date: Wed, 20 Nov 2019 12:42:12 +0530
Subject: glusterd: check for same node while adding bricks in disperse volume

The optimal way to configure disperse and replicate volumes is to place
each brick on a different node. Volume create already enforces this: it
fails with a message that the layout is not optimal, and the user must
use 'force' to override that behaviour.

Implement the same check for the add-brick operation, so that the added
bricks cannot all end up on the same host. The operation now errors out
accordingly and, as with create, the check can be overridden with
'force'.

fixes: #1047
Change-Id: I3ee9c97c1a14b73f4532893bc00187ef9355238b
Signed-off-by: Sheetal Pamecha
---
 xlators/mgmt/glusterd/src/glusterd-brick-ops.c  |  20 +-
 xlators/mgmt/glusterd/src/glusterd-utils.c      | 224 ++++++++++++++++++++++
 xlators/mgmt/glusterd/src/glusterd-utils.h      |   3 +
 xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 239 +-----------------------
 4 files changed, 248 insertions(+), 238 deletions(-)

diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 00a26833d58..c41ae61040e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -21,7 +21,6 @@
 #include "glusterd-messages.h"
 #include "glusterd-server-quorum.h"
 #include
-#include "glusterd-volgen.h"
 #include
 #include
@@ -1412,6 +1411,25 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)

     is_force = dict_get_str_boolean(dict, "force", _gf_false);

+    /* Check brick order if the volume type is replicate or disperse. If
+     * 'force' is not given at the end of the command, check the brick order.
+     */
+
+    if (!is_force) {
+        if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) ||
+            (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)) {
+            ret = glusterd_check_brick_order(dict, msg, volinfo->type);
+            if (ret) {
+                gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER,
+                       "Not adding brick because of "
+                       "bad brick order. 
%s", + msg); + *op_errstr = gf_strdup(msg); + goto out; + } + } + } + if (volinfo->replica_count < replica_count && !is_force) { cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) { diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index e5653557aeb..78374a03c99 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -14318,3 +14318,227 @@ out: return ret; } + +static gf_ai_compare_t +glusterd_compare_addrinfo(struct addrinfo *first, struct addrinfo *next) +{ + int ret = -1; + struct addrinfo *tmp1 = NULL; + struct addrinfo *tmp2 = NULL; + char firstip[NI_MAXHOST] = {0.}; + char nextip[NI_MAXHOST] = { + 0, + }; + + for (tmp1 = first; tmp1 != NULL; tmp1 = tmp1->ai_next) { + ret = getnameinfo(tmp1->ai_addr, tmp1->ai_addrlen, firstip, NI_MAXHOST, + NULL, 0, NI_NUMERICHOST); + if (ret) + return GF_AI_COMPARE_ERROR; + for (tmp2 = next; tmp2 != NULL; tmp2 = tmp2->ai_next) { + ret = getnameinfo(tmp2->ai_addr, tmp2->ai_addrlen, nextip, + NI_MAXHOST, NULL, 0, NI_NUMERICHOST); + if (ret) + return GF_AI_COMPARE_ERROR; + if (!strcmp(firstip, nextip)) { + return GF_AI_COMPARE_MATCH; + } + } + } + return GF_AI_COMPARE_NO_MATCH; +} + +/* Check for non optimal brick order for Replicate/Disperse : + * Checks if bricks belonging to a replicate or disperse + * volume are present on the same server + */ +int32_t +glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type) +{ + int ret = -1; + int i = 0; + int j = 0; + int k = 0; + xlator_t *this = NULL; + addrinfo_list_t *ai_list = NULL; + addrinfo_list_t *ai_list_tmp1 = NULL; + addrinfo_list_t *ai_list_tmp2 = NULL; + char *brick = NULL; + char *brick_list = NULL; + char *brick_list_dup = NULL; + char *brick_list_ptr = NULL; + char *tmpptr = NULL; + char *volname = NULL; + int32_t brick_count = 0; + int32_t sub_count = 0; + struct addrinfo *ai_info = NULL; + char brick_addr[128] = { + 0, + }; + int addrlen = 0; + + const char failed_string[2048] = + "Failed to perform brick order " + "check. Use 'force' at the end of the command" + " if you want to override this behavior. "; + const char found_string[2048] = + "Multiple bricks of a %s " + "volume are present on the same server. This " + "setup is not optimal. Bricks should be on " + "different nodes to have best fault tolerant " + "configuration. Use 'force' at the end of the " + "command if you want to override this " + "behavior. 
"; + + this = THIS; + + GF_ASSERT(this); + + ai_list = MALLOC(sizeof(addrinfo_list_t)); + ai_list->info = NULL; + CDS_INIT_LIST_HEAD(&ai_list->list); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &brick_list); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Bricks check : Could not " + "retrieve bricks list"); + goto out; + } + + ret = dict_get_int32n(dict, "count", SLEN("count"), &brick_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Bricks check : Could not " + "retrieve brick count"); + goto out; + } + + if (type != GF_CLUSTER_TYPE_DISPERSE) { + ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"), + &sub_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Bricks check : Could" + " not retrieve replica count"); + goto out; + } + gf_msg_debug(this->name, 0, + "Replicate cluster type " + "found. Checking brick order."); + } else { + ret = dict_get_int32n(dict, "disperse-count", SLEN("disperse-count"), + &sub_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Bricks check : Could" + " not retrieve disperse count"); + goto out; + } + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DISPERSE_CLUSTER_FOUND, + "Disperse cluster type" + " found. Checking brick order."); + } + brick_list_dup = brick_list_ptr = gf_strdup(brick_list); + /* Resolve hostnames and get addrinfo */ + while (i < brick_count) { + ++i; + brick = strtok_r(brick_list_dup, " \n", &tmpptr); + brick_list_dup = tmpptr; + if (brick == NULL) + goto check_failed; + tmpptr = strrchr(brick, ':'); + if (tmpptr == NULL) + goto check_failed; + addrlen = strlen(brick) - strlen(tmpptr); + strncpy(brick_addr, brick, addrlen); + brick_addr[addrlen] = '\0'; + ret = getaddrinfo(brick_addr, NULL, NULL, &ai_info); + if (ret != 0) { + ret = 0; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_RESOLVE_FAIL, + "unable to resolve host name for addr %s", brick_addr); + goto out; + } + ai_list_tmp1 = MALLOC(sizeof(addrinfo_list_t)); + if (ai_list_tmp1 == NULL) { + ret = 0; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "failed to allocate " + "memory"); + freeaddrinfo(ai_info); + goto out; + } + ai_list_tmp1->info = ai_info; + cds_list_add_tail(&ai_list_tmp1->list, &ai_list->list); + ai_list_tmp1 = NULL; + } + + i = 0; + ai_list_tmp1 = cds_list_entry(ai_list->list.next, addrinfo_list_t, list); + + /* Check for bad brick order */ + while (i < brick_count) { + ++i; + ai_info = ai_list_tmp1->info; + ai_list_tmp1 = cds_list_entry(ai_list_tmp1->list.next, addrinfo_list_t, + list); + if (0 == i % sub_count) { + j = 0; + continue; + } + ai_list_tmp2 = ai_list_tmp1; + k = j; + while (k < sub_count - 1) { + ++k; + ret = glusterd_compare_addrinfo(ai_info, ai_list_tmp2->info); + if (GF_AI_COMPARE_ERROR == ret) + goto check_failed; + if (GF_AI_COMPARE_MATCH == ret) + goto found_bad_brick_order; + ai_list_tmp2 = cds_list_entry(ai_list_tmp2->list.next, + addrinfo_list_t, list); + } + ++j; + } + gf_msg_debug(this->name, 0, "Brick order okay"); + ret = 0; + goto out; + +check_failed: + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER_CHECK_FAIL, + "Failed bad brick order check"); + snprintf(err_str, sizeof(failed_string), failed_string); + ret = -1; + goto out; + +found_bad_brick_order: + 
gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BAD_BRKORDER, + "Bad brick order found"); + if (type == GF_CLUSTER_TYPE_DISPERSE) { + snprintf(err_str, sizeof(found_string), found_string, "disperse"); + } else { + snprintf(err_str, sizeof(found_string), found_string, "replicate"); + } + + ret = -1; +out: + ai_list_tmp2 = NULL; + GF_FREE(brick_list_ptr); + cds_list_for_each_entry(ai_list_tmp1, &ai_list->list, list) + { + if (ai_list_tmp1->info) + freeaddrinfo(ai_list_tmp1->info); + free(ai_list_tmp2); + ai_list_tmp2 = ai_list_tmp1; + } + free(ai_list_tmp2); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index b58f158fd14..c5303d8a8cb 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -857,4 +857,7 @@ search_brick_path_from_proc(pid_t brick_pid, char *brickpath); int32_t glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, int32_t count); +int32_t +glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type); + #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 1c5a2508d9c..a8f26477bc9 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -37,240 +37,6 @@ #define glusterd_op_start_volume_args_get(dict, volname, flags) \ glusterd_op_stop_volume_args_get(dict, volname, flags) -gf_ai_compare_t -glusterd_compare_addrinfo(struct addrinfo *first, struct addrinfo *next) -{ - int ret = -1; - struct addrinfo *tmp1 = NULL; - struct addrinfo *tmp2 = NULL; - char firstip[NI_MAXHOST] = {0.}; - char nextip[NI_MAXHOST] = { - 0, - }; - - for (tmp1 = first; tmp1 != NULL; tmp1 = tmp1->ai_next) { - ret = getnameinfo(tmp1->ai_addr, tmp1->ai_addrlen, firstip, NI_MAXHOST, - NULL, 0, NI_NUMERICHOST); - if (ret) - return GF_AI_COMPARE_ERROR; - for (tmp2 = next; tmp2 != NULL; tmp2 = tmp2->ai_next) { - ret = getnameinfo(tmp2->ai_addr, tmp2->ai_addrlen, nextip, - NI_MAXHOST, NULL, 0, NI_NUMERICHOST); - if (ret) - return GF_AI_COMPARE_ERROR; - if (!strcmp(firstip, nextip)) { - return GF_AI_COMPARE_MATCH; - } - } - } - return GF_AI_COMPARE_NO_MATCH; -} - -/* Check for non optimal brick order for replicate : - * Checks if bricks belonging to a replicate volume - * are present on the same server - */ -int32_t -glusterd_check_brick_order(dict_t *dict, char *err_str) -{ - int ret = -1; - int i = 0; - int j = 0; - int k = 0; - xlator_t *this = NULL; - addrinfo_list_t *ai_list = NULL; - addrinfo_list_t *ai_list_tmp1 = NULL; - addrinfo_list_t *ai_list_tmp2 = NULL; - char *brick = NULL; - char *brick_list = NULL; - char *brick_list_dup = NULL; - char *brick_list_ptr = NULL; - char *tmpptr = NULL; - char *volname = NULL; - int32_t brick_count = 0; - int32_t type = GF_CLUSTER_TYPE_NONE; - int32_t sub_count = 0; - struct addrinfo *ai_info = NULL; - char brick_addr[128] = { - 0, - }; - int addrlen = 0; - - const char failed_string[2048] = - "Failed to perform brick order " - "check. Use 'force' at the end of the command" - " if you want to override this behavior. "; - const char found_string[2048] = - "Multiple bricks of a %s " - "volume are present on the same server. This " - "setup is not optimal. Bricks should be on " - "different nodes to have best fault tolerant " - "configuration. Use 'force' at the end of the " - "command if you want to override this " - "behavior. 
"; - - this = THIS; - - GF_ASSERT(this); - - ai_list = MALLOC(sizeof(addrinfo_list_t)); - ai_list->info = NULL; - CDS_INIT_LIST_HEAD(&ai_list->list); - - ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Unable to get volume name"); - goto out; - } - - ret = dict_get_int32n(dict, "type", SLEN("type"), &type); - if (ret) { - snprintf(err_str, 512, "Unable to get type of volume %s", volname); - gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, "%s", - err_str); - goto out; - } - - ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &brick_list); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Bricks check : Could not " - "retrieve bricks list"); - goto out; - } - - ret = dict_get_int32n(dict, "count", SLEN("count"), &brick_count); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Bricks check : Could not " - "retrieve brick count"); - goto out; - } - - if (type != GF_CLUSTER_TYPE_DISPERSE) { - ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"), - &sub_count); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Bricks check : Could" - " not retrieve replica count"); - goto out; - } - gf_msg_debug(this->name, 0, - "Replicate cluster type " - "found. Checking brick order."); - } else { - ret = dict_get_int32n(dict, "disperse-count", SLEN("disperse-count"), - &sub_count); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Bricks check : Could" - " not retrieve disperse count"); - goto out; - } - gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DISPERSE_CLUSTER_FOUND, - "Disperse cluster type" - " found. Checking brick order."); - } - - brick_list_dup = brick_list_ptr = gf_strdup(brick_list); - /* Resolve hostnames and get addrinfo */ - while (i < brick_count) { - ++i; - brick = strtok_r(brick_list_dup, " \n", &tmpptr); - brick_list_dup = tmpptr; - if (brick == NULL) - goto check_failed; - tmpptr = strrchr(brick, ':'); - if (tmpptr == NULL) - goto check_failed; - addrlen = strlen(brick) - strlen(tmpptr); - strncpy(brick_addr, brick, addrlen); - brick_addr[addrlen] = '\0'; - ret = getaddrinfo(brick_addr, NULL, NULL, &ai_info); - if (ret != 0) { - ret = 0; - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_RESOLVE_FAIL, - "unable to resolve host name for addr %s", brick_addr); - goto out; - } - ai_list_tmp1 = MALLOC(sizeof(addrinfo_list_t)); - if (ai_list_tmp1 == NULL) { - ret = 0; - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, - "failed to allocate " - "memory"); - freeaddrinfo(ai_info); - goto out; - } - ai_list_tmp1->info = ai_info; - cds_list_add_tail(&ai_list_tmp1->list, &ai_list->list); - ai_list_tmp1 = NULL; - } - - i = 0; - ai_list_tmp1 = cds_list_entry(ai_list->list.next, addrinfo_list_t, list); - - /* Check for bad brick order */ - while (i < brick_count) { - ++i; - ai_info = ai_list_tmp1->info; - ai_list_tmp1 = cds_list_entry(ai_list_tmp1->list.next, addrinfo_list_t, - list); - if (0 == i % sub_count) { - j = 0; - continue; - } - ai_list_tmp2 = ai_list_tmp1; - k = j; - while (k < sub_count - 1) { - ++k; - ret = glusterd_compare_addrinfo(ai_info, ai_list_tmp2->info); - if (GF_AI_COMPARE_ERROR == ret) - goto check_failed; - if (GF_AI_COMPARE_MATCH == ret) - goto found_bad_brick_order; - ai_list_tmp2 = cds_list_entry(ai_list_tmp2->list.next, - addrinfo_list_t, list); - } - ++j; - } - gf_msg_debug(this->name, 0, "Brick order okay"); - ret = 0; - goto 
out; - -check_failed: - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER_CHECK_FAIL, - "Failed bad brick order check"); - snprintf(err_str, sizeof(failed_string), failed_string); - ret = -1; - goto out; - -found_bad_brick_order: - gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BAD_BRKORDER, - "Bad brick order found"); - if (type == GF_CLUSTER_TYPE_DISPERSE) { - snprintf(err_str, sizeof(found_string), found_string, "disperse"); - } else { - snprintf(err_str, sizeof(found_string), found_string, "replicate"); - } - - ret = -1; -out: - ai_list_tmp2 = NULL; - GF_FREE(brick_list_ptr); - cds_list_for_each_entry(ai_list_tmp1, &ai_list->list, list) - { - if (ai_list_tmp1->info) - freeaddrinfo(ai_list_tmp1->info); - free(ai_list_tmp2); - ai_list_tmp2 = ai_list_tmp1; - } - free(ai_list_tmp2); - return ret; -} - int __glusterd_handle_create_volume(rpcsvc_request_t *req) { @@ -1203,11 +969,10 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr, if (!is_force) { if ((type == GF_CLUSTER_TYPE_REPLICATE) || (type == GF_CLUSTER_TYPE_DISPERSE)) { - ret = glusterd_check_brick_order(dict, msg); + ret = glusterd_check_brick_order(dict, msg, type); if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER, - "Not " - "creating volume because of " + "Not creating volume because of " "bad brick order"); goto out; } -- cgit
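The subvolume-by-subvolume walk that glusterd_check_brick_order() performs in this patch can be illustrated outside of glusterd. What follows is a minimal, self-contained sketch, not code from the patch: it resolves each brick host with plain getaddrinfo()/getnameinfo() and reports a conflict when two bricks of the same replica/disperse subvolume resolve to the same numeric address. The helper names (hosts_conflict, check_brick_order_demo) and the hard-coded sample host list are assumptions made only for this example; the real implementation reads the brick list, counts, and volume type from the request dict and caches addrinfo results in a list instead of re-resolving per comparison.

/* brick_order_sketch.c - illustrative only, not part of the patch. */
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>

#ifndef NI_MAXHOST
#define NI_MAXHOST 1025
#endif

/* Return 1 if any resolved address of host `a` equals any resolved address
 * of host `b`, 0 if they share none, -1 on a resolution error. */
static int
hosts_conflict(const char *a, const char *b)
{
    struct addrinfo *res_a = NULL, *res_b = NULL, *pa = NULL, *pb = NULL;
    char ip_a[NI_MAXHOST], ip_b[NI_MAXHOST];
    int ret = 0;

    if (getaddrinfo(a, NULL, NULL, &res_a) != 0)
        return -1;
    if (getaddrinfo(b, NULL, NULL, &res_b) != 0) {
        freeaddrinfo(res_a);
        return -1;
    }

    for (pa = res_a; pa != NULL && ret == 0; pa = pa->ai_next) {
        if (getnameinfo(pa->ai_addr, pa->ai_addrlen, ip_a, sizeof(ip_a),
                        NULL, 0, NI_NUMERICHOST) != 0) {
            ret = -1;
            break;
        }
        for (pb = res_b; pb != NULL; pb = pb->ai_next) {
            if (getnameinfo(pb->ai_addr, pb->ai_addrlen, ip_b, sizeof(ip_b),
                            NULL, 0, NI_NUMERICHOST) != 0) {
                ret = -1;
                break;
            }
            if (strcmp(ip_a, ip_b) == 0) {
                ret = 1; /* two bricks of one subvolume on the same node */
                break;
            }
        }
    }

    freeaddrinfo(res_a);
    freeaddrinfo(res_b);
    return ret;
}

/* Compare bricks only within their own subvolume: bricks i and j belong to
 * the same replica/disperse set when i / sub_count == j / sub_count. */
static int
check_brick_order_demo(const char *hosts[], int brick_count, int sub_count)
{
    int i, j, rc;

    for (i = 0; i < brick_count; i++) {
        for (j = i + 1; j < brick_count && j / sub_count == i / sub_count;
             j++) {
            rc = hosts_conflict(hosts[i], hosts[j]);
            if (rc != 0)
                return rc; /* 1 = bad order, -1 = resolution failure */
        }
    }
    return 0; /* brick order okay */
}

int
main(void)
{
    /* Two replica-2 subvolumes; the second pair lands on the same node. */
    const char *hosts[] = {"127.0.0.1", "127.0.0.2", "localhost", "127.0.0.1"};
    int rc = check_brick_order_demo(hosts, 4, 2);

    if (rc == 1)
        printf("bad brick order: use 'force' to override\n");
    else if (rc == 0)
        printf("brick order okay\n");
    else
        printf("failed to resolve a brick host\n");
    return 0;
}

Run against the sample list, the sketch reports a bad brick order because "localhost" and "127.0.0.1" in the second replica pair resolve to the same address. This mirrors the behaviour the patch applies to add-brick: the operation is rejected with the "not optimal" message unless the user appends 'force' to the command, exactly as with volume create.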