summaryrefslogtreecommitdiffstats
path: root/xlators/mgmt
diff options
context:
space:
mode:
authorggarg <ggarg@redhat.com>2014-04-29 09:58:16 +0530
committerKaushal M <kaushal@redhat.com>2014-09-24 05:29:43 -0700
commitb097225202629448e3b10dc4160125c376971682 (patch)
treee5ff080e3deb3e01fd4cc6036704ad68e790cc1b /xlators/mgmt
parent70d76f20ee127fe7e8e52b2d67e2362283a01f34 (diff)
glusterd: Move brick order check from cli to glusterd.
Previously the brick order check for replicate volumes on volume create and add-brick was done by the cli. This check would fail when a hostname wasn't resolvable and would question the user if it was ok to continue. If the user continued, glusterd would fail the command again as the hostname wouldn't be resolvable. This was unnecessary. This change, moves the check from cli into glusterd. The check is now performed during staging of volume create after the bricks have been resolved. This prevents the above condition from occurring. As a result of this change, the user will no longer be questioned and given an option to continue the operation when a bad brick order is given or the brick order check fails. In such a case, the user can use 'force' to bypass the check and allow the command to succeed. Change-Id: I009861efaf3fb7f553a9b00116a992f031f652cb BUG: 1091935 Signed-off-by: ggarg <ggarg@redhat.com> Reviewed-on: http://review.gluster.org/7589 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Kaushal M <kaushal@redhat.com>
Diffstat (limited to 'xlators/mgmt')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c237
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h11
2 files changed, 247 insertions, 1 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 334aef9f412..ac8407f077d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -28,12 +28,222 @@
#include "glusterd-messages.h"
#include "run.h"
+#include <stdint.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <stdlib.h>
+
#define glusterd_op_start_volume_args_get(dict, volname, flags) \
glusterd_op_stop_volume_args_get (dict, volname, flags)
extern int
_get_slave_status (dict_t *this, char *key, data_t *value, void *data);
+gf_ai_compare_t
+glusterd_compare_addrinfo (struct addrinfo *first, struct addrinfo *next)
+{
+ int ret = -1;
+ struct addrinfo *tmp1 = NULL;
+ struct addrinfo *tmp2 = NULL;
+ char firstip[NI_MAXHOST] = {0.};
+ char nextip[NI_MAXHOST] = {0,};
+
+ for (tmp1 = first; tmp1 != NULL; tmp1 = tmp1->ai_next) {
+ ret = getnameinfo (tmp1->ai_addr, tmp1->ai_addrlen, firstip,
+ NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
+ if (ret)
+ return GF_AI_COMPARE_ERROR;
+ for (tmp2 = next; tmp2 != NULL; tmp2 = tmp2->ai_next) {
+ ret = getnameinfo (tmp2->ai_addr, tmp2->ai_addrlen,
+ nextip, NI_MAXHOST, NULL, 0,
+ NI_NUMERICHOST);
+ if (ret)
+ return GF_AI_COMPARE_ERROR;
+ if (!strcmp (firstip, nextip)) {
+ return GF_AI_COMPARE_MATCH;
+ }
+ }
+ }
+ return GF_AI_COMPARE_NO_MATCH;
+}
+
+/* Check for non optimal brick order for replicate :
+ * Checks if bricks belonging to a replicate volume
+ * are present on the same server
+ */
+int32_t
+glusterd_check_brick_order(dict_t *dict, char *err_str)
+{
+ int ret = -1;
+ int i = 0;
+ int j = 0;
+ int k = 0;
+ xlator_t *this = NULL;
+ addrinfo_list_t *ai_list = NULL;
+ addrinfo_list_t *ai_list_tmp1 = NULL;
+ addrinfo_list_t *ai_list_tmp2 = NULL;
+ char *brick = NULL;
+ char *brick_list = NULL;
+ char *brick_list_dup = NULL;
+ char *tmpptr = NULL;
+ char *volname = NULL;
+ int32_t brick_count = 0;
+ int32_t type = GF_CLUSTER_TYPE_NONE;
+ int32_t sub_count = 0;
+ struct addrinfo *ai_info = NULL;
+
+ const char failed_string[2048] = "Failed to perform brick order "
+ "check. Use 'force' at the end of the command"
+ " if you want to override this behavior. ";
+ const char found_string[2048] = "Multiple bricks of a replicate "
+ "volume are present on the same server. This "
+ "setup is not optimal. Use 'force' at the "
+ "end of the command if you want to override "
+ "this behavior. ";
+
+ this = THIS;
+
+ GF_ASSERT(this);
+
+ ai_list = malloc (sizeof (addrinfo_list_t));
+ ai_list->info = NULL;
+ INIT_LIST_HEAD (&ai_list->list);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "type", &type);
+ if (ret) {
+ snprintf (err_str, 512, "Unable to get type of volume %s",
+ volname);
+ gf_log (this->name, GF_LOG_WARNING, "%s", err_str);
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "bricks", &brick_list);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Bricks check : Could not "
+ "retrieve bricks list");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "count", &brick_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Bricks check : Could not "
+ "retrieve brick count");
+ goto out;
+ }
+
+ if (type != GF_CLUSTER_TYPE_DISPERSE) {
+ ret = dict_get_int32 (dict, "replica-count", &sub_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Bricks check : Could"
+ " not retrieve replica count");
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_DEBUG, "Replicate cluster type "
+ "found. Checking brick order.");
+ } else {
+ ret = dict_get_int32 (dict, "disperse-count", &sub_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Bricks check : Could"
+ " not retrieve disperse count");
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_INFO, "Disperse cluster type"
+ " found. Checking brick order.");
+ }
+
+ brick_list_dup = gf_strdup(brick_list);
+ /* Resolve hostnames and get addrinfo */
+ while (i < brick_count) {
+ ++i;
+ brick = strtok_r (brick_list_dup, " \n", &tmpptr);
+ brick_list = tmpptr;
+ if (brick == NULL)
+ goto check_failed;
+ brick = strtok_r (brick, ":", &tmpptr);
+ if (brick == NULL)
+ goto check_failed;
+ ret = getaddrinfo (brick, NULL, NULL, &ai_info);
+ if (ret != 0) {
+ ret = 0;
+ gf_log (this->name, GF_LOG_ERROR, "unable to resolve "
+ "host name");
+ goto out;
+ }
+ ai_list_tmp1 = malloc (sizeof (addrinfo_list_t));
+ if (ai_list_tmp1 == NULL) {
+ ret = 0;
+ gf_log (this->name, GF_LOG_ERROR, "failed to allocate "
+ "memory");
+ goto out;
+ }
+ ai_list_tmp1->info = ai_info;
+ list_add_tail (&ai_list_tmp1->list, &ai_list->list);
+ ai_list_tmp1 = NULL;
+ }
+
+ i = 0;
+ ai_list_tmp1 = list_entry (ai_list->list.next, addrinfo_list_t, list);
+
+ /* Check for bad brick order */
+ while (i < brick_count) {
+ ++i;
+ ai_info = ai_list_tmp1->info;
+ ai_list_tmp1 = list_entry (ai_list_tmp1->list.next,
+ addrinfo_list_t, list);
+ if (0 == i % sub_count) {
+ j = 0;
+ continue;
+ }
+ ai_list_tmp2 = ai_list_tmp1;
+ k = j;
+ while (k < sub_count - 1) {
+ ++k;
+ ret = glusterd_compare_addrinfo (ai_info,
+ ai_list_tmp2->info);
+ if (GF_AI_COMPARE_ERROR == ret)
+ goto check_failed;
+ if (GF_AI_COMPARE_MATCH == ret)
+ goto found_bad_brick_order;
+ ai_list_tmp2 = list_entry (ai_list_tmp2->list.next,
+ addrinfo_list_t, list);
+ }
+ ++j;
+ }
+ gf_log (this->name, GF_LOG_DEBUG, "Brick order okay");
+ ret = 0;
+ goto out;
+
+check_failed:
+ gf_log (this->name, GF_LOG_ERROR, "Failed bad brick order check");
+ snprintf (err_str, sizeof (failed_string), failed_string);
+ ret = -1;
+ goto out;
+
+found_bad_brick_order:
+ gf_log (this->name, GF_LOG_INFO, "Bad brick order found");
+ snprintf (err_str, sizeof (found_string), found_string);
+ ret = -1;
+out:
+ ai_list_tmp2 = NULL;
+ GF_FREE (brick_list_dup);
+ list_for_each_entry (ai_list_tmp1, &ai_list->list, list) {
+ if (ai_list_tmp1->info)
+ freeaddrinfo (ai_list_tmp1->info);
+ free (ai_list_tmp2);
+ ai_list_tmp2 = ai_list_tmp1;
+ }
+ free (ai_list_tmp2);
+ return ret;
+}
+
int
__glusterd_handle_create_volume (rpcsvc_request_t *req)
{
@@ -724,11 +934,12 @@ glusterd_op_stage_create_volume (dict_t *dict, char **op_errstr,
char *bricks = NULL;
char *brick_list = NULL;
char *free_ptr = NULL;
- char key[PATH_MAX] = "";
+ char key[PATH_MAX] = "";
glusterd_brickinfo_t *brick_info = NULL;
int32_t brick_count = 0;
int32_t local_brick_count = 0;
int32_t i = 0;
+ int32_t type = 0;
char *brick = NULL;
char *tmpptr = NULL;
xlator_t *this = NULL;
@@ -881,6 +1092,30 @@ glusterd_op_stage_create_volume (dict_t *dict, char **op_errstr,
brick_info = NULL;
}
+ /*Check brick order if the volume type is replicate or disperse. If
+ * force at the end of command not given then check brick order.
+ */
+ ret = dict_get_int32 (dict, "type", &type);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Unable to get type of volume %s",
+ volname);
+ gf_log (this->name, GF_LOG_WARNING, "%s", msg);
+ goto out;
+ }
+
+ if (!is_force) {
+ if ((type == GF_CLUSTER_TYPE_REPLICATE) ||
+ (type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) ||
+ (type == GF_CLUSTER_TYPE_DISPERSE)) {
+ ret = glusterd_check_brick_order(dict, msg);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Not creating "
+ "volume because of bad brick order");
+ goto out;
+ }
+ }
+ }
+
ret = dict_set_int32 (rsp_dict, "brick_count", local_brick_count);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 45656bef8e1..846603585b1 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -465,6 +465,17 @@ enum glusterd_vol_comp_status_ {
GLUSTERD_VOL_COMP_RJT,
};
+typedef struct addrinfo_list {
+ struct list_head list;
+ struct addrinfo *info;
+} addrinfo_list_t;
+
+typedef enum {
+ GF_AI_COMPARE_NO_MATCH = 0,
+ GF_AI_COMPARE_MATCH = 1,
+ GF_AI_COMPARE_ERROR = 2
+} gf_ai_compare_t;
+
#define GLUSTERD_DEFAULT_PORT GF_DEFAULT_BASE_PORT
#define GLUSTERD_INFO_FILE "glusterd.info"
#define GLUSTERD_VOLUME_QUOTA_CONFIG "quota.conf"