summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaushal M <kaushal@redhat.com>2013-08-12 10:43:52 +0530
committerAnand Avati <avati@redhat.com>2013-09-13 12:11:54 -0700
commit536eccde0bbda0166ca2a2769069e6b9f7ecbf89 (patch)
tree5a89ce2e6619f76c1f196eaa51605e99ca27f7b4
parent67b0e817720eb95aee706a11fdf4633607aacd82 (diff)
glusterd: Calculate volume op-versions only on set/reset (tag: v3.4.1qa2)
Backport of http://review.gluster.org/5568. The volume op-versions are calculated during a volume set/reset, when reading a volume from disk, and when importing a volume during probe or volume sync. The calculation of the volume op-version depends on the cluster's op-version, as some features are enabled automatically depending on the cluster's op-version. We also don't store the volume op-versions persistently and don't export the volume op-versions during sync. Due to this, cases can occur which lead to inconsistencies in volumes on different peers. One such case is described below. Consider a cluster made up of 3 peers P1, P2 and P3, operating at op-version N. The cluster has two volumes V1 and V2, which have volume op-versions N (since a volume op-version cannot be greater than the cluster op-version). We have: Cluster op-version = N; V1 op-version = N; V2 op-version = N. A set operation on V1 causes the cluster's op-version to be bumped up to N+1. Assume that there exist some features that are automatically enabled on op-version N+1. The op-version of V2 remains at N as no operation has been performed on it. So: Cluster op-version = N+1; V1 op-version = N+1; V2 op-version = N. Now, we probe a new peer P4. On the new peer we will have the following op-versions: Cluster op-version = N+1; V1 op-version = N+1; V2 op-version = N+1. This happens because we don't send volume op-versions during the sync after probe. P4 will freshly calculate the op-version of V2 (assuming features have been auto enabled due to the cluster op-version being N+1) as N+1. Another case is when glusterd on a peer restarts. Assume P3 was restarted; glusterd will recalculate the volume op-versions during the restore state. Again, the op-version of V2 will be calculated as N+1, assuming auto enabled features. This will lead to inconsistency in the volume representation in memory and on disk, as glusterd will assume the volume contains auto enabled features, but the volfiles don't contain them as they were not regenerated.
These kinds of issues can be solved by calculating the volume op-version only when features are enabled and disabled (i.e. during volume set/reset), persisting the volume op-versions, and exporting/importing them. BUG: 1005043; Change-Id: Id8bb05ba2a77e510739b3b1833f98b4d6d1fa4d7; Signed-off-by: Kaushal M <kaushal@redhat.com>; Reviewed-on: http://review.gluster.org/5832; Tested-by: Gluster Build System <jenkins@build.gluster.com>; Reviewed-by: Anand Avati <avati@redhat.com>
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c24
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c49
3 files changed, 69 insertions, 6 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index fd82221..ae0c4e8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -583,6 +583,17 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)
goto out;
}
+ snprintf (buf, sizeof (buf), "%d", volinfo->op_version);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_OP_VERSION, buf);
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->client_op_version);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION,
+ buf);
+ if (ret)
+ goto out;
+
out:
if (ret)
gf_log (THIS->name, GF_LOG_ERROR, "Unable to write volume "
@@ -1736,7 +1747,6 @@ glusterd_store_retrieve_volume (char *volname)
gf_store_op_errno_t op_errno = GD_STORE_SUCCESS;
ret = glusterd_volinfo_new (&volinfo);
-
if (ret)
goto out;
@@ -1749,12 +1759,10 @@ glusterd_store_retrieve_volume (char *volname)
GLUSTERD_VOLUME_INFO_FILE);
ret = gf_store_handle_retrieve (path, &volinfo->shandle);
-
if (ret)
goto out;
ret = gf_store_iter_new (volinfo->shandle, &iter);
-
if (ret)
goto out;
@@ -1825,6 +1833,12 @@ glusterd_store_retrieve_volume (char *volname)
} else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_BACKEND,
strlen (GLUSTERD_STORE_KEY_VOL_BACKEND))) {
volinfo->backend = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_OP_VERSION,
+ strlen (GLUSTERD_STORE_KEY_VOL_OP_VERSION))) {
+ volinfo->op_version = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION,
+ strlen (GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION))) {
+ volinfo->client_op_version = atoi (value);
} else {
if (is_key_glusterd_hooks_friendly (key)) {
@@ -1903,6 +1917,9 @@ glusterd_store_retrieve_volume (char *volname)
volinfo->subvol_count = (volinfo->brick_count /
volinfo->dist_leaf_count);
+ /* Only calculate volume op-versions if they are not found */
+ if (!volinfo->op_version && !volinfo->client_op_version)
+ gd_update_volume_op_versions (volinfo);
}
if (op_errno != GD_STORE_EOF)
@@ -1921,7 +1938,6 @@ glusterd_store_retrieve_volume (char *volname)
if (ret)
goto out;
- gd_update_volume_op_versions (volinfo);
list_add_tail (&volinfo->vol_list, &priv->volumes);
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
index 30c6e09..4f39bdf 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.h
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -56,6 +56,8 @@ typedef enum glusterd_store_ver_ac_{
#define GLUSTERD_STORE_KEY_DEFRAG_OP "rebalance_op"
#define GLUSTERD_STORE_KEY_USERNAME "username"
#define GLUSTERD_STORE_KEY_PASSWORD "password"
+#define GLUSTERD_STORE_KEY_VOL_OP_VERSION "op-version"
+#define GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION "client-op-version"
#define GLUSTERD_STORE_KEY_BRICK_HOSTNAME "hostname"
#define GLUSTERD_STORE_KEY_BRICK_PATH "path"
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 27c9bdc..ad5765d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -2077,6 +2077,17 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
i++;
}
+ /* Add volume op-versions to dict. This prevents volume inconsistencies
+ * in the cluster
+ */
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.op-version", count);
+ ret = dict_set_int32 (dict, key, volinfo->op_version);
+ if (ret)
+ goto out;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.client-op-version", count);
+ ret = dict_set_int32 (dict, key, volinfo->client_op_version);
out:
GF_FREE (volume_id_str);
@@ -2666,6 +2677,8 @@ glusterd_import_volinfo (dict_t *vols, int count,
int rb_status = 0;
char *rebalance_id_str = NULL;
char *rb_id_str = NULL;
+ int op_version = 0;
+ int client_op_version = 0;
GF_ASSERT (vols);
GF_ASSERT (volinfo);
@@ -2892,6 +2905,40 @@ glusterd_import_volinfo (dict_t *vols, int count,
ret = glusterd_import_friend_volume_opts (vols, count, new_volinfo);
if (ret)
goto out;
+
+ /* Import the volume's op-versions if available else set it to 1.
+ * Not having op-versions implies this information was obtained from a
+ * op-version 1 friend (gluster-3.3), ergo the cluster is at op-version
+ * 1 and all volumes are at op-versions 1.
+ *
+ * Either both the volume op-versions should be absent or both should be
+ * present. Only one being present is a failure
+ */
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.op-version", count);
+ ret = dict_get_int32 (vols, key, &op_version);
+ if (ret)
+ ret = 0;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.client-op-version", count);
+ ret = dict_get_int32 (vols, key, &client_op_version);
+ if (ret)
+ ret = 0;
+
+ if (op_version && client_op_version) {
+ new_volinfo->op_version = op_version;
+ new_volinfo->client_op_version = client_op_version;
+ } else if (((op_version == 0) && (client_op_version != 0)) ||
+ ((op_version != 0) && (client_op_version == 0))) {
+ ret = -1;
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "Only one volume op-version found");
+ goto out;
+ } else {
+ new_volinfo->op_version = 1;
+ new_volinfo->client_op_version = 1;
+ }
+
ret = glusterd_import_bricks (vols, count, new_volinfo);
if (ret)
goto out;
@@ -3059,8 +3106,6 @@ glusterd_import_friend_volume (dict_t *vols, size_t count)
(void) glusterd_start_bricks (new_volinfo);
}
- gd_update_volume_op_versions (new_volinfo);
-
ret = glusterd_store_volinfo (new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
ret = glusterd_create_volfiles_and_notify_services (new_volinfo);
if (ret)