Diffstat (limited to 'xlators')
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-op-sm.c           |    9
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-op-sm.h           |    2
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c  |  229
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-utils.c           |  166
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-utils.h           |    4
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd.h                 |    3
6 files changed, 343 insertions, 70 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 7227c6a7bef..11b16e216ec 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -2412,6 +2412,7 @@ glusterd_stop_bricks (glusterd_volinfo_t *volinfo)
int
glusterd_start_bricks (glusterd_volinfo_t *volinfo)
+
{
int ret = -1;
glusterd_brickinfo_t *brickinfo = NULL;
@@ -2440,14 +2441,6 @@ glusterd_start_bricks (glusterd_volinfo_t *volinfo)
goto out;
}
}
-
- }
- ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
- if (ret) {
- gf_msg (THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL,
- "Failed to write volinfo for volume %s",
- volinfo->volname);
- goto out;
}
ret = 0;
out:
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
index 7139a27ffb4..daf8f21cc6c 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
@@ -274,8 +274,10 @@ glusterd_volume_stats_write_perf (char *brick_path, int32_t blk_size,
int32_t blk_count, double *throughput, double *time);
gf_boolean_t
glusterd_is_volume_started (glusterd_volinfo_t *volinfo);
+
int
glusterd_start_bricks (glusterd_volinfo_t *volinfo);
+
gf_boolean_t
glusterd_are_all_volumes_stopped ();
int
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
index 19a3cf7b10d..85e92c39141 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
@@ -1760,8 +1760,11 @@ out:
* state, i.e either both would be hosting bricks or both would not be hosting
* bricks, then a decision can't be taken and a peer-reject will happen.
*
- * glusterd_compare_and_update_snap() implements the following algorithm to
- * perform the above task:
+ * glusterd_compare_snap () and glusterd_update_snaps () implement the
+ * following algorithm to perform the above task. Note that the former
+ * iterates over the snaps one at a time and records the relevant decisions
+ * in the dictionary, and glusterd_update_snaps () then goes over all the
+ * snaps and applies those updates in one go as part of a synctask.
* Step 1: Start.
* Step 2: Check if the peer is missing a delete or restore on the said snap.
* If yes, goto step 6.
@@ -1786,21 +1789,18 @@ out:
*
*/
int32_t
-glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
- char *peername, uuid_t peerid)
+glusterd_compare_snap (dict_t *peer_data, int32_t snap_count,
+ char *peername, uuid_t peerid)
{
char buf[NAME_MAX] = "";
char prefix[NAME_MAX] = "";
char *peer_snap_name = NULL;
char *peer_snap_id = NULL;
- dict_t *dict = NULL;
glusterd_snap_t *snap = NULL;
gf_boolean_t conflict = _gf_false;
gf_boolean_t is_local = _gf_false;
gf_boolean_t is_hosted = _gf_false;
gf_boolean_t missed_delete = _gf_false;
- gf_boolean_t remove_lvm = _gf_true;
-
int32_t ret = -1;
int32_t volcount = 0;
xlator_t *this = NULL;
@@ -1812,6 +1812,14 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
snprintf (prefix, sizeof(prefix), "snap%d", snap_count);
+ ret = dict_set_uint32 (peer_data, buf, 0);
+ snprintf (buf, sizeof(buf), "%s.accept_peer_data", prefix);
+ ret = dict_set_uint32 (peer_data, buf, 0);
+ snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
+ ret = dict_set_uint32 (peer_data, buf, 0);
+ snprintf (buf, sizeof(buf), "%s.remove_my_data", prefix);
+ ret = dict_set_uint32 (peer_data, buf, 0);
+
/* Fetch the peer's snapname */
snprintf (buf, sizeof(buf), "%s.snapname", prefix);
ret = dict_get_str (peer_data, buf, &peer_snap_name);
@@ -1868,7 +1876,10 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
/* Peer has snap with the same snapname
* and snap_id, which local node doesn't have.
*/
- goto accept_peer_data;
+ snprintf (buf, sizeof(buf), "%s.accept_peer_data",
+ prefix);
+ ret = dict_set_uint32 (peer_data, buf, 1);
+ goto out;
}
/* Peer has snap with the same snapname
* and snap_id. Now check if peer has a
@@ -1895,12 +1906,18 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
* When removing data from local node, make sure
* we are not removing backend lvm of the snap.
*/
- remove_lvm = _gf_false;
- goto remove_my_data;
+ snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
+ ret = dict_set_uint32 (peer_data, buf, 0);
+ snprintf (buf, sizeof(buf), "%s.remove_my_data",
+ prefix);
+ ret = dict_set_uint32 (peer_data, buf, 1);
+ snprintf (buf, sizeof(buf), "%s.accept_peer_data",
+ prefix);
+ ret = dict_set_uint32 (peer_data, buf, 1);
} else {
ret = 0;
- goto out;
}
+ goto out;
}
/* There is a conflict. Check if the current node is
@@ -1952,50 +1969,176 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
* And local node isn't. Hence remove local node's
* data and accept peer data
*/
-
gf_msg_debug (this->name, 0, "Peer hosts bricks for conflicting "
"snap(%s). Removing local data. Accepting peer data.",
peer_snap_name);
- remove_lvm = _gf_true;
+ snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
+ ret = dict_set_uint32 (peer_data, buf, 1);
+ snprintf (buf, sizeof(buf), "%s.remove_my_data",
+ prefix);
+ ret = dict_set_uint32 (peer_data, buf, 1);
+ snprintf (buf, sizeof(buf), "%s.accept_peer_data", prefix);
+ ret = dict_set_uint32 (peer_data, buf, 1);
-remove_my_data:
+out:
+ gf_msg_trace (this->name, 0, "Returning %d", ret);
+ return ret;
+}
- dict = dict_new();
- if (!dict) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_DICT_CREATE_FAIL,
- "Unable to create dict");
- ret = -1;
- goto out;
+int32_t
+glusterd_update_snaps_synctask (void *opaque)
+{
+ int32_t ret = -1;
+ int32_t snap_count = 0;
+ int i = 1;
+ xlator_t *this = NULL;
+ dict_t *peer_data = NULL;
+ char buf[NAME_MAX] = "";
+ char prefix[NAME_MAX] = "";
+ char *peer_snap_name = NULL;
+ char *peer_snap_id = NULL;
+ char *peername = NULL;
+ gf_boolean_t remove_lvm = _gf_false;
+ gf_boolean_t remove_my_data = _gf_false;
+ gf_boolean_t accept_peer_data = _gf_false;
+ int32_t val = 0;
+ glusterd_snap_t *snap = NULL;
+ dict_t *dict = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ peer_data = (dict_t *)opaque;
+ GF_ASSERT (peer_data);
+
+ synclock_lock (&conf->big_lock);
+
+ while (conf->restart_bricks) {
+ synclock_unlock (&conf->big_lock);
+ sleep (2);
+ synclock_lock (&conf->big_lock);
}
+ conf->restart_bricks = _gf_true;
- ret = glusterd_snap_remove (dict, snap, remove_lvm, _gf_false,
- _gf_false);
+ ret = dict_get_int32 (peer_data, "snap_count", &snap_count);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_SNAP_REMOVE_FAIL,
- "Failed to remove snap %s", snap->snapname);
+ GD_MSG_DICT_GET_FAILED, "Failed to fetch snap_count");
goto out;
}
-
-accept_peer_data:
-
- /* Accept Peer Data */
- ret = glusterd_import_friend_snap (peer_data, snap_count,
- peer_snap_name, peer_snap_id);
+ ret = dict_get_str (peer_data, "peername", &peername);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_SNAP_IMPORT_FAIL,
- "Failed to import snap %s from peer %s",
- peer_snap_name, peername);
+ GD_MSG_DICT_GET_FAILED, "Failed to fetch peername");
goto out;
}
+ for (i = 1; i <= snap_count; i++) {
+ snprintf (prefix, sizeof(prefix), "snap%d", i);
+
+ /* Fetch the peer's snapname */
+ snprintf (buf, sizeof(buf), "%s.snapname", prefix);
+ ret = dict_get_str (peer_data, buf, &peer_snap_name);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch snapname from peer: %s",
+ peername);
+ goto out;
+ }
+
+ /* Fetch the peer's snap_id */
+ snprintf (buf, sizeof(buf), "%s.snap_id", prefix);
+ ret = dict_get_str (peer_data, buf, &peer_snap_id);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch snap_id from peer: %s",
+ peername);
+ goto out;
+ }
+
+ /* remove_my_data */
+ snprintf (buf, sizeof(buf), "%s.remove_my_data", prefix);
+ ret = dict_get_int32 (peer_data, buf, &val);
+ if (val)
+ remove_my_data = _gf_true;
+ else
+ remove_my_data = _gf_false;
+
+ if (remove_my_data) {
+ snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
+ ret = dict_get_int32 (peer_data, buf, &val);
+ if (val)
+ remove_lvm = _gf_true;
+ else
+ remove_lvm = _gf_false;
+
+ dict = dict_new();
+ if (!dict) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_CREATE_FAIL,
+ "Unable to create dict");
+ ret = -1;
+ goto out;
+ }
+ snap = glusterd_find_snap_by_name (peer_snap_name);
+ if (!snap) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MISSED_SNAP_PRESENT,
+ "Snapshot %s from peer %s missing on "
+ "localhost", peer_snap_name,
+ peername);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_snap_remove (dict, snap, remove_lvm,
+ _gf_false, _gf_false);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SNAP_REMOVE_FAIL,
+ "Failed to remove snap %s",
+ snap->snapname);
+ goto out;
+ }
+ if (dict)
+ dict_unref (dict);
+ }
+ snprintf (buf, sizeof(buf), "%s.accept_peer_data", prefix);
+ ret = dict_get_int32 (peer_data, buf, &val);
+ if (val)
+ accept_peer_data = _gf_true;
+ else
+ accept_peer_data = _gf_false;
+
+ if (accept_peer_data) {
+ /* Accept Peer Data */
+ ret = glusterd_import_friend_snap (peer_data,
+ i,
+ peer_snap_name,
+ peer_snap_id);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SNAP_IMPORT_FAIL,
+ "Failed to import snap %s from peer %s",
+ peer_snap_name, peername);
+ goto out;
+ }
+ }
+ }
+
out:
+ if (peer_data)
+ dict_unref (peer_data);
if (dict)
dict_unref (dict);
+ conf->restart_bricks = _gf_false;
- gf_msg_trace (this->name, 0, "Returning %d", ret);
return ret;
}
@@ -2010,6 +2153,7 @@ glusterd_compare_friend_snapshots (dict_t *peer_data, char *peername,
int32_t snap_count = 0;
int i = 1;
xlator_t *this = NULL;
+ dict_t *peer_data_copy = NULL;
this = THIS;
GF_ASSERT (this);
@@ -2025,8 +2169,7 @@ glusterd_compare_friend_snapshots (dict_t *peer_data, char *peername,
for (i = 1; i <= snap_count; i++) {
/* Compare one snapshot from peer_data at a time */
- ret = glusterd_compare_and_update_snap (peer_data, i, peername,
- peerid);
+ ret = glusterd_compare_snap (peer_data, i, peername, peerid);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
GD_MSG_SNAPSHOT_OP_FAILED,
@@ -2035,6 +2178,18 @@ glusterd_compare_friend_snapshots (dict_t *peer_data, char *peername,
goto out;
}
}
+ /* Update the snaps at one go */
+ peer_data_copy = dict_copy_with_ref (peer_data, NULL);
+ ret = dict_set_str (peer_data_copy, "peername", peername);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set peername into the dict");
+ if (peer_data_copy)
+ dict_unref (peer_data_copy);
+ goto out;
+ }
+ glusterd_launch_synctask (glusterd_update_snaps_synctask,
+ peer_data_copy);
out:
gf_msg_trace (this->name, 0, "Returning %d", ret);
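
A minimal sketch (not part of the patch), assuming the libglusterfs dict API
(dict_set_uint32/dict_get_int32), of the per-snap flag convention the hunks
above introduce: glusterd_compare_snap () records one decision per snapshot
under "snap<N>.*" keys in peer_data, and glusterd_update_snaps_synctask ()
later replays those decisions inside a synctask. The helper name below is
hypothetical, for illustration only.

    /* Illustrative only: recording one per-snap decision in peer_data. */
    static int
    mark_snap_decision (dict_t *peer_data, int snap_count,
                        gf_boolean_t remove_lvm, gf_boolean_t remove_my_data,
                        gf_boolean_t accept_peer_data)
    {
            char key[NAME_MAX] = "";
            int  ret           = -1;

            /* keys are namespaced per snapshot: "snap<N>.<flag>" */
            snprintf (key, sizeof (key), "snap%d.remove_lvm", snap_count);
            ret = dict_set_uint32 (peer_data, key, remove_lvm ? 1 : 0);
            if (ret)
                    return ret;

            snprintf (key, sizeof (key), "snap%d.remove_my_data", snap_count);
            ret = dict_set_uint32 (peer_data, key, remove_my_data ? 1 : 0);
            if (ret)
                    return ret;

            snprintf (key, sizeof (key), "snap%d.accept_peer_data", snap_count);
            return dict_set_uint32 (peer_data, key, accept_peer_data ? 1 : 0);
    }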
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 7d1fc33b05d..5b2a3dda6d1 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -3460,6 +3460,14 @@ glusterd_compare_friend_volume (dict_t *peer_data, int32_t count,
*status = GLUSTERD_VOL_COMP_SCS;
out:
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.update", count);
+
+ if (*status == GLUSTERD_VOL_COMP_UPDATE_REQ) {
+ ret = dict_set_int32 (peer_data, key, 1);
+ } else {
+ ret = dict_set_int32 (peer_data, key, 0);
+ }
if (*status == GLUSTERD_VOL_COMP_RJT) {
gf_event (EVENT_COMPARE_FRIEND_VOLUME_FAILED, "volume=%s",
volinfo->volname);
@@ -3532,13 +3540,12 @@ glusterd_spawn_daemons (void *opaque)
int ret = -1;
synclock_lock (&conf->big_lock);
- glusterd_restart_bricks (conf);
+ glusterd_restart_bricks ();
glusterd_restart_gsyncds (conf);
glusterd_restart_rebalance (conf);
ret = glusterd_snapdsvc_restart ();
ret = glusterd_tierdsvc_restart ();
ret = glusterd_gfproxydsvc_restart ();
-
return ret;
}
@@ -4304,20 +4311,35 @@ out:
int32_t
glusterd_volume_disconnect_all_bricks (glusterd_volinfo_t *volinfo)
{
- int ret = 0;
- glusterd_brickinfo_t *brickinfo = NULL;
+ int ret = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brick_proc_t *brick_proc = NULL;
+ int brick_count = 0;
+
GF_ASSERT (volinfo);
cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
if (glusterd_is_brick_started (brickinfo)) {
- ret = glusterd_brick_disconnect (brickinfo);
- if (ret) {
- gf_msg ("glusterd", GF_LOG_ERROR, 0,
- GD_MSD_BRICK_DISCONNECT_FAIL,
- "Failed to "
- "disconnect %s:%s", brickinfo->hostname,
- brickinfo->path);
- break;
+ /* If brick multiplexing is enabled then we can't
+ * blindly set brickinfo->rpc to NULL as it might impact
+ * the other attached bricks.
+ */
+ ret = glusterd_brick_proc_for_port (brickinfo->port,
+ &brick_proc);
+ if (!ret) {
+ brick_count = brick_proc->brick_count;
+ }
+ if (!is_brick_mx_enabled () || brick_count == 0) {
+ ret = glusterd_brick_disconnect (brickinfo);
+ if (ret) {
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSD_BRICK_DISCONNECT_FAIL,
+ "Failed to "
+ "disconnect %s:%s",
+ brickinfo->hostname,
+ brickinfo->path);
+ break;
+ }
}
}
}
@@ -4556,7 +4578,7 @@ out:
}
int32_t
-glusterd_import_friend_volume (dict_t *peer_data, size_t count)
+glusterd_import_friend_volume (dict_t *peer_data, int count)
{
int32_t ret = -1;
@@ -4565,6 +4587,8 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
glusterd_volinfo_t *old_volinfo = NULL;
glusterd_volinfo_t *new_volinfo = NULL;
glusterd_svc_t *svc = NULL;
+ int32_t update = 0;
+ char key[512] = {0,};
GF_ASSERT (peer_data);
@@ -4572,6 +4596,15 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
GF_ASSERT (this);
priv = this->private;
GF_ASSERT (priv);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.update", count);
+ ret = dict_get_int32 (peer_data, key, &update);
+ if (ret || !update) {
+ /* if update is 0 it means this volume doesn't need to be imported */
+ goto out;
+ }
+
ret = glusterd_import_volinfo (peer_data, count,
&new_volinfo, "volume");
if (ret)
@@ -4585,6 +4618,14 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
ret = glusterd_volinfo_find (new_volinfo->volname, &old_volinfo);
if (0 == ret) {
+ if (new_volinfo->version <= old_volinfo->version) {
+ /* When this condition is true, it means that the other
+ * import-volume synctask thread already has an up-to-date
+ * copy of this volume, so just ignore this volume
+ * now
+ */
+ goto out;
+ }
/* Ref count the old_volinfo such that deleting it doesn't crash
* if its been already in use by other thread
*/
@@ -4615,7 +4656,8 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
}
}
- ret = glusterd_store_volinfo (new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
+ ret = glusterd_store_volinfo (new_volinfo,
+ GLUSTERD_VOLINFO_VER_AC_NONE);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
GD_MSG_VOLINFO_STORE_FAIL, "Failed to store "
@@ -4643,6 +4685,60 @@ out:
}
int32_t
+glusterd_import_friend_volumes_synctask (void *opaque)
+{
+ int32_t ret = -1;
+ int32_t count = 0;
+ int i = 1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ dict_t *peer_data = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ peer_data = (dict_t *)opaque;
+ GF_ASSERT (peer_data);
+
+ ret = dict_get_int32 (peer_data, "count", &count);
+ if (ret)
+ goto out;
+
+ synclock_lock (&conf->big_lock);
+
+ /* We need to ensure that importing a volume doesn't race with
+ * another thread that restarts bricks as part of a glusterd
+ * restart (refer glusterd_restart_bricks ())
+ */
+ while (conf->restart_bricks) {
+ synclock_unlock (&conf->big_lock);
+ sleep (2);
+ synclock_lock (&conf->big_lock);
+ }
+ conf->restart_bricks = _gf_true;
+
+ while (i <= count) {
+ ret = glusterd_import_friend_volume (peer_data, i);
+ if (ret) {
+ conf->restart_bricks = _gf_false;
+ goto out;
+ }
+ i++;
+ }
+ glusterd_svcs_manager (NULL);
+ conf->restart_bricks = _gf_false;
+out:
+ if (peer_data)
+ dict_unref (peer_data);
+
+ gf_msg_debug ("glusterd", 0, "Returning with %d", ret);
+ return ret;
+}
+
+int32_t
glusterd_import_friend_volumes (dict_t *peer_data)
{
int32_t ret = -1;
@@ -4781,8 +4877,10 @@ glusterd_import_global_opts (dict_t *friend_data)
* recompute if quorum is met. If quorum is not met bricks are
* not started and those already running are stopped
*/
- if (old_quorum != new_quorum)
- glusterd_restart_bricks (conf);
+ if (old_quorum != new_quorum) {
+ glusterd_launch_synctask (glusterd_restart_bricks,
+ NULL);
+ }
}
ret = 0;
@@ -4802,6 +4900,7 @@ glusterd_compare_friend_data (dict_t *peer_data, int32_t *status,
gf_boolean_t update = _gf_false;
xlator_t *this = NULL;
glusterd_conf_t *priv = NULL;
+ dict_t *peer_data_copy = NULL;
this = THIS;
GF_ASSERT (this);
@@ -4833,18 +4932,23 @@ glusterd_compare_friend_data (dict_t *peer_data, int32_t *status,
goto out;
}
if (GLUSTERD_VOL_COMP_UPDATE_REQ == *status) {
- ret = glusterd_import_friend_volume (peer_data, i);
- if (ret) {
- goto out;
- }
update = _gf_true;
- *status = GLUSTERD_VOL_COMP_NONE;
}
i++;
}
if (update) {
- glusterd_svcs_manager (NULL);
+ /* Launch the friend-volume import as a separate synctask, as it
+ * has to start bricks and, in case brick multiplexing is enabled,
+ * may need to wait for the first brick to come up before attaching
+ * the subsequent bricks
+ */
+ peer_data_copy = dict_copy_with_ref (peer_data, NULL);
+ glusterd_launch_synctask
+ (glusterd_import_friend_volumes_synctask,
+ peer_data_copy);
+ if (ret)
+ goto out;
}
out:
@@ -5994,7 +6098,7 @@ out:
}
int
-glusterd_restart_bricks (glusterd_conf_t *conf)
+glusterd_restart_bricks (void *opaque)
{
int ret = 0;
glusterd_volinfo_t *volinfo = NULL;
@@ -6002,6 +6106,7 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
glusterd_snap_t *snap = NULL;
gf_boolean_t start_svcs = _gf_false;
xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
int active_count = 0;
int quorum_count = 0;
gf_boolean_t node_quorum = _gf_false;
@@ -6012,6 +6117,17 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
conf = this->private;
GF_VALIDATE_OR_GOTO (this->name, conf, return_block);
+ /* We need to ensure that restarting the bricks during glusterd restart
+ * shouldn't race with the import volume thread (refer
+ * glusterd_compare_friend_data ())
+ */
+ while (conf->restart_bricks) {
+ synclock_unlock (&conf->big_lock);
+ sleep (2);
+ synclock_lock (&conf->big_lock);
+ }
+ conf->restart_bricks = _gf_true;
+
++(conf->blockers);
ret = glusterd_get_quorum_cluster_counts (this, &active_count,
&quorum_count);
@@ -6022,8 +6138,9 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
node_quorum = _gf_true;
cds_list_for_each_entry (volinfo, &conf->volumes, vol_list) {
- if (volinfo->status != GLUSTERD_STATUS_STARTED)
+ if (volinfo->status != GLUSTERD_STATUS_STARTED) {
continue;
+ }
gf_msg_debug (this->name, 0, "starting the volume %s",
volinfo->volname);
@@ -6130,6 +6247,7 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
out:
--(conf->blockers);
conf->restart_done = _gf_true;
+ conf->restart_bricks = _gf_false;
return_block:
return ret;
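
A minimal sketch (not part of the patch) of the serialization pattern the
hunks above share: both glusterd_import_friend_volumes_synctask () and
glusterd_restart_bricks () hold conf->big_lock, drop it while another thread
owns the brick-restart window (conf->restart_bricks set), then claim the
window themselves before proceeding; the owner clears the flag when done.
The helper names are hypothetical, for illustration only.

    /* Illustrative only; conf->big_lock is assumed to be held on entry. */
    static void
    claim_brick_restart_window (glusterd_conf_t *conf)
    {
            while (conf->restart_bricks) {
                    synclock_unlock (&conf->big_lock);
                    sleep (2);                /* back off until the owner finishes */
                    synclock_lock (&conf->big_lock);
            }
            conf->restart_bricks = _gf_true;  /* claim the window */
    }

    static void
    release_brick_restart_window (glusterd_conf_t *conf)
    {
            conf->restart_bricks = _gf_false; /* let any waiter proceed */
    }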
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 347a2282b89..4b5b443db0e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -244,6 +244,10 @@ glusterd_pending_node_put_rpc (glusterd_pending_node_t *pending_node);
int
glusterd_remote_hostname_get (rpcsvc_request_t *req,
char *remote_host, int len);
+
+int32_t
+glusterd_import_friend_volumes_synctask (void *opaque);
+
int32_t
glusterd_import_friend_volumes (dict_t *peer_data);
void
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index d56727744cb..e7a4c80a635 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -197,6 +197,7 @@ typedef struct {
int32_t workers;
uint32_t blockers;
uint32_t mgmt_v3_lock_timeout;
+ gf_boolean_t restart_bricks;
} glusterd_conf_t;
@@ -1081,7 +1082,7 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
dict_t *volumes, int count);
int
-glusterd_restart_bricks (glusterd_conf_t *conf);
+glusterd_restart_bricks ();
int32_t
glusterd_volume_txn (rpcsvc_request_t *req, char *volname, int flags,
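
A minimal sketch (not part of the patch), assuming glusterd's
glusterd_launch_synctask () helper and the dict_copy_with_ref () API, of how a
handler hands a private, ref-counted copy of peer_data to one of the new
synctasks; the synctask then performs the final dict_unref () (see the "out:"
labels above). The wrapper name is hypothetical, for illustration only.

    /* Illustrative only: hand a ref-counted dict copy to the import synctask. */
    static int
    launch_volume_import (dict_t *peer_data)
    {
            dict_t *copy = dict_copy_with_ref (peer_data, NULL);

            if (!copy)
                    return -1;

            /* runs glusterd_import_friend_volumes_synctask (copy) in a
             * synctask; the task unrefs 'copy' when it is done. */
            glusterd_launch_synctask (glusterd_import_friend_volumes_synctask,
                                      copy);
            return 0;
    }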