path: root/xlators/mgmt/glusterd/src/glusterd-op-sm.c
author    Mohammed Rafi KC <rkavunga@redhat.com>    2019-02-25 10:05:32 +0530
committer Amar Tumballi <amarts@redhat.com>         2019-04-01 03:44:23 +0000
commit    bc3694d7cfc868a2ed6344ea123faf19fce28d13 (patch)
tree      51764aa4445462081273444d5ff2499b1e5375f7 /xlators/mgmt/glusterd/src/glusterd-op-sm.c
parent    92ae26ae8039847e38c738ef98835a14be9d4296 (diff)
mgmt/shd: Implement multiplexing in self heal daemon
Problem: The shd daemon is per node, which means it creates one graph containing all volumes. While this is great for utilizing resources, it is not so good in terms of performance and manageability, because self-heal daemons do not have the capability to reconfigure their graphs automatically. So each time a configuration change happens to a volume (replicate/disperse), we need to restart shd to bring the change into the graph. Because of this, all ongoing heals for all other volumes have to be stopped midway and restarted all over again.

Solution: This change makes shd a per-volume daemon, so that a graph is generated for each volume. When we want to start/reconfigure shd for a volume, we first search for an existing shd running on the node; if there is none, we start a new process. If an shd daemon is already running, we simply detach the volume's graph and reattach the updated graph for that volume. This does not touch any ongoing operations for other volumes on the shd daemon.

Example of an shd graph when it is a per-volume graph:

        -----------------------
        |    debug-iostat     |
        -----------------------
         /         |        \
        /          |         \
  ---------    ---------    ---------
  | AFR-1 |    | AFR-2 |    | AFR-3 |
  ---------    ---------    ---------

A running shd daemon with 3 volumes will have a graph like:

        -----------------------
        |    debug-iostat     |
        -----------------------
         /          |          \
        /           |           \
  ------------   ------------   ------------
  | volume-1 |   | volume-2 |   | volume-3 |
  ------------   ------------   ------------

Change-Id: Idcb2698be3eeb95beaac47125565c93370afbd99
fixes: bz#1659708
Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
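As a rough illustration of the start/attach/detach flow described above (not the code added by this patch), the per-volume decision could look like the sketch below. The types shd_proc_t and volinfo_t and the four helpers are hypothetical stand-ins, not the glusterd API.

    /* Hypothetical sketch only: shd_proc_t, volinfo_t and the helpers
     * below are illustrative stand-ins, not the glusterd API introduced
     * by this patch. */
    typedef struct shd_proc shd_proc_t;   /* the one shd process on this node */
    typedef struct volinfo volinfo_t;     /* stand-in for glusterd_volinfo_t   */

    shd_proc_t *find_running_shd(void);                   /* NULL if no shd is running   */
    int start_shd_process(volinfo_t *vol);                /* spawn shd with this graph   */
    int shd_detach_graph(shd_proc_t *p, volinfo_t *vol);  /* remove the volume's graph   */
    int shd_attach_graph(shd_proc_t *p, volinfo_t *vol);  /* attach the regenerated one  */

    static int
    reconfigure_shd_for_volume(volinfo_t *volinfo)
    {
        shd_proc_t *proc = find_running_shd();

        if (!proc) {
            /* No shd on this node yet: start one carrying only this
             * volume's graph. */
            return start_shd_process(volinfo);
        }

        /* An shd is already running: detach this volume's graph and
         * attach the updated one.  Heals in progress for the other
         * volumes served by the same process continue undisturbed. */
        if (shd_detach_graph(proc, volinfo) != 0)
            return -1;

        return shd_attach_graph(proc, volinfo);
    }

This is why the hunks below switch from the node-wide priv->shd_svc to the per-volume volinfo->shd.svc and pass a volinfo into glusterd_svcs_reconfigure().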
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-op-sm.c')
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-op-sm.c | 84
1 file changed, 54 insertions(+), 30 deletions(-)
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 84c34f1fe4a..115622d35c6 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -44,6 +44,7 @@
#include "glusterd-snapshot-utils.h"
#include "glusterd-svc-mgmt.h"
#include "glusterd-svc-helper.h"
+#include "glusterd-shd-svc-helper.h"
#include "glusterd-shd-svc.h"
#include "glusterd-nfs-svc.h"
#include "glusterd-quotad-svc.h"
@@ -2202,6 +2203,11 @@ glusterd_options_reset(glusterd_volinfo_t *volinfo, char *key,
if (ret)
goto out;
+ svc = &(volinfo->shd.svc);
+ ret = svc->reconfigure(volinfo);
+ if (ret)
+ goto out;
+
ret = glusterd_create_volfiles_and_notify_services(volinfo);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
@@ -2216,7 +2222,7 @@ glusterd_options_reset(glusterd_volinfo_t *volinfo, char *key,
goto out;
if (GLUSTERD_STATUS_STARTED == volinfo->status) {
- ret = glusterd_svcs_reconfigure();
+ ret = glusterd_svcs_reconfigure(volinfo);
if (ret)
goto out;
}
@@ -2660,6 +2666,11 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict,
if (ret)
goto out;
+ svc = &(volinfo->shd.svc);
+ ret = svc->reconfigure(volinfo);
+ if (ret)
+ goto out;
+
ret = glusterd_create_volfiles_and_notify_services(volinfo);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0,
@@ -2673,7 +2684,7 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict,
}
}
if (svcs_reconfigure) {
- ret = glusterd_svcs_reconfigure();
+ ret = glusterd_svcs_reconfigure(NULL);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL,
"Unable to restart "
@@ -3054,6 +3065,11 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
if (ret)
goto out;
+ svc = &(volinfo->shd.svc);
+ ret = svc->reconfigure(volinfo);
+ if (ret)
+ goto out;
+
ret = glusterd_create_volfiles_and_notify_services(volinfo);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
@@ -3069,7 +3085,7 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
goto out;
if (GLUSTERD_STATUS_STARTED == volinfo->status) {
- ret = glusterd_svcs_reconfigure();
+ ret = glusterd_svcs_reconfigure(volinfo);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL,
"Unable to restart services");
@@ -3102,6 +3118,11 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
if (ret)
goto out;
+ svc = &(volinfo->shd.svc);
+ ret = svc->reconfigure(volinfo);
+ if (ret)
+ goto out;
+
ret = glusterd_create_volfiles_and_notify_services(volinfo);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
@@ -3117,7 +3138,7 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
goto out;
if (GLUSTERD_STATUS_STARTED == volinfo->status) {
- ret = glusterd_svcs_reconfigure();
+ ret = glusterd_svcs_reconfigure(volinfo);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL,
"Unable to restart services");
@@ -3324,7 +3345,7 @@ glusterd_op_stats_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
goto out;
if (GLUSTERD_STATUS_STARTED == volinfo->status) {
- ret = glusterd_svcs_reconfigure();
+ ret = glusterd_svcs_reconfigure(volinfo);
if (ret)
goto out;
}
@@ -3607,14 +3628,6 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
other_count++;
node_count++;
- } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
- ret = glusterd_add_node_to_dict(priv->shd_svc.name, rsp_dict, 0,
- vol_opts);
- if (ret)
- goto out;
- other_count++;
- node_count++;
-
} else if ((cmd & GF_CLI_STATUS_QUOTAD) != 0) {
ret = glusterd_add_node_to_dict(priv->quotad_svc.name, rsp_dict, 0,
vol_opts);
@@ -3648,6 +3661,12 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
goto out;
other_count++;
node_count++;
+ } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
+ ret = glusterd_add_shd_to_dict(volinfo, rsp_dict, other_index);
+ if (ret)
+ goto out;
+ other_count++;
+ node_count++;
} else if ((cmd & GF_CLI_STATUS_BRICK) != 0) {
ret = dict_get_strn(dict, "brick", SLEN("brick"), &brick);
if (ret)
@@ -3710,6 +3729,19 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
node_count++;
}
+ if (glusterd_is_shd_compatible_volume(volinfo)) {
+ shd_enabled = gd_is_self_heal_enabled(volinfo, vol_opts);
+ if (shd_enabled) {
+ ret = glusterd_add_shd_to_dict(volinfo, rsp_dict,
+ other_index);
+ if (ret)
+ goto out;
+ other_count++;
+ other_index++;
+ node_count++;
+ }
+ }
+
nfs_disabled = dict_get_str_boolean(vol_opts, NFS_DISABLE_MAP_KEY,
_gf_false);
if (!nfs_disabled) {
@@ -3722,18 +3754,6 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
node_count++;
}
- if (glusterd_is_shd_compatible_volume(volinfo))
- shd_enabled = gd_is_self_heal_enabled(volinfo, vol_opts);
- if (shd_enabled) {
- ret = glusterd_add_node_to_dict(priv->shd_svc.name, rsp_dict,
- other_index, vol_opts);
- if (ret)
- goto out;
- other_count++;
- node_count++;
- other_index++;
- }
-
if (glusterd_is_volume_quota_enabled(volinfo)) {
ret = glusterd_add_node_to_dict(priv->quotad_svc.name, rsp_dict,
other_index, vol_opts);
@@ -6831,16 +6851,18 @@ glusterd_shd_select_brick_xlator(dict_t *dict, gf_xl_afr_op_t heal_op,
int ret = -1;
glusterd_conf_t *priv = NULL;
xlator_t *this = NULL;
+ glusterd_svc_t *svc = NULL;
this = THIS;
GF_ASSERT(this);
priv = this->private;
GF_ASSERT(priv);
+ svc = &(volinfo->shd.svc);
switch (heal_op) {
case GF_SHD_OP_INDEX_SUMMARY:
case GF_SHD_OP_STATISTICS_HEAL_COUNT:
- if (!priv->shd_svc.online) {
+ if (!svc->online) {
if (!rsp_dict) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_NULL,
"Received "
@@ -6861,7 +6883,7 @@ glusterd_shd_select_brick_xlator(dict_t *dict, gf_xl_afr_op_t heal_op,
break;
case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
- if (!priv->shd_svc.online) {
+ if (!svc->online) {
if (!rsp_dict) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_NULL,
"Received "
@@ -6996,7 +7018,7 @@ glusterd_bricks_select_heal_volume(dict_t *dict, char **op_errstr,
ret = -1;
goto out;
} else {
- pending_node->node = &(priv->shd_svc);
+ pending_node->node = &(volinfo->shd.svc);
pending_node->type = GD_NODE_SHD;
cds_list_add_tail(&pending_node->list, selected);
pending_node = NULL;
@@ -7130,6 +7152,7 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr,
glusterd_pending_node_t *pending_node = NULL;
xlator_t *this = NULL;
glusterd_conf_t *priv = NULL;
+ glusterd_svc_t *svc = NULL;
GF_ASSERT(dict);
@@ -7225,7 +7248,8 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr,
ret = 0;
} else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
- if (!priv->shd_svc.online) {
+ svc = &(volinfo->shd.svc);
+ if (!svc->online) {
ret = -1;
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SELF_HEALD_DISABLED,
"Self-heal daemon is not running");
@@ -7237,7 +7261,7 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr,
ret = -1;
goto out;
}
- pending_node->node = &(priv->shd_svc);
+ pending_node->node = svc;
pending_node->type = GD_NODE_SHD;
pending_node->index = 0;
cds_list_add_tail(&pending_node->list, selected);