-rw-r--r--  libglusterfs/src/defaults-tmpl.c                7
-rw-r--r--  libglusterfs/src/glusterfs/glusterfs.h          1
-rw-r--r--  libglusterfs/src/glusterfs/xlator.h             2
-rw-r--r--  libglusterfs/src/graph.c                        1
-rw-r--r--  libglusterfs/src/libglusterfs.sym               1
-rw-r--r--  libglusterfs/src/xlator.c                      23
-rw-r--r--  rpc/rpc-lib/src/rpc-clnt.c                      8
-rw-r--r--  rpc/rpc-lib/src/rpc-clnt.h                      2
-rw-r--r--  tests/basic/graph-cleanup-brick-down-shd-mux.t 64
-rw-r--r--  tests/basic/volume-scale-shd-mux.t              7
-rw-r--r--  xlators/protocol/client/src/client.c           65
11 files changed, 169 insertions(+), 12 deletions(-)
diff --git a/libglusterfs/src/defaults-tmpl.c b/libglusterfs/src/defaults-tmpl.c
index 82e7f78d7f3..3cf707f42aa 100644
--- a/libglusterfs/src/defaults-tmpl.c
+++ b/libglusterfs/src/defaults-tmpl.c
@@ -171,8 +171,11 @@ default_notify(xlator_t *this, int32_t event, void *data, ...)
/* Make sure this is not a daemon with master xlator */
pthread_mutex_lock(&graph->mutex);
{
- graph->used = 0;
- pthread_cond_broadcast(&graph->child_down_cond);
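+            /* Broadcast only after every protocol/client xlator in this
+             * graph has recorded its parent-down; the counting happens in
+             * client.c under graph->mutex. */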
+ if (graph->parent_down ==
+ graph_total_client_xlator(graph)) {
+ graph->used = 0;
+ pthread_cond_broadcast(&graph->child_down_cond);
+ }
}
pthread_mutex_unlock(&graph->mutex);
}
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
index 01262dcd9f5..155bf435386 100644
--- a/libglusterfs/src/glusterfs/glusterfs.h
+++ b/libglusterfs/src/glusterfs/glusterfs.h
@@ -594,6 +594,7 @@ struct _glusterfs_graph {
in client multiplexed code path */
pthread_mutex_t mutex;
pthread_cond_t child_down_cond; /* for broadcasting CHILD_DOWN */
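+    /* number of client xlators that have processed GF_EVENT_PARENT_DOWN;
+     * compared against graph_total_client_xlator() before cleanup is
+     * allowed to proceed */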
+ int parent_down;
char graph_uuid[128];
};
typedef struct _glusterfs_graph glusterfs_graph_t;
diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
index 6449e59f484..6608d6cdf0d 100644
--- a/libglusterfs/src/glusterfs/xlator.h
+++ b/libglusterfs/src/glusterfs/xlator.h
@@ -1095,4 +1095,6 @@ mgmt_is_multiplexed_daemon(char *name);
gf_boolean_t
xlator_is_cleanup_starting(xlator_t *this);
+int
+graph_total_client_xlator(glusterfs_graph_t *graph);
#endif /* _XLATOR_H */
diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
index bbc5ad68d94..e6ae40db2ed 100644
--- a/libglusterfs/src/graph.c
+++ b/libglusterfs/src/graph.c
@@ -1695,6 +1695,7 @@ glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
"failed to construct the graph");
goto out;
}
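+    /* a freshly attached graph has seen no parent-down events yet */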
+ graph->parent_down = 0;
graph->last_xl = glusterfs_get_last_xlator(graph);
for (xl = graph->first; xl; xl = xl->next) {
diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
index 2e83d3f1003..dc7382ba749 100644
--- a/libglusterfs/src/libglusterfs.sym
+++ b/libglusterfs/src/libglusterfs.sym
@@ -1169,3 +1169,4 @@ glusterfs_process_svc_detach
mgmt_is_multiplexed_daemon
xlator_is_cleanup_starting
gf_nanosleep
+graph_total_client_xlator
diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
index 9906809f7aa..8605fbd0e6f 100644
--- a/libglusterfs/src/xlator.c
+++ b/libglusterfs/src/xlator.c
@@ -1542,3 +1542,26 @@ xlator_is_cleanup_starting(xlator_t *this)
out:
return cleanup;
}
+
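+/* Count the protocol/client xlators in a graph. Used to decide when every
+ * client in a multiplexed graph has reported parent-down. */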
+int
+graph_total_client_xlator(glusterfs_graph_t *graph)
+{
+ xlator_t *xl = NULL;
+ int count = 0;
+
+ if (!graph) {
+ gf_msg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "graph object is null");
+ goto out;
+ }
+
+ xl = graph->first;
+ while (xl) {
+ if (strcmp(xl->type, "protocol/client") == 0) {
+ count++;
+ }
+ xl = xl->next;
+ }
+out:
+ return count;
+}
diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c
index 8ef05378351..aa65a1f8766 100644
--- a/rpc/rpc-lib/src/rpc-clnt.c
+++ b/rpc/rpc-lib/src/rpc-clnt.c
@@ -1858,7 +1858,7 @@ rpc_clnt_unref(struct rpc_clnt *rpc)
return rpc;
}
-void
+int
rpc_clnt_disable(struct rpc_clnt *rpc)
{
rpc_clnt_connection_t *conn = NULL;
@@ -1902,8 +1902,9 @@ rpc_clnt_disable(struct rpc_clnt *rpc)
}
pthread_mutex_unlock(&conn->lock);
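+    /* Report the outcome of the disconnect attempt: -1 means no transport
+     * was attached, so the caller should not expect a CHILD_DOWN event. */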
+ ret = -1;
if (trans) {
- rpc_transport_disconnect(trans, _gf_true);
+ ret = rpc_transport_disconnect(trans, _gf_true);
/* The auth_value was being reset to AUTH_GLUSTERFS_v2.
* if (clnt->auth_value)
* clnt->auth_value = AUTH_GLUSTERFS_v2;
@@ -1919,7 +1920,6 @@ rpc_clnt_disable(struct rpc_clnt *rpc)
* on a connected transport and hence its strictly serialized.
*/
}
-
if (unref)
rpc_clnt_unref(rpc);
@@ -1930,7 +1930,7 @@ rpc_clnt_disable(struct rpc_clnt *rpc)
rpc_clnt_unref(rpc);
out:
- return;
+ return ret;
}
void
diff --git a/rpc/rpc-lib/src/rpc-clnt.h b/rpc/rpc-lib/src/rpc-clnt.h
index b46feed50c8..1d3274bbddd 100644
--- a/rpc/rpc-lib/src/rpc-clnt.h
+++ b/rpc/rpc-lib/src/rpc-clnt.h
@@ -250,7 +250,7 @@ int
rpcclnt_cbk_program_register(struct rpc_clnt *svc,
rpcclnt_cb_program_t *program, void *mydata);
-void
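+/* Returns the rpc_transport_disconnect() result, or -1 when no transport
+ * was attached. */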
+int
rpc_clnt_disable(struct rpc_clnt *rpc);
int
diff --git a/tests/basic/graph-cleanup-brick-down-shd-mux.t b/tests/basic/graph-cleanup-brick-down-shd-mux.t
new file mode 100644
index 00000000000..3c621cdcc26
--- /dev/null
+++ b/tests/basic/graph-cleanup-brick-down-shd-mux.t
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
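+#This test exercises graph cleanup in the muxed self-heal daemon when
+#bricks go down (one brick, an entire subvolume, then all bricks) just
+#before the volume is stopped.
+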
+TESTS_EXPECTED_IN_LOOP=4
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+
+for i in $(seq 1 2); do
+ TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_afr$i
+ TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+#Check that the thread count becomes number of volumes * number of ec subvolumes (2*6=12)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+#Check that the thread count becomes number of volumes * number of afr subvolumes (3*6=18)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+#kill one brick and test cleanup
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST $CLI volume stop $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+
+#kill an entire subvol and test cleanup
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+#wait for some time to create a race scenario
+sleep 1
+TEST $CLI volume stop $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+
+#kill all bricks and test cleanup
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST kill_brick $V0 $H0 $B0/${V0}5
+#wait for some time to create a race scenario
+sleep 2
+
+TEST $CLI volume stop $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+
+cleanup
diff --git a/tests/basic/volume-scale-shd-mux.t b/tests/basic/volume-scale-shd-mux.t
index 89b833d5ddc..d1ddcbca7dd 100644
--- a/tests/basic/volume-scale-shd-mux.t
+++ b/tests/basic/volume-scale-shd-mux.t
@@ -23,8 +23,6 @@ for i in $(seq 1 2); do
done
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
-
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
#Check that the thread count becomes number of volumes * number of ec subvolumes (2*6=12)
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
#Check that the thread count becomes number of volumes * number of afr subvolumes (3*6=18)
@@ -38,9 +36,9 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^21$" number_healer_threads_shd $V0 "__afr_sh
#Remove the brick and check the detach is successful
$CLI volume remove-brick $V0 $H0:$B0/${V0}{6,7,8} force
-
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" number_healer_threads_shd $V0 "glusterfs_graph_cleanup"
TEST $CLI volume add-brick ${V0}_ec1 $H0:$B0/${V0}_ec1_add{0,1,2,3,4,5};
#Check that the thread count becomes number of volumes * number of ec subvolumes plus 6 additional threads from the newly added bricks (2*6+6=18)
@@ -92,6 +90,9 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^9$" number_healer_threads_shd $V0 "__afr_shd
TEST $CLI volume remove-brick ${V0}_distribute1 replica 1 $H0:$B0/add/{2..3} force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+#Before stopping the process, make sure there are no pending cleanup threads hanging
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" number_healer_threads_shd $V0 "glusterfs_graph_cleanup"
+
TEST $CLI volume stop ${V0}
TEST $CLI volume delete ${V0}
EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index 776e7160c51..45e7bfedf91 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -61,9 +61,54 @@ out:
}
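+/* A CHILD_DOWN is the last one for this client when the parent is already
+ * down, the rpc has been disabled and no reconnect is pending. */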
int
+client_is_last_child_down(xlator_t *this, int32_t event, struct rpc_clnt *rpc)
+{
+ rpc_clnt_connection_t *conn = NULL;
+ int ret = 0;
+
+    clnt_conf_t *conf = this ? this->private : NULL;
+
+    if (!this || !rpc || !conf)
+        goto out;
+
+ if (!conf->parent_down)
+ goto out;
+ conn = &rpc->conn;
+ pthread_mutex_lock(&conn->lock);
+ {
+ if (event == GF_EVENT_CHILD_DOWN && !conn->reconnect && rpc->disabled) {
+ ret = 1;
+ }
+ }
+ pthread_mutex_unlock(&conn->lock);
+out:
+ return ret;
+}
+
+int
client_notify_dispatch_uniq(xlator_t *this, int32_t event, void *data, ...)
{
clnt_conf_t *conf = this->private;
+ glusterfs_ctx_t *ctx = this->ctx;
+ glusterfs_graph_t *graph = this->graph;
+
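+    /* Serialize with in-flight notifications; if this is the final
+     * CHILD_DOWN of a graph whose parents are all down, release the graph
+     * so glusterfs_graph_cleanup() can stop waiting on child_down_cond. */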
+ pthread_mutex_lock(&ctx->notify_lock);
+ {
+ while (ctx->notifying)
+ pthread_cond_wait(&ctx->notify_cond, &ctx->notify_lock);
+
+ if (client_is_last_child_down(this, event, data) && graph) {
+ pthread_mutex_lock(&graph->mutex);
+ {
+ graph->parent_down++;
+ if (graph->parent_down == graph_total_client_xlator(graph)) {
+ graph->used = 0;
+ pthread_cond_broadcast(&graph->child_down_cond);
+ }
+ }
+ pthread_mutex_unlock(&graph->mutex);
+ }
+ }
+ pthread_mutex_unlock(&ctx->notify_lock);
if (conf->last_sent_event == event)
return 0;
@@ -81,6 +126,7 @@ client_notify_dispatch(xlator_t *this, int32_t event, void *data, ...)
{
int ret = -1;
glusterfs_ctx_t *ctx = this->ctx;
+
clnt_conf_t *conf = this->private;
pthread_mutex_lock(&ctx->notify_lock);
@@ -94,6 +140,7 @@ client_notify_dispatch(xlator_t *this, int32_t event, void *data, ...)
/* We assume that all translators in the graph handle notification
* events in sequence.
* */
+
ret = default_notify(this, event, data);
/* NB (Even) with MT-epoll and EPOLLET|EPOLLONESHOT we are guaranteed
@@ -2376,7 +2423,7 @@ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
replicate), hence make sure events which are passed
to parent are genuine */
ret = client_notify_dispatch_uniq(this, GF_EVENT_CHILD_DOWN,
- NULL);
+ rpc);
if (is_parent_down) {
/* If parent is down, then there should not be any
* operation after a child down.
@@ -2424,6 +2471,8 @@ int
notify(xlator_t *this, int32_t event, void *data, ...)
{
clnt_conf_t *conf = NULL;
+ glusterfs_graph_t *graph = this->graph;
+ int ret = -1;
conf = this->private;
if (!conf)
@@ -2450,7 +2499,19 @@ notify(xlator_t *this, int32_t event, void *data, ...)
}
pthread_mutex_unlock(&conf->lock);
- rpc_clnt_disable(conf->rpc);
+ ret = rpc_clnt_disable(conf->rpc);
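+    /* A -1 return means there was no transport to disconnect, so no
+     * CHILD_DOWN will follow for this client; account for it here. */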
+ if (ret == -1 && graph) {
+ pthread_mutex_lock(&graph->mutex);
+ {
+ graph->parent_down++;
+ if (graph->parent_down ==
+ graph_total_client_xlator(graph)) {
+ graph->used = 0;
+ pthread_cond_broadcast(&graph->child_down_cond);
+ }
+ }
+ pthread_mutex_unlock(&graph->mutex);
+ }
break;
default: