From 44dc74645144bfbca4aa0f08e36972359645a42c Mon Sep 17 00:00:00 2001 From: Dan Lambright Date: Tue, 1 Sep 2015 20:08:15 -0400 Subject: cluster/dht: maintain start state of rebalance daemon across graph switch. This is a backport of fix 10977. > When we did a graph switch on a rebalance daemon, a second call > to gf_defrag_start() was done. This led to multiple threads > doing migration. When multiple threads try to move the same > file there can be deadlocks. > Change-Id: I931ca7fe600022f245e3dccaabb1ad004f732c56 > BUG: 1226005 Change-Id: I163d2d04692eba36c986ea9835f588962c92b93f BUG: 1259078 Signed-off-by: Dan Lambright Reviewed-on: http://review.gluster.org/12082 Tested-by: NetBSD Build System Reviewed-by: mohammed rafi kc --- xlators/cluster/dht/src/dht-common.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'xlators/cluster/dht/src/dht-common.c') diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 82a4c392b5a..f83ff6487a2 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -29,7 +29,11 @@ #include #include -int dht_link2 (xlator_t *this, xlator_t *dst_node, call_frame_t *frame); +int run_defrag = 0; + +int +dht_link2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame); + int dht_removexattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame); @@ -7604,11 +7608,16 @@ unlock: } } - /* rebalance is started with assert_no_child_down. So we do + /* Rebalance is started with assert_no_child_down. So we do * not need to handle CHILD_DOWN event here. + * + * If there is a graph switch, we should not restart the + * rebalance daemon. Use 'run_defrag' to indicate if the + * thread has already started. */ - if (conf->defrag) { + if (conf->defrag && !run_defrag) { if (methods->migration_needed(this)) { + run_defrag = 1; ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start, this); -- cgit