summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/dht/src/tier.c
diff options
context:
space:
mode:
author: Dan Lambright <dlambrig@redhat.com> 2015-12-16 15:28:57 -0500
committer: Dan Lambright <dlambrig@redhat.com> 2015-12-21 05:23:20 -0800
commit: 3a094f1de03b3da8cdff650c14e46aab87e1905b (patch)
tree: 8786ebd5baf1cf146d6b2e6efed77c58a9d53f0c /xlators/cluster/dht/src/tier.c
parent: d2f48214d436be633efb1136ee951b0736935143 (diff)
cluster/tier: do not block in synctask created from pause tier
Previously the pause-tier callback called sleep(). Blocking within a synctask is dangerous: sleep() does not inform the synctask scheduler that the thread is no longer doing work, so the scheduler still counts the thread as running. If a second synctask already exists, it may not be able to run; this occurs when the thread limit in the pool has been reached. Note that the pool size only grows when a synctask is created, not when one is moved from wait state to run state, as is the case when an FOP completes. When the tier is paused during migration, it is highly likely that synctasks already exist, waiting for responses to FOPs sent to the server. The fix is to yield() in the RPC callback, which places the synctask into the wait queue and frees up a thread for the FOP callback. A timer wakes the callback after sufficient time has elapsed for the pause to occur. Change-Id: I6a947ee04c6e5649946cb6d8207ba17263a67fc6 BUG: 1267950 Signed-off-by: Dan Lambright <dlambrig@redhat.com> Reviewed-on: http://review.gluster.org/12987 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Rajesh Joseph <rjoseph@redhat.com>
Diffstat (limited to 'xlators/cluster/dht/src/tier.c')
-rw-r--r-- xlators/cluster/dht/src/tier.c 54
1 files changed, 49 insertions, 5 deletions
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
index 10036368720..2f415c4dbc2 100644
--- a/xlators/cluster/dht/src/tier.c
+++ b/xlators/cluster/dht/src/tier.c
@@ -1374,10 +1374,8 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
goto out;
}
- if (defrag->tier_conf.request_pause)
- defrag->tier_conf.paused = _gf_true;
- else
- defrag->tier_conf.paused = _gf_false;
+ if (tier_conf->request_pause)
+ gf_defrag_wake_pause_tier (tier_conf, _gf_true);
sleep(1);
@@ -1808,6 +1806,8 @@ tier_init (xlator_t *this)
defrag->tier_conf.request_pause = 0;
+ pthread_mutex_init (&defrag->tier_conf.pause_mutex, 0);
+
ret = dict_get_str (this->options,
"tier-pause", &paused);
@@ -1876,6 +1876,40 @@ out:
/*
 * Callback handed to synctask_new() right after tier_cli_pause (see the
 * tier_reconfigure hunk); presumably invoked when that task finishes --
 * confirm against the synctask API.
 *
 * Logs the received op_ret at INFO level under DHT_MSG_TIER_PAUSED and
 * returns the same op_ret unchanged. sync_frame and data are not used in
 * this function.
 */
int
+tier_cli_pause_done (int op_ret, call_frame_t *sync_frame, void *data)
+{
+ gf_msg ("tier", GF_LOG_INFO, 0,
+ DHT_MSG_TIER_PAUSED,
+ "Migrate file paused with op_ret %d", op_ret);
+
+ return op_ret;
+}
+/*
+ * Task function handed to synctask_new() by tier_reconfigure when a pause
+ * is requested. `data` is treated as the xlator_t; tier_reconfigure passes
+ * `this` as the last synctask_new() argument.
+ *
+ * Looks up the dht_conf_t from this->private and the defrag info from
+ * conf->defrag, then calls gf_defrag_pause_tier() on them.
+ *
+ * Returns 0 once gf_defrag_pause_tier() has been called, or -1 (the
+ * initial value of ret) if the exit label is reached before that.
+ */
+int
+tier_cli_pause (void *data)
+{
+ gf_defrag_info_t *defrag = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+
+ this = data;
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, conf, exit); /* macro defined elsewhere; presumably jumps to exit when conf is NULL -- confirm */
+
+ defrag = conf->defrag;
+ GF_VALIDATE_OR_GOTO (this->name, defrag, exit);
+ /*
+  * NOTE(review): the value returned by gf_defrag_pause_tier() is
+  * discarded here, so ret becomes 0 regardless of what the pause call
+  * reports -- confirm this is intended.
+  */
+ gf_defrag_pause_tier (this, defrag);
+
+ ret = 0;
+exit:
+ return ret;
+}
+
+
+int
tier_reconfigure (xlator_t *this, dict_t *options)
{
dht_conf_t *conf = NULL;
@@ -1884,6 +1918,7 @@ tier_reconfigure (xlator_t *this, dict_t *options)
int migrate_mb = 0;
gf_boolean_t req_pause = _gf_false;
int ret = 0;
+ call_frame_t *frame = NULL;
conf = this->private;
@@ -1932,7 +1967,16 @@ tier_reconfigure (xlator_t *this, dict_t *options)
bool, out);
if (req_pause == _gf_true) {
- ret = gf_defrag_pause_tier (this, defrag);
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ frame->root->pid = GF_CLIENT_PID_DEFRAG;
+
+ ret = synctask_new (this->ctx->env, tier_cli_pause,
+ tier_cli_pause_done, frame, this);
+
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
DHT_MSG_LOG_TIER_ERROR,