From d80846bee0621f591a7b519743e4d91a620ccdca Mon Sep 17 00:00:00 2001 From: shishir gowda Date: Wed, 11 Apr 2012 11:47:17 +0530 Subject: dht/rebalance: Handle ASSERT_ON_CHILD_DOWN gracefully In the rebalance process, instead of terminating, send a stop event. The migration in question will either complete or be handled as an error. Also, treat failures of a few syncop calls as errors, instead of just logging them. Change-Id: If6ed54474cb0f1fe0e28a5765e6d90966740dfde BUG: 811444 Signed-off-by: shishir gowda Reviewed-on: http://review.gluster.com/3120 Tested-by: Gluster Build System Reviewed-by: Amar Tumballi Reviewed-by: Vijay Bellur --- xlators/cluster/dht/src/dht-common.c | 6 +++++- xlators/cluster/dht/src/dht-rebalance.c | 14 +++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'xlators') diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index d58cac9fae4..57bacb88865 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -4568,7 +4568,11 @@ dht_notify (xlator_t *this, int event, void *data, ...) if (conf->assert_no_child_down) { gf_log (this->name, GF_LOG_WARNING, "Received CHILD_DOWN. 
Exiting"); - kill (getpid(), SIGTERM); + if (conf->defrag) { + gf_defrag_stop (conf->defrag, NULL); + } else { + kill (getpid(), SIGTERM); + } } for (i = 0; i < conf->subvolume_cnt; i++) { diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 611ea590102..5e46a80a127 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -784,6 +784,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, gf_log (this->name, GF_LOG_WARNING, "%s: failed to perform setattr on %s (%s)", loc->path, to->name, strerror (errno)); + goto out; } /* Because 'futimes' is not portable */ @@ -804,6 +805,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, gf_log (this->name, GF_LOG_WARNING, \ "%s: failed to perform setattr on %s (%s)", loc->path, from->name, strerror (errno)); + goto out; } /* Do a stat and check the gfid before unlink */ @@ -812,6 +814,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, gf_log (this->name, GF_LOG_WARNING, "%s: failed to do a stat on %s (%s)", loc->path, from->name, strerror (errno)); + goto out; } if (uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0) { @@ -821,6 +824,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, gf_log (this->name, GF_LOG_WARNING, "%s: failed to perform unlink on %s (%s)", loc->path, from->name, strerror (errno)); + goto out; } } @@ -1272,8 +1276,11 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, goto out; } - if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) - gf_defrag_migrate_data (this, defrag, loc, migrate_data); + if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) { + ret = gf_defrag_migrate_data (this, defrag, loc, migrate_data); + if (ret) + goto out; + } gf_log (this->name, GF_LOG_TRACE, "fix layout called on %s", loc->path); @@ -1598,7 +1605,8 @@ gf_defrag_stop (gf_defrag_info_t *defrag, dict_t 
*output) defrag->defrag_status = GF_DEFRAG_STATUS_STOPPED; - gf_defrag_status_get (defrag, output); + if (output) + gf_defrag_status_get (defrag, output); ret = 0; out: gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); -- cgit