summary | refs | log | tree | commit | diff | stats
path: root/xlators/cluster/dht/src/dht-rebalance.c
diff options
context:
space:
mode:
author: Dan Lambright <dlambrig@redhat.com>, 2016-05-14 17:51:44 -0400
committer: Dan Lambright <dlambrig@redhat.com>, 2016-05-16 05:21:56 -0700
commit: c6355e1dbf02c1d06f2d7b01633fbc3fb13500b7 (patch)
tree: 0a9828ce184547d49436bd201b2e65fc0cddc2fa /xlators/cluster/dht/src/dht-rebalance.c
parent: bead4b82398f478941c2c9153d848f528fbe361e (diff)
tier/detach: Clear tier-fix-layout-complete xattr after migration threads join
Previously, we wrongly placed the clearing of the tier-fix-layout-complete xattr before the joining of the migration threads. This could lead to situations where a failure to clear the xattr caused the premature death of the migration threads. Now we clear the xattr only after the data movement threads have joined, ensuring that all migration is done.

This is a backport of 14285.

> Change-Id: I829b671efa165ae13dbff7b00707434970b37a09
> BUG: 1334839
> Signed-off-by: Joseph Fernandes <josferna@redhat.com>

Signed-off-by: Dan Lambright <dlambrig@redhat.com>
Change-Id: I475242e6a05cacd2252dc5c29b160e7abc5d1791
BUG: 1336148
Reviewed-on: http://review.gluster.org/14341
Smoke: Gluster Build System <jenkins@build.gluster.com>
Tested-by: N Balachandran <nbalacha@redhat.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: N Balachandran <nbalacha@redhat.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Dan Lambright <dlambrig@redhat.com>
Diffstat (limited to 'xlators/cluster/dht/src/dht-rebalance.c')
-rw-r--r-- xlators/cluster/dht/src/dht-rebalance.c 75
1 file changed, 42 insertions, 33 deletions
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index abdb8ebb0a0..d7632376c58 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -3347,39 +3347,45 @@ out:
return ret;
}
-int
+void
gf_tier_clear_fix_layout (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
{
int ret = -1;
dict_t *dict = NULL;
- /* Check if background fixlayout is completed. */
+ GF_VALIDATE_OR_GOTO ("tier", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc, out);
+ GF_VALIDATE_OR_GOTO (this->name, defrag, out);
+
+ /* Check if background fixlayout is completed. This is not
+ * multi-process safe i.e there is a possibility that by the time
+ * we move to remove the xattr there it might have been cleared by some
+ * other detach process from other node. We ignore the error if such
+ * a thing happens */
ret = syncop_getxattr (this, loc, &dict,
GF_XATTR_TIER_LAYOUT_FIXED_KEY, NULL, NULL);
if (ret) {
/* Background fixlayout not complete - nothing to clear*/
- gf_log (this->name, GF_LOG_WARNING,
+ gf_msg (this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_LOG_TIER_STATUS,
"Unable to retrieve fixlayout xattr."
"Assume background fix layout not complete");
- ret = 0;
goto out;
}
ret = syncop_removexattr (this, loc, GF_XATTR_TIER_LAYOUT_FIXED_KEY,
NULL, NULL);
if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
+ gf_msg (this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_LOG_TIER_STATUS,
"Failed removing tier fix layout "
"xattr from %s", loc->path);
- defrag->total_failures++;
- ret = -1;
goto out;
}
ret = 0;
out:
if (dict)
dict_unref (dict);
- return ret;
}
void
@@ -3396,24 +3402,25 @@ gf_tier_wait_fix_lookup (gf_defrag_info_t *defrag) {
int
gf_defrag_start_crawl (void *data)
{
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- int ret = -1;
- loc_t loc = {0,};
- struct iatt iatt = {0,};
- struct iatt parent = {0,};
- dict_t *fix_layout = NULL;
- dict_t *migrate_data = NULL;
- dict_t *status = NULL;
- dict_t *dict = NULL;
- glusterfs_ctx_t *ctx = NULL;
- dht_methods_t *methods = NULL;
- int i = 0;
- int thread_index = 0;
- int err = 0;
- int thread_spawn_count = 0;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ int ret = -1;
+ loc_t loc = {0,};
+ struct iatt iatt = {0,};
+ struct iatt parent = {0,};
+ dict_t *fix_layout = NULL;
+ dict_t *migrate_data = NULL;
+ dict_t *status = NULL;
+ dict_t *dict = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ dht_methods_t *methods = NULL;
+ int i = 0;
+ int thread_index = 0;
+ int err = 0;
+ int thread_spawn_count = 0;
pthread_t tid[MAX_MIGRATOR_THREAD_COUNT];
+ gf_boolean_t is_tier_detach = _gf_false;
this = data;
if (!this)
@@ -3618,14 +3625,9 @@ gf_defrag_start_crawl (void *data)
goto out;
}
- if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
- /* If its was a detach remove the tier fix-layout
- * xattr on root */
- ret = gf_tier_clear_fix_layout (this, &loc, defrag);
- if (ret) {
- goto out;
- }
- }
+ if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
+ is_tier_detach = _gf_true;
+
}
gf_log ("DHT", GF_LOG_INFO, "crawling file-system completed");
@@ -3661,6 +3663,12 @@ out:
gf_tier_wait_fix_lookup (defrag);
}
+ if (is_tier_detach && ret == 0) {
+ /* If it was a detach remove the tier fix-layout
+ * xattr on root. Ignoring the failure, as nothing has to be
+ * done, logging is done in gf_tier_clear_fix_layout */
+ gf_tier_clear_fix_layout (this, &loc, defrag);
+ }
if (defrag->queue) {
gf_dirent_free (defrag->queue[0].df_entry);
@@ -3700,6 +3708,7 @@ exit:
}
+
static int
gf_defrag_done (int ret, call_frame_t *sync_frame, void *data)
{