From 088ad9d931a4d9981e33dd28385fb5844cb89526 Mon Sep 17 00:00:00 2001 From: Tamar Shacked Date: Wed, 15 Jul 2020 10:20:54 +0300 Subject: cluster/dht: suppress file migration error for node not supposed to migrate file A rebalance process does a lookup for every file in the dir it is processing before checking if it supposed to migrate the file. In this issue there are two rebalance processses running on a replica subvol: R1 is migrating the FILE. R2 is not supposed to migrate the FILE, but it does a lookup and finds a stale linkfile which is mostly due to a stale layout. Then, it tries to unlink the stale linkfile and gets EBUSY as the linkfile fd is open due R1 migration. As a result a misleading error msg about FILE migration failure due EBUSY is logged in R2 logfile. Fix: suppress the error in case it occured in a node that is not supposed to migrate the file. fixes: #1371 Change-Id: I37832b404e2b0cc40ac5caf45f14c32c891e71f3 Signed-off-by: Tamar Shacked --- xlators/cluster/dht/src/dht-rebalance.c | 38 ++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 13 deletions(-) (limited to 'xlators/cluster/dht/src') diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index af74d1a33e7..c141ffce90d 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -2558,10 +2558,10 @@ out: * all hardlinks. */ -int +gf_boolean_t gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid) { - int ret = 0; + gf_boolean_t ret = _gf_false; int i = local_subvol_index; char *str = NULL; uint32_t hashval = 0; @@ -2583,12 +2583,11 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid) } str = uuid_utoa_r(gfid, buf); - ret = dht_hash_compute(this, 0, str, &hashval); - if (ret == 0) { + if (dht_hash_compute(this, 0, str, &hashval) == 0) { index = (hashval % entry->count); if (entry->elements[index].info == REBAL_NODEUUID_MINE) { /* Index matches this node's nodeuuid.*/ - ret = 1; + ret = _gf_true; goto out; } @@ -2601,12 +2600,12 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid) /* None of the bricks in the subvol are up. * CHILD_DOWN will kill the process soon */ - return 0; + return _gf_false; } if (entry->elements[index].info == REBAL_NODEUUID_MINE) { /* Index matches this node's nodeuuid.*/ - ret = 1; + ret = _gf_true; goto out; } } @@ -2655,6 +2654,7 @@ gf_defrag_migrate_single_file(void *opaque) struct iatt *iatt_ptr = NULL; gf_boolean_t update_skippedcount = _gf_true; int i = 0; + gf_boolean_t should_i_migrate = 0; rebal_entry = (struct dht_container *)opaque; if (!rebal_entry) { @@ -2709,11 +2709,29 @@ gf_defrag_migrate_single_file(void *opaque) goto out; } + should_i_migrate = gf_defrag_should_i_migrate( + this, rebal_entry->local_subvol_index, entry->d_stat.ia_gfid); + gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid); gf_uuid_copy(entry_loc.pargfid, loc->gfid); ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL); + + if (!should_i_migrate) { + /* this node isn't supposed to migrate the file. suppressing any + * potential error from lookup as this file is under migration by + * another node */ + if (ret) { + gf_msg_debug(this->name, -ret, + "Ignoring lookup failure: node isn't migrating %s", + entry_loc.path); + ret = 0; + } + gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path); + goto out; + } + if (ret) { gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, "Migrate file failed: %s lookup failed", entry_loc.path); @@ -2734,12 +2752,6 @@ gf_defrag_migrate_single_file(void *opaque) goto out; } - if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index, - entry->d_stat.ia_gfid)) { - gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path); - goto out; - } - iatt_ptr = &iatt; hashed_subvol = dht_subvol_get_hashed(this, &entry_loc); -- cgit