summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Ravishankar N <ravishankar@redhat.com> 2017-09-27 10:32:36 +0530
committer Shyamsundar Ranganathan <srangana@redhat.com> 2017-10-02 12:34:23 +0000
commit f5998f07dfd21d06a4119416ca79db50232b50d4 (patch)
tree b72521e7fd07cb7cec55029ad7bfbae9a12c62f2
parent 5eab919dee035b9cf1b7f060bcf2d9eaa1e92eb3 (diff)
afr: auto-resolve split-brains for zero-byte files
Backport of https://review.gluster.org/#/c/18283/

Problems:
As described in BZ 1491670, renaming hardlinks can result in data/mdata split-brain of the DHT link-to files (T files) without any mismatch of data and metadata.
As described in BZ 1486063, for a zero-byte file with only dirty bits set, the arbiter brick will likely be chosen as the source brick.

Fix:
For zero-byte files in split-brain, pick the first brick as
a) data source if the file size is zero on all bricks.
b) metadata source if the metadata is the same on all bricks.
In the arbiter case, if the file size is zero on all bricks and there are no pending afr xattrs, pick the first brick as data source.

Change-Id: I0270a9a2f97c3b21087e280bb890159b43975e04
BUG: 1496321
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reported-by: Rahul Hinduja <rhinduja@redhat.com>
Reported-by: Mabi <mabi@protonmail.ch>
-rw-r--r-- tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t 2
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-common.c 65
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-data.c 6
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal.h 8
4 files changed, 80 insertions, 1 deletions
diff --git a/tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t b/tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t
index 81655074c9c..fe8e992e8f8 100644
--- a/tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t
+++ b/tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t
@@ -28,7 +28,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0
TEST kill_brick $V0 $H0 $B0/$V0"0"
TEST stat $M0/datafile
TEST `echo append>>$M0/datafile`
-TEST chmod +x $M0/mdatafile
+TEST chmod -x $M0/mdatafile
TEST $CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 0
TEST ! cat $M0/datafile
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 484b7dca54e..0d08bee861e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -922,6 +922,65 @@ afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame,
return fav_child;
}
+/*
+ * Auto-pick a heal source when every brick holds a zero-byte copy of the file.
+ *
+ * Bails out with -1 unless AFR_COUNT (locked_on) and afr_success_count
+ * (replies) both reach priv->child_count and every reply's poststat reports
+ * ia_size == 0.  For AFR_DATA_TRANSACTION that is sufficient; for any other
+ * transaction type the ia type, uid, gid and prot fields plus the xdata of
+ * every reply must additionally match those of replies[0].
+ *
+ * On success child 0 is flagged in sources[] and cleared in sinks[] and
+ * healed_sinks[]; every other child gets the opposite marking.
+ *
+ * Returns the index of the chosen source (0), or -1 when the conditions
+ * above do not hold (the output arrays are left untouched in that case).
+ */
+int
+afr_mark_source_sinks_if_file_empty (xlator_t *this, unsigned char *sources,
+                                     unsigned char *sinks,
+                                     unsigned char *healed_sinks,
+                                     unsigned char *locked_on,
+                                     struct afr_reply *replies,
+                                     afr_transaction_type type)
+{
+        afr_private_t *priv       = this->private;
+        struct iatt    first_stat = {0, };
+        unsigned char  is_sink    = 0;
+        int            chosen     = -1;
+        int            child      = 0;
+
+        /* Every brick must be locked and must have answered successfully. */
+        if (AFR_COUNT (locked_on, priv->child_count) < priv->child_count)
+                return -1;
+        if (afr_success_count (replies, priv->child_count) <
+            priv->child_count)
+                return -1;
+
+        /* A single non-empty copy disqualifies the file. */
+        for (child = 0; child < priv->child_count; child++) {
+                if (replies[child].poststat.ia_size != 0)
+                        return -1;
+        }
+
+        if (type != AFR_DATA_TRANSACTION) {
+                /* Metadata heal: all bricks must agree with brick 0 on the
+                 * iatt fields checked below and on the returned xattrs. */
+                first_stat = replies[0].poststat;
+                for (child = 1; child < priv->child_count; child++) {
+                        if (!(IA_EQUAL (first_stat, replies[child].poststat,
+                                        type) &&
+                              IA_EQUAL (first_stat, replies[child].poststat,
+                                        uid) &&
+                              IA_EQUAL (first_stat, replies[child].poststat,
+                                        gid) &&
+                              IA_EQUAL (first_stat, replies[child].poststat,
+                                        prot)))
+                                return -1;
+                }
+                for (child = 1; child < priv->child_count; child++) {
+                        if (!afr_xattrs_are_equal (replies[0].xdata,
+                                                   replies[child].xdata))
+                                return -1;
+                }
+        }
+
+        /* The first child visited becomes the source; all later ones are
+         * marked as sinks that are expected to be healed. */
+        for (child = 0; child < priv->child_count; child++) {
+                is_sink = (chosen != -1);
+                if (!is_sink)
+                        chosen = child;
+                sources[child]      = !is_sink;
+                sinks[child]        = is_sink;
+                healed_sinks[child] = is_sink;
+        }
+
+        return chosen;
+}
+
/* Return a source depending on the type of heal_op, and set sources[source],
* sinks[source] and healed_sinks[source] to 1, 0 and 0 respectively. Do so
* only if the following condition is met:
@@ -950,6 +1009,12 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,
priv = this->private;
xdata_req = local->xdata_req;
+ source = afr_mark_source_sinks_if_file_empty (this, sources, sinks,
+ healed_sinks, locked_on,
+ replies, type);
+ if (source >= 0)
+ return source;
+
ret = dict_get_int32 (xdata_req, "heal-op", &heal_op);
if (ret)
goto autoheal;
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 82c9ceb5c1f..e51add6ce5c 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -579,6 +579,12 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this,
afr_mark_largest_file_as_source (this, sources, replies);
afr_mark_biggest_witness_as_source (this, sources, witness);
afr_mark_newest_file_as_source (this, sources, replies);
+ if (priv->arbiter_count)
+ /* Choose non-arbiter brick as source for empty files. */
+ afr_mark_source_sinks_if_file_empty (this, sources, sinks,
+ healed_sinks, locked_on,
+ replies,
+ AFR_DATA_TRANSACTION);
out:
afr_mark_active_sinks (this, sources, locked_on, healed_sinks);
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index ded4903a4c7..92364d26735 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -317,4 +317,12 @@ afr_choose_source_by_policy (afr_private_t *priv, unsigned char *sources,
int
afr_selfheal_metadata_by_stbuf (xlator_t *this, struct iatt *stbuf);
+/* Source selection for files that are zero bytes on every brick.
+ * Returns the index of the brick marked in sources[] (all other bricks are
+ * marked in sinks[] and healed_sinks[]), or -1 when not every brick is
+ * locked and replied successfully, when any reply reports a non-zero
+ * ia_size, or - for a type other than AFR_DATA_TRANSACTION - when the
+ * compared iatt fields or xdata differ between replies.
+ */
+int
+afr_mark_source_sinks_if_file_empty (xlator_t *this, unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ afr_transaction_type type);
#endif /* !_AFR_SELFHEAL_H */