summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRavishankar N <ravishankar@redhat.com>2017-09-14 11:29:15 +0530
committerjiffin tony Thottan <jthottan@redhat.com>2017-10-05 12:17:05 +0000
commitc0d4b32cb028e8b4928ace22468d692fb7e42ca4 (patch)
treec55668d6fd1dc7b62588ca320db6546a04f674f3
parent761942e9fe8f6d7bbd5c56720e52dc4f6663cd9f (diff)
afr: auto-resolve split-brains for zero-byte files
Problems: As described in BZ 1491670, renaming hardlinks can result in data/mdata split-brain of the DHT link-to files (T files) without any mismatch of data and metadata. As described in BZ 1486063, for a zero-byte file with only dirty bits set, the arbiter brick will likely be chosen as the source brick. Fix: For zero-byte files in split-brain, pick the first brick as a) data source if file size is zero on all bricks; b) metadata source if metadata is the same on all bricks. In the arbiter case, if file size is zero on all bricks and there are no pending afr xattrs, pick the 1st brick as data source. (cherry picked from commit 1719cffa911c5287715abfdb991bc8862f0c994e) Change-Id: I0270a9a2f97c3b21087e280bb890159b43975e04 BUG: 1496317 Signed-off-by: Ravishankar N <ravishankar@redhat.com> Reported-by: Rahul Hinduja <rhinduja@redhat.com> Reported-by: Mabi <mabi@protonmail.ch>
-rw-r--r--tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t2
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c65
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c6
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h8
4 files changed, 80 insertions, 1 deletions
diff --git a/tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t b/tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t
index 81655074c9c..fe8e992e8f8 100644
--- a/tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t
+++ b/tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t
@@ -28,7 +28,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0
TEST kill_brick $V0 $H0 $B0/$V0"0"
TEST stat $M0/datafile
TEST `echo append>>$M0/datafile`
-TEST chmod +x $M0/mdatafile
+TEST chmod -x $M0/mdatafile
TEST $CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 0
TEST ! cat $M0/datafile
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 998289711df..6a159dc67d0 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1172,6 +1172,65 @@ afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame,
return fav_child;
}
+int
+afr_mark_source_sinks_if_file_empty (xlator_t *this, unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ afr_transaction_type type)
+{
+ int source = -1;
+ int i = 0;
+ afr_private_t *priv = this->private;
+ struct iatt stbuf = {0, };
+ /* Pick a heal source for a file that is zero bytes on every brick. */
+ if ((AFR_COUNT (locked_on, priv->child_count) < priv->child_count) ||
+ (afr_success_count(replies, priv->child_count) < priv->child_count))
+ return -1;
+ /* A non-zero poststat size on any brick means no source is picked here. */
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].poststat.ia_size != 0)
+ return -1;
+ }
+ /* Data heals need no metadata comparison once every size is zero. */
+ if (type == AFR_DATA_TRANSACTION)
+ goto mark;
+
+ /* For AFR_METADATA_TRANSACTION, metadata must match brick 0 on all bricks. */
+ stbuf = replies[0].poststat;
+ for (i = 1; i < priv->child_count; i++) {
+ if ((!IA_EQUAL (stbuf, replies[i].poststat, type)) ||
+ (!IA_EQUAL (stbuf, replies[i].poststat, uid)) ||
+ (!IA_EQUAL (stbuf, replies[i].poststat, gid)) ||
+ (!IA_EQUAL (stbuf, replies[i].poststat, prot)))
+ return -1;
+ }
+ for (i = 1; i < priv->child_count; i++) {
+ if (!afr_xattrs_are_equal (replies[0].xdata,
+ replies[i].xdata))
+ return -1;
+ }
+
+mark:
+ /* All bricks have a zero-byte file. The first brick becomes the source;
+ * every other brick is marked in both sinks and healed_sinks. */
+ for (i = 0 ; i < priv->child_count; i++) {
+ if (source == -1) {
+ source = i;
+ sources[i] = 1;
+ sinks[i] = 0;
+ healed_sinks[i] = 0;
+ continue;
+ }
+ sources[i] = 0;
+ sinks[i] = 1;
+ healed_sinks[i] = 1;
+ }
+ /* Index marked in sources[] above; the early exits above return -1. */
+ return source;
+}
+
/* Return a source depending on the type of heal_op, and set sources[source],
* sinks[source] and healed_sinks[source] to 1, 0 and 0 respectively. Do so
* only if the following condition is met:
@@ -1200,6 +1259,12 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,
priv = this->private;
xdata_req = local->xdata_req;
+ source = afr_mark_source_sinks_if_file_empty (this, sources, sinks,
+ healed_sinks, locked_on,
+ replies, type);
+ if (source >= 0)
+ return source;
+
ret = dict_get_int32 (xdata_req, "heal-op", &heal_op);
if (ret)
goto autoheal;
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 13679608dfd..2c254e80aa1 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -581,6 +581,12 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this,
afr_mark_largest_file_as_source (this, sources, replies);
afr_mark_biggest_witness_as_source (this, sources, witness);
afr_mark_newest_file_as_source (this, sources, replies);
+ if (priv->arbiter_count)
+ /* Choose non-arbiter brick as source for empty files. */
+ afr_mark_source_sinks_if_file_empty (this, sources, sinks,
+ healed_sinks, locked_on,
+ replies,
+ AFR_DATA_TRANSACTION);
out:
afr_mark_active_sinks (this, sources, locked_on, healed_sinks);
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index c501852ba5f..f7c4695cd46 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -342,4 +342,12 @@ afr_gfid_split_brain_source (xlator_t *this, struct afr_reply *replies,
int src_idx, int child_idx,
unsigned char *locked_on, int *src, dict_t *xdata);
+int
+afr_mark_source_sinks_if_file_empty (xlator_t *this, unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ afr_transaction_type type);
+
#endif /* !_AFR_SELFHEAL_H */