From 190037c10ce0e5e56e67a248e10f86538e3cdfc4 Mon Sep 17 00:00:00 2001 From: Emmanuel Dreyfus Date: Tue, 23 Dec 2014 06:54:53 +0100 Subject: Avoid spurious directory metadata split brain When directory content is modified, [mc]time is updated. On Linux, the filesystem does it, while at least on NetBSD, the kernel file-system independent code does it. This means that when entries are added while bricks are down, the kernel sends a SETATTR [mc]time which will cause metadata split brain for the directory. In this case, clear the split brain by finding the source with the most recent modification date. Backport of: Ic0177e0df753a4748624d0b906834ed54593adb9 BUG: 1138897 Change-Id: Ic2e697be4f0074e2bbd3f23d6ad40a2d2d126a2c Signed-off-by: Emmanuel Dreyfus Reviewed-on: http://review.gluster.org/9319 Reviewed-by: Krutika Dhananjay Reviewed-by: Ravishankar N Tested-by: Gluster Build System Reviewed-by: Raghavendra Bhat --- xlators/cluster/afr/src/afr-self-heal-metadata.c | 116 ++++++++++++++++++++++- 1 file changed, 115 insertions(+), 1 deletion(-) diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 68dc568bc65..87600df3bad 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -99,6 +99,96 @@ out: return ret; } +static inline uint64_t +mtime_ns(struct iatt *ia) +{ + uint64_t ret; + + ret = (((uint64_t)(ia->ia_mtime)) * 1000000000) + + (uint64_t)(ia->ia_mtime_nsec); + + return ret; +} + +/* + * When directory content is modified, [mc]time is updated. On + * Linux, the filesystem does it, while at least on NetBSD, the + * kernel file-system independent code does it. This means that + * when entries are added while bricks are down, the kernel sends + * a SETATTR [mc]time which will cause metadata split brain for + * the directory. In this case, clear the split brain by finding + * the source with the most recent modification date. 
+ */ +static int +afr_dirtime_splitbrain_source (call_frame_t *frame, xlator_t *this, + struct afr_reply *replies, + unsigned char *locked_on) +{ + afr_private_t *priv = NULL; + int source = -1; + struct iatt source_ia; + struct iatt child_ia; + uint64_t mtime = 0; + int i; + int ret = -1; + + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if (!locked_on[i]) + continue; + + if (!replies[i].valid) + continue; + + if (replies[i].op_ret != 0) + continue; + + if (mtime_ns(&replies[i].poststat) <= mtime) + continue; + + mtime = mtime_ns(&replies[i].poststat); + source = i; + } + + if (source == -1) + goto out; + + source_ia = replies[source].poststat; + if (source_ia.ia_type != IA_IFDIR) + goto out; + + for (i = 0; i < priv->child_count; i++) { + if (i == source) + continue; + + if (!replies[i].valid) + continue; + + if (replies[i].op_ret != 0) + continue; + + child_ia = replies[i].poststat; + + if (!IA_EQUAL(source_ia, child_ia, gfid) || + !IA_EQUAL(source_ia, child_ia, type) || + !IA_EQUAL(source_ia, child_ia, prot) || + !IA_EQUAL(source_ia, child_ia, uid) || + !IA_EQUAL(source_ia, child_ia, gid) || + !afr_xattrs_are_equal (replies[source].xdata, + replies[i].xdata)) + goto out; + } + + /* + * Metadata split brain is just about [amc]time + * We return our source. 
+ */ + ret = source; +out: + return ret; +} + /* * Look for mismatching uid/gid or mode or user xattrs even if @@ -122,10 +212,34 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this, sources_count = AFR_COUNT (sources, priv->child_count); if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) == 0) - || !sources_count) { + || !sources_count) { + /* If this is a directory mtime/ctime only split brain + use the most recent */ + source = afr_dirtime_splitbrain_source (frame, this, + replies, locked_on); + if (source != -1) { + gf_log (this->name, GF_LOG_NOTICE, "clear time " + "split brain on %s", + uuid_utoa (replies[source].poststat.ia_gfid)); + sources[source] = 1; + + for (i = 0; i < priv->child_count; i++) { + if (i == source) + continue; + + if (!locked_on[i]) + continue; + + healed_sinks[i] = 1; + } + + return source; + } + if (!priv->metadata_splitbrain_forced_heal) { return -EIO; } + /* Metadata split brain, select one subvol arbitrarily */ for (i = 0; i < priv->child_count; i++) { -- cgit