summary | refs | log | tree | commit | diff | stats
path: root/xlators/cluster/afr/src/afr-self-heal-data.c
diff options
context:
space:
mode:
author: Ravishankar N <ravishankar@redhat.com> 2015-10-21 21:05:46 +0530
committer: Jeff Darcy <jdarcy@redhat.com> 2015-10-28 06:39:42 -0700
commit: 641b3a9164227db52df1aab05795c90d06b315f2 (patch)
tree: e2a55cb4aed7e6376f6a6da55c6493247d99c366 /xlators/cluster/afr/src/afr-self-heal-data.c
parent: 8e5a7632edd040031e4942134331172805bc8eff (diff)
afr: write zeros to sink for non-sparse files
Problem: If a file is created with zeroes ('dd', 'fallocate' etc.) when a brick is down, the self-heal does not write the zeroes to the sink after it comes up. Consequently, there is a mismatch in disk-usage amongst the bricks of the replica.

Fix: If we definitely know that the file is not sparse, then write the zeroes to the sink even if the checksums match.

Change-Id: Ic739b3da5dbf47d99801c0e1743bb13aeb3af864
BUG: 1272460
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: http://review.gluster.org/12371
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Diffstat (limited to 'xlators/cluster/afr/src/afr-self-heal-data.c')
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-data.c 57
1 files changed, 41 insertions, 16 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 36d658fa454..4529462c3a9 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -28,13 +28,18 @@ __checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dict_t *xdata)
{
afr_local_t *local = NULL;
+ struct afr_reply *replies = NULL;
int i = (long) cookie;
local = frame->local;
-
- local->replies[i].valid = 1;
- local->replies[i].op_ret = op_ret;
- local->replies[i].op_errno = op_errno;
+ replies = local->replies;
+
+ replies[i].valid = 1;
+ replies[i].op_ret = op_ret;
+ replies[i].op_errno = op_errno;
+ if (xdata)
+ replies[i].buf_has_zeroes = dict_get_str_boolean (xdata,
+ "buf-has-zeroes", _gf_false);
if (strong)
memcpy (local->replies[i].checksum, strong, MD5_DIGEST_LENGTH);
@@ -70,19 +75,23 @@ attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
static gf_boolean_t
-__afr_selfheal_data_checksums_match (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int source,
- unsigned char *healed_sinks,
- off_t offset, size_t size)
+__afr_can_skip_data_block_heal (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *healed_sinks,
+ off_t offset, size_t size,
+ struct iatt *poststat)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
unsigned char *wind_subvols = NULL;
+ gf_boolean_t checksum_match = _gf_true;
+ dict_t *xdata = NULL;
int i = 0;
priv = this->private;
local = frame->local;
-
+ xdata = dict_new();
+ if (xdata)
+ i = dict_set_int32 (xdata, "check-zero-filled", 1);
wind_subvols = alloca0 (priv->child_count);
for (i = 0; i < priv->child_count; i++) {
if (i == source || healed_sinks[i])
@@ -90,7 +99,9 @@ __afr_selfheal_data_checksums_match (call_frame_t *frame, xlator_t *this,
}
AFR_ONLIST (wind_subvols, frame, __checksum_cbk, rchecksum, fd,
- offset, size, NULL);
+ offset, size, xdata);
+ if (xdata)
+ dict_unref (xdata);
if (!local->replies[source].valid || local->replies[source].op_ret != 0)
return _gf_false;
@@ -101,12 +112,26 @@ __afr_selfheal_data_checksums_match (call_frame_t *frame, xlator_t *this,
if (local->replies[i].valid) {
if (memcmp (local->replies[source].checksum,
local->replies[i].checksum,
- MD5_DIGEST_LENGTH))
- return _gf_false;
+ MD5_DIGEST_LENGTH)) {
+ checksum_match = _gf_false;
+ break;
+ }
}
}
- return _gf_true;
+ if (checksum_match) {
+ if (HAS_HOLES (poststat))
+ return _gf_true;
+
+ /* For non-sparse files, we might be better off writing the
+ * zeroes to sinks to avoid mismatch of disk-usage in bricks. */
+ if (local->replies[source].buf_has_zeroes)
+ return _gf_false;
+ else
+ return _gf_true;
+ }
+
+ return _gf_false;
}
@@ -220,7 +245,6 @@ __afr_selfheal_data_read_write (call_frame_t *frame, xlator_t *this, fd_t *fd,
return ret;
}
-
static int
afr_selfheal_data_block (call_frame_t *frame, xlator_t *this, fd_t *fd,
int source, unsigned char *healed_sinks, off_t offset,
@@ -244,8 +268,9 @@ afr_selfheal_data_block (call_frame_t *frame, xlator_t *this, fd_t *fd,
}
if (type == AFR_SELFHEAL_DATA_DIFF &&
- __afr_selfheal_data_checksums_match (frame, this, fd, source,
- healed_sinks, offset, size)) {
+ __afr_can_skip_data_block_heal (frame, this, fd, source,
+ healed_sinks, offset, size,
+ &replies[source].poststat)) {
ret = 0;
goto unlock;
}