diff options
-rw-r--r-- | tests/basic/afr/sparse-file-self-heal.t | 138 | ||||
-rw-r--r-- | tests/volume.rc | 9 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-algorithm.c | 197 |
3 files changed, 293 insertions, 51 deletions
diff --git a/tests/basic/afr/sparse-file-self-heal.t b/tests/basic/afr/sparse-file-self-heal.t new file mode 100644 index 00000000000..f2a0863c686 --- /dev/null +++ b/tests/basic/afr/sparse-file-self-heal.t @@ -0,0 +1,138 @@ +#!/bin/bash + +#This file checks if self-heal of files with holes is working properly or not +#bigger is 2M, big is 1M, small is anything less +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume set $V0 data-self-heal-algorithm full +TEST $CLI volume start $V0 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0; +TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k +TEST dd if=/dev/urandom of=$M0/bigger2big count=1 bs=2048k +TEST dd if=/dev/urandom of=$M0/big2bigger count=1 bs=1024k +TEST truncate -s 1G $M0/FILE + +TEST kill_brick $V0 $H0 $B0/${V0}0 + +#File with >128k size hole +TEST truncate -s 1M $M0/big +big_md5sum=$(md5sum $M0/big | awk '{print $1}') + +#File with <128k hole +TEST truncate -s 0 $M0/small +TEST truncate -s 64k $M0/small +small_md5sum=$(md5sum $M0/small | awk '{print $1}') + +#Bigger file truncated to big size hole. +TEST truncate -s 0 $M0/bigger2big +TEST truncate -s 1M $M0/bigger2big +bigger2big_md5sum=$(md5sum $M0/bigger2big | awk '{print $1}') + +#Big file truncated to Bigger size hole +TEST truncate -s 2M $M0/big2bigger +big2bigger_md5sum=$(md5sum $M0/big2bigger | awk '{print $1}') + +#Write data to file and restore its sparseness +TEST dd if=/dev/urandom of=$M0/FILE count=1 bs=131072 +TEST truncate -s 1G $M0/FILE + +$CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST gluster volume heal $V0 full +EXPECT_WITHIN $HEAL_TIMEOUT "0" afr_get_pending_heal_count $V0 + +big_md5sum_0=$(md5sum $B0/${V0}0/big | awk '{print $1}') +small_md5sum_0=$(md5sum $B0/${V0}0/small | awk '{print $1}') +bigger2big_md5sum_0=$(md5sum $B0/${V0}0/bigger2big | awk '{print $1}') +big2bigger_md5sum_0=$(md5sum $B0/${V0}0/big2bigger | awk '{print $1}') + +EXPECT $big_md5sum echo $big_md5sum_0 +EXPECT $small_md5sum echo $small_md5sum_0 +EXPECT $big2bigger_md5sum echo $big2bigger_md5sum_0 +EXPECT $bigger2big_md5sum echo $bigger2big_md5sum_0 + + +EXPECT "1" has_holes $B0/${V0}0/big +#Because self-heal writes the final chunk hole should not be there for +#files < 128K +EXPECT "0" has_holes $B0/${V0}0/small +# Since source is smaller than sink, self-heal does blind copy so no holes will +# be present +EXPECT "0" has_holes $B0/${V0}0/bigger2big +EXPECT "1" has_holes $B0/${V0}0/big2bigger + +#Check that self-heal has not written 0s to sink and made it non-sparse. +USED_KB=`du -s $B0/${V0}0/FILE|cut -f1` +TEST [ $USED_KB -lt 1000000 ] +TEST rm -f $M0/* + +#check the same tests with diff self-heal +TEST $CLI volume set $V0 data-self-heal-algorithm diff + +TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k +TEST dd if=/dev/urandom of=$M0/big2bigger count=1 bs=1024k +TEST dd if=/dev/urandom of=$M0/bigger2big count=1 bs=2048k +TEST truncate -s 1G $M0/FILE + +TEST kill_brick $V0 $H0 $B0/${V0}0 + +#File with >128k size hole +TEST truncate -s 1M $M0/big +big_md5sum=$(md5sum $M0/big | awk '{print $1}') + +#File with <128k hole +TEST truncate -s 0 $M0/small +TEST truncate -s 64k $M0/small +small_md5sum=$(md5sum $M0/small | awk '{print $1}') + +#Bigger file truncated to big size hole +TEST truncate -s 0 $M0/bigger2big +TEST truncate -s 1M $M0/bigger2big +bigger2big_md5sum=$(md5sum $M0/bigger2big | awk '{print $1}') + +#Big file truncated to Bigger size hole +TEST truncate -s 2M $M0/big2bigger +big2bigger_md5sum=$(md5sum $M0/big2bigger | awk '{print $1}') + +#Write data to file and restore its sparseness +TEST dd if=/dev/urandom of=$M0/FILE count=1 bs=131072 +TEST truncate -s 1G $M0/FILE + +$CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST gluster volume heal $V0 full +EXPECT_WITHIN $HEAL_TIMEOUT "0" afr_get_pending_heal_count $V0 + +big_md5sum_0=$(md5sum $B0/${V0}0/big | awk '{print $1}') +small_md5sum_0=$(md5sum $B0/${V0}0/small | awk '{print $1}') +bigger2big_md5sum_0=$(md5sum $B0/${V0}0/bigger2big | awk '{print $1}') +big2bigger_md5sum_0=$(md5sum $B0/${V0}0/big2bigger | awk '{print $1}') + +EXPECT $big_md5sum echo $big_md5sum_0 +EXPECT $small_md5sum echo $small_md5sum_0 +EXPECT $big2bigger_md5sum echo $big2bigger_md5sum_0 +EXPECT $bigger2big_md5sum echo $bigger2big_md5sum_0 + +EXPECT "1" has_holes $B0/${V0}0/big +EXPECT "1" has_holes $B0/${V0}0/big2bigger +EXPECT "0" has_holes $B0/${V0}0/bigger2big +EXPECT "0" has_holes $B0/${V0}0/small + +#Check that self-heal has not written 0s to sink and made it non-sparse. +USED_KB=`du -s $B0/${V0}0/FILE|cut -f1` +TEST [ $USED_KB -lt 1000000 ] + +cleanup diff --git a/tests/volume.rc b/tests/volume.rc index 1c58597c661..53f863e2733 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -293,3 +293,12 @@ function get_hex_xattr { local path=$2 getfattr -d -m. -e hex $2 2>/dev/null | grep $1 | cut -f2 -d'=' | cut -f2 -d'x' } + +function has_holes { + if [ $((`stat -c '%b*%B-%s' $1`)) -lt 0 ]; + then + echo "1" + else + echo "0" + fi +} diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c index 83846f152d2..fa11678a58a 100644 --- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c +++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.c @@ -400,7 +400,7 @@ sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame } static int -sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this, +sh_loop_sink_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *buf, struct iatt *postbuf, dict_t *xdata) { @@ -458,21 +458,117 @@ sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this, } static void -sh_prune_writes_needed (call_frame_t *sh_frame, call_frame_t *loop_frame, - afr_private_t *priv) +sh_loop_sink_write (call_frame_t *loop_frame, xlator_t *this, + struct iovec *vector, int32_t count, struct iobref *iobref) { - afr_local_t *sh_local = NULL; - afr_self_heal_t *sh = NULL; - afr_local_t *loop_local = NULL; - afr_self_heal_t *loop_sh = NULL; - int i = 0; + afr_private_t * priv = NULL; + afr_local_t *loop_local = NULL; + afr_self_heal_t *loop_sh = NULL; + call_frame_t *sh_frame = NULL; + int call_count = 0; + int i = 0; + + priv = this->private; + loop_local = loop_frame->local; + loop_sh = &loop_local->self_heal; + sh_frame = loop_sh->sh_frame; + + call_count = sh_number_of_writes_needed (loop_sh->write_needed, + priv->child_count); + if (call_count == 0) { + iobref_unref(loop_local->cont.writev.iobref); + sh_loop_return (sh_frame, this, loop_frame, 0, 0); + goto out; + } + + loop_local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (!loop_sh->write_needed[i]) + continue; + STACK_WIND_COOKIE (loop_frame, sh_loop_sink_write_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->writev, + loop_sh->healing_fd, vector, count, + loop_sh->offset, 0, iobref, NULL); + + if (!--call_count) + break; + } + +out: + return; +} + +static int +sh_loop_sink_read_cbk (call_frame_t *loop_frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + struct iovec *vector, int32_t count, struct iatt *buf, + struct iobref *iobref, dict_t *xdata) +{ + int32_t child_index = 0; + int call_count = 0; + afr_local_t *loop_local = NULL; + afr_self_heal_t *loop_sh = NULL; + call_frame_t *sh_frame = NULL; + afr_local_t *sh_local = NULL; + afr_private_t *priv = NULL; + + child_index = (long) cookie; + priv = this->private; + + loop_local = loop_frame->local; + loop_sh = &loop_local->self_heal; + + sh_frame = loop_sh->sh_frame; + sh_local = sh_frame->local; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, "read failed on %s " + "for %s reason :%s", priv->children[child_index]->name, + sh_local->loc.path, strerror (op_errno)); + afr_sh_set_error (loop_sh, op_errno); + } + + if ((op_ret > 0) && (iov_0filled (vector, count) == 0)) { + loop_sh->write_needed[child_index] = 0; + } + + call_count = afr_frame_return (loop_frame); + + if (call_count == 0) { + if (loop_sh->op_ret == -1) { + iobref_unref(loop_local->cont.writev.iobref); + sh_loop_return (sh_frame, this, loop_frame, + loop_sh->op_ret, loop_sh->op_errno); + goto out; + } + sh_loop_sink_write (loop_frame, this, + loop_local->cont.writev.vector, + loop_local->cont.writev.count, + loop_local->cont.writev.iobref); + } +out: + return 0; +} + +static void +sh_prune_writes_if_needed (call_frame_t *sh_frame, call_frame_t *loop_frame, + afr_private_t *priv, xlator_t *this, + struct iovec *vector, int32_t count, + struct iobref *iobref) +{ + afr_local_t *sh_local = NULL; + afr_self_heal_t *sh = NULL; + afr_local_t *loop_local = NULL; + afr_self_heal_t *loop_sh = NULL; + int i = 0; + int call_count = 0; sh_local = sh_frame->local; sh = &sh_local->self_heal; - if (!strcmp (sh->algo->name, "diff")) - return; - loop_local = loop_frame->local; loop_sh = &loop_local->self_heal; @@ -485,10 +581,31 @@ sh_prune_writes_needed (call_frame_t *sh_frame, call_frame_t *loop_frame, ((loop_sh->offset + 1) > sh->buf[i].ia_size)) loop_sh->write_needed[i] = 0; } + + call_count = sh_number_of_writes_needed (loop_sh->write_needed, + priv->child_count); + if (!call_count) { + iobref_unref(loop_local->cont.writev.iobref); + sh_loop_return (sh_frame, this, loop_frame, 0, 0); + return; + } + loop_local->call_count = call_count; + for (i = 0; i < priv->child_count; i++) { + if (!loop_sh->write_needed[i]) + continue; + STACK_WIND_COOKIE (loop_frame, sh_loop_sink_read_cbk, (void *)(long) i, + priv->children[i], priv->children[i]->fops->readv, + loop_sh->healing_fd, loop_sh->block_size, + loop_sh->offset, 0, NULL); + if (!--call_count) + break; + } + + return; } static int -sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie, +sh_loop_source_read_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iovec *vector, int32_t count, struct iatt *buf, struct iobref *iobref, dict_t *xdata) @@ -497,8 +614,6 @@ sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie, afr_local_t * loop_local = NULL; afr_self_heal_t * loop_sh = NULL; call_frame_t *sh_frame = NULL; - int i = 0; - int call_count = 0; afr_local_t * sh_local = NULL; afr_self_heal_t * sh = NULL; @@ -517,9 +632,10 @@ sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie, if (op_ret <= 0) { if (op_ret < 0) { afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); - gf_log (this->name, GF_LOG_ERROR, "read failed on %d " - "for %s reason :%s", sh->source, - sh_local->loc.path, strerror (errno)); + gf_log (this->name, GF_LOG_ERROR, "read failed on %s " + "for %s reason :%s", + priv->children[sh->source]->name, + sh_local->loc.path, strerror (op_errno)); } else { sh->eof_reached = _gf_true; gf_log (this->name, GF_LOG_DEBUG, "Eof reached for %s", @@ -529,38 +645,17 @@ sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie, goto out; } - if (loop_sh->file_has_holes && iov_0filled (vector, count) == 0) - sh_prune_writes_needed (sh_frame, loop_frame, priv); - - call_count = sh_number_of_writes_needed (loop_sh->write_needed, - priv->child_count); - if (call_count == 0) { - sh_loop_return (sh_frame, this, loop_frame, 0, 0); - goto out; - } - - loop_local->call_count = call_count; - - /* - * We only really need the request size at the moment, but the buffer - * is required if we want to issue a retry in the event of a short write. - * Therefore, we duplicate the vector and ref the iobref here... - */ - loop_local->cont.writev.vector = iov_dup(vector, count); - loop_local->cont.writev.iobref = iobref_ref(iobref); + loop_local->cont.writev.vector = iov_dup(vector, count); + loop_local->cont.writev.iobref = iobref_ref(iobref); + loop_local->cont.writev.count = count; - for (i = 0; i < priv->child_count; i++) { - if (!loop_sh->write_needed[i]) - continue; - STACK_WIND_COOKIE (loop_frame, sh_loop_write_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->writev, - loop_sh->healing_fd, vector, count, - loop_sh->offset, 0, iobref, NULL); + if (!strcmp (sh->algo->name, "full") && loop_sh->file_has_holes && + iov_0filled (vector, count) == 0) { + sh_prune_writes_if_needed (sh_frame, loop_frame, priv, this, + vector, count, iobref); + } else { + sh_loop_sink_write (loop_frame, this, vector, count, iobref); - if (!--call_count) - break; } out: @@ -569,7 +664,7 @@ out: static int -sh_loop_read (call_frame_t *loop_frame, xlator_t *this) +sh_loop_source_read (call_frame_t *loop_frame, xlator_t *this) { afr_private_t *priv = NULL; afr_local_t *loop_local = NULL; @@ -579,7 +674,7 @@ sh_loop_read (call_frame_t *loop_frame, xlator_t *this) loop_local = loop_frame->local; loop_sh = &loop_local->self_heal; - STACK_WIND_COOKIE (loop_frame, sh_loop_read_cbk, + STACK_WIND_COOKIE (loop_frame, sh_loop_source_read_cbk, (void *) (long) loop_sh->source, priv->children[loop_sh->source], priv->children[loop_sh->source]->fops->readv, @@ -666,7 +761,7 @@ sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this, if (write_needed && !is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) { - sh_loop_read (loop_frame, this); + sh_loop_source_read (loop_frame, this); } else { sh_loop_return (sh_frame, this, loop_frame, op_ret, op_errno); @@ -735,7 +830,7 @@ sh_full_read_write_to_sinks (call_frame_t *loop_frame, xlator_t *this) continue; loop_sh->write_needed[i] = 1; } - sh_loop_read (loop_frame, this); + sh_loop_source_read (loop_frame, this); return 0; } |