summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src/afr-self-heal-algorithm.c
diff options
context:
space:
mode:
authorPranith Kumar K <pranithk@gluster.com>2011-08-20 15:48:27 +0530
committerVijay Bellur <vijay@gluster.com>2011-08-20 06:42:55 -0700
commit1af420c700fbc49b65cf7faceb3270e81cd991ce (patch)
treeee0dcfe62b4965191424b3121a4dd126e81260b8 /xlators/cluster/afr/src/afr-self-heal-algorithm.c
parent2ebacdfdd3c39bf2d3139cb7d811356758a2350a (diff)
cluster/afr: Perform self-heal without locking the whole file
Change-Id: I206571c77f2d7b3c9f9d7bb82a936366fd99ce5c BUG: 3182 Reviewed-on: http://review.gluster.com/141 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vijay@gluster.com>
Diffstat (limited to 'xlators/cluster/afr/src/afr-self-heal-algorithm.c')
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.c1114
1 files changed, 384 insertions, 730 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
index 04b388fe052..1c7cdf41819 100644
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c
+++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
@@ -44,279 +44,249 @@
This file contains the various self-heal algorithms
*/
+static int
+sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
+ gf_boolean_t is_first_call, call_frame_t *old_loop_frame);
+static int
+sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
+ int32_t op_ret, int32_t op_errno);
+static int
+sh_destroy_frame (call_frame_t *frame, xlator_t *this)
+{
+ if (!frame)
+ goto out;
-/*
- The "full" algorithm. Copies the entire file from
- source to sinks.
-*/
-
+ AFR_STACK_DESTROY (frame);
+out:
+ return 0;
+}
static void
-sh_full_private_cleanup (call_frame_t *frame, xlator_t *this)
+sh_private_cleanup (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
local = frame->local;
sh = &local->self_heal;
sh_priv = sh->private;
-
if (sh_priv)
GF_FREE (sh_priv);
}
-
static int
-sh_full_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_call);
+sh_number_of_writes_needed (unsigned char *write_needed, int child_count)
+{
+ int writes = 0;
+ int i = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (write_needed[i])
+ writes++;
+ }
+
+ return writes;
+}
+
static int
-sh_full_loop_driver_done (call_frame_t *frame, xlator_t *this)
+sh_loop_driver_done (call_frame_t *frame, xlator_t *this,
+ call_frame_t *last_loop_frame)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
+ int32_t total_blocks = 0;
+ int32_t diff_blocks = 0;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ sh_priv = sh->private;
+ total_blocks = sh_priv->total_blocks;
+ diff_blocks = sh_priv->diff_blocks;
- sh_full_private_cleanup (frame, this);
+ sh_private_cleanup (frame, this);
if (sh->op_failed) {
+ GF_ASSERT (!last_loop_frame);
+ //loop_finish should have happened and the old_loop should be NULL
gf_log (this->name, GF_LOG_INFO,
- "full self-heal aborting on %s",
+ "self-heal aborting on %s",
local->loc.path);
local->self_heal.algo_abort_cbk (frame, this);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "full self-heal completed on %s",
- local->loc.path);
+ GF_ASSERT (last_loop_frame);
+ if (diff_blocks == total_blocks) {
+ gf_log (this->name, GF_LOG_INFO, "full self-heal "
+ "completed on %s",local->loc.path);
+ } else {
+ gf_log (this->name, GF_LOG_INFO,
+ "diff self-heal on %s: completed. "
+ "(%d blocks of %d were different (%.2f%%))",
+ local->loc.path, diff_blocks, total_blocks,
+ ((diff_blocks * 1.0)/total_blocks) * 100);
+ }
+ sh->old_loop_frame = last_loop_frame;
local->self_heal.algo_completion_cbk (frame, this);
}
+
return 0;
}
-static int
-sh_full_loop_return (call_frame_t *rw_frame, xlator_t *this, off_t offset)
+int
+sh_loop_finish (call_frame_t *loop_frame, xlator_t *this)
{
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
-
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
- sh_priv = sh->private;
-
- AFR_STACK_DESTROY (rw_frame);
+ if (!loop_frame)
+ goto out;
- sh_full_loop_driver (sh_frame, this, _gf_false);
+ loop_local = loop_frame->local;
+ if (loop_local) {
+ loop_sh = &loop_local->self_heal;
+ }
+ if (loop_sh && loop_sh->loop_completion_cbk) {
+ if (loop_sh->data_lock_held) {
+ afr_sh_data_unlock (loop_frame, this,
+ loop_sh->loop_completion_cbk);
+ } else {
+ loop_sh->loop_completion_cbk (loop_frame, this);
+ }
+ } else {
+ //default loop_completion_cbk destroys the loop_frame
+ if (loop_sh && !loop_sh->loop_completion_cbk)
+ GF_ASSERT (!loop_sh->data_lock_held);
+ sh_destroy_frame (loop_frame, this);
+ }
+out:
return 0;
}
-
static int
-sh_full_write_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+sh_loop_lock_success (call_frame_t *loop_frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t *rw_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- int child_index = (long) cookie;
- int call_count = 0;
-
- priv = this->private;
-
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- gf_log (this->name, GF_LOG_TRACE,
- "wrote %d bytes of data from %s to child %d, offset %"PRId64"",
- op_ret, sh_local->loc.path, child_index,
- rw_sh->offset - op_ret);
-
- LOCK (&sh_frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "write to %s failed on subvolume %s (%s)",
- sh_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
- sh->op_failed = 1;
- }
- }
- UNLOCK (&sh_frame->lock);
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- call_count = afr_frame_return (rw_frame);
-
- if (call_count == 0) {
- sh_full_loop_return (rw_frame, this, rw_sh->offset - op_ret);
- }
+ sh_loop_finish (loop_sh->old_loop_frame, this);
+ loop_sh->old_loop_frame = NULL;
+ gf_log (this->name, GF_LOG_DEBUG, "Aquired lock for range %"PRIu64
+ " %"PRIu64, loop_sh->offset, loop_sh->block_size);
+ loop_sh->data_lock_held = _gf_true;
+ loop_sh->sh_data_algo_start (loop_frame, this);
return 0;
}
-
static int
-sh_full_read_cbk (call_frame_t *rw_frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref)
+sh_loop_lock_failure (call_frame_t *loop_frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t *rw_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- int i = 0;
- int call_count = 0;
- off_t offset = (long) cookie;
-
- priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- call_count = sh->active_sinks;
-
- rw_local->call_count = call_count;
-
- gf_log (this->name, GF_LOG_TRACE,
- "read %d bytes of data from %s, offset %"PRId64"",
- op_ret, sh_local->loc.path, offset);
-
- if (op_ret <= 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "read from %s failed on subvolume %s (%s)",
- sh_local->loc.path,
- priv->children[sh->source]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- sh_full_loop_return (rw_frame, this, offset);
- return 0;
- }
-
- rw_sh->offset += op_ret;
-
- if (sh->file_has_holes) {
- if (iov_0filled (vector, count) == 0) {
- /* the iter function depends on the
- sh->offset already being updated
- above
- */
- gf_log (this->name, GF_LOG_DEBUG,
- "block has all 0 filled");
- sh_full_loop_return (rw_frame, this, offset);
- goto out;
- }
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !sh_local->child_up[i])
- continue;
-
- /* this is a sink, so write to it */
-
- STACK_WIND_COOKIE (rw_frame, sh_full_write_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->writev,
- sh->healing_fd, vector, count, offset,
- iobref);
-
- if (!--call_count)
- break;
- }
-
-out:
+ call_frame_t *sh_frame = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+ sh_frame = loop_sh->sh_frame;
+
+ gf_log (this->name, GF_LOG_ERROR, "failed lock for range %"PRIu64
+ " %"PRIu64, loop_sh->offset, loop_sh->block_size);
+ sh_loop_finish (loop_sh->old_loop_frame, this);
+ loop_sh->old_loop_frame = NULL;
+ sh_loop_return (sh_frame, this, loop_frame, -1, ENOTCONN);
return 0;
}
-
static int
-sh_full_read_write (call_frame_t *frame, xlator_t *this, off_t offset)
+sh_loop_start (call_frame_t *sh_frame, xlator_t *this, off_t offset,
+ call_frame_t *old_loop_frame)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t *rw_sh = NULL;
- afr_self_heal_t *sh = NULL;
- call_frame_t *rw_frame = NULL;
- int32_t op_errno = 0;
+ call_frame_t *new_loop_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *new_loop_local = NULL;
+ afr_self_heal_t *new_loop_sh = NULL;
+ afr_private_t *priv = NULL;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- rw_frame = copy_frame (frame);
- if (!rw_frame)
- goto out;
+ GF_ASSERT (sh_frame);
- ALLOC_OR_GOTO (rw_local, afr_local_t, out);
-
- rw_frame->local = rw_local;
- rw_sh = &rw_local->self_heal;
+ local = sh_frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- rw_sh->offset = offset;
- rw_sh->sh_frame = frame;
+ new_loop_frame = copy_frame (sh_frame);
+ if (!new_loop_frame)
+ goto out;
+ //We want the frame to have same lk_oner as sh_frame
+ new_loop_local = afr_local_copy (local, this);
+ if (!new_loop_local)
+ goto out;
+ new_loop_frame->local = new_loop_local;
- STACK_WIND_COOKIE (rw_frame, sh_full_read_cbk,
- (void *) (long) offset,
- priv->children[sh->source],
- priv->children[sh->source]->fops->readv,
- sh->healing_fd, sh->block_size,
- offset);
+ new_loop_sh = &new_loop_local->self_heal;
+ new_loop_sh->sources = memdup (sh->sources,
+ priv->child_count * sizeof (*sh->sources));
+ if (!new_loop_sh->sources)
+ goto out;
+ new_loop_sh->write_needed = GF_CALLOC (priv->child_count,
+ sizeof (*new_loop_sh->write_needed),
+ gf_afr_mt_char);
+ if (!new_loop_sh->write_needed)
+ goto out;
+ new_loop_sh->checksum = GF_CALLOC (priv->child_count, MD5_DIGEST_LEN,
+ gf_afr_mt_uint8_t);
+ if (!new_loop_sh->checksum)
+ goto out;
+ new_loop_sh->offset = offset;
+ new_loop_sh->block_size = sh->block_size;
+ new_loop_sh->old_loop_frame = old_loop_frame;
+ new_loop_sh->sh_frame = sh_frame;
+ new_loop_sh->inode = inode_ref (sh->inode);
+ new_loop_sh->sh_data_algo_start = sh->sh_data_algo_start;
+ new_loop_sh->source = sh->source;
+ new_loop_sh->active_sinks = sh->active_sinks;
+ new_loop_sh->healing_fd = fd_ref (sh->healing_fd);
+ new_loop_sh->file_has_holes = sh->file_has_holes;
+ new_loop_sh->loop_completion_cbk = sh_destroy_frame;
+ afr_sh_data_lock (new_loop_frame, this, offset, new_loop_sh->block_size,
+ sh_loop_lock_success, sh_loop_lock_failure);
return 0;
-
out:
sh->op_failed = 1;
-
- sh_full_loop_driver (frame, this, _gf_false);
-
+ if (new_loop_frame) {
+ new_loop_frame->local = new_loop_local;
+ }
+ if (old_loop_frame)
+ sh_loop_finish (old_loop_frame, this);
+ sh_loop_return (sh_frame, this, new_loop_frame, -1, ENOMEM);
return 0;
}
-
static int
-sh_full_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_call)
+sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
+ gf_boolean_t is_first_call, call_frame_t *old_loop_frame)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
+ afr_self_heal_t * sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
gf_boolean_t is_driver_done = _gf_false;
blksize_t block_size = 0;
- off_t offset = 0;
int loop = 0;
+ off_t offset = 0;
priv = this->private;
- local = frame->local;
+ local = sh_frame->local;
sh = &local->self_heal;
sh_priv = sh->private;
@@ -324,23 +294,18 @@ sh_full_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_
{
if (_gf_false == is_first_call)
sh_priv->loops_running--;
- offset = sh_priv->offset;
- block_size = sh->block_size;
- while ((sh->op_failed == 0) &&
+ offset = sh_priv->offset;
+ block_size = sh->block_size;
+ while ((!sh->eof_reached) && (0 == sh->op_failed) &&
(sh_priv->loops_running < priv->data_self_heal_window_size)
&& (sh_priv->offset < sh->file_size)) {
loop++;
- gf_log (this->name, GF_LOG_TRACE,
- "spawning a loop for offset %"PRId64,
- sh_priv->offset);
-
- sh_priv->offset += sh->block_size;
+ sh_priv->offset += block_size;
sh_priv->loops_running++;
if (_gf_false == is_first_call)
break;
-
}
if (0 == sh_priv->loops_running) {
is_driver_done = _gf_true;
@@ -348,274 +313,130 @@ sh_full_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_
}
UNLOCK (&sh_priv->lock);
+ if (0 == loop) {
+ //loop finish does unlock, but the erasing of the pending
+ //xattrs needs to happen before that so do not finish the loop
+ if (is_driver_done && !sh->op_failed)
+ goto driver_done;
+ if (old_loop_frame) {
+ sh_loop_finish (old_loop_frame, this);
+ old_loop_frame = NULL;
+ }
+ }
+
+ //If we have more loops to form we should finish previous loop after
+ //the next loop lock
while (loop--) {
if (sh->op_failed) {
// op failed in other loop, stop spawning more loops
- sh_full_loop_driver (frame, this, _gf_false);
+ if (old_loop_frame) {
+ sh_loop_finish (old_loop_frame, this);
+ old_loop_frame = NULL;
+ }
+ sh_loop_driver (sh_frame, this, _gf_false, NULL);
} else {
- sh_full_read_write (frame, this, offset);
+ gf_log (this->name, GF_LOG_TRACE, "spawning a loop "
+ "for offset %"PRId64, offset);
+
+ sh_loop_start (sh_frame, this, offset, old_loop_frame);
+ old_loop_frame = NULL;
offset += block_size;
}
}
+driver_done:
if (is_driver_done) {
- sh_full_loop_driver_done (frame, this);
+ sh_loop_driver_done (sh_frame, this, old_loop_frame);
}
-
- return 0;
-}
-
-
-int
-afr_sh_algo_full (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = GF_CALLOC (1, sizeof (*sh_priv),
- gf_afr_mt_afr_private_t);
- if (!sh_priv)
- goto out;
-
- LOCK_INIT (&sh_priv->lock);
-
- sh->private = sh_priv;
-
- local->call_count = 0;
-
- sh_full_loop_driver (frame, this, _gf_true);
-out:
return 0;
}
-
-/*
- * The "diff" algorithm. Copies only those blocks whose checksums
- * don't match with those of source.
- */
-
-
-static void
-sh_diff_private_cleanup (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = sh->private;
-
- for (i = 0; i < priv->data_self_heal_window_size; i++) {
- if (sh_priv->loops[i]) {
- if (sh_priv->loops[i]->write_needed)
- GF_FREE (sh_priv->loops[i]->write_needed);
-
- if (sh_priv->loops[i]->checksum)
- GF_FREE (sh_priv->loops[i]->checksum);
-
- GF_FREE (sh_priv->loops[i]);
- }
- }
-
- if (sh_priv) {
- if (sh_priv->loops)
- GF_FREE (sh_priv->loops);
-
- GF_FREE (sh_priv);
- }
-
-
-}
-
-
-static uint32_t
-__make_cookie (int loop_index, int child_index)
-{
- uint32_t ret = ((loop_index << 16) | child_index);
- return ret;
-}
-
-
-static int
-__loop_index (uint32_t cookie)
-{
- return ((cookie & 0xFFFF0000) >> 16);
-}
-
-
-static int
-__child_index (uint32_t cookie)
-{
- return (cookie & 0x0000FFFF);
-}
-
-
-static void
-sh_diff_loop_state_reset (struct sh_diff_loop_state *loop_state, int child_count)
-{
- loop_state->active = _gf_false;
-// loop_state->offset = 0;
-
- memset (loop_state->write_needed,
- 0, sizeof (*loop_state->write_needed) * child_count);
-
- memset (loop_state->checksum,
- 0, MD5_DIGEST_LEN * child_count);
-}
-
-
static int
-sh_diff_number_of_writes_needed (unsigned char *write_needed, int child_count)
-{
- int writes = 0;
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- if (write_needed[i])
- writes++;
- }
-
- return writes;
-}
-
-
-static int
-sh_diff_loop_driver_done (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
- int32_t total_blocks = 0;
- int32_t diff_blocks = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
- total_blocks = sh_priv->total_blocks;
- diff_blocks = sh_priv->diff_blocks;
-
- sh_diff_private_cleanup (frame, this);
- if (sh->op_failed) {
- gf_log (this->name, GF_LOG_INFO,
- "diff self-heal aborting on %s",
- local->loc.path);
-
- local->self_heal.algo_abort_cbk (frame, this);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "diff self-heal on %s: completed. "
- "(%d blocks of %d were different (%.2f%%))",
- local->loc.path, diff_blocks, total_blocks,
- ((diff_blocks * 1.0)/total_blocks) * 100);
-
- local->self_heal.algo_completion_cbk (frame, this);
- }
-
- return 0;
-}
-
-static int
-sh_diff_loop_driver (call_frame_t *frame, xlator_t *this,
- gf_boolean_t is_first_call,
- struct sh_diff_loop_state *loop_state);
-
-static int
-sh_diff_loop_return (call_frame_t *rw_frame, xlator_t *this,
- struct sh_diff_loop_state *loop_state)
+sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
+ int32_t op_ret, int32_t op_errno)
{
afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
- call_frame_t *sh_frame = NULL;
+ afr_local_t * loop_local = NULL;
+ afr_self_heal_t * loop_sh = NULL;
afr_local_t * sh_local = NULL;
afr_self_heal_t *sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
sh_local = sh_frame->local;
sh = &sh_local->self_heal;
sh_priv = sh->private;
- gf_log (this->name, GF_LOG_TRACE,
- "loop for offset %"PRId64" returned", loop_state->offset);
+ if (loop_frame) {
+ loop_local = loop_frame->local;
+ if (loop_local)
+ loop_sh = &loop_local->self_heal;
+ if (loop_sh)
+ gf_log (this->name, GF_LOG_TRACE, "loop for offset "
+ "%"PRId64" returned", loop_sh->offset);
+ }
- AFR_STACK_DESTROY (rw_frame);
+ if (op_ret == -1) {
+ sh->op_failed = 1;
+ afr_sh_set_error (sh, op_errno);
+ if (loop_frame) {
+ sh_loop_finish (loop_frame, this);
+ loop_frame = NULL;
+ }
+ }
- sh_diff_loop_driver (sh_frame, this, _gf_false, loop_state);
+ sh_loop_driver (sh_frame, this, _gf_false, loop_frame);
return 0;
}
-
static int
-sh_diff_write_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
+sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *postbuf)
{
afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
+ afr_local_t * loop_local = NULL;
+ afr_self_heal_t * loop_sh = NULL;
call_frame_t *sh_frame = NULL;
afr_local_t * sh_local = NULL;
afr_self_heal_t *sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
- struct sh_diff_loop_state *loop_state = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
int call_count = 0;
int child_index = 0;
- int loop_index = 0;
priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- sh_frame = rw_sh->sh_frame;
+ sh_frame = loop_sh->sh_frame;
sh_local = sh_frame->local;
sh = &sh_local->self_heal;
sh_priv = sh->private;
- child_index = __child_index ((uint32_t) (long) cookie);
- loop_index = __loop_index ((uint32_t) (long) cookie);
- loop_state = sh_priv->loops[loop_index];
+ child_index = (long) cookie;
gf_log (this->name, GF_LOG_TRACE,
"wrote %d bytes of data from %s to child %d, offset %"PRId64"",
- op_ret, sh_local->loc.path, child_index,
- loop_state->offset);
+ op_ret, sh_local->loc.path, child_index, loop_sh->offset);
- LOCK (&sh_frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "write to %s failed on subvolume %s (%s)",
- sh_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "write to %s failed on subvolume %s (%s)",
+ sh_local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
- sh->op_failed = 1;
- }
+ sh->op_failed = 1;
+ afr_sh_set_error (loop_sh, op_errno);
}
- UNLOCK (&sh_frame->lock);
- call_count = afr_frame_return (rw_frame);
+ call_count = afr_frame_return (loop_frame);
if (call_count == 0) {
- sh_diff_loop_return (rw_frame, this, loop_state);
+ sh_loop_return (sh_frame, this, loop_frame,
+ loop_sh->op_ret, loop_sh->op_errno);
}
return 0;
@@ -623,74 +444,71 @@ sh_diff_write_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
static int
-sh_diff_read_cbk (call_frame_t *rw_frame, void *cookie,
+sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count, struct iatt *buf,
struct iobref *iobref)
{
afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
- afr_sh_algo_diff_private_t * sh_priv = NULL;
+ afr_local_t * loop_local = NULL;
+ afr_self_heal_t * loop_sh = NULL;
call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- int loop_index = 0;
- struct sh_diff_loop_state *loop_state = NULL;
- uint32_t wcookie = 0;
int i = 0;
int call_count = 0;
+ afr_local_t * sh_local = NULL;
+ afr_self_heal_t * sh = NULL;
- priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- sh_frame = rw_sh->sh_frame;
+ sh_frame = loop_sh->sh_frame;
sh_local = sh_frame->local;
sh = &sh_local->self_heal;
- sh_priv = sh->private;
-
- loop_index = __loop_index ((uint32_t) (long) cookie);
- loop_state = sh_priv->loops[loop_index];
-
- call_count = sh_diff_number_of_writes_needed (loop_state->write_needed,
- priv->child_count);
-
- rw_local->call_count = call_count;
gf_log (this->name, GF_LOG_TRACE,
"read %d bytes of data from %s, offset %"PRId64"",
- op_ret, sh_local->loc.path, loop_state->offset);
-
- if ((op_ret <= 0) ||
- (call_count == 0)) {
- sh_diff_loop_return (rw_frame, this, loop_state);
+ op_ret, loop_local->loc.path, loop_sh->offset);
- return 0;
+ if (op_ret <= 0) {
+ if (op_ret < 0) {
+ sh->op_failed = 1;
+ gf_log (this->name, GF_LOG_ERROR, "read failed on %d "
+ "for %s reason :%s", sh->source,
+ sh_local->loc.path, strerror (errno));
+ } else {
+ sh->eof_reached = _gf_true;
+ gf_log (this->name, GF_LOG_DEBUG, "Eof reached for %s",
+ sh_local->loc.path);
+ }
+ sh_loop_return (sh_frame, this, loop_frame, op_ret, op_errno);
+ goto out;
}
- if (sh->file_has_holes) {
- if (iov_0filled (vector, count) == 0) {
+ if (loop_sh->file_has_holes && iov_0filled (vector, count) == 0) {
gf_log (this->name, GF_LOG_DEBUG, "0 filled block");
- sh_diff_loop_return (rw_frame, this, loop_state);
+ sh_loop_return (sh_frame, this, loop_frame,
+ op_ret, op_errno);
goto out;
- }
}
- for (i = 0; i < priv->child_count; i++) {
- if (loop_state->write_needed[i]) {
- wcookie = __make_cookie (loop_index, i);
+ call_count = sh_number_of_writes_needed (loop_sh->write_needed,
+ priv->child_count);
+ GF_ASSERT (call_count > 0);
+ loop_local->call_count = call_count;
- STACK_WIND_COOKIE (rw_frame, sh_diff_write_cbk,
- (void *) (long) wcookie,
- priv->children[i],
- priv->children[i]->fops->writev,
- sh->healing_fd, vector, count,
- loop_state->offset, iobref);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!loop_sh->write_needed[i])
+ continue;
+ STACK_WIND_COOKIE (loop_frame, sh_loop_write_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->writev,
+ loop_sh->healing_fd, vector, count,
+ loop_sh->offset, iobref);
- if (!--call_count)
- break;
- }
+ if (!--call_count)
+ break;
}
out:
@@ -699,100 +517,77 @@ out:
static int
-sh_diff_read (call_frame_t *rw_frame, xlator_t *this,
- int loop_index)
+sh_loop_read (call_frame_t *loop_frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
- afr_sh_algo_diff_private_t * sh_priv = NULL;
- struct sh_diff_loop_state *loop_state = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- uint32_t cookie = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
- sh_priv = sh->private;
-
- loop_state = sh_priv->loops[loop_index];
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- cookie = __make_cookie (loop_index, sh->source);
-
- STACK_WIND_COOKIE (rw_frame, sh_diff_read_cbk,
- (void *) (long) cookie,
- priv->children[sh->source],
- priv->children[sh->source]->fops->readv,
- sh->healing_fd, sh_priv->block_size,
- loop_state->offset);
+ STACK_WIND_COOKIE (loop_frame, sh_loop_read_cbk,
+ (void *) (long) loop_sh->source,
+ priv->children[loop_sh->source],
+ priv->children[loop_sh->source]->fops->readv,
+ loop_sh->healing_fd, loop_sh->block_size,
+ loop_sh->offset);
return 0;
}
static int
-sh_diff_checksum_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
+sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
uint32_t weak_checksum, uint8_t *strong_checksum)
{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t *rw_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_diff_private_t * sh_priv = NULL;
- int loop_index = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ call_frame_t *sh_frame = NULL;
+ afr_local_t *sh_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
int child_index = 0;
- struct sh_diff_loop_state *loop_state = NULL;
int call_count = 0;
int i = 0;
int write_needed = 0;
priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- sh_frame = rw_sh->sh_frame;
+ sh_frame = loop_sh->sh_frame;
sh_local = sh_frame->local;
sh = &sh_local->self_heal;
sh_priv = sh->private;
- child_index = __child_index ((uint32_t) (long) cookie);
- loop_index = __loop_index ((uint32_t) (long) cookie);
-
- loop_state = sh_priv->loops[loop_index];
+ child_index = (long) cookie;
if (op_ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"checksum on %s failed on subvolume %s (%s)",
sh_local->loc.path, priv->children[child_index]->name,
strerror (op_errno));
-
sh->op_failed = 1;
} else {
- memcpy (loop_state->checksum + child_index * MD5_DIGEST_LEN,
- strong_checksum,
- MD5_DIGEST_LEN);
+ memcpy (loop_sh->checksum + child_index * MD5_DIGEST_LEN,
+ strong_checksum, MD5_DIGEST_LEN);
}
- call_count = afr_frame_return (rw_frame);
+ call_count = afr_frame_return (loop_frame);
if (call_count == 0) {
for (i = 0; i < priv->child_count; i++) {
if (sh->sources[i] || !sh_local->child_up[i])
continue;
- if (memcmp (loop_state->checksum + (i * MD5_DIGEST_LEN),
- loop_state->checksum + (sh->source * MD5_DIGEST_LEN),
+ if (memcmp (loop_sh->checksum + (i * MD5_DIGEST_LEN),
+ loop_sh->checksum + (sh->source * MD5_DIGEST_LEN),
MD5_DIGEST_LEN)) {
/*
Checksums differ, so this block
@@ -802,9 +597,9 @@ sh_diff_checksum_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
gf_log (this->name, GF_LOG_DEBUG,
"checksum on subvolume %s at offset %"
PRId64" differs from that on source",
- priv->children[i]->name, loop_state->offset);
+ priv->children[i]->name, loop_sh->offset);
- write_needed = loop_state->write_needed[i] = 1;
+ write_needed = loop_sh->write_needed[i] = 1;
}
}
@@ -817,271 +612,130 @@ sh_diff_checksum_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
UNLOCK (&sh_priv->lock);
if (write_needed && !sh->op_failed) {
- sh_diff_read (rw_frame, this, loop_index);
+ sh_loop_read (loop_frame, this);
} else {
- sh->offset += sh_priv->block_size;
-
- sh_diff_loop_return (rw_frame, this, loop_state);
+ sh_loop_return (sh_frame, this, loop_frame,
+ op_ret, op_errno);
}
}
return 0;
}
-
-static int
-sh_diff_find_unused_loop (afr_sh_algo_diff_private_t *sh_priv, int max)
-{
- int i = 0;
-
- LOCK (&sh_priv->lock);
- {
- for (i = 0; i < max; i++) {
- if (sh_priv->loops[i]->active == _gf_false) {
- sh_priv->loops[i]->active = _gf_true;
- break;
- }
- }
- }
- UNLOCK (&sh_priv->lock);
-
- if (i == max) {
- gf_log ("[sh-diff]", GF_LOG_ERROR,
- "no free loops found! This shouldn't happen. Please"
- " report this to gluster-devel@nongnu.org");
- }
-
- return i;
-}
-
-
static int
-sh_diff_checksum (call_frame_t *frame, xlator_t *this, off_t offset)
+sh_diff_checksum (call_frame_t *loop_frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_self_heal_t * rw_sh = NULL;
- afr_sh_algo_diff_private_t * sh_priv = NULL;
- call_frame_t *rw_frame = NULL;
- uint32_t cookie = 0;
- int loop_index = 0;
- struct sh_diff_loop_state *loop_state = NULL;
- int32_t op_errno = 0;
- int call_count = 0;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = sh->private;
-
- rw_frame = copy_frame (frame);
- if (!rw_frame)
- goto out;
-
- ALLOC_OR_GOTO (rw_local, afr_local_t, out);
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ afr_sh_algo_private_t *loop_sh_priv = NULL;
+ int call_count = 0;
+ int i = 0;
- rw_frame->local = rw_local;
- rw_sh = &rw_local->self_heal;
-
- rw_sh->offset = sh->offset;
- rw_sh->sh_frame = frame;
-
- call_count = sh->active_sinks + 1; /* sinks and source */
-
- rw_local->call_count = call_count;
-
- loop_index = sh_diff_find_unused_loop (sh_priv, priv->data_self_heal_window_size);
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- loop_state = sh_priv->loops[loop_index];
- loop_state->offset = offset;
+ loop_sh_priv = loop_sh->private;
- /* we need to send both the loop index and child index,
- so squeeze them both into a 32-bit number */
+ call_count = loop_sh->active_sinks + 1; /* sinks and source */
- cookie = __make_cookie (loop_index, sh->source);
+ loop_local->call_count = call_count;
- STACK_WIND_COOKIE (rw_frame, sh_diff_checksum_cbk,
- (void *) (long) cookie,
- priv->children[sh->source],
- priv->children[sh->source]->fops->rchecksum,
- sh->healing_fd,
- offset, sh_priv->block_size);
+ STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
+ (void *) (long) loop_sh->source,
+ priv->children[loop_sh->source],
+ priv->children[loop_sh->source]->fops->rchecksum,
+ loop_sh->healing_fd,
+ loop_sh->offset, loop_sh->block_size);
for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !local->child_up[i])
+ if (loop_sh->sources[i] || !loop_local->child_up[i])
continue;
- cookie = __make_cookie (loop_index, i);
-
- STACK_WIND_COOKIE (rw_frame, sh_diff_checksum_cbk,
- (void *) (long) cookie,
+ STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
+ (void *) (long) i,
priv->children[i],
priv->children[i]->fops->rchecksum,
- sh->healing_fd,
- offset, sh_priv->block_size);
+ loop_sh->healing_fd,
+ loop_sh->offset, loop_sh->block_size);
if (!--call_count)
break;
}
return 0;
-
-out:
- sh->op_failed = 1;
-
- sh_diff_loop_driver (frame, this, _gf_false, loop_state);
-
- return 0;
}
-
static int
-sh_diff_loop_driver (call_frame_t *frame, xlator_t *this,
- gf_boolean_t is_first_call,
- struct sh_diff_loop_state *loop_state)
+sh_full_read_write_to_sinks (call_frame_t *loop_frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
- gf_boolean_t is_driver_done = _gf_false;
- blksize_t block_size = 0;
- int loop = 0;
- off_t offset = 0;
- char sh_type_str[256] = {0,};
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
-
- afr_self_heal_type_str_get(sh, sh_type_str, sizeof(sh_type_str));
-
- LOCK (&sh_priv->lock);
- {
- if (loop_state)
- sh_diff_loop_state_reset (loop_state, priv->child_count);
- if (_gf_false == is_first_call)
- sh_priv->loops_running--;
- offset = sh_priv->offset;
- block_size = sh_priv->block_size;
- while ((0 == sh->op_failed) &&
- (sh_priv->loops_running < priv->data_self_heal_window_size)
- && (sh_priv->offset < sh->file_size)) {
-
- loop++;
- gf_log (this->name, GF_LOG_TRACE,
- "spawning a loop for offset %"PRId64,
- sh_priv->offset);
-
- sh_priv->offset += sh_priv->block_size;
- sh_priv->loops_running++;
-
- if (_gf_false == is_first_call)
- break;
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ int i = 0;
- }
- if (0 == sh_priv->loops_running) {
- is_driver_done = _gf_true;
- }
- }
- UNLOCK (&sh_priv->lock);
-
- while (loop--) {
- if (sh->op_failed) {
- // op failed in other loop, stop spawning more loops
- sh_diff_loop_driver (frame, this, _gf_false, NULL);
- } else {
- sh_diff_checksum (frame, this, offset);
- offset += block_size;
- }
- }
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- if (is_driver_done) {
- sh_diff_loop_driver_done (frame, this);
+ for (i = 0; i < priv->child_count; i++) {
+ if (loop_sh->sources[i] || !loop_local->child_up[i])
+ continue;
+ loop_sh->write_needed[i] = 1;
}
+ sh_loop_read (loop_frame, this);
return 0;
}
+static int
+sh_do_nothing (call_frame_t *frame, xlator_t *this)
+{
+ return 0;
+}
int
-afr_sh_algo_diff (call_frame_t *frame, xlator_t *this)
+afr_sh_start_loops (call_frame_t *sh_frame, xlator_t *this,
+ afr_sh_algo_fn sh_data_algo_start)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
- int i = 0;
+ afr_local_t *sh_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ sh_local = sh_frame->local;
+ sh = &sh_local->self_heal;
sh_priv = GF_CALLOC (1, sizeof (*sh_priv),
gf_afr_mt_afr_private_t);
if (!sh_priv)
- goto err;
-
- sh_priv->block_size = this->ctx->page_size;
-
- sh->private = sh_priv;
+ goto out;
LOCK_INIT (&sh_priv->lock);
- local->call_count = 0;
-
- sh_priv->loops = GF_CALLOC (priv->data_self_heal_window_size,
- sizeof (*sh_priv->loops),
- gf_afr_mt_sh_diff_loop_state);
- if (!sh_priv->loops)
- goto err;
-
- for (i = 0; i < priv->data_self_heal_window_size; i++) {
- sh_priv->loops[i] = GF_CALLOC (1, sizeof (*sh_priv->loops[i]),
- gf_afr_mt_sh_diff_loop_state);
- if (!sh_priv->loops[i])
- goto err;
-
- sh_priv->loops[i]->checksum = GF_CALLOC (priv->child_count,
- MD5_DIGEST_LEN, gf_afr_mt_uint8_t);
- if (!sh_priv->loops[i]->checksum)
- goto err;
-
- sh_priv->loops[i]->write_needed = GF_CALLOC (priv->child_count,
- sizeof (*sh_priv->loops[i]->write_needed),
- gf_afr_mt_char);
- if (!sh_priv->loops[i]->write_needed)
- goto err;
-
- }
+ sh->private = sh_priv;
+ sh->sh_data_algo_start = sh_data_algo_start;
- sh_diff_loop_driver (frame, this, _gf_true, NULL);
+ sh_local->call_count = 0;
+ sh->loop_completion_cbk = sh_do_nothing;
+ sh_loop_driver (sh_frame, this, _gf_true, sh_frame);
+out:
return 0;
-err:
- if (sh_priv) {
- if (sh_priv->loops) {
- for (i = 0; i < priv->data_self_heal_window_size; i++) {
- if (sh_priv->loops[i]->write_needed)
- GF_FREE (sh_priv->loops[i]->write_needed);
- if (sh_priv->loops[i]->checksum)
- GF_FREE (sh_priv->loops[i]->checksum);
- if (sh_priv->loops[i])
- GF_FREE (sh_priv->loops[i]);
- }
-
- GF_FREE (sh_priv->loops);
- }
+}
- GF_FREE (sh_priv);
- }
+int
+afr_sh_algo_diff (call_frame_t *sh_frame, xlator_t *this)
+{
+ afr_sh_start_loops (sh_frame, this, sh_diff_checksum);
return 0;
}
+int
+afr_sh_algo_full (call_frame_t *sh_frame, xlator_t *this)
+{
+ afr_sh_start_loops (sh_frame, this, sh_full_read_write_to_sinks);
+ return 0;
+}
struct afr_sh_algorithm afr_self_heal_algorithms[] = {
{.name = "full", .fn = afr_sh_algo_full},