From 8c224de82b9b3e75f2dd9c264d5d3726dd1ef379 Mon Sep 17 00:00:00 2001 From: Vikas Gorur Date: Thu, 17 Sep 2009 05:56:30 +0000 Subject: cluster/afr: Make the self-heal algorithm pluggable. Abstract the read/write loop part of data self-heal. This patch has support for the "full" (i.e., read and write entire file) algorithm. Signed-off-by: Anand V. Avati --- xlators/cluster/afr/src/Makefile.am | 4 +- xlators/cluster/afr/src/afr-self-heal-algorithm.c | 233 ++++++++++++++++++++++ xlators/cluster/afr/src/afr-self-heal-algorithm.h | 34 ++++ xlators/cluster/afr/src/afr-self-heal-data.c | 178 ++--------------- xlators/cluster/afr/src/afr.h | 6 + 5 files changed, 287 insertions(+), 168 deletions(-) create mode 100644 xlators/cluster/afr/src/afr-self-heal-algorithm.c create mode 100644 xlators/cluster/afr/src/afr-self-heal-algorithm.h diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am index 1bde9e5bad7..df284d12cf7 100644 --- a/xlators/cluster/afr/src/Makefile.am +++ b/xlators/cluster/afr/src/Makefile.am @@ -3,10 +3,10 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster afr_la_LDFLAGS = -module -avoidversion -afr_la_SOURCES = afr.c afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c +afr_la_SOURCES = afr.c afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h +noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c new file mode 100644 index 00000000000..bc3917caca0 --- /dev/null +++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.c @@ -0,0 +1,233 @@ +/* + Copyright (c) 2009 Z RESEARCH, Inc. + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + + +#include "glusterfs.h" +#include "afr.h" +#include "xlator.h" +#include "dict.h" +#include "xlator.h" +#include "hashfn.h" +#include "logging.h" +#include "stack.h" +#include "list.h" +#include "call-stub.h" +#include "defaults.h" +#include "common-utils.h" +#include "compat-errno.h" +#include "compat.h" +#include "byte-order.h" +#include "md5.h" + +#include "afr-transaction.h" +#include "afr-self-heal.h" +#include "afr-self-heal-common.h" +#include "afr-self-heal-algorithm.h" + +/* + This file contains the various self-heal algorithms +*/ + + +/* + The "full" algorithm. Copies the entire file from + source to sinks. +*/ + +static int +sh_full_read_write_iter (call_frame_t *frame, xlator_t *this); + +static int +sh_full_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + afr_self_heal_t *sh = NULL; + + int child_index = (long) cookie; + int call_count = 0; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + gf_log (this->name, GF_LOG_TRACE, + "wrote %d bytes of data from %s to child %d, offset %"PRId64"", + op_ret, local->loc.path, child_index, sh->offset - op_ret); + + LOCK (&frame->lock); + { + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "write to %s failed on subvolume %s (%s)", + local->loc.path, + priv->children[child_index]->name, + strerror (op_errno)); + sh->op_failed = 1; + } + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + sh_full_read_write_iter (frame, this); + } + + return 0; +} + + +static int +sh_full_read_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + struct iovec *vector, int32_t count, struct stat *buf, + struct iobref *iobref) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + afr_self_heal_t *sh = NULL; + + int child_index = (long) cookie; + int i = 0; + int call_count = 0; + + off_t offset; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + call_count = sh->active_sinks; + + local->call_count = call_count; + + gf_log (this->name, GF_LOG_TRACE, + "read %d bytes of data from %s on child %d, offset %"PRId64"", + op_ret, local->loc.path, child_index, sh->offset); + + if (op_ret <= 0) { + local->self_heal.algo_completion_cbk (frame, this); + return 0; + } + + /* what if we read less than block size? */ + offset = sh->offset; + sh->offset += op_ret; + + if (sh->file_has_holes) { + if (iov_0filled (vector, count) == 0) { + /* the iter function depends on the + sh->offset already being updated + above + */ + + sh_full_read_write_iter (frame, this); + goto out; + } + } + + for (i = 0; i < priv->child_count; i++) { + if (sh->sources[i] || !local->child_up[i]) + continue; + + /* this is a sink, so write to it */ + STACK_WIND_COOKIE (frame, sh_full_write_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->writev, + sh->healing_fd, vector, count, offset, + iobref); + + if (!--call_count) + break; + } + +out: + return 0; +} + + +static int +sh_full_read_write (call_frame_t *frame, xlator_t *this) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + afr_self_heal_t *sh = NULL; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + STACK_WIND_COOKIE (frame, sh_full_read_cbk, + (void *) (long) sh->source, + priv->children[sh->source], + priv->children[sh->source]->fops->readv, + sh->healing_fd, sh->block_size, + sh->offset); + + return 0; +} + + +static int +sh_full_read_write_iter (call_frame_t *frame, xlator_t *this) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + afr_self_heal_t *sh = NULL; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + if (sh->op_failed) { + local->self_heal.algo_abort_cbk (frame, this); + goto out; + } + + if (sh->offset >= sh->file_size) { + gf_log (this->name, GF_LOG_TRACE, + "closing fd's of %s", + local->loc.path); + + local->self_heal.algo_completion_cbk (frame, this); + + goto out; + } + + sh_full_read_write (frame, this); + +out: + return 0; +} + + +int +afr_sh_algo_full (call_frame_t *frame, xlator_t *this) +{ + sh_full_read_write (frame, this); + return 0; +} + + +struct afr_sh_algorithm afr_self_heal_algorithms[] = { + {.name = "full", .fn = afr_sh_algo_full}, +}; diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.h b/xlators/cluster/afr/src/afr-self-heal-algorithm.h new file mode 100644 index 00000000000..646fd2ee7c1 --- /dev/null +++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.h @@ -0,0 +1,34 @@ +/* + Copyright (c) 2009 Z RESEARCH, Inc. + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#ifndef __AFR_SELF_HEAL_ALGORITHM_H__ +#define __AFR_SELF_HEAL_ALGORITHM_H__ + + +typedef int (*afr_sh_algo_fn) (call_frame_t *frame, + xlator_t *this); + +struct afr_sh_algorithm { + const char *name; + afr_sh_algo_fn fn; +}; + +struct afr_sh_algorithm afr_self_heal_algorithms[1]; + +#endif /* __AFR_SELF_HEAL_ALGORITHM_H__ */ diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index d1c01cf5e3a..5f08a005c3d 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -47,7 +47,7 @@ #include "afr-transaction.h" #include "afr-self-heal.h" #include "afr-self-heal-common.h" - +#include "afr-self-heal-algorithm.h" int @@ -473,171 +473,10 @@ afr_sh_data_trim_sinks (call_frame_t *frame, xlator_t *this) } -int -afr_sh_data_read_write_iter (call_frame_t *frame, xlator_t *this); - -int -afr_sh_data_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct stat *buf) -{ - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - afr_self_heal_t *sh = NULL; - - int child_index = (long) cookie; - int call_count = 0; - - priv = this->private; - local = frame->local; - sh = &local->self_heal; - - gf_log (this->name, GF_LOG_TRACE, - "wrote %d bytes of data from %s to child %d, offset %"PRId64"", - op_ret, local->loc.path, child_index, sh->offset - op_ret); - - LOCK (&frame->lock); - { - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "write to %s failed on subvolume %s (%s)", - local->loc.path, - priv->children[child_index]->name, - strerror (op_errno)); - sh->op_failed = 1; - } - } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) { - afr_sh_data_read_write_iter (frame, this); - } - - return 0; -} - - -int -afr_sh_data_read_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - struct iovec *vector, int32_t count, struct stat *buf, - struct iobref *iobref) -{ - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - afr_self_heal_t *sh = NULL; - - int child_index = (long) cookie; - int i = 0; - int call_count = 0; - - off_t offset; - - priv = this->private; - local = frame->local; - sh = &local->self_heal; - - call_count = sh->active_sinks; - - local->call_count = call_count; - - gf_log (this->name, GF_LOG_TRACE, - "read %d bytes of data from %s on child %d, offset %"PRId64"", - op_ret, local->loc.path, child_index, sh->offset); - - if (op_ret <= 0) { - afr_sh_data_trim_sinks (frame, this); - return 0; - } - - /* what if we read less than block size? */ - offset = sh->offset; - sh->offset += op_ret; - - if (sh->file_has_holes) { - if (iov_0filled (vector, count) == 0) { - /* the iter function depends on the - sh->offset already being updated - above - */ - afr_sh_data_read_write_iter (frame, this); - goto out; - } - } - - for (i = 0; i < priv->child_count; i++) { - if (sh->sources[i] || !local->child_up[i]) - continue; - - /* this is a sink, so write to it */ - STACK_WIND_COOKIE (frame, afr_sh_data_write_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->writev, - sh->healing_fd, vector, count, offset, - iobref); - - if (!--call_count) - break; - } - -out: - return 0; -} - - -int -afr_sh_data_read_write (call_frame_t *frame, xlator_t *this) -{ - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - afr_self_heal_t *sh = NULL; - - priv = this->private; - local = frame->local; - sh = &local->self_heal; - - STACK_WIND_COOKIE (frame, afr_sh_data_read_cbk, - (void *) (long) sh->source, - priv->children[sh->source], - priv->children[sh->source]->fops->readv, - sh->healing_fd, sh->block_size, - sh->offset); - - return 0; -} - - -int -afr_sh_data_read_write_iter (call_frame_t *frame, xlator_t *this) +struct afr_sh_algorithm * +afr_sh_data_pick_algo (call_frame_t *frame, xlator_t *this) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - afr_self_heal_t *sh = NULL; - - priv = this->private; - local = frame->local; - sh = &local->self_heal; - - if (sh->op_failed) { - afr_sh_data_finish (frame, this); - goto out; - } - - if (sh->offset >= sh->file_size) { - gf_log (this->name, GF_LOG_TRACE, - "closing fd's of %s", - local->loc.path); - afr_sh_data_trim_sinks (frame, this); - - goto out; - } - - afr_sh_data_read_write (frame, this); - -out: - return 0; + return &afr_self_heal_algorithms[0]; /* full */ } @@ -651,6 +490,8 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int call_count = 0; int child_index = 0; + struct afr_sh_algorithm *sh_algo = NULL; + local = frame->local; sh = &local->self_heal; priv = this->private; @@ -690,7 +531,12 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, "sourcing file %s from %s to other sinks", local->loc.path, priv->children[sh->source]->name); - afr_sh_data_read_write (frame, this); + sh->algo_completion_cbk = afr_sh_data_trim_sinks; + sh->algo_abort_cbk = afr_sh_data_finish; + + sh_algo = afr_sh_data_pick_algo (frame, this); + + sh_algo->fn (frame, this); } return 0; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index d9a3435c3f4..a7f980475f6 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -92,7 +92,13 @@ typedef struct { off_t offset; loc_t parent_loc; + + /* private data for the particular self-heal algorithm */ + void *private; + int (*completion_cbk) (call_frame_t *frame, xlator_t *this); + int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this); + int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this); call_frame_t *sh_frame; } afr_self_heal_t; -- cgit