summary | refs | log | tree | commit | diff | stats
path: root/xlators
diff options
context:
space:
mode:
author: Vikas Gorur <vikas@gluster.com> 2009-09-17 05:56:30 +0000
committer: Anand V. Avati <avati@dev.gluster.com> 2009-09-22 06:13:34 -0700
commit: 8c224de82b9b3e75f2dd9c264d5d3726dd1ef379 (patch)
tree: 7a0edbc676b601cc08802dd4680ef816f4fe628c /xlators
parent: 356449c0b39d600a16b195df30d0fc37693575f8 (diff)
cluster/afr: Make the self-heal algorithm pluggable.
Abstract the read/write loop part of data self-heal. This patch has support for the "full" (i.e., read and write entire file) algorithm. Signed-off-by: Anand V. Avati <avati@dev.gluster.com>
Diffstat (limited to 'xlators')
-rw-r--r-- xlators/cluster/afr/src/Makefile.am 4
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-algorithm.c 233
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-algorithm.h 34
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-data.c 178
-rw-r--r-- xlators/cluster/afr/src/afr.h 6
5 files changed, 287 insertions, 168 deletions
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am
index 1bde9e5bad7..df284d12cf7 100644
--- a/xlators/cluster/afr/src/Makefile.am
+++ b/xlators/cluster/afr/src/Makefile.am
@@ -3,10 +3,10 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
afr_la_LDFLAGS = -module -avoidversion
-afr_la_SOURCES = afr.c afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c
+afr_la_SOURCES = afr.c afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c
afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h
+noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h
AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
-I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
new file mode 100644
index 00000000000..bc3917caca0
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
@@ -0,0 +1,233 @@
+/*
+ Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#include "glusterfs.h"
+#include "afr.h"
+#include "xlator.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "list.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+#include "byte-order.h"
+#include "md5.h"
+
+#include "afr-transaction.h"
+#include "afr-self-heal.h"
+#include "afr-self-heal-common.h"
+#include "afr-self-heal-algorithm.h"
+
+/*
+ This file contains the various self-heal algorithms
+*/
+
+
+/*
+ The "full" algorithm. Reads the file block by block from the
+ source child and writes each block to every sink child.
+*/
+
+static int
+sh_full_read_write_iter (call_frame_t *frame, xlator_t *this); /* forward declaration: the read and write callbacks below call back into it */
+
+static int /* writev callback; wound once per sink by sh_full_read_cbk */
+sh_full_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ int child_index = (long) cookie; /* the cookie carries the index of the child that was written to */
+ int call_count = 0;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "wrote %d bytes of data from %s to child %d, offset %"PRId64"",
+ op_ret, local->loc.path, child_index, sh->offset - op_ret); /* sh->offset was already advanced past this block by sh_full_read_cbk */
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "write to %s failed on subvolume %s (%s)",
+ local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ sh->op_failed = 1; /* sh_full_read_write_iter checks this flag and aborts */
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame); /* NOTE(review): presumably decrements local->call_count and returns the remainder - confirm */
+
+ if (call_count == 0) { /* no writes left outstanding for this block: continue with the next step */
+ sh_full_read_write_iter (frame, this);
+ }
+
+ return 0;
+}
+
+
+static int /* readv callback for the read wound by sh_full_read_write; fans the data out to the sinks */
+sh_full_read_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count, struct stat *buf,
+ struct iobref *iobref)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ int child_index = (long) cookie; /* index of the child that was read from, passed as the cookie */
+ int i = 0;
+ int call_count = 0;
+
+ off_t offset;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ call_count = sh->active_sinks;
+
+ local->call_count = call_count; /* the write loop below winds at most this many writev calls */
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "read %d bytes of data from %s on child %d, offset %"PRId64"",
+ op_ret, local->loc.path, child_index, sh->offset);
+
+ if (op_ret <= 0) { /* NOTE(review): a failed read (op_ret < 0) also takes the completion path, not algo_abort_cbk - confirm this is intended */
+ local->self_heal.algo_completion_cbk (frame, this);
+ return 0;
+ }
+
+ /* what if we read less than block size? */
+ offset = sh->offset; /* start of the block just read; used as the write offset below */
+ sh->offset += op_ret; /* advance by the number of bytes actually read */
+
+ if (sh->file_has_holes) {
+ if (iov_0filled (vector, count) == 0) { /* NOTE(review): presumably 0 means the block is all zeroes - confirm; no write is issued for it */
+ /* the iter function depends on the
+ sh->offset already being updated
+ above
+ */
+
+ sh_full_read_write_iter (frame, this);
+ goto out;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->sources[i] || !local->child_up[i]) /* skip children marked as sources and children that are not up */
+ continue;
+
+ /* this is a sink, so write to it */
+ STACK_WIND_COOKIE (frame, sh_full_write_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->writev,
+ sh->healing_fd, vector, count, offset,
+ iobref);
+
+ if (!--call_count) /* stop once active_sinks writes have been wound */
+ break;
+ }
+
+out:
+ return 0;
+}
+
+
+static int /* winds a readv of sh->block_size bytes at sh->offset to the source child */
+sh_full_read_write (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ STACK_WIND_COOKIE (frame, sh_full_read_cbk, /* the result is delivered to sh_full_read_cbk */
+ (void *) (long) sh->source, /* cookie: index of the source child */
+ priv->children[sh->source],
+ priv->children[sh->source]->fops->readv,
+ sh->healing_fd, sh->block_size,
+ sh->offset);
+
+ return 0;
+}
+
+
+static int /* decides the next step of the copy loop: abort, complete, or read another block */
+sh_full_read_write_iter (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ if (sh->op_failed) { /* set by sh_full_write_cbk when a write returned -1 */
+ local->self_heal.algo_abort_cbk (frame, this);
+ goto out;
+ }
+
+ if (sh->offset >= sh->file_size) { /* the offset has reached file_size: nothing left to copy */
+ gf_log (this->name, GF_LOG_TRACE,
+ "closing fd's of %s",
+ local->loc.path);
+
+ local->self_heal.algo_completion_cbk (frame, this);
+
+ goto out;
+ }
+
+ sh_full_read_write (frame, this); /* otherwise read the next block from the source */
+
+out:
+ return 0;
+}
+
+
+int /* entry point of the "full" algorithm; always returns 0 */
+afr_sh_algo_full (call_frame_t *frame, xlator_t *this)
+{
+ sh_full_read_write (frame, this); /* start the loop by reading the first block */
+ return 0;
+}
+
+
+struct afr_sh_algorithm afr_self_heal_algorithms[] = { /* table of the available self-heal algorithms */
+ {.name = "full", .fn = afr_sh_algo_full}, /* currently the only entry */
+};
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.h b/xlators/cluster/afr/src/afr-self-heal-algorithm.h
new file mode 100644
index 00000000000..646fd2ee7c1
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.h
@@ -0,0 +1,34 @@
+/*
+ Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __AFR_SELF_HEAL_ALGORITHM_H__
+#define __AFR_SELF_HEAL_ALGORITHM_H__
+
+
+typedef int (*afr_sh_algo_fn) (call_frame_t *frame, /* signature of a self-heal algorithm's entry function */
+ xlator_t *this);
+
+struct afr_sh_algorithm { /* describes one self-heal algorithm */
+ const char *name; /* name of the algorithm */
+ afr_sh_algo_fn fn; /* entry function of the algorithm */
+};
+
+extern struct afr_sh_algorithm afr_self_heal_algorithms[]; /* declaration only; the table is defined once in afr-self-heal-algorithm.c */
+
+#endif /* __AFR_SELF_HEAL_ALGORITHM_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index d1c01cf5e3a..5f08a005c3d 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -47,7 +47,7 @@
#include "afr-transaction.h"
#include "afr-self-heal.h"
#include "afr-self-heal-common.h"
-
+#include "afr-self-heal-algorithm.h"
int
@@ -473,171 +473,10 @@ afr_sh_data_trim_sinks (call_frame_t *frame, xlator_t *this)
}
-int
-afr_sh_data_read_write_iter (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_data_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
-
- int child_index = (long) cookie;
- int call_count = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- gf_log (this->name, GF_LOG_TRACE,
- "wrote %d bytes of data from %s to child %d, offset %"PRId64"",
- op_ret, local->loc.path, child_index, sh->offset - op_ret);
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "write to %s failed on subvolume %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_sh_data_read_write_iter (frame, this);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_data_read_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct stat *buf,
- struct iobref *iobref)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
-
- int child_index = (long) cookie;
- int i = 0;
- int call_count = 0;
-
- off_t offset;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- call_count = sh->active_sinks;
-
- local->call_count = call_count;
-
- gf_log (this->name, GF_LOG_TRACE,
- "read %d bytes of data from %s on child %d, offset %"PRId64"",
- op_ret, local->loc.path, child_index, sh->offset);
-
- if (op_ret <= 0) {
- afr_sh_data_trim_sinks (frame, this);
- return 0;
- }
-
- /* what if we read less than block size? */
- offset = sh->offset;
- sh->offset += op_ret;
-
- if (sh->file_has_holes) {
- if (iov_0filled (vector, count) == 0) {
- /* the iter function depends on the
- sh->offset already being updated
- above
- */
- afr_sh_data_read_write_iter (frame, this);
- goto out;
- }
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !local->child_up[i])
- continue;
-
- /* this is a sink, so write to it */
- STACK_WIND_COOKIE (frame, afr_sh_data_write_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->writev,
- sh->healing_fd, vector, count, offset,
- iobref);
-
- if (!--call_count)
- break;
- }
-
-out:
- return 0;
-}
-
-
-int
-afr_sh_data_read_write (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_read_cbk,
- (void *) (long) sh->source,
- priv->children[sh->source],
- priv->children[sh->source]->fops->readv,
- sh->healing_fd, sh->block_size,
- sh->offset);
-
- return 0;
-}
-
-
-int
-afr_sh_data_read_write_iter (call_frame_t *frame, xlator_t *this)
+struct afr_sh_algorithm * /* returns the self-heal algorithm to run; frame and this are not consulted */
+afr_sh_data_pick_algo (call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- if (sh->op_failed) {
- afr_sh_data_finish (frame, this);
- goto out;
- }
-
- if (sh->offset >= sh->file_size) {
- gf_log (this->name, GF_LOG_TRACE,
- "closing fd's of %s",
- local->loc.path);
- afr_sh_data_trim_sinks (frame, this);
-
- goto out;
- }
-
- afr_sh_data_read_write (frame, this);
-
-out:
- return 0;
+ return &afr_self_heal_algorithms[0]; /* index 0 is the "full" algorithm */
}
@@ -651,6 +490,8 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int call_count = 0;
int child_index = 0;
+ struct afr_sh_algorithm *sh_algo = NULL;
+
local = frame->local;
sh = &local->self_heal;
priv = this->private;
@@ -690,7 +531,12 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"sourcing file %s from %s to other sinks",
local->loc.path, priv->children[sh->source]->name);
- afr_sh_data_read_write (frame, this);
+ sh->algo_completion_cbk = afr_sh_data_trim_sinks;
+ sh->algo_abort_cbk = afr_sh_data_finish;
+
+ sh_algo = afr_sh_data_pick_algo (frame, this);
+
+ sh_algo->fn (frame, this);
}
return 0;
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index d9a3435c3f4..a7f980475f6 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -92,7 +92,13 @@ typedef struct {
off_t offset;
loc_t parent_loc;
+
+ /* private data for the particular self-heal algorithm */
+ void *private;
+
int (*completion_cbk) (call_frame_t *frame, xlator_t *this);
+ int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this);
+ int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this);
call_frame_t *sh_frame;
} afr_self_heal_t;