summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src
diff options
context:
space:
mode:
authorPranith Kumar K <pkarampu@redhat.com>2012-11-05 21:42:22 +0530
committerVijay Bellur <vbellur@redhat.com>2012-12-03 00:11:02 -0800
commit2fd342a0c21d761d73bfee782717accbce819f24 (patch)
tree981acf4e2fe8f993b0bd520b7dba782ebd87b0de /xlators/cluster/afr/src
parent07c3801808db787e6c0cf0b2bf60a7ab62bc38b7 (diff)
cluster/afr: Provide option to disable readdir failover
In a replica pair, unlike files, directories may not have their contents in the same order, so a readdir for the same (offset, size) may not return the same entries on both subvolumes of the replica pair. Switching over from one subvolume to another in the middle of a directory listing is therefore not always a good idea: it may lead to duplicate entries, missing entries, or both. This patch provides a way to disable readdir failover so that applications like rebalance can retry if they want to. Change-Id: I2b23eb224a2e84016a561362932613ac824c11a0 BUG: 859387 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> Reviewed-on: http://review.gluster.org/4159 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/cluster/afr/src')
-rw-r--r--xlators/cluster/afr/src/afr-common.c1
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c55
-rw-r--r--xlators/cluster/afr/src/afr.c8
-rw-r--r--xlators/cluster/afr/src/afr.h2
4 files changed, 41 insertions, 25 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 2e339986621..35201085444 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -2462,6 +2462,7 @@ __afr_fd_ctx_set (xlator_t *this, fd_t *fd)
pthread_mutex_init (&fd_ctx->delay_lock, NULL);
INIT_LIST_HEAD (&fd_ctx->paused_calls);
INIT_LIST_HEAD (&fd_ctx->entries);
+ fd_ctx->call_child = -1;
ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
if (ret)
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index ce91ffba729..c201d45fd68 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -497,6 +497,9 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
+ if ((priv->readdir_failover == _gf_false) && (op_ret < 0))
+ goto out;
+
read_child = (long) cookie;
last_index = &local->cont.readdir.last_index;
fresh_children = local->fresh_children;
@@ -593,15 +596,14 @@ int32_t
afr_do_readdir (call_frame_t *frame, xlator_t *this,
fd_t *fd, size_t size, off_t offset, int whichop, dict_t *dict)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = -1;
- int32_t op_errno = 0;
- uint64_t read_child = 0;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+ uint64_t read_child = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -626,29 +628,33 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,
read_child = afr_inode_get_read_ctx (this, fd->inode,
local->fresh_children);
ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.readdir.last_index);
+ local->fresh_children,
+ &call_child,
+ &local->cont.readdir.last_index);
if (ret < 0) {
op_errno = -ret;
goto out;
}
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ op_errno = EBADF;
+ goto out;
+ }
+
+ if ((offset == 0) || (fd_ctx->call_child == -1)) {
+ fd_ctx->call_child = call_child;
+ } else if ((priv->readdir_failover == _gf_false) &&
+ (call_child != fd_ctx->call_child)) {
+ op_errno = EBADF;
+ goto out;
+ }
+
local->fd = fd_ref (fd);
local->cont.readdir.size = size;
local->cont.readdir.dict = (dict)? dict_ref (dict) : NULL;
if (priv->strict_readdir) {
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- op_errno = -ret;
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
if (fd_ctx->last_tried != call_child) {
gf_log (this->name, GF_LOG_TRACE,
"first up child has changed from %d to %d, "
@@ -675,10 +681,9 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,
children[call_child]->fops->readdirp, fd,
size, offset, dict);
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
return 0;
}
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index c120ba57b5f..cdc7a609b4d 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -189,6 +189,8 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size,
options, size, out);
/* Reset this so we re-discover in case the topology changed. */
+ GF_OPTION_RECONF ("readdir-failover", priv->readdir_failover, options,
+ bool, out);
priv->did_discovery = _gf_false;
ret = 0;
@@ -332,6 +334,7 @@ init (xlator_t *this)
fix_quorum_options(this,priv,qtype);
GF_OPTION_INIT ("post-op-delay-secs", priv->post_op_delay_secs, uint32, out);
+ GF_OPTION_INIT ("readdir-failover", priv->readdir_failover, bool, out);
priv->wait_count = 1;
@@ -758,5 +761,10 @@ struct volume_options options[] = {
.max = 131072,
.default_value = "1KB",
},
+ { .key = {"readdir-failover"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "readdir(p) will not failover if this option is off",
+ .default_value = "on",
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 48dfbf37eb8..7de8d82436c 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -160,6 +160,7 @@ typedef struct _afr_private {
afr_self_heald_t shd;
gf_boolean_t choose_local;
gf_boolean_t did_discovery;
+ gf_boolean_t readdir_failover;
uint64_t sh_readdir_size;
} afr_private_t;
@@ -710,6 +711,7 @@ typedef struct {
pthread_mutex_t delay_lock;
gf_timer_t *delay_timer;
call_frame_t *delay_frame;
+ int call_child;
} afr_fd_ctx_t;