summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPranith Kumar K <pkarampu@redhat.com>2012-12-17 14:16:37 +0530
committerVijay Bellur <vbellur@redhat.com>2012-12-17 12:45:38 -0500
commit6cd78bb644a2ee1fb34185b5a86002ef4c536173 (patch)
tree1cb1be15cd5a3e6250b72130b0ee5ebe42d125c0
parent3dddca7eeb15c017a268c849ea6287f6e3a838bb (diff)
cluster/afr: Provide option to disable readdir failover
In a replica pair, unlike files, directories may not have their contents in the same order, so a readdir for the same (offset, size) may not return the same entries on both subvolumes of the replica pair. Switching over from one subvolume to another is therefore not always a good idea: it may lead to duplicate entries, fewer entries, or both. This patch provides a way to disable readdir-failover so that applications like rebalance can retry if they want to. Change-Id: I02e5762e7f8a5847eaf54356e5d6b5f49fe6c609 BUG: 859387 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> Reviewed-on: https://code.engineering.redhat.com/gerrit/1989 Reviewed-by: Vijay Bellur <vbellur@redhat.com> Tested-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--xlators/cluster/afr/src/afr-common.c1
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c55
-rw-r--r--xlators/cluster/afr/src/afr.c8
-rw-r--r--xlators/cluster/afr/src/afr.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c1
5 files changed, 42 insertions, 25 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 3a8a522..0e1b6dc 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -2292,6 +2292,7 @@ __afr_fd_ctx_set (xlator_t *this, fd_t *fd)
pthread_mutex_init (&fd_ctx->delay_lock, NULL);
INIT_LIST_HEAD (&fd_ctx->paused_calls);
INIT_LIST_HEAD (&fd_ctx->entries);
+ fd_ctx->call_child = -1;
ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
if (ret)
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index b661cfa..c6628db 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -517,6 +517,9 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
+ if ((priv->readdir_failover == _gf_false) && (op_ret < 0))
+ goto out;
+
read_child = (long) cookie;
last_index = &local->cont.readdir.last_index;
fresh_children = local->fresh_children;
@@ -623,15 +626,14 @@ int32_t
afr_do_readdir (call_frame_t *frame, xlator_t *this,
fd_t *fd, size_t size, off_t offset, int whichop, dict_t *dict)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = -1;
- int32_t op_errno = 0;
- uint64_t read_child = 0;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+ uint64_t read_child = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -656,29 +658,33 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,
read_child = afr_inode_get_read_ctx (this, fd->inode,
local->fresh_children);
ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.readdir.last_index);
+ local->fresh_children,
+ &call_child,
+ &local->cont.readdir.last_index);
if (ret < 0) {
op_errno = -ret;
goto out;
}
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ op_errno = EBADF;
+ goto out;
+ }
+
+ if ((offset == 0) || (fd_ctx->call_child == -1)) {
+ fd_ctx->call_child = call_child;
+ } else if ((priv->readdir_failover == _gf_false) &&
+ (call_child != fd_ctx->call_child)) {
+ op_errno = EBADF;
+ goto out;
+ }
+
local->fd = fd_ref (fd);
local->cont.readdir.size = size;
local->cont.readdir.dict = (dict)? dict_ref (dict) : NULL;
if (priv->strict_readdir) {
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- op_errno = -ret;
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
if (fd_ctx->last_tried != call_child) {
gf_log (this->name, GF_LOG_TRACE,
"first up child has changed from %d to %d, "
@@ -705,10 +711,9 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,
children[call_child]->fops->readdirp, fd,
size, offset, dict);
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
return 0;
}
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 51c51e5..8c7f452 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -167,6 +167,8 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("post-op-delay-secs", priv->post_op_delay_secs, options,
uint32, out);
+ GF_OPTION_RECONF ("readdir-failover", priv->readdir_failover, options,
+ bool, out);
ret = 0;
out:
return ret;
@@ -294,6 +296,7 @@ init (xlator_t *this)
fix_quorum_options(this,priv,qtype);
GF_OPTION_INIT ("post-op-delay-secs", priv->post_op_delay_secs, uint32, out);
+ GF_OPTION_INIT ("readdir-failover", priv->readdir_failover, bool, out);
priv->wait_count = 1;
@@ -609,5 +612,10 @@ struct volume_options options[] = {
"post-operation phase of the transaction to "
"enhance overlap of adjacent write operations.",
},
+ { .key = {"readdir-failover"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "readdir(p) will not failover if this option is off",
+ .default_value = "on",
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 554c89a..944f845 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -155,6 +155,7 @@ typedef struct _afr_private {
char vol_uuid[UUID_SIZE + 1];
int32_t *last_event;
afr_self_heald_t shd;
+ gf_boolean_t readdir_failover;
} afr_private_t;
typedef struct {
@@ -741,6 +742,7 @@ typedef struct {
pthread_mutex_t delay_lock;
gf_timer_t *delay_timer;
call_frame_t *delay_frame;
+ int call_child;
} afr_fd_ctx_t;
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 09c5519..60e4c6c 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -136,6 +136,7 @@ static struct volopt_map_entry glusterd_volopt_map[] = {
{"cluster.quorum-type", "cluster/replicate", "quorum-type", NULL, NO_DOC, 0},
{"cluster.quorum-count", "cluster/replicate", "quorum-count", NULL, NO_DOC, 0},
+ {"cluster.readdir-failover", "cluster/replicate", NULL, NULL, DOC, 0},
{"cluster.stripe-block-size", "cluster/stripe", "block-size", NULL, DOC, 0},
{"cluster.stripe-coalesce", "cluster/stripe", "coalesce", NULL, DOC, 0},