summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--xlators/cluster/afr/src/afr-common.c193
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c4
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c2
-rw-r--r--xlators/cluster/afr/src/afr-messages.h4
-rw-r--r--xlators/cluster/afr/src/afr-open.c4
-rw-r--r--xlators/cluster/afr/src/afr-read-txn.c6
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c6
-rw-r--r--xlators/cluster/afr/src/afr.c26
-rw-r--r--xlators/cluster/afr/src/afr.h25
9 files changed, 229 insertions, 41 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 9b2c0d7caea..dec667fd460 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -43,6 +43,20 @@
#include "afr-self-heald.h"
#include "afr-messages.h"
+/*
+ * Decide whether a fop may proceed under the consistent-io option.
+ *
+ * Returns _gf_true when priv->consistent_io is off, or when
+ * local->call_count equals priv->child_count. Otherwise logs
+ * AFR_MSG_SUBVOLS_DOWN, stores ENOTCONN in *op_errno (if op_errno is
+ * non-NULL) and returns _gf_false.
+ */
+gf_boolean_t
+afr_is_consistent_io_possible (afr_local_t *local, afr_private_t *priv,
+                               int32_t *op_errno)
+{
+        /* Option disabled, or every child is accounted for: allow the fop. */
+        if (!priv->consistent_io || local->call_count == priv->child_count)
+                return _gf_true;
+
+        gf_msg (THIS->name, GF_LOG_INFO, 0,
+                AFR_MSG_SUBVOLS_DOWN, "All subvolumes are not up");
+
+        if (op_errno)
+                *op_errno = ENOTCONN;
+
+        return _gf_false;
+}
+
call_frame_t *
afr_copy_frame (call_frame_t *base)
{
@@ -1555,6 +1569,100 @@ afr_remove_eager_lock_stub (afr_local_t *local)
UNLOCK (&local->fd->lock);
}
+/* Returns _gf_true only when the entrylk command is ENTRYLK_UNLOCK. */
+static gf_boolean_t
+afr_entrylk_is_unlock (entrylk_cmd cmd)
+{
+        return (cmd == ENTRYLK_UNLOCK) ? _gf_true : _gf_false;
+}
+
+/*
+ * Returns _gf_true when an inodelk request is an unlock, i.e. the command
+ * is F_SETLK or F_SETLKW and the lock type is F_UNLCK. Any other command
+ * yields _gf_false without looking at flock.
+ */
+static gf_boolean_t
+afr_inodelk_is_unlock (int32_t cmd, struct gf_flock *flock)
+{
+        gf_boolean_t is_set_cmd = _gf_false;
+
+        if (cmd == F_SETLKW || cmd == F_SETLK)
+                is_set_cmd = _gf_true;
+
+        /* flock is dereferenced only for the set-lock commands. */
+        if (is_set_cmd && flock->l_type == F_UNLCK)
+                return _gf_true;
+
+        return _gf_false;
+}
+
+/*
+ * Returns _gf_true when an lk request releases a lock: either the command
+ * is F_RESLK_UNLCK, or it is one of the set-lock commands and the lock type
+ * is F_UNLCK. Every other command yields _gf_false.
+ */
+static gf_boolean_t
+afr_lk_is_unlock (int32_t cmd, struct gf_flock *flock)
+{
+        gf_boolean_t is_unlock = _gf_false;
+
+        switch (cmd) {
+        case F_RESLK_UNLCK:
+                is_unlock = _gf_true;
+                break;
+
+        /* The 64-bit variants get their own label only where they are
+         * distinct values; otherwise the labels would be duplicates. */
+#if F_SETLKW != F_SETLKW64
+        case F_SETLKW64:
+#endif
+        case F_SETLKW:
+#if F_SETLK != F_SETLK64
+        case F_SETLK64:
+#endif
+        case F_SETLK:
+                if (flock->l_type == F_UNLCK)
+                        is_unlock = _gf_true;
+                break;
+
+        default:
+                break;
+        }
+
+        return is_unlock;
+}
+
+/*
+ * Turn an otherwise successful fop into an ENOTCONN failure when
+ * consistent-io is enabled and local->event_generation is non-zero but no
+ * longer equal to priv->event_generation.
+ *
+ * frame:    frame of the fop being unwound. Nothing is done unless the
+ *           frame, its xlator, its local and the xlator's private data are
+ *           all present.
+ * op_ret:   in/out fop result. Left untouched when NULL or already
+ *           negative; set to -1 when the fop is failed here.
+ * op_errno: out errno. Set to ENOTCONN when the fop is failed here
+ *           (skipped if NULL).
+ */
+void
+afr_handle_inconsistent_fop (call_frame_t *frame, int32_t *op_ret,
+                             int32_t *op_errno)
+{
+        afr_private_t *priv  = NULL;
+        afr_local_t   *local = NULL;
+
+        if (!frame || !frame->this || !frame->local || !frame->this->private)
+                return;
+
+        /* No result to rewrite, or the fop has already failed. */
+        if (!op_ret || *op_ret < 0)
+                return;
+
+        local = frame->local;
+        switch (local->op) {
+        case GF_FOP_LOOKUP:
+        case GF_FOP_INODELK:
+        case GF_FOP_FINODELK:
+        case GF_FOP_ENTRYLK:
+        case GF_FOP_FENTRYLK:
+        case GF_FOP_LK:
+                /* Failing inodelk/entrylk/lk here is not a good idea: the
+                 * locks already taken on the other bricks would have to be
+                 * cleaned up. A brick may also go down right after the
+                 * unwind, in which case the next fop fails anyway, so these
+                 * are left alone for now.
+                 * NOTE(review): the reason lookup is exempt is not stated
+                 * here - confirm. */
+                return;
+        default:
+                break;
+        }
+
+        priv = frame->this->private;
+        if (!priv->consistent_io)
+                return;
+
+        /* A zero generation on the local is never treated as a mismatch. */
+        if (!local->event_generation ||
+            local->event_generation == priv->event_generation)
+                return;
+
+        *op_ret = -1;
+        if (op_errno)
+                *op_errno = ENOTCONN;
+}
+
void
afr_local_cleanup (afr_local_t *local, xlator_t *this)
{
@@ -2997,10 +3105,9 @@ afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
if (!local)
goto out;
- if (!local->call_count) {
- op_errno = ENOTCONN;
+ local->op = GF_FOP_FLUSH;
+ if (!afr_is_consistent_io_possible (local, this->private, &op_errno))
goto out;
- }
local->fd = fd_ref(fd);
@@ -3126,11 +3233,9 @@ afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
if (!local)
goto out;
- call_count = local->call_count;
- if (!call_count) {
- op_errno = ENOTCONN;
+ local->op = GF_FOP_FSYNC;
+ if (!afr_is_consistent_io_possible (local, priv, &op_errno))
goto out;
- }
local->fd = fd_ref (fd);
@@ -3140,6 +3245,7 @@ afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
local->inode = inode_ref (fd->inode);
+ call_count = local->call_count;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND_COOKIE (frame, afr_fsync_cbk,
@@ -3210,12 +3316,11 @@ afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
if (!local)
goto out;
- call_count = local->call_count;
- if (!call_count) {
- op_errno = ENOTCONN;
+ local->op = GF_FOP_FSYNCDIR;
+ if (!afr_is_consistent_io_possible (local, priv, &op_errno))
goto out;
- }
+ call_count = local->call_count;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_fsyncdir_cbk,
@@ -3506,6 +3611,11 @@ afr_inodelk (call_frame_t *frame, xlator_t *this,
if (!local)
goto out;
+ local->op = GF_FOP_INODELK;
+ if (!afr_inodelk_is_unlock (cmd, flock) &&
+ !afr_is_consistent_io_possible (local, this->private, &op_errno))
+ goto out;
+
loc_copy (&local->loc, loc);
local->cont.inodelk.volume = gf_strdup (volume);
if (!local->cont.inodelk.volume) {
@@ -3589,12 +3699,23 @@ afr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
if (!local)
goto out;
- call_count = local->call_count;
- if (!call_count) {
- op_errno = ENOTCONN;
- goto out;
- }
+ local->op = GF_FOP_FINODELK;
+ if (!afr_inodelk_is_unlock (cmd, flock) &&
+ !afr_is_consistent_io_possible (local, this->private, &op_errno))
+ goto out;
+ local->cont.inodelk.volume = gf_strdup (volume);
+ if (!local->cont.inodelk.volume) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->fd = fd_ref (fd);
+ local->cont.inodelk.cmd = cmd;
+ local->cont.inodelk.flock = *flock;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
+ call_count = local->call_count;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_finodelk_cbk,
@@ -3610,7 +3731,6 @@ afr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
return 0;
out:
AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
-
return 0;
}
@@ -3642,7 +3762,6 @@ afr_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-
int
afr_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
loc_t *loc, const char *basename, entrylk_cmd cmd,
@@ -3660,12 +3779,13 @@ afr_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
if (!local)
goto out;
- call_count = local->call_count;
- if (!call_count) {
- op_errno = ENOTCONN;
- goto out;
- }
+ local->op = GF_FOP_ENTRYLK;
+ if (!afr_entrylk_is_unlock (cmd) &&
+ !afr_is_consistent_io_possible (local, priv, &op_errno))
+ goto out;
+ local->cont.entrylk.cmd = cmd;
+ call_count = local->call_count;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_entrylk_cbk,
@@ -3733,12 +3853,13 @@ afr_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
if (!local)
goto out;
- call_count = local->call_count;
- if (!call_count) {
- op_errno = ENOTCONN;
- goto out;
- }
+ local->op = GF_FOP_FENTRYLK;
+ if (!afr_entrylk_is_unlock (cmd) &&
+ !afr_is_consistent_io_possible (local, priv, &op_errno))
+ goto out;
+ local->cont.entrylk.cmd = cmd;
+ call_count = local->call_count;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_fentrylk_cbk,
@@ -3823,6 +3944,10 @@ afr_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
if (!local)
goto out;
+ local->op = GF_FOP_STATFS;
+ if (!afr_is_consistent_io_possible (local, priv, &op_errno))
+ goto out;
+
if (priv->arbiter_count == 1 && local->child_up[ARBITER_BRICK_INDEX])
local->call_count--;
call_count = local->call_count;
@@ -3963,7 +4088,6 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-
int
afr_lk (call_frame_t *frame, xlator_t *this,
fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
@@ -3979,6 +4103,11 @@ afr_lk (call_frame_t *frame, xlator_t *this,
if (!local)
goto out;
+ local->op = GF_FOP_LK;
+ if (!afr_lk_is_unlock (cmd, flock) &&
+ !afr_is_consistent_io_possible (local, priv, &op_errno))
+ goto out;
+
local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count,
sizeof (*local->cont.lk.locked_nodes),
gf_afr_mt_char);
@@ -4311,7 +4440,7 @@ afr_notify (xlator_t *this, int32_t event,
down_children++;
if (down_children == priv->child_count) {
gf_msg (this->name, GF_LOG_ERROR, 0,
- AFR_MSG_ALL_SUBVOLS_DOWN,
+ AFR_MSG_SUBVOLS_DOWN,
"All subvolumes are down. Going offline "
"until atleast one of them comes back up.");
} else {
@@ -4399,7 +4528,6 @@ out:
return ret;
}
-
int
afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
{
@@ -4422,11 +4550,12 @@ afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
local->call_count = AFR_COUNT (local->child_up, priv->child_count);
if (local->call_count == 0) {
gf_msg (THIS->name, GF_LOG_INFO, 0,
- AFR_MSG_ALL_SUBVOLS_DOWN, "no subvolumes up");
+ AFR_MSG_SUBVOLS_DOWN, "no subvolumes up");
if (op_errno)
*op_errno = ENOTCONN;
goto out;
}
+
local->event_generation = priv->event_generation;
local->read_attempted = GF_CALLOC (priv->child_count, sizeof (char),
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 2260e5dac26..4e29171482a 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -88,6 +88,10 @@ afr_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
if (!local)
goto out;
+ local->op = GF_FOP_OPENDIR;
+ if (!afr_is_consistent_io_possible (local, priv, &op_errno))
+ goto out;
+
fd_ctx = afr_fd_ctx_get (fd, this);
if (!fd_ctx)
goto out;
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index c2a5f526c08..718ba318cfe 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -1622,7 +1622,7 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
if (!call_count) {
gf_msg (this->name, GF_LOG_INFO, 0,
- AFR_MSG_ALL_SUBVOLS_DOWN,
+ AFR_MSG_SUBVOLS_DOWN,
"All bricks are down, aborting.");
afr_unlock (frame, this);
goto out;
diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h
index c7af18d0f25..5fb81c696d8 100644
--- a/xlators/cluster/afr/src/afr-messages.h
+++ b/xlators/cluster/afr/src/afr-messages.h
@@ -93,11 +93,11 @@
/*!
* @messageid 108006
- * @diagnosis All bricks of a replica set are down. Data residing in that
+ * @diagnosis Bricks of a replica set are down. Data residing in that
* replica cannot be accessed until one of the bricks come back up.
* @recommendedaction Ensure that the bricks are up.
*/
-#define AFR_MSG_ALL_SUBVOLS_DOWN (GLFS_COMP_BASE_AFR + 6)
+#define AFR_MSG_SUBVOLS_DOWN (GLFS_COMP_BASE_AFR + 6)
/*!
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index 059d3f9bd71..7a628350c34 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -130,12 +130,16 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
if (!local)
goto out;
+ local->op = GF_FOP_OPEN;
fd_ctx = afr_fd_ctx_get (fd, this);
if (!fd_ctx) {
op_errno = ENOMEM;
goto out;
}
+ if (!afr_is_consistent_io_possible (local, priv, &op_errno))
+ goto out;
+
local->fd = fd_ref (fd);
local->fd_ctx = fd_ctx;
fd_ctx->flags = flags;
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
index 74749f029c8..cb81af42510 100644
--- a/xlators/cluster/afr/src/afr-read-txn.c
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -217,6 +217,12 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
goto read;
}
+ if (!afr_is_consistent_io_possible (local, priv, &local->op_errno)) {
+ local->op_ret = -1;
+ read_subvol = -1;
+ goto read;
+ }
+
local->transaction.type = type;
ret = afr_inode_read_subvol_get (inode, this, data, metadata,
&event_generation);
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 6130ad76543..64a42d9fc7e 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -207,6 +207,7 @@ afr_transaction_detach_fop_frame (call_frame_t *frame)
local = frame->local;
+ afr_handle_inconsistent_fop (frame, &local->op_ret, &local->op_errno);
LOCK (&frame->lock);
{
fop_frame = local->transaction.main_frame;
@@ -2238,6 +2239,11 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
local->transaction.resume = afr_transaction_resume;
local->transaction.type = type;
+ if (!afr_is_consistent_io_possible (local, priv, &ret)) {
+ ret = -ret; /*op_errno to ret conversion*/
+ goto out;
+ }
+
ret = afr_transaction_local_init (local, this);
if (ret < 0)
goto out;
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index da62564e93a..48beaf24a6e 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -132,6 +132,7 @@ reconfigure (xlator_t *this, dict_t *options)
int index = -1;
char *qtype = NULL;
char *fav_child_policy = NULL;
+ gf_boolean_t consistent_io = _gf_false;
priv = this->private;
@@ -258,6 +259,11 @@ reconfigure (xlator_t *this, dict_t *options)
priv->did_discovery = _gf_false;
+ GF_OPTION_RECONF ("consistent-io", consistent_io, options, bool, out);
+ if (priv->quorum_count != 0)
+ consistent_io = _gf_false;
+ priv->consistent_io = consistent_io;
+
ret = 0;
out:
return ret;
@@ -494,6 +500,10 @@ init (xlator_t *this)
GF_OPTION_INIT ("quorum-reads", priv->quorum_reads, bool, out);
GF_OPTION_INIT ("consistent-metadata", priv->consistent_metadata, bool,
out);
+ GF_OPTION_INIT ("consistent-io", priv->consistent_io, bool, out);
+
+ if (priv->quorum_count != 0)
+ priv->consistent_io = _gf_false;
priv->wait_count = 1;
@@ -594,14 +604,11 @@ fini (xlator_t *this)
struct xlator_fops fops = {
.lookup = afr_lookup,
- .open = afr_open,
.lk = afr_lk,
.flush = afr_flush,
.statfs = afr_statfs,
.fsync = afr_fsync,
.fsyncdir = afr_fsyncdir,
- .xattrop = afr_xattrop,
- .fxattrop = afr_fxattrop,
.inodelk = afr_inodelk,
.finodelk = afr_finodelk,
.entrylk = afr_entrylk,
@@ -629,9 +636,14 @@ struct xlator_fops fops = {
.fallocate = afr_fallocate,
.discard = afr_discard,
.zerofill = afr_zerofill,
+ .xattrop = afr_xattrop,
+ .fxattrop = afr_fxattrop,
- /* dir read */
+ /*inode open*/
.opendir = afr_opendir,
+ .open = afr_open,
+
+ /* dir read */
.readdir = afr_readdir,
.readdirp = afr_readdirp,
@@ -986,5 +998,11 @@ struct volume_options options[] = {
" with identical mtime and size in more than half the "
"number of bricks in the replica.",
},
+ { .key = {"consistent-io"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description = "If this option is enabled, i/o will fail even if "
+ "one of the bricks is down in the replicas",
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 29008287e6d..983f07fcce9 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -152,6 +152,7 @@ typedef struct _afr_private {
gf_boolean_t use_afr_in_pump;
char *locking_scheme;
gf_boolean_t esh_granular;
+ gf_boolean_t consistent_io;
} afr_private_t;
@@ -663,6 +664,10 @@ typedef struct _afr_local {
} inodelk;
struct {
+ entrylk_cmd cmd;
+ } entrylk;
+
+ struct {
off_t offset;
gf_seek_what_t what;
} seek;
@@ -965,16 +970,25 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this);
int
afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd);
-#define AFR_STACK_UNWIND(fop, frame, params ...) \
+#define AFR_STACK_UNWIND(fop, frame, op_ret, op_errno, params ...)\
do { \
afr_local_t *__local = NULL; \
xlator_t *__this = NULL; \
+ int32_t __op_ret = 0; \
+ int32_t __op_errno = 0; \
+ \
+ __op_ret = op_ret; \
+ __op_errno = op_errno; \
if (frame) { \
__local = frame->local; \
__this = frame->this; \
+ afr_handle_inconsistent_fop (frame, &__op_ret,\
+ &__op_errno);\
frame->local = NULL; \
} \
- STACK_UNWIND_STRICT (fop, frame, params); \
+ \
+ STACK_UNWIND_STRICT (fop, frame, __op_ret, \
+ __op_errno, params); \
if (__local) { \
afr_local_cleanup (__local, __this); \
mem_put (__local); \
@@ -1160,4 +1174,11 @@ afr_get_msg_id (char *op_type);
int
afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
inode_t *inode);
+
+gf_boolean_t
+afr_is_consistent_io_possible (afr_local_t *local, afr_private_t *priv,
+ int32_t *op_errno);
+void
+afr_handle_inconsistent_fop (call_frame_t *frame, int32_t *op_ret,
+ int32_t *op_errno);
#endif /* __AFR_H__ */