diff options
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 193 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-dir-read.c | 4 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-lk-common.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-messages.h | 4 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-open.c | 4 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-read-txn.c | 6 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 6 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.c | 26 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 25 | 
9 files changed, 229 insertions, 41 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 9b2c0d7caea..dec667fd460 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -43,6 +43,20 @@  #include "afr-self-heald.h"  #include "afr-messages.h" +gf_boolean_t +afr_is_consistent_io_possible (afr_local_t *local, afr_private_t *priv, +                               int32_t *op_errno) +{ +        if (priv->consistent_io && local->call_count != priv->child_count) { +                gf_msg (THIS->name, GF_LOG_INFO, 0, +                        AFR_MSG_SUBVOLS_DOWN, "All subvolumes are not up"); +                if (op_errno) +                        *op_errno = ENOTCONN; +                return _gf_false; +        } +        return _gf_true; +} +  call_frame_t *  afr_copy_frame (call_frame_t *base)  { @@ -1555,6 +1569,100 @@ afr_remove_eager_lock_stub (afr_local_t *local)          UNLOCK (&local->fd->lock);  } +static gf_boolean_t +afr_entrylk_is_unlock (entrylk_cmd cmd) +{ +        if (ENTRYLK_UNLOCK == cmd) +                return _gf_true; +        return _gf_false; +} + +static gf_boolean_t +afr_inodelk_is_unlock (int32_t cmd, struct gf_flock *flock) +{ +        switch (cmd) { +        case F_SETLKW: +        case F_SETLK: +                if (F_UNLCK == flock->l_type) +                        return _gf_true; +                break; +        default: +                return _gf_false; +        } +        return _gf_false; +} + +static gf_boolean_t +afr_lk_is_unlock (int32_t cmd, struct gf_flock *flock) +{ +        switch (cmd) { +        case F_RESLK_UNLCK: +                return _gf_true; +                break; + +#if F_SETLKW != F_SETLKW64 +        case F_SETLKW64: +#endif +        case F_SETLKW: + +#if F_SETLK != F_SETLK64 +        case F_SETLK64: +#endif +        case F_SETLK: +                if (F_UNLCK == flock->l_type) +                        return _gf_true; +                break; +        default: +                return _gf_false; +        } +        return _gf_false; +} + +void +afr_handle_inconsistent_fop (call_frame_t *frame, int32_t *op_ret, +                             int32_t *op_errno) +{ +        afr_private_t *priv = NULL; +        afr_local_t   *local = NULL; + +        if (!frame || !frame->this || !frame->local || !frame->this->private) +                return; + +        if (*op_ret < 0) +                return; + +        /* Failing inodelk/entrylk/lk here is not a good idea because we +         * need to cleanup the locks on the other bricks if we choose to fail +         * the fop here. The brick may go down just after unwind happens as well +         * so anyways the fop will fail when the next fop is sent so leaving +         * it like this for now.*/ +        local = frame->local; +        switch (local->op) { +        case GF_FOP_LOOKUP: +        case GF_FOP_INODELK: +        case GF_FOP_FINODELK: +        case GF_FOP_ENTRYLK: +        case GF_FOP_FENTRYLK: +        case GF_FOP_LK: +                return; +        default: +                break; +        } + +        priv = frame->this->private; +        if (!priv->consistent_io) +                return; + +        if (local->event_generation && +            (local->event_generation != priv->event_generation)) +                goto inconsistent; + +        return; +inconsistent: +        *op_ret = -1; +        *op_errno = ENOTCONN; +} +  void  afr_local_cleanup (afr_local_t *local, xlator_t *this)  { @@ -2997,10 +3105,9 @@ afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)  	if (!local)  		goto out; -	if (!local->call_count) { -		op_errno = ENOTCONN; +        local->op = GF_FOP_FLUSH; +	if (!afr_is_consistent_io_possible (local, this->private, &op_errno))  		goto out; -	}  	local->fd = fd_ref(fd); @@ -3126,11 +3233,9 @@ afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,  	if (!local)  		goto out; -        call_count = local->call_count; -	if (!call_count) { -		op_errno = ENOTCONN; +        local->op = GF_FOP_FSYNC; +	if (!afr_is_consistent_io_possible (local, priv, &op_errno))  		goto out; -	}          local->fd = fd_ref (fd); @@ -3140,6 +3245,7 @@ afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,  	local->inode = inode_ref (fd->inode); +        call_count = local->call_count;          for (i = 0; i < priv->child_count; i++) {                  if (local->child_up[i]) {                          STACK_WIND_COOKIE (frame, afr_fsync_cbk, @@ -3210,12 +3316,11 @@ afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,  	if (!local)  		goto out; -        call_count = local->call_count; -	if (!call_count) { -		op_errno = ENOTCONN; +        local->op = GF_FOP_FSYNCDIR; +	if (!afr_is_consistent_io_possible (local, priv, &op_errno))  		goto out; -	} +        call_count = local->call_count;          for (i = 0; i < priv->child_count; i++) {                  if (local->child_up[i]) {                          STACK_WIND (frame, afr_fsyncdir_cbk, @@ -3506,6 +3611,11 @@ afr_inodelk (call_frame_t *frame, xlator_t *this,          if (!local)                  goto out; +        local->op = GF_FOP_INODELK; +        if (!afr_inodelk_is_unlock (cmd, flock) && +            !afr_is_consistent_io_possible (local, this->private, &op_errno)) +                goto out; +          loc_copy (&local->loc, loc);          local->cont.inodelk.volume = gf_strdup (volume);          if (!local->cont.inodelk.volume) { @@ -3589,12 +3699,23 @@ afr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,  	if (!local)  		goto out; -        call_count = local->call_count; -	if (!call_count) { -		op_errno = ENOTCONN; -		goto out; -	} +        local->op = GF_FOP_FINODELK; +        if (!afr_inodelk_is_unlock (cmd, flock) && +            !afr_is_consistent_io_possible (local, this->private, &op_errno)) +                goto out; +        local->cont.inodelk.volume = gf_strdup (volume); +        if (!local->cont.inodelk.volume) { +                op_errno = ENOMEM; +                goto out; +        } + +        local->fd = fd_ref (fd); +        local->cont.inodelk.cmd = cmd; +        local->cont.inodelk.flock = *flock; +        if (xdata) +                local->xdata_req = dict_ref (xdata); +        call_count = local->call_count;          for (i = 0; i < priv->child_count; i++) {                  if (local->child_up[i]) {                          STACK_WIND (frame, afr_finodelk_cbk, @@ -3610,7 +3731,6 @@ afr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,  	return 0;  out:  	AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL); -          return 0;  } @@ -3642,7 +3762,6 @@ afr_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          return 0;  } -  int  afr_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,  	     loc_t *loc, const char *basename, entrylk_cmd cmd, @@ -3660,12 +3779,13 @@ afr_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,  	if (!local)  		goto out; -        call_count = local->call_count; -	if (!call_count) { -		op_errno = ENOTCONN; -		goto out; -	} +        local->op = GF_FOP_ENTRYLK; +        if (!afr_entrylk_is_unlock (cmd) && +            !afr_is_consistent_io_possible (local, priv, &op_errno)) +                goto out; +        local->cont.entrylk.cmd = cmd; +        call_count = local->call_count;          for (i = 0; i < priv->child_count; i++) {                  if (local->child_up[i]) {                          STACK_WIND (frame, afr_entrylk_cbk, @@ -3733,12 +3853,13 @@ afr_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,  	if (!local)  		goto out; -        call_count = local->call_count; -	if (!call_count) { -		op_errno = ENOTCONN; -		goto out; -	} +        local->op = GF_FOP_FENTRYLK; +        if (!afr_entrylk_is_unlock (cmd) && +            !afr_is_consistent_io_possible (local, priv, &op_errno)) +                goto out; +        local->cont.entrylk.cmd = cmd; +        call_count = local->call_count;          for (i = 0; i < priv->child_count; i++) {                  if (local->child_up[i]) {                          STACK_WIND (frame, afr_fentrylk_cbk, @@ -3823,6 +3944,10 @@ afr_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)  	if (!local)  		goto out; +        local->op = GF_FOP_STATFS; +	if (!afr_is_consistent_io_possible (local, priv, &op_errno)) +		goto out; +          if (priv->arbiter_count == 1 && local->child_up[ARBITER_BRICK_INDEX])                  local->call_count--;          call_count = local->call_count; @@ -3963,7 +4088,6 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          return 0;  } -  int  afr_lk (call_frame_t *frame, xlator_t *this,          fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata) @@ -3979,6 +4103,11 @@ afr_lk (call_frame_t *frame, xlator_t *this,          if (!local)                  goto out; +        local->op = GF_FOP_LK; +        if (!afr_lk_is_unlock (cmd, flock) && +            !afr_is_consistent_io_possible (local, priv, &op_errno)) +                goto out; +          local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count,                                                   sizeof (*local->cont.lk.locked_nodes),                                                   gf_afr_mt_char); @@ -4311,7 +4440,7 @@ afr_notify (xlator_t *this, int32_t event,                                          down_children++;                          if (down_children == priv->child_count) {                                  gf_msg (this->name, GF_LOG_ERROR, 0, -                                        AFR_MSG_ALL_SUBVOLS_DOWN, +                                        AFR_MSG_SUBVOLS_DOWN,                                         "All subvolumes are down. Going offline "                                      "until atleast one of them comes back up.");                          } else { @@ -4399,7 +4528,6 @@ out:          return ret;  } -  int  afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)  { @@ -4422,11 +4550,12 @@ afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)          local->call_count = AFR_COUNT (local->child_up, priv->child_count);          if (local->call_count == 0) {                  gf_msg (THIS->name, GF_LOG_INFO, 0, -                        AFR_MSG_ALL_SUBVOLS_DOWN, "no subvolumes up"); +                        AFR_MSG_SUBVOLS_DOWN, "no subvolumes up");                  if (op_errno)                          *op_errno = ENOTCONN;                  goto out;          } +  	local->event_generation = priv->event_generation;  	local->read_attempted = GF_CALLOC (priv->child_count, sizeof (char), diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index 2260e5dac26..4e29171482a 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -88,6 +88,10 @@ afr_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)  	if (!local)  		goto out; +        local->op = GF_FOP_OPENDIR; +        if (!afr_is_consistent_io_possible (local, priv, &op_errno)) +		goto out; +  	fd_ctx = afr_fd_ctx_get (fd, this);  	if (!fd_ctx)  		goto out; diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index c2a5f526c08..718ba318cfe 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -1622,7 +1622,7 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)                  if (!call_count) {                          gf_msg (this->name, GF_LOG_INFO, 0, -                                AFR_MSG_ALL_SUBVOLS_DOWN, +                                AFR_MSG_SUBVOLS_DOWN,                                  "All bricks are down, aborting.");                          afr_unlock (frame, this);                          goto out; diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h index c7af18d0f25..5fb81c696d8 100644 --- a/xlators/cluster/afr/src/afr-messages.h +++ b/xlators/cluster/afr/src/afr-messages.h @@ -93,11 +93,11 @@  /*!   * @messageid 108006 - * @diagnosis All bricks of a replica set are down. Data residing in that + * @diagnosis bricks of a replica set are down. Data residing in that   * replica cannot be accessed until one of the bricks come back up.   * @recommendedaction Ensure that the bricks are up.   */ -#define AFR_MSG_ALL_SUBVOLS_DOWN        (GLFS_COMP_BASE_AFR + 6) +#define AFR_MSG_SUBVOLS_DOWN            (GLFS_COMP_BASE_AFR + 6)  /*! diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c index 059d3f9bd71..7a628350c34 100644 --- a/xlators/cluster/afr/src/afr-open.c +++ b/xlators/cluster/afr/src/afr-open.c @@ -130,12 +130,16 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,  	if (!local)  		goto out; +        local->op = GF_FOP_OPEN;  	fd_ctx = afr_fd_ctx_get (fd, this);  	if (!fd_ctx) {  		op_errno = ENOMEM;  		goto out;  	} +        if (!afr_is_consistent_io_possible (local, priv, &op_errno)) +		goto out; +          local->fd = fd_ref (fd);  	local->fd_ctx = fd_ctx;  	fd_ctx->flags = flags; diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index 74749f029c8..cb81af42510 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -217,6 +217,12 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,                  goto read;          } +        if (!afr_is_consistent_io_possible (local, priv, &local->op_errno)) { +                local->op_ret = -1; +                read_subvol = -1; +                goto read; +        } +  	local->transaction.type = type;          ret = afr_inode_read_subvol_get (inode, this, data, metadata,                                           &event_generation); diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 6130ad76543..64a42d9fc7e 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -207,6 +207,7 @@ afr_transaction_detach_fop_frame (call_frame_t *frame)          local = frame->local; +        afr_handle_inconsistent_fop (frame, &local->op_ret, &local->op_errno);          LOCK (&frame->lock);          {                  fop_frame = local->transaction.main_frame; @@ -2238,6 +2239,11 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)          local->transaction.resume = afr_transaction_resume;          local->transaction.type   = type; +        if (!afr_is_consistent_io_possible (local, priv, &ret)) { +                ret = -ret; /*op_errno to ret conversion*/ +                goto out; +        } +          ret = afr_transaction_local_init (local, this);          if (ret < 0)                  goto out; diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index da62564e93a..48beaf24a6e 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -132,6 +132,7 @@ reconfigure (xlator_t *this, dict_t *options)          int            index       = -1;          char          *qtype       = NULL;          char          *fav_child_policy = NULL; +        gf_boolean_t   consistent_io = _gf_false;          priv = this->private; @@ -258,6 +259,11 @@ reconfigure (xlator_t *this, dict_t *options)          priv->did_discovery = _gf_false; +        GF_OPTION_RECONF ("consistent-io", consistent_io, options, bool, out); +        if (priv->quorum_count != 0) +                consistent_io = _gf_false; +        priv->consistent_io = consistent_io; +          ret = 0;  out:          return ret; @@ -494,6 +500,10 @@ init (xlator_t *this)          GF_OPTION_INIT ("quorum-reads", priv->quorum_reads, bool, out);          GF_OPTION_INIT ("consistent-metadata", priv->consistent_metadata, bool,                          out); +        GF_OPTION_INIT ("consistent-io", priv->consistent_io, bool, out); + +        if (priv->quorum_count != 0) +                priv->consistent_io = _gf_false;          priv->wait_count = 1; @@ -594,14 +604,11 @@ fini (xlator_t *this)  struct xlator_fops fops = {          .lookup      = afr_lookup, -        .open        = afr_open,          .lk          = afr_lk,          .flush       = afr_flush,          .statfs      = afr_statfs,          .fsync       = afr_fsync,          .fsyncdir    = afr_fsyncdir, -        .xattrop     = afr_xattrop, -        .fxattrop    = afr_fxattrop,          .inodelk     = afr_inodelk,          .finodelk    = afr_finodelk,          .entrylk     = afr_entrylk, @@ -629,9 +636,14 @@ struct xlator_fops fops = {          .fallocate   = afr_fallocate,          .discard     = afr_discard,          .zerofill    = afr_zerofill, +        .xattrop     = afr_xattrop, +        .fxattrop    = afr_fxattrop, -        /* dir read */ +        /*inode open*/          .opendir     = afr_opendir, +        .open        = afr_open, + +        /* dir read */          .readdir     = afr_readdir,          .readdirp    = afr_readdirp, @@ -986,5 +998,11 @@ struct volume_options options[] = {                           " with identical mtime and size in more than half the "                           "number of bricks in the replica.",          }, +        { .key = {"consistent-io"}, +          .type = GF_OPTION_TYPE_BOOL, +          .default_value = "no", +          .description = "If this option is enabled, i/o will fail even if " +                         "one of the bricks is down in the replicas", +        },          { .key  = {NULL} },  }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 29008287e6d..983f07fcce9 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -152,6 +152,7 @@ typedef struct _afr_private {  	gf_boolean_t           use_afr_in_pump;  	char                   *locking_scheme;          gf_boolean_t            esh_granular; +        gf_boolean_t           consistent_io;  } afr_private_t; @@ -663,6 +664,10 @@ typedef struct _afr_local {                  } inodelk;                  struct { +                        entrylk_cmd cmd; +                } entrylk; + +                struct {                          off_t offset;                          gf_seek_what_t what;                  } seek; @@ -965,16 +970,25 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this);  int  afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd); -#define AFR_STACK_UNWIND(fop, frame, params ...)                \ +#define AFR_STACK_UNWIND(fop, frame, op_ret, op_errno, params ...)\          do {                                                    \                  afr_local_t *__local = NULL;                    \                  xlator_t    *__this = NULL;                     \ +                int32_t     __op_ret   = 0;                     \ +                int32_t     __op_errno = 0;                     \ +                                                                \ +                __op_ret = op_ret;                              \ +                __op_errno = op_errno;                          \                  if (frame) {                                    \                          __local = frame->local;                 \                          __this = frame->this;                   \ +                        afr_handle_inconsistent_fop (frame, &__op_ret,\ +                                                     &__op_errno);\                          frame->local = NULL;                    \                  }                                               \ -                STACK_UNWIND_STRICT (fop, frame, params);       \ +                                                                \ +                STACK_UNWIND_STRICT (fop, frame, __op_ret,      \ +                                     __op_errno, params);       \                  if (__local) {                                  \                          afr_local_cleanup (__local, __this);    \                          mem_put (__local);                      \ @@ -1160,4 +1174,11 @@ afr_get_msg_id (char *op_type);  int  afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,                               inode_t *inode); + +gf_boolean_t +afr_is_consistent_io_possible (afr_local_t *local, afr_private_t *priv, +                               int32_t *op_errno); +void +afr_handle_inconsistent_fop (call_frame_t *frame, int32_t *op_ret, +                             int32_t *op_errno);  #endif /* __AFR_H__ */  | 
