diff options
| author | Ravishankar N <ravishankar@redhat.com> | 2015-06-25 00:22:41 +0530 | 
|---|---|---|
| committer | Vijay Bellur <vbellur@redhat.com> | 2015-06-26 19:30:33 -0700 | 
| commit | 0104b5869d89ac58cb13e10417626455c8ba2143 (patch) | |
| tree | 5db665e349e5787e308487f8474652cd346df93a | |
| parent | b335fbe14e0afbec3cf50409707f3c8df5d5c01d (diff) | |
afr: Block fops when file is in split-brain
When an inode is in split-brain, fail the affected FOPs with EIO:
- For directories, block only metadata FOPs.
- For non-directories, block both data and metadata FOPs.
- Never block entry FOPs.
Change-Id: Id7f656f4a513b9d33c457dd7f2d58028dbef8e61
BUG: 1235007
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: http://review.gluster.org/11371
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
| -rwxr-xr-x | tests/bugs/glusterfs/bug-873962.t | 8 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 58 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-read-txn.c | 22 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 7 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 3 | 
5 files changed, 80 insertions, 18 deletions
diff --git a/tests/bugs/glusterfs/bug-873962.t b/tests/bugs/glusterfs/bug-873962.t index 492d0285497..7faa9998159 100755 --- a/tests/bugs/glusterfs/bug-873962.t +++ b/tests/bugs/glusterfs/bug-873962.t @@ -65,8 +65,8 @@ TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $  #Files are in split-brain, so open should fail  TEST ! cat $M0/a;  TEST ! cat $M1/a; -TEST cat $M0/b; -TEST cat $M1/b; +TEST ! cat $M0/b; +TEST ! cat $M1/b;  #Reset split-brain status  TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/a; @@ -92,8 +92,8 @@ TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $  #Files are in split-brain, so open should fail  TEST ! cat $M0/c  TEST ! cat $M1/c -TEST cat $M0/d -TEST cat $M1/d +TEST ! cat $M0/d +TEST ! cat $M1/d  TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/c  TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/d diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index f644c9dc200..b42772fda09 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -336,6 +336,58 @@ out:  }  int +afr_inode_get_readable (call_frame_t *frame, inode_t *inode, xlator_t *this, +                       unsigned char *readable, int *event_p, int type) +{ + +        afr_private_t *priv = this->private; +        afr_local_t *local = frame->local; +        unsigned char *data = alloca0 (priv->child_count); +        unsigned char *metadata = alloca0 (priv->child_count); +        int data_count = 0; +        int metadata_count = 0; +        int event_generation = 0; +        int ret = 0; + +        /* We don't care about split-brains for entry transactions. 
*/ +        if (type == AFR_ENTRY_TRANSACTION || type == AFR_ENTRY_RENAME_TRANSACTION) +                return 0; + +        ret = afr_inode_read_subvol_get (inode, this, data, metadata, +                                         &event_generation); +        if (ret == -1) +                return -EIO; + +        data_count = AFR_COUNT (data, priv->child_count); +        metadata_count = AFR_COUNT (metadata, priv->child_count); + +        if (inode->ia_type == IA_IFDIR) { +                /* For directories, allow even if it is in data split-brain. */ +                if (type == AFR_METADATA_TRANSACTION) { +                        if (!metadata_count) +                                return -EIO; +                } +        } else { +                /* For files, abort in case of data/metadata split-brain. */ +                if (!data_count || !metadata_count) +                        return -EIO; +        } + +        if (type == AFR_METADATA_TRANSACTION && readable) +                memcpy (readable, metadata, priv->child_count * sizeof *metadata); +        if (type == AFR_DATA_TRANSACTION && readable) { +                if (!data_count) +                        memcpy (readable, local->child_up, +                                priv->child_count * sizeof *readable); +                else +                        memcpy (readable, data, priv->child_count * sizeof *data); +        } +        if (event_p) +                *event_p = event_generation; +        return 0; +} + +int  afr_inode_split_brain_choice_get (inode_t *inode, xlator_t *this,                                    int *spb_choice)  { @@ -593,6 +645,8 @@ afr_accuse_smallfiles (xlator_t *this, struct afr_reply *replies,  	for (i = 0; i < priv->child_count; i++) {  		if (data_accused[i])  			continue; +                if ((priv->arbiter_count == 1) && (i == ARBITER_BRICK_INDEX)) +                        continue;  		if (replies[i].poststat.ia_size < maxsize)  			data_accused[i] = 1;  	} @@ -1677,6 
+1731,10 @@ afr_local_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this,           * the slowest local subvolume is far preferable to a remote one.           */          if (is_local) { +                /* Don't set arbiter as read child. */ +                if ((priv->arbiter_count == 1) && +                    (child_index == ARBITER_BRICK_INDEX)) +                        goto out;                  gf_log (this->name, GF_LOG_INFO,                          "selecting local read_child %s",                          priv->children[child_index]->name); diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index 6121108872f..6e545497108 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -52,6 +52,9 @@ afr_read_txn_next_subvol (call_frame_t *frame, xlator_t *this)                  local->op_ret = ret;                              \                  local->op_errno = errnum;                          \                  read_subvol = index;                              \ +                gf_msg (this->name, GF_LOG_ERROR, EIO, AFR_MSG_SPLIT_BRAIN,\ +                        "Failing %s on gfid %s: split-brain observed.",\ +                        gf_fop_list[local->op], uuid_utoa (inode->gfid));\                  goto label;                                       \          } while (0) @@ -59,7 +62,6 @@ int  afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)  {  	afr_local_t *local = NULL; -        afr_private_t *priv = NULL;  	int read_subvol = 0;  	int event_generation = 0;  	inode_t *inode = NULL; @@ -68,27 +70,19 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)  	local = frame->local;  	inode = local->inode; -        priv  = frame->this->private;  	if (err)                  AFR_READ_TXN_SET_ERROR_AND_GOTO (-1, -err, -1, readfn); -	ret = afr_inode_read_subvol_type_get (inode, this, local->readable, -					      
&event_generation, -					      local->transaction.type); +	ret = afr_inode_get_readable (frame, inode, this, local->readable, +			              &event_generation, +				      local->transaction.type);  	if (ret == -1 || !event_generation)  		/* Even after refresh, we don't have a good  		   read subvolume. Time to bail */                  AFR_READ_TXN_SET_ERROR_AND_GOTO (-1, EIO, -1, readfn); -         /* For directories in split-brain, we need to allow all fops -          * except (f)getxattr and access. */ -        if (!AFR_COUNT(local->readable, priv->child_count) && -            local->transaction.type == AFR_DATA_TRANSACTION && -            inode->ia_type == IA_IFDIR) -                memcpy (local->readable, local->child_up, priv->child_count); -  	read_subvol = afr_read_subvol_select_by_policy (inode, this,  							local->readable, NULL);  	if (read_subvol == -1) @@ -237,8 +231,8 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,  	if (read_subvol < 0 || read_subvol > priv->child_count) {  		gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN,                         "Unreadable subvolume %d found with event generation " -                       "%d. (Possible split-brain)", -                        read_subvol, event_generation); +                       "%d for gfid %s. 
(Possible split-brain)", +                        read_subvol, event_generation, uuid_utoa(inode->gfid));  		goto refresh;  	} diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index a2023884465..b27cfedaddb 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -1967,6 +1967,13 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)          if (ret < 0)              goto out; +        ret = afr_inode_get_readable (frame, local->inode, this, 0, 0, type); +        if (ret) { +                gf_msg (this->name, GF_LOG_ERROR, EIO, AFR_MSG_SPLIT_BRAIN, +                        "Failing %s on gfid %s: split-brain observed.", +                        gf_fop_list[local->op], uuid_utoa (local->inode->gfid)); +                goto out; +        }          afr_transaction_eager_lock_init (local, this);          if (local->fd && local->transaction.eager_lock_on) diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 0bb184c78ae..c8e1a5f0008 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -761,6 +761,9 @@ typedef struct afr_read_subvol_args {                                              (op_errno == EBADFD)))  int +afr_inode_get_readable (call_frame_t *frame, inode_t *inode, xlator_t *this, +                        unsigned char *readable, int *event_p, int type); +int  afr_inode_read_subvol_get (inode_t *inode, xlator_t *this,  			   unsigned char *data_subvols,  			   unsigned char *metadata_subvols,  | 
