diff options
| author | Krutika Dhananjay <kdhananj@redhat.com> | 2015-01-22 13:53:47 +0530 | 
|---|---|---|
| committer | Raghavendra Bhat <raghavendra@redhat.com> | 2015-02-11 01:30:02 -0800 | 
| commit | c57c455347a72ebf0085add49ff59aae26c7a70d (patch) | |
| tree | 0cda97240cf5595c08635681c9cb4a265cdd05df | |
| parent | 1c14d8268b36e401ad7ac74ba3f082100fbe2bcc (diff) | |
cluster/afr: When parent and entry read subvols are different, set entry->inode to NULL
        Backport of: http://review.gluster.org/#/c/9477
That way a lookup would be forced on the entry, and its attributes will
always be selected from its read subvol.
Change-Id: Iaba25e2cd5f83e983fc8b1a1f48da3850808e6b8
BUG: 1186119
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: http://review.gluster.org/9562
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Raghavendra Bhat <raghavendra@redhat.com>
| -rw-r--r-- | tests/basic/afr/resolve.t | 8 | ||||
| -rw-r--r-- | tests/volume.rc | 7 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 56 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-dir-read.c | 61 | 
4 files changed, 106 insertions, 26 deletions
diff --git a/tests/basic/afr/resolve.t b/tests/basic/afr/resolve.t index 7dd432996f6..4adef6726eb 100644 --- a/tests/basic/afr/resolve.t +++ b/tests/basic/afr/resolve.t @@ -24,7 +24,7 @@ TEST kill_brick $V0 $H0 $B0/${V0}0  rm -rf $B0/${V0}0/.glusterfs $B0/${V0}0/a  TEST $CLI volume start $V0 force -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0  #Test that the lookup returns ENOENT instead of ESTALE  #If lookup returns ESTALE this command will fail with ESTALE  TEST touch f @@ -44,6 +44,8 @@ echo jkl > $M1/b  #gfid-mismatch happened. This should result in EIO  TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1  TEST $CLI volume start $V0 force -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 -TEST ! cat $M0/b +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 +# The kernel knows nothing about the tricks done to the volume, and the file +# may still be in page cache. Wait for timeout. +EXPECT_WITHIN 10 "^$" cat $M0/b  cleanup diff --git a/tests/volume.rc b/tests/volume.rc index 71d0947c5a6..f64639d6cef 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -103,6 +103,13 @@ function _afr_child_up_status {          echo "$up"  } +function afr_child_up_status_meta { +        local mnt=$1 +        local repl=$2 +        local child=$3 +        grep "child_up\[$child\]" $mnt/.meta/graphs/active/$repl/private | awk '{print $3}' +} +  function afr_child_up_status {          local vol=$1          #brick_id is (brick-num in volume info - 1) diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index aefad8be959..3952681e6a1 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1213,6 +1213,50 @@ afr_xattrs_are_equal (dict_t *dict1, dict_t *dict2)          return _gf_true;  } +static int +afr_get_parent_read_subvol (xlator_t *this, inode_t *parent, +                            struct afr_reply *replies, unsigned char *readable) +{ +        int             i                    = 0; +        int             par_read_subvol      = -1; +        int             par_read_subvol_iter = -1; +        afr_private_t  *priv                 = NULL; + +        priv = this->private; + +        if (parent) +                par_read_subvol = afr_data_subvol_get (parent, this, 0, 0); + +        for (i = 0; i < priv->child_count; i++) { +                if (!replies[i].valid) +                        continue; + +                if (replies[i].op_ret < 0) +                        continue; + +                if (par_read_subvol_iter == -1) { +                        par_read_subvol_iter = i; +                        continue; +                } + +                if ((par_read_subvol_iter != par_read_subvol) && readable[i]) +                        par_read_subvol_iter = i; + +                if (i == par_read_subvol) +                        par_read_subvol_iter = i; +        } +        /* At the end of the for-loop, the only reason why @par_read_subvol_iter +         * could be -1 is when this LOOKUP has failed on all sub-volumes. +         * So it is okay to send an arbitrary subvolume (0 in this case) +         * as parent read subvol. +         */ +        if (par_read_subvol_iter == -1) +                par_read_subvol_iter = 0; + +        return par_read_subvol_iter; + +} +  static void  afr_lookup_done (call_frame_t *frame, xlator_t *this)  { @@ -1221,23 +1265,25 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)  	int                 i = -1;  	int                 op_errno = 0;  	int                 read_subvol = 0; +        int                 par_read_subvol = 0;  	unsigned char      *readable = NULL;  	int                 event = 0;  	struct afr_reply   *replies = NULL;  	uuid_t              read_gfid = {0, };  	gf_boolean_t        locked_entry = _gf_false;  	gf_boolean_t        can_interpret = _gf_true; +        inode_t            *parent = NULL;          priv  = this->private;          local = frame->local;  	replies = local->replies; +        parent = local->loc.parent;  	locked_entry = afr_is_entry_possibly_under_txn (local, this);  	readable = alloca0 (priv->child_count); -	afr_inode_read_subvol_get (local->loc.parent, this, readable, -				   NULL, &event); +	afr_inode_read_subvol_get (parent, this, readable, NULL, &event);  	/* First, check if we have a gfid-change from somewhere,  	   If so, propagate that so that a fresh lookup can be @@ -1263,7 +1309,6 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)  			   "underway" in creation */  			local->op_ret = -1;  			local->op_errno = ENOENT; -			read_subvol = i;  			goto unwind;  		} @@ -1341,10 +1386,13 @@ unwind:  	if (read_subvol == -1)  		read_subvol = 0; +        par_read_subvol = afr_get_parent_read_subvol (this, parent, replies, +                                                      readable); +  	AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,  			  local->inode, &local->replies[read_subvol].poststat,  			  local->replies[read_subvol].xdata, -			  &local->replies[read_subvol].postparent); +			  &local->replies[par_read_subvol].postparent);  }  /* diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index 41f5e60032d..3b3d3093c5d 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -123,6 +123,39 @@ out:          return 0;  } +static int +afr_validate_read_subvol (inode_t *inode, xlator_t *this, int par_read_subvol) +{ +	int             gen               = 0; +        int             entry_read_subvol = 0; +	unsigned char  *data_readable     = NULL; +	unsigned char  *metadata_readable = NULL; +        afr_private_t  *priv              = NULL; + +        priv = this->private; +	data_readable = alloca0 (priv->child_count); +	metadata_readable = alloca0 (priv->child_count); + +	afr_inode_read_subvol_get (inode, this, data_readable, +				   metadata_readable, &gen); + +        if (gen != priv->event_generation || +                !data_readable[par_read_subvol] || +                !metadata_readable[par_read_subvol]) +                return -1; + +        /* Once the control reaches the following statement, it means that the +         * parent's read subvol is perfectly readable. So calling +         * either afr_data_subvol_get() or afr_metadata_subvol_get() would +         * yield the same result. Hence, choosing afr_data_subvol_get() below. +         */ +        entry_read_subvol = afr_data_subvol_get (inode, this, 0, 0); +        if (entry_read_subvol != par_read_subvol) +                return -1; + +        return 0; + +}  #define BACKEND_D_OFF_BITS 63  #define PRESENT_D_OFF_BITS 63 @@ -254,17 +287,12 @@ static void  afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol,  			       gf_dirent_t *entries, fd_t *fd)  { -	afr_private_t *priv = NULL; -        gf_dirent_t *entry = NULL; -        gf_dirent_t *tmp = NULL; -	unsigned char *data_readable = NULL; -	unsigned char *metadata_readable = NULL; -	int gen = 0; +        int            ret   = -1; +        gf_dirent_t   *entry = NULL; +        gf_dirent_t   *tmp   = NULL; +        xlator_t      *this  = NULL; -	priv = THIS->private; - -	data_readable = alloca0 (priv->child_count); -	metadata_readable = alloca0 (priv->child_count); +        this = THIS;          list_for_each_entry_safe (entry, tmp, &subvol_entries->list, list) {                  if (__is_root_gfid (fd->inode->gfid) && @@ -277,17 +305,12 @@ afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol,  		list_add_tail (&entry->list, &entries->list);  		if (entry->inode) { -			gen = 0; -			afr_inode_read_subvol_get (entry->inode, THIS, -						   data_readable, -						   metadata_readable, &gen); - -			if (gen != priv->event_generation || -				!data_readable[subvol] || -				!metadata_readable[subvol]) { - +                        ret = afr_validate_read_subvol (entry->inode, this, +                                                        subvol); +                        if (ret == -1) {  				inode_unref (entry->inode);  				entry->inode = NULL; +                                continue;  			}  		}          }  | 
