diff options
Diffstat (limited to 'xlators/cluster/afr/src')
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 4 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-dir-read.c | 144 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 6 | 
3 files changed, 23 insertions, 131 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index d8da5edccc5..69e13078652 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -739,7 +739,7 @@ afr_hash_child (inode_t *inode, int32_t child_count, int hashmode)                 uuid_copy (gfid_copy, inode->gfid);          } -        if (hashmode > 1) { +        if (hashmode > 1 && inode->ia_type != IA_IFDIR) {                  /*                   * Why getpid?  Because it's one of the cheapest calls                   * available - faster than gethostname etc. - and returns a @@ -2264,6 +2264,8 @@ __afr_fd_ctx_set (xlator_t *this, fd_t *fd)                  goto out;          } +	fd_ctx->readdir_subvol = -1; +  	pthread_mutex_init (&fd_ctx->delay_lock, NULL);          INIT_LIST_HEAD (&fd_ctx->eager_locked); diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index 3b3d3093c5d..28bf89f2842 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -157,132 +157,6 @@ afr_validate_read_subvol (inode_t *inode, xlator_t *this, int par_read_subvol)  } -#define BACKEND_D_OFF_BITS 63 -#define PRESENT_D_OFF_BITS 63 - -#define ONE 1ULL -#define MASK (~0ULL) -#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS)) -#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS)) - -#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1)) -#define SHIFT_BITS (max (0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1))) - -static uint64_t -afr_bits_for (uint64_t num) -{ -	uint64_t bits = 0, ctrl = 1; - -	while (ctrl < num) { -		ctrl *= 2; -		bits ++; -	} - -	return bits; -} - -int -afr_itransform (xlator_t *this, int subvol, uint64_t x, uint64_t *y_p) -{ -        afr_private_t *conf = NULL; -        int         cnt = 0; -        int         max = 0; -        uint64_t    y = 0; -        uint64_t    hi_mask = 0; -        uint64_t    off_mask = 0; -        int         max_bits = 0; - -        if (x == ((uint64_t) -1)) { -                y = (uint64_t) -1; -                goto out; -        } - -        conf = this->private; -        if (!conf) -                goto out; - -        max = conf->child_count; -        cnt = subvol; - -	if (max == 1) { -		y = x; -		goto out; -	} - -        max_bits = afr_bits_for (max); - -        hi_mask = ~(PRESENT_MASK >> (max_bits + 1)); - -        if (x & hi_mask) { -                /* HUGE d_off */ -                off_mask = MASK << max_bits; -                y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | cnt; -        } else { -                /* small d_off */ -                y = ((x * max) + cnt); -        } - -out: -        if (y_p) -                *y_p = y; - -        return 0; -} - - -int -afr_deitransform (xlator_t *this, uint64_t y, int *subvol_p, -                  uint64_t *x_p) -{ -        afr_private_t *conf = NULL; -        int         cnt = 0; -        int         max = 0; -        uint64_t    x = 0; -        int         subvol = 0; -        int         max_bits = 0; -        uint64_t    off_mask = 0; -        uint64_t    host_mask = 0; - -        if (!this->private) -                return -1; - -        conf = this->private; -        max = conf->child_count; - -	if (max == 1) { -		x = y; -		cnt = 0; -		goto out; -	} - -        if (y & TOP_BIT) { -                /* HUGE d_off */ -                max_bits = afr_bits_for (max); -                off_mask = (MASK << max_bits); -                host_mask = ~(off_mask); - -                x = ((y & ~TOP_BIT) & off_mask) << SHIFT_BITS; - -                cnt = y & host_mask; -	} else { -                /* small d_off */ -                cnt = y % max; -                x = y / max; -        } - -out: -        subvol = cnt; - -        if (subvol_p) -                *subvol_p = subvol; - -        if (x_p) -                *x_p = x; - -        return 0; -} - -  static void  afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol,  			       gf_dirent_t *entries, fd_t *fd) @@ -301,7 +175,6 @@ afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol,                  }  		list_del_init (&entry->list); -		afr_itransform (THIS, subvol, entry->d_off, &entry->d_off);  		list_add_tail (&entry->list, &entries->list);  		if (entry->inode) { @@ -356,9 +229,11 @@ afr_readdir_wind (call_frame_t *frame, xlator_t *this, int subvol)  {  	afr_local_t *local = NULL;  	afr_private_t *priv = NULL; +	afr_fd_ctx_t *fd_ctx = NULL;  	priv = this->private;  	local = frame->local; +	fd_ctx = afr_fd_ctx_get (local->fd, this);  	if (subvol == -1) {  		AFR_STACK_UNWIND (readdir, frame, local->op_ret, @@ -366,6 +241,8 @@ afr_readdir_wind (call_frame_t *frame, xlator_t *this, int subvol)  		return 0;  	} +	fd_ctx->readdir_subvol = subvol; +          if (local->op == GF_FOP_READDIR)                  STACK_WIND_COOKIE (frame, afr_readdir_cbk,                                     (void *) (long) subvol, @@ -393,18 +270,27 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,          afr_local_t   *local     = NULL;          int32_t       op_errno   = 0;  	int           subvol = -1; +	afr_fd_ctx_t *fd_ctx = NULL;  	local = AFR_FRAME_INIT (frame, op_errno);  	if (!local)  		goto out; +	fd_ctx = afr_fd_ctx_get (fd, this); +	if (!fd_ctx) { +	        op_errno = EINVAL; +		goto out; +        } +  	local->op = whichop;          local->fd = fd_ref (fd);          local->cont.readdir.size = size;  	local->cont.readdir.offset = offset;          local->xdata_req = (dict)? dict_ref (dict) : NULL; -	if (offset == 0) { +	subvol = fd_ctx->readdir_subvol; + +	if (offset == 0 || subvol == -1) {  		/* First readdir has option of failing over and selecting  		   an appropriate read subvolume */  		afr_read_txn (frame, this, fd->inode, afr_readdir_wind, @@ -412,8 +298,6 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,  	} else {  		/* But continued readdirs MUST stick to the same subvolume  		   without an option to failover */ -		afr_deitransform (this, offset, &subvol, -				  (uint64_t *)&local->cont.readdir.offset);  		afr_readdir_wind (frame, this, subvol);  	} diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 7e138c54ec0..4044fd59d4e 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -301,6 +301,12 @@ typedef struct {  	/* list of frames currently in progress */  	struct list_head  eager_locked; + +	/* the subvolume on which the latest sequence of readdirs (starting +	   at offset 0) has begun. Till the next readdir request with 0 offset +	   arrives, we continue to read off this subvol. +	*/ +	int readdir_subvol;  } afr_fd_ctx_t;  | 
