summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 4
-rw-r--r-- xlators/cluster/afr/src/afr-dir-read.c 144
-rw-r--r-- xlators/cluster/afr/src/afr.h 6
3 files changed, 23 insertions, 131 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 5d6737f72df..2cbd0ce4c90 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -741,7 +741,7 @@ afr_hash_child (inode_t *inode, int32_t child_count, int hashmode)
uuid_copy (gfid_copy, inode->gfid);
}
- if (hashmode > 1) {
+ if (hashmode > 1 && inode->ia_type != IA_IFDIR) {
/*
* Why getpid? Because it's one of the cheapest calls
* available - faster than gethostname etc. - and returns a
@@ -2218,6 +2218,8 @@ __afr_fd_ctx_set (xlator_t *this, fd_t *fd)
goto out;
}
+ fd_ctx->readdir_subvol = -1;
+
pthread_mutex_init (&fd_ctx->delay_lock, NULL);
INIT_LIST_HEAD (&fd_ctx->eager_locked);
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 41f5e60032d..af6a1787593 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -124,132 +124,6 @@ out:
}
-#define BACKEND_D_OFF_BITS 63
-#define PRESENT_D_OFF_BITS 63
-
-#define ONE 1ULL
-#define MASK (~0ULL)
-#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS))
-#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS))
-
-#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1))
-#define SHIFT_BITS (max (0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1)))
-
-static uint64_t
-afr_bits_for (uint64_t num)
-{
- uint64_t bits = 0, ctrl = 1;
-
- while (ctrl < num) {
- ctrl *= 2;
- bits ++;
- }
-
- return bits;
-}
-
-int
-afr_itransform (xlator_t *this, int subvol, uint64_t x, uint64_t *y_p)
-{
- afr_private_t *conf = NULL;
- int cnt = 0;
- int max = 0;
- uint64_t y = 0;
- uint64_t hi_mask = 0;
- uint64_t off_mask = 0;
- int max_bits = 0;
-
- if (x == ((uint64_t) -1)) {
- y = (uint64_t) -1;
- goto out;
- }
-
- conf = this->private;
- if (!conf)
- goto out;
-
- max = conf->child_count;
- cnt = subvol;
-
- if (max == 1) {
- y = x;
- goto out;
- }
-
- max_bits = afr_bits_for (max);
-
- hi_mask = ~(PRESENT_MASK >> (max_bits + 1));
-
- if (x & hi_mask) {
- /* HUGE d_off */
- off_mask = MASK << max_bits;
- y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | cnt;
- } else {
- /* small d_off */
- y = ((x * max) + cnt);
- }
-
-out:
- if (y_p)
- *y_p = y;
-
- return 0;
-}
-
-
-int
-afr_deitransform (xlator_t *this, uint64_t y, int *subvol_p,
- uint64_t *x_p)
-{
- afr_private_t *conf = NULL;
- int cnt = 0;
- int max = 0;
- uint64_t x = 0;
- int subvol = 0;
- int max_bits = 0;
- uint64_t off_mask = 0;
- uint64_t host_mask = 0;
-
- if (!this->private)
- return -1;
-
- conf = this->private;
- max = conf->child_count;
-
- if (max == 1) {
- x = y;
- cnt = 0;
- goto out;
- }
-
- if (y & TOP_BIT) {
- /* HUGE d_off */
- max_bits = afr_bits_for (max);
- off_mask = (MASK << max_bits);
- host_mask = ~(off_mask);
-
- x = ((y & ~TOP_BIT) & off_mask) << SHIFT_BITS;
-
- cnt = y & host_mask;
- } else {
- /* small d_off */
- cnt = y % max;
- x = y / max;
- }
-
-out:
- subvol = cnt;
-
- if (subvol_p)
- *subvol_p = subvol;
-
- if (x_p)
- *x_p = x;
-
- return 0;
-}
-
-
static void
afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol,
gf_dirent_t *entries, fd_t *fd)
@@ -273,7 +147,6 @@ afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol,
}
list_del_init (&entry->list);
- afr_itransform (THIS, subvol, entry->d_off, &entry->d_off);
list_add_tail (&entry->list, &entries->list);
if (entry->inode) {
@@ -333,9 +206,11 @@ afr_readdir_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
priv = this->private;
local = frame->local;
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
if (subvol == -1) {
AFR_STACK_UNWIND (readdir, frame, local->op_ret,
@@ -343,6 +218,8 @@ afr_readdir_wind (call_frame_t *frame, xlator_t *this, int subvol)
return 0;
}
+ fd_ctx->readdir_subvol = subvol;
+
if (local->op == GF_FOP_READDIR)
STACK_WIND_COOKIE (frame, afr_readdir_cbk,
(void *) (long) subvol,
@@ -370,18 +247,27 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
afr_local_t *local = NULL;
int32_t op_errno = 0;
int subvol = -1;
+ afr_fd_ctx_t *fd_ctx = NULL;
local = AFR_FRAME_INIT (frame, op_errno);
if (!local)
goto out;
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
local->op = whichop;
local->fd = fd_ref (fd);
local->cont.readdir.size = size;
local->cont.readdir.offset = offset;
local->xdata_req = (dict)? dict_ref (dict) : NULL;
- if (offset == 0) {
+ subvol = fd_ctx->readdir_subvol;
+
+ if (offset == 0 || subvol == -1) {
/* First readdir has option of failing over and selecting
an appropriate read subvolume */
afr_read_txn (frame, this, fd->inode, afr_readdir_wind,
@@ -389,8 +275,6 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
} else {
/* But continued readdirs MUST stick to the same subvolume
without an option to failover */
- afr_deitransform (this, offset, &subvol,
- (uint64_t *)&local->cont.readdir.offset);
afr_readdir_wind (frame, this, subvol);
}
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 7e138c54ec0..4044fd59d4e 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -301,6 +301,12 @@ typedef struct {
/* list of frames currently in progress */
struct list_head eager_locked;
+
+ /* the subvolume on which the latest sequence of readdirs (starting
+ at offset 0) has begun. Till the next readdir request with 0 offset
+ arrives, we continue to read off this subvol.
+ */
+ int readdir_subvol;
} afr_fd_ctx_t;