summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--libglusterfs/src/gf-dirent.c128
-rw-r--r--libglusterfs/src/gf-dirent.h10
-rw-r--r--libglusterfs/src/glusterfs.h2
-rw-r--r--libglusterfs/src/graph.c125
-rw-r--r--libglusterfs/src/xlator.h7
-rw-r--r--tests/bugs/distribute/bug-1190734.t104
-rw-r--r--xlators/cluster/dht/src/dht-common.c17
-rw-r--r--xlators/cluster/dht/src/dht-common.h7
-rw-r--r--xlators/cluster/dht/src/dht-helper.c133
-rw-r--r--xlators/cluster/dht/src/dht-layout.c1
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c2
-rw-r--r--xlators/cluster/dht/src/dht-shared.c27
-rw-r--r--xlators/cluster/ec/src/ec-dir-read.c18
-rw-r--r--xlators/cluster/ec/src/ec-helpers.c46
-rw-r--r--xlators/cluster/ec/src/ec-helpers.h2
-rw-r--r--xlators/protocol/client/src/client-handshake.c2
-rw-r--r--xlators/protocol/client/src/client-helpers.c17
-rw-r--r--xlators/protocol/client/src/client-rpc-fops.c2
-rw-r--r--xlators/protocol/client/src/client.c11
-rw-r--r--xlators/protocol/client/src/client.h4
20 files changed, 482 insertions, 183 deletions
diff --git a/libglusterfs/src/gf-dirent.c b/libglusterfs/src/gf-dirent.c
index f6fd3ab54ee..b5f395afc36 100644
--- a/libglusterfs/src/gf-dirent.c
+++ b/libglusterfs/src/gf-dirent.c
@@ -21,6 +21,134 @@
#include "compat.h"
#include "xlator.h"
+#define ONE 1ULL
+#define PRESENT_D_OFF_BITS 63
+#define BACKEND_D_OFF_BITS 63
+#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1))
+#define MASK (~0ULL)
+#define SHIFT_BITS (max (0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1)))
+#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS))
+
+/*
+ * Return the number of bits needed to tell 'num' distinct values apart,
+ * i.e. the smallest 'bits' such that 2^bits >= num.  Yields 0 when num
+ * is 0 or 1.
+ *
+ * The result is obtained by shifting (num - 1) down to zero.  Doubling a
+ * counter until it reaches num gives the same answers, but that counter
+ * wraps around to 0 and the loop never ends once num exceeds 2^63.
+ */
+static uint64_t
+bits_for (uint64_t num)
+{
+        uint64_t bits = 0;
+        uint64_t rest = 0;
+
+        if (num < 2)
+                return 0;
+
+        for (rest = num - 1; rest != 0; rest >>= 1)
+                bits++;
+
+        return bits;
+}
+
+/*
+ * Extract the leaf index that is encoded in a transformed directory offset.
+ *
+ * @this:   xlator whose graph supplies the leaf count
+ * @offset: d_off value carrying an encoded leaf index
+ *
+ * Returns the encoded index.  When the graph reports at most one leaf
+ * there is nothing to decode and 0 is returned.
+ */
+int
+gf_deitransform(xlator_t *this,
+                uint64_t offset)
+{
+        int       leaves = 0;
+        uint64_t  id_mask = 0;
+
+        leaves = glusterfs_get_leaf_count(this->graph);
+
+        /* A leaf count of 0 would make the modulo below divide by zero,
+         * so it is treated like the single-leaf case. */
+        if (leaves <= 1)
+                return 0;
+
+        if (offset & TOP_BIT) {
+                /* HUGE d_off: the index sits in the low bits_for(leaves)
+                 * bits of the value. */
+                id_mask = ~(MASK << bits_for (leaves));
+                return offset & id_mask;
+        }
+
+        /* small d_off: the index is the remainder modulo the leaf count */
+        return offset % leaves;
+}
+
+/*
+ * Strip the encoded leaf index from a transformed directory offset and
+ * return the remaining offset part.
+ *
+ * @this:   xlator whose graph supplies the leaf count
+ * @offset: d_off value carrying an encoded leaf index
+ *
+ * Returns @offset unchanged when the graph reports at most one leaf.
+ */
+uint64_t
+gf_dirent_orig_offset(xlator_t *this,
+                      uint64_t offset)
+{
+        int       leaves = 0;
+        uint64_t  off_mask = 0;
+
+        leaves = glusterfs_get_leaf_count(this->graph);
+
+        /* A leaf count of 0 would make the division below trap, so it is
+         * treated like the single-leaf case. */
+        if (leaves <= 1)
+                return offset;
+
+        if (offset & TOP_BIT) {
+                /* HUGE d_off: drop the marker bit and the index bits, then
+                 * shift the remainder back up by SHIFT_BITS. */
+                off_mask = MASK << bits_for (leaves);
+                return ((offset & ~TOP_BIT) & off_mask) << SHIFT_BITS;
+        }
+
+        /* small d_off: the offset part is the quotient by the leaf count */
+        return offset / leaves;
+}
+
+/*
+ * Encode a leaf index into a directory offset.
+ *
+ * @this:      xlator whose graph supplies the leaf count
+ * @x:         offset to encode
+ * @y_p:       where the encoded offset is stored (may be NULL)
+ * @client_id: leaf index to fold into the offset
+ *
+ * An @x of 0 or (uint64_t)-1 is passed through untouched, as is any @x
+ * when the graph has exactly one leaf.  Always returns 0.
+ *
+ * NOTE(review): @client_id is OR-ed/added in as-is; a negative value would
+ * sign-extend into the upper bits of the result - confirm callers never
+ * pass one.
+ */
+int
+gf_itransform (xlator_t *this, uint64_t x, uint64_t *y_p, int client_id)
+{
+        uint64_t  result = 0;
+        uint64_t  huge_bits = 0;
+        uint64_t  off_mask = 0;
+        int       leaves = 0;
+        int       id_bits = 0;
+
+        if (x == ((uint64_t) -1)) {
+                result = (uint64_t) -1;
+        } else if (x != 0) {
+                leaves = glusterfs_get_leaf_count(this->graph);
+
+                if (leaves == 1) {
+                        result = x;
+                } else {
+                        id_bits = bits_for (leaves);
+                        huge_bits = ~(PRESENT_MASK >> (id_bits + 1));
+
+                        if (x & huge_bits) {
+                                /* HUGE d_off */
+                                off_mask = MASK << id_bits;
+                                result = TOP_BIT
+                                         | ((x >> SHIFT_BITS) & off_mask)
+                                         | client_id;
+                        } else {
+                                /* small d_off */
+                                result = (x * leaves) + client_id;
+                        }
+                }
+        }
+
+        if (y_p)
+                *y_p = result;
+
+        return 0;
+}
+
gf_dirent_t *
gf_dirent_for_name (const char *name)
{
diff --git a/libglusterfs/src/gf-dirent.h b/libglusterfs/src/gf-dirent.h
index 4c1ff0b1684..07c605f82b0 100644
--- a/libglusterfs/src/gf-dirent.h
+++ b/libglusterfs/src/gf-dirent.h
@@ -22,6 +22,16 @@
#define gf_dirent_size(name) (sizeof (gf_dirent_t) + strlen (name) + 1)
+int
+gf_deitransform(xlator_t *this, uint64_t y);
+
+int
+gf_itransform (xlator_t *this, uint64_t x, uint64_t *y_p, int client_id);
+
+uint64_t
+gf_dirent_orig_offset (xlator_t *this, uint64_t offset);
+
+
struct _dir_entry_t {
struct _dir_entry_t *next;
char *name;
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index a810f3a81f0..791e6dc5fd8 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -452,6 +452,7 @@ struct _glusterfs_graph {
int id; /* Used in logging */
int used; /* Should be set when fuse gets
first CHILD_UP */
+ uint32_t leaf_count;
uint32_t volfile_checksum;
};
typedef struct _glusterfs_graph glusterfs_graph_t;
@@ -617,6 +618,7 @@ int glusterfs_graph_prepare (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx);
int glusterfs_graph_destroy_residual (glusterfs_graph_t *graph);
int glusterfs_graph_deactivate (glusterfs_graph_t *graph);
int glusterfs_graph_destroy (glusterfs_graph_t *graph);
+int glusterfs_get_leaf_count (glusterfs_graph_t *graph);
int glusterfs_graph_activate (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx);
glusterfs_graph_t *glusterfs_graph_construct (FILE *fp);
glusterfs_graph_t *glusterfs_graph_new ();
diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
index b427740f10f..709ec3b3ce3 100644
--- a/libglusterfs/src/graph.c
+++ b/libglusterfs/src/graph.c
@@ -515,15 +515,138 @@ glusterfs_graph_prepare (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
/* XXX: --xlator-option additions */
gf_add_cmdline_options (graph, &ctx->cmd_args);
-
return 0;
}
+/* Return the first xlator of @graph; the leaf helpers start their walk
+ * from it. */
+static xlator_t *
+glusterfs_root(glusterfs_graph_t *graph)
+{
+        xlator_t *top = graph->first;
+
+        return top;
+}
+
+/* Return 1 when @xl has no children, 0 otherwise. */
+static int
+glusterfs_is_leaf(xlator_t *xl)
+{
+        return (xl->children == NULL) ? 1 : 0;
+}
+
+/* Count the leaves found below @xl by recursing through its children;
+ * an xlator without children counts as one leaf. */
+static uint32_t
+glusterfs_count_leaves(xlator_t *xl)
+{
+        xlator_list_t *child = NULL;
+        int            total = 0;
+
+        if (glusterfs_is_leaf(xl))
+                return 1;
+
+        for (child = xl->children; child; child = child->next)
+                total += glusterfs_count_leaves(child->xlator);
+
+        return total;
+}
+
+/* Return the leaf count stored in @graph. */
+int
+glusterfs_get_leaf_count(glusterfs_graph_t *graph)
+{
+        int count = graph->leaf_count;
+
+        return count;
+}
+
+/*
+ * Depth-first search for @tgt in the subtree rooted at @xl.
+ *
+ * *@id is incremented for every leaf visited that is not @tgt, so when
+ * the search succeeds it holds the number of leaves seen before @tgt.
+ *
+ * Returns 1 if @tgt was found, 0 otherwise.
+ */
+static int
+_glusterfs_leaf_position(xlator_t *tgt, int *id, xlator_t *xl)
+{
+        xlator_list_t *child = NULL;
+
+        if (xl == tgt)
+                return 1;
+
+        if (glusterfs_is_leaf(xl)) {
+                *id += 1;
+                return 0;
+        }
+
+        for (child = xl->children; child; child = child->next) {
+                if (_glusterfs_leaf_position(tgt, id, child->xlator))
+                        return 1;
+        }
+
+        return 0;
+}
+
+/*
+ * Return the zero-based position of @tgt among the leaves of its graph,
+ * counted in the order the graph is walked from its first xlator, or -1
+ * if @tgt is not reached by that walk.
+ */
+int
+glusterfs_leaf_position(xlator_t *tgt)
+{
+        xlator_t *top = glusterfs_root(tgt->graph);
+        int       index = 0;
+
+        if (_glusterfs_leaf_position(tgt, &index, top))
+                return index;
+
+        return -1;
+}
+
+/*
+ * Record, for every leaf found below @xl, that it is reached through @base.
+ *
+ * @base:   subvolume under which the walk started
+ * @xl:     xlator currently being visited
+ * @leaves: dictionary receiving "leaf position" -> @base entries
+ *
+ * Returns 0 on success.  Returns 1 when a leaf's position cannot be
+ * determined; otherwise a failure is reported with whatever
+ * dict_set_static_ptr() returned.  The walk stops at the first failure.
+ */
+static int
+_glusterfs_reachable_leaves(xlator_t *base, xlator_t *xl, dict_t *leaves)
+{
+        xlator_list_t *child = NULL;
+        int            err = 0;
+        int            pos = 0;
+        /* Large enough for any int in decimal, sign and NUL included. */
+        char           key[16] = { 0 };
+
+        if (glusterfs_is_leaf(xl)) {
+                pos = glusterfs_leaf_position(xl);
+                if (pos < 0)
+                        return 1;
+
+                snprintf(key, sizeof (key), "%d", pos);
+
+                return dict_set_static_ptr(leaves, key, base);
+        }
+
+        for (child = xl->children; !err && child; child = child->next)
+                err = _glusterfs_reachable_leaves(base, child->xlator,
+                                                  leaves);
+
+        return err;
+}
+
+/*
+ * This function determines which leaves are children (or grandchildren)
+ * of the given base. The base may have multiple subvolumes. Each
+ * subvolume may in turn have subvolumes of its own, and so on until the
+ * leaves are reached. Each leaf is numbered 0,1,2,... etc.
+ *
+ * The base translator calls this function to see which of *its* subvolumes
+ * it would forward an FOP to, to *get to* a particular leaf.
+ * That information is built into the "leaves" dictionary.
+ * key:destination leaf# -> value:base subvolume xlator.
+ */
+
+int
+glusterfs_reachable_leaves(xlator_t *base, dict_t *leaves)
+{
+        xlator_list_t *sub = base->children;
+        int            rc = 0;
+
+        /* Walk each direct child in turn; give up at the first failure. */
+        while (sub && !rc) {
+                rc = _glusterfs_reachable_leaves(sub->xlator, sub->xlator,
+                                                 leaves);
+                sub = sub->next;
+        }
+
+        return rc;
+}
int
glusterfs_graph_activate (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
{
int ret = 0;
+ xlator_t *root = NULL;
+
+ root = glusterfs_root(graph);
+
+ graph->leaf_count = glusterfs_count_leaves(root);
/* XXX: all xlator options validation */
ret = glusterfs_graph_validate_options (graph);
diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h
index 733f6cf47ab..5a71ceb3f31 100644
--- a/libglusterfs/src/xlator.h
+++ b/libglusterfs/src/xlator.h
@@ -978,4 +978,11 @@ glusterfs_volfile_reconfigure (int oldvollen, FILE *newvolfile_fp,
int
loc_touchup (loc_t *loc, const char *name);
+
+int
+glusterfs_leaf_position(xlator_t *tgt);
+
+int
+glusterfs_reachable_leaves(xlator_t *base, dict_t *leaves);
+
#endif /* _XLATOR_H */
diff --git a/tests/bugs/distribute/bug-1190734.t b/tests/bugs/distribute/bug-1190734.t
new file mode 100644
index 00000000000..c2f2338dc0f
--- /dev/null
+++ b/tests/bugs/distribute/bug-1190734.t
@@ -0,0 +1,104 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+BRICK_COUNT=3
+FILE_COUNT=100
+
+# Recreate directory $2 from scratch and fill it with $1 empty files
+# named file_1 .. file_$1.
+function create_files {
+        local count=$1
+        local dir=$2
+        local i
+
+        rm -rf "$dir"
+        mkdir "$dir"
+        for i in $(seq 1 "$count"); do
+                touch "$dir/file_$i"
+        done
+}
+
+# Succeed when the number of entries found under $2 (not counting $2
+# itself) equals $1. The count is left in ORIG_FILE_COUNT.
+function check_file_count {
+        ORIG_FILE_COUNT=$(find "$2" | tail -n +2 | wc -l)
+        [ "$ORIG_FILE_COUNT" -eq "$1" ]
+}
+
+# Stop volume $V0, unmount $1 and delete the volume. The exit status is
+# that of the volume delete.
+function reset {
+        $CLI volume stop $V0
+        umount "$1"
+        $CLI volume delete $V0
+}
+
+# Start $V0, mount it on $M0 with the native client and create the test
+# files in $M0/$1. Returns 1 as soon as any step fails, 0 otherwise.
+function start_mount_fuse {
+        $CLI volume start $V0 || return 1
+
+        $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 || return 1
+
+        create_files $FILE_COUNT $M0/$1 || return 1
+
+        return 0
+}
+
+# Start $V0, mount it over NFS on $N0 and create the test files in
+# $N0/$1. Returns 1 as soon as any step fails, 0 otherwise.
+function start_mount_nfs {
+        $CLI volume start $V0 || return 1
+
+        sleep 3
+        mount_nfs $H0:/$V0 $N0 || return 1
+
+        create_files $FILE_COUNT $N0/$1 || return 1
+
+        return 0
+}
+
+# Verify that $1 holds $FILE_COUNT entries, then start removing bricks
+# ${V0}2 and ${V0}3. Returns 1 if either step fails, 0 otherwise.
+function start_removing_bricks {
+        check_file_count $FILE_COUNT $1 || return 1
+
+        $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}2 $H0:$B0/${V0}3 start || return 1
+
+        return 0
+}
+
+# Commit the removal of bricks ${V0}2 and ${V0}3, then verify that $1
+# still holds $FILE_COUNT entries. Returns 1 if the commit fails,
+# otherwise the status of the file-count check.
+function finish_removing_bricks {
+        $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}2 $H0:$B0/${V0}3 commit || return 1
+
+        check_file_count $FILE_COUNT $1
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+# Test 1-2 Create replicated volume
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 \
+ $H0:$B0/${V0}2 $H0:$B0/${V0}3 $H0:$B0/${V0}4 $H0:$B0/${V0}5
+
+# ------- test 1: AFR, fuse + remove bricks
+
+TEST start_mount_fuse test1
+TEST start_removing_bricks $M0/test1
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$B0/${V0}2 $H0:$B0/${V0}3"
+$CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}2 $H0:$B0/${V0}3 status > /tmp/out
+TEST finish_removing_bricks $M0/test1
+reset $M0
+
+# ------- test 2: AFR, nfs + remove bricks
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 \
+ $H0:$B0/${V0}2 $H0:$B0/${V0}3 $H0:$B0/${V0}4 $H0:$B0/${V0}5
+
+TEST start_mount_nfs test2
+TEST start_removing_bricks $N0/test2
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$B0/${V0}2 $H0:$B0/${V0}3"
+TEST finish_removing_bricks $N0/test2
+reset $N0
+
+cleanup
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index ef816f53532..729ab30e672 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -2840,6 +2840,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
}
if (key && (strcmp (key, GF_XATTR_LINKINFO_KEY) == 0)) {
+
hashed_subvol = dht_subvol_get_hashed (this, loc);
if (!hashed_subvol) {
gf_msg (this->name, GF_LOG_ERROR, 0,
@@ -2864,6 +2865,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
op_errno = ENODATA;
goto err;
}
+
STACK_WIND (frame, dht_linkinfo_getxattr_cbk, hashed_subvol,
hashed_subvol->fops->getxattr, loc,
GF_XATTR_PATHINFO_KEY, xdata);
@@ -3854,9 +3856,7 @@ list:
}
}
- dht_itransform (this, prev->this, orig_entry->d_off,
- &entry->d_off);
-
+ entry->d_off = orig_entry->d_off;
entry->d_stat = orig_entry->d_stat;
entry->d_ino = orig_entry->d_ino;
entry->d_type = orig_entry->d_type;
@@ -3988,9 +3988,7 @@ dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto unwind;
}
- dht_itransform (this, prev->this, orig_entry->d_off,
- &entry->d_off);
-
+ entry->d_off = orig_entry->d_off;
entry->d_ino = orig_entry->d_ino;
entry->d_type = orig_entry->d_type;
entry->d_len = orig_entry->d_len;
@@ -4050,7 +4048,6 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
dht_local_t *local = NULL;
int op_errno = -1;
xlator_t *xvol = NULL;
- off_t xoff = 0;
int ret = 0;
dht_conf_t *conf = NULL;
@@ -4072,7 +4069,7 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
local->xattr_req = (dict)? dict_ref (dict) : NULL;
local->first_up_subvol = dht_first_up_subvol (this);
- dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff);
+ dht_deitransform (this, yoff, &xvol);
/* TODO: do proper readdir */
if (whichop == GF_FOP_READDIRP) {
@@ -4111,10 +4108,10 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
}
STACK_WIND (frame, dht_readdirp_cbk, xvol, xvol->fops->readdirp,
- fd, size, xoff, local->xattr);
+ fd, size, yoff, local->xattr);
} else {
STACK_WIND (frame, dht_readdir_cbk, xvol, xvol->fops->readdir,
- fd, size, xoff, local->xattr);
+ fd, size, yoff, local->xattr);
}
return 0;
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 779b470585c..67e693146af 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -332,6 +332,7 @@ struct dht_conf {
gf_boolean_t unhashed_sticky_bit;
struct timeval last_stat_fetch;
gf_lock_t layout_lock;
+ dict_t *leaf_to_subvol;
void *private; /* Can be used by wrapper xlators over
dht */
gf_boolean_t use_readdirp;
@@ -501,9 +502,7 @@ int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
int dht_frame_return (call_frame_t *frame);
-int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y);
-int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol,
- uint64_t *x);
+int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol);
void dht_local_wipe (xlator_t *this, dht_local_t *local);
dht_local_t *dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd,
@@ -775,6 +774,8 @@ int32_t dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd,
int32_t dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
off_t offset, off_t len, dict_t *xdata);
+int
+dht_set_subvol_range(xlator_t *this);
int32_t dht_init (xlator_t *this);
void dht_fini (xlator_t *this);
int dht_reconfigure (xlator_t *this, dict_t *options);
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index bf21f39a3a7..f4e5305d791 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -62,20 +62,6 @@ dht_frame_return (call_frame_t *frame)
return this_call_cnt;
}
-
-static uint64_t
-dht_bits_for (uint64_t num)
-{
- uint64_t bits = 0, ctrl = 1;
-
- while (ctrl < num) {
- ctrl *= 2;
- bits ++;
- }
-
- return bits;
-}
-
/*
* A slightly "updated" version of the algorithm described in the commit log
* is used here.
@@ -88,66 +74,6 @@ dht_bits_for (uint64_t num)
* upwards which is described as 64, are both made "configurable."
*/
-
-#define BACKEND_D_OFF_BITS 63
-#define PRESENT_D_OFF_BITS 63
-
-#define ONE 1ULL
-#define MASK (~0ULL)
-#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS))
-#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS))
-
-#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1))
-#define SHIFT_BITS (max (0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1)))
-
-int
-dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p)
-{
- dht_conf_t *conf = NULL;
- int cnt = 0;
- int max = 0;
- uint64_t y = 0;
- uint64_t hi_mask = 0;
- uint64_t off_mask = 0;
- int max_bits = 0;
-
- if (x == ((uint64_t) -1)) {
- y = (uint64_t) -1;
- goto out;
- }
-
- conf = this->private;
- if (!conf)
- goto out;
-
- max = conf->subvolume_cnt;
- cnt = dht_subvol_cnt (this, subvol);
-
- if (max == 1) {
- y = x;
- goto out;
- }
-
- max_bits = dht_bits_for (max);
-
- hi_mask = ~(PRESENT_MASK >> (max_bits + 1));
-
- if (x & hi_mask) {
- /* HUGE d_off */
- off_mask = MASK << max_bits;
- y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | cnt;
- } else {
- /* small d_off */
- y = ((x * max) + cnt);
- }
-
-out:
- if (y_p)
- *y_p = y;
-
- return 0;
-}
-
int
dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
xlator_t **subvol)
@@ -205,55 +131,44 @@ out:
return ret;
}
-int
-dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p,
- uint64_t *x_p)
+static xlator_t *
+dht_get_subvol_from_id(xlator_t *this, int client_id)
{
+ xlator_t *xl = NULL;
dht_conf_t *conf = NULL;
- int cnt = 0;
- int max = 0;
- uint64_t x = 0;
+ char sid[6] = { 0 };
+
+ conf = this->private;
+
+ sprintf(sid, "%d", client_id);
+ if (dict_get_ptr(conf->leaf_to_subvol, sid, (void **) &xl))
+ xl = NULL;
+
+ return xl;
+}
+
+int
+dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p)
+{
+ int client_id = 0;
xlator_t *subvol = 0;
- int max_bits = 0;
- uint64_t off_mask = 0;
- uint64_t host_mask = 0;
+ dht_conf_t *conf = NULL;
if (!this->private)
return -1;
conf = this->private;
- max = conf->subvolume_cnt;
- if (max == 1) {
- x = y;
- cnt = 0;
- goto out;
- }
+ client_id = gf_deitransform(this, y);
- if (y & TOP_BIT) {
- /* HUGE d_off */
- max_bits = dht_bits_for (max);
- off_mask = (MASK << max_bits);
- host_mask = ~(off_mask);
+ subvol = dht_get_subvol_from_id(this, client_id);
- x = ((y & ~TOP_BIT) & off_mask) << SHIFT_BITS;
-
- cnt = y & host_mask;
- } else {
- /* small d_off */
- cnt = y % max;
- x = y / max;
- }
-
-out:
- subvol = conf->subvolumes[cnt];
+ if (!subvol)
+ subvol = conf->subvolumes[0];
if (subvol_p)
*subvol_p = subvol;
- if (x_p)
- *x_p = x;
-
return 0;
}
@@ -829,6 +744,8 @@ dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)
}
conf->subvolume_cnt = cnt;
+ dht_set_subvol_range(this);
+
cnt = 0;
for (subvols = this->children; subvols; subvols = subvols->next)
conf->subvolumes[cnt++] = subvols->xlator;
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
index 757ec731d26..3ea75b34ad0 100644
--- a/xlators/cluster/dht/src/dht-layout.c
+++ b/xlators/cluster/dht/src/dht-layout.c
@@ -166,7 +166,6 @@ dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name)
int i = 0;
int ret = 0;
-
ret = dht_hash_compute (this, layout->type, name, &hash);
if (ret != 0) {
gf_log (this->name, GF_LOG_WARNING,
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 466042c74a0..3531872dd31 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -1492,12 +1492,14 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
if (defrag->stats == _gf_true) {
gettimeofday (&start, NULL);
}
+
if (defrag->defrag_pattern &&
(gf_defrag_pattern_match (defrag, entry->d_name,
entry->d_stat.ia_size)
== _gf_false)) {
continue;
}
+
loc_wipe (&entry_loc);
ret =dht_build_child_loc (this, &entry_loc, loc,
entry->d_name);
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index 22a7260f829..860f3e716f0 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -214,6 +214,8 @@ dht_fini (xlator_t *this)
GF_FREE (conf->file_layouts);
}
+ dict_destroy(conf->leaf_to_subvol);
+
GF_FREE (conf->subvolumes);
GF_FREE (conf->subvolume_status);
@@ -288,7 +290,6 @@ out:
return ret;
}
-
int
dht_decommissioned_remove (xlator_t *this, dht_conf_t *conf)
{
@@ -344,6 +345,27 @@ dht_init_regex (xlator_t *this, dict_t *odict, char *name,
}
+/*
+ * Create conf->leaf_to_subvol and fill it through
+ * glusterfs_reachable_leaves().
+ *
+ * Returns -1 when this->private is not set or the dictionary cannot be
+ * allocated; otherwise the result of glusterfs_reachable_leaves().
+ *
+ * NOTE(review): this is invoked from both dht_init_subvolumes() and
+ * dht_init(). Each call that gets past the conf check stores a new
+ * dictionary without releasing one stored earlier - confirm that only
+ * one of the two calls ever sees a non-NULL this->private.
+ */
int
+dht_set_subvol_range(xlator_t *this)
+{
+        dht_conf_t *conf = this->private;
+
+        if (!conf)
+                return -1;
+
+        conf->leaf_to_subvol = dict_new();
+        if (!conf->leaf_to_subvol)
+                return -1;
+
+        return glusterfs_reachable_leaves(this, conf->leaf_to_subvol);
+}
+
+int
dht_reconfigure (xlator_t *this, dict_t *options)
{
dht_conf_t *conf = NULL;
@@ -676,6 +698,9 @@ dht_init (xlator_t *this)
this->private = conf;
+ if (dht_set_subvol_range(this))
+ goto err;
+
return 0;
err:
diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c
index c705b80fe82..17e1a3d124e 100644
--- a/xlators/cluster/ec/src/ec-dir-read.c
+++ b/xlators/cluster/ec/src/ec-dir-read.c
@@ -304,8 +304,6 @@ void ec_adjust_readdir(ec_t * ec, int32_t idx, gf_dirent_t * entries)
list_for_each_entry(entry, &entries->list, list)
{
- entry->d_off = ec_itransform(ec, idx, entry->d_off);
-
if (entry->d_stat.ia_type == IA_IFREG)
{
if ((entry->dict == NULL) ||
@@ -413,10 +411,20 @@ int32_t ec_manager_readdir(ec_fop_data_t * fop, int32_t state)
if (fop->offset != 0)
{
- int32_t idx;
+ int32_t idx = -1;
+ ec_t *ec = fop->xl->private;
+
+ idx = gf_deitransform(fop->xl, fop->offset);
+
+ if ((idx < 0) || (idx >= ec->nodes)) {
- fop->offset = ec_deitransform(fop->xl->private, &idx,
- fop->offset);
+ gf_log(fop->xl->name, GF_LOG_ERROR,
+ "Invalid index %d in readdirp request", idx);
+
+ fop->error = EIO;
+
+ return EC_STATE_REPORT;
+ }
fop->mask &= 1ULL << idx;
}
diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c
index 783e3d475ce..139957b55c6 100644
--- a/xlators/cluster/ec/src/ec-helpers.c
+++ b/xlators/cluster/ec/src/ec-helpers.c
@@ -16,17 +16,6 @@
#include "ec-fops.h"
#include "ec-helpers.h"
-#define BACKEND_D_OFF_BITS 63
-#define PRESENT_D_OFF_BITS 63
-
-#define ONE 1ULL
-#define MASK (~0ULL)
-#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS))
-#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS))
-
-#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1))
-#define SHIFT_BITS (max(0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1)))
-
#ifndef ffsll
#define ffsll(x) __builtin_ffsll(x)
#endif
@@ -106,41 +95,6 @@ void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...)
}
}
-uint64_t ec_itransform(ec_t * ec, int32_t idx, uint64_t offset)
-{
- int32_t bits;
-
- if (offset == -1ULL)
- {
- return -1ULL;
- }
-
- bits = ec->bits_for_nodes;
- if ((offset & ~(PRESENT_MASK >> (bits + 1))) != 0)
- {
- return TOP_BIT | ((offset >> SHIFT_BITS) & (MASK << bits)) | idx;
- }
-
- return (offset * ec->nodes) + idx;
-}
-
-uint64_t ec_deitransform(ec_t * ec, int32_t * idx, uint64_t offset)
-{
- uint64_t mask = 0;
-
- if ((offset & TOP_BIT) != 0)
- {
- mask = MASK << ec->bits_for_nodes;
-
- *idx = offset & ~mask;
- return ((offset & ~TOP_BIT) & mask) << SHIFT_BITS;
- }
-
- *idx = offset % ec->nodes;
-
- return offset / ec->nodes;
-}
-
int32_t ec_bits_count(uint64_t n)
{
n -= (n >> 1) & 0x5555555555555555ULL;
diff --git a/xlators/cluster/ec/src/ec-helpers.h b/xlators/cluster/ec/src/ec-helpers.h
index 5f5d9382532..11d2707b3c0 100644
--- a/xlators/cluster/ec/src/ec-helpers.h
+++ b/xlators/cluster/ec/src/ec-helpers.h
@@ -16,8 +16,6 @@
const char * ec_bin(char * str, size_t size, uint64_t value, int32_t digits);
const char * ec_fop_name(int32_t id);
void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...);
-uint64_t ec_itransform(ec_t * ec, int32_t idx, uint64_t offset);
-uint64_t ec_deitransform(ec_t * ec, int32_t * idx, uint64_t offset);
int32_t ec_bits_count(uint64_t n);
int32_t ec_bits_index(uint64_t n);
int32_t ec_bits_consume(uint64_t * n);
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
index 531b38eaf83..90192ad2ac8 100644
--- a/xlators/protocol/client/src/client-handshake.c
+++ b/xlators/protocol/client/src/client-handshake.c
@@ -1194,6 +1194,8 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m
}
*/
+ conf->client_id = glusterfs_leaf_position(this);
+
gf_log (this->name, GF_LOG_INFO,
"Connected to %s, attached to remote volume '%s'.",
conf->rpc->conn.name,
diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
index 5d9f00fdc70..be5e7b57739 100644
--- a/xlators/protocol/client/src/client-helpers.c
+++ b/xlators/protocol/client/src/client-helpers.c
@@ -141,12 +141,16 @@ client_local_wipe (clnt_local_t *local)
}
int
-unserialize_rsp_dirent (struct gfs3_readdir_rsp *rsp, gf_dirent_t *entries)
+unserialize_rsp_dirent (xlator_t *this, struct gfs3_readdir_rsp *rsp,
+ gf_dirent_t *entries)
{
struct gfs3_dirlist *trav = NULL;
gf_dirent_t *entry = NULL;
int entry_len = 0;
int ret = -1;
+ clnt_conf_t *conf = NULL;
+
+ conf = this->private;
trav = rsp->reply;
while (trav) {
@@ -156,7 +160,8 @@ unserialize_rsp_dirent (struct gfs3_readdir_rsp *rsp, gf_dirent_t *entries)
goto out;
entry->d_ino = trav->d_ino;
- entry->d_off = trav->d_off;
+ gf_itransform (this, trav->d_off, &entry->d_off,
+ conf->client_id);
entry->d_len = trav->d_len;
entry->d_type = trav->d_type;
@@ -182,12 +187,17 @@ unserialize_rsp_direntp (xlator_t *this, fd_t *fd,
inode_table_t *itable = NULL;
int entry_len = 0;
int ret = -1;
+ clnt_conf_t *conf = NULL;
trav = rsp->reply;
if (fd)
itable = fd->inode->table;
+ conf = this->private;
+ if (!conf)
+ goto out;
+
while (trav) {
entry_len = gf_dirent_size (trav->name);
entry = GF_CALLOC (1, entry_len, gf_common_mt_gf_dirent_t);
@@ -195,7 +205,8 @@ unserialize_rsp_direntp (xlator_t *this, fd_t *fd,
goto out;
entry->d_ino = trav->d_ino;
- entry->d_off = trav->d_off;
+ gf_itransform (this, trav->d_off, &entry->d_off,
+ conf->client_id);
entry->d_len = trav->d_len;
entry->d_type = trav->d_type;
diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c
index 7ca91e9880c..10abe845c0c 100644
--- a/xlators/protocol/client/src/client-rpc-fops.c
+++ b/xlators/protocol/client/src/client-rpc-fops.c
@@ -2450,7 +2450,7 @@ client3_3_readdir_cbk (struct rpc_req *req, struct iovec *iov, int count,
INIT_LIST_HEAD (&entries.list);
if (rsp.op_ret > 0) {
- unserialize_rsp_dirent (&rsp, &entries);
+ unserialize_rsp_dirent (this, &rsp, &entries);
}
GF_PROTOCOL_DICT_UNSERIALIZE (frame->this, xdata,
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index d9e7ccd0c4f..7fca01c5b6a 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -24,6 +24,7 @@
#include "xdr-rpc.h"
#include "glusterfs3.h"
+#include "gf-dirent.h"
extern rpc_clnt_prog_t clnt_handshake_prog;
extern rpc_clnt_prog_t clnt_dump_prog;
@@ -1913,6 +1914,9 @@ client_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (!conf || !conf->fops)
goto out;
+ if (off != 0)
+ off = gf_dirent_orig_offset(this, off);
+
args.fd = fd;
args.size = size;
args.offset = off;
@@ -1948,6 +1952,9 @@ client_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (!conf || !conf->fops)
goto out;
+ if (off != 0)
+ off = gf_dirent_orig_offset(this, off);
+
args.fd = fd;
args.size = size;
args.offset = off;
@@ -2447,7 +2454,7 @@ build_client_config (xlator_t *this, clnt_conf_t *conf)
{
int ret = -1;
- if (!conf)
+ if (!conf)
goto out;
GF_OPTION_INIT ("frame-timeout", conf->rpc_conf.rpc_timeout,
@@ -2470,6 +2477,8 @@ build_client_config (xlator_t *this, clnt_conf_t *conf)
GF_OPTION_INIT ("send-gids", conf->send_gids, bool, out);
+ conf->client_id = glusterfs_leaf_position(this);
+
ret = client_check_remote_host (this, this->options);
if (ret)
goto out;
diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
index 7157e120dda..40200b6afc4 100644
--- a/xlators/protocol/client/src/client.h
+++ b/xlators/protocol/client/src/client.h
@@ -85,6 +85,7 @@ typedef struct clnt_conf {
rpc_clnt_prog_t *handshake;
rpc_clnt_prog_t *dump;
+ int client_id;
uint64_t reopen_fd_count; /* Count of fds reopened after a
connection is established */
gf_lock_t rec_lock;
@@ -228,7 +229,8 @@ int client_submit_request (xlator_t *this, void *req,
struct iovec *rsp_payload, int rsp_count,
struct iobref *rsp_iobref, xdrproc_t xdrproc);
-int unserialize_rsp_dirent (struct gfs3_readdir_rsp *rsp, gf_dirent_t *entries);
+int unserialize_rsp_dirent (xlator_t *this, struct gfs3_readdir_rsp *rsp,
+ gf_dirent_t *entries);
int unserialize_rsp_direntp (xlator_t *this, fd_t *fd,
struct gfs3_readdirp_rsp *rsp, gf_dirent_t *entries);