diff options
| -rw-r--r-- | libglusterfs/src/gf-dirent.c | 128 | ||||
| -rw-r--r-- | libglusterfs/src/gf-dirent.h | 10 | ||||
| -rw-r--r-- | libglusterfs/src/glusterfs.h | 2 | ||||
| -rw-r--r-- | libglusterfs/src/graph.c | 125 | ||||
| -rw-r--r-- | libglusterfs/src/xlator.h | 7 | ||||
| -rw-r--r-- | tests/bugs/distribute/bug-1190734.t | 104 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 17 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 7 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-helper.c | 133 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-layout.c | 1 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-shared.c | 27 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-dir-read.c | 18 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-helpers.c | 46 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-helpers.h | 2 | ||||
| -rw-r--r-- | xlators/protocol/client/src/client-handshake.c | 2 | ||||
| -rw-r--r-- | xlators/protocol/client/src/client-helpers.c | 17 | ||||
| -rw-r--r-- | xlators/protocol/client/src/client-rpc-fops.c | 2 | ||||
| -rw-r--r-- | xlators/protocol/client/src/client.c | 11 | ||||
| -rw-r--r-- | xlators/protocol/client/src/client.h | 4 | 
20 files changed, 482 insertions, 183 deletions
diff --git a/libglusterfs/src/gf-dirent.c b/libglusterfs/src/gf-dirent.c index f6fd3ab54ee..b5f395afc36 100644 --- a/libglusterfs/src/gf-dirent.c +++ b/libglusterfs/src/gf-dirent.c @@ -21,6 +21,134 @@  #include "compat.h"  #include "xlator.h" +#define ONE 1ULL +#define PRESENT_D_OFF_BITS 63 +#define BACKEND_D_OFF_BITS 63 +#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1)) +#define MASK (~0ULL) +#define SHIFT_BITS (max (0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1))) +#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS)) + +static uint64_t +bits_for (uint64_t num) +{ +	uint64_t bits = 0, ctrl = 1; + +	while (ctrl < num) { +		ctrl *= 2; +		bits++; +	} + +	return bits; +} + +int +gf_deitransform(xlator_t *this, +                uint64_t offset) +{ +        int         cnt = 0; +        int         max = 0; +        int         max_bits = 0; +        uint64_t    off_mask = 0; +        uint64_t    host_mask = 0; + +        max = glusterfs_get_leaf_count(this->graph); + +	if (max == 1) { +		cnt = 0; +		goto out; +	} + +        if (offset & TOP_BIT) { +                /* HUGE d_off */ +                max_bits = bits_for (max); +                off_mask = (MASK << max_bits); +                host_mask = ~(off_mask); + +                cnt = offset & host_mask; +	} else { +                /* small d_off */ +                cnt = offset % max; +        } +out: +        return cnt; +} + +uint64_t +gf_dirent_orig_offset(xlator_t *this, +                      uint64_t offset) +{ +        int         max = 0; +        int         max_bits = 0; +        uint64_t    off_mask = 0; +        uint64_t    orig_offset; + +        max = glusterfs_get_leaf_count(this->graph); + +	if (max == 1) { +                orig_offset = offset; +		goto out; +	} + +        if (offset & TOP_BIT) { +                /* HUGE d_off */ +                max_bits = bits_for (max); +                off_mask = (MASK << max_bits); +                orig_offset = ((offset & ~TOP_BIT) & off_mask) << SHIFT_BITS; +	} else { +                /* small d_off */ +                orig_offset = offset / max; +        } +out: +        return orig_offset; +} + +int +gf_itransform (xlator_t *this, uint64_t x, uint64_t *y_p, int client_id) +{ +        int         max = 0; +        uint64_t    y = 0; +        uint64_t    hi_mask = 0; +        uint64_t    off_mask = 0; +        int         max_bits = 0; + +        if (x == ((uint64_t) -1)) { +                y = (uint64_t) -1; +                goto out; +        } + +        if (!x) { +                y = 0; +                goto out; +        } + +        max = glusterfs_get_leaf_count(this->graph); + +	if (max == 1) { +		y = x; +		goto out; +	} + +        max_bits = bits_for (max); + +        hi_mask = ~(PRESENT_MASK >> (max_bits + 1)); + +        if (x & hi_mask) { +                /* HUGE d_off */ +                off_mask = MASK << max_bits; +                y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | client_id; +        } else { +                /* small d_off */ +                y = ((x * max) + client_id); +        } + +out: +        if (y_p) +                *y_p = y; + +        return 0; +} +  gf_dirent_t *  gf_dirent_for_name (const char *name)  { diff --git a/libglusterfs/src/gf-dirent.h b/libglusterfs/src/gf-dirent.h index 4c1ff0b1684..07c605f82b0 100644 --- a/libglusterfs/src/gf-dirent.h +++ b/libglusterfs/src/gf-dirent.h @@ -22,6 +22,16 @@  #define gf_dirent_size(name) (sizeof (gf_dirent_t) + strlen (name) + 1) +int +gf_deitransform(xlator_t *this, uint64_t y); + +int +gf_itransform (xlator_t *this, uint64_t x, uint64_t *y_p, int client_id); + +uint64_t +gf_dirent_orig_offset (xlator_t *this, uint64_t offset); + +  struct _dir_entry_t {          struct _dir_entry_t *next;  	char                *name; diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index a810f3a81f0..791e6dc5fd8 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -452,6 +452,7 @@ struct _glusterfs_graph {          int                       id;    /* Used in logging */          int                       used;  /* Should be set when fuse gets                                              first CHILD_UP */ +        uint32_t                  leaf_count;          uint32_t                  volfile_checksum;  };  typedef struct _glusterfs_graph glusterfs_graph_t; @@ -617,6 +618,7 @@ int glusterfs_graph_prepare (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx);  int glusterfs_graph_destroy_residual (glusterfs_graph_t *graph);  int glusterfs_graph_deactivate (glusterfs_graph_t *graph);  int glusterfs_graph_destroy (glusterfs_graph_t *graph); +int glusterfs_get_leaf_count (glusterfs_graph_t *graph);  int glusterfs_graph_activate (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx);  glusterfs_graph_t *glusterfs_graph_construct (FILE *fp);  glusterfs_graph_t *glusterfs_graph_new (); diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c index b427740f10f..709ec3b3ce3 100644 --- a/libglusterfs/src/graph.c +++ b/libglusterfs/src/graph.c @@ -515,15 +515,138 @@ glusterfs_graph_prepare (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)          /* XXX: --xlator-option additions */          gf_add_cmdline_options (graph, &ctx->cmd_args); -          return 0;  } +static +xlator_t *glusterfs_root(glusterfs_graph_t *graph) +{ +        return graph->first; +} + +static +int glusterfs_is_leaf(xlator_t *xl) +{ +        int ret = 0; + +        if (!xl->children) +                ret = 1; + +        return ret; +} + +static +uint32_t glusterfs_count_leaves(xlator_t *xl) +{ +        int n = 0; +        xlator_list_t *list = NULL; + +        if (glusterfs_is_leaf(xl)) +                n = 1; +        else +                for (list = xl->children; list; list = list->next) +                        n += glusterfs_count_leaves(list->xlator); + +        return n; +} + +int glusterfs_get_leaf_count(glusterfs_graph_t *graph) +{ +        return graph->leaf_count; +} + +static +int _glusterfs_leaf_position(xlator_t *tgt, int *id, xlator_t *xl) +{ +        xlator_list_t *list = NULL; +        int found = 0; + +        if (xl == tgt) +                found = 1; +        else if (glusterfs_is_leaf(xl)) +                *id += 1; +        else +                for (list = xl->children; !found && list; list = list->next) +                        found = _glusterfs_leaf_position(tgt, id, list->xlator); + +        return found; +} + +int glusterfs_leaf_position(xlator_t *tgt) +{ +        xlator_t *root = NULL; +        int pos = 0; + +        root = glusterfs_root(tgt->graph); + +        if (!_glusterfs_leaf_position(tgt, &pos, root)) +                pos = -1; + +        return pos; +} + +static int +_glusterfs_reachable_leaves(xlator_t *base, xlator_t *xl, dict_t *leaves) +{ +        xlator_list_t *list = NULL; +        int err = 1; +        int pos = 0; +        char strpos[6]; + +        if (glusterfs_is_leaf(xl)) { +                pos = glusterfs_leaf_position(xl); +                if (pos < 0) +                        goto out; +                sprintf(strpos, "%d", pos); + +                err = dict_set_static_ptr(leaves, strpos, base); + +        } else { +                for (err = 0, list = xl->children; +                     !err && list; +                     list = list->next) +                        err = _glusterfs_reachable_leaves(base, list->xlator, +                                                          leaves); +        } + +out: +        return err; +} + +/* + * This function determines which leaves are children (or grandchildren) + * of the given base. The base may have multiple sub volumes. Each sub + * volumes in turn may have sub volumes.. until the leaves are reached. + * Each leaf is numbered 1,2,3,...etc. + * + * The base translator calls this function to see which of *its* subvolumes + * it would forward an FOP to, to *get to* a particular leaf. + * That information is built into the "leaves" dictionary. + * key:destination leaf# -> value:base subvolume xlator. + */ + +int +glusterfs_reachable_leaves(xlator_t *base, dict_t *leaves) +{ +        xlator_list_t *list = NULL; +        int err = 0; + +        for (list = base->children; !err && list; list = list->next) +                err = _glusterfs_reachable_leaves(list->xlator, +                                                  list->xlator, leaves); + +        return err; +}  int  glusterfs_graph_activate (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)  {          int ret = 0; +        xlator_t *root = NULL; + +        root = glusterfs_root(graph); + +        graph->leaf_count = glusterfs_count_leaves(root);          /* XXX: all xlator options validation */          ret = glusterfs_graph_validate_options (graph); diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h index 733f6cf47ab..5a71ceb3f31 100644 --- a/libglusterfs/src/xlator.h +++ b/libglusterfs/src/xlator.h @@ -978,4 +978,11 @@ glusterfs_volfile_reconfigure (int oldvollen, FILE *newvolfile_fp,  int  loc_touchup (loc_t *loc, const char *name); + +int +glusterfs_leaf_position(xlator_t *tgt); + +int +glusterfs_reachable_leaves(xlator_t *base, dict_t *leaves); +  #endif /* _XLATOR_H */ diff --git a/tests/bugs/distribute/bug-1190734.t b/tests/bugs/distribute/bug-1190734.t new file mode 100644 index 00000000000..c2f2338dc0f --- /dev/null +++ b/tests/bugs/distribute/bug-1190734.t @@ -0,0 +1,104 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../nfs.rc + +BRICK_COUNT=3 +FILE_COUNT=100 + +function create_files { +    rm -rf $2 +    mkdir $2 +    for i in `seq 1 $1`; do +        touch $2/file_$i +    done +} + +function check_file_count { +    ORIG_FILE_COUNT=`find $2 | tail -n +2 |wc -l` +    [ $ORIG_FILE_COUNT -eq $1 ] +} + +function reset { +    $CLI volume stop $V0 +    umount $1 +    $CLI volume delete $V0 +} + +function start_mount_fuse { +    $CLI volume start $V0 +    [ $? -ne 0 ] && return 1 + +    $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 +    [ $? -ne 0 ] && return 1 + +    create_files $FILE_COUNT $M0/$1 +    [ $? -ne 0 ] && return 1 + +    return 0 +} + +function start_mount_nfs { +    $CLI volume start $V0 +    [ $? -ne 0 ] && return 1 + +    sleep 3 +    mount_nfs $H0:/$V0 $N0 +    [ $? -ne 0 ] && return 1 + +    create_files $FILE_COUNT $N0/$1 +    [ $? -ne 0 ] && return 1 + +    return 0 +} + +function start_removing_bricks { +    check_file_count $FILE_COUNT $1 +    [ $? -ne 0 ] && return 1 +    $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}2  $H0:$B0/${V0}3 start +    [ $? -ne 0 ] && return 1 + +    return 0 +} + +function finish_removing_bricks { + +    $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}2  $H0:$B0/${V0}3 commit +    [ $? -ne 0 ] && return 1 + +    check_file_count $FILE_COUNT $1 +    return $? +} + +cleanup + +TEST glusterd +TEST pidof glusterd + +# Test 1-2 Create repliacted volume + +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 \ +    $H0:$B0/${V0}2 $H0:$B0/${V0}3 $H0:$B0/${V0}4 $H0:$B0/${V0}5 + +# ------- test 1: AFR, fuse + remove bricks + +TEST start_mount_fuse test1 +TEST start_removing_bricks $M0/test1 +EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$B0/${V0}2  $H0:$B0/${V0}3" +$CLI  volume remove-brick $V0 replica 2  $H0:$B0/${V0}2  $H0:$B0/${V0}3 status > /tmp/out +TEST finish_removing_bricks $M0/test1 +reset $M0 + +# ------- test 2: AFR, nfs + remove bricks + +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 \ +    $H0:$B0/${V0}2 $H0:$B0/${V0}3 $H0:$B0/${V0}4 $H0:$B0/${V0}5 + +TEST start_mount_nfs test2 +TEST start_removing_bricks $N0/test2 +EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$B0/${V0}2  $H0:$B0/${V0}3" +TEST finish_removing_bricks $N0/test2 +reset $N0 + +cleanup diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index ef816f53532..729ab30e672 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -2840,6 +2840,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,          }          if (key && (strcmp (key, GF_XATTR_LINKINFO_KEY) == 0)) { +                  hashed_subvol = dht_subvol_get_hashed (this, loc);                  if (!hashed_subvol) {                          gf_msg (this->name, GF_LOG_ERROR, 0, @@ -2864,6 +2865,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,                          op_errno = ENODATA;                          goto err;                  } +                  STACK_WIND (frame, dht_linkinfo_getxattr_cbk, hashed_subvol,                              hashed_subvol->fops->getxattr, loc,                              GF_XATTR_PATHINFO_KEY, xdata); @@ -3854,9 +3856,7 @@ list:                          }                  } -                dht_itransform (this, prev->this, orig_entry->d_off, -                                &entry->d_off); - +                entry->d_off  = orig_entry->d_off;                  entry->d_stat = orig_entry->d_stat;                  entry->d_ino  = orig_entry->d_ino;                  entry->d_type = orig_entry->d_type; @@ -3988,9 +3988,7 @@ dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                                  goto unwind;                          } -                        dht_itransform (this, prev->this, orig_entry->d_off, -                                        &entry->d_off); - +                        entry->d_off  = orig_entry->d_off;                          entry->d_ino  = orig_entry->d_ino;                          entry->d_type = orig_entry->d_type;                          entry->d_len  = orig_entry->d_len; @@ -4050,7 +4048,6 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,          dht_local_t  *local  = NULL;          int           op_errno = -1;          xlator_t     *xvol = NULL; -        off_t         xoff = 0;          int           ret = 0;          dht_conf_t   *conf = NULL; @@ -4072,7 +4069,7 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,          local->xattr_req = (dict)? dict_ref (dict) : NULL;          local->first_up_subvol = dht_first_up_subvol (this); -        dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff); +        dht_deitransform (this, yoff, &xvol);          /* TODO: do proper readdir */          if (whichop == GF_FOP_READDIRP) { @@ -4111,10 +4108,10 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,                  }                  STACK_WIND (frame, dht_readdirp_cbk, xvol, xvol->fops->readdirp, -                            fd, size, xoff, local->xattr); +                            fd, size, yoff, local->xattr);          } else {                  STACK_WIND (frame, dht_readdir_cbk, xvol, xvol->fops->readdir, -                            fd, size, xoff, local->xattr); +                            fd, size, yoff, local->xattr);          }          return 0; diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 779b470585c..67e693146af 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -332,6 +332,7 @@ struct dht_conf {          gf_boolean_t   unhashed_sticky_bit;          struct timeval last_stat_fetch;          gf_lock_t      layout_lock; +        dict_t        *leaf_to_subvol;          void          *private;     /* Can be used by wrapper xlators over                                         dht */          gf_boolean_t   use_readdirp; @@ -501,9 +502,7 @@ int dht_disk_layout_merge (xlator_t   *this, dht_layout_t *layout,  int dht_frame_return (call_frame_t *frame); -int                             dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y); -int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol, -                      uint64_t *x); +int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol);  void dht_local_wipe (xlator_t *this, dht_local_t *local);  dht_local_t *dht_local_init (call_frame_t    *frame, loc_t *loc, fd_t *fd, @@ -775,6 +774,8 @@ int32_t dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd,  int32_t dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,                      off_t offset, off_t len, dict_t *xdata); +int +dht_set_subvol_range(xlator_t *this);  int32_t dht_init (xlator_t *this);  void    dht_fini (xlator_t *this);  int     dht_reconfigure (xlator_t *this, dict_t *options); diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index bf21f39a3a7..f4e5305d791 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -62,20 +62,6 @@ dht_frame_return (call_frame_t *frame)          return this_call_cnt;  } - -static uint64_t -dht_bits_for (uint64_t num) -{ -	uint64_t bits = 0, ctrl = 1; - -	while (ctrl < num) { -		ctrl *= 2; -		bits ++; -	} - -	return bits; -} -  /*   * A slightly "updated" version of the algorithm described in the commit log   * is used here. @@ -88,66 +74,6 @@ dht_bits_for (uint64_t num)   *   upwards which is described as 64, are both made "configurable."   */ - -#define BACKEND_D_OFF_BITS 63 -#define PRESENT_D_OFF_BITS 63 - -#define ONE 1ULL -#define MASK (~0ULL) -#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS)) -#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS)) - -#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1)) -#define SHIFT_BITS (max (0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1))) - -int -dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p) -{ -        dht_conf_t *conf = NULL; -        int         cnt = 0; -        int         max = 0; -        uint64_t    y = 0; -        uint64_t    hi_mask = 0; -        uint64_t    off_mask = 0; -        int         max_bits = 0; - -        if (x == ((uint64_t) -1)) { -                y = (uint64_t) -1; -                goto out; -        } - -        conf = this->private; -        if (!conf) -                goto out; - -        max = conf->subvolume_cnt; -        cnt = dht_subvol_cnt (this, subvol); - -	if (max == 1) { -		y = x; -		goto out; -	} - -        max_bits = dht_bits_for (max); - -        hi_mask = ~(PRESENT_MASK >> (max_bits + 1)); - -        if (x & hi_mask) { -                /* HUGE d_off */ -                off_mask = MASK << max_bits; -                y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | cnt; -        } else { -                /* small d_off */ -                y = ((x * max) + cnt); -        } - -out: -        if (y_p) -                *y_p = y; - -        return 0; -} -  int  dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,                             xlator_t **subvol) @@ -205,55 +131,44 @@ out:          return ret;  } -int -dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p, -                  uint64_t *x_p) +static xlator_t * +dht_get_subvol_from_id(xlator_t *this, int client_id)  { +        xlator_t *xl = NULL;          dht_conf_t *conf = NULL; -        int         cnt = 0; -        int         max = 0; -        uint64_t    x = 0; +        char sid[6] = { 0 }; + +        conf = this->private; + +        sprintf(sid, "%d", client_id); +        if (dict_get_ptr(conf->leaf_to_subvol, sid, (void **) &xl)) +                xl = NULL; + +        return xl; +} + +int +dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p) +{ +        int         client_id = 0;          xlator_t   *subvol = 0; -        int         max_bits = 0; -        uint64_t    off_mask = 0; -        uint64_t    host_mask = 0; +        dht_conf_t *conf = NULL;          if (!this->private)                  return -1;          conf = this->private; -        max = conf->subvolume_cnt; -	if (max == 1) { -		x = y; -		cnt = 0; -		goto out; -	} +        client_id = gf_deitransform(this, y); -        if (y & TOP_BIT) { -                /* HUGE d_off */ -                max_bits = dht_bits_for (max); -                off_mask = (MASK << max_bits); -                host_mask = ~(off_mask); +        subvol = dht_get_subvol_from_id(this, client_id); -                x = ((y & ~TOP_BIT) & off_mask) << SHIFT_BITS; - -                cnt = y & host_mask; -	} else { -                /* small d_off */ -                cnt = y % max; -                x = y / max; -        } - -out: -        subvol = conf->subvolumes[cnt]; +        if (!subvol) +                subvol = conf->subvolumes[0];          if (subvol_p)                  *subvol_p = subvol; -        if (x_p) -                *x_p = x; -          return 0;  } @@ -829,6 +744,8 @@ dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)          }          conf->subvolume_cnt = cnt; +        dht_set_subvol_range(this); +          cnt = 0;          for (subvols = this->children; subvols; subvols = subvols->next)                  conf->subvolumes[cnt++] = subvols->xlator; diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c index 757ec731d26..3ea75b34ad0 100644 --- a/xlators/cluster/dht/src/dht-layout.c +++ b/xlators/cluster/dht/src/dht-layout.c @@ -166,7 +166,6 @@ dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name)          int        i = 0;          int        ret = 0; -          ret = dht_hash_compute (this, layout->type, name, &hash);          if (ret != 0) {                  gf_log (this->name, GF_LOG_WARNING, diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 466042c74a0..3531872dd31 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -1492,12 +1492,14 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,                          if (defrag->stats == _gf_true) {                                  gettimeofday (&start, NULL);                          } +                          if (defrag->defrag_pattern &&                              (gf_defrag_pattern_match (defrag, entry->d_name,                                                        entry->d_stat.ia_size)                               == _gf_false)) {                                  continue;                          } +                          loc_wipe (&entry_loc);                          ret =dht_build_child_loc (this, &entry_loc, loc,                                                    entry->d_name); diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c index 22a7260f829..860f3e716f0 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -214,6 +214,8 @@ dht_fini (xlator_t *this)                          GF_FREE (conf->file_layouts);                  } +                dict_destroy(conf->leaf_to_subvol); +                  GF_FREE (conf->subvolumes);                  GF_FREE (conf->subvolume_status); @@ -288,7 +290,6 @@ out:          return ret;  } -  int  dht_decommissioned_remove (xlator_t *this, dht_conf_t *conf)  { @@ -344,6 +345,27 @@ dht_init_regex (xlator_t *this, dict_t *odict, char *name,  }  int +dht_set_subvol_range(xlator_t *this) +{ +        int ret = -1; +        dht_conf_t *conf = NULL; + +        conf = this->private; + +        if (!conf) +                goto out; + +        conf->leaf_to_subvol = dict_new(); +        if (!conf->leaf_to_subvol) +                goto out; + +        ret = glusterfs_reachable_leaves(this, conf->leaf_to_subvol); + +out: +        return ret; +} + +int  dht_reconfigure (xlator_t *this, dict_t *options)  {          dht_conf_t      *conf = NULL; @@ -676,6 +698,9 @@ dht_init (xlator_t *this)          this->private = conf; +        if (dht_set_subvol_range(this)) +                goto err; +          return 0;  err: diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c index c705b80fe82..17e1a3d124e 100644 --- a/xlators/cluster/ec/src/ec-dir-read.c +++ b/xlators/cluster/ec/src/ec-dir-read.c @@ -304,8 +304,6 @@ void ec_adjust_readdir(ec_t * ec, int32_t idx, gf_dirent_t * entries)      list_for_each_entry(entry, &entries->list, list)      { -        entry->d_off = ec_itransform(ec, idx, entry->d_off); -          if (entry->d_stat.ia_type == IA_IFREG)          {              if ((entry->dict == NULL) || @@ -413,10 +411,20 @@ int32_t ec_manager_readdir(ec_fop_data_t * fop, int32_t state)              if (fop->offset != 0)              { -                int32_t idx; +                int32_t idx = -1; +                ec_t    *ec = fop->xl->private; + +                idx = gf_deitransform(fop->xl, fop->offset); + +                if ((idx < 0) || (idx >= ec->nodes)) { -                fop->offset = ec_deitransform(fop->xl->private, &idx, -                                              fop->offset); +                        gf_log(fop->xl->name, GF_LOG_ERROR, +                               "Invalid index %d in readdirp request", idx); + +                        fop->error = EIO; + +                        return EC_STATE_REPORT; +                }                  fop->mask &= 1ULL << idx;              } diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c index 783e3d475ce..139957b55c6 100644 --- a/xlators/cluster/ec/src/ec-helpers.c +++ b/xlators/cluster/ec/src/ec-helpers.c @@ -16,17 +16,6 @@  #include "ec-fops.h"  #include "ec-helpers.h" -#define BACKEND_D_OFF_BITS 63 -#define PRESENT_D_OFF_BITS 63 - -#define ONE 1ULL -#define MASK (~0ULL) -#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS)) -#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS)) - -#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1)) -#define SHIFT_BITS (max(0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1))) -  #ifndef ffsll  #define ffsll(x) __builtin_ffsll(x)  #endif @@ -106,41 +95,6 @@ void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...)      }  } -uint64_t ec_itransform(ec_t * ec, int32_t idx, uint64_t offset) -{ -    int32_t bits; - -    if (offset == -1ULL) -    { -        return -1ULL; -    } - -    bits = ec->bits_for_nodes; -    if ((offset & ~(PRESENT_MASK >> (bits + 1))) != 0) -    { -        return TOP_BIT | ((offset >> SHIFT_BITS) & (MASK << bits)) | idx; -    } - -    return (offset * ec->nodes) + idx; -} - -uint64_t ec_deitransform(ec_t * ec, int32_t * idx, uint64_t offset) -{ -    uint64_t mask = 0; - -    if ((offset & TOP_BIT) != 0) -    { -        mask = MASK << ec->bits_for_nodes; - -        *idx = offset & ~mask; -        return ((offset & ~TOP_BIT) & mask) << SHIFT_BITS; -    } - -    *idx = offset % ec->nodes; - -    return offset / ec->nodes; -} -  int32_t ec_bits_count(uint64_t n)  {      n -= (n >> 1) & 0x5555555555555555ULL; diff --git a/xlators/cluster/ec/src/ec-helpers.h b/xlators/cluster/ec/src/ec-helpers.h index 5f5d9382532..11d2707b3c0 100644 --- a/xlators/cluster/ec/src/ec-helpers.h +++ b/xlators/cluster/ec/src/ec-helpers.h @@ -16,8 +16,6 @@  const char * ec_bin(char * str, size_t size, uint64_t value, int32_t digits);  const char * ec_fop_name(int32_t id);  void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...); -uint64_t ec_itransform(ec_t * ec, int32_t idx, uint64_t offset); -uint64_t ec_deitransform(ec_t * ec, int32_t * idx, uint64_t offset);  int32_t ec_bits_count(uint64_t n);  int32_t ec_bits_index(uint64_t n);  int32_t ec_bits_consume(uint64_t * n); diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c index 531b38eaf83..90192ad2ac8 100644 --- a/xlators/protocol/client/src/client-handshake.c +++ b/xlators/protocol/client/src/client-handshake.c @@ -1194,6 +1194,8 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m          }          */ +        conf->client_id = glusterfs_leaf_position(this); +          gf_log (this->name, GF_LOG_INFO,                  "Connected to %s, attached to remote volume '%s'.",                  conf->rpc->conn.name, diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c index 5d9f00fdc70..be5e7b57739 100644 --- a/xlators/protocol/client/src/client-helpers.c +++ b/xlators/protocol/client/src/client-helpers.c @@ -141,12 +141,16 @@ client_local_wipe (clnt_local_t *local)  }  int -unserialize_rsp_dirent (struct gfs3_readdir_rsp *rsp, gf_dirent_t *entries) +unserialize_rsp_dirent (xlator_t *this, struct gfs3_readdir_rsp *rsp, +                        gf_dirent_t *entries)  {          struct gfs3_dirlist  *trav      = NULL;  	gf_dirent_t          *entry     = NULL;          int                   entry_len = 0;          int                   ret       = -1; +        clnt_conf_t          *conf = NULL; + +        conf = this->private;          trav = rsp->reply;          while (trav) { @@ -156,7 +160,8 @@ unserialize_rsp_dirent (struct gfs3_readdir_rsp *rsp, gf_dirent_t *entries)                          goto out;                  entry->d_ino  = trav->d_ino; -                entry->d_off  = trav->d_off; +                gf_itransform (this, trav->d_off, &entry->d_off, +                                      conf->client_id);                  entry->d_len  = trav->d_len;                  entry->d_type = trav->d_type; @@ -182,12 +187,17 @@ unserialize_rsp_direntp (xlator_t *this, fd_t *fd,          inode_table_t        *itable    = NULL;          int                   entry_len = 0;          int                   ret       = -1; +        clnt_conf_t          *conf      = NULL;          trav = rsp->reply;          if (fd)                  itable = fd->inode->table; +        conf = this->private; +        if (!conf) +                goto out; +          while (trav) {                  entry_len = gf_dirent_size (trav->name);                  entry = GF_CALLOC (1, entry_len, gf_common_mt_gf_dirent_t); @@ -195,7 +205,8 @@ unserialize_rsp_direntp (xlator_t *this, fd_t *fd,                          goto out;                  entry->d_ino  = trav->d_ino; -                entry->d_off  = trav->d_off; +                gf_itransform (this, trav->d_off, &entry->d_off, +                                      conf->client_id);                  entry->d_len  = trav->d_len;                  entry->d_type = trav->d_type; diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c index 7ca91e9880c..10abe845c0c 100644 --- a/xlators/protocol/client/src/client-rpc-fops.c +++ b/xlators/protocol/client/src/client-rpc-fops.c @@ -2450,7 +2450,7 @@ client3_3_readdir_cbk (struct rpc_req *req, struct iovec *iov, int count,          INIT_LIST_HEAD (&entries.list);          if (rsp.op_ret > 0) { -                unserialize_rsp_dirent (&rsp, &entries); +                unserialize_rsp_dirent (this, &rsp, &entries);          }          GF_PROTOCOL_DICT_UNSERIALIZE (frame->this, xdata, diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index d9e7ccd0c4f..7fca01c5b6a 100644 --- a/xlators/protocol/client/src/client.c +++ b/xlators/protocol/client/src/client.c @@ -24,6 +24,7 @@  #include "xdr-rpc.h"  #include "glusterfs3.h" +#include "gf-dirent.h"  extern rpc_clnt_prog_t clnt_handshake_prog;  extern rpc_clnt_prog_t clnt_dump_prog; @@ -1913,6 +1914,9 @@ client_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,          if (!conf || !conf->fops)                  goto out; +        if (off != 0) +                off = gf_dirent_orig_offset(this, off); +          args.fd = fd;          args.size = size;          args.offset = off; @@ -1948,6 +1952,9 @@ client_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,          if (!conf || !conf->fops)                  goto out; +        if (off != 0) +                off = gf_dirent_orig_offset(this, off); +          args.fd = fd;          args.size = size;          args.offset = off; @@ -2447,7 +2454,7 @@ build_client_config (xlator_t *this, clnt_conf_t *conf)  {          int                     ret = -1; -        if (!conf) +       if (!conf)                  goto out;          GF_OPTION_INIT ("frame-timeout", conf->rpc_conf.rpc_timeout, @@ -2470,6 +2477,8 @@ build_client_config (xlator_t *this, clnt_conf_t *conf)          GF_OPTION_INIT ("send-gids", conf->send_gids, bool, out); +        conf->client_id = glusterfs_leaf_position(this); +          ret = client_check_remote_host (this, this->options);          if (ret)                  goto out; diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h index 7157e120dda..40200b6afc4 100644 --- a/xlators/protocol/client/src/client.h +++ b/xlators/protocol/client/src/client.h @@ -85,6 +85,7 @@ typedef struct clnt_conf {          rpc_clnt_prog_t       *handshake;          rpc_clnt_prog_t       *dump; +        int                    client_id;          uint64_t               reopen_fd_count; /* Count of fds reopened after a                                                     connection is established */          gf_lock_t              rec_lock; @@ -228,7 +229,8 @@ int client_submit_request (xlator_t *this, void *req,                             struct iovec *rsp_payload, int rsp_count,                             struct iobref *rsp_iobref, xdrproc_t xdrproc); -int unserialize_rsp_dirent (struct gfs3_readdir_rsp *rsp, gf_dirent_t *entries); +int unserialize_rsp_dirent (xlator_t *this, struct gfs3_readdir_rsp *rsp, +                            gf_dirent_t *entries);  int unserialize_rsp_direntp (xlator_t *this, fd_t *fd,                               struct gfs3_readdirp_rsp *rsp, gf_dirent_t *entries);  | 
