| field | value | date |
|---|---|---|
| author | Mohammed Rafi KC <rkavunga@redhat.com> | 2015-02-17 20:17:58 +0530 |
| committer | Raghavendra Bhat <raghavendra@redhat.com> | 2015-03-27 04:34:08 -0700 |
| commit | 7febb66a26f01c94f8e76bb90cf4edd7c6cc1421 (patch) | |
| tree | fc455167a14965ac54490af2acbc08ad480a5d6c /rpc | |
| parent | d21990e093d99d8adbacae1ba2c56ff7606e2c37 (diff) | |
rdma: pre-register iobuf_pool with rdma devices.
        Back port of: http://review.gluster.org/9506
Registering buffers with an rdma device is a time-consuming
operation, so performing the registration in the I/O code path
hurts performance.
Using pre-registered memory gives better performance, i.e.,
register the iobuf_pool during rdma initialization. For
dynamically created arenas, we can register them with all the
devices.
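In outline, the patch follows a register-once, look-up-later pattern: each iobuf arena is registered with every RDMA device up front, and the I/O paths first look for a memory region (MR) that already covers a buffer before falling back to a per-call ibv_reg_mr(). The sketch below shows that pattern against plain libibverbs purely as an illustration; arena_reg, arena_register() and lookup_or_register() are made-up names, not identifiers from this patch.

/* Minimal sketch of the register-once / lookup-with-fallback idea,
 * written against plain libibverbs.  arena_reg and the helpers below
 * are illustrative only, not GlusterFS code. */

#include <stddef.h>
#include <infiniband/verbs.h>

struct arena_reg {                 /* one pre-registered arena */
        void          *base;       /* arena start address */
        size_t         size;       /* arena length in bytes */
        struct ibv_mr *mr;         /* MR obtained once, at arena creation */
};

/* Register an arena once, up front, instead of on every request. */
static struct ibv_mr *
arena_register (struct ibv_pd *pd, struct arena_reg *reg)
{
        reg->mr = ibv_reg_mr (pd, reg->base, reg->size,
                              IBV_ACCESS_LOCAL_WRITE |
                              IBV_ACCESS_REMOTE_READ |
                              IBV_ACCESS_REMOTE_WRITE);
        return reg->mr;
}

/* On the send/receive path: reuse the covering MR if the buffer lies
 * inside a pre-registered arena, otherwise fall back to a one-off
 * registration (the caller must deregister only the fallback MRs). */
static struct ibv_mr *
lookup_or_register (struct ibv_pd *pd, struct arena_reg *table, int count,
                    void *buf, size_t len)
{
        int i;

        for (i = 0; i < count; i++) {
                char *base = table[i].base;

                if ((char *)buf >= base &&
                    (char *)buf + len <= base + table[i].size)
                        return table[i].mr;       /* pre-registered hit */
        }

        return ibv_reg_mr (pd, buf, len, IBV_ACCESS_LOCAL_WRITE);
}

The gain comes from paying the memory-pinning cost of ibv_reg_mr() once per arena rather than once per request, while the fallback path keeps buffers that live outside any registered arena working as before.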
Change-Id: Ic79183e2efd014c43faf5911fdb6d5cfbcee64ca
BUG: 1202212
Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
Reviewed-on: http://review.gluster.org/9506
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Tested-by: Raghavendra G <rgowdapp@redhat.com>
Reviewed-on: http://review.gluster.org/9889
Reviewed-by: Raghavendra Bhat <raghavendra@redhat.com>
Diffstat (limited to 'rpc')
| -rw-r--r-- | rpc/rpc-transport/rdma/src/rdma.c | 200 |
| -rw-r--r-- | rpc/rpc-transport/rdma/src/rdma.h | 10 |
2 files changed, 196 insertions, 14 deletions
diff --git a/rpc/rpc-transport/rdma/src/rdma.c b/rpc/rpc-transport/rdma/src/rdma.c
index 92d5da258f2..cb5ce77291e 100644
--- a/rpc/rpc-transport/rdma/src/rdma.c
+++ b/rpc/rpc-transport/rdma/src/rdma.c
@@ -15,6 +15,7 @@
 #include "dict.h"
 #include "glusterfs.h"
+#include "iobuf.h"
 #include "logging.h"
 #include "rdma.h"
 #include "name.h"
@@ -361,6 +362,135 @@ gf_rdma_post_recv (struct ibv_srq *srq,
         return ibv_post_srq_recv (srq, &wr, &bad_wr);
 }
 
+int
+gf_rdma_deregister_arena (struct list_head **mr_list,
+                          struct iobuf_arena *iobuf_arena)
+{
+        gf_rdma_arena_mr *tmp     = NULL;
+        int               count   = 0, i = 0;
+
+        count = iobuf_arena->iobuf_pool->rdma_device_count;
+        for (i = 0; i < count; i++) {
+                list_for_each_entry(tmp, mr_list[i], list) {
+                        if (tmp->iobuf_arena == iobuf_arena) {
+                                if (ibv_dereg_mr(tmp->mr)) {
+                                        gf_log("rdma", GF_LOG_WARNING,
+                                        "deallocation of memory region "
+                                        "failed");
+                                        return -1;
+                                }
+                                list_del(&tmp->list);
+                                GF_FREE(tmp);
+                                break;
+                        }
+                }
+        }
+
+        return 0;
+}
+
+
+int
+gf_rdma_register_arena (void **arg1, void *arg2)
+{
+        struct ibv_mr       *mr          = NULL;
+        gf_rdma_arena_mr    *new         = NULL;
+        struct iobuf_pool   *iobuf_pool  = NULL;
+        gf_rdma_device_t    **device     = (gf_rdma_device_t **)arg1;
+        struct iobuf_arena  *iobuf_arena = arg2;
+        int                  count       = 0, i = 0;
+
+        iobuf_pool = iobuf_arena->iobuf_pool;
+        count = iobuf_pool->rdma_device_count;
+        for (i = 0; i < count; i++) {
+                new = GF_CALLOC(1, sizeof(gf_rdma_arena_mr),
+                                gf_common_mt_rdma_arena_mr);
+                INIT_LIST_HEAD (&new->list);
+                new->iobuf_arena = iobuf_arena;
+
+                mr = ibv_reg_mr(device[i]->pd, iobuf_arena->mem_base,
+                                         iobuf_arena->arena_size,
+                                         IBV_ACCESS_REMOTE_READ |
+                                         IBV_ACCESS_LOCAL_WRITE |
+                                         IBV_ACCESS_REMOTE_WRITE
+                                         );
+                if (!mr)
+                        gf_log("rdma", GF_LOG_WARNING,
+                               "allocation of mr failed");
+
+                new->mr = mr;
+                list_add (&new->list, &device[i]->all_mr);
+                new = NULL;
+        }
+
+        return 0;
+
+}
+
+static void
+gf_rdma_register_iobuf_pool (rpc_transport_t *this)
+{
+        struct iobuf_pool   *iobuf_pool = NULL;
+        struct iobuf_arena  *tmp        = NULL;
+        gf_rdma_private_t   *priv       = NULL;
+        gf_rdma_device_t    *device     = NULL;
+        struct ibv_mr       *mr         = NULL;
+        gf_rdma_arena_mr    *new        = NULL;
+
+        priv = this->private;
+        device = priv->device;
+        iobuf_pool = this->ctx->iobuf_pool;
+
+        if (!list_empty(&iobuf_pool->all_arenas)) {
+
+                list_for_each_entry (tmp, &iobuf_pool->all_arenas, all_list) {
+                        new = GF_CALLOC(1, sizeof(gf_rdma_arena_mr),
+                                        gf_common_mt_rdma_arena_mr);
+                        INIT_LIST_HEAD (&new->list);
+                        new->iobuf_arena = tmp;
+
+                        mr = ibv_reg_mr(device->pd, tmp->mem_base,
+                                        tmp->arena_size,
+                                        IBV_ACCESS_REMOTE_READ |
+                                        IBV_ACCESS_LOCAL_WRITE |
+                                        IBV_ACCESS_REMOTE_WRITE);
+                        if (!mr) {
+                                gf_log ("rdma", GF_LOG_WARNING, "failed to pre"
+                                        " register buffers with rdma "
+                                        "devices.");
+
+                        }
+                        new->mr = mr;
+                        list_add (&new->list, &device->all_mr);
+
+                        new = NULL;
+                }
+        }
+
+       return;
+}
+
+static struct ibv_mr*
+gf_rdma_get_pre_registred_mr(rpc_transport_t *this, void *ptr, int size)
+{
+        gf_rdma_arena_mr   *tmp        = NULL;
+        gf_rdma_private_t  *priv       = NULL;
+        gf_rdma_device_t   *device     = NULL;
+
+        priv = this->private;
+        device = priv->device;
+
+        if (!list_empty(&device->all_mr)) {
+                list_for_each_entry (tmp, &device->all_mr, list) {
+                        if (tmp->iobuf_arena->mem_base <= ptr &&
+                            ptr < tmp->iobuf_arena->mem_base +
+                            tmp->iobuf_arena->arena_size)
+                                return tmp->mr;
+                        }
+        }
+
+        return NULL;
+}
 
 static int32_t
 gf_rdma_create_posts (rpc_transport_t *this)
@@ -510,11 +640,13 @@ gf_rdma_get_device (rpc_transport_t *this, struct ibv_context *ibctx,
         int32_t            i        = 0;
         gf_rdma_device_t  *trav     = NULL, *device = NULL;
         gf_rdma_ctx_t     *rdma_ctx = NULL;
+        struct iobuf_pool *iobuf_pool = NULL;
 
         priv        = this->private;
         options     = &priv->options;
         ctx         = this->ctx;
         rdma_ctx    = ctx->ib;
+        iobuf_pool = ctx->iobuf_pool;
 
         trav = rdma_ctx->device;
 
@@ -530,10 +662,10 @@ gf_rdma_get_device (rpc_transport_t *this, struct ibv_context *ibctx,
                 if (trav == NULL) {
                         goto out;
                 }
-
                 priv->device = trav;
 
                 trav->context = ibctx;
-
+                iobuf_pool->device[iobuf_pool->rdma_device_count] = trav;
+                iobuf_pool->mr_list[iobuf_pool->rdma_device_count++] = &trav->all_mr;
                 trav->request_ctx_pool
                         = mem_pool_new (gf_rdma_request_context_t,
                                         GF_RDMA_POOL_SIZE);
@@ -613,6 +745,9 @@ gf_rdma_get_device (rpc_transport_t *this, struct ibv_context *ibctx,
                 gf_rdma_queue_init (&trav->sendq);
                 gf_rdma_queue_init (&trav->recvq);
 
+                INIT_LIST_HEAD (&trav->all_mr);
+                gf_rdma_register_iobuf_pool(this);
+
                 if (gf_rdma_create_posts (this) < 0) {
                         gf_log (this->name, GF_LOG_ERROR,
                                 "could not allocate posts for device (%s)",
@@ -1239,9 +1374,13 @@ __gf_rdma_create_read_chunks_from_vector (gf_rdma_peer_t *peer,
                 readch->rc_discrim = hton32 (1);
                 readch->rc_position = hton32 (*pos);
 
+                mr = gf_rdma_get_pre_registred_mr(peer->trans,
+                                (void *)vector[i].iov_base, vector[i].iov_len);
+                if (!mr) {
                 mr = ibv_reg_mr (device->pd, vector[i].iov_base,
                                  vector[i].iov_len,
                                  IBV_ACCESS_REMOTE_READ);
+                }
                 if (!mr) {
                         gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
                                 "memory registration failed (%s) (peer:%s)",
@@ -1374,10 +1513,16 @@ __gf_rdma_create_write_chunks_from_vector (gf_rdma_peer_t *peer,
         device = priv->device;
 
         for (i = 0; i < count; i++) {
+
+                mr = gf_rdma_get_pre_registred_mr(peer->trans,
+                                (void *)vector[i].iov_base, vector[i].iov_len);
+                if (!mr) {
                 mr = ibv_reg_mr (device->pd, vector[i].iov_base,
                                  vector[i].iov_len,
                                  IBV_ACCESS_REMOTE_WRITE
                                  | IBV_ACCESS_LOCAL_WRITE);
+                }
+
                 if (!mr) {
                         gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
                                 "memory registration failed (%s) (peer:%s)",
@@ -1504,16 +1649,30 @@ out:
 
 static inline void
-__gf_rdma_deregister_mr (struct ibv_mr **mr, int count)
+__gf_rdma_deregister_mr (gf_rdma_device_t *device,
+                         struct ibv_mr **mr, int count)
 {
-        int i = 0;
+        gf_rdma_arena_mr    *tmp   = NULL;
+        int                  i     = 0;
+        int                  found = 0;
 
-        if (mr == NULL) {
+               if (mr == NULL) {
                 goto out;
         }
         for (i = 0; i < count; i++) {
-                ibv_dereg_mr (mr[i]);
+                 found = 0;
+                 if (!list_empty(&device->all_mr)) {
+                 list_for_each_entry(tmp, &device->all_mr, list) {
+                        if (tmp->mr == mr[i]) {
+                                found = 1;
+                                break;
+                        }
+                 }
+                 }
+                if (!found)
+                        ibv_dereg_mr (mr[i]);
+
         }
 
 out:
@@ -1558,9 +1717,10 @@ gf_rdma_quota_put (gf_rdma_peer_t *peer)
 
 void
 __gf_rdma_request_context_destroy (gf_rdma_request_context_t *context)
 {
-        gf_rdma_peer_t    *peer = NULL;
-        gf_rdma_private_t *priv = NULL;
-        int32_t            ret  = 0;
+        gf_rdma_peer_t    *peer   = NULL;
+        gf_rdma_private_t *priv   = NULL;
+        gf_rdma_device_t  *device = NULL;
+        int32_t            ret    = 0;
 
         if (context == NULL) {
                 goto out;
@@ -1568,9 +1728,10 @@ __gf_rdma_request_context_destroy (gf_rdma_request_context_t *context)
 
         peer = context->peer;
 
-        __gf_rdma_deregister_mr (context->mr, context->mr_count);
-
         priv = peer->trans->private;
+        device = priv->device;
+        __gf_rdma_deregister_mr (device, context->mr, context->mr_count);
+
         if (priv->connected) {
                 ret = __gf_rdma_quota_put (peer);
@@ -1602,13 +1763,14 @@ out:
 
 void
-gf_rdma_post_context_destroy (gf_rdma_post_context_t *ctx)
+gf_rdma_post_context_destroy (gf_rdma_device_t *device,
+                              gf_rdma_post_context_t *ctx)
 {
         if (ctx == NULL) {
                 goto out;
         }
 
-        __gf_rdma_deregister_mr (ctx->mr, ctx->mr_count);
+        __gf_rdma_deregister_mr (device, ctx->mr, ctx->mr_count);
 
         if (ctx->iobref != NULL) {
                 iobref_unref (ctx->iobref);
@@ -1640,7 +1802,7 @@ gf_rdma_post_unref (gf_rdma_post_t *post)
         pthread_mutex_unlock (&post->lock);
 
         if (refcount == 0) {
-                gf_rdma_post_context_destroy (&post->ctx);
+                gf_rdma_post_context_destroy (post->device, &post->ctx);
                 if (post->type == GF_RDMA_SEND_POST) {
                         gf_rdma_put_post (&post->device->sendq, post);
                 } else {
@@ -2060,10 +2222,16 @@ __gf_rdma_register_local_mr_for_rdma (gf_rdma_peer_t *peer,
                  * Infiniband Architecture Specification Volume 1
                  * (Release 1.2.1)
                  */
+                ctx->mr[ctx->mr_count] = gf_rdma_get_pre_registred_mr(
+                                peer->trans, (void *)vector[i].iov_base,
+                                vector[i].iov_len);
+
+                if (!ctx->mr[ctx->mr_count]) {
                 ctx->mr[ctx->mr_count] = ibv_reg_mr (device->pd,
                                                      vector[i].iov_base,
                                                      vector[i].iov_len,
                                                      IBV_ACCESS_LOCAL_WRITE);
+                }
                 if (ctx->mr[ctx->mr_count] == NULL) {
                         gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
                                 "registering memory for IBV_ACCESS_LOCAL_WRITE "
@@ -4553,6 +4721,7 @@ int32_t
 init (rpc_transport_t *this)
 {
         gf_rdma_private_t *priv = NULL;
+        struct iobuf_pool *iobuf_pool = NULL;
 
         priv = GF_CALLOC (1, sizeof (*priv), gf_common_mt_rdma_private_t);
         if (!priv)
@@ -4565,6 +4734,9 @@ init (rpc_transport_t *this)
                         "Failed to initialize IB Device");
                 return -1;
         }
+        iobuf_pool = this->ctx->iobuf_pool;
+        iobuf_pool->rdma_registration = gf_rdma_register_arena;
+        iobuf_pool->rdma_deregistration = gf_rdma_deregister_arena;
 
         return 0;
 }
diff --git a/rpc/rpc-transport/rdma/src/rdma.h b/rpc/rpc-transport/rdma/src/rdma.h
index 7f76244f071..fda01aa53ef 100644
--- a/rpc/rpc-transport/rdma/src/rdma.h
+++ b/rpc/rpc-transport/rdma/src/rdma.h
@@ -34,6 +34,7 @@
 
 /* FIXME: give appropriate values to these macros */
 #define GF_DEFAULT_RDMA_LISTEN_PORT (GF_DEFAULT_BASE_PORT + 1)
+
 /* If you are changing GF_RDMA_MAX_SEGMENTS, please make sure to update
  * GLUSTERFS_GF_RDMA_MAX_HEADER_SIZE defined in glusterfs.h .
  */
@@ -328,9 +329,18 @@ struct __gf_rdma_device {
         struct mem_pool *request_ctx_pool;
         struct mem_pool *ioq_pool;
         struct mem_pool *reply_info_pool;
+        struct list_head all_mr;
 };
 typedef struct __gf_rdma_device gf_rdma_device_t;
+
+struct __gf_rdma_arena_mr {
+        struct list_head list;
+        struct iobuf_arena *iobuf_arena;
+        struct ibv_mr *mr;
+};
+
+typedef struct __gf_rdma_arena_mr gf_rdma_arena_mr;
 struct __gf_rdma_ctx {
         gf_rdma_device_t          *device;
         struct rdma_event_channel *rdma_cm_event_channel;
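init() above also publishes gf_rdma_register_arena() and gf_rdma_deregister_arena() through function pointers on the iobuf pool, so arenas created dynamically after startup can still be registered with all known devices. The allocator-side changes are outside the 'rpc' diffstat shown here; the stand-alone sketch below only illustrates how hooks of that shape might be driven when a new arena appears (demo_pool and demo_arena are simplified stand-ins, not the real struct iobuf_pool / struct iobuf_arena).

/* Illustration only: an allocator-side caller for registration hooks of
 * this shape.  The types are simplified stand-ins for the iobuf pool and
 * arena, whose actual changes live outside the 'rpc' directory. */

#include <stddef.h>

typedef int (*rdma_register_fn) (void **devices, void *arena);

struct demo_pool {
        void             *devices[8];        /* one entry per RDMA device */
        int               device_count;
        rdma_register_fn  rdma_registration; /* set by the transport's init() */
};

struct demo_arena {
        void   *mem_base;
        size_t  arena_size;
};

/* Called by the pool whenever it grows and adds a fresh arena. */
static int
demo_arena_created (struct demo_pool *pool, struct demo_arena *arena)
{
        if (pool->rdma_registration == NULL)
                return 0;   /* no RDMA transport loaded; nothing to do */

        /* register the new arena with every known device */
        return pool->rdma_registration (pool->devices, arena);
}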
