summaryrefslogtreecommitdiffstats
path: root/rpc/rpc-transport
diff options
context:
space:
mode:
Diffstat (limited to 'rpc/rpc-transport')
-rw-r--r--rpc/rpc-transport/rdma/src/rdma.c200
-rw-r--r--rpc/rpc-transport/rdma/src/rdma.h10
2 files changed, 196 insertions, 14 deletions
diff --git a/rpc/rpc-transport/rdma/src/rdma.c b/rpc/rpc-transport/rdma/src/rdma.c
index d3b9c6354b4..e6ed91e05c7 100644
--- a/rpc/rpc-transport/rdma/src/rdma.c
+++ b/rpc/rpc-transport/rdma/src/rdma.c
@@ -15,6 +15,7 @@
#include "dict.h"
#include "glusterfs.h"
+#include "iobuf.h"
#include "logging.h"
#include "rdma.h"
#include "name.h"
@@ -361,6 +362,135 @@ gf_rdma_post_recv (struct ibv_srq *srq,
return ibv_post_srq_recv (srq, &wr, &bad_wr);
}
+int
+gf_rdma_deregister_arena (struct list_head **mr_list,
+ struct iobuf_arena *iobuf_arena)
+{
+ gf_rdma_arena_mr *tmp = NULL;
+ int count = 0, i = 0;
+
+ count = iobuf_arena->iobuf_pool->rdma_device_count;
+ for (i = 0; i < count; i++) {
+ list_for_each_entry(tmp, mr_list[i], list) {
+ if (tmp->iobuf_arena == iobuf_arena) {
+ if (ibv_dereg_mr(tmp->mr)) {
+ gf_log("rdma", GF_LOG_WARNING,
+ "deallocation of memory region "
+ "failed");
+ return -1;
+ }
+ list_del(&tmp->list);
+ GF_FREE(tmp);
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+int
+gf_rdma_register_arena (void **arg1, void *arg2)
+{
+ struct ibv_mr *mr = NULL;
+ gf_rdma_arena_mr *new = NULL;
+ struct iobuf_pool *iobuf_pool = NULL;
+ gf_rdma_device_t **device = (gf_rdma_device_t **)arg1;
+ struct iobuf_arena *iobuf_arena = arg2;
+ int count = 0, i = 0;
+
+ iobuf_pool = iobuf_arena->iobuf_pool;
+ count = iobuf_pool->rdma_device_count;
+ for (i = 0; i < count; i++) {
+ new = GF_CALLOC(1, sizeof(gf_rdma_arena_mr),
+ gf_common_mt_rdma_arena_mr);
+ INIT_LIST_HEAD (&new->list);
+ new->iobuf_arena = iobuf_arena;
+
+ mr = ibv_reg_mr(device[i]->pd, iobuf_arena->mem_base,
+ iobuf_arena->arena_size,
+ IBV_ACCESS_REMOTE_READ |
+ IBV_ACCESS_LOCAL_WRITE |
+ IBV_ACCESS_REMOTE_WRITE
+ );
+ if (!mr)
+ gf_log("rdma", GF_LOG_WARNING,
+ "allocation of mr failed");
+
+ new->mr = mr;
+ list_add (&new->list, &device[i]->all_mr);
+ new = NULL;
+ }
+
+ return 0;
+
+}
+
+static void
+gf_rdma_register_iobuf_pool (rpc_transport_t *this)
+{
+ struct iobuf_pool *iobuf_pool = NULL;
+ struct iobuf_arena *tmp = NULL;
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_device_t *device = NULL;
+ struct ibv_mr *mr = NULL;
+ gf_rdma_arena_mr *new = NULL;
+
+ priv = this->private;
+ device = priv->device;
+ iobuf_pool = this->ctx->iobuf_pool;
+
+ if (!list_empty(&iobuf_pool->all_arenas)) {
+
+ list_for_each_entry (tmp, &iobuf_pool->all_arenas, all_list) {
+ new = GF_CALLOC(1, sizeof(gf_rdma_arena_mr),
+ gf_common_mt_rdma_arena_mr);
+ INIT_LIST_HEAD (&new->list);
+ new->iobuf_arena = tmp;
+
+ mr = ibv_reg_mr(device->pd, tmp->mem_base,
+ tmp->arena_size,
+ IBV_ACCESS_REMOTE_READ |
+ IBV_ACCESS_LOCAL_WRITE |
+ IBV_ACCESS_REMOTE_WRITE);
+ if (!mr) {
+ gf_log ("rdma", GF_LOG_WARNING, "failed to pre"
+ " register buffers with rdma "
+ "devices.");
+
+ }
+ new->mr = mr;
+ list_add (&new->list, &device->all_mr);
+
+ new = NULL;
+ }
+ }
+
+ return;
+}
+
+static struct ibv_mr*
+gf_rdma_get_pre_registred_mr(rpc_transport_t *this, void *ptr, int size)
+{
+ gf_rdma_arena_mr *tmp = NULL;
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_device_t *device = NULL;
+
+ priv = this->private;
+ device = priv->device;
+
+ if (!list_empty(&device->all_mr)) {
+ list_for_each_entry (tmp, &device->all_mr, list) {
+ if (tmp->iobuf_arena->mem_base <= ptr &&
+ ptr < tmp->iobuf_arena->mem_base +
+ tmp->iobuf_arena->arena_size)
+ return tmp->mr;
+ }
+ }
+
+ return NULL;
+}
static int32_t
gf_rdma_create_posts (rpc_transport_t *this)
@@ -510,11 +640,13 @@ gf_rdma_get_device (rpc_transport_t *this, struct ibv_context *ibctx,
int32_t i = 0;
gf_rdma_device_t *trav = NULL, *device = NULL;
gf_rdma_ctx_t *rdma_ctx = NULL;
+ struct iobuf_pool *iobuf_pool = NULL;
priv = this->private;
options = &priv->options;
ctx = this->ctx;
rdma_ctx = ctx->ib;
+ iobuf_pool = ctx->iobuf_pool;
trav = rdma_ctx->device;
@@ -530,10 +662,10 @@ gf_rdma_get_device (rpc_transport_t *this, struct ibv_context *ibctx,
if (trav == NULL) {
goto out;
}
-
priv->device = trav;
trav->context = ibctx;
-
+ iobuf_pool->device[iobuf_pool->rdma_device_count] = trav;
+ iobuf_pool->mr_list[iobuf_pool->rdma_device_count++] = &trav->all_mr;
trav->request_ctx_pool
= mem_pool_new (gf_rdma_request_context_t,
GF_RDMA_POOL_SIZE);
@@ -613,6 +745,9 @@ gf_rdma_get_device (rpc_transport_t *this, struct ibv_context *ibctx,
gf_rdma_queue_init (&trav->sendq);
gf_rdma_queue_init (&trav->recvq);
+ INIT_LIST_HEAD (&trav->all_mr);
+ gf_rdma_register_iobuf_pool(this);
+
if (gf_rdma_create_posts (this) < 0) {
gf_log (this->name, GF_LOG_ERROR,
"could not allocate posts for device (%s)",
@@ -1239,9 +1374,13 @@ __gf_rdma_create_read_chunks_from_vector (gf_rdma_peer_t *peer,
readch->rc_discrim = hton32 (1);
readch->rc_position = hton32 (*pos);
+ mr = gf_rdma_get_pre_registred_mr(peer->trans,
+ (void *)vector[i].iov_base, vector[i].iov_len);
+ if (!mr) {
mr = ibv_reg_mr (device->pd, vector[i].iov_base,
vector[i].iov_len,
IBV_ACCESS_REMOTE_READ);
+ }
if (!mr) {
gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
"memory registration failed (%s) (peer:%s)",
@@ -1374,10 +1513,16 @@ __gf_rdma_create_write_chunks_from_vector (gf_rdma_peer_t *peer,
device = priv->device;
for (i = 0; i < count; i++) {
+
+ mr = gf_rdma_get_pre_registred_mr(peer->trans,
+ (void *)vector[i].iov_base, vector[i].iov_len);
+ if (!mr) {
mr = ibv_reg_mr (device->pd, vector[i].iov_base,
vector[i].iov_len,
IBV_ACCESS_REMOTE_WRITE
| IBV_ACCESS_LOCAL_WRITE);
+ }
+
if (!mr) {
gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
"memory registration failed (%s) (peer:%s)",
@@ -1504,16 +1649,30 @@ out:
static inline void
-__gf_rdma_deregister_mr (struct ibv_mr **mr, int count)
+__gf_rdma_deregister_mr (gf_rdma_device_t *device,
+ struct ibv_mr **mr, int count)
{
- int i = 0;
+ gf_rdma_arena_mr *tmp = NULL;
+ int i = 0;
+ int found = 0;
- if (mr == NULL) {
+ if (mr == NULL) {
goto out;
}
for (i = 0; i < count; i++) {
- ibv_dereg_mr (mr[i]);
+ found = 0;
+ if (!list_empty(&device->all_mr)) {
+ list_for_each_entry(tmp, &device->all_mr, list) {
+ if (tmp->mr == mr[i]) {
+ found = 1;
+ break;
+ }
+ }
+ }
+ if (!found)
+ ibv_dereg_mr (mr[i]);
+
}
out:
@@ -1558,9 +1717,10 @@ gf_rdma_quota_put (gf_rdma_peer_t *peer)
void
__gf_rdma_request_context_destroy (gf_rdma_request_context_t *context)
{
- gf_rdma_peer_t *peer = NULL;
- gf_rdma_private_t *priv = NULL;
- int32_t ret = 0;
+ gf_rdma_peer_t *peer = NULL;
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_device_t *device = NULL;
+ int32_t ret = 0;
if (context == NULL) {
goto out;
@@ -1568,9 +1728,10 @@ __gf_rdma_request_context_destroy (gf_rdma_request_context_t *context)
peer = context->peer;
- __gf_rdma_deregister_mr (context->mr, context->mr_count);
-
priv = peer->trans->private;
+ device = priv->device;
+ __gf_rdma_deregister_mr (device, context->mr, context->mr_count);
+
if (priv->connected) {
ret = __gf_rdma_quota_put (peer);
@@ -1602,13 +1763,14 @@ out:
void
-gf_rdma_post_context_destroy (gf_rdma_post_context_t *ctx)
+gf_rdma_post_context_destroy (gf_rdma_device_t *device,
+ gf_rdma_post_context_t *ctx)
{
if (ctx == NULL) {
goto out;
}
- __gf_rdma_deregister_mr (ctx->mr, ctx->mr_count);
+ __gf_rdma_deregister_mr (device, ctx->mr, ctx->mr_count);
if (ctx->iobref != NULL) {
iobref_unref (ctx->iobref);
@@ -1640,7 +1802,7 @@ gf_rdma_post_unref (gf_rdma_post_t *post)
pthread_mutex_unlock (&post->lock);
if (refcount == 0) {
- gf_rdma_post_context_destroy (&post->ctx);
+ gf_rdma_post_context_destroy (post->device, &post->ctx);
if (post->type == GF_RDMA_SEND_POST) {
gf_rdma_put_post (&post->device->sendq, post);
} else {
@@ -2060,10 +2222,16 @@ __gf_rdma_register_local_mr_for_rdma (gf_rdma_peer_t *peer,
* Infiniband Architecture Specification Volume 1
* (Release 1.2.1)
*/
+ ctx->mr[ctx->mr_count] = gf_rdma_get_pre_registred_mr(
+ peer->trans, (void *)vector[i].iov_base,
+ vector[i].iov_len);
+
+ if (!ctx->mr[ctx->mr_count]) {
ctx->mr[ctx->mr_count] = ibv_reg_mr (device->pd,
vector[i].iov_base,
vector[i].iov_len,
IBV_ACCESS_LOCAL_WRITE);
+ }
if (ctx->mr[ctx->mr_count] == NULL) {
gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
"registering memory for IBV_ACCESS_LOCAL_WRITE "
@@ -4553,6 +4721,7 @@ int32_t
init (rpc_transport_t *this)
{
gf_rdma_private_t *priv = NULL;
+ struct iobuf_pool *iobuf_pool = NULL;
priv = GF_CALLOC (1, sizeof (*priv), gf_common_mt_rdma_private_t);
if (!priv)
@@ -4565,6 +4734,9 @@ init (rpc_transport_t *this)
"Failed to initialize IB Device");
return -1;
}
+ iobuf_pool = this->ctx->iobuf_pool;
+ iobuf_pool->rdma_registration = gf_rdma_register_arena;
+ iobuf_pool->rdma_deregistration = gf_rdma_deregister_arena;
return 0;
}
diff --git a/rpc/rpc-transport/rdma/src/rdma.h b/rpc/rpc-transport/rdma/src/rdma.h
index 7f76244f071..fda01aa53ef 100644
--- a/rpc/rpc-transport/rdma/src/rdma.h
+++ b/rpc/rpc-transport/rdma/src/rdma.h
@@ -34,6 +34,7 @@
/* FIXME: give appropriate values to these macros */
#define GF_DEFAULT_RDMA_LISTEN_PORT (GF_DEFAULT_BASE_PORT + 1)
+
/* If you are changing GF_RDMA_MAX_SEGMENTS, please make sure to update
* GLUSTERFS_GF_RDMA_MAX_HEADER_SIZE defined in glusterfs.h .
*/
@@ -328,9 +329,18 @@ struct __gf_rdma_device {
struct mem_pool *request_ctx_pool;
struct mem_pool *ioq_pool;
struct mem_pool *reply_info_pool;
+ struct list_head all_mr;
};
typedef struct __gf_rdma_device gf_rdma_device_t;
+
+struct __gf_rdma_arena_mr {
+ struct list_head list;
+ struct iobuf_arena *iobuf_arena;
+ struct ibv_mr *mr;
+};
+
+typedef struct __gf_rdma_arena_mr gf_rdma_arena_mr;
struct __gf_rdma_ctx {
gf_rdma_device_t *device;
struct rdma_event_channel *rdma_cm_event_channel;