summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--libglusterfs/src/iobuf.c38
-rw-r--r--libglusterfs/src/iobuf.h12
-rw-r--r--libglusterfs/src/mem-types.h1
-rw-r--r--rpc/rpc-transport/rdma/src/rdma.c200
-rw-r--r--rpc/rpc-transport/rdma/src/rdma.h10
5 files changed, 240 insertions, 21 deletions
diff --git a/libglusterfs/src/iobuf.c b/libglusterfs/src/iobuf.c
index 82ffe2dd8fd..f8f1860889b 100644
--- a/libglusterfs/src/iobuf.c
+++ b/libglusterfs/src/iobuf.c
@@ -50,6 +50,7 @@ gf_iobuf_get_arena_index (size_t page_size)
return i;
}
+
size_t
gf_iobuf_get_pagesize (size_t page_size)
{
@@ -138,10 +139,15 @@ out:
void
-__iobuf_arena_destroy (struct iobuf_arena *iobuf_arena)
+__iobuf_arena_destroy (struct iobuf_pool *iobuf_pool,
+ struct iobuf_arena *iobuf_arena)
{
GF_VALIDATE_OR_GOTO ("iobuf", iobuf_arena, out);
+ if (iobuf_pool->rdma_deregistration)
+ iobuf_pool->rdma_deregistration (iobuf_pool->mr_list,
+ iobuf_arena);
+
__iobuf_arena_destroy_iobufs (iobuf_arena);
if (iobuf_arena->mem_base
@@ -169,6 +175,7 @@ __iobuf_arena_alloc (struct iobuf_pool *iobuf_pool, size_t page_size,
goto err;
INIT_LIST_HEAD (&iobuf_arena->list);
+ INIT_LIST_HEAD (&iobuf_arena->all_list);
INIT_LIST_HEAD (&iobuf_arena->active.list);
INIT_LIST_HEAD (&iobuf_arena->passive.list);
iobuf_arena->iobuf_pool = iobuf_pool;
@@ -188,6 +195,13 @@ __iobuf_arena_alloc (struct iobuf_pool *iobuf_pool, size_t page_size,
goto err;
}
+ if (iobuf_pool->rdma_registration) {
+ iobuf_pool->rdma_registration (iobuf_pool->device,
+ iobuf_arena);
+ }
+
+ list_add_tail (&iobuf_arena->all_list, &iobuf_pool->all_arenas);
+
__iobuf_arena_init_iobufs (iobuf_arena);
if (!iobuf_arena->iobufs) {
gf_log (THIS->name, GF_LOG_ERROR, "init failed");
@@ -199,7 +213,7 @@ __iobuf_arena_alloc (struct iobuf_pool *iobuf_pool, size_t page_size,
return iobuf_arena;
err:
- __iobuf_arena_destroy (iobuf_arena);
+ __iobuf_arena_destroy (iobuf_pool, iobuf_arena);
out:
return NULL;
@@ -258,8 +272,8 @@ __iobuf_pool_add_arena (struct iobuf_pool *iobuf_pool, size_t page_size,
gf_log (THIS->name, GF_LOG_WARNING, "arena not found");
return NULL;
}
+ list_add (&iobuf_arena->list, &iobuf_pool->arenas[index]);
- list_add_tail (&iobuf_arena->list, &iobuf_pool->arenas[index]);
return iobuf_arena;
}
@@ -299,7 +313,8 @@ iobuf_pool_destroy (struct iobuf_pool *iobuf_pool)
&iobuf_pool->arenas[i], list) {
list_del_init (&iobuf_arena->list);
iobuf_pool->arena_cnt--;
- __iobuf_arena_destroy (iobuf_arena);
+
+ __iobuf_arena_destroy (iobuf_pool, iobuf_arena);
}
}
@@ -347,7 +362,7 @@ iobuf_pool_new (void)
gf_common_mt_iobuf_pool);
if (!iobuf_pool)
goto out;
-
+ INIT_LIST_HEAD (&iobuf_pool->all_arenas);
pthread_mutex_init (&iobuf_pool->mutex, NULL);
for (i = 0; i <= IOBUF_ARENA_MAX_INDEX; i++) {
INIT_LIST_HEAD (&iobuf_pool->arenas[i]);
@@ -357,6 +372,16 @@ iobuf_pool_new (void)
iobuf_pool->default_page_size = 128 * GF_UNIT_KB;
+ iobuf_pool->rdma_registration = NULL;
+ iobuf_pool->rdma_deregistration = NULL;
+
+ for (i = 0; i < GF_RDMA_DEVICE_COUNT; i++) {
+
+ iobuf_pool->device[i] = NULL;
+ iobuf_pool->mr_list[i] = NULL;
+
+ }
+
arena_size = 0;
for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
page_size = gf_iobuf_init_config[i].pagesize;
@@ -393,9 +418,10 @@ __iobuf_arena_prune (struct iobuf_pool *iobuf_pool,
/* All cases matched, destroy */
list_del_init (&iobuf_arena->list);
+ list_del_init (&iobuf_arena->all_list);
iobuf_pool->arena_cnt--;
- __iobuf_arena_destroy (iobuf_arena);
+ __iobuf_arena_destroy (iobuf_pool, iobuf_arena);
out:
return;
diff --git a/libglusterfs/src/iobuf.h b/libglusterfs/src/iobuf.h
index 4e07910d722..7e5cfe37a28 100644
--- a/libglusterfs/src/iobuf.h
+++ b/libglusterfs/src/iobuf.h
@@ -19,6 +19,8 @@
#define GF_VARIABLE_IOBUF_COUNT 32
+#define GF_RDMA_DEVICE_COUNT 8
+
/* Lets try to define the new anonymous mapping
* flag, in case the system is still using the
* now deprecated MAP_ANON flag.
@@ -81,6 +83,7 @@ struct iobuf_arena {
};
};
+ struct list_head all_list;
size_t page_size; /* size of all iobufs in this arena */
size_t arena_size; /* this is equal to
(iobuf_pool->arena_size / page_size)
@@ -110,6 +113,7 @@ struct iobuf_pool {
size_t default_page_size; /* default size of iobuf */
int arena_cnt;
+ struct list_head all_arenas;
struct list_head arenas[GF_VARIABLE_IOBUF_COUNT];
/* array of arenas. Each element of the array is a list of arenas
holding iobufs of particular page_size */
@@ -121,7 +125,13 @@ struct iobuf_pool {
/* array of of arenas which can be purged */
uint64_t request_misses; /* mostly the requests for higher
- value of iobufs */
+ value of iobufs */
+ int rdma_device_count;
+ struct list_head *mr_list[GF_RDMA_DEVICE_COUNT];
+ void *device[GF_RDMA_DEVICE_COUNT];
+ int (*rdma_registration)(void **, void*);
+ int (*rdma_deregistration)(struct list_head**, struct iobuf_arena *);
+
};
diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h
index dd24913278f..0db06f52c98 100644
--- a/libglusterfs/src/mem-types.h
+++ b/libglusterfs/src/mem-types.h
@@ -127,6 +127,7 @@ enum gf_common_mem_types_ {
gf_common_mt_regex_t = 111,
gf_common_mt_ereg = 112,
gf_common_mt_wr = 113,
+ gf_common_mt_rdma_arena_mr = 114,
gf_common_mt_end
};
#endif
diff --git a/rpc/rpc-transport/rdma/src/rdma.c b/rpc/rpc-transport/rdma/src/rdma.c
index d3b9c6354b4..e6ed91e05c7 100644
--- a/rpc/rpc-transport/rdma/src/rdma.c
+++ b/rpc/rpc-transport/rdma/src/rdma.c
@@ -15,6 +15,7 @@
#include "dict.h"
#include "glusterfs.h"
+#include "iobuf.h"
#include "logging.h"
#include "rdma.h"
#include "name.h"
@@ -361,6 +362,135 @@ gf_rdma_post_recv (struct ibv_srq *srq,
return ibv_post_srq_recv (srq, &wr, &bad_wr);
}
+int
+gf_rdma_deregister_arena (struct list_head **mr_list,
+ struct iobuf_arena *iobuf_arena)
+{
+ gf_rdma_arena_mr *tmp = NULL;
+ int count = 0, i = 0;
+
+ count = iobuf_arena->iobuf_pool->rdma_device_count;
+ for (i = 0; i < count; i++) {
+ list_for_each_entry(tmp, mr_list[i], list) {
+ if (tmp->iobuf_arena == iobuf_arena) {
+ if (ibv_dereg_mr(tmp->mr)) {
+ gf_log("rdma", GF_LOG_WARNING,
+ "deallocation of memory region "
+ "failed");
+ return -1;
+ }
+ list_del(&tmp->list);
+ GF_FREE(tmp);
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+int
+gf_rdma_register_arena (void **arg1, void *arg2)
+{
+ struct ibv_mr *mr = NULL;
+ gf_rdma_arena_mr *new = NULL;
+ struct iobuf_pool *iobuf_pool = NULL;
+ gf_rdma_device_t **device = (gf_rdma_device_t **)arg1;
+ struct iobuf_arena *iobuf_arena = arg2;
+ int count = 0, i = 0;
+
+ iobuf_pool = iobuf_arena->iobuf_pool;
+ count = iobuf_pool->rdma_device_count;
+ for (i = 0; i < count; i++) {
+ new = GF_CALLOC(1, sizeof(gf_rdma_arena_mr),
+ gf_common_mt_rdma_arena_mr);
+ INIT_LIST_HEAD (&new->list);
+ new->iobuf_arena = iobuf_arena;
+
+ mr = ibv_reg_mr(device[i]->pd, iobuf_arena->mem_base,
+ iobuf_arena->arena_size,
+ IBV_ACCESS_REMOTE_READ |
+ IBV_ACCESS_LOCAL_WRITE |
+ IBV_ACCESS_REMOTE_WRITE
+ );
+ if (!mr)
+ gf_log("rdma", GF_LOG_WARNING,
+ "allocation of mr failed");
+
+ new->mr = mr;
+ list_add (&new->list, &device[i]->all_mr);
+ new = NULL;
+ }
+
+ return 0;
+
+}
+
+static void
+gf_rdma_register_iobuf_pool (rpc_transport_t *this)
+{
+ struct iobuf_pool *iobuf_pool = NULL;
+ struct iobuf_arena *tmp = NULL;
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_device_t *device = NULL;
+ struct ibv_mr *mr = NULL;
+ gf_rdma_arena_mr *new = NULL;
+
+ priv = this->private;
+ device = priv->device;
+ iobuf_pool = this->ctx->iobuf_pool;
+
+ if (!list_empty(&iobuf_pool->all_arenas)) {
+
+ list_for_each_entry (tmp, &iobuf_pool->all_arenas, all_list) {
+ new = GF_CALLOC(1, sizeof(gf_rdma_arena_mr),
+ gf_common_mt_rdma_arena_mr);
+ INIT_LIST_HEAD (&new->list);
+ new->iobuf_arena = tmp;
+
+ mr = ibv_reg_mr(device->pd, tmp->mem_base,
+ tmp->arena_size,
+ IBV_ACCESS_REMOTE_READ |
+ IBV_ACCESS_LOCAL_WRITE |
+ IBV_ACCESS_REMOTE_WRITE);
+ if (!mr) {
+ gf_log ("rdma", GF_LOG_WARNING, "failed to pre"
+ " register buffers with rdma "
+ "devices.");
+
+ }
+ new->mr = mr;
+ list_add (&new->list, &device->all_mr);
+
+ new = NULL;
+ }
+ }
+
+ return;
+}
+
+static struct ibv_mr*
+gf_rdma_get_pre_registred_mr(rpc_transport_t *this, void *ptr, int size)
+{
+ gf_rdma_arena_mr *tmp = NULL;
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_device_t *device = NULL;
+
+ priv = this->private;
+ device = priv->device;
+
+ if (!list_empty(&device->all_mr)) {
+ list_for_each_entry (tmp, &device->all_mr, list) {
+ if (tmp->iobuf_arena->mem_base <= ptr &&
+ ptr < tmp->iobuf_arena->mem_base +
+ tmp->iobuf_arena->arena_size)
+ return tmp->mr;
+ }
+ }
+
+ return NULL;
+}
static int32_t
gf_rdma_create_posts (rpc_transport_t *this)
@@ -510,11 +640,13 @@ gf_rdma_get_device (rpc_transport_t *this, struct ibv_context *ibctx,
int32_t i = 0;
gf_rdma_device_t *trav = NULL, *device = NULL;
gf_rdma_ctx_t *rdma_ctx = NULL;
+ struct iobuf_pool *iobuf_pool = NULL;
priv = this->private;
options = &priv->options;
ctx = this->ctx;
rdma_ctx = ctx->ib;
+ iobuf_pool = ctx->iobuf_pool;
trav = rdma_ctx->device;
@@ -530,10 +662,10 @@ gf_rdma_get_device (rpc_transport_t *this, struct ibv_context *ibctx,
if (trav == NULL) {
goto out;
}
-
priv->device = trav;
trav->context = ibctx;
-
+ iobuf_pool->device[iobuf_pool->rdma_device_count] = trav;
+ iobuf_pool->mr_list[iobuf_pool->rdma_device_count++] = &trav->all_mr;
trav->request_ctx_pool
= mem_pool_new (gf_rdma_request_context_t,
GF_RDMA_POOL_SIZE);
@@ -613,6 +745,9 @@ gf_rdma_get_device (rpc_transport_t *this, struct ibv_context *ibctx,
gf_rdma_queue_init (&trav->sendq);
gf_rdma_queue_init (&trav->recvq);
+ INIT_LIST_HEAD (&trav->all_mr);
+ gf_rdma_register_iobuf_pool(this);
+
if (gf_rdma_create_posts (this) < 0) {
gf_log (this->name, GF_LOG_ERROR,
"could not allocate posts for device (%s)",
@@ -1239,9 +1374,13 @@ __gf_rdma_create_read_chunks_from_vector (gf_rdma_peer_t *peer,
readch->rc_discrim = hton32 (1);
readch->rc_position = hton32 (*pos);
+ mr = gf_rdma_get_pre_registred_mr(peer->trans,
+ (void *)vector[i].iov_base, vector[i].iov_len);
+ if (!mr) {
mr = ibv_reg_mr (device->pd, vector[i].iov_base,
vector[i].iov_len,
IBV_ACCESS_REMOTE_READ);
+ }
if (!mr) {
gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
"memory registration failed (%s) (peer:%s)",
@@ -1374,10 +1513,16 @@ __gf_rdma_create_write_chunks_from_vector (gf_rdma_peer_t *peer,
device = priv->device;
for (i = 0; i < count; i++) {
+
+ mr = gf_rdma_get_pre_registred_mr(peer->trans,
+ (void *)vector[i].iov_base, vector[i].iov_len);
+ if (!mr) {
mr = ibv_reg_mr (device->pd, vector[i].iov_base,
vector[i].iov_len,
IBV_ACCESS_REMOTE_WRITE
| IBV_ACCESS_LOCAL_WRITE);
+ }
+
if (!mr) {
gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
"memory registration failed (%s) (peer:%s)",
@@ -1504,16 +1649,30 @@ out:
static inline void
-__gf_rdma_deregister_mr (struct ibv_mr **mr, int count)
+__gf_rdma_deregister_mr (gf_rdma_device_t *device,
+ struct ibv_mr **mr, int count)
{
- int i = 0;
+ gf_rdma_arena_mr *tmp = NULL;
+ int i = 0;
+ int found = 0;
- if (mr == NULL) {
+ if (mr == NULL) {
goto out;
}
for (i = 0; i < count; i++) {
- ibv_dereg_mr (mr[i]);
+ found = 0;
+ if (!list_empty(&device->all_mr)) {
+ list_for_each_entry(tmp, &device->all_mr, list) {
+ if (tmp->mr == mr[i]) {
+ found = 1;
+ break;
+ }
+ }
+ }
+ if (!found)
+ ibv_dereg_mr (mr[i]);
+
}
out:
@@ -1558,9 +1717,10 @@ gf_rdma_quota_put (gf_rdma_peer_t *peer)
void
__gf_rdma_request_context_destroy (gf_rdma_request_context_t *context)
{
- gf_rdma_peer_t *peer = NULL;
- gf_rdma_private_t *priv = NULL;
- int32_t ret = 0;
+ gf_rdma_peer_t *peer = NULL;
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_device_t *device = NULL;
+ int32_t ret = 0;
if (context == NULL) {
goto out;
@@ -1568,9 +1728,10 @@ __gf_rdma_request_context_destroy (gf_rdma_request_context_t *context)
peer = context->peer;
- __gf_rdma_deregister_mr (context->mr, context->mr_count);
-
priv = peer->trans->private;
+ device = priv->device;
+ __gf_rdma_deregister_mr (device, context->mr, context->mr_count);
+
if (priv->connected) {
ret = __gf_rdma_quota_put (peer);
@@ -1602,13 +1763,14 @@ out:
void
-gf_rdma_post_context_destroy (gf_rdma_post_context_t *ctx)
+gf_rdma_post_context_destroy (gf_rdma_device_t *device,
+ gf_rdma_post_context_t *ctx)
{
if (ctx == NULL) {
goto out;
}
- __gf_rdma_deregister_mr (ctx->mr, ctx->mr_count);
+ __gf_rdma_deregister_mr (device, ctx->mr, ctx->mr_count);
if (ctx->iobref != NULL) {
iobref_unref (ctx->iobref);
@@ -1640,7 +1802,7 @@ gf_rdma_post_unref (gf_rdma_post_t *post)
pthread_mutex_unlock (&post->lock);
if (refcount == 0) {
- gf_rdma_post_context_destroy (&post->ctx);
+ gf_rdma_post_context_destroy (post->device, &post->ctx);
if (post->type == GF_RDMA_SEND_POST) {
gf_rdma_put_post (&post->device->sendq, post);
} else {
@@ -2060,10 +2222,16 @@ __gf_rdma_register_local_mr_for_rdma (gf_rdma_peer_t *peer,
* Infiniband Architecture Specification Volume 1
* (Release 1.2.1)
*/
+ ctx->mr[ctx->mr_count] = gf_rdma_get_pre_registred_mr(
+ peer->trans, (void *)vector[i].iov_base,
+ vector[i].iov_len);
+
+ if (!ctx->mr[ctx->mr_count]) {
ctx->mr[ctx->mr_count] = ibv_reg_mr (device->pd,
vector[i].iov_base,
vector[i].iov_len,
IBV_ACCESS_LOCAL_WRITE);
+ }
if (ctx->mr[ctx->mr_count] == NULL) {
gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
"registering memory for IBV_ACCESS_LOCAL_WRITE "
@@ -4553,6 +4721,7 @@ int32_t
init (rpc_transport_t *this)
{
gf_rdma_private_t *priv = NULL;
+ struct iobuf_pool *iobuf_pool = NULL;
priv = GF_CALLOC (1, sizeof (*priv), gf_common_mt_rdma_private_t);
if (!priv)
@@ -4565,6 +4734,9 @@ init (rpc_transport_t *this)
"Failed to initialize IB Device");
return -1;
}
+ iobuf_pool = this->ctx->iobuf_pool;
+ iobuf_pool->rdma_registration = gf_rdma_register_arena;
+ iobuf_pool->rdma_deregistration = gf_rdma_deregister_arena;
return 0;
}
diff --git a/rpc/rpc-transport/rdma/src/rdma.h b/rpc/rpc-transport/rdma/src/rdma.h
index 7f76244f071..fda01aa53ef 100644
--- a/rpc/rpc-transport/rdma/src/rdma.h
+++ b/rpc/rpc-transport/rdma/src/rdma.h
@@ -34,6 +34,7 @@
/* FIXME: give appropriate values to these macros */
#define GF_DEFAULT_RDMA_LISTEN_PORT (GF_DEFAULT_BASE_PORT + 1)
+
/* If you are changing GF_RDMA_MAX_SEGMENTS, please make sure to update
* GLUSTERFS_GF_RDMA_MAX_HEADER_SIZE defined in glusterfs.h .
*/
@@ -328,9 +329,18 @@ struct __gf_rdma_device {
struct mem_pool *request_ctx_pool;
struct mem_pool *ioq_pool;
struct mem_pool *reply_info_pool;
+ struct list_head all_mr;
};
typedef struct __gf_rdma_device gf_rdma_device_t;
+
/* Tracking node for one ibv memory registration: associates an iobuf
 * arena with the mr handle returned by ibv_reg_mr() for it.  Nodes are
 * linked into the owning device's all_mr list. */
struct __gf_rdma_arena_mr {
        struct list_head list;           /* member of device->all_mr */
        struct iobuf_arena *iobuf_arena; /* arena this registration covers */
        struct ibv_mr *mr;               /* handle from ibv_reg_mr() */
};

typedef struct __gf_rdma_arena_mr gf_rdma_arena_mr;
struct __gf_rdma_ctx {
gf_rdma_device_t *device;
struct rdma_event_channel *rdma_cm_event_channel;