author     Amar Tumballi <amarts@redhat.com>    2019-01-04 07:04:50 +0000
committer  Amar Tumballi <amarts@redhat.com>    2019-01-08 11:16:03 +0000
commit     37653efdc7681d1b0f255054ec2f9c9ddd4c8b14 (patch)
tree       61688051f8f374ea6a1b661ef357844a477361f6 /rpc
parent     054c7ea91603acfcb01db8455b25dda7e5e831b2 (diff)
Revert "iobuf: Get rid of pre allocated iobuf_pool and use per thread mem pool"
This reverts commit b87c397091bac6a4a6dec4e45a7671fad4a11770. There seems
to be a performance regression with the patch, so it is recommended to
revert it.

Updates: #325
Change-Id: Id85d6203173a44fad6cf51d39b3e96f37afcec09
Diffstat (limited to 'rpc')
-rw-r--r--  rpc/rpc-transport/rdma/src/rdma.c  | 251
-rw-r--r--  rpc/rpc-transport/rdma/src/rdma.h  |   1
2 files changed, 248 insertions, 4 deletions
diff --git a/rpc/rpc-transport/rdma/src/rdma.c b/rpc/rpc-transport/rdma/src/rdma.c
index edf4e13bd48..bb36dac67f8 100644
--- a/rpc/rpc-transport/rdma/src/rdma.c
+++ b/rpc/rpc-transport/rdma/src/rdma.c
@@ -344,6 +344,207 @@ gf_rdma_post_recv(struct ibv_srq *srq, gf_rdma_post_t *post)
     return ibv_post_srq_recv(srq, &wr, &bad_wr);
 }
+static void
+gf_rdma_deregister_iobuf_pool(gf_rdma_device_t *device)
+{
+    gf_rdma_arena_mr *arena_mr = NULL;
+    gf_rdma_arena_mr *tmp = NULL;
+
+    while (device) {
+        pthread_mutex_lock(&device->all_mr_lock);
+        {
+            if (!list_empty(&device->all_mr)) {
+                list_for_each_entry_safe(arena_mr, tmp, &device->all_mr, list)
+                {
+                    if (ibv_dereg_mr(arena_mr->mr)) {
+                        gf_msg("rdma", GF_LOG_WARNING, 0,
+                               RDMA_MSG_DEREGISTER_ARENA_FAILED,
+                               "deallocation of memory region "
+                               "failed");
+                        pthread_mutex_unlock(&device->all_mr_lock);
+                        return;
+                    }
+                    list_del(&arena_mr->list);
+                    GF_FREE(arena_mr);
+                }
+            }
+        }
+        pthread_mutex_unlock(&device->all_mr_lock);
+
+        device = device->next;
+    }
+}
+
+int
+gf_rdma_deregister_arena(struct list_head **mr_list,
+                         struct iobuf_arena *iobuf_arena)
+{
+    gf_rdma_arena_mr *tmp = NULL;
+    gf_rdma_arena_mr *dummy = NULL;
+    gf_rdma_device_t *device = NULL;
+    int count = 0, i = 0;
+
+    count = iobuf_arena->iobuf_pool->rdma_device_count;
+    for (i = 0; i < count; i++) {
+        device = iobuf_arena->iobuf_pool->device[i];
+        pthread_mutex_lock(&device->all_mr_lock);
+        {
+            list_for_each_entry_safe(tmp, dummy, mr_list[i], list)
+            {
+                if (tmp->iobuf_arena == iobuf_arena) {
+                    if (ibv_dereg_mr(tmp->mr)) {
+                        gf_msg("rdma", GF_LOG_WARNING, 0,
+                               RDMA_MSG_DEREGISTER_ARENA_FAILED,
+                               "deallocation of memory region "
+                               "failed");
+                        pthread_mutex_unlock(&device->all_mr_lock);
+                        return -1;
+                    }
+                    list_del(&tmp->list);
+                    GF_FREE(tmp);
+                    break;
+                }
+            }
+        }
+        pthread_mutex_unlock(&device->all_mr_lock);
+    }
+
+    return 0;
+}
+
+int
+gf_rdma_register_arena(void **arg1, void *arg2)
+{
+    struct ibv_mr *mr = NULL;
+    gf_rdma_arena_mr *new = NULL;
+    struct iobuf_pool *iobuf_pool = NULL;
+    gf_rdma_device_t **device = (gf_rdma_device_t **)arg1;
+    struct iobuf_arena *iobuf_arena = arg2;
+    int count = 0, i = 0;
+
+    iobuf_pool = iobuf_arena->iobuf_pool;
+    count = iobuf_pool->rdma_device_count;
+    for (i = 0; i < count; i++) {
+        new = GF_CALLOC(1, sizeof(gf_rdma_arena_mr),
+                        gf_common_mt_rdma_arena_mr);
+        if (new == NULL) {
+            gf_msg("rdma", GF_LOG_INFO, ENOMEM, RDMA_MSG_MR_ALOC_FAILED,
+                   "Out of "
+                   "memory: registering pre allocated buffer "
+                   "with rdma device failed.");
+            return -1;
+        }
+        INIT_LIST_HEAD(&new->list);
+        new->iobuf_arena = iobuf_arena;
+
+        mr = ibv_reg_mr(device[i]->pd, iobuf_arena->mem_base,
+                        iobuf_arena->arena_size,
+                        IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE |
+                            IBV_ACCESS_REMOTE_WRITE);
+        if (!mr)
+            gf_msg("rdma", GF_LOG_WARNING, 0, RDMA_MSG_MR_ALOC_FAILED,
+                   "allocation of mr "
+                   "failed");
+
+        new->mr = mr;
+        pthread_mutex_lock(&device[i]->all_mr_lock);
+        {
+            list_add(&new->list, &device[i]->all_mr);
+        }
+        pthread_mutex_unlock(&device[i]->all_mr_lock);
+        new = NULL;
+    }
+
+    return 0;
+}
+
+static void
+gf_rdma_register_iobuf_pool(gf_rdma_device_t *device,
+                            struct iobuf_pool *iobuf_pool)
+{
+    struct iobuf_arena *tmp = NULL;
+    struct iobuf_arena *dummy = NULL;
+    struct ibv_mr *mr = NULL;
+    gf_rdma_arena_mr *new = NULL;
+
+    if (!list_empty(&iobuf_pool->all_arenas)) {
+        list_for_each_entry_safe(tmp, dummy, &iobuf_pool->all_arenas, all_list)
+        {
+            new = GF_CALLOC(1, sizeof(gf_rdma_arena_mr),
+                            gf_common_mt_rdma_arena_mr);
+            if (new == NULL) {
+                gf_msg("rdma", GF_LOG_INFO, ENOMEM, RDMA_MSG_MR_ALOC_FAILED,
+                       "Out of "
+                       "memory: registering pre allocated "
+                       "buffer with rdma device failed.");
+                return;
+            }
+            INIT_LIST_HEAD(&new->list);
+            new->iobuf_arena = tmp;
+
+            mr = ibv_reg_mr(device->pd, tmp->mem_base, tmp->arena_size,
+                            IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE |
+                                IBV_ACCESS_REMOTE_WRITE);
+            if (!mr) {
+                gf_msg("rdma", GF_LOG_WARNING, 0, RDMA_MSG_MR_ALOC_FAILED,
+                       "failed"
+                       " to pre register buffers with rdma "
+                       "devices.");
+            }
+            new->mr = mr;
+            pthread_mutex_lock(&device->all_mr_lock);
+            {
+                list_add(&new->list, &device->all_mr);
+            }
+            pthread_mutex_unlock(&device->all_mr_lock);
+
+            new = NULL;
+        }
+    }
+
+    return;
+}
+
+static void
+gf_rdma_register_iobuf_pool_with_device(gf_rdma_device_t *device,
+                                        struct iobuf_pool *iobuf_pool)
+{
+    while (device) {
+        gf_rdma_register_iobuf_pool(device, iobuf_pool);
+        device = device->next;
+    }
+}
+
+static struct ibv_mr *
+gf_rdma_get_pre_registred_mr(rpc_transport_t *this, void *ptr, int size)
+{
+    gf_rdma_arena_mr *tmp = NULL;
+    gf_rdma_arena_mr *dummy = NULL;
+    gf_rdma_private_t *priv = NULL;
+    gf_rdma_device_t *device = NULL;
+
+    priv = this->private;
+    device = priv->device;
+
+    pthread_mutex_lock(&device->all_mr_lock);
+    {
+        if (!list_empty(&device->all_mr)) {
+            list_for_each_entry_safe(tmp, dummy, &device->all_mr, list)
+            {
+                if (tmp->iobuf_arena->mem_base <= ptr &&
+                    ptr < tmp->iobuf_arena->mem_base +
+                              tmp->iobuf_arena->arena_size) {
+                    pthread_mutex_unlock(&device->all_mr_lock);
+                    return tmp->mr;
+                }
+            }
+        }
+    }
+    pthread_mutex_unlock(&device->all_mr_lock);
+
+    return NULL;
+}
+
 static int32_t
 gf_rdma_create_posts(rpc_transport_t *this)
 {
@@ -492,11 +693,13 @@ gf_rdma_get_device(rpc_transport_t *this, struct ibv_context *ibctx,
     int32_t i = 0;
     gf_rdma_device_t *trav = NULL, *device = NULL;
     gf_rdma_ctx_t *rdma_ctx = NULL;
+    struct iobuf_pool *iobuf_pool = NULL;
     priv = this->private;
     options = &priv->options;
     ctx = this->ctx;
     rdma_ctx = ctx->ib;
+    iobuf_pool = ctx->iobuf_pool;
     trav = rdma_ctx->device;
@@ -517,6 +720,8 @@ gf_rdma_get_device(rpc_transport_t *this, struct ibv_context *ibctx,
         trav->next = rdma_ctx->device;
         rdma_ctx->device = trav;
+        iobuf_pool->device[iobuf_pool->rdma_device_count] = trav;
+        iobuf_pool->mr_list[iobuf_pool->rdma_device_count++] = &trav->all_mr;
         trav->request_ctx_pool = mem_pool_new(gf_rdma_request_context_t,
                                               GF_RDMA_POOL_SIZE);
         if (trav->request_ctx_pool == NULL) {
@@ -594,6 +799,7 @@ gf_rdma_get_device(rpc_transport_t *this, struct ibv_context *ibctx,
         INIT_LIST_HEAD(&trav->all_mr);
         pthread_mutex_init(&trav->all_mr_lock, NULL);
+        gf_rdma_register_iobuf_pool(trav, iobuf_pool);
         if (gf_rdma_create_posts(this) < 0) {
             gf_msg(this->name, GF_LOG_ERROR, 0, RDMA_MSG_ALOC_POST_FAILED,
@@ -1229,8 +1435,12 @@ __gf_rdma_create_read_chunks_from_vector(gf_rdma_peer_t *peer,
         readch->rc_discrim = hton32(1);
         readch->rc_position = hton32(*pos);
-        mr = ibv_reg_mr(device->pd, vector[i].iov_base, vector[i].iov_len,
-                        IBV_ACCESS_REMOTE_READ);
+        mr = gf_rdma_get_pre_registred_mr(
+            peer->trans, (void *)vector[i].iov_base, vector[i].iov_len);
+        if (!mr) {
+            mr = ibv_reg_mr(device->pd, vector[i].iov_base, vector[i].iov_len,
+                            IBV_ACCESS_REMOTE_READ);
+        }
         if (!mr) {
             gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, errno,
                    RDMA_MSG_MR_ALOC_FAILED,
@@ -1351,8 +1561,13 @@ __gf_rdma_create_write_chunks_from_vector(
     device = priv->device;
     for (i = 0; i < count; i++) {
-        mr = ibv_reg_mr(device->pd, vector[i].iov_base, vector[i].iov_len,
-                        IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);
+        mr = gf_rdma_get_pre_registred_mr(
+            peer->trans, (void *)vector[i].iov_base, vector[i].iov_len);
+        if (!mr) {
+            mr = ibv_reg_mr(device->pd, vector[i].iov_base, vector[i].iov_len,
+                            IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);
+        }
+
         if (!mr) {
             gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, errno,
                    RDMA_MSG_MR_ALOC_FAILED,
@@ -2033,6 +2248,9 @@ __gf_rdma_register_local_mr_for_rdma(gf_rdma_peer_t *peer, struct iovec *vector,
          * Infiniband Architecture Specification Volume 1
          * (Release 1.2.1)
          */
+        ctx->mr[ctx->mr_count] = gf_rdma_get_pre_registred_mr(
+            peer->trans, (void *)vector[i].iov_base, vector[i].iov_len);
+
         if (!ctx->mr[ctx->mr_count]) {
             ctx->mr[ctx->mr_count] = ibv_reg_mr(device->pd, vector[i].iov_base,
                                                 vector[i].iov_len,
@@ -4551,6 +4769,7 @@ init(rpc_transport_t *this)
 {
     gf_rdma_private_t *priv = NULL;
     gf_rdma_ctx_t *rdma_ctx = NULL;
+    struct iobuf_pool *iobuf_pool = NULL;
     priv = GF_CALLOC(1, sizeof(*priv), gf_common_mt_rdma_private_t);
     if (!priv)
@@ -4569,6 +4788,18 @@ init(rpc_transport_t *this)
     if (!rdma_ctx)
         return -1;
+    pthread_mutex_lock(&rdma_ctx->lock);
+    {
+        if (this->dl_handle && (++(rdma_ctx->dlcount)) == 1) {
+            iobuf_pool = this->ctx->iobuf_pool;
+            iobuf_pool->rdma_registration = gf_rdma_register_arena;
+            iobuf_pool->rdma_deregistration = gf_rdma_deregister_arena;
+            gf_rdma_register_iobuf_pool_with_device(rdma_ctx->device,
+                                                    iobuf_pool);
+        }
+    }
+    pthread_mutex_unlock(&rdma_ctx->lock);
+
     return 0;
 }
@@ -4600,6 +4831,7 @@ fini(struct rpc_transport *this)
 {
     /* TODO: verify this function does graceful finish */
     gf_rdma_private_t *priv = NULL;
+    struct iobuf_pool *iobuf_pool = NULL;
     gf_rdma_ctx_t *rdma_ctx = NULL;
     priv = this->private;
@@ -4618,6 +4850,17 @@ fini(struct rpc_transport *this)
     if (!rdma_ctx)
         return;
+    pthread_mutex_lock(&rdma_ctx->lock);
+    {
+        if (this->dl_handle && (--(rdma_ctx->dlcount)) == 0) {
+            iobuf_pool = this->ctx->iobuf_pool;
+            gf_rdma_deregister_iobuf_pool(rdma_ctx->device);
+            iobuf_pool->rdma_registration = NULL;
+            iobuf_pool->rdma_deregistration = NULL;
+        }
+    }
+    pthread_mutex_unlock(&rdma_ctx->lock);
+
     return;
 }
diff --git a/rpc/rpc-transport/rdma/src/rdma.h b/rpc/rpc-transport/rdma/src/rdma.h
index 34a32070e49..403f5678ad8 100644
--- a/rpc/rpc-transport/rdma/src/rdma.h
+++ b/rpc/rpc-transport/rdma/src/rdma.h
@@ -325,6 +325,7 @@ typedef struct __gf_rdma_device gf_rdma_device_t;
 struct __gf_rdma_arena_mr {
     struct list_head list;
+    struct iobuf_arena *iobuf_arena;
     struct ibv_mr *mr;
 };