summaryrefslogtreecommitdiffstats
path: root/rpc/rpc-transport/rdma/src/rdma.c
diff options
context:
space:
mode:
authorPoornima G <pgurusid@redhat.com>2018-11-21 12:09:39 +0530
committerAmar Tumballi <amarts@redhat.com>2018-12-18 09:35:24 +0000
commitb87c397091bac6a4a6dec4e45a7671fad4a11770 (patch)
tree6f7eeff5be2ae69af0eba03add10103091639a6c /rpc/rpc-transport/rdma/src/rdma.c
parentd50f22e6ae410fdcde573b6015b97dc1573bbb7e (diff)
iobuf: Get rid of pre allocated iobuf_pool and use per thread mem pool
The current implementation of iobuf_pool has two problems: - preallocation of 12.5MB of memory, which limits the scale factor of the gluster processes due to RAM requirements - lock contention, as the current implementation has one global iobuf_pool lock. Credit for debugging and addressing these problems goes to Krutika Dhananjay <kdhananj@redhat.com>. Issue: #410 Hence changing the iobuf implementation to use a per-thread mem pool. This may theoretically appear to cause a perf dip, as there is no preallocation. But the per-thread mem pool will not have a significant perf impact, as the last allocated memory is kept alive for subsequent allocs for some time. The worst case would be if the iobufs requested are of random sizes each time. The best case is if we get iobuf requests of the same size. In the perf tests, this patch did not seem to cause any perf decrease. Note that, with this patch, the rdma performance is going to degrade drastically. In one of the previous patchsets we had fixes to not degrade rdma perf, but rdma is not supported and also not tested [1]. Hence the decision was to not have code in rdma that is not tested and not supported. [1] https://lists.gluster.org/pipermail/gluster-users.old/2018-July/034400.html Updates: #325 Change-Id: Ic2ef3bd498f9250dea25f25ba0c01fde19584b27 Signed-off-by: Poornima G <pgurusid@redhat.com>
Diffstat (limited to 'rpc/rpc-transport/rdma/src/rdma.c')
-rw-r--r--rpc/rpc-transport/rdma/src/rdma.c251
1 files changed, 4 insertions, 247 deletions
diff --git a/rpc/rpc-transport/rdma/src/rdma.c b/rpc/rpc-transport/rdma/src/rdma.c
index 45605c8..ac984c1 100644
--- a/rpc/rpc-transport/rdma/src/rdma.c
+++ b/rpc/rpc-transport/rdma/src/rdma.c
@@ -344,207 +344,6 @@ gf_rdma_post_recv(struct ibv_srq *srq, gf_rdma_post_t *post)
return ibv_post_srq_recv(srq, &wr, &bad_wr);
}
-static void
-gf_rdma_deregister_iobuf_pool(gf_rdma_device_t *device)
-{
- gf_rdma_arena_mr *arena_mr = NULL;
- gf_rdma_arena_mr *tmp = NULL;
-
- while (device) {
- pthread_mutex_lock(&device->all_mr_lock);
- {
- if (!list_empty(&device->all_mr)) {
- list_for_each_entry_safe(arena_mr, tmp, &device->all_mr, list)
- {
- if (ibv_dereg_mr(arena_mr->mr)) {
- gf_msg("rdma", GF_LOG_WARNING, 0,
- RDMA_MSG_DEREGISTER_ARENA_FAILED,
- "deallocation of memory region "
- "failed");
- pthread_mutex_unlock(&device->all_mr_lock);
- return;
- }
- list_del(&arena_mr->list);
- GF_FREE(arena_mr);
- }
- }
- }
- pthread_mutex_unlock(&device->all_mr_lock);
-
- device = device->next;
- }
-}
-
-int
-gf_rdma_deregister_arena(struct list_head **mr_list,
- struct iobuf_arena *iobuf_arena)
-{
- gf_rdma_arena_mr *tmp = NULL;
- gf_rdma_arena_mr *dummy = NULL;
- gf_rdma_device_t *device = NULL;
- int count = 0, i = 0;
-
- count = iobuf_arena->iobuf_pool->rdma_device_count;
- for (i = 0; i < count; i++) {
- device = iobuf_arena->iobuf_pool->device[i];
- pthread_mutex_lock(&device->all_mr_lock);
- {
- list_for_each_entry_safe(tmp, dummy, mr_list[i], list)
- {
- if (tmp->iobuf_arena == iobuf_arena) {
- if (ibv_dereg_mr(tmp->mr)) {
- gf_msg("rdma", GF_LOG_WARNING, 0,
- RDMA_MSG_DEREGISTER_ARENA_FAILED,
- "deallocation of memory region "
- "failed");
- pthread_mutex_unlock(&device->all_mr_lock);
- return -1;
- }
- list_del(&tmp->list);
- GF_FREE(tmp);
- break;
- }
- }
- }
- pthread_mutex_unlock(&device->all_mr_lock);
- }
-
- return 0;
-}
-
-int
-gf_rdma_register_arena(void **arg1, void *arg2)
-{
- struct ibv_mr *mr = NULL;
- gf_rdma_arena_mr *new = NULL;
- struct iobuf_pool *iobuf_pool = NULL;
- gf_rdma_device_t **device = (gf_rdma_device_t **)arg1;
- struct iobuf_arena *iobuf_arena = arg2;
- int count = 0, i = 0;
-
- iobuf_pool = iobuf_arena->iobuf_pool;
- count = iobuf_pool->rdma_device_count;
- for (i = 0; i < count; i++) {
- new = GF_CALLOC(1, sizeof(gf_rdma_arena_mr),
- gf_common_mt_rdma_arena_mr);
- if (new == NULL) {
- gf_msg("rdma", GF_LOG_INFO, ENOMEM, RDMA_MSG_MR_ALOC_FAILED,
- "Out of "
- "memory: registering pre allocated buffer "
- "with rdma device failed.");
- return -1;
- }
- INIT_LIST_HEAD(&new->list);
- new->iobuf_arena = iobuf_arena;
-
- mr = ibv_reg_mr(device[i]->pd, iobuf_arena->mem_base,
- iobuf_arena->arena_size,
- IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE |
- IBV_ACCESS_REMOTE_WRITE);
- if (!mr)
- gf_msg("rdma", GF_LOG_WARNING, 0, RDMA_MSG_MR_ALOC_FAILED,
- "allocation of mr "
- "failed");
-
- new->mr = mr;
- pthread_mutex_lock(&device[i]->all_mr_lock);
- {
- list_add(&new->list, &device[i]->all_mr);
- }
- pthread_mutex_unlock(&device[i]->all_mr_lock);
- new = NULL;
- }
-
- return 0;
-}
-
-static void
-gf_rdma_register_iobuf_pool(gf_rdma_device_t *device,
- struct iobuf_pool *iobuf_pool)
-{
- struct iobuf_arena *tmp = NULL;
- struct iobuf_arena *dummy = NULL;
- struct ibv_mr *mr = NULL;
- gf_rdma_arena_mr *new = NULL;
-
- if (!list_empty(&iobuf_pool->all_arenas)) {
- list_for_each_entry_safe(tmp, dummy, &iobuf_pool->all_arenas, all_list)
- {
- new = GF_CALLOC(1, sizeof(gf_rdma_arena_mr),
- gf_common_mt_rdma_arena_mr);
- if (new == NULL) {
- gf_msg("rdma", GF_LOG_INFO, ENOMEM, RDMA_MSG_MR_ALOC_FAILED,
- "Out of "
- "memory: registering pre allocated "
- "buffer with rdma device failed.");
- return;
- }
- INIT_LIST_HEAD(&new->list);
- new->iobuf_arena = tmp;
-
- mr = ibv_reg_mr(device->pd, tmp->mem_base, tmp->arena_size,
- IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE |
- IBV_ACCESS_REMOTE_WRITE);
- if (!mr) {
- gf_msg("rdma", GF_LOG_WARNING, 0, RDMA_MSG_MR_ALOC_FAILED,
- "failed"
- " to pre register buffers with rdma "
- "devices.");
- }
- new->mr = mr;
- pthread_mutex_lock(&device->all_mr_lock);
- {
- list_add(&new->list, &device->all_mr);
- }
- pthread_mutex_unlock(&device->all_mr_lock);
-
- new = NULL;
- }
- }
-
- return;
-}
-
-static void
-gf_rdma_register_iobuf_pool_with_device(gf_rdma_device_t *device,
- struct iobuf_pool *iobuf_pool)
-{
- while (device) {
- gf_rdma_register_iobuf_pool(device, iobuf_pool);
- device = device->next;
- }
-}
-
-static struct ibv_mr *
-gf_rdma_get_pre_registred_mr(rpc_transport_t *this, void *ptr, int size)
-{
- gf_rdma_arena_mr *tmp = NULL;
- gf_rdma_arena_mr *dummy = NULL;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
-
- priv = this->private;
- device = priv->device;
-
- pthread_mutex_lock(&device->all_mr_lock);
- {
- if (!list_empty(&device->all_mr)) {
- list_for_each_entry_safe(tmp, dummy, &device->all_mr, list)
- {
- if (tmp->iobuf_arena->mem_base <= ptr &&
- ptr < tmp->iobuf_arena->mem_base +
- tmp->iobuf_arena->arena_size) {
- pthread_mutex_unlock(&device->all_mr_lock);
- return tmp->mr;
- }
- }
- }
- }
- pthread_mutex_unlock(&device->all_mr_lock);
-
- return NULL;
-}
-
static int32_t
gf_rdma_create_posts(rpc_transport_t *this)
{
@@ -693,13 +492,11 @@ gf_rdma_get_device(rpc_transport_t *this, struct ibv_context *ibctx,
int32_t i = 0;
gf_rdma_device_t *trav = NULL, *device = NULL;
gf_rdma_ctx_t *rdma_ctx = NULL;
- struct iobuf_pool *iobuf_pool = NULL;
priv = this->private;
options = &priv->options;
ctx = this->ctx;
rdma_ctx = ctx->ib;
- iobuf_pool = ctx->iobuf_pool;
trav = rdma_ctx->device;
@@ -720,8 +517,6 @@ gf_rdma_get_device(rpc_transport_t *this, struct ibv_context *ibctx,
trav->next = rdma_ctx->device;
rdma_ctx->device = trav;
- iobuf_pool->device[iobuf_pool->rdma_device_count] = trav;
- iobuf_pool->mr_list[iobuf_pool->rdma_device_count++] = &trav->all_mr;
trav->request_ctx_pool = mem_pool_new(gf_rdma_request_context_t,
GF_RDMA_POOL_SIZE);
if (trav->request_ctx_pool == NULL) {
@@ -799,7 +594,6 @@ gf_rdma_get_device(rpc_transport_t *this, struct ibv_context *ibctx,
INIT_LIST_HEAD(&trav->all_mr);
pthread_mutex_init(&trav->all_mr_lock, NULL);
- gf_rdma_register_iobuf_pool(trav, iobuf_pool);
if (gf_rdma_create_posts(this) < 0) {
gf_msg(this->name, GF_LOG_ERROR, 0, RDMA_MSG_ALOC_POST_FAILED,
@@ -1435,12 +1229,8 @@ __gf_rdma_create_read_chunks_from_vector(gf_rdma_peer_t *peer,
readch->rc_discrim = hton32(1);
readch->rc_position = hton32(*pos);
- mr = gf_rdma_get_pre_registred_mr(
- peer->trans, (void *)vector[i].iov_base, vector[i].iov_len);
- if (!mr) {
- mr = ibv_reg_mr(device->pd, vector[i].iov_base, vector[i].iov_len,
- IBV_ACCESS_REMOTE_READ);
- }
+ mr = ibv_reg_mr(device->pd, vector[i].iov_base, vector[i].iov_len,
+ IBV_ACCESS_REMOTE_READ);
if (!mr) {
gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, errno,
RDMA_MSG_MR_ALOC_FAILED,
@@ -1561,13 +1351,8 @@ __gf_rdma_create_write_chunks_from_vector(
device = priv->device;
for (i = 0; i < count; i++) {
- mr = gf_rdma_get_pre_registred_mr(
- peer->trans, (void *)vector[i].iov_base, vector[i].iov_len);
- if (!mr) {
- mr = ibv_reg_mr(device->pd, vector[i].iov_base, vector[i].iov_len,
- IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);
- }
-
+ mr = ibv_reg_mr(device->pd, vector[i].iov_base, vector[i].iov_len,
+ IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);
if (!mr) {
gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, errno,
RDMA_MSG_MR_ALOC_FAILED,
@@ -2248,9 +2033,6 @@ __gf_rdma_register_local_mr_for_rdma(gf_rdma_peer_t *peer, struct iovec *vector,
* Infiniband Architecture Specification Volume 1
* (Release 1.2.1)
*/
- ctx->mr[ctx->mr_count] = gf_rdma_get_pre_registred_mr(
- peer->trans, (void *)vector[i].iov_base, vector[i].iov_len);
-
if (!ctx->mr[ctx->mr_count]) {
ctx->mr[ctx->mr_count] = ibv_reg_mr(device->pd, vector[i].iov_base,
vector[i].iov_len,
@@ -4764,7 +4546,6 @@ init(rpc_transport_t *this)
{
gf_rdma_private_t *priv = NULL;
gf_rdma_ctx_t *rdma_ctx = NULL;
- struct iobuf_pool *iobuf_pool = NULL;
priv = GF_CALLOC(1, sizeof(*priv), gf_common_mt_rdma_private_t);
if (!priv)
@@ -4783,18 +4564,6 @@ init(rpc_transport_t *this)
if (!rdma_ctx)
return -1;
- pthread_mutex_lock(&rdma_ctx->lock);
- {
- if (this->dl_handle && (++(rdma_ctx->dlcount)) == 1) {
- iobuf_pool = this->ctx->iobuf_pool;
- iobuf_pool->rdma_registration = gf_rdma_register_arena;
- iobuf_pool->rdma_deregistration = gf_rdma_deregister_arena;
- gf_rdma_register_iobuf_pool_with_device(rdma_ctx->device,
- iobuf_pool);
- }
- }
- pthread_mutex_unlock(&rdma_ctx->lock);
-
return 0;
}
@@ -4826,7 +4595,6 @@ fini(struct rpc_transport *this)
{
/* TODO: verify this function does graceful finish */
gf_rdma_private_t *priv = NULL;
- struct iobuf_pool *iobuf_pool = NULL;
gf_rdma_ctx_t *rdma_ctx = NULL;
priv = this->private;
@@ -4845,17 +4613,6 @@ fini(struct rpc_transport *this)
if (!rdma_ctx)
return;
- pthread_mutex_lock(&rdma_ctx->lock);
- {
- if (this->dl_handle && (--(rdma_ctx->dlcount)) == 0) {
- iobuf_pool = this->ctx->iobuf_pool;
- gf_rdma_deregister_iobuf_pool(rdma_ctx->device);
- iobuf_pool->rdma_registration = NULL;
- iobuf_pool->rdma_deregistration = NULL;
- }
- }
- pthread_mutex_unlock(&rdma_ctx->lock);
-
return;
}