summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMohammed Rafi KC <rkavunga@redhat.com>2014-12-22 15:16:43 +0530
committerRaghavendra Bhat <raghavendra@redhat.com>2015-03-27 04:33:28 -0700
commit8f12ed326db9e56e993947174b5048fca79dfc42 (patch)
tree21e5b9685339fc8969d3162e2ad56c958ce8cd91
parent692f1c6c92152fb592b0c35c0faa1610dbecaad5 (diff)
rdma: post multiple work request in a single call.
Back port of : http://review.gluster.org/9327 ibv_post-send will allow to send multiple work request in a single call posting as linked list. So if the payload count > 1, we can perform the data operation in a single call to ibv_post_send. Change-Id: Ib2e485cbbe6887919109e73e17d4fab595d5e65e BUG: 1202212 Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> Reviewed-on: http://review.gluster.org/9327 Reviewed-by: Raghavendra G <rgowdapp@redhat.com> Tested-by: Raghavendra G <rgowdapp@redhat.com> Reviewed-on: http://review.gluster.org/9887 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com> Reviewed-by: Raghavendra Bhat <raghavendra@redhat.com>
-rw-r--r--libglusterfs/src/mem-types.h1
-rw-r--r--rpc/rpc-transport/rdma/src/rdma.c120
2 files changed, 66 insertions, 55 deletions
diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h
index 1cce6db7501..4f566f9ec57 100644
--- a/libglusterfs/src/mem-types.h
+++ b/libglusterfs/src/mem-types.h
@@ -125,6 +125,7 @@ enum gf_common_mem_types_ {
gf_common_mt_strfd_t = 109,
gf_common_mt_strfd_data_t = 110,
gf_common_mt_regex_t = 111,
+ gf_common_mt_wr = 112,
gf_common_mt_end
};
#endif
diff --git a/rpc/rpc-transport/rdma/src/rdma.c b/rpc/rpc-transport/rdma/src/rdma.c
index 599003ea9a7..3b50c1312b8 100644
--- a/rpc/rpc-transport/rdma/src/rdma.c
+++ b/rpc/rpc-transport/rdma/src/rdma.c
@@ -3329,57 +3329,17 @@ gf_rdma_decode_header (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
int32_t
-__gf_rdma_read (gf_rdma_peer_t *peer, gf_rdma_post_t *post, struct iovec *to,
- gf_rdma_read_chunk_t *readch)
-{
- int32_t ret = -1;
- struct ibv_sge list = {0, };
- struct ibv_send_wr wr = {0, }, *bad_wr = NULL;
-
- ret = __gf_rdma_register_local_mr_for_rdma (peer, to, 1, &post->ctx);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "registering local memory for rdma read failed");
- goto out;
- }
-
- list.addr = (unsigned long) to->iov_base;
- list.length = to->iov_len;
- list.lkey = post->ctx.mr[post->ctx.mr_count - 1]->lkey;
-
- wr.wr_id = (unsigned long) gf_rdma_post_ref (post);
- wr.sg_list = &list;
- wr.num_sge = 1;
- wr.opcode = IBV_WR_RDMA_READ;
- wr.send_flags = IBV_SEND_SIGNALED;
- wr.wr.rdma.remote_addr = readch->rc_target.rs_offset;
- wr.wr.rdma.rkey = readch->rc_target.rs_handle;
-
- ret = ibv_post_send (peer->qp, &wr, &bad_wr);
- if (ret) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "rdma read from client "
- "(%s) failed with ret = %d (%s)",
- peer->trans->peerinfo.identifier,
- ret, (ret > 0) ? strerror (ret) : "");
- ret = -1;
- gf_rdma_post_unref (post);
- }
-out:
- return ret;
-}
-
-
-int32_t
gf_rdma_do_reads (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
gf_rdma_read_chunk_t *readch)
{
- int32_t ret = -1, i = 0, count = 0;
- size_t size = 0;
- char *ptr = NULL;
- struct iobuf *iobuf = NULL;
- gf_rdma_private_t *priv = NULL;
-
+ int32_t ret = -1, i = 0, count = 0;
+ size_t size = 0;
+ char *ptr = NULL;
+ struct iobuf *iobuf = NULL;
+ gf_rdma_private_t *priv = NULL;
+ struct ibv_sge *list = NULL;
+ struct ibv_send_wr *wr = NULL, *bad_wr = NULL;
+ int total_ref = 0;
priv = peer->trans->private;
for (i = 0; readch[i].rc_discrim != 0; i++) {
@@ -3394,7 +3354,7 @@ gf_rdma_do_reads (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
}
post->ctx.gf_rdma_reads = i;
-
+ i = 0;
iobuf = iobuf_get2 (peer->trans->ctx->iobuf_pool, size);
if (iobuf == NULL) {
goto out;
@@ -3424,32 +3384,82 @@ gf_rdma_do_reads (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
goto unlock;
}
+ list = GF_CALLOC (post->ctx.gf_rdma_reads,
+ sizeof (struct ibv_sge), gf_common_mt_sge);
+ wr = GF_CALLOC (post->ctx.gf_rdma_reads,
+ sizeof (struct ibv_send_wr), gf_common_mt_wr);
for (i = 0; readch[i].rc_discrim != 0; i++) {
count = post->ctx.count++;
post->ctx.vector[count].iov_base = ptr;
post->ctx.vector[count].iov_len
= readch[i].rc_target.rs_length;
- ret = __gf_rdma_read (peer, post,
- &post->ctx.vector[count],
- &readch[i]);
+ ret = __gf_rdma_register_local_mr_for_rdma (peer,
+ &post->ctx.vector[count], 1, &post->ctx);
if (ret == -1) {
gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "rdma read from peer (%s) failed",
- peer->trans->peerinfo.identifier);
+ "registering local memory"
+ " for rdma read failed");
goto unlock;
}
+ list[i].addr = (unsigned long)
+ post->ctx.vector[count].iov_base;
+ list[i].length = post->ctx.vector[count].iov_len;
+ list[i].lkey =
+ post->ctx.mr[post->ctx.mr_count - 1]->lkey;
+
+ wr[i].wr_id =
+ (unsigned long) gf_rdma_post_ref (post);
+ wr[i].sg_list = &list[i];
+ wr[i].next = &wr[i+1];
+ wr[i].num_sge = 1;
+ wr[i].opcode = IBV_WR_RDMA_READ;
+ wr[i].send_flags = IBV_SEND_SIGNALED;
+ wr[i].wr.rdma.remote_addr =
+ readch->rc_target.rs_offset;
+ wr[i].wr.rdma.rkey = readch->rc_target.rs_handle;
+
ptr += readch[i].rc_target.rs_length;
+ total_ref++;
+ }
+ wr[i-1].next = NULL;
+ ret = ibv_post_send (peer->qp, wr, &bad_wr);
+ if (ret) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "rdma read from client "
+ "(%s) failed with ret = %d (%s)",
+ peer->trans->peerinfo.identifier,
+ ret, (ret > 0) ? strerror (ret) : "");
+
+ if (!bad_wr) {
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < post->ctx.gf_rdma_reads; i++) {
+ if (&wr[i] != bad_wr)
+ total_ref--;
+ else
+ break;
+ }
+
+ ret = -1;
}
- ret = 0;
}
unlock:
pthread_mutex_unlock (&priv->write_mutex);
out:
+ if (list)
+ GF_FREE (list);
+ if (wr)
+ GF_FREE (wr);
if (ret == -1) {
+ while (total_ref-- > 0)
+ gf_rdma_post_unref (post);
+
if (iobuf != NULL) {
iobuf_unref (iobuf);
}