On some platforms, DMA mapping part of a page is more costly than
copying bytes. Indeed, not involving the I/O MMU can help the RPC/RDMA
transport scale better for tiny I/Os across more RDMA devices. This is
because interaction with the I/O MMU is eliminated for each of these
small I/Os. Without the explicit unmapping, the NIC no longer needs to
do a costly internal TLB shootdown for buffers that are just a handful
of bytes.

Since pull-up is now a more frequent operation, I've introduced a
trace point in the pull-up path. It can be used for debugging or by
user-space tools that count pull-up frequency.

Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx>
---
 include/trace/events/rpcrdma.h        | 18 ++++++++++++++++++
 net/sunrpc/xprtrdma/svc_rdma_sendto.c | 15 ++++++++++++++-
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 74b68547eefb..9238d233f8cf 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -1639,6 +1639,24 @@ TRACE_EVENT(svcrdma_dma_map_rwctx,
 	)
 );
 
+TRACE_EVENT(svcrdma_send_pullup,
+	TP_PROTO(
+		unsigned int len
+	),
+
+	TP_ARGS(len),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, len)
+	),
+
+	TP_fast_assign(
+		__entry->len = len;
+	),
+
+	TP_printk("len=%u", __entry->len)
+);
+
 TRACE_EVENT(svcrdma_send_failed,
 	TP_PROTO(
 		const struct svc_rqst *rqst,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 9a7317bc54c9..3669a41bf8b6 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -539,6 +539,7 @@ static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma,
 /**
  * svc_rdma_pull_up_needed - Determine whether to use pull-up
  * @rdma: controlling transport
+ * @sctxt: send_ctxt for the Send WR
  * @rctxt: Write and Reply chunks provided by client
  * @xdr: xdr_buf containing RPC message to transmit
  *
@@ -547,11 +548,22 @@ static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma,
  * %false otherwise
  */
 static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma,
+				    struct svc_rdma_send_ctxt *sctxt,
 				    const struct svc_rdma_recv_ctxt *rctxt,
 				    struct xdr_buf *xdr)
 {
 	int elements;
 
+	/* For small messages, copying bytes is cheaper than DMA
+	 * mapping.
+	 */
+	if (sctxt->sc_hdrbuf.len + xdr->len <
+	    RPCRDMA_V1_DEF_INLINE_SIZE >> 1)
+		return true;
+
+	/* Check whether the xdr_buf has more elements than can
+	 * fit in a single RDMA Send.
+	 */
 	/* xdr->head */
 	elements = 1;
 
@@ -634,6 +646,7 @@ static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma,
 		memcpy(dst, tailbase, taillen);
 
 	sctxt->sc_sges[0].length += xdr->len;
+	trace_svcrdma_send_pullup(sctxt->sc_sges[0].length);
 	return 0;
 }
 
@@ -667,7 +680,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
 	if (rctxt && rctxt->rc_reply_chunk)
 		return 0;
 
-	if (svc_rdma_pull_up_needed(rdma, rctxt, xdr))
+	if (svc_rdma_pull_up_needed(rdma, sctxt, rctxt, xdr))
 		return svc_rdma_pull_up_reply_msg(rdma, sctxt, rctxt, xdr);
 
 	++sctxt->sc_cur_sge_no;
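
For reference, below is a minimal sketch of the kind of user-space
counter the commit message mentions. It assumes tracefs is mounted at
/sys/kernel/tracing and that the new event shows up under
events/rpcrdma/svcrdma_send_pullup once this patch is applied; the
paths, the simple string match, and the program itself are illustrative
only and not part of the patch.

/*
 * Illustrative only: count svcrdma_send_pullup events by reading
 * trace_pipe. Assumes tracefs is mounted at /sys/kernel/tracing;
 * adjust TRACEFS if your system uses /sys/kernel/debug/tracing.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>

#define TRACEFS		"/sys/kernel/tracing"
#define PULLUP_ENABLE	TRACEFS "/events/rpcrdma/svcrdma_send_pullup/enable"
#define TRACE_PIPE	TRACEFS "/trace_pipe"

static unsigned long pullups;

/* Print the running total on Ctrl-C (not async-signal-safe; demo only). */
static void report(int sig)
{
	(void)sig;
	printf("\n%lu pull-ups observed\n", pullups);
	exit(0);
}

static int write_flag(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	fclose(f);
	return 0;
}

int main(void)
{
	char line[512];
	FILE *pipe;

	/* Enable just the pull-up trace point. */
	if (write_flag(PULLUP_ENABLE, "1")) {
		perror(PULLUP_ENABLE);
		return 1;
	}

	pipe = fopen(TRACE_PIPE, "r");
	if (!pipe) {
		perror(TRACE_PIPE);
		return 1;
	}

	signal(SIGINT, report);

	/* Each matching line in trace_pipe is one pull-up. */
	while (fgets(line, sizeof(line), pipe))
		if (strstr(line, "svcrdma_send_pullup:"))
			pullups++;

	return 0;
}

Run it as root while the server is handling small replies and
interrupt it with Ctrl-C to see the total; trace-cmd or perf can
gather the same information without custom code.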