From: Chuck Lever <chuck.lever@xxxxxxxxxx>

I've noticed that using the ib-comp-wq workqueue delays Send
Completions anywhere between 5us and 3 or more milliseconds.

For RDMA Write and Send completions, this is not a terribly
significant issue, since these just release resources. They do not
contribute to RPC round-trip time.

However, for RDMA Read completions, it delays the start of NFS
WRITE operations, adding round-trip latency.

For small to moderate NFS WRITEs, using soft IRQ completion means
up to 5us better latency per NFS WRITE -- this is a significant
portion of average RTT for small NFS WRITEs, which is 40-75us.

Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx>
---
 net/sunrpc/xprtrdma/svc_rdma_rw.c        | 4 ++--
 net/sunrpc/xprtrdma/svc_rdma_sendto.c    | 6 +++---
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index e460e25a1d6d..ada164c027bc 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -56,9 +56,9 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
 	struct svc_rdma_rw_ctxt *ctxt;
 	struct llist_node *node;
 
-	spin_lock(&rdma->sc_rw_ctxt_lock);
+	spin_lock_bh(&rdma->sc_rw_ctxt_lock);
 	node = llist_del_first(&rdma->sc_rw_ctxts);
-	spin_unlock(&rdma->sc_rw_ctxt_lock);
+	spin_unlock_bh(&rdma->sc_rw_ctxt_lock);
 	if (node) {
 		ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node);
 	} else {
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index e27345af6289..49a9f409bc8e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -198,12 +198,13 @@ struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
 	struct svc_rdma_send_ctxt *ctxt;
 	struct llist_node *node;
 
-	spin_lock(&rdma->sc_send_lock);
+	spin_lock_bh(&rdma->sc_send_lock);
 	node = llist_del_first(&rdma->sc_send_ctxts);
+	spin_unlock_bh(&rdma->sc_send_lock);
 	if (!node)
 		goto out_empty;
+
 	ctxt = llist_entry(node, struct svc_rdma_send_ctxt, sc_node);
-	spin_unlock(&rdma->sc_send_lock);
 
 out:
 	rpcrdma_set_xdrlen(&ctxt->sc_hdrbuf, 0);
@@ -216,7 +217,6 @@ struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
 	return ctxt;
 
 out_empty:
-	spin_unlock(&rdma->sc_send_lock);
 	ctxt = svc_rdma_send_ctxt_alloc(rdma);
 	if (!ctxt)
 		return NULL;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 7bd50efeeb4e..8de32927cd7d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -430,7 +430,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 		goto errout;
 	}
 	newxprt->sc_sq_cq = ib_alloc_cq_any(dev, newxprt, newxprt->sc_sq_depth,
-					    IB_POLL_WORKQUEUE);
+					    IB_POLL_SOFTIRQ);
 	if (IS_ERR(newxprt->sc_sq_cq))
 		goto errout;
 	newxprt->sc_rq_cq = ib_alloc_cq_any(dev, newxprt, rq_depth,
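
As context for the spin_lock_bh() conversions above: once the Send CQ is
polled with IB_POLL_SOFTIRQ, its completion handlers run in bottom-half
context, and the usual kernel rule is that any lock which might be
contended from that context must be taken with the _bh variants in
process context; otherwise a softirq arriving on the same CPU while the
lock is held can deadlock. The sketch below illustrates only that
general pattern; it is not taken from svcrdma, and the names
example_lock, example_list, example_get, and example_done are
hypothetical.

    #include <linux/spinlock.h>
    #include <linux/llist.h>

    static DEFINE_SPINLOCK(example_lock);
    static LLIST_HEAD(example_list);

    /*
     * Process context: disable bottom halves while holding the lock so
     * a softirq handler that also wants example_lock cannot interrupt
     * us on this CPU and spin forever.
     */
    static struct llist_node *example_get(void)
    {
            struct llist_node *node;

            spin_lock_bh(&example_lock);
            node = llist_del_first(&example_list);
            spin_unlock_bh(&example_lock);
            return node;
    }

    /*
     * Softirq context (for instance, a CQ completion handler under
     * IB_POLL_SOFTIRQ): plain spin_lock() suffices here because
     * softirqs do not nest on the same CPU.
     */
    static void example_done(struct llist_node *node)
    {
            spin_lock(&example_lock);
            llist_add(node, &example_list);
            spin_unlock(&example_lock);
    }

The llist helpers mirror the ones in the patch: llist_add() is lock-free,
while concurrent llist_del_first() callers must be serialized, which is
what sc_send_lock and sc_rw_ctxt_lock provide.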