Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx>
---
Hi-
Please consider this NFS server-side patch for v4.21. It moves the
selection of the R_key to invalidate out of the Send path and into the
Receive path, where the ingress transport header is still in the CPU
cache, and saves the result in the new rc_inv_rkey field for use when
the Reply is sent. Send With Invalidate is now used only when the
request's chunk lists carry exactly one distinct R_key.
include/linux/sunrpc/svc_rdma.h | 1
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 63 +++++++++++++++++++++++++++++++
net/sunrpc/xprtrdma/svc_rdma_sendto.c | 53 ++++++--------------------
3 files changed, 77 insertions(+), 40 deletions(-)
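For reviewers who don't page in the RPC-over-RDMA Version One header
layout often, here is a rough userspace sketch of the scan the new
svc_rdma_get_inv_rkey() performs. It is an illustration only, not kernel
code: find_single_rkey() and RPCRDMA_FIXED_MAXSZ are made-up names,
uint32_t/ntohl stand in for __be32/be32_to_cpup, and the header is
assumed to be complete and already sanity-checked. Each segment on the
wire is handle (R_key), length, and an 8-byte offset, i.e. four XDR
words.

	/*
	 * Illustrative userspace sketch (not in the patch) of the R_key
	 * scan done by svc_rdma_get_inv_rkey().  Returns the single
	 * distinct R_key found in the chunk lists, or 0 if the lists are
	 * empty or contain more than one distinct R_key.
	 */
	#include <stdint.h>
	#include <arpa/inet.h>

	#define RPCRDMA_FIXED_MAXSZ 4	/* XID, vers, credits, proc */

	static uint32_t find_single_rkey(const uint32_t *p)
	{
		uint32_t inv_rkey = 0;
		uint32_t i, segcount;

		p += RPCRDMA_FIXED_MAXSZ;

		/* Read list: { 1, position, handle, length, offset[2] }*, 0 */
		while (*p++ != 0) {
			p++;			/* skip position */
			if (inv_rkey == 0)
				inv_rkey = *p;	/* handle is the R_key */
			else if (inv_rkey != *p)
				return 0;	/* second distinct R_key */
			p += 4;			/* handle + length + offset */
		}

		/* Write list: { 1, segcount, segcount segments }*, 0 */
		while (*p++ != 0) {
			segcount = ntohl(*p++);
			for (i = 0; i < segcount; i++) {
				if (inv_rkey == 0)
					inv_rkey = *p;
				else if (inv_rkey != *p)
					return 0;
				p += 4;
			}
		}

		/* Reply chunk: optional single chunk, same shape as Write */
		if (*p++ != 0) {
			segcount = ntohl(*p++);
			for (i = 0; i < segcount; i++) {
				if (inv_rkey == 0)
					inv_rkey = *p;
				else if (inv_rkey != *p)
					return 0;
				p += 4;
			}
		}

		return ntohl(inv_rkey);
	}

The kernel version below returns its result through ctxt->rc_inv_rkey
and bails out to zero as soon as it sees a second distinct R_key, which
keeps the scan cheap enough to do in the Receive path.
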
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index e6e2691..7e22681 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -135,6 +135,7 @@ struct svc_rdma_recv_ctxt {
u32 rc_byte_len;
unsigned int rc_page_count;
unsigned int rc_hdr_count;
+ u32 rc_inv_rkey;
struct page *rc_pages[RPCSVC_MAXPAGES];
};
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index b24d5b8..828b149 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -485,6 +485,68 @@ static __be32 *xdr_check_reply_chunk(__be32 *p, const __be32 *end)
return p;
}
+/* RPC-over-RDMA Version One private extension: Remote Invalidation.
+ * Responder's choice: requester signals it can handle Send With
+ * Invalidate, and responder chooses one R_key to invalidate.
+ *
+ * If there is exactly one distinct R_key in the received transport
+ * header, set rc_inv_rkey to that R_key. Otherwise, set it to zero.
+ *
+ * Perform this operation while the received transport header is
+ * still in the CPU cache.
+ */
+static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
+ struct svc_rdma_recv_ctxt *ctxt)
+{
+ __be32 inv_rkey, *p;
+ u32 i, segcount;
+
+ ctxt->rc_inv_rkey = 0;
+
+ if (!rdma->sc_snd_w_inv)
+ return;
+
+ inv_rkey = xdr_zero;
+ p = ctxt->rc_recv_buf;
+ p += rpcrdma_fixed_maxsz;
+
+ /* Read list */
+ while (*p++ != xdr_zero) {
+ p++; /* position */
+ if (inv_rkey == xdr_zero)
+ inv_rkey = *p;
+ else if (inv_rkey != *p)
+ return;
+ p += 4;
+ }
+
+ /* Write list */
+ while (*p++ != xdr_zero) {
+ segcount = be32_to_cpup(p++);
+ for (i = 0; i < segcount; i++) {
+ if (inv_rkey == xdr_zero)
+ inv_rkey = *p;
+ else if (inv_rkey != *p)
+ return;
+ p += 4;
+ }
+ }
+
+ /* Reply chunk */
+ if (*p++ != xdr_zero) {
+ segcount = be32_to_cpup(p++);
+ for (i = 0; i < segcount; i++) {
+ if (inv_rkey == xdr_zero)
+ inv_rkey = *p;
+ else if (inv_rkey != *p)
+ return;
+ p += 4;
+ }
+ }
+
+ ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey);
+}
+
/* On entry, xdr->head[0].iov_base points to first byte in the
* RPC-over-RDMA header.
*
@@ -746,6 +808,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return ret;
}
+ svc_rdma_get_inv_rkey(rdma_xprt, ctxt);
p += rpcrdma_fixed_maxsz;
if (*p != xdr_zero)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 8602a5f..d48bc6d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -484,32 +484,6 @@ static void svc_rdma_get_write_arrays(__be32 *rdma_argp,
*reply = NULL;
}
-/* RPC-over-RDMA Version One private extension: Remote Invalidation.
- * Responder's choice: requester signals it can handle Send With
- * Invalidate, and responder chooses one rkey to invalidate.
- *
- * Find a candidate rkey to invalidate when sending a reply. Picks the
- * first R_key it finds in the chunk lists.
- *
- * Returns zero if RPC's chunk lists are empty.
- */
-static u32 svc_rdma_get_inv_rkey(__be32 *rdma_argp,
- __be32 *wr_lst, __be32 *rp_ch)
-{
- __be32 *p;
-
- p = rdma_argp + rpcrdma_fixed_maxsz;
- if (*p != xdr_zero)
- p += 2;
- else if (wr_lst && be32_to_cpup(wr_lst + 1))
- p = wr_lst + 2;
- else if (rp_ch && be32_to_cpup(rp_ch + 1))
- p = rp_ch + 2;
- else
- return 0;
- return be32_to_cpup(p);
-}
-
static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt,
struct page *page,
@@ -672,7 +646,7 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
*
* RDMA Send is the last step of transmitting an RPC reply. Pages
* involved in the earlier RDMA Writes are here transferred out
- * of the rqstp and into the ctxt's page array. These pages are
+ * of the rqstp and into the sctxt's page array. These pages are
* DMA unmapped by each Write completion, but the subsequent Send
* completion finally releases these pages.
*
@@ -680,32 +654,31 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
* - The Reply's transport header will never be larger than a page.
*/
static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
- struct svc_rdma_send_ctxt *ctxt,
- __be32 *rdma_argp,
+ struct svc_rdma_send_ctxt *sctxt,
+ struct svc_rdma_recv_ctxt *rctxt,
struct svc_rqst *rqstp,
__be32 *wr_lst, __be32 *rp_ch)
{
int ret;
if (!rp_ch) {
- ret = svc_rdma_map_reply_msg(rdma, ctxt,
+ ret = svc_rdma_map_reply_msg(rdma, sctxt,
&rqstp->rq_res, wr_lst);
if (ret < 0)
return ret;
}
- svc_rdma_save_io_pages(rqstp, ctxt);
+ svc_rdma_save_io_pages(rqstp, sctxt);
- ctxt->sc_send_wr.opcode = IB_WR_SEND;
- if (rdma->sc_snd_w_inv) {
- ctxt->sc_send_wr.ex.invalidate_rkey =
- svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch);
- if (ctxt->sc_send_wr.ex.invalidate_rkey)
- ctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
+ if (rctxt->rc_inv_rkey) {
+ sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
+ sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey;
+ } else {
+ sctxt->sc_send_wr.opcode = IB_WR_SEND;
}
dprintk("svcrdma: posting Send WR with %u sge(s)\n",
- ctxt->sc_send_wr.num_sge);
- return svc_rdma_send(rdma, &ctxt->sc_send_wr);
+ sctxt->sc_send_wr.num_sge);
+ return svc_rdma_send(rdma, &sctxt->sc_send_wr);
}
/* Given the client-provided Write and Reply chunks, the server was not
@@ -809,7 +782,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
}
svc_rdma_sync_reply_hdr(rdma, sctxt, svc_rdma_reply_hdr_len(rdma_resp));
- ret = svc_rdma_send_reply_msg(rdma, sctxt, rdma_argp, rqstp,
+ ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp,
wr_lst, rp_ch);
if (ret < 0)
goto err1;
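
Note for reviewers on how the saved value is consumed: the Send path
above now simply prefers Send With Invalidate whenever rc_inv_rkey is
non-zero. The sketch below restates that decision as a stand-alone
helper (svc_rdma_set_send_opcode() is an illustrative name, not part of
the patch) to spell out the verb semantics: when the Send arrives, the
client's HCA invalidates the MR tagged with invalidate_rkey, sparing the
client a separate Local Invalidate work request.

	#include <rdma/ib_verbs.h>

	/* Illustrative helper (not in the patch): pick the Send opcode
	 * based on the R_key saved at Receive time.  The caller is assumed
	 * to have already populated the WR's SGE list.
	 */
	static void svc_rdma_set_send_opcode(struct ib_send_wr *wr, u32 inv_rkey)
	{
		if (inv_rkey) {
			wr->opcode = IB_WR_SEND_WITH_INV;
			wr->ex.invalidate_rkey = inv_rkey;
		} else {
			wr->opcode = IB_WR_SEND;
		}
	}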