> On Nov 27, 2018, at 11:29 AM, bfields@xxxxxxxxxxxx wrote: > > On Tue, Nov 27, 2018 at 11:11:35AM -0500, Chuck Lever wrote: >> o Select the R_key to invalidate while the CPU cache still contains >> the received RPC Call transport header, rather than waiting until >> we're about to send the RPC Reply. >> >> o Choose Send With Invalidate if there is exactly one distinct R_key >> in the received transport header. If there's more than one, the >> client will have to perform local invalidation after it has >> already waited for remote invalidation. >> >> Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx> >> --- >> Hi- >> >> Please consider this NFS server-side patch for v4.21. > > OK, thanks, applying. > > (By the way, I appreciate it if patch submissions have > bfields@xxxxxxxxxxxx on the To: line, my filters handle that a little > differently than mailing list traffic.) I've been told not to include To: when the patch is being presented for review. This is a v1. If you feel it is ready to go in, great! But I purposely did not include To: Bruce because it has not had any review yet. > --b. > >> >> >> include/linux/sunrpc/svc_rdma.h | 1 >> net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 63 +++++++++++++++++++++++++++++++ >> net/sunrpc/xprtrdma/svc_rdma_sendto.c | 53 ++++++-------------------- >> 3 files changed, 77 insertions(+), 40 deletions(-) >> >> diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h >> index e6e2691..7e22681 100644 >> --- a/include/linux/sunrpc/svc_rdma.h >> +++ b/include/linux/sunrpc/svc_rdma.h >> @@ -135,6 +135,7 @@ struct svc_rdma_recv_ctxt { >> u32 rc_byte_len; >> unsigned int rc_page_count; >> unsigned int rc_hdr_count; >> + u32 rc_inv_rkey; >> struct page *rc_pages[RPCSVC_MAXPAGES]; >> }; >> >> diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c >> index b24d5b8..828b149 100644 >> --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c >> +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c >> @@ -485,6 +485,68 @@ static __be32 *xdr_check_reply_chunk(__be32 *p, const __be32 *end) >> return p; >> } >> >> +/* RPC-over-RDMA Version One private extension: Remote Invalidation. >> + * Responder's choice: requester signals it can handle Send With >> + * Invalidate, and responder chooses one R_key to invalidate. >> + * >> + * If there is exactly one distinct R_key in the received transport >> + * header, set rc_inv_rkey to that R_key. Otherwise, set it to zero. >> + * >> + * Perform this operation while the received transport header is >> + * still in the CPU cache. >> + */ >> +static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma, >> + struct svc_rdma_recv_ctxt *ctxt) >> +{ >> + __be32 inv_rkey, *p; >> + u32 i, segcount; >> + >> + ctxt->rc_inv_rkey = 0; >> + >> + if (!rdma->sc_snd_w_inv) >> + return; >> + >> + inv_rkey = xdr_zero; >> + p = ctxt->rc_recv_buf; >> + p += rpcrdma_fixed_maxsz; >> + >> + /* Read list */ >> + while (*p++ != xdr_zero) { >> + p++; /* position */ >> + if (inv_rkey == xdr_zero) >> + inv_rkey = *p; >> + else if (inv_rkey != *p) >> + return; >> + p += 4; >> + } >> + >> + /* Write list */ >> + while (*p++ != xdr_zero) { >> + segcount = be32_to_cpup(p++); >> + for (i = 0; i < segcount; i++) { >> + if (inv_rkey == xdr_zero) >> + inv_rkey = *p; >> + else if (inv_rkey != *p) >> + return; >> + p += 4; >> + } >> + } >> + >> + /* Reply chunk */ >> + if (*p++ != xdr_zero) { >> + segcount = be32_to_cpup(p++); >> + for (i = 0; i < segcount; i++) { >> + if (inv_rkey == xdr_zero) >> + inv_rkey = *p; >> + else if (inv_rkey != *p) >> + return; >> + p += 4; >> + } >> + } >> + >> + ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey); >> +} >> + >> /* On entry, xdr->head[0].iov_base points to first byte in the >> * RPC-over-RDMA header. >> * >> @@ -746,6 +808,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) >> svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); >> return ret; >> } >> + svc_rdma_get_inv_rkey(rdma_xprt, ctxt); >> >> p += rpcrdma_fixed_maxsz; >> if (*p != xdr_zero) >> diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c >> index 8602a5f..d48bc6d 100644 >> --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c >> +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c >> @@ -484,32 +484,6 @@ static void svc_rdma_get_write_arrays(__be32 *rdma_argp, >> *reply = NULL; >> } >> >> -/* RPC-over-RDMA Version One private extension: Remote Invalidation. >> - * Responder's choice: requester signals it can handle Send With >> - * Invalidate, and responder chooses one rkey to invalidate. >> - * >> - * Find a candidate rkey to invalidate when sending a reply. Picks the >> - * first R_key it finds in the chunk lists. >> - * >> - * Returns zero if RPC's chunk lists are empty. >> - */ >> -static u32 svc_rdma_get_inv_rkey(__be32 *rdma_argp, >> - __be32 *wr_lst, __be32 *rp_ch) >> -{ >> - __be32 *p; >> - >> - p = rdma_argp + rpcrdma_fixed_maxsz; >> - if (*p != xdr_zero) >> - p += 2; >> - else if (wr_lst && be32_to_cpup(wr_lst + 1)) >> - p = wr_lst + 2; >> - else if (rp_ch && be32_to_cpup(rp_ch + 1)) >> - p = rp_ch + 2; >> - else >> - return 0; >> - return be32_to_cpup(p); >> -} >> - >> static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma, >> struct svc_rdma_send_ctxt *ctxt, >> struct page *page, >> @@ -672,7 +646,7 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp, >> * >> * RDMA Send is the last step of transmitting an RPC reply. Pages >> * involved in the earlier RDMA Writes are here transferred out >> - * of the rqstp and into the ctxt's page array. These pages are >> + * of the rqstp and into the sctxt's page array. These pages are >> * DMA unmapped by each Write completion, but the subsequent Send >> * completion finally releases these pages. >> * >> @@ -680,32 +654,31 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp, >> * - The Reply's transport header will never be larger than a page. >> */ >> static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma, >> - struct svc_rdma_send_ctxt *ctxt, >> - __be32 *rdma_argp, >> + struct svc_rdma_send_ctxt *sctxt, >> + struct svc_rdma_recv_ctxt *rctxt, >> struct svc_rqst *rqstp, >> __be32 *wr_lst, __be32 *rp_ch) >> { >> int ret; >> >> if (!rp_ch) { >> - ret = svc_rdma_map_reply_msg(rdma, ctxt, >> + ret = svc_rdma_map_reply_msg(rdma, sctxt, >> &rqstp->rq_res, wr_lst); >> if (ret < 0) >> return ret; >> } >> >> - svc_rdma_save_io_pages(rqstp, ctxt); >> + svc_rdma_save_io_pages(rqstp, sctxt); >> >> - ctxt->sc_send_wr.opcode = IB_WR_SEND; >> - if (rdma->sc_snd_w_inv) { >> - ctxt->sc_send_wr.ex.invalidate_rkey = >> - svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch); >> - if (ctxt->sc_send_wr.ex.invalidate_rkey) >> - ctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV; >> + if (rctxt->rc_inv_rkey) { >> + sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV; >> + sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey; >> + } else { >> + sctxt->sc_send_wr.opcode = IB_WR_SEND; >> } >> dprintk("svcrdma: posting Send WR with %u sge(s)\n", >> - ctxt->sc_send_wr.num_sge); >> - return svc_rdma_send(rdma, &ctxt->sc_send_wr); >> + sctxt->sc_send_wr.num_sge); >> + return svc_rdma_send(rdma, &sctxt->sc_send_wr); >> } >> >> /* Given the client-provided Write and Reply chunks, the server was not >> @@ -809,7 +782,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) >> } >> >> svc_rdma_sync_reply_hdr(rdma, sctxt, svc_rdma_reply_hdr_len(rdma_resp)); >> - ret = svc_rdma_send_reply_msg(rdma, sctxt, rdma_argp, rqstp, >> + ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp, >> wr_lst, rp_ch); >> if (ret < 0) >> goto err1; -- Chuck Lever