On Tue, Nov 27, 2018 at 11:11:35AM -0500, Chuck Lever wrote:
> o Select the R_key to invalidate while the CPU cache still contains
>   the received RPC Call transport header, rather than waiting until
>   we're about to send the RPC Reply.
>
> o Choose Send With Invalidate if there is exactly one distinct R_key
>   in the received transport header. If there's more than one, the
>   client will have to perform local invalidation after it has
>   already waited for remote invalidation.
>
> Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx>
> ---
> Hi-
>
> Please consider this NFS server-side patch for v4.21.

OK, thanks, applying.

(By the way, I appreciate it if patch submissions have
bfields@xxxxxxxxxxxx on the To: line, my filters handle that a little
differently than mailing list traffic.)

--b.

>
>
>  include/linux/sunrpc/svc_rdma.h         |    1
>  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c |   63 +++++++++++++++++++++++++++++++
>  net/sunrpc/xprtrdma/svc_rdma_sendto.c   |   53 ++++++--------------------
>  3 files changed, 77 insertions(+), 40 deletions(-)
>
> diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
> index e6e2691..7e22681 100644
> --- a/include/linux/sunrpc/svc_rdma.h
> +++ b/include/linux/sunrpc/svc_rdma.h
> @@ -135,6 +135,7 @@ struct svc_rdma_recv_ctxt {
>  	u32			rc_byte_len;
>  	unsigned int		rc_page_count;
>  	unsigned int		rc_hdr_count;
> +	u32			rc_inv_rkey;
>  	struct page		*rc_pages[RPCSVC_MAXPAGES];
>  };
>
> diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> index b24d5b8..828b149 100644
> --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> @@ -485,6 +485,68 @@ static __be32 *xdr_check_reply_chunk(__be32 *p, const __be32 *end)
>  	return p;
>  }
>
> +/* RPC-over-RDMA Version One private extension: Remote Invalidation.
> + * Responder's choice: requester signals it can handle Send With
> + * Invalidate, and responder chooses one R_key to invalidate.
> + *
> + * If there is exactly one distinct R_key in the received transport
> + * header, set rc_inv_rkey to that R_key. Otherwise, set it to zero.
> + *
> + * Perform this operation while the received transport header is
> + * still in the CPU cache.
> + */
> +static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
> +				  struct svc_rdma_recv_ctxt *ctxt)
> +{
> +	__be32 inv_rkey, *p;
> +	u32 i, segcount;
> +
> +	ctxt->rc_inv_rkey = 0;
> +
> +	if (!rdma->sc_snd_w_inv)
> +		return;
> +
> +	inv_rkey = xdr_zero;
> +	p = ctxt->rc_recv_buf;
> +	p += rpcrdma_fixed_maxsz;
> +
> +	/* Read list */
> +	while (*p++ != xdr_zero) {
> +		p++;	/* position */
> +		if (inv_rkey == xdr_zero)
> +			inv_rkey = *p;
> +		else if (inv_rkey != *p)
> +			return;
> +		p += 4;
> +	}
> +
> +	/* Write list */
> +	while (*p++ != xdr_zero) {
> +		segcount = be32_to_cpup(p++);
> +		for (i = 0; i < segcount; i++) {
> +			if (inv_rkey == xdr_zero)
> +				inv_rkey = *p;
> +			else if (inv_rkey != *p)
> +				return;
> +			p += 4;
> +		}
> +	}
> +
> +	/* Reply chunk */
> +	if (*p++ != xdr_zero) {
> +		segcount = be32_to_cpup(p++);
> +		for (i = 0; i < segcount; i++) {
> +			if (inv_rkey == xdr_zero)
> +				inv_rkey = *p;
> +			else if (inv_rkey != *p)
> +				return;
> +			p += 4;
> +		}
> +	}
> +
> +	ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey);
> +}
> +
>  /* On entry, xdr->head[0].iov_base points to first byte in the
>   * RPC-over-RDMA header.
>   *
> @@ -746,6 +808,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
>  		svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
>  		return ret;
>  	}
> +	svc_rdma_get_inv_rkey(rdma_xprt, ctxt);
>
>  	p += rpcrdma_fixed_maxsz;
>  	if (*p != xdr_zero)
> diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
> index 8602a5f..d48bc6d 100644
> --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
> +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
> @@ -484,32 +484,6 @@ static void svc_rdma_get_write_arrays(__be32 *rdma_argp,
>  	*reply = NULL;
>  }
>
> -/* RPC-over-RDMA Version One private extension: Remote Invalidation.
> - * Responder's choice: requester signals it can handle Send With
> - * Invalidate, and responder chooses one rkey to invalidate.
> - *
> - * Find a candidate rkey to invalidate when sending a reply. Picks the
> - * first R_key it finds in the chunk lists.
> - *
> - * Returns zero if RPC's chunk lists are empty.
> - */
> -static u32 svc_rdma_get_inv_rkey(__be32 *rdma_argp,
> -				 __be32 *wr_lst, __be32 *rp_ch)
> -{
> -	__be32 *p;
> -
> -	p = rdma_argp + rpcrdma_fixed_maxsz;
> -	if (*p != xdr_zero)
> -		p += 2;
> -	else if (wr_lst && be32_to_cpup(wr_lst + 1))
> -		p = wr_lst + 2;
> -	else if (rp_ch && be32_to_cpup(rp_ch + 1))
> -		p = rp_ch + 2;
> -	else
> -		return 0;
> -	return be32_to_cpup(p);
> -}
> -
>  static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
>  				 struct svc_rdma_send_ctxt *ctxt,
>  				 struct page *page,
> @@ -672,7 +646,7 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
>   *
>   * RDMA Send is the last step of transmitting an RPC reply. Pages
>   * involved in the earlier RDMA Writes are here transferred out
> - * of the rqstp and into the ctxt's page array. These pages are
> + * of the rqstp and into the sctxt's page array. These pages are
>   * DMA unmapped by each Write completion, but the subsequent Send
>   * completion finally releases these pages.
>   *
> @@ -680,32 +654,31 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
>   * - The Reply's transport header will never be larger than a page.
>   */
>  static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
> -				   struct svc_rdma_send_ctxt *ctxt,
> -				   __be32 *rdma_argp,
> +				   struct svc_rdma_send_ctxt *sctxt,
> +				   struct svc_rdma_recv_ctxt *rctxt,
>  				   struct svc_rqst *rqstp,
>  				   __be32 *wr_lst, __be32 *rp_ch)
>  {
>  	int ret;
>
>  	if (!rp_ch) {
> -		ret = svc_rdma_map_reply_msg(rdma, ctxt,
> +		ret = svc_rdma_map_reply_msg(rdma, sctxt,
>  					     &rqstp->rq_res, wr_lst);
>  		if (ret < 0)
>  			return ret;
>  	}
>
> -	svc_rdma_save_io_pages(rqstp, ctxt);
> +	svc_rdma_save_io_pages(rqstp, sctxt);
>
> -	ctxt->sc_send_wr.opcode = IB_WR_SEND;
> -	if (rdma->sc_snd_w_inv) {
> -		ctxt->sc_send_wr.ex.invalidate_rkey =
> -			svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch);
> -		if (ctxt->sc_send_wr.ex.invalidate_rkey)
> -			ctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
> +	if (rctxt->rc_inv_rkey) {
> +		sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
> +		sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey;
> +	} else {
> +		sctxt->sc_send_wr.opcode = IB_WR_SEND;
>  	}
>  	dprintk("svcrdma: posting Send WR with %u sge(s)\n",
> -		ctxt->sc_send_wr.num_sge);
> -	return svc_rdma_send(rdma, &ctxt->sc_send_wr);
> +		sctxt->sc_send_wr.num_sge);
> +	return svc_rdma_send(rdma, &sctxt->sc_send_wr);
>  }
>
>  /* Given the client-provided Write and Reply chunks, the server was not
> @@ -809,7 +782,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
>  	}
>
>  	svc_rdma_sync_reply_hdr(rdma, sctxt, svc_rdma_reply_hdr_len(rdma_resp));
> -	ret = svc_rdma_send_reply_msg(rdma, sctxt, rdma_argp, rqstp,
> +	ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp,
>  				      wr_lst, rp_ch);
>  	if (ret < 0)
>  		goto err1;
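For anyone who wants to trace the "exactly one distinct R_key" rule outside the kernel tree, here is a minimal user-space sketch of the same scan. It is an illustration, not kernel code: the 4-word fixed header and 4-word segments (handle, length, 64-bit offset) follow the RPC-over-RDMA Version One XDR layout, htonl()/ntohl() stand in for the kernel's byte-order helpers, and the function name, constants, and sample header are all invented for this example. Like the patch's svc_rdma_get_inv_rkey(), it treats a literal R_key of zero as "none seen yet".

/*
 * rkey_scan.c: standalone sketch of the single-R_key selection rule.
 * Build: cc -Wall -o rkey_scan rkey_scan.c
 */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>	/* htonl()/ntohl() in place of the kernel helpers */

#define FIXED_MAXSZ	4	/* XID, version, credits, proc (32-bit words) */
#define SEGMENT_MAXSZ	4	/* handle, length, 64-bit offset (32-bit words) */

/* Return the single distinct R_key in the chunk lists (host order),
 * or 0 if the lists are empty or name more than one distinct R_key. */
static uint32_t get_inv_rkey(const uint32_t *p)
{
	uint32_t inv_rkey = 0;	/* held in network order while scanning */
	uint32_t i, segcount;

	p += FIXED_MAXSZ;

	/* Read list: (discriminator, position, segment) entries */
	while (*p++ != 0) {
		p++;		/* skip position */
		if (!inv_rkey)
			inv_rkey = *p;
		else if (inv_rkey != *p)
			return 0;
		p += SEGMENT_MAXSZ;
	}

	/* Write list: (discriminator, segcount, segments...) chunks */
	while (*p++ != 0) {
		segcount = ntohl(*p++);
		for (i = 0; i < segcount; i++) {
			if (!inv_rkey)
				inv_rkey = *p;
			else if (inv_rkey != *p)
				return 0;
			p += SEGMENT_MAXSZ;
		}
	}

	/* Reply chunk: at most one (discriminator, segcount, segments...) */
	if (*p++ != 0) {
		segcount = ntohl(*p++);
		for (i = 0; i < segcount; i++) {
			if (!inv_rkey)
				inv_rkey = *p;
			else if (inv_rkey != *p)
				return 0;
			p += SEGMENT_MAXSZ;
		}
	}

	return ntohl(inv_rkey);
}

int main(void)
{
	/* Sample transport header: empty Read list, one Write chunk of
	 * two segments that share R_key 0xabcd, no Reply chunk. */
	uint32_t hdr[] = {
		htonl(1), htonl(1), htonl(32), htonl(0),	/* fixed header */
		htonl(0),					/* Read list: empty */
		htonl(1), htonl(2),				/* Write chunk, 2 segments */
		htonl(0xabcd), htonl(4096), htonl(0), htonl(0),
		htonl(0xabcd), htonl(4096), htonl(0), htonl(4096),
		htonl(0),					/* Write list terminator */
		htonl(0),					/* Reply chunk: absent */
	};

	printf("R_key to invalidate: 0x%x\n", (unsigned)get_inv_rkey(hdr));
	return 0;
}

As written it prints "R_key to invalidate: 0xabcd"; changing one segment's handle makes it print 0, which is the case where the server falls back to plain Send and the client performs local invalidation after the remote-invalidation wait, exactly the cost the commit message describes avoiding.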