Re: [PATCH v1] svcrdma: Optimize the logic that selects the R_key to invalidate

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




> On Nov 27, 2018, at 11:29 AM, bfields@xxxxxxxxxxxx wrote:
> 
> On Tue, Nov 27, 2018 at 11:11:35AM -0500, Chuck Lever wrote:
>> o Select the R_key to invalidate while the CPU cache still contains
>>  the received RPC Call transport header, rather than waiting until
>>  we're about to send the RPC Reply.
>> 
>> o Choose Send With Invalidate if there is exactly one distinct R_key
>>  in the received transport header. If there's more than one, the
>>  client will have to perform local invalidation after it has
>>  already waited for remote invalidation.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx>
>> ---
>> Hi-
>> 
>> Please consider this NFS server-side patch for v4.21.
> 
> OK, thanks, applying.
> 
> (By the way, I'd appreciate it if patch submissions had
> bfields@xxxxxxxxxxxx on the To: line; my filters handle that a little
> differently than mailing list traffic.)

I've been told not to include To: when the patch is being
presented for review. This is a v1. If you feel it is ready
to go in, great! But I purposely did not include To: Bruce
because it has not had any review yet.


> --b.
> 
>> 
>> 
>> include/linux/sunrpc/svc_rdma.h         |    1 
>> net/sunrpc/xprtrdma/svc_rdma_recvfrom.c |   63 +++++++++++++++++++++++++++++++
>> net/sunrpc/xprtrdma/svc_rdma_sendto.c   |   53 ++++++--------------------
>> 3 files changed, 77 insertions(+), 40 deletions(-)
>> 
>> diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
>> index e6e2691..7e22681 100644
>> --- a/include/linux/sunrpc/svc_rdma.h
>> +++ b/include/linux/sunrpc/svc_rdma.h
>> @@ -135,6 +135,7 @@ struct svc_rdma_recv_ctxt {
>> 	u32			rc_byte_len;
>> 	unsigned int		rc_page_count;
>> 	unsigned int		rc_hdr_count;
>> +	u32			rc_inv_rkey;
>> 	struct page		*rc_pages[RPCSVC_MAXPAGES];
>> };
>> 
>> diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
>> index b24d5b8..828b149 100644
>> --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
>> +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
>> @@ -485,6 +485,68 @@ static __be32 *xdr_check_reply_chunk(__be32 *p, const __be32 *end)
>> 	return p;
>> }
>> 
>> +/* RPC-over-RDMA Version One private extension: Remote Invalidation.
>> + * Responder's choice: requester signals it can handle Send With
>> + * Invalidate, and responder chooses one R_key to invalidate.
>> + *
>> + * If there is exactly one distinct R_key in the received transport
>> + * header, set rc_inv_rkey to that R_key. Otherwise, set it to zero.
>> + *
>> + * Perform this operation while the received transport header is
>> + * still in the CPU cache.
>> + */
>> +static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
>> +				  struct svc_rdma_recv_ctxt *ctxt)
>> +{
>> +	__be32 inv_rkey, *p;
>> +	u32 i, segcount;
>> +
>> +	ctxt->rc_inv_rkey = 0;
>> +
>> +	if (!rdma->sc_snd_w_inv)
>> +		return;
>> +
>> +	inv_rkey = xdr_zero;
>> +	p = ctxt->rc_recv_buf;
>> +	p += rpcrdma_fixed_maxsz;
>> +
>> +	/* Read list */
>> +	while (*p++ != xdr_zero) {
>> +		p++;	/* position */
>> +		if (inv_rkey == xdr_zero)
>> +			inv_rkey = *p;
>> +		else if (inv_rkey != *p)
>> +			return;
>> +		p += 4;
>> +	}
>> +
>> +	/* Write list */
>> +	while (*p++ != xdr_zero) {
>> +		segcount = be32_to_cpup(p++);
>> +		for (i = 0; i < segcount; i++) {
>> +			if (inv_rkey == xdr_zero)
>> +				inv_rkey = *p;
>> +			else if (inv_rkey != *p)
>> +				return;
>> +			p += 4;
>> +		}
>> +	}
>> +
>> +	/* Reply chunk */
>> +	if (*p++ != xdr_zero) {
>> +		segcount = be32_to_cpup(p++);
>> +		for (i = 0; i < segcount; i++) {
>> +			if (inv_rkey == xdr_zero)
>> +				inv_rkey = *p;
>> +			else if (inv_rkey != *p)
>> +				return;
>> +			p += 4;
>> +		}
>> +	}
>> +
>> +	ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey);
>> +}
>> +
>> /* On entry, xdr->head[0].iov_base points to first byte in the
>>  * RPC-over-RDMA header.
>>  *
>> @@ -746,6 +808,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
>> 		svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
>> 		return ret;
>> 	}
>> +	svc_rdma_get_inv_rkey(rdma_xprt, ctxt);
>> 
>> 	p += rpcrdma_fixed_maxsz;
>> 	if (*p != xdr_zero)
>> diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
>> index 8602a5f..d48bc6d 100644
>> --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
>> +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
>> @@ -484,32 +484,6 @@ static void svc_rdma_get_write_arrays(__be32 *rdma_argp,
>> 		*reply = NULL;
>> }
>> 
>> -/* RPC-over-RDMA Version One private extension: Remote Invalidation.
>> - * Responder's choice: requester signals it can handle Send With
>> - * Invalidate, and responder chooses one rkey to invalidate.
>> - *
>> - * Find a candidate rkey to invalidate when sending a reply.  Picks the
>> - * first R_key it finds in the chunk lists.
>> - *
>> - * Returns zero if RPC's chunk lists are empty.
>> - */
>> -static u32 svc_rdma_get_inv_rkey(__be32 *rdma_argp,
>> -				 __be32 *wr_lst, __be32 *rp_ch)
>> -{
>> -	__be32 *p;
>> -
>> -	p = rdma_argp + rpcrdma_fixed_maxsz;
>> -	if (*p != xdr_zero)
>> -		p += 2;
>> -	else if (wr_lst && be32_to_cpup(wr_lst + 1))
>> -		p = wr_lst + 2;
>> -	else if (rp_ch && be32_to_cpup(rp_ch + 1))
>> -		p = rp_ch + 2;
>> -	else
>> -		return 0;
>> -	return be32_to_cpup(p);
>> -}
>> -
>> static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
>> 				 struct svc_rdma_send_ctxt *ctxt,
>> 				 struct page *page,
>> @@ -672,7 +646,7 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
>>  *
>>  * RDMA Send is the last step of transmitting an RPC reply. Pages
>>  * involved in the earlier RDMA Writes are here transferred out
>> - * of the rqstp and into the ctxt's page array. These pages are
>> + * of the rqstp and into the sctxt's page array. These pages are
>>  * DMA unmapped by each Write completion, but the subsequent Send
>>  * completion finally releases these pages.
>>  *
>> @@ -680,32 +654,31 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
>>  * - The Reply's transport header will never be larger than a page.
>>  */
>> static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
>> -				   struct svc_rdma_send_ctxt *ctxt,
>> -				   __be32 *rdma_argp,
>> +				   struct svc_rdma_send_ctxt *sctxt,
>> +				   struct svc_rdma_recv_ctxt *rctxt,
>> 				   struct svc_rqst *rqstp,
>> 				   __be32 *wr_lst, __be32 *rp_ch)
>> {
>> 	int ret;
>> 
>> 	if (!rp_ch) {
>> -		ret = svc_rdma_map_reply_msg(rdma, ctxt,
>> +		ret = svc_rdma_map_reply_msg(rdma, sctxt,
>> 					     &rqstp->rq_res, wr_lst);
>> 		if (ret < 0)
>> 			return ret;
>> 	}
>> 
>> -	svc_rdma_save_io_pages(rqstp, ctxt);
>> +	svc_rdma_save_io_pages(rqstp, sctxt);
>> 
>> -	ctxt->sc_send_wr.opcode = IB_WR_SEND;
>> -	if (rdma->sc_snd_w_inv) {
>> -		ctxt->sc_send_wr.ex.invalidate_rkey =
>> -			svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch);
>> -		if (ctxt->sc_send_wr.ex.invalidate_rkey)
>> -			ctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
>> +	if (rctxt->rc_inv_rkey) {
>> +		sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
>> +		sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey;
>> +	} else {
>> +		sctxt->sc_send_wr.opcode = IB_WR_SEND;
>> 	}
>> 	dprintk("svcrdma: posting Send WR with %u sge(s)\n",
>> -		ctxt->sc_send_wr.num_sge);
>> -	return svc_rdma_send(rdma, &ctxt->sc_send_wr);
>> +		sctxt->sc_send_wr.num_sge);
>> +	return svc_rdma_send(rdma, &sctxt->sc_send_wr);
>> }
>> 
>> /* Given the client-provided Write and Reply chunks, the server was not
>> @@ -809,7 +782,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
>> 	}
>> 
>> 	svc_rdma_sync_reply_hdr(rdma, sctxt, svc_rdma_reply_hdr_len(rdma_resp));
>> -	ret = svc_rdma_send_reply_msg(rdma, sctxt, rdma_argp, rqstp,
>> +	ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp,
>> 				      wr_lst, rp_ch);
>> 	if (ret < 0)
>> 		goto err1;

--
Chuck Lever







[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux