Re: [PATCH v1 08/16] xprtrdma: Add "reset MRs" memreg op

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mar 24, 2015, at 7:27 AM, Devesh Sharma <Devesh.Sharma@xxxxxxxxxx> wrote:

>> -----Original Message-----
>> From: linux-rdma-owner@xxxxxxxxxxxxxxx [mailto:linux-rdma-
>> owner@xxxxxxxxxxxxxxx] On Behalf Of Chuck Lever
>> Sent: Saturday, March 14, 2015 2:58 AM
>> To: linux-rdma@xxxxxxxxxxxxxxx
>> Subject: [PATCH v1 08/16] xprtrdma: Add "reset MRs" memreg op
>> 
>> This method is invoked when a transport instance is about to be reconnected.
>> Each Memory Region object is reset to its initial state.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx>
>> ---
>> net/sunrpc/xprtrdma/fmr_ops.c      |   23 ++++++++
>> net/sunrpc/xprtrdma/frwr_ops.c     |   46 ++++++++++++++++
>> net/sunrpc/xprtrdma/physical_ops.c |    6 ++
>> net/sunrpc/xprtrdma/verbs.c        |  103 +-----------------------------------
>> net/sunrpc/xprtrdma/xprt_rdma.h    |    1
>> 5 files changed, 78 insertions(+), 101 deletions(-)
>> 
>> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
>> index 1501db0..1ccb3de 100644
>> --- a/net/sunrpc/xprtrdma/fmr_ops.c
>> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
>> @@ -156,10 +156,33 @@ fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
>> 		i += __fmr_unmap(r_xprt, &req->rl_segments[i]);
>> }
>> 
>> +/* After a disconnect, unmap all FMRs.
>> + *
>> + * This is invoked only in the transport connect worker in order
>> + * to serialize with rpcrdma_register_fmr_external().
>> + */
>> +static void
>> +fmr_op_reset(struct rpcrdma_xprt *r_xprt)
>> +{
>> +	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
>> +	struct rpcrdma_mw *r;
>> +	LIST_HEAD(list);
>> +	int rc;
>> +
>> +	list_for_each_entry(r, &buf->rb_all, mw_all)
>> +		list_add(&r->r.fmr->list, &list);
>> +
>> +	rc = ib_unmap_fmr(&list);
>> +	if (rc)
>> +		dprintk("RPC:       %s: ib_unmap_fmr failed %i\n",
>> +			__func__, rc);
>> +}
>> +
>> const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
>> 	.ro_map				= fmr_op_map,
>> 	.ro_unmap			= fmr_op_unmap,
>> 	.ro_maxpages			= fmr_op_maxpages,
>> 	.ro_init			= fmr_op_init,
>> +	.ro_reset			= fmr_op_reset,
>> 	.ro_displayname			= "fmr",
>> };
>> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
>> index 975372c..b4ce0e5 100644
>> --- a/net/sunrpc/xprtrdma/frwr_ops.c
>> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
>> @@ -81,6 +81,18 @@ out_err:
>> 	return nsegs;
>> }
>> 
>> +static void
>> +__frwr_release(struct rpcrdma_mw *r)
>> +{
>> +	int rc;
>> +
>> +	rc = ib_dereg_mr(r->r.frmr.fr_mr);
>> +	if (rc)
>> +		dprintk("RPC:       %s: ib_dereg_mr status %i\n",
>> +			__func__, rc);
>> +	ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
>> +}
>> +
>> /* FRWR mode conveys a list of pages per chunk segment. The
>>  * maximum length of that list is the FRWR page list depth.
>>  */
>> @@ -226,10 +238,44 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
>> 		i += __frwr_unmap(r_xprt, &req->rl_segments[i]);
>> }
>> 
>> +/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
>> + * an unusable state. Find FRMRs in this state and dereg / reg
>> + * each.  FRMRs that are VALID and attached to an rpcrdma_req are
>> + * also torn down.
>> + *
>> + * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
>> + *
>> + * This is invoked only in the transport connect worker in order
>> + * to serialize with rpcrdma_register_frmr_external().
>> + */
>> +static void
>> +frwr_op_reset(struct rpcrdma_xprt *r_xprt)
>> +{
>> +	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
>> +	struct ib_device *device = r_xprt->rx_ia.ri_id->device;
>> +	unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
>> +	struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
>> +	struct rpcrdma_mw *r;
>> +	int rc;
>> +
>> +	list_for_each_entry(r, &buf->rb_all, mw_all) {
>> +		if (r->r.frmr.fr_state == FRMR_IS_INVALID)
>> +			continue;
>> +
>> +		__frwr_release(r);
>> +		rc = __frwr_init(r, pd, device, depth);
>> +		if (rc)
>> +			continue;
> 
> Should we print something here, e.g. "failed to allocate frmr, mount will work with fewer frmrs, performance hit is expected"?

I can’t remember why we skip the FRWR in this case. I think the
transport will eventually recover it (if needed, by a second
reconnect).

>> +
>> +		r->r.frmr.fr_state = FRMR_IS_INVALID;
>> +	}
>> +}
>> +
>> const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
>> 	.ro_map				= frwr_op_map,
>> 	.ro_unmap			= frwr_op_unmap,
>> 	.ro_maxpages			= frwr_op_maxpages,
>> 	.ro_init			= frwr_op_init,
>> +	.ro_reset			= frwr_op_reset,
>> 	.ro_displayname			= "frwr",
>> };
>> diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
>> index ae2b0bc..0afc691 100644
>> --- a/net/sunrpc/xprtrdma/physical_ops.c
>> +++ b/net/sunrpc/xprtrdma/physical_ops.c
>> @@ -62,10 +62,16 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
>> 		rpcrdma_unmap_one(&r_xprt->rx_ia, &req->rl_segments[i]);
>> }
>> 
>> +static void
>> +physical_op_reset(struct rpcrdma_xprt *r_xprt)
>> +{
>> +}
>> +
>> const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
>> 	.ro_map				= physical_op_map,
>> 	.ro_unmap			= physical_op_unmap,
>> 	.ro_maxpages			= physical_op_maxpages,
>> 	.ro_init			= physical_op_init,
>> +	.ro_reset			= physical_op_reset,
>> 	.ro_displayname			= "physical",
>> };
>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
>> index d7810d6..e17d91a 100644
>> --- a/net/sunrpc/xprtrdma/verbs.c
>> +++ b/net/sunrpc/xprtrdma/verbs.c
>> @@ -63,9 +63,6 @@
>> # define RPCDBG_FACILITY	RPCDBG_TRANS
>> #endif
>> 
>> -static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
>> -static void rpcrdma_reset_fmrs(struct rpcrdma_ia *);
>> -
>> /*
>>  * internal functions
>>  */
>> @@ -944,21 +941,9 @@ retry:
>> 		rpcrdma_ep_disconnect(ep, ia);
>> 		rpcrdma_flush_cqs(ep);
>> 
>> -		switch (ia->ri_memreg_strategy) {
>> -		case RPCRDMA_FRMR:
>> -			rpcrdma_reset_frmrs(ia);
>> -			break;
>> -		case RPCRDMA_MTHCAFMR:
>> -			rpcrdma_reset_fmrs(ia);
>> -			break;
>> -		case RPCRDMA_ALLPHYSICAL:
>> -			break;
>> -		default:
>> -			rc = -EIO;
>> -			goto out;
>> -		}
>> -
>> 		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
>> +		ia->ri_ops->ro_reset(xprt);
>> +
>> 		id = rpcrdma_create_id(xprt, ia,
>> 				(struct sockaddr *)&xprt->rx_data.addr);
>> 		if (IS_ERR(id)) {
>> @@ -1288,90 +1273,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer
>> *buf)
>> 	kfree(buf->rb_pool);
>> }
>> 
>> -/* After a disconnect, unmap all FMRs.
>> - *
>> - * This is invoked only in the transport connect worker in order
>> - * to serialize with rpcrdma_register_fmr_external().
>> - */
>> -static void
>> -rpcrdma_reset_fmrs(struct rpcrdma_ia *ia)
>> -{
>> -	struct rpcrdma_xprt *r_xprt =
>> -				container_of(ia, struct rpcrdma_xprt, rx_ia);
>> -	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
>> -	struct list_head *pos;
>> -	struct rpcrdma_mw *r;
>> -	LIST_HEAD(l);
>> -	int rc;
>> -
>> -	list_for_each(pos, &buf->rb_all) {
>> -		r = list_entry(pos, struct rpcrdma_mw, mw_all);
>> -
>> -		INIT_LIST_HEAD(&l);
>> -		list_add(&r->r.fmr->list, &l);
>> -		rc = ib_unmap_fmr(&l);
>> -		if (rc)
>> -			dprintk("RPC:       %s: ib_unmap_fmr failed %i\n",
>> -				__func__, rc);
>> -	}
>> -}
>> -
>> -/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
>> - * an unusable state. Find FRMRs in this state and dereg / reg
>> - * each.  FRMRs that are VALID and attached to an rpcrdma_req are
>> - * also torn down.
>> - *
>> - * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
>> - *
>> - * This is invoked only in the transport connect worker in order
>> - * to serialize with rpcrdma_register_frmr_external().
>> - */
>> -static void
>> -rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
>> -{
>> -	struct rpcrdma_xprt *r_xprt =
>> -				container_of(ia, struct rpcrdma_xprt, rx_ia);
>> -	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
>> -	struct list_head *pos;
>> -	struct rpcrdma_mw *r;
>> -	int rc;
>> -
>> -	list_for_each(pos, &buf->rb_all) {
>> -		r = list_entry(pos, struct rpcrdma_mw, mw_all);
>> -
>> -		if (r->r.frmr.fr_state == FRMR_IS_INVALID)
>> -			continue;
>> -
>> -		rc = ib_dereg_mr(r->r.frmr.fr_mr);
>> -		if (rc)
>> -			dprintk("RPC:       %s: ib_dereg_mr failed %i\n",
>> -				__func__, rc);
>> -		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
>> -
>> -		r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
>> -					ia->ri_max_frmr_depth);
>> -		if (IS_ERR(r->r.frmr.fr_mr)) {
>> -			rc = PTR_ERR(r->r.frmr.fr_mr);
>> -			dprintk("RPC:       %s: ib_alloc_fast_reg_mr"
>> -				" failed %i\n", __func__, rc);
>> -			continue;
>> -		}
>> -		r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
>> -					ia->ri_id->device,
>> -					ia->ri_max_frmr_depth);
>> -		if (IS_ERR(r->r.frmr.fr_pgl)) {
>> -			rc = PTR_ERR(r->r.frmr.fr_pgl);
>> -			dprintk("RPC:       %s: "
>> -				"ib_alloc_fast_reg_page_list "
>> -				"failed %i\n", __func__, rc);
>> -
>> -			ib_dereg_mr(r->r.frmr.fr_mr);
>> -			continue;
>> -		}
>> -		r->r.frmr.fr_state = FRMR_IS_INVALID;
>> -	}
>> -}
>> -
>> /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
>>  * some req segments uninitialized.
>>  */
>> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
>> index 4fe3c38..cdf6763 100644
>> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
>> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
>> @@ -342,6 +342,7 @@ struct rpcrdma_memreg_ops {
>> 				    struct rpcrdma_req *, unsigned int);
>> 	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
>> 	int		(*ro_init)(struct rpcrdma_xprt *);
>> +	void		(*ro_reset)(struct rpcrdma_xprt *);
>> 	const char	*ro_displayname;
>> };
>> 
>> 
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body
>> of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at
>> http://vger.kernel.org/majordomo-info.html

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux