[no subject]

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Librdmacm supports rdma_get_local_addr(),rdma_get_peer_addr(),rdma_get_src_port() and
rdma_get_dst_port() at userspace.
So if needed, it's easy to support them in linux kernel.

Rpcrdma and nvme use the src_addr/dst_addr directly as 
(struct sockaddr *)&cma_xprt->sc_cm_id->route.addr.src_addr or
(struct sockaddr *)&queue->cm_id->route.addr.dst_addr.
Call helpers may be better then directly access.

I think, there is a key question for rpcrdma.
Is the port in rpcrdma connect the same meaning as tcp/udp port?
If it is, we need support the reuseport.

Thanks,
Kinglong Mee

> 
> But perhaps my recollection is stale.
> 
> 
>> Maybe someone needs the src port at rpcrdma connect, I made those patches. 
>>
>> For the knfsd and nfs client, I don't meet any problem.
>>
>> Thanks,
>> Kinglong Mee
>>>
>>>
>>>> Signed-off-by: Kinglong Mee <kinglongmee@xxxxxxxxx>
>>>> ---
>>>>  net/sunrpc/xprtrdma/transport.c |  36 ++++++++++++
>>>>  net/sunrpc/xprtrdma/verbs.c     | 100 ++++++++++++++++++++++++++++++++
>>>>  net/sunrpc/xprtrdma/xprt_rdma.h |   5 ++
>>>>  3 files changed, 141 insertions(+)
>>>>
>>>> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
>>>> index 9a8ce5df83ca..fee3b562932b 100644
>>>> --- a/net/sunrpc/xprtrdma/transport.c
>>>> +++ b/net/sunrpc/xprtrdma/transport.c
>>>> @@ -70,6 +70,10 @@ unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
>>>>  unsigned int xprt_rdma_memreg_strategy		= RPCRDMA_FRWR;
>>>>  int xprt_rdma_pad_optimize;
>>>>  static struct xprt_class xprt_rdma;
>>>> +static unsigned int xprt_rdma_min_resvport_limit = RPC_MIN_RESVPORT;
>>>> +static unsigned int xprt_rdma_max_resvport_limit = RPC_MAX_RESVPORT;
>>>> +unsigned int xprt_rdma_min_resvport = RPC_DEF_MIN_RESVPORT;
>>>> +unsigned int xprt_rdma_max_resvport = RPC_DEF_MAX_RESVPORT;
>>>>  
>>>>  #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
>>>>  
>>>> @@ -137,6 +141,24 @@ static struct ctl_table xr_tunables_table[] = {
>>>>  		.mode		= 0644,
>>>>  		.proc_handler	= proc_dointvec,
>>>>  	},
>>>> +	{
>>>> +		.procname	= "rdma_min_resvport",
>>>> +		.data		= &xprt_rdma_min_resvport,
>>>> +		.maxlen		= sizeof(unsigned int),
>>>> +		.mode		= 0644,
>>>> +		.proc_handler	= proc_dointvec_minmax,
>>>> +		.extra1		= &xprt_rdma_min_resvport_limit,
>>>> +		.extra2		= &xprt_rdma_max_resvport_limit
>>>> +	},
>>>> +	{
>>>> +		.procname	= "rdma_max_resvport",
>>>> +		.data		= &xprt_rdma_max_resvport,
>>>> +		.maxlen		= sizeof(unsigned int),
>>>> +		.mode		= 0644,
>>>> +		.proc_handler	= proc_dointvec_minmax,
>>>> +		.extra1		= &xprt_rdma_min_resvport_limit,
>>>> +		.extra2		= &xprt_rdma_max_resvport_limit
>>>> +	},
>>>>  };
>>>>  
>>>>  #endif
>>>> @@ -346,6 +368,20 @@ xprt_setup_rdma(struct xprt_create *args)
>>>>  	xprt_rdma_format_addresses(xprt, sap);
>>>>  
>>>>  	new_xprt = rpcx_to_rdmax(xprt);
>>>> +
>>>> +	if (args->srcaddr)
>>>> +		memcpy(&new_xprt->rx_srcaddr, args->srcaddr, args->addrlen);
>>>> +	else {
>>>> +		rc = rpc_init_anyaddr(args->dstaddr->sa_family,
>>>> +					(struct sockaddr *)&new_xprt->rx_srcaddr);
>>>> +		if (rc != 0) {
>>>> +			xprt_rdma_free_addresses(xprt);
>>>> +			xprt_free(xprt);
>>>> +			module_put(THIS_MODULE);
>>>> +			return ERR_PTR(rc);
>>>> +		}
>>>> +	}
>>>> +
>>>>  	rc = rpcrdma_buffer_create(new_xprt);
>>>>  	if (rc) {
>>>>  		xprt_rdma_free_addresses(xprt);
>>>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
>>>> index 63262ef0c2e3..0ce5123d799b 100644
>>>> --- a/net/sunrpc/xprtrdma/verbs.c
>>>> +++ b/net/sunrpc/xprtrdma/verbs.c
>>>> @@ -285,6 +285,98 @@ static void rpcrdma_ep_removal_done(struct rpcrdma_notification *rn)
>>>>  	xprt_force_disconnect(ep->re_xprt);
>>>>  }
>>>>  
>>>> +static int rpcrdma_get_random_port(void)
>>>> +{
>>>> +	unsigned short min = xprt_rdma_min_resvport, max = xprt_rdma_max_resvport;
>>>> +	unsigned short range;
>>>> +	unsigned short rand;
>>>> +
>>>> +	if (max < min)
>>>> +		return -EADDRINUSE;
>>>> +	range = max - min + 1;
>>>> +	rand = get_random_u32_below(range);
>>>> +	return rand + min;
>>>> +}
>>>> +
>>>> +static void rpcrdma_set_srcport(struct rpcrdma_xprt *r_xprt, struct rdma_cm_id *id)
>>>> +{
>>>> +        struct sockaddr *sap = (struct sockaddr *)&id->route.addr.src_addr;
>>>> +
>>>> +	if (r_xprt->rx_srcport == 0 && r_xprt->rx_xprt.reuseport) {
>>>> +		switch (sap->sa_family) {
>>>> +		case AF_INET6:
>>>> +			r_xprt->rx_srcport = ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
>>>> +			break;
>>>> +		case AF_INET:
>>>> +			r_xprt->rx_srcport = ntohs(((struct sockaddr_in *)sap)->sin_port);
>>>> +			break;
>>>> +		}
>>>> +	}
>>>> +}
>>>> +
>>>> +static int rpcrdma_get_srcport(struct rpcrdma_xprt *r_xprt)
>>>> +{
>>>> +	int port = r_xprt->rx_srcport;
>>>> +
>>>> +	if (port == 0 && r_xprt->rx_xprt.resvport)
>>>> +		port = rpcrdma_get_random_port();
>>>> +	return port;
>>>> +}
>>>> +
>>>> +static unsigned short rpcrdma_next_srcport(struct rpcrdma_xprt *r_xprt, unsigned short port)
>>>> +{
>>>> +	if (r_xprt->rx_srcport != 0)
>>>> +		r_xprt->rx_srcport = 0;
>>>> +	if (!r_xprt->rx_xprt.resvport)
>>>> +		return 0;
>>>> +	if (port <= xprt_rdma_min_resvport || port > xprt_rdma_max_resvport)
>>>> +		return xprt_rdma_max_resvport;
>>>> +	return --port;
>>>> +}
>>>> +
>>>> +static int rpcrdma_bind(struct rpcrdma_xprt *r_xprt, struct rdma_cm_id *id)
>>>> +{
>>>> +	struct sockaddr_storage myaddr;
>>>> +	int err, nloop = 0;
>>>> +	int port = rpcrdma_get_srcport(r_xprt);
>>>> +	unsigned short last;
>>>> +
>>>> +	/*
>>>> +	 * If we are asking for any ephemeral port (i.e. port == 0 &&
>>>> +	 * r_xprt->rx_xprt.resvport == 0), don't bind.  Let the local
>>>> +	 * port selection happen implicitly when the socket is used
>>>> +	 * (for example at connect time).
>>>> +	 *
>>>> +	 * This ensures that we can continue to establish TCP
>>>> +	 * connections even when all local ephemeral ports are already
>>>> +	 * a part of some TCP connection.  This makes no difference
>>>> +	 * for UDP sockets, but also doesn't harm them.
>>>> +	 *
>>>> +	 * If we're asking for any reserved port (i.e. port == 0 &&
>>>> +	 * r_xprt->rx_xprt.resvport == 1) rpcrdma_get_srcport above will
>>>> +	 * ensure that port is non-zero and we will bind as needed.
>>>> +	 */
>>>> +	if (port <= 0)
>>>> +		return port;
>>>> +
>>>> +	memcpy(&myaddr, &r_xprt->rx_srcaddr, r_xprt->rx_xprt.addrlen);
>>>> +	do {
>>>> +		rpc_set_port((struct sockaddr *)&myaddr, port);
>>>> +		err = rdma_bind_addr(id, (struct sockaddr *)&myaddr);
>>>> +		if (err == 0) {
>>>> +			if (r_xprt->rx_xprt.reuseport)
>>>> +				r_xprt->rx_srcport = port;
>>>> +			break;
>>>> +		}
>>>> +		last = port;
>>>> +		port = rpcrdma_next_srcport(r_xprt, port);
>>>> +		if (port > last)
>>>> +			nloop++;
>>>> +	} while (err == -EADDRINUSE && nloop != 2);
>>>> +
>>>> +	return err;
>>>> +}
>>>> +
>>>>  static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt,
>>>>  					    struct rpcrdma_ep *ep)
>>>>  {
>>>> @@ -300,6 +392,12 @@ static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt,
>>>>  	if (IS_ERR(id))
>>>>  		return id;
>>>>  
>>>> +	rc = rpcrdma_bind(r_xprt, id);
>>>> +	if (rc) {
>>>> +		rc = -ENOTCONN;
>>>> +		goto out;
>>>> +	}
>>>> +
>>>>  	ep->re_async_rc = -ETIMEDOUT;
>>>>  	rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)&xprt->addr,
>>>>  			       RDMA_RESOLVE_TIMEOUT);
>>>> @@ -328,6 +426,8 @@ static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt,
>>>>  	if (rc)
>>>>  		goto out;
>>>>  
>>>> +	rpcrdma_set_srcport(r_xprt, id);
>>>> +
>>>>  	return id;
>>>>  
>>>>  out:
>>>> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
>>>> index 8147d2b41494..9c7bcb541267 100644
>>>> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
>>>> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
>>>> @@ -433,6 +433,9 @@ struct rpcrdma_xprt {
>>>>  	struct delayed_work	rx_connect_worker;
>>>>  	struct rpc_timeout	rx_timeout;
>>>>  	struct rpcrdma_stats	rx_stats;
>>>> +
>>>> +	struct sockaddr_storage	rx_srcaddr;
>>>> +	unsigned short		rx_srcport;
>>>>  };
>>>>  
>>>>  #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
>>>> @@ -581,6 +584,8 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
>>>>   */
>>>>  extern unsigned int xprt_rdma_max_inline_read;
>>>>  extern unsigned int xprt_rdma_max_inline_write;
>>>> +extern unsigned int xprt_rdma_min_resvport;
>>>> +extern unsigned int xprt_rdma_max_resvport;
>>>>  void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
>>>>  void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
>>>>  void xprt_rdma_close(struct rpc_xprt *xprt);
>>>> -- 
>>>> 2.47.0
>>>>
>>>
>>
> 





[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux