[RFC PATCH 4/5] SUNRPC: support resvport and reuseport for rpcrdma

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



NFSd's DRC key contains peer port, but rpcrdma does't support port resue now.
This patch supports resvport and resueport as tcp/udp for rpcrdma.
 
Signed-off-by: Kinglong Mee <kinglongmee@xxxxxxxxx>
---
 net/sunrpc/xprtrdma/transport.c |  36 ++++++++++++
 net/sunrpc/xprtrdma/verbs.c     | 100 ++++++++++++++++++++++++++++++++
 net/sunrpc/xprtrdma/xprt_rdma.h |   5 ++
 3 files changed, 141 insertions(+)

diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 9a8ce5df83ca..fee3b562932b 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -70,6 +70,10 @@ unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
 unsigned int xprt_rdma_memreg_strategy		= RPCRDMA_FRWR;
 int xprt_rdma_pad_optimize;
 static struct xprt_class xprt_rdma;
+static unsigned int xprt_rdma_min_resvport_limit = RPC_MIN_RESVPORT;
+static unsigned int xprt_rdma_max_resvport_limit = RPC_MAX_RESVPORT;
+unsigned int xprt_rdma_min_resvport = RPC_DEF_MIN_RESVPORT;
+unsigned int xprt_rdma_max_resvport = RPC_DEF_MAX_RESVPORT;
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 
@@ -137,6 +141,24 @@ static struct ctl_table xr_tunables_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "rdma_min_resvport",
+		.data		= &xprt_rdma_min_resvport,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &xprt_rdma_min_resvport_limit,
+		.extra2		= &xprt_rdma_max_resvport_limit
+	},
+	{
+		.procname	= "rdma_max_resvport",
+		.data		= &xprt_rdma_max_resvport,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &xprt_rdma_min_resvport_limit,
+		.extra2		= &xprt_rdma_max_resvport_limit
+	},
 };
 
 #endif
@@ -346,6 +368,20 @@ xprt_setup_rdma(struct xprt_create *args)
 	xprt_rdma_format_addresses(xprt, sap);
 
 	new_xprt = rpcx_to_rdmax(xprt);
+
+	if (args->srcaddr)
+		memcpy(&new_xprt->rx_srcaddr, args->srcaddr, args->addrlen);
+	else {
+		rc = rpc_init_anyaddr(args->dstaddr->sa_family,
+					(struct sockaddr *)&new_xprt->rx_srcaddr);
+		if (rc != 0) {
+			xprt_rdma_free_addresses(xprt);
+			xprt_free(xprt);
+			module_put(THIS_MODULE);
+			return ERR_PTR(rc);
+		}
+	}
+
 	rc = rpcrdma_buffer_create(new_xprt);
 	if (rc) {
 		xprt_rdma_free_addresses(xprt);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 63262ef0c2e3..0ce5123d799b 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -285,6 +285,98 @@ static void rpcrdma_ep_removal_done(struct rpcrdma_notification *rn)
 	xprt_force_disconnect(ep->re_xprt);
 }
 
+static int rpcrdma_get_random_port(void)
+{
+	unsigned short min = xprt_rdma_min_resvport, max = xprt_rdma_max_resvport;
+	unsigned short range;
+	unsigned short rand;
+
+	if (max < min)
+		return -EADDRINUSE;
+	range = max - min + 1;
+	rand = get_random_u32_below(range);
+	return rand + min;
+}
+
+static void rpcrdma_set_srcport(struct rpcrdma_xprt *r_xprt, struct rdma_cm_id *id)
+{
+        struct sockaddr *sap = (struct sockaddr *)&id->route.addr.src_addr;
+
+	if (r_xprt->rx_srcport == 0 && r_xprt->rx_xprt.reuseport) {
+		switch (sap->sa_family) {
+		case AF_INET6:
+			r_xprt->rx_srcport = ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
+			break;
+		case AF_INET:
+			r_xprt->rx_srcport = ntohs(((struct sockaddr_in *)sap)->sin_port);
+			break;
+		}
+	}
+}
+
+static int rpcrdma_get_srcport(struct rpcrdma_xprt *r_xprt)
+{
+	int port = r_xprt->rx_srcport;
+
+	if (port == 0 && r_xprt->rx_xprt.resvport)
+		port = rpcrdma_get_random_port();
+	return port;
+}
+
+static unsigned short rpcrdma_next_srcport(struct rpcrdma_xprt *r_xprt, unsigned short port)
+{
+	if (r_xprt->rx_srcport != 0)
+		r_xprt->rx_srcport = 0;
+	if (!r_xprt->rx_xprt.resvport)
+		return 0;
+	if (port <= xprt_rdma_min_resvport || port > xprt_rdma_max_resvport)
+		return xprt_rdma_max_resvport;
+	return --port;
+}
+
+static int rpcrdma_bind(struct rpcrdma_xprt *r_xprt, struct rdma_cm_id *id)
+{
+	struct sockaddr_storage myaddr;
+	int err, nloop = 0;
+	int port = rpcrdma_get_srcport(r_xprt);
+	unsigned short last;
+
+	/*
+	 * If we are asking for any ephemeral port (i.e. port == 0 &&
+	 * r_xprt->rx_xprt.resvport == 0), don't bind.  Let the local
+	 * port selection happen implicitly when the socket is used
+	 * (for example at connect time).
+	 *
+	 * This ensures that we can continue to establish TCP
+	 * connections even when all local ephemeral ports are already
+	 * a part of some TCP connection.  This makes no difference
+	 * for UDP sockets, but also doesn't harm them.
+	 *
+	 * If we're asking for any reserved port (i.e. port == 0 &&
+	 * r_xprt->rx_xprt.resvport == 1) rpcrdma_get_srcport above will
+	 * ensure that port is non-zero and we will bind as needed.
+	 */
+	if (port <= 0)
+		return port;
+
+	memcpy(&myaddr, &r_xprt->rx_srcaddr, r_xprt->rx_xprt.addrlen);
+	do {
+		rpc_set_port((struct sockaddr *)&myaddr, port);
+		err = rdma_bind_addr(id, (struct sockaddr *)&myaddr);
+		if (err == 0) {
+			if (r_xprt->rx_xprt.reuseport)
+				r_xprt->rx_srcport = port;
+			break;
+		}
+		last = port;
+		port = rpcrdma_next_srcport(r_xprt, port);
+		if (port > last)
+			nloop++;
+	} while (err == -EADDRINUSE && nloop != 2);
+
+	return err;
+}
+
 static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt,
 					    struct rpcrdma_ep *ep)
 {
@@ -300,6 +392,12 @@ static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt,
 	if (IS_ERR(id))
 		return id;
 
+	rc = rpcrdma_bind(r_xprt, id);
+	if (rc) {
+		rc = -ENOTCONN;
+		goto out;
+	}
+
 	ep->re_async_rc = -ETIMEDOUT;
 	rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)&xprt->addr,
 			       RDMA_RESOLVE_TIMEOUT);
@@ -328,6 +426,8 @@ static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt,
 	if (rc)
 		goto out;
 
+	rpcrdma_set_srcport(r_xprt, id);
+
 	return id;
 
 out:
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 8147d2b41494..9c7bcb541267 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -433,6 +433,9 @@ struct rpcrdma_xprt {
 	struct delayed_work	rx_connect_worker;
 	struct rpc_timeout	rx_timeout;
 	struct rpcrdma_stats	rx_stats;
+
+	struct sockaddr_storage	rx_srcaddr;
+	unsigned short		rx_srcport;
 };
 
 #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
@@ -581,6 +584,8 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
  */
 extern unsigned int xprt_rdma_max_inline_read;
 extern unsigned int xprt_rdma_max_inline_write;
+extern unsigned int xprt_rdma_min_resvport;
+extern unsigned int xprt_rdma_max_resvport;
 void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
 void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
 void xprt_rdma_close(struct rpc_xprt *xprt);
-- 
2.47.0





[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux