Librdmacm supports rdma_get_local_addr(),rdma_get_peer_addr(),rdma_get_src_port() and rdma_get_dst_port() at userspace. So if needed, it's easy to support them in linux kernel. Rpcrdma and nvme use the src_addr/dst_addr directly as (struct sockaddr *)&cma_xprt->sc_cm_id->route.addr.src_addr or (struct sockaddr *)&queue->cm_id->route.addr.dst_addr. Call helpers may be better then directly access. I think, there is a key question for rpcrdma. Is the port in rpcrdma connect the same meaning as tcp/udp port? If it is, we need support the reuseport. Thanks, Kinglong Mee > > But perhaps my recollection is stale. > > >> Maybe someone needs the src port at rpcrdma connect, I made those patches. >> >> For the knfsd and nfs client, I don't meet any problem. >> >> Thanks, >> Kinglong Mee >>> >>> >>>> Signed-off-by: Kinglong Mee <kinglongmee@xxxxxxxxx> >>>> --- >>>> net/sunrpc/xprtrdma/transport.c | 36 ++++++++++++ >>>> net/sunrpc/xprtrdma/verbs.c | 100 ++++++++++++++++++++++++++++++++ >>>> net/sunrpc/xprtrdma/xprt_rdma.h | 5 ++ >>>> 3 files changed, 141 insertions(+) >>>> >>>> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c >>>> index 9a8ce5df83ca..fee3b562932b 100644 >>>> --- a/net/sunrpc/xprtrdma/transport.c >>>> +++ b/net/sunrpc/xprtrdma/transport.c >>>> @@ -70,6 +70,10 @@ unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; >>>> unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR; >>>> int xprt_rdma_pad_optimize; >>>> static struct xprt_class xprt_rdma; >>>> +static unsigned int xprt_rdma_min_resvport_limit = RPC_MIN_RESVPORT; >>>> +static unsigned int xprt_rdma_max_resvport_limit = RPC_MAX_RESVPORT; >>>> +unsigned int xprt_rdma_min_resvport = RPC_DEF_MIN_RESVPORT; >>>> +unsigned int xprt_rdma_max_resvport = RPC_DEF_MAX_RESVPORT; >>>> >>>> #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) >>>> >>>> @@ -137,6 +141,24 @@ static struct ctl_table xr_tunables_table[] = { >>>> .mode = 0644, >>>> .proc_handler = proc_dointvec, >>>> }, >>>> + { >>>> + .procname = "rdma_min_resvport", >>>> + .data = &xprt_rdma_min_resvport, >>>> + .maxlen = sizeof(unsigned int), >>>> + .mode = 0644, >>>> + .proc_handler = proc_dointvec_minmax, >>>> + .extra1 = &xprt_rdma_min_resvport_limit, >>>> + .extra2 = &xprt_rdma_max_resvport_limit >>>> + }, >>>> + { >>>> + .procname = "rdma_max_resvport", >>>> + .data = &xprt_rdma_max_resvport, >>>> + .maxlen = sizeof(unsigned int), >>>> + .mode = 0644, >>>> + .proc_handler = proc_dointvec_minmax, >>>> + .extra1 = &xprt_rdma_min_resvport_limit, >>>> + .extra2 = &xprt_rdma_max_resvport_limit >>>> + }, >>>> }; >>>> >>>> #endif >>>> @@ -346,6 +368,20 @@ xprt_setup_rdma(struct xprt_create *args) >>>> xprt_rdma_format_addresses(xprt, sap); >>>> >>>> new_xprt = rpcx_to_rdmax(xprt); >>>> + >>>> + if (args->srcaddr) >>>> + memcpy(&new_xprt->rx_srcaddr, args->srcaddr, args->addrlen); >>>> + else { >>>> + rc = rpc_init_anyaddr(args->dstaddr->sa_family, >>>> + (struct sockaddr *)&new_xprt->rx_srcaddr); >>>> + if (rc != 0) { >>>> + xprt_rdma_free_addresses(xprt); >>>> + xprt_free(xprt); >>>> + module_put(THIS_MODULE); >>>> + return ERR_PTR(rc); >>>> + } >>>> + } >>>> + >>>> rc = rpcrdma_buffer_create(new_xprt); >>>> if (rc) { >>>> xprt_rdma_free_addresses(xprt); >>>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c >>>> index 63262ef0c2e3..0ce5123d799b 100644 >>>> --- a/net/sunrpc/xprtrdma/verbs.c >>>> +++ b/net/sunrpc/xprtrdma/verbs.c >>>> @@ -285,6 +285,98 @@ static void rpcrdma_ep_removal_done(struct rpcrdma_notification *rn) >>>> xprt_force_disconnect(ep->re_xprt); >>>> } >>>> >>>> +static int rpcrdma_get_random_port(void) >>>> +{ >>>> + unsigned short min = xprt_rdma_min_resvport, max = xprt_rdma_max_resvport; >>>> + unsigned short range; >>>> + unsigned short rand; >>>> + >>>> + if (max < min) >>>> + return -EADDRINUSE; >>>> + range = max - min + 1; >>>> + rand = get_random_u32_below(range); >>>> + return rand + min; >>>> +} >>>> + >>>> +static void rpcrdma_set_srcport(struct rpcrdma_xprt *r_xprt, struct rdma_cm_id *id) >>>> +{ >>>> + struct sockaddr *sap = (struct sockaddr *)&id->route.addr.src_addr; >>>> + >>>> + if (r_xprt->rx_srcport == 0 && r_xprt->rx_xprt.reuseport) { >>>> + switch (sap->sa_family) { >>>> + case AF_INET6: >>>> + r_xprt->rx_srcport = ntohs(((struct sockaddr_in6 *)sap)->sin6_port); >>>> + break; >>>> + case AF_INET: >>>> + r_xprt->rx_srcport = ntohs(((struct sockaddr_in *)sap)->sin_port); >>>> + break; >>>> + } >>>> + } >>>> +} >>>> + >>>> +static int rpcrdma_get_srcport(struct rpcrdma_xprt *r_xprt) >>>> +{ >>>> + int port = r_xprt->rx_srcport; >>>> + >>>> + if (port == 0 && r_xprt->rx_xprt.resvport) >>>> + port = rpcrdma_get_random_port(); >>>> + return port; >>>> +} >>>> + >>>> +static unsigned short rpcrdma_next_srcport(struct rpcrdma_xprt *r_xprt, unsigned short port) >>>> +{ >>>> + if (r_xprt->rx_srcport != 0) >>>> + r_xprt->rx_srcport = 0; >>>> + if (!r_xprt->rx_xprt.resvport) >>>> + return 0; >>>> + if (port <= xprt_rdma_min_resvport || port > xprt_rdma_max_resvport) >>>> + return xprt_rdma_max_resvport; >>>> + return --port; >>>> +} >>>> + >>>> +static int rpcrdma_bind(struct rpcrdma_xprt *r_xprt, struct rdma_cm_id *id) >>>> +{ >>>> + struct sockaddr_storage myaddr; >>>> + int err, nloop = 0; >>>> + int port = rpcrdma_get_srcport(r_xprt); >>>> + unsigned short last; >>>> + >>>> + /* >>>> + * If we are asking for any ephemeral port (i.e. port == 0 && >>>> + * r_xprt->rx_xprt.resvport == 0), don't bind. Let the local >>>> + * port selection happen implicitly when the socket is used >>>> + * (for example at connect time). >>>> + * >>>> + * This ensures that we can continue to establish TCP >>>> + * connections even when all local ephemeral ports are already >>>> + * a part of some TCP connection. This makes no difference >>>> + * for UDP sockets, but also doesn't harm them. >>>> + * >>>> + * If we're asking for any reserved port (i.e. port == 0 && >>>> + * r_xprt->rx_xprt.resvport == 1) rpcrdma_get_srcport above will >>>> + * ensure that port is non-zero and we will bind as needed. >>>> + */ >>>> + if (port <= 0) >>>> + return port; >>>> + >>>> + memcpy(&myaddr, &r_xprt->rx_srcaddr, r_xprt->rx_xprt.addrlen); >>>> + do { >>>> + rpc_set_port((struct sockaddr *)&myaddr, port); >>>> + err = rdma_bind_addr(id, (struct sockaddr *)&myaddr); >>>> + if (err == 0) { >>>> + if (r_xprt->rx_xprt.reuseport) >>>> + r_xprt->rx_srcport = port; >>>> + break; >>>> + } >>>> + last = port; >>>> + port = rpcrdma_next_srcport(r_xprt, port); >>>> + if (port > last) >>>> + nloop++; >>>> + } while (err == -EADDRINUSE && nloop != 2); >>>> + >>>> + return err; >>>> +} >>>> + >>>> static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt, >>>> struct rpcrdma_ep *ep) >>>> { >>>> @@ -300,6 +392,12 @@ static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt, >>>> if (IS_ERR(id)) >>>> return id; >>>> >>>> + rc = rpcrdma_bind(r_xprt, id); >>>> + if (rc) { >>>> + rc = -ENOTCONN; >>>> + goto out; >>>> + } >>>> + >>>> ep->re_async_rc = -ETIMEDOUT; >>>> rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)&xprt->addr, >>>> RDMA_RESOLVE_TIMEOUT); >>>> @@ -328,6 +426,8 @@ static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt, >>>> if (rc) >>>> goto out; >>>> >>>> + rpcrdma_set_srcport(r_xprt, id); >>>> + >>>> return id; >>>> >>>> out: >>>> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h >>>> index 8147d2b41494..9c7bcb541267 100644 >>>> --- a/net/sunrpc/xprtrdma/xprt_rdma.h >>>> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h >>>> @@ -433,6 +433,9 @@ struct rpcrdma_xprt { >>>> struct delayed_work rx_connect_worker; >>>> struct rpc_timeout rx_timeout; >>>> struct rpcrdma_stats rx_stats; >>>> + >>>> + struct sockaddr_storage rx_srcaddr; >>>> + unsigned short rx_srcport; >>>> }; >>>> >>>> #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt) >>>> @@ -581,6 +584,8 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len) >>>> */ >>>> extern unsigned int xprt_rdma_max_inline_read; >>>> extern unsigned int xprt_rdma_max_inline_write; >>>> +extern unsigned int xprt_rdma_min_resvport; >>>> +extern unsigned int xprt_rdma_max_resvport; >>>> void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap); >>>> void xprt_rdma_free_addresses(struct rpc_xprt *xprt); >>>> void xprt_rdma_close(struct rpc_xprt *xprt); >>>> -- >>>> 2.47.0 >>>> >>> >> >