From: "D. Wythe" <alibuda@xxxxxxxxxxxxxxxxx> This patch implements a dummy version of inet smc sock, and register it into the inet protocols, which allows us to create a inet smc sock. Note that, the ops is forked from tcp ops. The vast majority of fields are consistent with TCP, and those cannot be consistent, mainly including, 1. obj_size 2. tw_prot and rsk_prot 3. function than need to be override, explicitly set to NULL. Signed-off-by: D. Wythe <alibuda@xxxxxxxxxxxxxxxxx> --- net/smc/Makefile | 1 + net/smc/af_smc.c | 46 +++++++- net/smc/smc_inet.c | 315 +++++++++++++++++++++++++++++++++++++++++++++++++++++ net/smc/smc_inet.h | 86 +++++++++++++++ 4 files changed, 447 insertions(+), 1 deletion(-) create mode 100644 net/smc/smc_inet.c create mode 100644 net/smc/smc_inet.h diff --git a/net/smc/Makefile b/net/smc/Makefile index 875efcd..4f10c3b 100644 --- a/net/smc/Makefile +++ b/net/smc/Makefile @@ -5,4 +5,5 @@ obj-$(CONFIG_SMC_DIAG) += smc_diag.o smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o smc-y += smc_tracepoint.o +smc-y += smc_inet.o smc-$(CONFIG_SYSCTL) += smc_sysctl.o diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 97e3951..390fe6c 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -36,6 +36,9 @@ #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/protocol.h> +#include <net/inet_common.h> +#include <net/transp_v6.h> #include "smc_netns.h" #include "smc.h" @@ -53,6 +56,7 @@ #include "smc_stats.h" #include "smc_tracepoint.h" #include "smc_sysctl.h" +#include "smc_inet.h" static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group * creation on server @@ -3658,9 +3662,36 @@ static int __init smc_init(void) goto out_ib; } + /* init smc inet sock related proto and proto_ops */ + rc = smc_inet_sock_init(); + if (!rc) { + /* registe smc inet proto */ + rc = proto_register(&smc_inet_prot, 1); + if (rc) { + 
pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc); + goto out_ulp; + } + /* no return value */ + inet_register_protosw(&smc_inet_protosw); +#if IS_ENABLED(CONFIG_IPV6) + /* register smc inet6 proto */ + rc = proto_register(&smc_inet6_prot, 1); + if (rc) { + pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc); + goto out_proto_register; + } + /* no return value */ + inet6_register_protosw(&smc_inet6_protosw); +#endif + } + static_branch_enable(&tcp_have_smc); return 0; - +out_proto_register: + inet_unregister_protosw(&smc_inet_protosw); + proto_unregister(&smc_inet_prot); +out_ulp: + tcp_unregister_ulp(&smc_ulp_ops); out_ib: smc_ib_unregister_client(); out_sock: @@ -3695,6 +3726,10 @@ static int __init smc_init(void) static void __exit smc_exit(void) { static_branch_disable(&tcp_have_smc); + inet_unregister_protosw(&smc_inet_protosw); +#if IS_ENABLED(CONFIG_IPV6) + inet6_unregister_protosw(&smc_inet6_protosw); +#endif tcp_unregister_ulp(&smc_ulp_ops); sock_unregister(PF_SMC); smc_core_exit(); @@ -3705,6 +3740,10 @@ static void __exit smc_exit(void) destroy_workqueue(smc_hs_wq); proto_unregister(&smc_proto6); proto_unregister(&smc_proto); + proto_unregister(&smc_inet_prot); +#if IS_ENABLED(CONFIG_IPV6) + proto_unregister(&smc_inet6_prot); +#endif smc_pnet_exit(); smc_nl_exit(); smc_clc_exit(); @@ -3720,5 +3759,10 @@ static void __exit smc_exit(void) MODULE_DESCRIPTION("smc socket address family"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_SMC); +/* It seems that this macro has different + * understanding of enum type(IPPROTO_SMC or SOCK_STREAM) + */ +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1); MODULE_ALIAS_TCP_ULP("smc"); MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME); diff --git a/net/smc/smc_inet.c b/net/smc/smc_inet.c new file mode 100644 index 00000000..d35b567 --- /dev/null +++ b/net/smc/smc_inet.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0-only 
+/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * AF_SMC protocol family socket handler keeping the AF_INET sock address type + * applies to SOCK_STREAM sockets only + * offers an alternative communication option for TCP-protocol sockets + * applicable with RoCE-cards only + * + * Initial restrictions: + * - support for alternate links postponed + * + * Copyright IBM Corp. 2016, 2018 + * + */ + +#include <net/sock.h> +#include <net/inet_common.h> + +#include "smc_inet.h" +#include "smc.h" + +static struct timewait_sock_ops smc_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct tcp_timewait_sock), + .twsk_unique = tcp_twsk_unique, + .twsk_destructor = tcp_twsk_destructor, +}; + +static struct timewait_sock_ops smc6_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct tcp6_timewait_sock), + .twsk_unique = tcp_twsk_unique, + .twsk_destructor = tcp_twsk_destructor, +}; + +struct proto smc_inet_prot = { + .name = "SMC", + .owner = THIS_MODULE, + .close = tcp_close, + .pre_connect = NULL, /* set from tcp_prot by smc_inet_sock_init() */ + .connect = tcp_v4_connect, + .disconnect = tcp_disconnect, + .accept = smc_inet_csk_accept, + .ioctl = tcp_ioctl, + .init = smc_inet_init_sock, + .destroy = tcp_v4_destroy_sock, + .shutdown = tcp_shutdown, + .setsockopt = tcp_setsockopt, + .getsockopt = tcp_getsockopt, + .keepalive = tcp_set_keepalive, + .recvmsg = tcp_recvmsg, + .sendmsg = tcp_sendmsg, + .backlog_rcv = tcp_v4_do_rcv, + .release_cb = smc_inet_sock_proto_release_cb, + .hash = inet_hash, + .unhash = inet_unhash, + .get_port = inet_csk_get_port, + .enter_memory_pressure = tcp_enter_memory_pressure, + .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, + .leave_memory_pressure = tcp_leave_memory_pressure, + .stream_memory_free = tcp_stream_memory_free, + .sockets_allocated = &tcp_sockets_allocated, + .orphan_count = &tcp_orphan_count, + .memory_allocated = &tcp_memory_allocated, + .memory_pressure = &tcp_memory_pressure, + .sysctl_mem = sysctl_tcp_mem, + .sysctl_wmem_offset = offsetof(struct net, 
ipv4.sysctl_tcp_wmem), + .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), + .max_header = MAX_TCP_HEADER, + .obj_size = sizeof(struct smc_sock), + .slab_flags = SLAB_TYPESAFE_BY_RCU, + .twsk_prot = &smc_timewait_sock_ops, + /* tcp_conn_request will use tcp_request_sock_ops */ + .rsk_prot = NULL, + .h.hashinfo = &tcp_hashinfo, + .no_autobind = true, + .diag_destroy = tcp_abort, +}; +EXPORT_SYMBOL_GPL(smc_inet_prot); + +const struct proto_ops smc_inet_stream_ops = { + .family = PF_INET, + .owner = THIS_MODULE, + .release = smc_inet_release, + .bind = inet_bind, + .connect = smc_inet_connect, + .socketpair = sock_no_socketpair, + .accept = inet_accept, + .getname = inet_getname, + .poll = smc_inet_poll, + .ioctl = smc_inet_ioctl, + .gettstamp = sock_gettstamp, + .listen = smc_inet_listen, + .shutdown = smc_inet_shutdown, + .setsockopt = smc_inet_setsockopt, + .getsockopt = smc_inet_getsockopt, + .sendmsg = smc_inet_sendmsg, + .recvmsg = smc_inet_recvmsg, +#ifdef CONFIG_MMU + .mmap = tcp_mmap, +#endif + .splice_read = smc_inet_splice_read, + .read_sock = tcp_read_sock, + .sendmsg_locked = tcp_sendmsg_locked, + .peek_len = tcp_peek_len, +#ifdef CONFIG_COMPAT + .compat_ioctl = inet_compat_ioctl, +#endif + .set_rcvlowat = tcp_set_rcvlowat, +}; + +struct inet_protosw smc_inet_protosw = { + .type = SOCK_STREAM, + .protocol = IPPROTO_SMC, + .prot = &smc_inet_prot, + .ops = &smc_inet_stream_ops, + .flags = INET_PROTOSW_ICSK, +}; + +#if IS_ENABLED(CONFIG_IPV6) +struct proto smc_inet6_prot = { + .name = "SMCv6", + .owner = THIS_MODULE, + .close = tcp_close, + .pre_connect = NULL, /* set from tcpv6_prot by smc_inet_sock_init() */ + .connect = NULL, /* set from tcpv6_prot by smc_inet_sock_init() */ + .disconnect = tcp_disconnect, + .accept = smc_inet_csk_accept, + .ioctl = tcp_ioctl, + .init = smc_inet_init_sock, + .destroy = NULL, /* set from tcpv6_prot by smc_inet_sock_init() */ + .shutdown = tcp_shutdown, + .setsockopt = tcp_setsockopt, + .getsockopt = tcp_getsockopt, + .keepalive = tcp_set_keepalive, + .recvmsg = tcp_recvmsg, + .sendmsg = tcp_sendmsg, + .backlog_rcv = NULL, /* set from tcpv6_prot by smc_inet_sock_init() */ + .release_cb = 
smc_inet_sock_proto_release_cb, + .hash = NULL, /* set from tcpv6_prot by smc_inet_sock_init() */ + .unhash = inet_unhash, + .get_port = inet_csk_get_port, + .enter_memory_pressure = tcp_enter_memory_pressure, + .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, + .leave_memory_pressure = tcp_leave_memory_pressure, + .stream_memory_free = tcp_stream_memory_free, + .sockets_allocated = &tcp_sockets_allocated, + .memory_allocated = &tcp_memory_allocated, + .memory_pressure = &tcp_memory_pressure, + .orphan_count = &tcp_orphan_count, + .sysctl_mem = sysctl_tcp_mem, + .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), + .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), + .max_header = MAX_TCP_HEADER, + .obj_size = sizeof(struct smc_sock), + .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6), + .slab_flags = SLAB_TYPESAFE_BY_RCU, + .twsk_prot = &smc6_timewait_sock_ops, + /* tcp_conn_request will use tcp_request_sock_ops */ + .rsk_prot = NULL, + .h.hashinfo = &tcp_hashinfo, + .no_autobind = true, + .diag_destroy = tcp_abort, +}; +EXPORT_SYMBOL_GPL(smc_inet6_prot); + +const struct proto_ops smc_inet6_stream_ops = { + .family = PF_INET6, + .owner = THIS_MODULE, + .release = smc_inet_release, + .bind = inet6_bind, + .connect = smc_inet_connect, /* ok */ + .socketpair = sock_no_socketpair, /* a do nothing */ + .accept = inet_accept, /* ok */ + .getname = inet6_getname, + .poll = smc_inet_poll, /* ok */ + .ioctl = smc_inet_ioctl, /* must change */ + .gettstamp = sock_gettstamp, + .listen = smc_inet_listen, /* ok */ + .shutdown = smc_inet_shutdown, /* ok */ + .setsockopt = smc_inet_setsockopt, /* ok */ + .getsockopt = smc_inet_getsockopt, /* ok */ + .sendmsg = smc_inet_sendmsg, /* retpoline's sake */ + .recvmsg = smc_inet_recvmsg, /* retpoline's sake */ +#ifdef CONFIG_MMU + .mmap = tcp_mmap, +#endif + .sendmsg_locked = tcp_sendmsg_locked, + .splice_read = smc_inet_splice_read, + .read_sock = tcp_read_sock, + .peek_len = tcp_peek_len, +#ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, 
+#endif + .set_rcvlowat = tcp_set_rcvlowat, +}; + +struct inet_protosw smc_inet6_protosw = { + .type = SOCK_STREAM, + .protocol = IPPROTO_SMC, + .prot = &smc_inet6_prot, + .ops = &smc_inet6_stream_ops, + .flags = INET_PROTOSW_ICSK, +}; +#endif + +int smc_inet_sock_init(void) /* returns 0, or -EINVAL if a TCP proto lookup yields NULL */ +{ + struct proto *tcp_v4prot; +#if IS_ENABLED(CONFIG_IPV6) + struct proto *tcp_v6prot; +#endif + + tcp_v4prot = smc_inet_get_tcp_prot(PF_INET); + if (unlikely(!tcp_v4prot)) + return -EINVAL; + +#if IS_ENABLED(CONFIG_IPV6) + tcp_v6prot = smc_inet_get_tcp_prot(PF_INET6); + if (unlikely(!tcp_v6prot)) + return -EINVAL; +#endif + + /* INET socks have an issue here: a twsk holds a reference to this module, + * so the SMC module may not be removable right after a test program ends. + * The twsk does eventually release its reference to the module, but + * this may affect some old test cases if they try to remove the module + * immediately after completing their test. + */ + + /* Complete the prot and proto_ops to ensure consistency with + * TCP. Some of the TCP symbols have not been exported, + * so they have to be assigned here at runtime. 
+ */ + smc_inet_prot.pre_connect = tcp_v4prot->pre_connect; + +#if IS_ENABLED(CONFIG_IPV6) + smc_inet6_prot.pre_connect = tcp_v6prot->pre_connect; + smc_inet6_prot.connect = tcp_v6prot->connect; + smc_inet6_prot.destroy = tcp_v6prot->destroy; + smc_inet6_prot.backlog_rcv = tcp_v6prot->backlog_rcv; + smc_inet6_prot.hash = tcp_v6prot->hash; +#endif + return 0; +} + +int smc_inet_init_sock(struct sock *sk) { return 0; } /* dummy: no per-sock setup is done */ + +void smc_inet_sock_proto_release_cb(struct sock *sk) {} + +int smc_inet_connect(struct socket *sock, struct sockaddr *addr, + int alen, int flags) +{ + return -EOPNOTSUPP; +} + +int smc_inet_setsockopt(struct socket *sock, int level, int optname, + sockptr_t optval, unsigned int optlen) +{ + return -EOPNOTSUPP; +} + +int smc_inet_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + return -EOPNOTSUPP; +} + +int smc_inet_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + return -EOPNOTSUPP; +} + +int smc_inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) +{ + return -EOPNOTSUPP; +} + +int smc_inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) +{ + return -EOPNOTSUPP; +} + +ssize_t smc_inet_splice_read(struct socket *sock, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) +{ + return -EOPNOTSUPP; +} + +__poll_t smc_inet_poll(struct file *file, struct socket *sock, poll_table *wait) +{ + return 0; +} + +struct sock *smc_inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) +{ + return NULL; /* NOTE(review): *err is not written on this path */ +} + +int smc_inet_listen(struct socket *sock, int backlog) +{ + return -EOPNOTSUPP; +} + +int smc_inet_shutdown(struct socket *sock, int how) +{ + return -EOPNOTSUPP; +} + +int smc_inet_release(struct socket *sock) +{ + return -EOPNOTSUPP; /* NOTE(review): stub; nothing is released here */ +} diff --git a/net/smc/smc_inet.h b/net/smc/smc_inet.h new file mode 100644 index 00000000..68ecfa0 --- /dev/null +++ b/net/smc/smc_inet.h @@ -0,0 +1,86 @@ +/* 
SPDX-License-Identifier: GPL-2.0 */ +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * Definitions for the SMC module (socket related) + * + * Copyright IBM Corp. 2016 + * + */ + +#ifndef __SMC_INET +#define __SMC_INET + +#include <net/protocol.h> +#include <net/sock.h> +#include <net/tcp.h> +#include <net/ipv6.h> +/* MUST be included after net/tcp.h, otherwise a warning is emitted */ +#include <net/transp_v6.h> + +extern struct proto smc_inet_prot; +extern struct proto smc_inet6_prot; + +extern const struct proto_ops smc_inet_stream_ops; +extern const struct proto_ops smc_inet6_stream_ops; + +extern struct inet_protosw smc_inet_protosw; +extern struct inet_protosw smc_inet6_protosw; + +/* Obtain the TCP proto for a sock family. AF_INET6 is only resolved when + * CONFIG_IPV6 is enabled; any other family warns once and yields NULL. + */ +static __always_inline struct proto *smc_inet_get_tcp_prot(int family) +{ + if (family == AF_INET) + return &tcp_prot; +#if IS_ENABLED(CONFIG_IPV6) + if (family == AF_INET6) + return &tcpv6_prot; +#endif + pr_warn_once("smc: %s(unknown family %d)\n", __func__, family); + return NULL; +} + +/* This function initializes the inet related structures. + * If initialization is successful, it returns 0; + * otherwise, it returns a non-zero value. 
+ */ +int smc_inet_sock_init(void); + +int smc_inet_init_sock(struct sock *sk); +void smc_inet_sock_proto_release_cb(struct sock *sk); + +int smc_inet_connect(struct socket *sock, struct sockaddr *addr, + int alen, int flags); + +int smc_inet_setsockopt(struct socket *sock, int level, int optname, + sockptr_t optval, unsigned int optlen); + +int smc_inet_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen); + +int smc_inet_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg); + +int smc_inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len); + +int smc_inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags); + +ssize_t smc_inet_sendpage(struct socket *sock, struct page *page, + int offset, size_t size, int flags); /* NOTE(review): no definition in smc_inet.c as added by this patch */ + +ssize_t smc_inet_splice_read(struct socket *sock, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags); + +__poll_t smc_inet_poll(struct file *file, struct socket *sock, poll_table *wait); + +struct sock *smc_inet_csk_accept(struct sock *sk, int flags, int *err, bool kern); +int smc_inet_listen(struct socket *sock, int backlog); + +int smc_inet_shutdown(struct socket *sock, int how); +int smc_inet_release(struct socket *sock); + +#endif // __SMC_INET -- 1.8.3.1