On Thu, Jun 13, 2024 at 8:00 PM D. Wythe <alibuda@xxxxxxxxxxxxxxxxx> wrote: > > From: "D. Wythe" <alibuda@xxxxxxxxxxxxxxxxx> > > This patch allows to create smc socket via AF_INET, > similar to the following code, > > /* create v4 smc sock */ > v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC); > > /* create v6 smc sock */ > v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC); > > There are several reasons why we believe it is appropriate here: > > 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6) > address. There is no AF_SMC address at all. > > 2. Create smc socket in the AF_INET(6) path, which allows us to reuse > the infrastructure of AF_INET(6) path, such as common ebpf hooks. > Otherwise, smc have to implement it again in AF_SMC path. > > Signed-off-by: D. Wythe <alibuda@xxxxxxxxxxxxxxxxx> > Reviewed-by: Wenjia Zhang <wenjia@xxxxxxxxxxxxx> > Reviewed-by: Dust Li <dust.li@xxxxxxxxxxxxxxxxx> > Tested-by: Niklas Schnelle <schnelle@xxxxxxxxxxxxx> > Tested-by: Wenjia Zhang <wenjia@xxxxxxxxxxxxx> > --- > include/uapi/linux/in.h | 2 + > net/smc/Makefile | 2 +- > net/smc/af_smc.c | 16 ++++- > net/smc/smc_inet.c | 159 ++++++++++++++++++++++++++++++++++++++++++++++++ > net/smc/smc_inet.h | 22 +++++++ > 5 files changed, 198 insertions(+), 3 deletions(-) > create mode 100644 net/smc/smc_inet.c > create mode 100644 net/smc/smc_inet.h > > diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h > index e682ab6..d358add 100644 > --- a/include/uapi/linux/in.h > +++ b/include/uapi/linux/in.h > @@ -81,6 +81,8 @@ enum { > #define IPPROTO_ETHERNET IPPROTO_ETHERNET > IPPROTO_RAW = 255, /* Raw IP packets */ > #define IPPROTO_RAW IPPROTO_RAW > + IPPROTO_SMC = 256, /* Shared Memory Communications */ > +#define IPPROTO_SMC IPPROTO_SMC > IPPROTO_MPTCP = 262, /* Multipath TCP connection */ > #define IPPROTO_MPTCP IPPROTO_MPTCP > IPPROTO_MAX > diff --git a/net/smc/Makefile b/net/smc/Makefile > index 2c510d54..60f1c87 100644 > --- a/net/smc/Makefile > +++ 
b/net/smc/Makefile > @@ -4,6 +4,6 @@ obj-$(CONFIG_SMC) += smc.o > obj-$(CONFIG_SMC_DIAG) += smc_diag.o > smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o > smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o > -smc-y += smc_tracepoint.o > +smc-y += smc_tracepoint.o smc_inet.o > smc-$(CONFIG_SYSCTL) += smc_sysctl.o > smc-$(CONFIG_SMC_LO) += smc_loopback.o > diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c > index 8e3ce76..435f38b 100644 > --- a/net/smc/af_smc.c > +++ b/net/smc/af_smc.c > @@ -54,6 +54,7 @@ > #include "smc_tracepoint.h" > #include "smc_sysctl.h" > #include "smc_loopback.h" > +#include "smc_inet.h" > > static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group > * creation on server > @@ -3593,10 +3594,15 @@ static int __init smc_init(void) > pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc); > goto out_lo; > } > - > + rc = smc_inet_init(); > + if (rc) { > + pr_err("%s: smc_inet_init fails with %d\n", __func__, rc); > + goto out_ulp; > + } > static_branch_enable(&tcp_have_smc); > return 0; > - > +out_ulp: > + tcp_unregister_ulp(&smc_ulp_ops); > out_lo: > smc_loopback_exit(); > out_ib: > @@ -3633,6 +3639,7 @@ static int __init smc_init(void) > static void __exit smc_exit(void) > { > static_branch_disable(&tcp_have_smc); > + smc_inet_exit(); > tcp_unregister_ulp(&smc_ulp_ops); > sock_unregister(PF_SMC); > smc_core_exit(); > @@ -3660,4 +3667,9 @@ static void __exit smc_exit(void) > MODULE_LICENSE("GPL"); > MODULE_ALIAS_NETPROTO(PF_SMC); > MODULE_ALIAS_TCP_ULP("smc"); > +/* 256 for IPPROTO_SMC and 1 for SOCK_STREAM */ > +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 256, 1); > +#if IS_ENABLED(CONFIG_IPV6) > +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 256, 1); > +#endif /* CONFIG_IPV6 */ > MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME); > diff --git a/net/smc/smc_inet.c b/net/smc/smc_inet.c > new file mode 100644 > index 00000000..bece346 > --- /dev/null > +++ b/net/smc/smc_inet.c > 
@@ -0,0 +1,159 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * Shared Memory Communications over RDMA (SMC-R) and RoCE > + * > + * Definitions for the IPPROTO_SMC (socket related) > + * > + * Copyright IBM Corp. 2016, 2018 > + * Copyright (c) 2024, Alibaba Inc. > + * > + * Author: D. Wythe <alibuda@xxxxxxxxxxxxxxxxx> > + */ > + > +#include <net/protocol.h> > +#include <net/sock.h> > + > +#include "smc_inet.h" > +#include "smc.h" > + > +static int smc_inet_init_sock(struct sock *sk); > + > +static struct proto smc_inet_prot = { > + .name = "INET_SMC", > + .owner = THIS_MODULE, > + .init = smc_inet_init_sock, > + .hash = smc_hash_sk, > + .unhash = smc_unhash_sk, > + .release_cb = smc_release_cb, > + .obj_size = sizeof(struct smc_sock), > + .h.smc_hash = &smc_v4_hashinfo, > + .slab_flags = SLAB_TYPESAFE_BY_RCU, > +}; > + > +static const struct proto_ops smc_inet_stream_ops = { > + .family = PF_INET, > + .owner = THIS_MODULE, > + .release = smc_release, > + .bind = smc_bind, > + .connect = smc_connect, > + .socketpair = sock_no_socketpair, > + .accept = smc_accept, > + .getname = smc_getname, > + .poll = smc_poll, > + .ioctl = smc_ioctl, > + .listen = smc_listen, > + .shutdown = smc_shutdown, > + .setsockopt = smc_setsockopt, > + .getsockopt = smc_getsockopt, > + .sendmsg = smc_sendmsg, > + .recvmsg = smc_recvmsg, > + .mmap = sock_no_mmap, > + .splice_read = smc_splice_read, > +}; > + > +static struct inet_protosw smc_inet_protosw = { > + .type = SOCK_STREAM, > + .protocol = IPPROTO_SMC, > + .prot = &smc_inet_prot, > + .ops = &smc_inet_stream_ops, > + .flags = INET_PROTOSW_ICSK, When INET_PROTOSW_ICSK is set, icsk->icsk_sync_mss must also be set. Otherwise it is left NULL and gets called anyway, which crashes at address 0 (see "pc : 0x0" with "lr : cipso_v4_sock_setattr" in the trace below, reached from socket() via smack_socket_post_create):
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Mem abort info: ESR = 0x0000000086000005 EC = 0x21: IABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x05: level 1 translation fault user pgtable: 4k pages, 48-bit VAs, pgdp=00000001195d1000 [0000000000000000] pgd=0800000109c46003, p4d=0800000109c46003, pud=0000000000000000 Internal error: Oops: 0000000086000005 [#1] PREEMPT SMP Modules linked in: CPU: 1 UID: 0 PID: 8037 Comm: syz.3.265 Not tainted 6.11.0-rc7-syzkaller-g5f5673607153 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : 0x0 lr : cipso_v4_sock_setattr+0x2a8/0x3c0 net/ipv4/cipso_ipv4.c:1910 sp : ffff80009b887a90 x29: ffff80009b887aa0 x28: ffff80008db94050 x27: 0000000000000000 x26: 1fffe0001aa6f5b3 x25: dfff800000000000 x24: ffff0000db75da00 x23: 0000000000000000 x22: ffff0000d8b78518 x21: 0000000000000000 x20: ffff0000d537ad80 x19: ffff0000d8b78000 x18: 1fffe000366d79ee x17: ffff8000800614a8 x16: ffff800080569b84 x15: 0000000000000001 x14: 000000008b336894 x13: 00000000cd96feaa x12: 0000000000000003 x11: 0000000000040000 x10: 00000000000020a3 x9 : 1fffe0001b16f0f1 x8 : 0000000000000000 x7 : 0000000000000000 x6 : 000000000000003f x5 : 0000000000000040 x4 : 0000000000000001 x3 : 0000000000000000 x2 : 0000000000000002 x1 : 0000000000000000 x0 : ffff0000d8b78000 Call trace: 0x0 netlbl_sock_setattr+0x2e4/0x338 net/netlabel/netlabel_kapi.c:1000 smack_netlbl_add+0xa4/0x154 security/smack/smack_lsm.c:2593 smack_socket_post_create+0xa8/0x14c security/smack/smack_lsm.c:2973 security_socket_post_create+0x94/0xd4 security/security.c:4425 __sock_create+0x4c8/0x884 net/socket.c:1587 sock_create net/socket.c:1622 [inline] __sys_socket_create net/socket.c:1659 [inline] __sys_socket+0x134/0x340 net/socket.c:1706 __do_sys_socket net/socket.c:1720 [inline] __se_sys_socket net/socket.c:1718 [inline] 
__arm64_sys_socket+0x7c/0x94 net/socket.c:1718 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:49 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:132 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:151 el0_svc+0x54/0x168 arch/arm64/kernel/entry-common.c:712 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:730 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:598 Code: ???????? ???????? ???????? ???????? (????????) ---[ end trace 0000000000000000 ]---