On Tue, Feb 25, 2020 at 01:56:32PM +0000, Lorenz Bauer wrote: > The sockmap works by overriding some of the callbacks in sk->sk_prot, while > leaving others untouched. This means that we need access to the struct proto > for any protocol we want to support. For IPv4 this is trivial, since both > TCP and UDP are always compiled in. IPv6 may be disabled or compiled as a > module, so the existing TCP sockmap hooks use some trickery to lazily > initialize the modified struct proto for TCPv6. > > Pull this logic into a standalone struct sk_psock_hooks, so that it can be > re-used by UDP sockmap. > > Signed-off-by: Lorenz Bauer <lmb@xxxxxxxxxxxxxx> > --- > include/linux/skmsg.h | 36 ++++++++----- > include/net/tcp.h | 1 - > net/core/skmsg.c | 52 +++++++++++++++++++ > net/core/sock_map.c | 24 ++++----- > net/ipv4/tcp_bpf.c | 114 ++++++++++++------------------------------ > 5 files changed, 116 insertions(+), 111 deletions(-) > > diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h > index c881094387db..70d65ab10b5c 100644 > --- a/include/linux/skmsg.h > +++ b/include/linux/skmsg.h > @@ -109,6 +109,16 @@ struct sk_psock { > }; > }; > > +struct sk_psock_hooks { > + struct proto *base_ipv6; > + struct proto *ipv4; > + struct proto *ipv6; > + spinlock_t ipv6_lock; > + int (*rebuild_proto)(struct proto prot[], struct proto *base); > + struct proto *(*choose_proto)(struct proto prot[], > + struct sk_psock *psock); > +}; > + > int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, > int elem_first_coalesce); > int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src, > @@ -335,23 +345,14 @@ static inline void sk_psock_cork_free(struct sk_psock *psock) > } > } > > -static inline void sk_psock_update_proto(struct sock *sk, > - struct sk_psock *psock, > - struct proto *ops) > -{ > - psock->saved_unhash = sk->sk_prot->unhash; > - psock->saved_close = sk->sk_prot->close; > - psock->saved_write_space = sk->sk_write_space; > - > - psock->sk_proto = sk->sk_prot; > - /* Pairs with lockless read in sk_clone_lock() */ > - WRITE_ONCE(sk->sk_prot, ops); > -} > - > static inline void sk_psock_restore_proto(struct sock *sk, > struct sk_psock *psock) > { > + if (!psock->sk_proto) > + return; > + > sk->sk_prot->unhash = psock->saved_unhash; > + > if (inet_sk(sk)->is_icsk) { > tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); > } else { > @@ -424,4 +425,13 @@ static inline void psock_progs_drop(struct sk_psock_progs *progs) > psock_set_prog(&progs->skb_verdict, NULL); > } > > +static inline int sk_psock_hooks_init(struct sk_psock_hooks *hooks, > + struct proto *ipv4_base) > +{ > + hooks->ipv6_lock = __SPIN_LOCK_UNLOCKED(); > + return hooks->rebuild_proto(hooks->ipv4, ipv4_base); > +} > + > +int sk_psock_hooks_install(struct sk_psock_hooks *hooks, struct sock *sk); > + > #endif /* _LINUX_SKMSG_H */ > diff --git a/include/net/tcp.h b/include/net/tcp.h > index 07f947cc80e6..ccf39d80b695 100644 > --- a/include/net/tcp.h > +++ b/include/net/tcp.h > @@ -2196,7 +2196,6 @@ struct sk_msg; > struct sk_psock; > > int tcp_bpf_init(struct sock *sk); > -void tcp_bpf_reinit(struct sock *sk); > int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes, > int flags); > int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, > diff --git a/net/core/skmsg.c b/net/core/skmsg.c > index eeb28cb85664..a9bdf02c2539 100644 > --- a/net/core/skmsg.c > +++ b/net/core/skmsg.c > @@ -844,3 +844,55 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock) > strp_stop(&parser->strp); > parser->enabled = false; > } > + > +static inline int sk_psock_hooks_init_ipv6(struct sk_psock_hooks *hooks, > + struct proto *base) > +{ > + int ret = 0; > + > + if (likely(base == smp_load_acquire(&hooks->base_ipv6))) > + return 0; > + > + spin_lock_bh(&hooks->ipv6_lock); > + if (likely(base != hooks->base_ipv6)) { > + ret = hooks->rebuild_proto(hooks->ipv6, base); > + if (!ret) > + smp_store_release(&hooks->base_ipv6, base); > + } > + spin_unlock_bh(&hooks->ipv6_lock); > + return ret; > +} > + > +int sk_psock_hooks_install(struct sk_psock_hooks *hooks, struct sock *sk) > +{ > + struct sk_psock *psock = sk_psock(sk); > + struct proto *prot_base; > + > + WARN_ON_ONCE(!rcu_read_lock_held()); Is this only for the earlier sk_psock(sk)? > + > + if (unlikely(!psock)) When will this happen? > + return -EINVAL; > + > + /* Initialize saved callbacks and original proto only once. > + * Since we've not installed the hooks, psock is not yet in use and > + * we can initialize it without synchronization. > + */ > + if (!psock->sk_proto) { If I read it correctly, this is to replace the tcp_bpf_reinit_sk_prot()? I think some of the current reinit comment is useful to keep also: /* Reinit occurs when program types change e.g. TCP_BPF_TX is removed ... */ > + struct proto *prot = READ_ONCE(sk->sk_prot); > + > + if (sk->sk_family == AF_INET6 && > + sk_psock_hooks_init_ipv6(hooks, prot)) > + return -EINVAL; > + > + psock->saved_unhash = prot->unhash; > + psock->saved_close = prot->close; > + psock->saved_write_space = sk->sk_write_space; > + > + psock->sk_proto = prot; > + } > + > + /* Pairs with lockless read in sk_clone_lock() */ > + prot_base = sk->sk_family == AF_INET ? hooks->ipv4 : hooks->ipv6; > + WRITE_ONCE(sk->sk_prot, hooks->choose_proto(prot_base, psock)); > + return 0; > +}