Re: [PATCH bpf-next 3/7] skmsg: introduce sk_psock_hooks

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Feb 25, 2020 at 01:56:32PM +0000, Lorenz Bauer wrote:
> The sockmap works by overriding some of the callbacks in sk->sk_prot, while
> leaving others untouched. This means that we need access to the struct proto
> for any protocol we want to support. For IPv4 this is trivial, since both
> TCP and UDP are always compiled in. IPv6 may be disabled or compiled as a
> module, so the existing TCP sockmap hooks use some trickery to lazily
> initialize the modified struct proto for TCPv6.
> 
> Pull this logic into a standalone struct sk_psock_hooks, so that it can be
> re-used by UDP sockmap.
> 
> Signed-off-by: Lorenz Bauer <lmb@xxxxxxxxxxxxxx>
> ---
>  include/linux/skmsg.h |  36 ++++++++-----
>  include/net/tcp.h     |   1 -
>  net/core/skmsg.c      |  52 +++++++++++++++++++
>  net/core/sock_map.c   |  24 ++++-----
>  net/ipv4/tcp_bpf.c    | 114 ++++++++++++------------------------------
>  5 files changed, 116 insertions(+), 111 deletions(-)
> 
> diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
> index c881094387db..70d65ab10b5c 100644
> --- a/include/linux/skmsg.h
> +++ b/include/linux/skmsg.h
> @@ -109,6 +109,16 @@ struct sk_psock {
>  	};
>  };
>  
> +struct sk_psock_hooks {
> +	struct proto *base_ipv6;
> +	struct proto *ipv4;
> +	struct proto *ipv6;
> +	spinlock_t ipv6_lock;
> +	int (*rebuild_proto)(struct proto prot[], struct proto *base);
> +	struct proto *(*choose_proto)(struct proto prot[],
> +				      struct sk_psock *psock);
> +};
> +
>  int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
>  		 int elem_first_coalesce);
>  int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src,
> @@ -335,23 +345,14 @@ static inline void sk_psock_cork_free(struct sk_psock *psock)
>  	}
>  }
>  
> -static inline void sk_psock_update_proto(struct sock *sk,
> -					 struct sk_psock *psock,
> -					 struct proto *ops)
> -{
> -	psock->saved_unhash = sk->sk_prot->unhash;
> -	psock->saved_close = sk->sk_prot->close;
> -	psock->saved_write_space = sk->sk_write_space;
> -
> -	psock->sk_proto = sk->sk_prot;
> -	/* Pairs with lockless read in sk_clone_lock() */
> -	WRITE_ONCE(sk->sk_prot, ops);
> -}
> -
>  static inline void sk_psock_restore_proto(struct sock *sk,
>  					  struct sk_psock *psock)
>  {
> +	if (!psock->sk_proto)
> +		return;
> +
>  	sk->sk_prot->unhash = psock->saved_unhash;
> +
>  	if (inet_sk(sk)->is_icsk) {
>  		tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
>  	} else {
> @@ -424,4 +425,13 @@ static inline void psock_progs_drop(struct sk_psock_progs *progs)
>  	psock_set_prog(&progs->skb_verdict, NULL);
>  }
>  
> +static inline int sk_psock_hooks_init(struct sk_psock_hooks *hooks,
> +				       struct proto *ipv4_base)
> +{
> +	hooks->ipv6_lock = __SPIN_LOCK_UNLOCKED();
> +	return hooks->rebuild_proto(hooks->ipv4, ipv4_base);
> +}
> +
> +int sk_psock_hooks_install(struct sk_psock_hooks *hooks, struct sock *sk);
> +
>  #endif /* _LINUX_SKMSG_H */
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 07f947cc80e6..ccf39d80b695 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -2196,7 +2196,6 @@ struct sk_msg;
>  struct sk_psock;
>  
>  int tcp_bpf_init(struct sock *sk);
> -void tcp_bpf_reinit(struct sock *sk);
>  int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
>  			  int flags);
>  int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
> diff --git a/net/core/skmsg.c b/net/core/skmsg.c
> index eeb28cb85664..a9bdf02c2539 100644
> --- a/net/core/skmsg.c
> +++ b/net/core/skmsg.c
> @@ -844,3 +844,55 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
>  	strp_stop(&parser->strp);
>  	parser->enabled = false;
>  }
> +
> +static inline int sk_psock_hooks_init_ipv6(struct sk_psock_hooks *hooks,
> +					    struct proto *base)
> +{
> +	int ret = 0;
> +
> +	if (likely(base == smp_load_acquire(&hooks->base_ipv6)))
> +		return 0;
> +
> +	spin_lock_bh(&hooks->ipv6_lock);
> +	if (likely(base != hooks->base_ipv6)) {
> +		ret = hooks->rebuild_proto(hooks->ipv6, base);
> +		if (!ret)
> +			smp_store_release(&hooks->base_ipv6, base);
> +	}
> +	spin_unlock_bh(&hooks->ipv6_lock);
> +	return ret;
> +}
> +
> +int sk_psock_hooks_install(struct sk_psock_hooks *hooks, struct sock *sk)
> +{
> +	struct sk_psock *psock = sk_psock(sk);
> +	struct proto *prot_base;
> +
> +	WARN_ON_ONCE(!rcu_read_lock_held());
Is this only for the earlier sk_psock(sk)?

> +
> +	if (unlikely(!psock))
When will this happen?

> +		return -EINVAL;
> +
> +	/* Initialize saved callbacks and original proto only once.
> +	 * Since we've not installed the hooks, psock is not yet in use and
> +	 * we can initialize it without synchronization.
> +	 */
> +	if (!psock->sk_proto) {
If I read it correctly, this is to replace the tcp_bpf_reinit_sk_prot()?

I think some of the current reinit comment is useful to keep also:

/* Reinit occurs when program types change e.g. TCP_BPF_TX is removed ... */

> +		struct proto *prot = READ_ONCE(sk->sk_prot);
> +
> +		if (sk->sk_family == AF_INET6 &&
> +		    sk_psock_hooks_init_ipv6(hooks, prot))
> +			return -EINVAL;
> +
> +		psock->saved_unhash = prot->unhash;
> +		psock->saved_close = prot->close;
> +		psock->saved_write_space = sk->sk_write_space;
> +
> +		psock->sk_proto = prot;
> +	}
> +
> +	/* Pairs with lockless read in sk_clone_lock() */
> +	prot_base = sk->sk_family == AF_INET ? hooks->ipv4 : hooks->ipv6;
> +	WRITE_ONCE(sk->sk_prot, hooks->choose_proto(prot_base, psock));
> +	return 0;
> +}



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux