Re: [PATCH bpf-next] bpf: add SO_KEEPALIVE and related options to bpf_setsockopt

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 5/27/20 8:05 AM, Dmitry Yakunin wrote:
> This patch adds support of SO_KEEPALIVE flag and TCP related options
> to bpf_setsockopt() routine. This is helpful if we want to enable or tune
> TCP keepalive for applications which don't do it in the userspace code.
> In order to avoid copy-paste, common code from classic setsockopt was moved
> to auxiliary functions in the headers.


Please split this in two patches :
- one adding the helpers, a pure TCP patch.
- one for BPF additions.


> 
> Signed-off-by: Dmitry Yakunin <zeil@xxxxxxxxxxxxxx>
> ---
>  include/net/sock.h |  9 +++++++++
>  include/net/tcp.h  | 18 ++++++++++++++++++
>  net/core/filter.c  | 39 ++++++++++++++++++++++++++++++++++++++-
>  net/core/sock.c    |  9 ---------
>  net/ipv4/tcp.c     | 15 ++-------------
>  5 files changed, 67 insertions(+), 23 deletions(-)
> 
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 3e8c6d4..ee35dea 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -879,6 +879,15 @@ static inline void sock_reset_flag(struct sock *sk, enum sock_flags flag)
>  	__clear_bit(flag, &sk->sk_flags);
>  }
>  
> +static inline void sock_valbool_flag(struct sock *sk, enum sock_flags bit,
> +				     int valbool)
> +{
> +	if (valbool)
> +		sock_set_flag(sk, bit);
> +	else
> +		sock_reset_flag(sk, bit);
> +}
> +
>  static inline bool sock_flag(const struct sock *sk, enum sock_flags flag)
>  {
>  	return test_bit(flag, &sk->sk_flags);
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index b681338..ae6a495 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -1465,6 +1465,24 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
>  			  tcp_jiffies32 - tp->rcv_tstamp);
>  }
>  
> +/* val must be validated at the top level function */
> +static inline void keepalive_time_set(struct tcp_sock *tp, int val)
> +{
> +	struct sock *sk = (struct sock *)tp;

We prefer the other way to avoid a cast unless really needed :

static inline tcp_keepalive_time_set(struct sock *sk, int val)
{
      stuct tcp_sock *tp = tcp_sk(sk);




> +
> +	tp->keepalive_time = val * HZ;
> +	if (sock_flag(sk, SOCK_KEEPOPEN) &&
> +	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
> +		u32 elapsed = keepalive_time_elapsed(tp);
> +
> +		if (tp->keepalive_time > elapsed)
> +			elapsed = tp->keepalive_time - elapsed;
> +		else
> +			elapsed = 0;
> +		inet_csk_reset_keepalive_timer(sk, elapsed);
> +	}
> +}
> +
>  static inline int tcp_fin_time(const struct sock *sk)
>  {
>  	int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
> diff --git a/net/core/filter.c b/net/core/filter.c
> index a6fc234..1035e43 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -4248,8 +4248,8 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
>  static int _bpf_setsockopt(struct sock *sk, int level, int optname,
>  			   char *optval, int optlen, u32 flags)
>  {
> +	int val, valbool;
>  	int ret = 0;
> -	int val;
>  
>  	if (!sk_fullsock(sk))
>  		return -EINVAL;
> @@ -4260,6 +4260,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
>  		if (optlen != sizeof(int))
>  			return -EINVAL;
>  		val = *((int *)optval);
> +		valbool = val ? 1 : 0;
>  
>  		/* Only some socketops are supported */
>  		switch (optname) {
> @@ -4298,6 +4299,11 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
>  				sk_dst_reset(sk);
>  			}
>  			break;
> +		case SO_KEEPALIVE:
> +			if (sk->sk_prot->keepalive)
> +				sk->sk_prot->keepalive(sk, valbool);
> +			sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
> +			break;
>  		default:
>  			ret = -EINVAL;
>  		}
> @@ -4358,6 +4364,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
>  			ret = tcp_set_congestion_control(sk, name, false,
>  							 reinit, true);
>  		} else {
> +			struct inet_connection_sock *icsk = inet_csk(sk);
>  			struct tcp_sock *tp = tcp_sk(sk);
>  
>  			if (optlen != sizeof(int))
> @@ -4386,6 +4393,36 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
>  				else
>  					tp->save_syn = val;
>  				break;
> +			case TCP_KEEPIDLE:
> +				if (val < 1 || val > MAX_TCP_KEEPIDLE)
> +					ret = -EINVAL;
> +				else
> +					keepalive_time_set(tp, val);
> +				break;
> +			case TCP_KEEPINTVL:
> +				if (val < 1 || val > MAX_TCP_KEEPINTVL)
> +					ret = -EINVAL;
> +				else
> +					tp->keepalive_intvl = val * HZ;
> +				break;
> +			case TCP_KEEPCNT:
> +				if (val < 1 || val > MAX_TCP_KEEPCNT)
> +					ret = -EINVAL;
> +				else
> +					tp->keepalive_probes = val;
> +				break;
> +			case TCP_SYNCNT:
> +				if (val < 1 || val > MAX_TCP_SYNCNT)
> +					ret = -EINVAL;
> +				else
> +					icsk->icsk_syn_retries = val;
> +				break;
> +			case TCP_USER_TIMEOUT:
> +				if (val < 0)
> +					ret = -EINVAL;
> +				else
> +					icsk->icsk_user_timeout = val;
> +				break;
>  			default:
>  				ret = -EINVAL;
>  			}
> diff --git a/net/core/sock.c b/net/core/sock.c
> index fd85e65..9836b01 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -684,15 +684,6 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
>  	return ret;
>  }
>  
> -static inline void sock_valbool_flag(struct sock *sk, enum sock_flags bit,
> -				     int valbool)
> -{
> -	if (valbool)
> -		sock_set_flag(sk, bit);
> -	else
> -		sock_reset_flag(sk, bit);
> -}
> -
>  bool sk_mc_loop(struct sock *sk)
>  {
>  	if (dev_recursion_level())
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 9700649..7b239e8 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -3003,19 +3003,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
>  	case TCP_KEEPIDLE:
>  		if (val < 1 || val > MAX_TCP_KEEPIDLE)
>  			err = -EINVAL;
> -		else {
> -			tp->keepalive_time = val * HZ;
> -			if (sock_flag(sk, SOCK_KEEPOPEN) &&
> -			    !((1 << sk->sk_state) &
> -			      (TCPF_CLOSE | TCPF_LISTEN))) {
> -				u32 elapsed = keepalive_time_elapsed(tp);
> -				if (tp->keepalive_time > elapsed)
> -					elapsed = tp->keepalive_time - elapsed;
> -				else
> -					elapsed = 0;
> -				inet_csk_reset_keepalive_timer(sk, elapsed);
> -			}
> -		}
> +		else
> +			keepalive_time_set(tp, val);
>  		break;
>  	case TCP_KEEPINTVL:
>  		if (val < 1 || val > MAX_TCP_KEEPINTVL)
> 



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux