Re: [PATCH net-next v3 03/14] net-timestamp: open gate for bpf_setsockopt/_getsockopt

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 10/28/24 4:05 AM, Jason Xing wrote:
From: Jason Xing <kernelxing@xxxxxxxxxxx>

For now, we support bpf_setsockopt to set or clear timestamps flags.

Users can use something like this in bpf program to turn on the feature:
flags = SOF_TIMESTAMPING_TX_SCHED;
bpf_setsockopt(skops, SOL_SOCKET, SO_TIMESTAMPING, &flags, sizeof(flags));
The specific use cases can be seen in the bpf selftest in this series.

Later, I will support each flags one by one based on this.

Signed-off-by: Jason Xing <kernelxing@xxxxxxxxxxx>
---
  include/net/sock.h              |  4 ++--
  include/uapi/linux/net_tstamp.h |  7 +++++++
  net/core/filter.c               |  7 +++++--
  net/core/sock.c                 | 34 ++++++++++++++++++++++++++-------
  net/ipv4/udp.c                  |  2 +-
  net/mptcp/sockopt.c             |  2 +-
  net/socket.c                    |  2 +-
  7 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 5384f1e49f5c..062f405c744e 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1775,7 +1775,7 @@ static inline void skb_set_owner_edemux(struct sk_buff *skb, struct sock *sk)
  #endif
int sk_setsockopt(struct sock *sk, int level, int optname,
-		  sockptr_t optval, unsigned int optlen);
+		  sockptr_t optval, unsigned int optlen, bool bpf_timetamping);
  int sock_setsockopt(struct socket *sock, int level, int op,
  		    sockptr_t optval, unsigned int optlen);
  int do_sock_setsockopt(struct socket *sock, bool compat, int level,
@@ -1784,7 +1784,7 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level,
  		       int optname, sockptr_t optval, sockptr_t optlen);
int sk_getsockopt(struct sock *sk, int level, int optname,
-		  sockptr_t optval, sockptr_t optlen);
+		  sockptr_t optval, sockptr_t optlen, bool bpf_timetamping);
  int sock_gettstamp(struct socket *sock, void __user *userstamp,
  		   bool timeval, bool time32);
  struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 858339d1c1c4..0696699cf964 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -49,6 +49,13 @@ enum {
  					 SOF_TIMESTAMPING_TX_SCHED | \
  					 SOF_TIMESTAMPING_TX_ACK)
+#define SOF_TIMESTAMPING_BPF_SUPPPORTED_MASK (SOF_TIMESTAMPING_SOFTWARE | \

hmm... so we are allowing it but SOF_TIMESTAMPING_SOFTWARE won't do anything (meaning set and not-set are both no-op) ?

+					      SOF_TIMESTAMPING_TX_SCHED | \
+					      SOF_TIMESTAMPING_TX_SOFTWARE | \
+					      SOF_TIMESTAMPING_TX_ACK | \
+					      SOF_TIMESTAMPING_OPT_ID | \
+					      SOF_TIMESTAMPING_OPT_ID_TCP)
+
  /**
   * struct so_timestamping - SO_TIMESTAMPING parameter
   *
diff --git a/net/core/filter.c b/net/core/filter.c
index 58761263176c..dc8ecf899ced 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5238,6 +5238,9 @@ static int sol_socket_sockopt(struct sock *sk, int optname,
  		break;
  	case SO_BINDTODEVICE:
  		break;
+	case SO_TIMESTAMPING_NEW:

How about only allow bpf_setsockopt(SO_TIMESTAMPING_NEW) instead of bpf_setsockopt(SO_TIMESTAMPING). Does it solve the issue reported in v2?

+	case SO_TIMESTAMPING_OLD:
+		break;
  	default:
  		return -EINVAL;
  	}
@@ -5247,11 +5250,11 @@ static int sol_socket_sockopt(struct sock *sk, int optname,
  			return -EINVAL;
  		return sk_getsockopt(sk, SOL_SOCKET, optname,
  				     KERNEL_SOCKPTR(optval),
-				     KERNEL_SOCKPTR(optlen));
+				     KERNEL_SOCKPTR(optlen), true);
  	}
return sk_setsockopt(sk, SOL_SOCKET, optname,
-			     KERNEL_SOCKPTR(optval), *optlen);
+			     KERNEL_SOCKPTR(optval), *optlen, true);
  }
static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname,
diff --git a/net/core/sock.c b/net/core/sock.c
index 7f398bd07fb7..7e05748b1a06 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -941,6 +941,19 @@ int sock_set_timestamping(struct sock *sk, int optname,
  	return 0;
  }
+static int sock_set_timestamping_bpf(struct sock *sk,
+				     struct so_timestamping timestamping)
+{
+	u32 flags = timestamping.flags;
+
+	if (flags & ~SOF_TIMESTAMPING_BPF_SUPPPORTED_MASK)
+		return -EINVAL;
+
+	WRITE_ONCE(sk->sk_tsflags_bpf, flags);

I think it is cleaner to directly "WRITE_ONCE(sk->sk_tsflags_bpf, flags);" in sol_socket_sockopt() instead of adding "bool bpf_timestamping" to sk_setsockopt. sk_tsflags_bpf is a separate u32 anyway, so not a lot of code to share. The same for getsockopt.

[ will continue the remaining patches a little later ]

+
+	return 0;
+}
+
  void sock_set_keepalive(struct sock *sk)
  {
  	lock_sock(sk);
@@ -1159,7 +1172,7 @@ static int sockopt_validate_clockid(__kernel_clockid_t value)
   */
int sk_setsockopt(struct sock *sk, int level, int optname,
-		  sockptr_t optval, unsigned int optlen)
+		  sockptr_t optval, unsigned int optlen, bool bpf_timetamping)
  {
  	struct so_timestamping timestamping;
  	struct socket *sock = sk->sk_socket;
@@ -1409,7 +1422,10 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
  			memset(&timestamping, 0, sizeof(timestamping));
  			timestamping.flags = val;
  		}
-		ret = sock_set_timestamping(sk, optname, timestamping);
+		if (!bpf_timetamping)
+			ret = sock_set_timestamping(sk, optname, timestamping);
+		else
+			ret = sock_set_timestamping_bpf(sk, timestamping);
  		break;
case SO_RCVLOWAT:
@@ -1626,7 +1642,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
  		    sockptr_t optval, unsigned int optlen)
  {
  	return sk_setsockopt(sock->sk, level, optname,
-			     optval, optlen);
+			     optval, optlen, false);
  }
  EXPORT_SYMBOL(sock_setsockopt);
@@ -1670,7 +1686,7 @@ static int groups_to_user(sockptr_t dst, const struct group_info *src)
  }
int sk_getsockopt(struct sock *sk, int level, int optname,
-		  sockptr_t optval, sockptr_t optlen)
+		  sockptr_t optval, sockptr_t optlen, bool bpf_timetamping)
  {
  	struct socket *sock = sk->sk_socket;
@@ -1793,9 +1809,13 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
  		 * returning the flags when they were set through the same option.
  		 * Don't change the beviour for the old case SO_TIMESTAMPING_OLD.
  		 */
-		if (optname == SO_TIMESTAMPING_OLD || sock_flag(sk, SOCK_TSTAMP_NEW)) {
-			v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
-			v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
+		if (!bpf_timetamping) {
+			if (optname == SO_TIMESTAMPING_OLD || sock_flag(sk, SOCK_TSTAMP_NEW)) {
+				v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
+				v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
+			}
+		} else {
+			v.timestamping.flags = READ_ONCE(sk->sk_tsflags_bpf);
  		}
  		break;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0e24916b39d4..9a20af41e272 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2679,7 +2679,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
  	int is_udplite = IS_UDPLITE(sk);
if (level == SOL_SOCKET) {
-		err = sk_setsockopt(sk, level, optname, optval, optlen);
+		err = sk_setsockopt(sk, level, optname, optval, optlen, false);
if (optname == SO_RCVBUF || optname == SO_RCVBUFFORCE) {
  			sockopt_lock_sock(sk);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 505445a9598f..7b12cc2db136 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -306,7 +306,7 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
  			return PTR_ERR(ssk);
  		}
- ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen);
+		ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen, false);
  		if (ret == 0) {
  			if (optname == SO_REUSEPORT)
  				sk->sk_reuseport = ssk->sk_reuseport;
diff --git a/net/socket.c b/net/socket.c
index 9a8e4452b9b2..4bdca39685a6 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2385,7 +2385,7 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level,
ops = READ_ONCE(sock->ops);
  	if (level == SOL_SOCKET) {
-		err = sk_getsockopt(sock->sk, level, optname, optval, optlen);
+		err = sk_getsockopt(sock->sk, level, optname, optval, optlen, false);
  	} else if (unlikely(!ops->getsockopt)) {
  		err = -EOPNOTSUPP;
  	} else {





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux