On 5/3/23 3:53 PM, Aditi Ghag wrote:
diff --git a/net/core/filter.c b/net/core/filter.c
index 727c5269867d..97d70b7959a1 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -11715,3 +11715,60 @@ static int __init bpf_kfunc_init(void)
return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
}
late_initcall(bpf_kfunc_init);
+
+/* Disables missing prototype warnings */
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in vmlinux BTF");
+
+/* bpf_sock_destroy: Destroy the given socket with ECONNABORTED error code.
+ *
+ * The function expects a non-NULL pointer to a socket, and invokes the
+ * protocol specific socket destroy handlers.
+ *
+ * The helper can only be called from BPF contexts that have acquired the
+ * socket lock.
+ *
+ * Parameters:
+ * @sock: Pointer to socket to be destroyed
+ *
+ * Return:
+ * On error, may return EOPNOTSUPP or EINVAL.
+ * EOPNOTSUPP if the protocol specific destroy handler is not supported.
+ * 0 otherwise.
+ */
+__bpf_kfunc int bpf_sock_destroy(struct sock_common *sock)
+{
+ struct sock *sk = (struct sock *)sock;
+
+ if (!sk)
If the kfunc has the KF_TRUSTED_ARGS flag, this NULL test is no longer needed.
More details below.
+ return -EINVAL;
+
+ /* The locking semantics that allow for synchronous execution of the
+ * destroy handlers are only supported for TCP and UDP.
+ * Adding support for another protocol will require its BPF context
+ * to acquire lock_sock prior to invoking this kfunc.
+ */
+ if (!sk->sk_prot->diag_destroy || (sk->sk_protocol != IPPROTO_TCP &&
+ sk->sk_protocol != IPPROTO_UDP))
+ return -EOPNOTSUPP;
+
+ return sk->sk_prot->diag_destroy(sk, ECONNABORTED);
+}
+
+__diag_pop()
+
+BTF_SET8_START(sock_destroy_kfunc_set)
nit: Rename it to a more generic name for future sk_iter related kfuncs.
Maybe bpf_sk_iter_kfunc_set?
+BTF_ID_FLAGS(func, bpf_sock_destroy)
Following up on the v6 patch set regarding KF_TRUSTED_ARGS:
KF_TRUSTED_ARGS is needed here to avoid cases where a PTR_TO_BTF_ID sk is
obtained by following another pointer, e.g. getting a sk pointer (which may
even be NULL) by following another sk pointer. The recent PTR_TRUSTED concept
in the verifier can guard against this. I tried it, and the following should do:
diff --git i/net/core/filter.c w/net/core/filter.c
index 68b228f3eca6..d82e038da0e3 100644
--- i/net/core/filter.c
+++ w/net/core/filter.c
@@ -11767,7 +11767,7 @@ __bpf_kfunc int bpf_sock_destroy(struct sock_common *sock)
__diag_pop()
BTF_SET8_START(sock_destroy_kfunc_set)
-BTF_ID_FLAGS(func, bpf_sock_destroy)
+BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS)
BTF_SET8_END(sock_destroy_kfunc_set)
static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id)
diff --git i/net/ipv4/tcp_ipv4.c w/net/ipv4/tcp_ipv4.c
index 887f83a90d85..a769284e8291 100644
--- i/net/ipv4/tcp_ipv4.c
+++ w/net/ipv4/tcp_ipv4.c
@@ -3354,7 +3354,7 @@ static struct bpf_iter_reg tcp_reg_info = {
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__tcp, sk_common),
- PTR_TO_BTF_ID_OR_NULL },
+ PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
},
.get_func_proto = bpf_iter_tcp_get_func_proto,
.seq_info = &tcp_seq_info,
diff --git i/net/ipv4/udp.c w/net/ipv4/udp.c
index 746c85f2bb03..945b641b363b 100644
--- i/net/ipv4/udp.c
+++ w/net/ipv4/udp.c
@@ -3646,7 +3646,7 @@ static struct bpf_iter_reg udp_reg_info = {
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__udp, udp_sk),
- PTR_TO_BTF_ID_OR_NULL },
+ PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
},
.seq_info = &udp_seq_info,
};
Please take a look and run it through test_progs. If you agree with the
changes, they should be included in the same patch 6.
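
For context, this is roughly the shape of the iter prog that test_progs
would exercise here (untested sketch; the prog name and the port filter are
made up). Note the NULL check the verifier will still demand in the program,
since the ctx arg stays PTR_TO_BTF_ID_OR_NULL even with PTR_TRUSTED added:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* kfunc declaration, resolved against vmlinux BTF at load time */
int bpf_sock_destroy(struct sock_common *sk) __ksym;

SEC("iter/tcp")
int destroy_tcp_socks(struct bpf_iter__tcp *ctx)
{
	struct sock_common *sk_common = ctx->sk_common;

	/* ctx arg is PTR_TO_BTF_ID_OR_NULL, so the verifier requires
	 * this NULL check before the kfunc call.
	 */
	if (!sk_common)
		return 0;

	/* illustrative filter: only abort sockets bound to port 4321 */
	if (sk_common->skc_num != 4321)
		return 0;

	bpf_sock_destroy(sk_common);
	return 0;
}

char _license[] SEC("license") = "GPL";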
+BTF_SET8_END(sock_destroy_kfunc_set)
+
+static const struct btf_kfunc_id_set bpf_sock_destroy_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &sock_destroy_kfunc_set,
+};
+
+static int init_subsystem(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_sock_destroy_kfunc_set);
+}
+late_initcall(init_subsystem);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 288693981b00..2259b4facc2f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4679,8 +4679,10 @@ int tcp_abort(struct sock *sk, int err)
return 0;
}
- /* Don't race with userspace socket closes such as tcp_close. */
- lock_sock(sk);
+ /* BPF context ensures sock locking. */
+ if (!has_current_bpf_ctx())
+ /* Don't race with userspace socket closes such as tcp_close. */
+ lock_sock(sk);
if (sk->sk_state == TCP_LISTEN) {
tcp_set_state(sk, TCP_CLOSE);
@@ -4702,9 +4704,11 @@ int tcp_abort(struct sock *sk, int err)
}
bh_unlock_sock(sk);
+
nit: unnecessary newline change.
local_bh_enable();
tcp_write_queue_purge(sk);
- release_sock(sk);
+ if (!has_current_bpf_ctx())
+ release_sock(sk);
return 0;
}
EXPORT_SYMBOL_GPL(tcp_abort);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 150551acab9d..5f48cdf82a45 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2925,7 +2925,8 @@ EXPORT_SYMBOL(udp_poll);
int udp_abort(struct sock *sk, int err)
{
- lock_sock(sk);
+ if (!has_current_bpf_ctx())
+ lock_sock(sk);
/* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing
* with close()
@@ -2938,7 +2939,8 @@ int udp_abort(struct sock *sk, int err)
__udp_disconnect(sk, 0);
out:
- release_sock(sk);
+ if (!has_current_bpf_ctx())
+ release_sock(sk);
return 0;
}
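
And for completeness, the way this gets driven end to end: userspace attaches
the iter prog and reads the iter fd, so tcp_abort/udp_abort run in a BPF
context where the iterator is expected to hold the sock lock. A rough
userspace sketch (object/prog names made up):

#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

int run_destroy_iter(void)
{
	struct bpf_object *obj;
	struct bpf_program *prog;
	struct bpf_link *link;
	char buf[64];
	int iter_fd, err = -1;

	obj = bpf_object__open_file("sock_destroy.bpf.o", NULL);
	if (!obj)
		return -1;
	if (bpf_object__load(obj))
		goto out;

	prog = bpf_object__find_program_by_name(obj, "destroy_tcp_socks");
	if (!prog)
		goto out;

	link = bpf_program__attach_iter(prog, NULL);
	if (!link)
		goto out;

	iter_fd = bpf_iter_create(bpf_link__fd(link));
	if (iter_fd < 0)
		goto out_link;

	/* Reading the iter fd walks the sockets and runs the prog,
	 * which destroys every socket it selects.
	 */
	while (read(iter_fd, buf, sizeof(buf)) > 0)
		;
	err = 0;

	close(iter_fd);
out_link:
	bpf_link__destroy(link);
out:
	bpf_object__close(obj);
	return err;
}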