On 5/3/23 3:53 PM, Aditi Ghag wrote:
diff --git a/net/core/filter.c b/net/core/filter.c
index 727c5269867d..97d70b7959a1 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -11715,3 +11715,60 @@ static int __init bpf_kfunc_init(void)
return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
}
late_initcall(bpf_kfunc_init);
+
+/* Disables missing prototype warnings */
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in vmlinux BTF");
+
+/* bpf_sock_destroy: Destroy the given socket with ECONNABORTED error code.
+ *
+ * The function expects a non-NULL pointer to a socket, and invokes the
+ * protocol specific socket destroy handlers.
+ *
+ * The helper can only be called from BPF contexts that have acquired the
+ * socket lock.
+ *
+ * Parameters:
+ * @sock: Pointer to socket to be destroyed
+ *
+ * Return:
+ * On error, may return EOPNOTSUPP or EINVAL.
+ * EOPNOTSUPP if the protocol specific destroy handler is not supported.
+ * 0 otherwise.
+ */
+__bpf_kfunc int bpf_sock_destroy(struct sock_common *sock)
+{
+ struct sock *sk = (struct sock *)sock;
+
+ if (!sk)
If the kfunc has the KF_TRUSTED_ARGS flag, this NULL test is no longer needed.
More details below.
+ return -EINVAL;
+
+ /* The locking semantics that allow for synchronous execution of the
+ * destroy handlers are only supported for TCP and UDP.
+ * Adding support for another protocol will require its BPF context
+ * to acquire lock_sock prior to invoking this kfunc.
+ */
+ if (!sk->sk_prot->diag_destroy || (sk->sk_protocol != IPPROTO_TCP &&
+ sk->sk_protocol != IPPROTO_UDP))
+ return -EOPNOTSUPP;
+
+ return sk->sk_prot->diag_destroy(sk, ECONNABORTED);
+}
+
+__diag_pop()
+
+BTF_SET8_START(sock_destroy_kfunc_set)
nit: Rename it to a more generic name for future sk_iter related kfuncs.
Maybe bpf_sk_iter_kfunc_set?
+BTF_ID_FLAGS(func, bpf_sock_destroy)
Following up on the v6 patch set regarding KF_TRUSTED_ARGS:
KF_TRUSTED_ARGS is needed here to avoid cases where a PTR_TO_BTF_ID sk is
obtained by following another pointer, e.g. getting a sk pointer (which may
even be NULL) by following another sk pointer. The recent PTR_TRUSTED concept
in the verifier can guard against this. I tried it, and the following should do:
diff --git i/net/core/filter.c w/net/core/filter.c
index 68b228f3eca6..d82e038da0e3 100644
--- i/net/core/filter.c
+++ w/net/core/filter.c
@@ -11767,7 +11767,7 @@ __bpf_kfunc int bpf_sock_destroy(struct sock_common *sock)
__diag_pop()
BTF_SET8_START(sock_destroy_kfunc_set)
-BTF_ID_FLAGS(func, bpf_sock_destroy)
+BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS)
BTF_SET8_END(sock_destroy_kfunc_set)
static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id)
diff --git i/net/ipv4/tcp_ipv4.c w/net/ipv4/tcp_ipv4.c
index 887f83a90d85..a769284e8291 100644
--- i/net/ipv4/tcp_ipv4.c
+++ w/net/ipv4/tcp_ipv4.c
@@ -3354,7 +3354,7 @@ static struct bpf_iter_reg tcp_reg_info = {
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__tcp, sk_common),
- PTR_TO_BTF_ID_OR_NULL },
+ PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
},
.get_func_proto = bpf_iter_tcp_get_func_proto,
.seq_info = &tcp_seq_info,
diff --git i/net/ipv4/udp.c w/net/ipv4/udp.c
index 746c85f2bb03..945b641b363b 100644
--- i/net/ipv4/udp.c
+++ w/net/ipv4/udp.c
@@ -3646,7 +3646,7 @@ static struct bpf_iter_reg udp_reg_info = {
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__udp, udp_sk),
- PTR_TO_BTF_ID_OR_NULL },
+ PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
},
.seq_info = &udp_seq_info,
};
Please take a look and run it through test_progs. If you agree with the
changes, they should be included in the same patch 6.
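
For context, this is roughly the shape of the iter prog that test_progs
would exercise here (untested sketch; the prog name and the port filter are
made up). Note the NULL check the verifier will still demand in the program,
since the ctx arg stays PTR_TO_BTF_ID_OR_NULL even with PTR_TRUSTED added:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* kfunc declaration, resolved against vmlinux BTF at load time */
int bpf_sock_destroy(struct sock_common *sk) __ksym;

SEC("iter/tcp")
int destroy_tcp_socks(struct bpf_iter__tcp *ctx)
{
	struct sock_common *sk_common = ctx->sk_common;

	/* ctx arg is PTR_TO_BTF_ID_OR_NULL, so the verifier requires
	 * this NULL check before the kfunc call.
	 */
	if (!sk_common)
		return 0;

	/* illustrative filter: only abort sockets bound to port 4321 */
	if (sk_common->skc_num != 4321)
		return 0;

	bpf_sock_destroy(sk_common);
	return 0;
}

char _license[] SEC("license") = "GPL";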
+BTF_SET8_END(sock_destroy_kfunc_set)
+
+static const struct btf_kfunc_id_set bpf_sock_destroy_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &sock_destroy_kfunc_set,
+};
+
+static int init_subsystem(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_sock_destroy_kfunc_set);
+}
+late_initcall(init_subsystem);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 288693981b00..2259b4facc2f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4679,8 +4679,10 @@ int tcp_abort(struct sock *sk, int err)
return 0;
}
- /* Don't race with userspace socket closes such as tcp_close. */
- lock_sock(sk);
+ /* BPF context ensures sock locking. */
+ if (!has_current_bpf_ctx())
+ /* Don't race with userspace socket closes such as tcp_close. */
+ lock_sock(sk);
if (sk->sk_state == TCP_LISTEN) {
tcp_set_state(sk, TCP_CLOSE);
@@ -4702,9 +4704,11 @@ int tcp_abort(struct sock *sk, int err)
}
bh_unlock_sock(sk);
+
nit: unnecessary newline change.
local_bh_enable();
tcp_write_queue_purge(sk);
- release_sock(sk);
+ if (!has_current_bpf_ctx())
+ release_sock(sk);
return 0;
}
EXPORT_SYMBOL_GPL(tcp_abort);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 150551acab9d..5f48cdf82a45 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2925,7 +2925,8 @@ EXPORT_SYMBOL(udp_poll);
int udp_abort(struct sock *sk, int err)
{
- lock_sock(sk);
+ if (!has_current_bpf_ctx())
+ lock_sock(sk);
/* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing
* with close()
@@ -2938,7 +2939,8 @@ int udp_abort(struct sock *sk, int err)
__udp_disconnect(sk, 0);
out:
- release_sock(sk);
+ if (!has_current_bpf_ctx())
+ release_sock(sk);
return 0;
}
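
And for completeness, the way this gets driven end to end: userspace attaches
the iter prog and reads the iter fd, so tcp_abort/udp_abort run in a BPF
context where the iterator is expected to hold the sock lock. A rough
userspace sketch (object/prog names made up):

#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

int run_destroy_iter(void)
{
	struct bpf_object *obj;
	struct bpf_program *prog;
	struct bpf_link *link;
	char buf[64];
	int iter_fd, err = -1;

	obj = bpf_object__open_file("sock_destroy.bpf.o", NULL);
	if (!obj)
		return -1;
	if (bpf_object__load(obj))
		goto out;

	prog = bpf_object__find_program_by_name(obj, "destroy_tcp_socks");
	if (!prog)
		goto out;

	link = bpf_program__attach_iter(prog, NULL);
	if (!link)
		goto out;

	iter_fd = bpf_iter_create(bpf_link__fd(link));
	if (iter_fd < 0)
		goto out_link;

	/* Reading the iter fd walks the sockets and runs the prog,
	 * which destroys every socket it selects.
	 */
	while (read(iter_fd, buf, sizeof(buf)) > 0)
		;
	err = 0;

	close(iter_fd);
out_link:
	bpf_link__destroy(link);
out:
	bpf_object__close(obj);
	return err;
}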