> On Mar 21, 2023, at 4:43 PM, Martin KaFai Lau <martin.lau@xxxxxxxxx> wrote:
>
> On 3/21/23 11:45 AM, Aditi Ghag wrote:
>> diff --git a/include/net/udp.h b/include/net/udp.h
>> index de4b528522bb..d2999447d3f2 100644
>> --- a/include/net/udp.h
>> +++ b/include/net/udp.h
>> @@ -437,6 +437,7 @@ struct udp_seq_afinfo {
>>  struct udp_iter_state {
>>  	struct seq_net_private p;
>>  	int bucket;
>> +	int offset;
>
> All the offset changes are easier to review together with patch 1, so please move them to patch 1.

Thanks for the quick review! Oh boy... Absolutely! Looks like I misplaced the changes during an interactive rebase. Can I fix this up in this patch itself instead of creating a new patch series? That way, I can batch things up in the next revision.

>
>>  	struct udp_seq_afinfo *bpf_seq_afinfo;
>>  };
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index 1d6f165923bf..ba3e0dac119c 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -11621,3 +11621,57 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id)
>>  	return func;
>>  }
>> +
>> +/* Disables missing prototype warnings */
>> +__diag_push();
>> +__diag_ignore_all("-Wmissing-prototypes",
>> +		  "Global functions as their definitions will be in vmlinux BTF");
>> +
>> +/* bpf_sock_destroy: Destroy the given socket with ECONNABORTED error code.
>> + *
>> + * The helper expects a non-NULL pointer to a socket. It invokes the
>> + * protocol specific socket destroy handlers.
>> + *
>> + * The helper can only be called from BPF contexts that have acquired the socket
>> + * locks.
>> + *
>> + * Parameters:
>> + * @sock: Pointer to socket to be destroyed
>> + *
>> + * Return:
>> + * On error, may return EPROTONOSUPPORT, EINVAL.
>> + * EPROTONOSUPPORT if protocol specific destroy handler is not implemented.
>> + * 0 otherwise
>> + */
>> +__bpf_kfunc int bpf_sock_destroy(struct sock_common *sock)
>> +{
>> +	struct sock *sk = (struct sock *)sock;
>> +
>> +	if (!sk)
>> +		return -EINVAL;
>> +
>> +	/* The locking semantics that allow for synchronous execution of the
>> +	 * destroy handlers are only supported for TCP and UDP.
>> +	 */
>> +	if (!sk->sk_prot->diag_destroy || sk->sk_protocol == IPPROTO_RAW)
>> +		return -EOPNOTSUPP;
>> +
>> +	return sk->sk_prot->diag_destroy(sk, ECONNABORTED);
>> +}
>> +
>> +__diag_pop()
>> +
>> +BTF_SET8_START(sock_destroy_kfunc_set)
>> +BTF_ID_FLAGS(func, bpf_sock_destroy)
>> +BTF_SET8_END(sock_destroy_kfunc_set)
>> +
>> +static const struct btf_kfunc_id_set bpf_sock_destroy_kfunc_set = {
>> +	.owner = THIS_MODULE,
>> +	.set = &sock_destroy_kfunc_set,
>> +};
>> +
>> +static int init_subsystem(void)
>> +{
>> +	return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_sock_destroy_kfunc_set);
>
> It still needs a way to guarantee the running context is safe to use this kfunc. BPF_PROG_TYPE_TRACING is too broad. I'm trying to brainstorm ways here to avoid having to filter by expected_attach_type. I wonder whether using KF_TRUSTED_ARGS like this:
>
> BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS)
>
> is enough, or whether it needs some care to filter out BPF_TRACE_RAW_TP after looking at prog_args_trusted(),
> or another tag to specify that this kfunc also requires the arg to be locked.
>

Agreed. We had some open-ended discussion about this in the earlier patch.
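Concretely, the registration below is what I would try in the next revision (untested sketch; whether KF_TRUSTED_ARGS alone is enough to rule out BPF_TRACE_RAW_TP programs is exactly the open question you raise):

BTF_SET8_START(sock_destroy_kfunc_set)
/* Mark the sock argument as trusted, per the suggestion above. */
BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS)
BTF_SET8_END(sock_destroy_kfunc_set)

If that turns out to be insufficient, we would be back to filtering on the attach type at registration time, or to a new tag that requires the argument to be a locked socket, as you note.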
>> +}
>> +late_initcall(init_subsystem);
>> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
>> index 33f559f491c8..59a833c0c872 100644
>> --- a/net/ipv4/tcp.c
>> +++ b/net/ipv4/tcp.c
>> @@ -4678,8 +4678,10 @@ int tcp_abort(struct sock *sk, int err)
>>  		return 0;
>>  	}
>> -	/* Don't race with userspace socket closes such as tcp_close. */
>> -	lock_sock(sk);
>> +	/* BPF context ensures sock locking. */
>> +	if (!has_current_bpf_ctx())
>> +		/* Don't race with userspace socket closes such as tcp_close. */
>> +		lock_sock(sk);
>
> This is ok.
>
>>  	if (sk->sk_state == TCP_LISTEN) {
>>  		tcp_set_state(sk, TCP_CLOSE);
>> @@ -4688,7 +4690,8 @@ int tcp_abort(struct sock *sk, int err)
>>  	/* Don't race with BH socket closes such as inet_csk_listen_stop. */
>>  	local_bh_disable();
>> -	bh_lock_sock(sk);
>> +	if (!has_current_bpf_ctx())
>> +		bh_lock_sock(sk);
>
> This may or may not work, depending on whether the earlier lock_sock_fast() done in bpf_iter_tcp_seq_show() returned true or false. It is probably a good reason to replace the lock_sock_fast() with lock_sock() in bpf_iter_tcp_seq_show(). :)

We would have to replace lock_sock_fast with lock_sock anyway; otherwise this causes problems in inet_unhash() while destroying a TCP listening socket, see https://lore.kernel.org/bpf/20230321184541.1857363-1-aditi.ghag@xxxxxxxxxxxxx/T/#m0bb85df6482e7eae296b00394c482254db544748. A rough sketch of that change is at the end of this mail.

>
>>  	if (!sock_flag(sk, SOCK_DEAD)) {
>>  		sk->sk_err = err;
>> @@ -4700,10 +4703,13 @@ int tcp_abort(struct sock *sk, int err)
>>  		tcp_done(sk);
>>  	}
>> -	bh_unlock_sock(sk);
>> +	if (!has_current_bpf_ctx())
>> +		bh_unlock_sock(sk);
>> +
>>  	local_bh_enable();
>>  	tcp_write_queue_purge(sk);
>> -	release_sock(sk);
>> +	if (!has_current_bpf_ctx())
>> +		release_sock(sk);
>>  	return 0;
>>  }
>>  EXPORT_SYMBOL_GPL(tcp_abort);
>
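To make the lock_sock_fast() -> lock_sock() direction concrete, what I have in mind for bpf_iter_tcp_seq_show() in net/ipv4/tcp_ipv4.c is roughly the hunk below (untested sketch; the surrounding context lines are from memory and may not match the tree exactly):

 	if (sk_fullsock(sk))
-		slow = lock_sock_fast(sk);
+		lock_sock(sk);
 	...
 	if (sk_fullsock(sk))
-		unlock_sock_fast(sk, slow);
+		release_sock(sk);

With the iterator always taking the full socket lock, tcp_abort() called from the BPF program can safely skip lock_sock()/bh_lock_sock() behind has_current_bpf_ctx(), and the inet_unhash() problem with listening sockets linked above should no longer be hit. I'll fold this into the next revision.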