Jakub Sitnicki <jakub@xxxxxxxxxxxxxx> wrote: > On Wed, Nov 23, 2022 at 02:01 PM +08, Pengcheng Yang wrote: > > John Fastabend <john.fastabend@xxxxxxxxx> wrote: > >> > >> if (!psock->apply_bytes) { > >> /* Clean up before releasing the sock lock. */ > >> eval = psock->eval; > >> psock->eval = __SK_NONE; > >> psock->sk_redir = NULL; > >> } > >> > >> Now that we have a psock->flags we should clera that as > >> well right? > > > > According to my understanding, it is not necessary (but can) to clear > > psock->flags here, because psock->flags will be overwritten by msg->flags > > at the beginning of each redirection (in sk_psock_msg_verdict()). > > 1. We should at least document that psock->flags value can be garbage > (undefined) if psock->sk_redir is null. > > 2. 'flags' is amiguous (flags for what?). I'd suggest to rename to > something like redir_flags. > > Also, since we don't care about all flags, but just the ingress bit, > we should store just that. It's not about size. Less state passed > around is easier to reason about. See patch below. > > 3. Alternatively, I'd turn psock->sk_redir into a tagged pointer, like > skb->_sk_redir is. This way all state (pointer & flags) is bundled > and managed together. It would be a bigger change. Would have to be > split out from this patch set. > > --8<-- > > diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h > index 70d6cb94e580..84f787416a54 100644 > --- a/include/linux/skmsg.h > +++ b/include/linux/skmsg.h > @@ -82,6 +82,7 @@ struct sk_psock { > u32 apply_bytes; > u32 cork_bytes; > u32 eval; > + bool redir_ingress; /* undefined if sk_redir is null */ > struct sk_msg *cork; > struct sk_psock_progs progs; > #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) > diff --git a/include/net/tcp.h b/include/net/tcp.h > index 14d45661a84d..5b70b241ce71 100644 > --- a/include/net/tcp.h > +++ b/include/net/tcp.h > @@ -2291,8 +2291,8 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); > void tcp_bpf_clone(const struct sock *sk, struct sock *newsk); > #endif /* CONFIG_BPF_SYSCALL */ > > -int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes, > - int flags); > +int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress, > + struct sk_msg *msg, u32 bytes, int flags); > #endif /* CONFIG_NET_SOCK_MSG */ > > #if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG) > diff --git a/net/core/skmsg.c b/net/core/skmsg.c > index e6b9ced3eda8..53d0251788aa 100644 > --- a/net/core/skmsg.c > +++ b/net/core/skmsg.c > @@ -886,13 +886,16 @@ int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock, > ret = sk_psock_map_verd(ret, msg->sk_redir); > psock->apply_bytes = msg->apply_bytes; > if (ret == __SK_REDIRECT) { > - if (psock->sk_redir) > + if (psock->sk_redir) { > sock_put(psock->sk_redir); > - psock->sk_redir = msg->sk_redir; > - if (!psock->sk_redir) { > + psock->sk_redir = NULL; > + } > + if (!msg->sk_redir) { > ret = __SK_DROP; > goto out; > } > + psock->redir_ingress = sk_msg_to_ingress(msg); > + psock->sk_redir = msg->sk_redir; > sock_hold(psock->sk_redir); > } > out: > diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c > index cf9c3e8f7ccb..490b359dc814 100644 > --- a/net/ipv4/tcp_bpf.c > +++ b/net/ipv4/tcp_bpf.c > @@ -131,10 +131,9 @@ static int tcp_bpf_push_locked(struct sock *sk, struct sk_msg *msg, > return ret; > } > > -int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, > - u32 bytes, int flags) > +int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress, > + struct sk_msg *msg, u32 bytes, int flags) > { > - bool ingress = sk_msg_to_ingress(msg); > struct sk_psock *psock = sk_psock_get(sk); > int ret; > > @@ -337,7 +336,8 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, > release_sock(sk); > > origsize = msg->sg.size; > - ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags); > + ret = tcp_bpf_sendmsg_redir(sk_redir, psock->redir_ingress, ^^^^^^^ Thanks for such detailed advice. Here it looks like we need to pre-save the redir_ingress before setting psock->sk_redir to NULL and release_sock. > + msg, tosend, flags); > sent = origsize - msg->sg.size; > > if (eval == __SK_REDIRECT) > diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c > index 264cf367e265..b22d97610b9a 100644 > --- a/net/tls/tls_sw.c > +++ b/net/tls/tls_sw.c > @@ -846,7 +846,8 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, > sk_msg_return_zero(sk, msg, send); > msg->sg.size -= send; > release_sock(sk); > - err = tcp_bpf_sendmsg_redir(sk_redir, &msg_redir, send, flags); > + err = tcp_bpf_sendmsg_redir(sk_redir, psock->redir_ingress, > + &msg_redir, send, flags); > lock_sock(sk); > if (err < 0) { > *copied -= sk_msg_free_nocharge(sk, &msg_redir);