We use an sk_msg program to redirect traffic between two sockets via a
sockhash map, like this:

    skA                 skB
         redirect
    Tx <-----------> Rx

Then construct a scenario where the sending side transmits faster than
the receiving side consumes, so packets accumulate in the ingress queue
on the receiving side. Nothing bounds that queue, so after a while
memory is exhausted and the system OOMs.

Fix this by adding msg->sg.size to sk_rmem_alloc when an sk_msg is
queued to the ingress queue, subtracting it when the sk_msg is dequeued
from the ingress queue, and checking sk_rmem_alloc against sk_rcvbuf at
the beginning of bpf_tcp_ingress().

Signed-off-by: Wang Yufen <wangyufen@xxxxxxxxxx>
---
 include/linux/skmsg.h | 9 ++++++---
 net/core/skmsg.c      | 2 ++
 net/ipv4/tcp_bpf.c    | 6 ++++++
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index c5a2d6f50f25..d2cfd5fa2274 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -308,9 +308,10 @@ static inline void sk_psock_queue_msg(struct sk_psock *psock,
 				      struct sk_msg *msg)
 {
 	spin_lock_bh(&psock->ingress_lock);
-	if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+	if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
 		list_add_tail(&msg->list, &psock->ingress_msg);
-	else {
+		atomic_add(msg->sg.size, &psock->sk->sk_rmem_alloc);
+	} else {
 		sk_msg_free(psock->sk, msg);
 		kfree(msg);
 	}
@@ -323,8 +324,10 @@ static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock)
 	spin_lock_bh(&psock->ingress_lock);
 	msg = list_first_entry_or_null(&psock->ingress_msg,
 				       struct sk_msg, list);
-	if (msg)
+	if (msg) {
 		list_del(&msg->list);
+		atomic_sub(msg->sg.size, &psock->sk->sk_rmem_alloc);
+	}
 	spin_unlock_bh(&psock->ingress_lock);
 	return msg;
 }
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index cc381165ea08..b19a3c49564f 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -445,6 +445,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
 			if (!msg_rx->skb)
 				sk_mem_uncharge(sk, copy);
 			msg_rx->sg.size -= copy;
+			atomic_sub(copy, &sk->sk_rmem_alloc);
 
 			if (!sge->length) {
 				sk_msg_iter_var_next(i);
@@ -754,6 +755,7 @@ static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
 
 	list_for_each_entry_safe(msg, tmp, &psock->ingress_msg, list) {
 		list_del(&msg->list);
+		atomic_sub(msg->sg.size, &psock->sk->sk_rmem_alloc);
 		sk_msg_free(psock->sk, msg);
 		kfree(msg);
 	}
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 1cdcb4df0eb7..dd099875414c 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -24,6 +24,12 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
 		return -ENOMEM;
 
 	lock_sock(sk);
+	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
+		release_sock(sk);
+		kfree(tmp);
+		return -EAGAIN;
+	}
+
 	tmp->sg.start = msg->sg.start;
 	i = msg->sg.start;
 	do {
--
2.25.1
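
For anyone who wants to reproduce the scenario, a minimal sk_msg program
along the following lines drives the redirect described above. This is a
sketch, not part of the patch: the map name sock_hash, the key layout,
and the program name are illustrative. BPF_F_INGRESS is what places the
sk_msg on the receiver's ingress queue, i.e. the queue this patch
bounds.

/* Repro sketch (BPF side): redirect every sk_msg sent on skA to
 * skB's ingress queue via a sockhash map.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_SOCKHASH);
	__uint(max_entries, 2);
	__type(key, __u32);
	__type(value, __u64);
} sock_hash SEC(".maps");

SEC("sk_msg")
int msg_redir(struct sk_msg_md *msg)
{
	__u32 key = 1;	/* hypothetical key under which skB was inserted */

	/* BPF_F_INGRESS queues the data on skB's ingress queue, the
	 * queue whose growth is now accounted in sk_rmem_alloc.
	 */
	return bpf_msg_redirect_hash(msg, &sock_hash, &key, BPF_F_INGRESS);
}

char _license[] SEC("license") = "GPL";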
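
The userspace side might look roughly like this (also a sketch; error
handling is elided, and prog_fd/ska_fd/skb_fd are assumed to be a loaded
sk_msg program and two established TCP sockets):

#include <linux/types.h>
#include <bpf/bpf.h>

/* Repro sketch (userspace side): attach the verdict program to the
 * sockhash and insert both sockets.
 */
static void setup_redirect(int prog_fd, int ska_fd, int skb_fd)
{
	int map_fd = bpf_map_create(BPF_MAP_TYPE_SOCKHASH, "sock_hash",
				    sizeof(__u32), sizeof(__u64), 2, NULL);
	__u32 key;
	__u64 val;

	/* Run the sk_msg program for every send on sockets in the map. */
	bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0);

	key = 0; val = ska_fd;	/* sender, skA */
	bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
	key = 1; val = skb_fd;	/* receiver, skB; matches the key in the prog */
	bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
}

With a sender writing in a tight loop and a receiver that never calls
recv(), sk_rmem_alloc on skB keeps growing; with this patch the check in
bpf_tcp_ingress() returns -EAGAIN once sk_rcvbuf is exceeded instead of
queueing without bound.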