Hi Pavel, In the tests I made I used this version of IORING_CQE_F_COPIED: https://git.samba.org/?p=metze/linux/wip.git;a=commitdiff;h=645d3b584c417a247d92d71baa6266a5f3d0d17d (also inlined at the end) Would that something we want for 6.1? (Should I post that with a useful commit message, after doing some more tests) metze include/uapi/linux/io_uring.h | 1 + io_uring/notif.c | 5 +++++ net/ipv4/ip_output.c | 3 ++- net/ipv4/tcp.c | 2 ++ net/ipv6/ip6_output.c | 3 ++- 5 files changed, 12 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 04729989e6ee..efeab6a9b4f3 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -341,6 +341,7 @@ struct io_uring_cqe { #define IORING_CQE_F_MORE (1U << 1) #define IORING_CQE_F_SOCK_NONEMPTY (1U << 2) #define IORING_CQE_F_NOTIF (1U << 3) +#define IORING_CQE_F_COPIED (1U << 4) enum { IORING_CQE_BUFFER_SHIFT = 16, diff --git a/io_uring/notif.c b/io_uring/notif.c index e37c6569d82e..2162d1af0b60 100644 --- a/io_uring/notif.c +++ b/io_uring/notif.c @@ -18,6 +18,8 @@ static void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked) __io_unaccount_mem(ctx->user, nd->account_pages); nd->account_pages = 0; } + if (!nd->uarg.zerocopy) + notif->cqe.flags |= IORING_CQE_F_COPIED; io_req_task_complete(notif, locked); } @@ -28,6 +30,8 @@ static void io_uring_tx_zerocopy_callback(struct sk_buff *skb, struct io_notif_data *nd = container_of(uarg, struct io_notif_data, uarg); struct io_kiocb *notif = cmd_to_io_kiocb(nd); + uarg->zerocopy = uarg->zerocopy & success; + if (refcount_dec_and_test(&uarg->refcnt)) { notif->io_task_work.func = __io_notif_complete_tw; io_req_task_work_add(notif); @@ -53,6 +57,7 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx) nd = io_notif_to_data(notif); nd->account_pages = 0; + nd->uarg.zerocopy = 1; nd->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; nd->uarg.callback = io_uring_tx_zerocopy_callback; refcount_set(&nd->uarg.refcnt, 1); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 04e2034f2f8e..64d263a8ece8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1032,7 +1032,8 @@ static int __ip_append_data(struct sock *sk, paged = true; zc = true; uarg = msg->msg_ubuf; - } + } else + msg->msg_ubuf->zerocopy = 0; } else if (sock_flag(sk, SOCK_ZEROCOPY)) { uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb)); if (!uarg) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cdf26724d7db..d3a2ed9f22df 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1247,6 +1247,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) uarg = msg->msg_ubuf; net_zcopy_get(uarg); zc = sk->sk_route_caps & NETIF_F_SG; + if (!zc) + uarg->zerocopy = 0; } else if (sock_flag(sk, SOCK_ZEROCOPY)) { uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb)); if (!uarg) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index bb0f469a5247..3d75dd05ff98 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1559,7 +1559,8 @@ static int __ip6_append_data(struct sock *sk, paged = true; zc = true; uarg = msg->msg_ubuf; - } + } else + msg->msg_ubuf->zerocopy = 0; } else if (sock_flag(sk, SOCK_ZEROCOPY)) { uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb)); if (!uarg)