Add an interface for the user to notify the kernel that it is done reading the NET_RX dmabuf pages returned as cmsg. The kernel will drop the reference on the NET_RX pages to make them available for re-use. Signed-off-by: Willem de Bruijn <willemb@xxxxxxxxxx> Signed-off-by: Kaiyuan Zhang <kaiyuanz@xxxxxxxxxx> Signed-off-by: Mina Almasry <almasrymina@xxxxxxxxxx> --- include/uapi/asm-generic/socket.h | 1 + include/uapi/linux/uio.h | 4 ++++ net/core/sock.c | 36 +++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index aacb97f16b78..eb93b43394d4 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -135,6 +135,7 @@ #define SO_PASSPIDFD 76 #define SO_PEERPIDFD 77 +#define SO_DEVMEM_DONTNEED 97 #define SO_DEVMEM_HEADER 98 #define SCM_DEVMEM_HEADER SO_DEVMEM_HEADER #define SO_DEVMEM_OFFSET 99 diff --git a/include/uapi/linux/uio.h b/include/uapi/linux/uio.h index ae94763b1963..71314bf41590 100644 --- a/include/uapi/linux/uio.h +++ b/include/uapi/linux/uio.h @@ -26,6 +26,10 @@ struct cmsg_devmem { __u32 frag_token; }; +struct devmemtoken { + __u32 token_start; + __u32 token_count; +}; /* * UIO_MAXIOV shall be at least 16 1003.1g (5.4.1.1) */ diff --git a/net/core/sock.c b/net/core/sock.c index ab1e8d1bd5a1..2736b770a399 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1049,6 +1049,39 @@ static int sock_reserve_memory(struct sock *sk, int bytes) return 0; } +static noinline_for_stack int +sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen) +{ + struct devmemtoken tokens[128]; + unsigned int num_tokens, i, j; + int ret; + + if (sk->sk_type != SOCK_STREAM || sk->sk_protocol != IPPROTO_TCP) + return -EBADF; + + if (optlen % sizeof(struct devmemtoken) || optlen > sizeof(tokens)) + return -EINVAL; + + num_tokens = optlen / sizeof(struct devmemtoken); + if (copy_from_sockptr(tokens, optval, optlen)) + return -EFAULT; + + ret = 0; + for (i = 0; i < num_tokens; i++) { + for (j = 0; j < tokens[i].token_count; j++) { + struct page *page = xa_erase(&sk->sk_user_pages, + tokens[i].token_start + j); + + if (page) { + page_pool_page_put_many(page, 1); + ret++; + } + } + } + + return ret; +} + void sockopt_lock_sock(struct sock *sk) { /* When current->bpf_ctx is set, the setsockopt is called from @@ -1528,6 +1561,9 @@ int sk_setsockopt(struct sock *sk, int level, int optname, WRITE_ONCE(sk->sk_txrehash, (u8)val); break; + case SO_DEVMEM_DONTNEED: + ret = sock_devmem_dontneed(sk, optval, optlen); + break; default: ret = -ENOPROTOOPT; break; -- 2.41.0.640.ga95def55d0-goog