Declare MSG_SPLICE_PAGES, an internal sendmsg() flag, that hints to a network protocol that it should splice pages from the source iterator rather than copying the data if it can. This flag is added to a list that is cleared by sendmsg and recvmsg syscalls on entry. This is intended as a replacement for the ->sendpage() op, allowing a way to splice in several multipage folios in one go. Signed-off-by: David Howells <dhowells@xxxxxxxxxx> cc: Willem de Bruijn <willemdebruijn.kernel@xxxxxxxxx> cc: "David S. Miller" <davem@xxxxxxxxxxxxx> cc: Eric Dumazet <edumazet@xxxxxxxxxx> cc: Jakub Kicinski <kuba@xxxxxxxxxx> cc: Paolo Abeni <pabeni@xxxxxxxxxx> cc: Jens Axboe <axboe@xxxxxxxxx> cc: Matthew Wilcox <willy@xxxxxxxxxxxxx> cc: netdev@xxxxxxxxxxxxxxx --- include/linux/socket.h | 3 +++ net/socket.c | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/include/linux/socket.h b/include/linux/socket.h index 13c3a237b9c9..c2fa0f800999 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -327,6 +327,7 @@ struct ucred { */ #define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ +#define MSG_SPLICE_PAGES 0x8000000 /* Splice the pages from the iterator in sendmsg() */ #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ #define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file descriptor received through @@ -337,6 +338,8 @@ struct ucred { #define MSG_CMSG_COMPAT 0 /* We never have 32 bit fixups */ #endif +/* Flags to be cleared on entry by sendmsg, recvmsg, sendmmsg and recvmmsg syscalls */ +#define MSG_INTERNAL_FLAGS (MSG_SPLICE_PAGES) /* Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx */ #define SOL_IP 0 diff --git a/net/socket.c b/net/socket.c index 6bae8ce7059e..dfb912bbed62 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2139,6 +2139,7 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags, msg.msg_name = (struct sockaddr *)&address; msg.msg_namelen = addr_len; } + flags &= ~MSG_INTERNAL_FLAGS; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; msg.msg_flags = flags; @@ -2192,6 +2193,7 @@ int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags, if (!sock) goto out; + flags &= ~MSG_INTERNAL_FLAGS; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg, flags); @@ -2579,6 +2581,7 @@ long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT)) return -EINVAL; + flags &= ~MSG_INTERNAL_FLAGS; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) @@ -2627,6 +2630,7 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, entry = mmsg; compat_entry = (struct compat_mmsghdr __user *)mmsg; err = 0; + flags &= ~MSG_INTERNAL_FLAGS; flags |= MSG_BATCH; while (datagrams < vlen) { @@ -2775,6 +2779,7 @@ long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg, struct user_msghdr __user *umsg, struct sockaddr __user *uaddr, unsigned int flags) { + flags &= ~MSG_INTERNAL_FLAGS; return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0); } @@ -2787,6 +2792,7 @@ long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT)) return -EINVAL; + flags &= ~MSG_INTERNAL_FLAGS; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) @@ -2839,6 +2845,7 @@ static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg, goto out_put; } } + flags &= ~MSG_INTERNAL_FLAGS; entry = mmsg; compat_entry = (struct compat_mmsghdr __user *)mmsg;