On Thu, Jan 23, 2025 at 1:18 AM Jens Axboe <axboe@xxxxxxxxx> wrote: > On 1/22/25 12:38 PM, Jens Axboe wrote: > > > > On Tue, 21 Jan 2025 17:09:59 +0100, Jann Horn wrote: > >> cmd->sqe seems to point to shared memory here; so values should only be > >> read from it with READ_ONCE(). To ensure that the compiler won't generate > >> code that assumes the value in memory will stay constant, add a > >> READ_ONCE(). > >> The callees io_uring_cmd_getsockopt() and io_uring_cmd_setsockopt() already > >> do this correctly. > >> > >> [...] > > > > Applied, thanks! > > > > [1/1] io_uring/uring_cmd: add missing READ_ONCE() on shared memory read > > commit: 0963dba3dc006b454c54fd019bbbdb931e7a7c70 > > I took a closer look and this isn't necessary. Either ->sqe is a full > copy at this point. Should probably be renamed as such... If we want to > make this clearer, then we should do: Are you sure? On mainline (at commit 21266b8df522), I applied the attached diff that basically adds some printf debugging and adds this in io_uring_cmd_sock(): pr_warn("%s: [first read] cmd->sqe->cmd_op = 0x%x\n", __func__, READ_ONCE(cmd->sqe->cmd_op)); mdelay(2000); pr_warn("%s: [second read] cmd->sqe->cmd_op = 0x%x\n", __func__, READ_ONCE(cmd->sqe->cmd_op)); Then I ran the attached testcase, which submits a SQE and then modifies the ->cmd_op of the SQE while it is being submitted. Resulting dmesg output, showing that cmd->sqe->cmd_op changes when userspace modifies the SQE: [ 180.415944][ T1110] io_submit_sqes: SQE = ffff888010bcc000 [ 180.418731][ T1110] io_submit_sqe: SQE = ffff888010bcc000 [ 180.421191][ T1110] io_queue_sqe [ 180.422160][ T1110] io_issue_sqe [ 180.423101][ T1110] io_uring_cmd: SQE = ffff888010bcc000 [ 180.424570][ T1110] io_uring_cmd_sock: cmd->sqe = ffff888010bcc000 [ 180.426272][ T1110] io_uring_cmd_sock: [first read] cmd->sqe->cmd_op = 0x1234 [ 182.429036][ T1110] io_uring_cmd_sock: [second read] cmd->sqe->cmd_op = 0x5678
#define _GNU_SOURCE #include <pthread.h> #include <err.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> #include <poll.h> #include <unistd.h> #include <sys/mman.h> #include <sys/prctl.h> #include <sys/eventfd.h> #include <sys/syscall.h> #include <sys/socket.h> #include <netinet/in.h> #include <netinet/tcp.h> #include <linux/io_uring.h> #define SYSCHK(x) ({ \ typeof(x) __res = (x); \ if (__res == (typeof(x))-1) \ err(1, "SYSCHK(" #x ")"); \ __res; \ }) #define NUM_SQ_PAGES 4 static int uring_fd; static struct io_uring_sqe *sq_data; static void *thread_fn(void *dummy) { sleep(1); sq_data->cmd_op = 0x5678; return NULL; } int main(void) { printf("main pid: %d\n", getpid()); // sq sq_data = SYSCHK(mmap(NULL, NUM_SQ_PAGES*0x1000, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0)); // cq void *cq_data = SYSCHK(mmap(NULL, NUM_SQ_PAGES*0x1000, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0)); *(volatile unsigned int *)(cq_data+4) = 64 * NUM_SQ_PAGES; // initialize uring struct io_uring_params params = { .flags = IORING_SETUP_DEFER_TASKRUN|IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_NO_MMAP|IORING_SETUP_NO_SQARRAY, .sq_off = { .user_addr = (unsigned long)sq_data }, .cq_off = { .user_addr = (unsigned long)cq_data } }; uring_fd = SYSCHK(syscall(__NR_io_uring_setup, /*entries=*/10, ¶ms)); int sockfd = SYSCHK(socket(AF_INET, SOCK_STREAM, 0)); sq_data[0] = (struct io_uring_sqe) { .opcode = IORING_OP_URING_CMD, .flags = 0, .ioprio = 0, .fd = sockfd, .cmd_op = 0x1234, .user_data = 123 }; pthread_t thread; if (pthread_create(&thread, NULL, thread_fn, NULL)) errx(1, "pthread_create"); int submitted = SYSCHK(syscall(__NR_io_uring_enter, uring_fd, /*to_submit=*/1, /*min_complete=*/1, /*flags=*/IORING_ENTER_GETEVENTS, /*sig=*/NULL, /*sigsz=*/0)); printf("submitted %d\n", submitted); return 0; }
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 7bfbc7c22367..0ae830faf0d9 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1723,6 +1723,8 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) const struct cred *creds = NULL; int ret; + pr_warn("%s\n", __func__); + if (unlikely(!io_assign_file(req, def, issue_flags))) return -EBADF; @@ -1942,6 +1944,8 @@ static inline void io_queue_sqe(struct io_kiocb *req) { int ret; + pr_warn("%s\n", __func__); + ret = io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER); /* @@ -2159,6 +2163,8 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, struct io_submit_link *link = &ctx->submit_state.link; int ret; + pr_warn("%s: SQE = %px\n", __func__, sqe); + ret = io_init_req(ctx, req, sqe); if (unlikely(ret)) return io_submit_fail_init(sqe, req, ret); @@ -2187,6 +2193,7 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, } else if (unlikely(req->flags & (IO_REQ_LINK_FLAGS | REQ_F_FORCE_ASYNC | REQ_F_FAIL))) { + pr_warn("%s: not queuing SQE %px, flags=0x%llx\n", __func__, sqe, req->flags); if (req->flags & IO_REQ_LINK_FLAGS) { link->head = req; link->last = req; @@ -2309,6 +2316,7 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) io_req_add_to_cache(req, ctx); break; } + pr_warn("%s: SQE = %px\n", __func__, sqe); /* * Continue submitting even for sqe failure if the diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index fc94c465a985..5d88c72d6a89 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -6,6 +6,7 @@ #include <linux/io_uring/net.h> #include <linux/security.h> #include <linux/nospec.h> +#include <linux/delay.h> #include <net/sock.h> #include <uapi/linux/io_uring.h> @@ -235,6 +236,8 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) struct file *file = req->file; int ret; + pr_warn("%s: SQE = %px\n", __func__, ioucmd->sqe); + if (!file->f_op->uring_cmd) return -EOPNOTSUPP; @@ -347,9 +350,15 @@ int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags) struct proto *prot = READ_ONCE(sk->sk_prot); int ret, arg = 0; + pr_warn("%s: cmd->sqe = %px\n", __func__, cmd->sqe); + if (!prot || !prot->ioctl) return -EOPNOTSUPP; + pr_warn("%s: [first read] cmd->sqe->cmd_op = 0x%x\n", __func__, READ_ONCE(cmd->sqe->cmd_op)); + mdelay(2000); + pr_warn("%s: [second read] cmd->sqe->cmd_op = 0x%x\n", __func__, READ_ONCE(cmd->sqe->cmd_op)); + switch (cmd->sqe->cmd_op) { case SOCKET_URING_OP_SIOCINQ: ret = prot->ioctl(sk, SIOCINQ, &arg);