When a user applies a programming model such as submitting one sqe and then waiting for its completion event, __io_uring_get_cqe() results in many unnecessary syscalls; see the test program below: int main(int argc, char *argv[]) { struct io_uring ring; int fd, ret; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct iovec iov; off_t offset, filesize = 0; void *buf; if (argc < 2) { printf("%s: file\n", argv[0]); return 1; } ret = io_uring_queue_init(4, &ring, 0); if (ret < 0) { fprintf(stderr, "queue_init: %s\n", strerror(-ret)); return 1; } fd = open(argv[1], O_RDONLY | O_DIRECT); if (fd < 0) { perror("open"); return 1; } if (posix_memalign(&buf, 4096, 4096)) return 1; iov.iov_base = buf; iov.iov_len = 4096; offset = 0; do { sqe = io_uring_get_sqe(&ring); if (!sqe) { printf("here\n"); break; } io_uring_prep_readv(sqe, fd, &iov, 1, offset); ret = io_uring_submit(&ring); if (ret < 0) { fprintf(stderr, "io_uring_submit: %s\n", strerror(-ret)); return 1; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { fprintf(stderr, "io_uring_wait_cqe: %s\n", strerror(-ret)); return 1; } if (cqe->res <= 0) { if (cqe->res < 0) { fprintf(stderr, "got eror: %d\n", cqe->res); ret = 1; } io_uring_cqe_seen(&ring, cqe); break; } offset += cqe->res; filesize += cqe->res; io_uring_cqe_seen(&ring, cqe); } while (1); printf("filesize: %ld\n", filesize); close(fd); io_uring_queue_exit(&ring); return 0; } dd if=/dev/zero of=testfile bs=4096 count=16 ./test testfile and use bpftrace to trace the io_uring_enter syscalls. With the original code: [lege@localhost ~]$ sudo bpftrace -e "tracepoint:syscalls:sys_enter_io_uring_enter {@c[tid] = count();}" Attaching 1 probe... @c[11184]: 49 The above test issues 49 syscalls, which is counterintuitive. After looking into the code, this is because __io_uring_get_cqe issues one more syscall than necessary: indeed, when __io_uring_get_cqe issues the first syscall, one cqe should already be ready, so we don't need to wait again. 
To fix this issue, set wait_nr to zero after the first syscall. With this patch, bpftrace shows that the number of io_uring_enter syscalls is 33. Signed-off-by: Xiaoguang Wang <xiaoguang.wang@xxxxxxxxxxxxxxxxx> --- src/queue.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/queue.c b/src/queue.c index ef2cc2b..99a4a0c 100644 --- a/src/queue.c +++ b/src/queue.c @@ -53,6 +53,8 @@ int __io_uring_get_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr, if (wait_nr || submit) ret = __sys_io_uring_enter(ring->ring_fd, submit, wait_nr, flags, sigmask); + if (wait_nr) + wait_nr = 0; if (ret < 0) err = -errno; submit -= ret; -- 2.17.2