When an application uses a programming model such as submitting one sqe
and then waiting for its completion event, __io_uring_get_cqe() results in
many unnecessary syscalls. See the test program below:
/*
 * Reproducer: read the file named by argv[1] in 4096-byte O_DIRECT chunks,
 * submitting exactly one readv sqe and waiting for its cqe per iteration,
 * then print the total number of bytes read.
 *
 * Returns 0 on success and 1 on a usage error, a setup failure, or any
 * submit/wait/read error. All acquired resources are released on every
 * path once the ring has been initialised.
 */
int main(int argc, char *argv[])
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct iovec iov;
	off_t offset = 0, filesize = 0;
	void *buf = NULL;
	int fd, ret;
	int status = 0;

	if (argc < 2) {
		printf("%s: file\n", argv[0]);
		return 1;
	}

	ret = io_uring_queue_init(4, &ring, 0);
	if (ret < 0) {
		fprintf(stderr, "queue_init: %s\n", strerror(-ret));
		return 1;
	}

	fd = open(argv[1], O_RDONLY | O_DIRECT);
	if (fd < 0) {
		perror("open");
		status = 1;
		goto out_ring;
	}

	/* The read buffer is allocated with 4096-byte alignment and size. */
	if (posix_memalign(&buf, 4096, 4096)) {
		status = 1;
		goto out_close;
	}

	iov.iov_base = buf;
	iov.iov_len = 4096;

	for (;;) {
		sqe = io_uring_get_sqe(&ring);
		if (!sqe) {
			printf("here\n");
			break;
		}

		io_uring_prep_readv(sqe, fd, &iov, 1, offset);

		ret = io_uring_submit(&ring);
		if (ret < 0) {
			fprintf(stderr, "io_uring_submit: %s\n", strerror(-ret));
			status = 1;
			goto out_free;
		}

		ret = io_uring_wait_cqe(&ring, &cqe);
		if (ret < 0) {
			fprintf(stderr, "io_uring_wait_cqe: %s\n", strerror(-ret));
			status = 1;
			goto out_free;
		}

		/* res == 0 ends the loop normally; res < 0 is reported as an error. */
		if (cqe->res <= 0) {
			if (cqe->res < 0) {
				fprintf(stderr, "got eror: %d\n", cqe->res);
				/* Propagate the failure to the exit status. */
				status = 1;
			}
			io_uring_cqe_seen(&ring, cqe);
			break;
		}

		offset += cqe->res;
		filesize += cqe->res;
		io_uring_cqe_seen(&ring, cqe);
	}

	/* off_t is not guaranteed to be long; print it via long long. */
	printf("filesize: %lld\n", (long long)filesize);

out_free:
	free(buf);
out_close:
	close(fd);
out_ring:
	io_uring_queue_exit(&ring);
	return status;
}
dd if=/dev/zero of=testfile bs=4096 count=16
./test testfile
Then use bpftrace to trace the io_uring_enter syscalls. With the original code:
[lege@localhost ~]$ sudo bpftrace -e "tracepoint:syscalls:sys_enter_io_uring_enter {@c[tid] = count();}"
Attaching 1 probe...
@c[11184]: 49
The above test issues 49 syscalls, which is counterintuitive. After looking
into the code, the reason is that __io_uring_get_cqe() issues one more
syscall than needed: once __io_uring_get_cqe() has issued its first syscall,
one cqe should already be ready, so there is no need to wait again.
To fix this issue, set wait_nr to zero after the first syscall. With this
patch, bpftrace shows that the number of io_uring_enter syscalls is 33.