Hi,

io_uring's openat seems to produce FDs that are incompatible with large files (>2GB). If a file smaller than 2GB is opened using io_uring's openat, then writes past that threshold -- both via io_uring and via a plain sync pwrite() -- fail with EFBIG. If such a file is opened with the sync openat, then both io_uring writes and sync writes succeed. And if the file is already larger than 2GB, then io_uring's openat fails right away, while the sync one works.

Kernel versions: 5.6.0-rc2, 5.6.0.

A couple of reproducers are attached: one demonstrates a successful open followed by failing writes, and the other a failing open (each in comparison with the sync calls).

The output of the former one, for example:

*** sync openat
openat succeeded
sync write at offset 0
write succeeded
sync write at offset 4294967296
write succeeded

*** sync openat
openat succeeded
io_uring write at offset 0
write succeeded
io_uring write at offset 4294967296
write succeeded

*** io_uring openat
openat succeeded
sync write at offset 0
write succeeded
sync write at offset 4294967296
write failed: File too large

*** io_uring openat
openat succeeded
io_uring write at offset 0
write succeeded
io_uring write at offset 4294967296
write failed: File too large

--
Dmitry
/*
 * Reproducer: open a file either with the synchronous openat() or with an
 * io_uring openat request, then write to it at offset 0 and at offset 2^32,
 * either with pwrite() or with an io_uring write request. All four
 * open/write combinations are exercised and the outcome of every step is
 * reported on stderr.
 */
#include <liburing.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/resource.h>
#include <unistd.h>

static const int RSIZE = 2;
/* Deliberately no O_LARGEFILE: the flags are passed through as-is. */
static const int OPEN_FLAGS = O_RDWR | O_CREAT;
static const mode_t OPEN_MODE = S_IRUSR | S_IWUSR;

/*
 * Print a formatted message to stderr and abort. No trailing semicolon in
 * the expansion, so the macro behaves like a single statement at call sites.
 */
#define DIE(...) do {\
    fprintf(stderr, __VA_ARGS__);\
    abort();\
} while (0)

/*
 * Submit the already prepared request(s) on `ring` and wait for one
 * completion.
 *
 * `op` names the operation in the "failed to submit" message. On success the
 * completion result is stored in `*res` and 0 is returned. On failure a
 * message is printed to stderr, `*res` is left untouched and -1 is returned.
 */
static int submit_and_reap(struct io_uring *ring, const char *op, int *res)
{
    int ret = io_uring_submit(ring);
    if (ret < 0) {
        fprintf(stderr, "failed to submit %s: %s\n", op, strerror(-ret));
        return -1;
    }

    struct io_uring_cqe *cqe = NULL;
    ret = io_uring_wait_cqe(ring, &cqe);
    /* Check the wait result first: cqe must not be touched if the wait failed. */
    if (ret < 0) {
        fprintf(stderr, "wait_cqe failed: %s\n", strerror(-ret));
        return -1;
    }

    *res = cqe->res;
    io_uring_cqe_seen(ring, cqe);
    return 0;
}

/*
 * Write a short test buffer to `fd` at `offset` and report the outcome.
 *
 * When `sync` is non-zero the write is done with pwrite(); otherwise it is
 * queued on `ring` as an io_uring write and its completion is awaited.
 */
void do_write(struct io_uring *ring, int sync, int fd, off_t offset)
{
    /* off_t has no printf conversion of its own; widen it explicitly. */
    fprintf(stderr, "%s write at offset %lld\n",
            sync ? "sync" : "io_uring", (long long)offset);

    char buf[] = "some test write buf";
    int res;

    if (sync) {
        ssize_t written = pwrite(fd, buf, sizeof(buf), offset);
        /* Normalize to the negative-errno convention used by cqe->res. */
        res = written < 0 ? -errno : (int)written;
    } else {
        struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
        if (!sqe) {
            fprintf(stderr, "failed to get sqe\n");
            return;
        }
        io_uring_prep_write(sqe, fd, buf, sizeof(buf), offset);
        if (submit_and_reap(ring, "write", &res) < 0) {
            return;
        }
    }

    if (res < 0) {
        fprintf(stderr, "write failed: %s\n", strerror(-res));
    } else {
        fprintf(stderr, "write succeeded\n");
    }
}

/*
 * Open `fn` relative to `dfd` and, if that succeeds, write to it at offset 0
 * and at offset 2^32, then close it.
 *
 * `sync_open` selects openat() (non-zero) or an io_uring openat request
 * (zero); `sync_write` is passed on to do_write() to select the write path.
 */
void test_open_write(struct io_uring *ring, int sync_open, int sync_write,
                     int dfd, const char* fn)
{
    fprintf(stderr, "\n*** %s openat\n", sync_open ? "sync" : "io_uring");

    int fd = -1;

    if (sync_open) {
        fd = openat(dfd, fn, OPEN_FLAGS, OPEN_MODE);
        if (fd < 0) {
            fd = -errno;
        }
    } else {
        struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
        if (!sqe) {
            fprintf(stderr, "failed to get sqe\n");
            return;
        }
        io_uring_prep_openat(sqe, dfd, fn, OPEN_FLAGS, OPEN_MODE);
        if (submit_and_reap(ring, "openat", &fd) < 0) {
            return;
        }
    }

    if (fd < 0) {
        fprintf(stderr, "openat failed: %s\n", strerror(-fd));
        return;
    }

    fprintf(stderr, "openat succeeded\n");
    do_write(ring, sync_write, fd, 0);
    do_write(ring, sync_write, fd, 1ull << 32);
    close(fd);
}

/*
 * Set up a ring and run the four open/write combinations against files
 * under /tmp. Aborts if /tmp cannot be opened or the ring cannot be created.
 */
int main(void)
{
    int dfd = open("/tmp", O_RDONLY | O_DIRECTORY);
    if (dfd < 0) {
        DIE("open /tmp: %s\n", strerror(errno));
    }

    struct io_uring ring;
    int ret = io_uring_queue_init(RSIZE, &ring, 0);
    if (ret < 0) {
        DIE("failed to init io_uring: %s\n", strerror(-ret));
    }

    test_open_write(&ring, 1, 1, dfd, "io_uring_openat_write_test1");
    test_open_write(&ring, 1, 0, dfd, "io_uring_openat_write_test2");
    test_open_write(&ring, 0, 1, dfd, "io_uring_openat_write_test3");
    test_open_write(&ring, 0, 0, dfd, "io_uring_openat_write_test4");

    io_uring_queue_exit(&ring);
    close(dfd);
    return 0;
}
/*
 * Reproducer: create a file under /tmp with data written at offset 2^32,
 * then try to open it once with the synchronous openat() and once with an
 * io_uring openat request, reporting the outcome of each on stderr.
 */
#include <liburing.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/resource.h>
#include <unistd.h>

/*
 * Print a formatted message to stderr and abort. No trailing semicolon in
 * the expansion, so the macro behaves like a single statement at call sites.
 */
#define DIE(...) do {\
    fprintf(stderr, __VA_ARGS__);\
    abort();\
} while (0)

static const int RSIZE = 2;
/* Deliberately no O_LARGEFILE: the flags are passed through as-is. */
static const int OPEN_FLAGS = O_RDWR | O_CREAT;
static const mode_t OPEN_MODE = S_IRUSR | S_IWUSR;

/*
 * Open `fn` relative to `dfd` with the synchronous openat(), report whether
 * it worked, and close the descriptor again on success.
 */
void open_sync(int dfd, const char* fn)
{
    int fd = openat(dfd, fn, OPEN_FLAGS, OPEN_MODE);
    if (fd < 0) {
        fprintf(stderr, "sync open failed: %s\n", strerror(errno));
        return;
    }

    fprintf(stderr, "sync open succeeded\n");
    close(fd);
}

/*
 * Open `fn` relative to `dfd` through an io_uring openat request on `ring`,
 * report whether it worked, and close the descriptor again on success.
 */
void open_io_uring(struct io_uring *ring, int dfd, const char* fn)
{
    struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
    if (!sqe) {
        fprintf(stderr, "failed to get sqe\n");
        return;
    }
    io_uring_prep_openat(sqe, dfd, fn, OPEN_FLAGS, OPEN_MODE);

    int ret = io_uring_submit(ring);
    if (ret < 0) {
        fprintf(stderr, "failed to submit openat: %s\n", strerror(-ret));
        return;
    }

    struct io_uring_cqe *cqe = NULL;
    ret = io_uring_wait_cqe(ring, &cqe);
    /* Check the wait result first: cqe must not be touched if the wait failed. */
    if (ret < 0) {
        fprintf(stderr, "wait_cqe failed: %s\n", strerror(-ret));
        return;
    }

    int fd = cqe->res;
    io_uring_cqe_seen(ring, cqe);

    if (fd < 0) {
        fprintf(stderr, "io_uring openat failed: %s\n", strerror(-fd));
        return;
    }

    fprintf(stderr, "io_uring openat succeeded\n");
    close(fd);
}

/*
 * Create (or reuse) `fn` relative to `dfd` and write a few bytes at offset
 * 2^32 using the synchronous calls.
 *
 * Returns 0 on success and a negative value if the open or the write failed;
 * the failure is also reported on stderr.
 */
int prepare_file(int dfd, const char* fn)
{
    const char buf[] = "foo";

    int fd = openat(dfd, fn, OPEN_FLAGS, OPEN_MODE);
    if (fd < 0) {
        fprintf(stderr, "prepare/open: %s\n", strerror(errno));
        return -1;
    }

    /* Keep the full ssize_t result instead of narrowing it to int. */
    ssize_t written = pwrite(fd, buf, sizeof(buf), 1ull << 32);
    if (written < 0) {
        /* Report before close(), which could overwrite errno. */
        fprintf(stderr, "prepare/pwrite: %s\n", strerror(errno));
    }

    close(fd);
    return written < 0 ? -1 : 0;
}

/*
 * Set up a ring, prepare the test file, and compare the synchronous and
 * io_uring opens. Aborts if /tmp cannot be opened or the ring cannot be
 * created.
 */
int main(void)
{
    const char *fn = "io_uring_openat_test";

    int dfd = open("/tmp", O_RDONLY | O_DIRECTORY);
    if (dfd < 0) {
        DIE("open /tmp: %s\n", strerror(errno));
    }

    struct io_uring ring;
    int ret = io_uring_queue_init(RSIZE, &ring, 0);
    if (ret < 0) {
        DIE("failed to init io_uring: %s\n", strerror(-ret));
    }

    /* The opens are only compared if the file could be prepared. */
    if (!prepare_file(dfd, fn)) {
        open_sync(dfd, fn);
        open_io_uring(&ring, dfd, fn);
    }

    io_uring_queue_exit(&ring);
    close(dfd);
    return 0;
}