io_uring's openat doesn't work with large (2G+) files

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

io_uring's openat seems to produce FDs that are incompatible with
large files (>2GB). If a file (smaller than 2GB) is opened using
io_uring's openat then writes -- both using io_uring and just sync
pwrite() -- past that threshold fail with EFBIG. If such a file is
opened with sync openat, then both io_uring's writes and sync writes
succeed. And if the file is larger than 2GB then io_uring's openat
fails right away, while the sync one works.

Kernel versions: 5.6.0-rc2, 5.6.0.

A couple of reproducers are attached: one demonstrates a successful open
followed by failed writes, and the other a failing open (each in comparison
with the equivalent sync calls).

The output of the former one for example:

*** sync openat
openat succeeded
sync write at offset 0
write succeeded
sync write at offset 4294967296
write succeeded

*** sync openat
openat succeeded
io_uring write at offset 0
write succeeded
io_uring write at offset 4294967296
write succeeded

*** io_uring openat
openat succeeded
sync write at offset 0
write succeeded
sync write at offset 4294967296
write failed: File too large

*** io_uring openat
openat succeeded
io_uring write at offset 0
write succeeded
io_uring write at offset 4294967296
write failed: File too large

-- 
Dmitry
#include <liburing.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/resource.h>
#include <unistd.h>

/* Queue size passed to io_uring_queue_init(). */
static const int RSIZE = 2;
/* Flags used for every open of a test file: read/write, create if missing. */
static const int OPEN_FLAGS = O_RDWR | O_CREAT;
/* Permissions for newly created test files: owner read and write. */
static const mode_t OPEN_MODE = S_IRUSR | S_IWUSR;

/*
 * Print a printf-style message to stderr and abort the process.
 *
 * The expansion deliberately has no trailing semicolon, so "DIE(...);"
 * is exactly one statement and stays valid in an unbraced if/else.
 */
#define DIE(...) do {\
		fprintf(stderr, __VA_ARGS__);\
		abort();\
	} while (0)

/*
 * Write a short test buffer to fd at the given offset and report the
 * outcome on stderr.
 *
 * ring   - io_uring instance; only used when sync is zero
 * sync   - non-zero: write with pwrite(); zero: submit the write via io_uring
 * fd     - file descriptor to write to
 * offset - file offset at which to write
 *
 * Failures to obtain an sqe, to submit, or to wait for the completion are
 * reported on stderr and the function returns without a write result.
 */
void do_write(struct io_uring *ring, int sync, int fd, off_t offset)
{
	/* off_t has no printf conversion of its own; widen it explicitly. */
	fprintf(stderr, "%s write at offset %lld\n", sync ? "sync": "io_uring", (long long)offset);
	char buf[] = "some test write buf";
	int res;
	if (sync) {
		res = pwrite(fd, buf, sizeof(buf), offset);
		if (res < 0) {
			/* Normalize to the negative-errno form that cqe->res uses. */
			res = -errno;
		}
	}
	else {
		struct io_uring_sqe *sqe;
		sqe = io_uring_get_sqe(ring);
		if (!sqe) {
			fprintf(stderr, "failed to get sqe\n");
			return;
		}
		io_uring_prep_write(sqe, fd, buf, sizeof(buf), offset);
		int ret = io_uring_submit(ring);
		if (ret < 0) {
			fprintf(stderr, "failed to submit write: %s\n", strerror(-ret));
			return;
		}

		struct io_uring_cqe *cqe;
		ret = io_uring_wait_cqe(ring, &cqe);
		/* Check the wait result first: cqe must not be touched on failure. */
		if (ret < 0) {
			fprintf(stderr, "wait_cqe failed: %s\n", strerror(-ret));
			return;
		}
		res = cqe->res;
		io_uring_cqe_seen(ring, cqe);
	}
	if (res < 0) {
		fprintf(stderr, "write failed: %s\n", strerror(-res));
	}
	else {
		fprintf(stderr, "write succeeded\n");
	}
}

/*
 * Open fn relative to dfd (creating it if needed), then attempt one write
 * at offset 0 and one at offset 4 GiB, reporting every step on stderr.
 *
 * ring       - io_uring instance used for the io_uring variants
 * sync_open  - non-zero: open with openat(); zero: open via io_uring
 * sync_write - forwarded to do_write() to select the write method
 * dfd        - directory file descriptor the path is resolved against
 * fn         - file name relative to dfd
 *
 * Failures to obtain an sqe, to submit, or to wait for the completion are
 * reported on stderr and the function returns without attempting writes.
 */
void test_open_write(struct io_uring *ring, int sync_open, int sync_write, int dfd, const char* fn)
{
	fprintf(stderr, "\n*** %s openat\n", sync_open ? "sync" : "io_uring");
	int fd = -1;
	if (sync_open) {
		fd = openat(dfd, fn, OPEN_FLAGS, OPEN_MODE);
		if (fd < 0) {
			/* Normalize to the negative-errno form that cqe->res uses. */
			fd = -errno;
		}
	}
	else {
		struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
		if (!sqe) {
			fprintf(stderr, "failed to get sqe\n");
			return;
		}
		io_uring_prep_openat(sqe, dfd, fn, OPEN_FLAGS, OPEN_MODE);
		int ret = io_uring_submit(ring);
		if (ret < 0) {
			fprintf(stderr, "failed to submit openat: %s\n", strerror(-ret));
			return;
		}

		struct io_uring_cqe *cqe;
		ret = io_uring_wait_cqe(ring, &cqe);
		/* Check the wait result first: cqe must not be touched on failure. */
		if (ret < 0) {
			fprintf(stderr, "wait_cqe failed: %s\n", strerror(-ret));
			return;
		}
		fd = cqe->res;
		io_uring_cqe_seen(ring, cqe);
	}
	if (fd < 0) {
		fprintf(stderr, "openat failed: %s\n", strerror(-fd));
	}
	else {
		fprintf(stderr, "openat succeeded\n");
		do_write(ring, sync_write, fd, 0);
		/* 1ull << 32 is 4 GiB, well past the 2 GiB boundary under test. */
		do_write(ring, sync_write, fd, 1ull << 32);
		close(fd);
	}
}

/*
 * Run every combination of sync/io_uring open and sync/io_uring write,
 * each against its own file under /tmp.
 */
int main()
{
	/* One row per test run: how to open, how to write, which file. */
	static const struct {
		int sync_open;
		int sync_write;
		const char *name;
	} cases[] = {
		{ 1, 1, "io_uring_openat_write_test1" },
		{ 1, 0, "io_uring_openat_write_test2" },
		{ 0, 1, "io_uring_openat_write_test3" },
		{ 0, 0, "io_uring_openat_write_test4" },
	};
	struct io_uring ring;

	int tmp_dir = open("/tmp", O_RDONLY | O_DIRECTORY);
	if (tmp_dir < 0) {
		DIE("open /tmp: %s\n", strerror(errno));
	}

	int rc = io_uring_queue_init(RSIZE, &ring, 0);
	if (rc < 0) {
		DIE("failed to init io_uring: %s\n", strerror(-rc));
	}

	for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
		test_open_write(&ring, cases[i].sync_open, cases[i].sync_write,
				tmp_dir, cases[i].name);
	}

	io_uring_queue_exit(&ring);
	close(tmp_dir);
	return 0;
}
#include <liburing.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/resource.h>
#include <unistd.h>

/*
 * Print a printf-style message to stderr and abort the process.
 *
 * The expansion deliberately has no trailing semicolon, so "DIE(...);"
 * is exactly one statement and stays valid in an unbraced if/else.
 */
#define DIE(...) do {\
		fprintf(stderr, __VA_ARGS__);\
		abort();\
	} while (0)

/* Queue size passed to io_uring_queue_init(). */
static const int RSIZE = 2;
/* Flags used for every open of the test file: read/write, create if missing. */
static const int OPEN_FLAGS = O_RDWR | O_CREAT;
/* Permissions for a newly created test file: owner read and write. */
static const mode_t OPEN_MODE = S_IRUSR | S_IWUSR;

/*
 * Open fn relative to dfd with a plain openat() call, report the result
 * on stderr, and close the descriptor again if the open worked.
 */
void open_sync(int dfd, const char* fn)
{
	const int handle = openat(dfd, fn, OPEN_FLAGS, OPEN_MODE);

	if (handle >= 0) {
		fprintf(stderr, "sync open succeeded\n");
		close(handle);
		return;
	}

	fprintf(stderr, "sync open failed: %s\n", strerror(errno));
}

/*
 * Open fn relative to dfd through an io_uring openat request, report the
 * result on stderr, and close the descriptor again if the open worked.
 *
 * ring - io_uring instance to submit the request on
 * dfd  - directory file descriptor the path is resolved against
 * fn   - file name relative to dfd
 */
void open_io_uring(struct io_uring *ring, int dfd, const char* fn)
{
	struct io_uring_sqe *sqe;
	sqe = io_uring_get_sqe(ring);
	if (!sqe) {
		fprintf(stderr, "failed to get sqe\n");
		return;
	}
	io_uring_prep_openat(sqe, dfd, fn, OPEN_FLAGS, OPEN_MODE);
	int ret = io_uring_submit(ring);
	if (ret < 0) {
		fprintf(stderr, "failed to submit openat: %s\n", strerror(-ret));
		return;
	}

	struct io_uring_cqe *cqe;
	ret = io_uring_wait_cqe(ring, &cqe);
	/* Check the wait result first: cqe must not be touched on failure. */
	if (ret < 0) {
		fprintf(stderr, "wait_cqe failed: %s\n", strerror(-ret));
		return;
	}
	int fd = cqe->res;
	io_uring_cqe_seen(ring, cqe);
	if (fd < 0) {
		/* cqe->res carries a negative errno value when the open failed. */
		fprintf(stderr, "io_uring openat failed: %s\n", strerror(-fd));
	}
	else {
		fprintf(stderr, "io_uring openat succeeded\n");
		close(fd);
	}
}

/*
 * Create (or reuse) fn relative to dfd and write a few bytes at offset
 * 4 GiB so that the file extends past the 2 GiB boundary.
 *
 * Returns 0 on success and -1 if either the open or the write failed;
 * the failing step is reported on stderr.
 */
int prepare_file(int dfd, const char* fn)
{
	const char payload[] = "foo";
	int status = 0;

	int fd = openat(dfd, fn, OPEN_FLAGS, OPEN_MODE);
	if (fd < 0) {
		fprintf(stderr, "prepare/open: %s\n", strerror(errno));
		return -1;
	}

	if (pwrite(fd, payload, sizeof(payload), 1ull << 32) < 0) {
		fprintf(stderr, "prepare/pwrite: %s\n", strerror(errno));
		status = -1;
	}

	close(fd);
	return status;
}

/*
 * Prepare a file under /tmp that extends past 2 GiB, then try to open it
 * once with a plain openat() and once through io_uring.
 */
int main()
{
	const char *test_name = "io_uring_openat_test";
	struct io_uring ring;

	int tmp_dir = open("/tmp", O_RDONLY | O_DIRECTORY);
	if (tmp_dir < 0) {
		DIE("open /tmp: %s\n", strerror(errno));
	}

	int rc = io_uring_queue_init(RSIZE, &ring, 0);
	if (rc < 0) {
		DIE("failed to init io_uring: %s\n", strerror(-rc));
	}

	/* Only compare the two open paths if the large file could be set up. */
	if (prepare_file(tmp_dir, test_name) == 0) {
		open_sync(tmp_dir, test_name);
		open_io_uring(&ring, tmp_dir, test_name);
	}

	io_uring_queue_exit(&ring);
	close(tmp_dir);
	return 0;
}

[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux