Re: [RFC PATCH] io_uring: add support for IORING_OP_GETDENTS64

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sun, Jan 24, 2021 at 10:21:38PM +0000, David Laight wrote:

> > One open question is whether IORING_OP_GETDENTS64 should be more like
> > pread(2) and allow passing in a starting offset to read from the
> > directory from.  (This would require some more surgery in fs/readdir.c.)
> 
> Since directories are seekable this ought to work.
> Modulo horrid issues with 32bit file offsets.

The incremental patch below does this.  (It doesn't apply cleanly on
top of v1 of the IORING_OP_GETDENTS patch as I have other changes in
my tree -- I'm including it just to illustrate the changes that would
make this work.)

This change seems to work, and makes IORING_OP_GETDENTS take an
explicitly specified directory offset (instead of using the file's
->f_pos), making it more like pread(2), and I like the change from
a conceptual point of view, but it's a bit ugly around
iterate_dir_use_ctx_pos().  Any thoughts on how to do this more
cleanly (without breaking iterate_dir() semantics)?


> You'd need to return the final offset to allow another
> read to continue from the end position.

We can use the ->d_off value as returned in the last struct
linux_dirent64 as the directory offset to continue reading from
with the next IORING_OP_GETDENTS call, illustrated by the patch
to uringfind.c at the bottom.



diff --git a/fs/io_uring.c b/fs/io_uring.c
index 13dd29f8ebb3..0f9707ed9294 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -576,6 +576,7 @@ struct io_getdents {
 	struct file			*file;
 	struct linux_dirent64 __user	*dirent;
 	unsigned int			count;
+	loff_t				pos;
 };
 
 struct io_completion {
@@ -4584,9 +4585,10 @@ static int io_getdents_prep(struct io_kiocb *req,
 
 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
-	if (sqe->ioprio || sqe->off || sqe->rw_flags || sqe->buf_index)
+	if (sqe->ioprio || sqe->rw_flags || sqe->buf_index)
 		return -EINVAL;
 
+	getdents->pos = READ_ONCE(sqe->off);
 	getdents->dirent = u64_to_user_ptr(READ_ONCE(sqe->addr));
 	getdents->count = READ_ONCE(sqe->len);
 	return 0;
@@ -4601,7 +4603,8 @@ static int io_getdents(struct io_kiocb *req, bool force_nonblock)
 	if (force_nonblock)
 		return -EAGAIN;
 
-	ret = vfs_getdents(req->file, getdents->dirent, getdents->count);
+	ret = vfs_getdents(req->file, getdents->dirent, getdents->count,
+			   &getdents->pos);
 	if (ret < 0) {
 		if (ret == -ERESTARTSYS)
 			ret = -EINTR;
diff --git a/fs/readdir.c b/fs/readdir.c
index f52167c1eb61..d6bd78f6350a 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -37,7 +37,7 @@
 } while (0)
 
 
-int iterate_dir(struct file *file, struct dir_context *ctx)
+int iterate_dir_use_ctx_pos(struct file *file, struct dir_context *ctx)
 {
 	struct inode *inode = file_inode(file);
 	bool shared = false;
@@ -60,12 +60,10 @@ int iterate_dir(struct file *file, struct dir_context *ctx)
 
 	res = -ENOENT;
 	if (!IS_DEADDIR(inode)) {
-		ctx->pos = file->f_pos;
 		if (shared)
 			res = file->f_op->iterate_shared(file, ctx);
 		else
 			res = file->f_op->iterate(file, ctx);
-		file->f_pos = ctx->pos;
 		fsnotify_access(file);
 		file_accessed(file);
 	}
@@ -76,6 +74,17 @@ int iterate_dir(struct file *file, struct dir_context *ctx)
 out:
 	return res;
 }
+
+int iterate_dir(struct file *file, struct dir_context *ctx)
+{
+	int res;
+
+	ctx->pos = file->f_pos;
+	res = iterate_dir_use_ctx_pos(file, ctx);
+	file->f_pos = ctx->pos;
+
+	return res;
+}
 EXPORT_SYMBOL(iterate_dir);
 
 /*
@@ -349,16 +358,18 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
 }
 
 int vfs_getdents(struct file *file, struct linux_dirent64 __user *dirent,
-		 unsigned int count)
+		 unsigned int count, loff_t *pos)
 {
 	struct getdents_callback64 buf = {
 		.ctx.actor = filldir64,
+		.ctx.pos = *pos,
 		.count = count,
 		.current_dir = dirent
 	};
 	int error;
 
-	error = iterate_dir(file, &buf.ctx);
+	error = iterate_dir_use_ctx_pos(file, &buf.ctx);
+	*pos = buf.ctx.pos;
 	if (error >= 0)
 		error = buf.error;
 	if (buf.prev_reclen) {
@@ -384,7 +395,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
 	if (!f.file)
 		return -EBADF;
 
-	error = vfs_getdents(f.file, dirent, count);
+	error = vfs_getdents(f.file, dirent, count, &f.file->f_pos);
 	fdput_pos(f);
 	return error;
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 114885d3f6c4..4d9d96163f92 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3107,11 +3107,12 @@ const char *simple_get_link(struct dentry *, struct inode *,
 			    struct delayed_call *);
 extern const struct inode_operations simple_symlink_inode_operations;
 
+extern int iterate_dir_use_ctx_pos(struct file *, struct dir_context *);
 extern int iterate_dir(struct file *, struct dir_context *);
 
 struct linux_dirent64;
 int vfs_getdents(struct file *file, struct linux_dirent64 __user *dirent,
-		 unsigned int count);
+		 unsigned int count, loff_t *pos);
 
 int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
 		int flags);



Corresponding uringfind.c change:

diff --git a/uringfind.c b/uringfind.c
index 4282296..e140388 100644
--- a/uringfind.c
+++ b/uringfind.c
@@ -22,9 +22,10 @@ struct linux_dirent64 {
 };
 
 static inline void io_uring_prep_getdents(struct io_uring_sqe *sqe, int fd,
-					  void *buf, unsigned int count)
+					  void *buf, unsigned int count,
+					  uint64_t off)
 {
-	io_uring_prep_rw(IORING_OP_GETDENTS, sqe, fd, buf, count, 0);
+	io_uring_prep_rw(IORING_OP_GETDENTS, sqe, fd, buf, count, off);
 }
 
 
@@ -38,6 +39,7 @@ struct dir {
 
 	struct dir	*parent;
 	int		fd;
+	uint64_t	off;
 	uint8_t		buf[16384];
 	char		name[0];
 };
@@ -131,7 +133,8 @@ static void schedule_readdir(struct dir *dir)
 	struct io_uring_sqe *sqe;
 
 	sqe = get_sqe();
-	io_uring_prep_getdents(sqe, dir->fd, dir->buf, sizeof(dir->buf));
+	io_uring_prep_getdents(sqe, dir->fd, dir->buf, sizeof(dir->buf),
+			       dir->off);
 	io_uring_sqe_set_data(sqe, dir);
 }
 
@@ -145,6 +148,7 @@ static void opendir_completion(struct dir *dir, int ret)
 	}
 
 	dir->fd = ret;
+	dir->off = 0;
 	schedule_readdir(dir);
 }
 
@@ -179,6 +183,7 @@ static void readdir_completion(struct dir *dir, int ret)
 				schedule_opendir(dir, dent->d_name);
 		}
 
+		dir->off = dent->d_off;
 		bufp += dent->d_reclen;
 	}
 



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux