On 30/03/2021 12:19, Lennert Buytenhek wrote: > IORING_OP_GETDENTS behaves much like getdents64(2) and takes the same > arguments, but with a small twist: it takes an additional offset > argument, and reading from the specified directory starts at the given > offset. > > Internally, if necessary, IORING_OP_GETDENTS will vfs_llseek() to > the right directory position before calling vfs_getdents(). > > For the first IORING_OP_GETDENTS call on a directory, the offset > parameter can be set to zero, and for subsequent calls, it can be > set to the ->d_off field of the last struct linux_dirent64 returned > by the previous IORING_OP_GETDENTS call. I still consider this API to be quite a mess. In particular, changing file->f_pos even when an offset is specified is neither convenient for users nor good performance-wise, and just looks weird. I haven't been following the last discussion, but iirc Matthew proposed how to do it right. If you want to "get it done quick", just seek the position back after doing your stuff, because once this patch is merged we have to maintain the behaviour. > Alternatively, specifying an offset argument of -1 will read from > the directory's current file offset (IORING_FEAT_RW_CUR_POS). 
> > Signed-off-by: Lennert Buytenhek <buytenh@xxxxxxxxxxxxxx> > --- > fs/io_uring.c | 66 +++++++++++++++++++++++++++++++++++ > include/uapi/linux/io_uring.h | 1 + > 2 files changed, 67 insertions(+) > > diff --git a/fs/io_uring.c b/fs/io_uring.c > index f4ff3da821a5..90637d5a34b9 100644 > --- a/fs/io_uring.c > +++ b/fs/io_uring.c > @@ -670,6 +670,13 @@ struct io_mkdir { > struct filename *filename; > }; > > +struct io_getdents { > + struct file *file; > + struct linux_dirent64 __user *dirent; > + unsigned int count; > + loff_t pos; > +}; > + > struct io_completion { > struct file *file; > struct list_head list; > @@ -811,6 +818,7 @@ struct io_kiocb { > struct io_rename rename; > struct io_unlink unlink; > struct io_mkdir mkdir; > + struct io_getdents getdents; > /* use only after cleaning per-op data, see io_clean_op() */ > struct io_completion compl; > }; > @@ -1025,6 +1033,9 @@ static const struct io_op_def io_op_defs[] = { > [IORING_OP_RENAMEAT] = {}, > [IORING_OP_UNLINKAT] = {}, > [IORING_OP_MKDIRAT] = {}, > + [IORING_OP_GETDENTS] = { > + .needs_file = 1, > + }, > }; > > static bool io_disarm_next(struct io_kiocb *req); > @@ -4314,6 +4325,56 @@ static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags) > return 0; > } > > +static int io_getdents_prep(struct io_kiocb *req, > + const struct io_uring_sqe *sqe) > +{ > + struct io_getdents *getdents = &req->getdents; > + > + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) > + return -EINVAL; > + if (sqe->ioprio || sqe->rw_flags || sqe->buf_index) > + return -EINVAL; > + > + getdents->pos = READ_ONCE(sqe->off); > + getdents->dirent = u64_to_user_ptr(READ_ONCE(sqe->addr)); > + getdents->count = READ_ONCE(sqe->len); > + return 0; > +} > + > +static int io_getdents(struct io_kiocb *req, unsigned int issue_flags) > +{ > + struct io_getdents *getdents = &req->getdents; > + int ret = 0; > + > + /* getdents always requires a blocking context */ > + if (issue_flags & IO_URING_F_NONBLOCK) > + return 
-EAGAIN; > + > + /* for vfs_llseek and to serialize ->iterate_shared() on this file */ > + mutex_lock(&req->file->f_pos_lock); > + > + if (getdents->pos != -1 && getdents->pos != req->file->f_pos) { > + loff_t res = vfs_llseek(req->file, getdents->pos, SEEK_SET); > + if (res < 0) > + ret = res; > + } > + > + if (ret == 0) { > + ret = vfs_getdents(req->file, getdents->dirent, > + getdents->count); > + } > + > + mutex_unlock(&req->file->f_pos_lock); > + > + if (ret < 0) { > + if (ret == -ERESTARTSYS) > + ret = -EINTR; > + req_set_fail_links(req); > + } > + io_req_complete(req, ret); > + return 0; > +} > + > #if defined(CONFIG_NET) > static int io_setup_async_msg(struct io_kiocb *req, > struct io_async_msghdr *kmsg) > @@ -5991,6 +6052,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) > return io_unlinkat_prep(req, sqe); > case IORING_OP_MKDIRAT: > return io_mkdirat_prep(req, sqe); > + case IORING_OP_GETDENTS: > + return io_getdents_prep(req, sqe); > } > > printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n", > @@ -6253,6 +6316,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) > case IORING_OP_MKDIRAT: > ret = io_mkdirat(req, issue_flags); > break; > + case IORING_OP_GETDENTS: > + ret = io_getdents(req, issue_flags); > + break; > default: > ret = -EINVAL; > break; > diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h > index cf26a94ab880..0693a6e4d6bb 100644 > --- a/include/uapi/linux/io_uring.h > +++ b/include/uapi/linux/io_uring.h > @@ -138,6 +138,7 @@ enum { > IORING_OP_RENAMEAT, > IORING_OP_UNLINKAT, > IORING_OP_MKDIRAT, > + IORING_OP_GETDENTS, > > /* this goes last, obviously */ > IORING_OP_LAST, > -- Pavel Begunkov