On 8/23/24 17:27, Mark Harmstone wrote:
Adds an io_uring interface for asynchronous encoded reads, taking the
same arguments as the ioctl. To use it, set the SQE opcode to
IORING_OP_URING_CMD, set cmd_op to BTRFS_IOC_ENCODED_READ, and point
addr at the userspace address of the btrfs_ioctl_encoded_io_args
struct. As with the ioctl, CAP_SYS_ADMIN is required.
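
(For illustration, submitting one of these from userspace might look
roughly like the sketch below. It assumes liburing; queue_encoded_read()
is a hypothetical helper name, and error handling is elided.)

#include <stdint.h>
#include <string.h>
#include <liburing.h>
#include <linux/btrfs.h>

/* Hypothetical helper: queue a single encoded read on an already
 * initialised ring. The caller must keep args alive until the CQE
 * arrives. */
static int queue_encoded_read(struct io_uring *ring, int fd,
			      struct btrfs_ioctl_encoded_io_args *args)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

	if (!sqe)
		return -EAGAIN;

	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->fd = fd;
	sqe->cmd_op = BTRFS_IOC_ENCODED_READ;
	sqe->addr = (__u64)(uintptr_t)args;

	return io_uring_submit(ring);
}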
Signed-off-by: Mark Harmstone <maharmstone@xxxxxx>
---
...
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index a5d786c6d7d4..62e5757d3ba2 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -620,6 +620,8 @@ struct btrfs_encoded_read_private {
struct btrfs_ioctl_encoded_io_args args;
struct file *file;
void __user *copy_out;
+ struct io_uring_cmd *cmd;
+ unsigned int issue_flags;
};
...
+static inline struct btrfs_encoded_read_private *btrfs_uring_encoded_read_pdu(
+ struct io_uring_cmd *cmd)
+{
+ return *(struct btrfs_encoded_read_private **)cmd->pdu;
+}
+static void btrfs_finish_uring_encoded_read(struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
+{
+ struct btrfs_encoded_read_private *priv;
+ ssize_t ret;
+
+ priv = btrfs_uring_encoded_read_pdu(cmd);
+ ret = btrfs_encoded_read_finish(priv, -EIOCBQUEUED);
+
+ io_uring_cmd_done(priv->cmd, ret, 0, priv->issue_flags);
s/priv->issue_flags/issue_flags/
You can't use flags that you grabbed earlier in a completely different
execution context.
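
I.e.:

	io_uring_cmd_done(priv->cmd, ret, 0, issue_flags);

using the issue_flags this task-work callback was actually invoked with.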
+
+ kfree(priv);
+}
+
+static void btrfs_encoded_read_uring_endio(struct btrfs_bio *bbio)
+{
+ struct btrfs_encoded_read_private *priv = bbio->private;
Might be cleaner if you combine it with btrfs_encoded_read_ioctl_endio:

btrfs_encoded_read_endio()
{
	...
	if (!atomic_dec_return(&priv->pending)) {
		if (priv->cmd)
			io_uring_cmd_complete_in_task();
		else
			wake_up();
	...
}
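
Filled in, the combined helper could look something like this (a
sketch; it keeps the status handling quoted below, and relies on
priv->cmd being NULL for the ioctl path):

static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
{
	struct btrfs_encoded_read_private *priv = bbio->private;

	if (bbio->bio.bi_status)
		WRITE_ONCE(priv->status, bbio->bio.bi_status);

	if (!atomic_dec_return(&priv->pending)) {
		if (priv->cmd)
			io_uring_cmd_complete_in_task(priv->cmd,
					btrfs_finish_uring_encoded_read);
		else
			wake_up(&priv->wait);
	}

	bio_put(&bbio->bio);
}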
+
+ if (bbio->bio.bi_status) {
+ /*
+ * The memory barrier implied by the atomic_dec_return() here
+ * pairs with the memory barrier implied by the
+ * atomic_dec_return() or io_wait_event() in
+ * btrfs_encoded_read_regular_fill_pages() to ensure that this
+ * write is observed before the load of status in
+ * btrfs_encoded_read_regular_fill_pages().
+ */
+ WRITE_ONCE(priv->status, bbio->bio.bi_status);
+ }
+ if (!atomic_dec_return(&priv->pending)) {
+ io_uring_cmd_complete_in_task(priv->cmd,
+ btrfs_finish_uring_encoded_read);
+ }
+ bio_put(&bbio->bio);
+}
+
static void _btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
u64 file_offset, u64 disk_bytenr,
u64 disk_io_size,
@@ -9106,11 +9148,16 @@ static void _btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
struct btrfs_fs_info *fs_info = inode->root->fs_info;
unsigned long i = 0;
struct btrfs_bio *bbio;
+ btrfs_bio_end_io_t cb;
- init_waitqueue_head(&priv->wait);
+ if (priv->cmd) {
+ cb = btrfs_encoded_read_uring_endio;
+ } else {
+ init_waitqueue_head(&priv->wait);
+ cb = btrfs_encoded_read_ioctl_endio;
+ }
- bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
- btrfs_encoded_read_endio, priv);
+ bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info, cb, priv);
bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
bbio->inode = inode;
@@ -9122,7 +9169,7 @@ static void _btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
btrfs_submit_bio(bbio, 0);
bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
- btrfs_encoded_read_endio, priv);
+ cb, priv);
bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
bbio->inode = inode;
continue;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index c1886209933a..85a512a9ca64 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
...
+static void btrfs_uring_encoded_read(struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
+{
+ ssize_t ret;
+ void __user *argp = (void __user *)(uintptr_t)cmd->sqe->addr;
u64_to_user_ptr()
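
i.e.:

	void __user *argp = u64_to_user_ptr(cmd->sqe->addr);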
+ struct btrfs_encoded_read_private *priv;
+ bool compat = issue_flags & IO_URING_F_COMPAT;
+
+ priv = kmalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv) {
+ ret = -ENOMEM;
+ goto out_uring;
+ }
+
+ ret = btrfs_prepare_encoded_read(priv, cmd->file, compat, argp);
+ if (ret)
+ goto out_finish;
+
+ if (iov_iter_count(&priv->iter) == 0) {
+ ret = 0;
+ goto out_finish;
+ }
+
+ *(struct btrfs_encoded_read_private **)cmd->pdu = priv;
+ priv->cmd = cmd;
+ priv->issue_flags = issue_flags;
You shouldn't be stashing issue_flags, it's almost always wrong. It
looks like btrfs_finish_uring_encoded_read() is the only user, and with
my comment above you can kill priv->issue_flags.
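
I.e. on top of the btrfs_inode.h hunk above, something like:

 	struct io_uring_cmd *cmd;
-	unsigned int issue_flags;
 };

and this assignment goes away with it.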
+ ret = btrfs_encoded_read(priv);
+
+ if (ret == -EIOCBQUEUED && atomic_dec_return(&priv->pending))
+ return;
Is gating on -EIOCBQUEUED just an optimisation? I.e. it's fine to swap
the checks:

if (atomic_dec_return(&priv->pending) && ret == -EIOCBQUEUED)

but we know that unless it returned -EIOCBQUEUED, nobody should've
touched ->pending.
+
+out_finish:
+ ret = btrfs_encoded_read_finish(priv, ret);
+ kfree(priv);
+
+out_uring:
+ io_uring_cmd_done(cmd, ret, 0, issue_flags);
+}
+
--
Pavel Begunkov