Re: [PATCH 6/6] btrfs: add io_uring interface for encoded reads

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 8/23/24 17:27, Mark Harmstone wrote:
Adds an io_uring interface for asynchronous encoded reads, using the
same interface as for the ioctl. To use this you would use an SQE opcode
of IORING_OP_URING_CMD, the cmd_op would be BTRFS_IOC_ENCODED_READ, and
addr would point to the userspace address of the
btrfs_ioctl_encoded_io_args struct. As with the ioctl, you need to have
CAP_SYS_ADMIN for this to work.

Signed-off-by: Mark Harmstone <maharmstone@xxxxxx>
---
...
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index a5d786c6d7d4..62e5757d3ba2 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -620,6 +620,8 @@ struct btrfs_encoded_read_private {
  	struct btrfs_ioctl_encoded_io_args args;
  	struct file *file;
  	void __user *copy_out;
+	struct io_uring_cmd *cmd;
+	unsigned int issue_flags;
  };
...
+static inline struct btrfs_encoded_read_private *btrfs_uring_encoded_read_pdu(
+		struct io_uring_cmd *cmd)
+{
+	return *(struct btrfs_encoded_read_private **)cmd->pdu;
+}
+static void btrfs_finish_uring_encoded_read(struct io_uring_cmd *cmd,
+					    unsigned int issue_flags)
+{
+	struct btrfs_encoded_read_private *priv;
+	ssize_t ret;
+
+	priv = btrfs_uring_encoded_read_pdu(cmd);
+	ret = btrfs_encoded_read_finish(priv, -EIOCBQUEUED);
+
+	io_uring_cmd_done(priv->cmd, ret, 0, priv->issue_flags);

s/priv->issue_flags/issue_flags/

You can't use issue flags that were captured earlier in a completely
different execution context; use the ones passed to this callback.

+
+	kfree(priv);
+}
+
+static void btrfs_encoded_read_uring_endio(struct btrfs_bio *bbio)
+{
+	struct btrfs_encoded_read_private *priv = bbio->private;

It might be cleaner if you combined this with btrfs_encoded_read_ioctl_endio, e.g.:

btrfs_encoded_read_endio()
{
	...
	if (!atomic_dec_return(&priv->pending)) {
		if (priv->cmd)
			io_uring_cmd_complete_in_task();
		else
			wake_up();
	...
}



+
+	if (bbio->bio.bi_status) {
+		/*
+		 * The memory barrier implied by the atomic_dec_return() here
+		 * pairs with the memory barrier implied by the
+		 * atomic_dec_return() or io_wait_event() in
+		 * btrfs_encoded_read_regular_fill_pages() to ensure that this
+		 * write is observed before the load of status in
+		 * btrfs_encoded_read_regular_fill_pages().
+		 */
+		WRITE_ONCE(priv->status, bbio->bio.bi_status);
+	}
+	if (!atomic_dec_return(&priv->pending)) {
+		io_uring_cmd_complete_in_task(priv->cmd,
+					      btrfs_finish_uring_encoded_read);
+	}
+	bio_put(&bbio->bio);
+}
+
  static void _btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
  					u64 file_offset, u64 disk_bytenr,
  					u64 disk_io_size,
@@ -9106,11 +9148,16 @@ static void _btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
  	struct btrfs_fs_info *fs_info = inode->root->fs_info;
  	unsigned long i = 0;
  	struct btrfs_bio *bbio;
+	btrfs_bio_end_io_t cb;
- init_waitqueue_head(&priv->wait);
+	if (priv->cmd) {
+		cb = btrfs_encoded_read_uring_endio;
+	} else {
+		init_waitqueue_head(&priv->wait);
+		cb = btrfs_encoded_read_ioctl_endio;
+	}
- bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
-			       btrfs_encoded_read_endio, priv);
+	bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info, cb, priv);
  	bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
  	bbio->inode = inode;
@@ -9122,7 +9169,7 @@ static void _btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
  			btrfs_submit_bio(bbio, 0);
bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
-					       btrfs_encoded_read_endio, priv);
+					       cb, priv);
  			bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
  			bbio->inode = inode;
  			continue;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index c1886209933a..85a512a9ca64 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
...
+static void btrfs_uring_encoded_read(struct io_uring_cmd *cmd,
+				     unsigned int issue_flags)
+{
+	ssize_t ret;
+	void __user *argp = (void __user *)(uintptr_t)cmd->sqe->addr;

u64_to_user_ptr()

+	struct btrfs_encoded_read_private *priv;
+	bool compat = issue_flags & IO_URING_F_COMPAT;
+
+	priv = kmalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		ret = -ENOMEM;
+		goto out_uring;
+	}
+
+	ret = btrfs_prepare_encoded_read(priv, cmd->file, compat, argp);
+	if (ret)
+		goto out_finish;
+
+	if (iov_iter_count(&priv->iter) == 0) {
+		ret = 0;
+		goto out_finish;
+	}
+
+	*(struct btrfs_encoded_read_private **)cmd->pdu = priv;
+	priv->cmd = cmd;
+	priv->issue_flags = issue_flags;

You shouldn't be stashing issue_flags; it's almost always wrong.
Looks like btrfs_finish_uring_encoded_read() is the only user, and
with my comment above you can kill priv->issue_flags.

+	ret = btrfs_encoded_read(priv);
+
+	if (ret == -EIOCBQUEUED && atomic_dec_return(&priv->pending))
+		return;

Is gating on -EIOCBQUEUED just an optimisation? I.e. it's fine
to swap checks

if (atomic_dec_return(&priv->pending) && ret == -EIOCBQUEUED)

but we know that unless it returned -EIOCBQUEUED nobody
should've touched ->pending.

+
+out_finish:
+	ret = btrfs_encoded_read_finish(priv, ret);
+	kfree(priv);
+
+out_uring:
+	io_uring_cmd_done(cmd, ret, 0, issue_flags);
+}
+

--
Pavel Begunkov




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux