Re: [RFC] what to do with IOCB_DSYNC?

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 5/22/22 7:28 PM, Jens Axboe wrote:
> On 5/22/22 7:22 PM, Jens Axboe wrote:
>> On 5/22/22 6:42 PM, Al Viro wrote:
>>> On Sun, May 22, 2022 at 02:03:35PM -0600, Jens Axboe wrote:
>>>
>>>> Right, I'm saying it's not _immediately_ clear which cases are what when
>>>> reading the code.
>>>>
>>>>> up a while ago.  And no, turning that into indirect calls ended up with
>>>>> arseloads of overhead, more's the pity...
>>>>
>>>> It's a shame, since indirect calls make for nicer code, but it's always
>>>> been slower and these days even more so.
>>>>
>>>>> Anyway, at the moment I have something that builds; hadn't tried to
>>>>> boot it yet.
>>>>
>>>> Nice!
>>>
>>> Boots and survives LTP and xfstests...  Current variant is in
>>> vfs.git#work.iov_iter (head should be at 27fa77a9829c).  I have *not*
>>> looked into the code generation in primitives; the likely/unlikely on
>>> those cascades of ifs need rethinking.
>>
>> I noticed too. Haven't fiddled much in iov_iter.c, but for uio.h I had
>> the below. iov_iter.c is a worse "offender" though, with 53 unlikely and
>> 22 likely annotations...
> 
> Here it is...

Few more, most notably making sure that dio dirties reads even if they
are not of the iovec type.

Last two just add a helper for import_ubuf() and then adopts it for
io_uring send/recv which is also a hot path. The single range read/write
can be converted too, but that needs a bit more work...

-- 
Jens Axboe
From d05b59dc10e6c7c74da439561831f744196eb412 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@xxxxxxxxx>
Date: Sun, 22 May 2022 19:49:36 -0600
Subject: [PATCH 4/4] io_uring: use import_ubuf() for send/recv

We just have the single buffer, no point in using ITER_IOVEC when we
can be using the more efficient ITER_UBUF instead.

Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
 fs/io_uring.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 73f53c208df2..91255f2326e5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5272,7 +5272,6 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_sr_msg *sr = &req->sr_msg;
 	struct msghdr msg;
-	struct iovec iov;
 	struct socket *sock;
 	unsigned flags;
 	int min_ret = 0;
@@ -5282,7 +5281,7 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
 	if (unlikely(!sock))
 		return -ENOTSOCK;
 
-	ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
+	ret = import_ubuf(WRITE, sr->buf, sr->len, &msg.msg_iter);
 	if (unlikely(ret))
 		return ret;
 
@@ -5521,7 +5520,6 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
 	struct msghdr msg;
 	void __user *buf = sr->buf;
 	struct socket *sock;
-	struct iovec iov;
 	unsigned flags;
 	int ret, min_ret = 0;
 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
@@ -5537,7 +5535,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
 		buf = u64_to_user_ptr(kbuf->addr);
 	}
 
-	ret = import_single_range(READ, buf, sr->len, &iov, &msg.msg_iter);
+	ret = import_ubuf(READ, buf, sr->len, &msg.msg_iter);
 	if (unlikely(ret))
 		goto out_free;
 
-- 
2.35.1

From 5786b1db0bf43313092e61797f639290079b2709 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@xxxxxxxxx>
Date: Sun, 22 May 2022 19:47:29 -0600
Subject: [PATCH 3/4] iov: add import_ubuf()

Like import_single_range(), but for ITER_UBUF.

Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
 include/linux/uio.h |  1 +
 lib/iov_iter.c      | 11 +++++++++++
 2 files changed, 12 insertions(+)

diff --git a/include/linux/uio.h b/include/linux/uio.h
index d10c19a650a8..4e473ea90b20 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -331,6 +331,7 @@ ssize_t __import_iovec(int type, const struct iovec __user *uvec,
 		 struct iov_iter *i, bool compat);
 int import_single_range(int type, void __user *buf, size_t len,
 		 struct iovec *iov, struct iov_iter *i);
+int import_ubuf(int type, void __user *buf, size_t len, struct iov_iter *i);
 
 static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction,
 			void __user *buf, size_t count)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index aa41394174eb..348412757335 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -2167,6 +2167,17 @@ int import_single_range(int rw, void __user *buf, size_t len,
 }
 EXPORT_SYMBOL(import_single_range);
 
+int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
+{
+	if (len > MAX_RW_COUNT)
+		len = MAX_RW_COUNT;
+	if (unlikely(!access_ok(buf, len)))
+		return -EFAULT;
+
+	iov_iter_ubuf(i, rw, buf, len);
+	return 0;
+}
+
 /**
  * iov_iter_restore() - Restore a &struct iov_iter to the same state as when
  *     iov_iter_save_state() was called.
-- 
2.35.1

From cc8bfd64c1a8d754230809a56bf23bcadeabd63f Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@xxxxxxxxx>
Date: Sun, 22 May 2022 19:37:33 -0600
Subject: [PATCH 2/4] Switch iter_is_iovec() checks for user memory with
 iter_is_user()

Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
 block/fops.c         | 7 +++----
 fs/direct-io.c       | 2 +-
 fs/iomap/direct-io.c | 2 +-
 fs/nfs/direct.c      | 2 +-
 4 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/block/fops.c b/block/fops.c
index b1f7c4111458..15788a36983a 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -77,8 +77,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 
 	if (iov_iter_rw(iter) == READ) {
 		bio_init(&bio, bdev, vecs, nr_pages, REQ_OP_READ);
-		if (iter_is_iovec(iter))
-			should_dirty = true;
+		should_dirty = iter_is_user(iter);
 	} else {
 		bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
 	}
@@ -217,7 +216,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	}
 
 	dio->size = 0;
-	if (is_read && iter_is_iovec(iter))
+	if (is_read && iter_is_user(iter))
 		dio->flags |= DIO_SHOULD_DIRTY;
 
 	blk_start_plug(&plug);
@@ -346,7 +345,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 	dio->size = bio->bi_iter.bi_size;
 
 	if (is_read) {
-		if (iter_is_iovec(iter)) {
+		if (iter_is_user(iter)) {
 			dio->flags |= DIO_SHOULD_DIRTY;
 			bio_set_pages_dirty(bio);
 		}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 56dc5a7ad119..5cfa53e0783f 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1246,7 +1246,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 	spin_lock_init(&dio->bio_lock);
 	dio->refcount = 1;
 
-	dio->should_dirty = iter_is_iovec(iter) && iov_iter_rw(iter) == READ;
+	dio->should_dirty = iter_is_user(iter) && iov_iter_rw(iter) == READ;
 	sdio.iter = iter;
 	sdio.final_block_in_request = end >> blkbits;
 
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 33b94e33189a..7d212bde101a 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -523,7 +523,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 			iomi.flags |= IOMAP_NOWAIT;
 		}
 
-		if (iter_is_iovec(iter))
+		if (iter_is_user(iter))
 			dio->flags |= IOMAP_DIO_DIRTY;
 	} else {
 		iomi.flags |= IOMAP_WRITE;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 11c566d8769f..b6125d5a25c6 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -481,7 +481,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
-	if (iter_is_iovec(iter))
+	if (iter_is_user(iter))
 		dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
 
 	if (!swap)
-- 
2.35.1

From 992e03e0bd87801c35f5c6d8855396fc481387b8 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@xxxxxxxxx>
Date: Sun, 22 May 2022 19:34:45 -0600
Subject: [PATCH 1/4] iov: add iter_is_user()

Returns true if the iter is holding user memory. Use that for the
might_fault() checks.

Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
 include/linux/uio.h | 5 +++++
 lib/iov_iter.c      | 6 +++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 52baa3c89505..d10c19a650a8 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -112,6 +112,11 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i)
 	return i->data_source ? WRITE : READ;
 }
 
+static inline bool iter_is_user(const struct iov_iter *i)
+{
+	return iter_is_iovec(i) || iter_is_ubuf(i);
+}
+
 /*
  * Total number of bytes covered by an iovec.
  *
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 08296769f5c6..aa41394174eb 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -767,7 +767,7 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(iov_iter_is_pipe(i)))
 		return copy_pipe_to_iter(addr, bytes, i);
-	if (iter_is_iovec(i) || iter_is_ubuf(i))
+	if (iter_is_user(i))
 		might_fault();
 	iterate_and_advance(i, bytes, base, len, off,
 		copyout(base, addr + off, len),
@@ -849,7 +849,7 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(iov_iter_is_pipe(i)))
 		return copy_mc_pipe_to_iter(addr, bytes, i);
-	if (iter_is_iovec(i) || iter_is_ubuf(i))
+	if (iter_is_user(i))
 		might_fault();
 	__iterate_and_advance(i, bytes, base, len, off,
 		copyout_mc(base, addr + off, len),
@@ -867,7 +867,7 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 		WARN_ON(1);
 		return 0;
 	}
-	if (iter_is_iovec(i) || iter_is_ubuf(i))
+	if (iter_is_user(i))
 		might_fault();
 	iterate_and_advance(i, bytes, base, len, off,
 		copyin(addr + off, base, len),
-- 
2.35.1


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux