On 5/22/22 7:28 PM, Jens Axboe wrote: > On 5/22/22 7:22 PM, Jens Axboe wrote: >> On 5/22/22 6:42 PM, Al Viro wrote: >>> On Sun, May 22, 2022 at 02:03:35PM -0600, Jens Axboe wrote: >>> >>>> Right, I'm saying it's not _immediately_ clear which cases are what when >>>> reading the code. >>>> >>>>> up a while ago. And no, turning that into indirect calls ended up with >>>>> arseloads of overhead, more's the pity... >>>> >>>> It's a shame, since indirect calls make for nicer code, but it's always >>>> been slower and these days even more so. >>>> >>>>> Anyway, at the moment I have something that builds; hadn't tried to >>>>> boot it yet. >>>> >>>> Nice! >>> >>> Boots and survives LTP and xfstests... Current variant is in >>> vfs.git#work.iov_iter (head should be at 27fa77a9829c). I have *not* >>> looked into the code generation in primitives; the likely/unlikely on >>> those cascades of ifs need rethinking. >> >> I noticed too. Haven't fiddled much in iov_iter.c, but for uio.h I had >> the below. iov_iter.c is a worse "offender" though, with 53 unlikely and >> 22 likely annotations... > > Here it is... Few more, most notably making sure that dio dirties reads even if they are not of the iovec type. Last two just add a helper for import_ubuf() and then adopts it for io_uring send/recv which is also a hot path. The single range read/write can be converted too, but that needs a bit more work... -- Jens Axboe
From d05b59dc10e6c7c74da439561831f744196eb412 Mon Sep 17 00:00:00 2001 From: Jens Axboe <axboe@xxxxxxxxx> Date: Sun, 22 May 2022 19:49:36 -0600 Subject: [PATCH 4/4] io_uring: use import_ubuf() for send/recv We just have the single buffer, no point in using ITER_IOVEC when we can be using the more efficient ITER_UBUF instead. Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- fs/io_uring.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 73f53c208df2..91255f2326e5 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5272,7 +5272,6 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags) { struct io_sr_msg *sr = &req->sr_msg; struct msghdr msg; - struct iovec iov; struct socket *sock; unsigned flags; int min_ret = 0; @@ -5282,7 +5281,7 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags) if (unlikely(!sock)) return -ENOTSOCK; - ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter); + ret = import_ubuf(WRITE, sr->buf, sr->len, &msg.msg_iter); if (unlikely(ret)) return ret; @@ -5521,7 +5520,6 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags) struct msghdr msg; void __user *buf = sr->buf; struct socket *sock; - struct iovec iov; unsigned flags; int ret, min_ret = 0; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; @@ -5537,7 +5535,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags) buf = u64_to_user_ptr(kbuf->addr); } - ret = import_single_range(READ, buf, sr->len, &iov, &msg.msg_iter); + ret = import_ubuf(READ, buf, sr->len, &msg.msg_iter); if (unlikely(ret)) goto out_free; -- 2.35.1
From 5786b1db0bf43313092e61797f639290079b2709 Mon Sep 17 00:00:00 2001 From: Jens Axboe <axboe@xxxxxxxxx> Date: Sun, 22 May 2022 19:47:29 -0600 Subject: [PATCH 3/4] iov: add import_ubuf() Like import_single_range(), but for ITER_UBUF. Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- include/linux/uio.h | 1 + lib/iov_iter.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/include/linux/uio.h b/include/linux/uio.h index d10c19a650a8..4e473ea90b20 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -331,6 +331,7 @@ ssize_t __import_iovec(int type, const struct iovec __user *uvec, struct iov_iter *i, bool compat); int import_single_range(int type, void __user *buf, size_t len, struct iovec *iov, struct iov_iter *i); +int import_ubuf(int type, void __user *buf, size_t len, struct iov_iter *i); static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, void __user *buf, size_t count) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index aa41394174eb..348412757335 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -2167,6 +2167,17 @@ int import_single_range(int rw, void __user *buf, size_t len, } EXPORT_SYMBOL(import_single_range); +int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i) +{ + if (len > MAX_RW_COUNT) + len = MAX_RW_COUNT; + if (unlikely(!access_ok(buf, len))) + return -EFAULT; + + iov_iter_ubuf(i, rw, buf, len); + return 0; +} + /** * iov_iter_restore() - Restore a &struct iov_iter to the same state as when * iov_iter_save_state() was called. -- 2.35.1
From cc8bfd64c1a8d754230809a56bf23bcadeabd63f Mon Sep 17 00:00:00 2001 From: Jens Axboe <axboe@xxxxxxxxx> Date: Sun, 22 May 2022 19:37:33 -0600 Subject: [PATCH 2/4] Switch iter_is_iovec() checks for user memory with iter_is_user() Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- block/fops.c | 7 +++---- fs/direct-io.c | 2 +- fs/iomap/direct-io.c | 2 +- fs/nfs/direct.c | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/block/fops.c b/block/fops.c index b1f7c4111458..15788a36983a 100644 --- a/block/fops.c +++ b/block/fops.c @@ -77,8 +77,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, if (iov_iter_rw(iter) == READ) { bio_init(&bio, bdev, vecs, nr_pages, REQ_OP_READ); - if (iter_is_iovec(iter)) - should_dirty = true; + should_dirty = iter_is_user(iter); } else { bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb)); } @@ -217,7 +216,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, } dio->size = 0; - if (is_read && iter_is_iovec(iter)) + if (is_read && iter_is_user(iter)) dio->flags |= DIO_SHOULD_DIRTY; blk_start_plug(&plug); @@ -346,7 +345,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, dio->size = bio->bi_iter.bi_size; if (is_read) { - if (iter_is_iovec(iter)) { + if (iter_is_user(iter)) { dio->flags |= DIO_SHOULD_DIRTY; bio_set_pages_dirty(bio); } diff --git a/fs/direct-io.c b/fs/direct-io.c index 56dc5a7ad119..5cfa53e0783f 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1246,7 +1246,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, spin_lock_init(&dio->bio_lock); dio->refcount = 1; - dio->should_dirty = iter_is_iovec(iter) && iov_iter_rw(iter) == READ; + dio->should_dirty = iter_is_user(iter) && iov_iter_rw(iter) == READ; sdio.iter = iter; sdio.final_block_in_request = end >> blkbits; diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 33b94e33189a..7d212bde101a 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -523,7 +523,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, iomi.flags |= IOMAP_NOWAIT; } - if (iter_is_iovec(iter)) + if (iter_is_user(iter)) dio->flags |= IOMAP_DIO_DIRTY; } else { iomi.flags |= IOMAP_WRITE; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 11c566d8769f..b6125d5a25c6 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -481,7 +481,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - if (iter_is_iovec(iter)) + if (iter_is_user(iter)) dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; if (!swap) -- 2.35.1
From 992e03e0bd87801c35f5c6d8855396fc481387b8 Mon Sep 17 00:00:00 2001 From: Jens Axboe <axboe@xxxxxxxxx> Date: Sun, 22 May 2022 19:34:45 -0600 Subject: [PATCH 1/4] iov: add iter_is_user() Returns true if the iter is holding user memory. Use that for the might_fault() checks. Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- include/linux/uio.h | 5 +++++ lib/iov_iter.c | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/linux/uio.h b/include/linux/uio.h index 52baa3c89505..d10c19a650a8 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -112,6 +112,11 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i) return i->data_source ? WRITE : READ; } +static inline bool iter_is_user(const struct iov_iter *i) +{ + return iter_is_iovec(i) || iter_is_ubuf(i); +} + /* * Total number of bytes covered by an iovec. * diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 08296769f5c6..aa41394174eb 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -767,7 +767,7 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { if (unlikely(iov_iter_is_pipe(i))) return copy_pipe_to_iter(addr, bytes, i); - if (iter_is_iovec(i) || iter_is_ubuf(i)) + if (iter_is_user(i)) might_fault(); iterate_and_advance(i, bytes, base, len, off, copyout(base, addr + off, len), @@ -849,7 +849,7 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { if (unlikely(iov_iter_is_pipe(i))) return copy_mc_pipe_to_iter(addr, bytes, i); - if (iter_is_iovec(i) || iter_is_ubuf(i)) + if (iter_is_user(i)) might_fault(); __iterate_and_advance(i, bytes, base, len, off, copyout_mc(base, addr + off, len), @@ -867,7 +867,7 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) WARN_ON(1); return 0; } - if (iter_is_iovec(i) || iter_is_ubuf(i)) + if (iter_is_user(i)) might_fault(); iterate_and_advance(i, bytes, base, len, off, copyin(addr + off, base, len), -- 2.35.1