The block layer spends quite a while in iov_iter_npages(), but for the
bvec case the number of pages is already known and stored in
iter->nr_segs, so it can be returned immediately as an optimisation.

Perf for an io_uring benchmark with registered buffers (i.e. bvec) shows
~1.5-2.0% of the total cycle count spent in iov_iter_npages(); this
patch drops that to ~0.2%.

Reviewed-by: Jens Axboe <axboe@xxxxxxxxx>
Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
---
 lib/iov_iter.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 1635111c5bd2..0fa7ac330acf 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1594,6 +1594,8 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
 		return 0;
 	if (unlikely(iov_iter_is_discard(i)))
 		return 0;
+	if (unlikely(iov_iter_is_bvec(i)))
+		return min_t(int, i->nr_segs, maxpages);
 
 	if (unlikely(iov_iter_is_pipe(i))) {
 		struct pipe_inode_info *pipe = i->pipe;
@@ -1614,11 +1616,9 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
 			- p / PAGE_SIZE;
 		if (npages >= maxpages)
 			return maxpages;
-	0;}),({
-		npages++;
-		if (npages >= maxpages)
-			return maxpages;
-	}),({
+	0;}),
+	0 /* bvecs are handled above */
+	,({
 		unsigned long p = (unsigned long)v.iov_base;
 		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
 			- p / PAGE_SIZE;
-- 
2.24.0
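
For illustration, a minimal caller-side sketch of the fast path added above,
assuming single-page bvec segments as in the io_uring registered-buffer case
mentioned in the commit message; the helper example_bvec_npages() is
hypothetical and not part of the patch:

/*
 * Hypothetical example, not part of the patch: build a bvec-backed
 * iterator over two single-page segments and query its page count.
 * With the change above, iov_iter_npages() returns min_t(int, 2,
 * maxpages) straight from i->nr_segs instead of walking the segments.
 */
#include <linux/kernel.h>
#include <linux/bvec.h>
#include <linux/uio.h>

static int example_bvec_npages(struct page *p0, struct page *p1, int maxpages)
{
	struct bio_vec bv[2] = {
		{ .bv_page = p0, .bv_len = PAGE_SIZE, .bv_offset = 0 },
		{ .bv_page = p1, .bv_len = PAGE_SIZE, .bv_offset = 0 },
	};
	struct iov_iter iter;

	iov_iter_bvec(&iter, READ, bv, ARRAY_SIZE(bv), 2 * PAGE_SIZE);
	return iov_iter_npages(&iter, maxpages);
}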