The block layer spends quite a while in iov_iter_npages(), but for bvecs the number of pages is already known and stored in iter->nr_segs, so it can be returned directly as an optimisation. Running an io_uring benchmark with registered buffers (i.e. bvec), perf showed that ~1.5-2.0% of total cycles were spent there, and that dropped to ~0.2% after applying this patch. Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx> --- lib/iov_iter.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 1635111c5bd2..0fa7ac330acf 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1594,6 +1594,8 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) return 0; if (unlikely(iov_iter_is_discard(i))) return 0; + if (unlikely(iov_iter_is_bvec(i))) + return min_t(int, i->nr_segs, maxpages); if (unlikely(iov_iter_is_pipe(i))) { struct pipe_inode_info *pipe = i->pipe; @@ -1614,11 +1616,9 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) - p / PAGE_SIZE; if (npages >= maxpages) return maxpages; - 0;}),({ - npages++; - if (npages >= maxpages) - return maxpages; - }),({ + 0;}), + 0 /* bvecs are handled above */ + ,({ unsigned long p = (unsigned long)v.iov_base; npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) - p / PAGE_SIZE; -- 2.24.0