On 6/7/21 12:29 AM, Al Viro wrote: > On Sun, Jun 06, 2021 at 03:05:49PM -0700, Linus Torvalds wrote: [...] > > BTW, speaking of initializers... Pavel, could you check if > the following breaks anything? Unless I'm misreading __io_import_fixed(), > looks like that's what it's trying to do... It's a version of iov_iter_advance() that assumes every bvec covers a single page and that all of them, except possibly the first and last, are page-aligned and PAGE_SIZE long. It looks right and works well; I'll try the full set later. By the way, since that assumption does not hold for bvecs in general, I'd suggest adding a comment that says so. I don't like the idea of it being misused... > diff --git a/fs/io_uring.c b/fs/io_uring.c > index f46acbbeed57..9bd2da9a4c3d 100644 > --- a/fs/io_uring.c > +++ b/fs/io_uring.c > @@ -2773,57 +2773,14 @@ static int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter > { > size_t len = req->rw.len; > u64 buf_end, buf_addr = req->rw.addr; > - size_t offset; > > if (unlikely(check_add_overflow(buf_addr, (u64)len, &buf_end))) > return -EFAULT; > /* not inside the mapped region */ > if (unlikely(buf_addr < imu->ubuf || buf_end > imu->ubuf_end)) > return -EFAULT; > - > - /* > - * May not be a start of buffer, set size appropriately > - * and advance us to the beginning. > - */ > - offset = buf_addr - imu->ubuf; > - iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); > - > - if (offset) { > - /* > - * Don't use iov_iter_advance() here, as it's really slow for > - * using the latter parts of a big fixed buffer - it iterates > - * over each segment manually. We can cheat a bit here, because > - * we know that: > - * > - * 1) it's a BVEC iter, we set it up > - * 2) all bvecs are PAGE_SIZE in size, except potentially the > - * first and last bvec > - * > - * So just find our index, and adjust the iterator afterwards. > - * If the offset is within the first bvec (or the whole first > - * bvec, just use iov_iter_advance(). 
This makes it easier > - * since we can just skip the first segment, which may not > - * be PAGE_SIZE aligned. > - */ > - const struct bio_vec *bvec = imu->bvec; > - > - if (offset <= bvec->bv_len) { > - iov_iter_advance(iter, offset); > - } else { > - unsigned long seg_skip; > - > - /* skip first vec */ > - offset -= bvec->bv_len; > - seg_skip = 1 + (offset >> PAGE_SHIFT); > - > - iter->bvec = bvec + seg_skip; > - iter->nr_segs -= seg_skip; > - iter->count -= bvec->bv_len + offset; > - iter->iov_offset = offset & ~PAGE_MASK; > - } > - } > - > - return 0; > + return import_pagevec(rw, buf_addr, len, imu->ubuf, > + imu->nr_bvecs, imu->bvec, iter); > } > > static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter) > diff --git a/include/linux/uio.h b/include/linux/uio.h > index fd88d9911dad..f6291e981d07 100644 > --- a/include/linux/uio.h > +++ b/include/linux/uio.h > @@ -299,5 +299,8 @@ ssize_t __import_iovec(int type, const struct iovec __user *uvec, > struct iov_iter *i, bool compat); > int import_single_range(int type, void __user *buf, size_t len, > struct iovec *iov, struct iov_iter *i); > +int import_pagevec(int rw, unsigned long from, size_t len, > + unsigned long base, unsigned nr_pages, > + struct bio_vec *bvec, struct iov_iter *i); > > #endif > diff --git a/lib/iov_iter.c b/lib/iov_iter.c > index 11b39bd1d1ab..4a771fcb529b 100644 > --- a/lib/iov_iter.c > +++ b/lib/iov_iter.c > @@ -1982,3 +1982,21 @@ int import_single_range(int rw, void __user *buf, size_t len, > return 0; > } > EXPORT_SYMBOL(import_single_range); > + > +int import_pagevec(int rw, unsigned long from, size_t len, > + unsigned long base, unsigned nr_pages, > + struct bio_vec *bvec, struct iov_iter *i) > + > +{ > + unsigned long skip_pages = (from >> PAGE_SHIFT) - (base >> PAGE_SHIFT); > + > + *i = (struct iov_iter){ > + .iter_type = ITER_BVEC, > + .data_source = rw, > + .bvec = bvec + skip_pages, > + .nr_segs = nr_pages - skip_pages, > + .iov_offset = skip_pages ? 
from & ~PAGE_MASK : from - base, > + .count = len > + }; > + return 0; > +} > -- Pavel Begunkov