On 9 Nov 2020, at 22:36, Matthew Wilcox (Oracle) wrote: > Add filemap_get_read_batch() which returns the THPs which represent a > contiguous array of bytes in the file. It also stops when encountering > a page marked as Readahead or !Uptodate (but does return that page) > so it can be handled appropriately by filemap_get_pages(). That lets us > remove the loop in filemap_get_pages() and check only the last page. > > Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx> > Reviewed-by: Kent Overstreet <kent.overstreet@xxxxxxxxx> > Reviewed-by: Christoph Hellwig <hch@xxxxxx> > --- > mm/filemap.c | 122 +++++++++++++++++++++++++++++++++++---------------- > 1 file changed, 85 insertions(+), 37 deletions(-) > > diff --git a/mm/filemap.c b/mm/filemap.c > index bd02820601f8..1de586eb377e 100644 > --- a/mm/filemap.c > +++ b/mm/filemap.c > @@ -2176,6 +2176,51 @@ static int lock_page_for_iocb(struct kiocb *iocb, struct page *page) > return lock_page_killable(page); > } > > +/* > + * filemap_get_read_batch - Get a batch of pages for read > + * > + * Get a batch of pages which represent a contiguous range of bytes > + * in the file. No tail pages will be returned. If @index is in the > + * middle of a THP, the entire THP will be returned. The last page in > + * the batch may have Readahead set or be not Uptodate so that the > + * caller can take the appropriate action. > + */ > +static void filemap_get_read_batch(struct address_space *mapping, > + pgoff_t index, pgoff_t max, struct pagevec *pvec) > +{ > + XA_STATE(xas, &mapping->i_pages, index); > + struct page *head; > + > + rcu_read_lock(); > + for (head = xas_load(&xas); head; head = xas_next(&xas)) { > + if (xas_retry(&xas, head)) > + continue; > + if (xas.xa_index > max || xa_is_value(head)) > + break; > + if (!page_cache_get_speculative(head)) > + goto retry; > + > + /* Has the page moved or been split? */ > + if (unlikely(head != xas_reload(&xas))) > + goto put_page; > + > + if (!pagevec_add(pvec, head)) > + break; > + if (!PageUptodate(head)) > + break; > + if (PageReadahead(head)) > + break; > + xas.xa_index = head->index + thp_nr_pages(head) - 1; > + xas.xa_offset = (xas.xa_index >> xas.xa_shift) & XA_CHUNK_MASK; > + continue; > +put_page: > + put_page(head); > +retry: > + xas_reset(&xas); > + } > + rcu_read_unlock(); > +} > + > static struct page *filemap_read_page(struct kiocb *iocb, struct file *filp, > struct address_space *mapping, struct page *page) > { > @@ -2329,15 +2374,15 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter, > struct address_space *mapping = filp->f_mapping; > struct file_ra_state *ra = &filp->f_ra; > pgoff_t index = iocb->ki_pos >> PAGE_SHIFT; > - pgoff_t last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT; > - unsigned int nr = min_t(unsigned long, last_index - index, PAGEVEC_SIZE); > - int i, j, err = 0; > + pgoff_t last_index; > + int err = 0; > > + last_index = DIV_ROUND_UP(iocb->ki_pos + iter->count, PAGE_SIZE); > find_page: > if (fatal_signal_pending(current)) > return -EINTR; > > - pvec->nr = find_get_pages_contig(mapping, index, nr, pvec->pages); > + filemap_get_read_batch(mapping, index, last_index, pvec); > if (pvec->nr) > goto got_pages; > > @@ -2346,29 +2391,30 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter, > > page_cache_sync_readahead(mapping, ra, filp, index, last_index - index); > > - pvec->nr = find_get_pages_contig(mapping, index, nr, pvec->pages); > + filemap_get_read_batch(mapping, index, last_index, pvec); > if (pvec->nr) > goto got_pages; > > pvec->pages[0] = filemap_create_page(iocb, iter); > err = PTR_ERR_OR_ZERO(pvec->pages[0]); > - if (!IS_ERR_OR_NULL(pvec->pages[0])) > - pvec->nr = 1; > + if (IS_ERR_OR_NULL(pvec->pages[0])) > + goto err; > + pvec->nr = 1; > + return 0; > got_pages: > - for (i = 0; i < pvec->nr; i++) { > - struct page *page = pvec->pages[i]; > - pgoff_t pg_index = index + i; > + { > + struct page *page = pvec->pages[pvec->nr - 1]; > + pgoff_t pg_index = page->index; > loff_t pg_pos = max(iocb->ki_pos, > (loff_t) pg_index << PAGE_SHIFT); > loff_t pg_count = iocb->ki_pos + iter->count - pg_pos; > > if (PageReadahead(page)) { > if (iocb->ki_flags & IOCB_NOIO) { > - for (j = i; j < pvec->nr; j++) > - put_page(pvec->pages[j]); > - pvec->nr = i; > + put_page(page); > + pvec->nr--; > err = -EAGAIN; > - break; > + goto err; > } > page_cache_async_readahead(mapping, ra, filp, page, > pg_index, last_index - pg_index); > @@ -2376,26 +2422,23 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter, > > if (!PageUptodate(page)) { > if ((iocb->ki_flags & IOCB_NOWAIT) || > - ((iocb->ki_flags & IOCB_WAITQ) && i)) { > - for (j = i; j < pvec->nr; j++) > - put_page(pvec->pages[j]); > - pvec->nr = i; > + ((iocb->ki_flags & IOCB_WAITQ) && pvec->nr > 1)) { > + put_page(page); > + pvec->nr--; > err = -EAGAIN; > - break; > + goto err; > } > > page = filemap_update_page(iocb, filp, iter, page, > pg_pos, pg_count); > if (IS_ERR_OR_NULL(page)) { > - for (j = i + 1; j < pvec->nr; j++) > - put_page(pvec->pages[j]); > - pvec->nr = i; > + pvec->nr--; > err = PTR_ERR_OR_ZERO(page); > - break; > } > } > } > > +err: > if (likely(pvec->nr)) > return 0; > if (err) > @@ -2437,6 +2480,7 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb, > if (unlikely(iocb->ki_pos >= inode->i_sb->s_maxbytes)) > return 0; > iov_iter_truncate(iter, inode->i_sb->s_maxbytes); > + pagevec_init(pvec); This should be pagevec_init(&pvec); > > do { > cond_resched(); > @@ -2464,13 +2508,8 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb, > isize = i_size_read(inode); > if (unlikely(iocb->ki_pos >= isize)) > goto put_pages; > - > end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count); > > - while ((iocb->ki_pos >> PAGE_SHIFT) + pvec.nr > > - (end_offset + PAGE_SIZE - 1) >> PAGE_SHIFT) > - put_page(pvec.pages[--pvec.nr]); > - > /* > * Once we start copying data, we don't want to be touching any > * cachelines that might be contended: > @@ -2484,24 +2523,32 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb, > if (iocb->ki_pos >> PAGE_SHIFT != > ra->prev_pos >> PAGE_SHIFT) > mark_page_accessed(pvec.pages[0]); > - for (i = 1; i < pagevec_count(&pvec); i++) > - mark_page_accessed(pvec.pages[i]); > > for (i = 0; i < pagevec_count(&pvec); i++) { > - unsigned int offset = iocb->ki_pos & ~PAGE_MASK; > - unsigned int bytes = min_t(loff_t, end_offset - iocb->ki_pos, > - PAGE_SIZE - offset); > - unsigned int copied; > + struct page *page = pvec.pages[i]; > + size_t page_size = thp_size(page); > + size_t offset = iocb->ki_pos & (page_size - 1); > + size_t bytes = min_t(loff_t, end_offset - iocb->ki_pos, > + page_size - offset); > + size_t copied; > > + if (end_offset < page_offset(page)) > + break; > + if (i > 0) > + mark_page_accessed(page); > /* > * If users can be writing to this page using arbitrary > * virtual addresses, take care about potential aliasing > * before reading the page on the kernel side. > */ > - if (writably_mapped) > - flush_dcache_page(pvec.pages[i]); > + if (writably_mapped) { > + int j; > + > + for (j = 0; j < thp_nr_pages(page); j++) > + flush_dcache_page(page + j); > + } > > - copied = copy_page_to_iter(pvec.pages[i], offset, bytes, iter); > + copied = copy_page_to_iter(page, offset, bytes, iter); > > written += copied; > iocb->ki_pos += copied; > @@ -2515,6 +2562,7 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb, > put_pages: > for (i = 0; i < pagevec_count(&pvec); i++) > put_page(pvec.pages[i]); > + pagevec_reinit(pvec); It should be pagevec_reinit(&pvec); Found the above two issues during compilation. — Best Regards, Yan Zi
Attachment:
signature.asc
Description: OpenPGP digital signature