When testing large folio support with XFS on our servers, we observed
that only a few large folios are mapped when reading large files via
mmap. This behavior occurs because large folio support is currently
implemented only for sync readahead, not for async readahead.
Consequently, while the first filemap fault may map a large folio,
subsequent filemap faults fall back to regular (order-0) folios. This
can be verified with a simple test case, as shown below:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>

#define LEN (1024 * 1024 * 1024) /* 1GB file */

int main(int argc, char *argv[])
{
	char *addr;
	int fd, i;

	/* "data" must be an existing file of at least LEN bytes. */
	fd = open("data", O_RDWR);
	if (fd < 0) {
		perror("open");
		exit(-1);
	}

	addr = mmap(NULL, LEN, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		exit(-1);
	}

	if (madvise(addr, LEN, MADV_HUGEPAGE)) {
		perror("madvise");
		exit(-1);
	}

	/* Touch one byte in each 4KB page to fault in the whole file. */
	for (i = 0; i < LEN / 4096; i++)
		memset(addr + i * 4096, 1, 1);

	/* Keep the process alive; verifiable with /proc/meminfo. */
	while (1) {}

	munmap(addr, LEN);
	close(fd);
	exit(0);
}

This patch adds large folio support to async readahead, aligning its
behavior with sync readahead.

Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx>
---
 include/linux/pagemap.h | 4 +++-
 mm/filemap.c            | 4 ++++
 mm/readahead.c          | 9 +++++++++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 68a5f1ff3301..b77eb5d3a3dd 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1350,8 +1350,10 @@ struct readahead_control {
 	pgoff_t _index;
 	unsigned int _nr_pages;
 	unsigned int _batch_count;
-	bool _workingset;
 	unsigned long _pflags;
+	bool _workingset;
+	bool _large_folio;
+	bool _rand_read;
 };
 
 #define DEFINE_READAHEAD(ractl, f, r, m, i)				\
diff --git a/mm/filemap.c b/mm/filemap.c
index 36d22968be9a..c9695effdd88 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3226,6 +3226,10 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
 		WRITE_ONCE(ra->mmap_miss, --mmap_miss);
 
 	if (folio_test_readahead(folio)) {
+		if (vmf->vma->vm_flags & VM_HUGEPAGE)
+			ractl._large_folio = true;
+		if (vmf->vma->vm_flags & VM_RAND_READ)
+			ractl._rand_read = true;
 		fpin = maybe_unlock_mmap_for_io(vmf, fpin);
 		page_cache_async_ra(&ractl, folio, ra->ra_pages);
 	}
diff --git a/mm/readahead.c b/mm/readahead.c
index 3dc6c7a128dd..8c4f95f87b71 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -667,6 +667,15 @@ void page_cache_async_ra(struct readahead_control *ractl,
 		ra->async_size = ra->size;
 readit:
 	ractl->_index = ra->start;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (ractl->_large_folio) {
+		ractl->_index &= ~((unsigned long)HPAGE_PMD_NR - 1);
+		if (!ractl->_rand_read)
+			ra->size = 2 * HPAGE_PMD_NR;
+		ra->async_size = HPAGE_PMD_NR;
+		order = HPAGE_PMD_ORDER;
+	}
+#endif
	page_cache_ra_order(ractl, ra, order);
 }
 EXPORT_SYMBOL_GPL(page_cache_async_ra);
-- 
2.43.5
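
A note on verification (not part of the patch): with
CONFIG_TRANSPARENT_HUGEPAGE=y, /proc/meminfo exposes the FileHugePages
and FilePmdMapped counters for PMD-sized file folios. Below is a
minimal sketch of a checker that can be run alongside the test program
above while it spins; reading these two counters is the only assumption
it makes:

/*
 * Sketch: print the file-THP counters from /proc/meminfo so the
 * mapping behavior of the test program above can be compared before
 * and after the patch. Assumes CONFIG_TRANSPARENT_HUGEPAGE=y, so
 * FileHugePages and FilePmdMapped are present.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *fp = fopen("/proc/meminfo", "r");

	if (!fp) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), fp)) {
		if (!strncmp(line, "FileHugePages:", 14) ||
		    !strncmp(line, "FilePmdMapped:", 14))
			fputs(line, stdout);
	}
	fclose(fp);
	return 0;
}

Before the patch, one would expect FilePmdMapped to account for only
the few large folios brought in by sync readahead; with async readahead
also producing large folios, it should approach the size of the mapped
file.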