The patch titled Subject: mm: page cache: store only head pages in i_pages has been added to the -mm tree. Its filename is page-cache-store-only-head-pages-in-i_pages.patch This patch should soon appear at http://ozlabs.org/~akpm/mmots/broken-out/page-cache-store-only-head-pages-in-i_pages.patch and later at http://ozlabs.org/~akpm/mmotm/broken-out/page-cache-store-only-head-pages-in-i_pages.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: Matthew Wilcox <willy@xxxxxxxxxxxxx> Subject: mm: page cache: store only head pages in i_pages Transparent Huge Pages are currently stored in i_pages as pointers to consecutive subpages. This patch changes that to storing consecutive pointers to the head page in preparation for storing huge pages more efficiently in i_pages. Large parts of this are "inspired" by Kirill's patch https://lore.kernel.org/lkml/20170126115819.58875-2-kirill.shutemov@xxxxxxxxxxxxxxx/ Link: http://lkml.kernel.org/r/20190307153051.18815-1-willy@xxxxxxxxxxxxx Signed-off-by: Matthew Wilcox <willy@xxxxxxxxxxxxx> Acked-by: Jan Kara <jack@xxxxxxx> Reviewed-by: Kirill Shutemov <kirill@xxxxxxxxxxxxx> Reviewed-and-tested-by: Song Liu <songliubraving@xxxxxx> Tested-by: William Kucharski <william.kucharski@xxxxxxxxxx> Reviewed-by: William Kucharski <william.kucharski@xxxxxxxxxx> Cc: Hugh Dickins <hughd@xxxxxxxxxx> Cc: Song Liu <liu.song.a23@xxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- include/linux/pagemap.h | 9 ++ mm/filemap.c | 159 ++++++++++++++------------------------ mm/huge_memory.c | 3 mm/khugepaged.c | 4 mm/memfd.c | 2 mm/migrate.c | 2 mm/shmem.c | 2 mm/swap_state.c | 4 8 files changed, 82 insertions(+), 103 deletions(-) --- a/include/linux/pagemap.h~page-cache-store-only-head-pages-in-i_pages +++ a/include/linux/pagemap.h @@ -333,6 +333,15 @@ static inline struct page *grab_cache_pa mapping_gfp_mask(mapping)); } +static inline struct page *find_subpage(struct page *page, pgoff_t offset) +{ + VM_BUG_ON_PAGE(PageTail(page), page); + VM_BUG_ON_PAGE(page->index > offset, page); + VM_BUG_ON_PAGE(page->index + (1 << compound_order(page)) <= offset, + page); + return page - page->index + offset; +} + struct page *find_get_entry(struct address_space *mapping, pgoff_t offset); struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset); unsigned find_get_entries(struct address_space *mapping, pgoff_t start, --- a/mm/filemap.c~page-cache-store-only-head-pages-in-i_pages +++ a/mm/filemap.c @@ -279,11 +279,11 @@ EXPORT_SYMBOL(delete_from_page_cache); * @pvec: pagevec with pages to delete * * The function walks over mapping->i_pages and removes pages passed in @pvec - * from the mapping. The function expects @pvec to be sorted by page index. + * from the mapping. The function expects @pvec to be sorted by page index + * and is optimised for it to be dense. * It tolerates holes in @pvec (mapping entries at those indices are not * modified). The function expects only THP head pages to be present in the - * @pvec and takes care to delete all corresponding tail pages from the - * mapping as well. + * @pvec. * * The function expects the i_pages lock to be held. */ @@ -292,40 +292,44 @@ static void page_cache_delete_batch(stru { XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index); int total_pages = 0; - int i = 0, tail_pages = 0; + int i = 0; struct page *page; mapping_set_update(&xas, mapping); xas_for_each(&xas, page, ULONG_MAX) { - if (i >= pagevec_count(pvec) && !tail_pages) + if (i >= pagevec_count(pvec)) break; + + /* A swap/dax/shadow entry got inserted? Skip it. */ if (xa_is_value(page)) continue; - if (!tail_pages) { - /* - * Some page got inserted in our range? Skip it. We - * have our pages locked so they are protected from - * being removed. - */ - if (page != pvec->pages[i]) { - VM_BUG_ON_PAGE(page->index > - pvec->pages[i]->index, page); - continue; - } - WARN_ON_ONCE(!PageLocked(page)); - if (PageTransHuge(page) && !PageHuge(page)) - tail_pages = HPAGE_PMD_NR - 1; + /* + * A page got inserted in our range? Skip it. We have our + * pages locked so they are protected from being removed. + * If we see a page whose index is higher than ours, it + * means our page has been removed, which shouldn't be + * possible because we're holding the PageLock. + */ + if (page != pvec->pages[i]) { + VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index, + page); + continue; + } + + WARN_ON_ONCE(!PageLocked(page)); + + if (page->index == xas.xa_index) page->mapping = NULL; - /* - * Leave page->index set: truncation lookup relies - * upon it - */ + /* Leave page->index set: truncation lookup relies on it */ + + /* + * Move to the next page in the vector if this is a regular + * page or the index is of the last sub-page of this compound + * page. + */ + if (page->index + (1UL << compound_order(page)) - 1 == + xas.xa_index) i++; - } else { - VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages - != pvec->pages[i]->index, page); - tail_pages--; - } xas_store(&xas, NULL); total_pages++; } @@ -1491,7 +1495,7 @@ EXPORT_SYMBOL(page_cache_prev_miss); struct page *find_get_entry(struct address_space *mapping, pgoff_t offset) { XA_STATE(xas, &mapping->i_pages, offset); - struct page *head, *page; + struct page *page; rcu_read_lock(); repeat: @@ -1506,25 +1510,19 @@ repeat: if (!page || xa_is_value(page)) goto out; - head = compound_head(page); - if (!page_cache_get_speculative(head)) + if (!page_cache_get_speculative(page)) goto repeat; - /* The page was split under us? */ - if (compound_head(page) != head) { - put_page(head); - goto repeat; - } - /* - * Has the page moved? + * Has the page moved or been split? * This is part of the lockless pagecache protocol. See * include/linux/pagemap.h for details. */ if (unlikely(page != xas_reload(&xas))) { - put_page(head); + put_page(page); goto repeat; } + page = find_subpage(page, offset); out: rcu_read_unlock(); @@ -1706,7 +1704,6 @@ unsigned find_get_entries(struct address rcu_read_lock(); xas_for_each(&xas, page, ULONG_MAX) { - struct page *head; if (xas_retry(&xas, page)) continue; /* @@ -1717,17 +1714,13 @@ unsigned find_get_entries(struct address if (xa_is_value(page)) goto export; - head = compound_head(page); - if (!page_cache_get_speculative(head)) + if (!page_cache_get_speculative(page)) goto retry; - /* The page was split under us? */ - if (compound_head(page) != head) - goto put_page; - - /* Has the page moved? */ + /* Has the page moved or been split? */ if (unlikely(page != xas_reload(&xas))) goto put_page; + page = find_subpage(page, xas.xa_index); export: indices[ret] = xas.xa_index; @@ -1736,7 +1729,7 @@ export: break; continue; put_page: - put_page(head); + put_page(page); retry: xas_reset(&xas); } @@ -1778,33 +1771,27 @@ unsigned find_get_pages_range(struct add rcu_read_lock(); xas_for_each(&xas, page, end) { - struct page *head; if (xas_retry(&xas, page)) continue; /* Skip over shadow, swap and DAX entries */ if (xa_is_value(page)) continue; - head = compound_head(page); - if (!page_cache_get_speculative(head)) + if (!page_cache_get_speculative(page)) goto retry; - /* The page was split under us? */ - if (compound_head(page) != head) - goto put_page; - - /* Has the page moved? */ + /* Has the page moved or been split? */ if (unlikely(page != xas_reload(&xas))) goto put_page; - pages[ret] = page; + pages[ret] = find_subpage(page, xas.xa_index); if (++ret == nr_pages) { *start = xas.xa_index + 1; goto out; } continue; put_page: - put_page(head); + put_page(page); retry: xas_reset(&xas); } @@ -1849,7 +1836,6 @@ unsigned find_get_pages_contig(struct ad rcu_read_lock(); for (page = xas_load(&xas); page; page = xas_next(&xas)) { - struct page *head; if (xas_retry(&xas, page)) continue; /* @@ -1859,24 +1845,19 @@ unsigned find_get_pages_contig(struct ad if (xa_is_value(page)) break; - head = compound_head(page); - if (!page_cache_get_speculative(head)) + if (!page_cache_get_speculative(page)) goto retry; - /* The page was split under us? */ - if (compound_head(page) != head) - goto put_page; - - /* Has the page moved? */ + /* Has the page moved or been split? */ if (unlikely(page != xas_reload(&xas))) goto put_page; - pages[ret] = page; + pages[ret] = find_subpage(page, xas.xa_index); if (++ret == nr_pages) break; continue; put_page: - put_page(head); + put_page(page); retry: xas_reset(&xas); } @@ -1912,7 +1893,6 @@ unsigned find_get_pages_range_tag(struct rcu_read_lock(); xas_for_each_marked(&xas, page, end, tag) { - struct page *head; if (xas_retry(&xas, page)) continue; /* @@ -1923,26 +1903,21 @@ unsigned find_get_pages_range_tag(struct if (xa_is_value(page)) continue; - head = compound_head(page); - if (!page_cache_get_speculative(head)) + if (!page_cache_get_speculative(page)) goto retry; - /* The page was split under us? */ - if (compound_head(page) != head) - goto put_page; - - /* Has the page moved? */ + /* Has the page moved or been split? */ if (unlikely(page != xas_reload(&xas))) goto put_page; - pages[ret] = page; + pages[ret] = find_subpage(page, xas.xa_index); if (++ret == nr_pages) { *index = xas.xa_index + 1; goto out; } continue; put_page: - put_page(head); + put_page(page); retry: xas_reset(&xas); } @@ -1991,7 +1966,6 @@ unsigned find_get_entries_tag(struct add rcu_read_lock(); xas_for_each_marked(&xas, page, ULONG_MAX, tag) { - struct page *head; if (xas_retry(&xas, page)) continue; /* @@ -2002,17 +1976,13 @@ unsigned find_get_entries_tag(struct add if (xa_is_value(page)) goto export; - head = compound_head(page); - if (!page_cache_get_speculative(head)) + if (!page_cache_get_speculative(page)) goto retry; - /* The page was split under us? */ - if (compound_head(page) != head) - goto put_page; - - /* Has the page moved? */ + /* Has the page moved or been split? */ if (unlikely(page != xas_reload(&xas))) goto put_page; + page = find_subpage(page, xas.xa_index); export: indices[ret] = xas.xa_index; @@ -2021,7 +1991,7 @@ export: break; continue; put_page: - put_page(head); + put_page(page); retry: xas_reset(&xas); } @@ -2691,7 +2661,7 @@ void filemap_map_pages(struct vm_fault * pgoff_t last_pgoff = start_pgoff; unsigned long max_idx; XA_STATE(xas, &mapping->i_pages, start_pgoff); - struct page *head, *page; + struct page *page; rcu_read_lock(); xas_for_each(&xas, page, end_pgoff) { @@ -2700,24 +2670,19 @@ void filemap_map_pages(struct vm_fault * if (xa_is_value(page)) goto next; - head = compound_head(page); - /* * Check for a locked page first, as a speculative * reference may adversely influence page migration. */ - if (PageLocked(head)) + if (PageLocked(page)) goto next; - if (!page_cache_get_speculative(head)) + if (!page_cache_get_speculative(page)) goto next; - /* The page was split under us? */ - if (compound_head(page) != head) - goto skip; - - /* Has the page moved? */ + /* Has the page moved or been split? */ if (unlikely(page != xas_reload(&xas))) goto skip; + page = find_subpage(page, xas.xa_index); if (!PageUptodate(page) || PageReadahead(page) || --- a/mm/huge_memory.c~page-cache-store-only-head-pages-in-i_pages +++ a/mm/huge_memory.c @@ -2456,6 +2456,9 @@ static void __split_huge_page(struct pag if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head)) shmem_uncharge(head->mapping->host, 1); put_page(head + i); + } else if (!PageAnon(page)) { + __xa_store(&head->mapping->i_pages, head[i].index, + head + i, 0); } } --- a/mm/khugepaged.c~page-cache-store-only-head-pages-in-i_pages +++ a/mm/khugepaged.c @@ -1374,7 +1374,7 @@ static void collapse_shmem(struct mm_str result = SCAN_FAIL; goto xa_locked; } - xas_store(&xas, new_page + (index % HPAGE_PMD_NR)); + xas_store(&xas, new_page); nr_none++; continue; } @@ -1450,7 +1450,7 @@ static void collapse_shmem(struct mm_str list_add_tail(&page->lru, &pagelist); /* Finally, replace with the new page. */ - xas_store(&xas, new_page + (index % HPAGE_PMD_NR)); + xas_store(&xas, new_page); continue; out_unlock: unlock_page(page); --- a/mm/memfd.c~page-cache-store-only-head-pages-in-i_pages +++ a/mm/memfd.c @@ -39,6 +39,7 @@ static void memfd_tag_pins(struct xa_sta xas_for_each(xas, page, ULONG_MAX) { if (xa_is_value(page)) continue; + page = find_subpage(page, xas->xa_index); if (page_count(page) - page_mapcount(page) > 1) xas_set_mark(xas, MEMFD_TAG_PINNED); @@ -88,6 +89,7 @@ static int memfd_wait_for_pins(struct ad bool clear = true; if (xa_is_value(page)) continue; + page = find_subpage(page, xas.xa_index); if (page_count(page) - page_mapcount(page) != 1) { /* * On the last scan, we clean up all those tags --- a/mm/migrate.c~page-cache-store-only-head-pages-in-i_pages +++ a/mm/migrate.c @@ -465,7 +465,7 @@ int migrate_page_move_mapping(struct add for (i = 1; i < HPAGE_PMD_NR; i++) { xas_next(&xas); - xas_store(&xas, newpage + i); + xas_store(&xas, newpage); } } --- a/mm/shmem.c~page-cache-store-only-head-pages-in-i_pages +++ a/mm/shmem.c @@ -614,7 +614,7 @@ static int shmem_add_to_page_cache(struc if (xas_error(&xas)) goto unlock; next: - xas_store(&xas, page + i); + xas_store(&xas, page); if (++i < nr) { xas_next(&xas); goto next; --- a/mm/swap_state.c~page-cache-store-only-head-pages-in-i_pages +++ a/mm/swap_state.c @@ -132,7 +132,7 @@ int add_to_swap_cache(struct page *page, for (i = 0; i < nr; i++) { VM_BUG_ON_PAGE(xas.xa_index != idx + i, page); set_page_private(page + i, entry.val + i); - xas_store(&xas, page + i); + xas_store(&xas, page); xas_next(&xas); } address_space->nrpages += nr; @@ -167,7 +167,7 @@ void __delete_from_swap_cache(struct pag for (i = 0; i < nr; i++) { void *entry = xas_store(&xas, NULL); - VM_BUG_ON_PAGE(entry != page + i, entry); + VM_BUG_ON_PAGE(entry != page, entry); set_page_private(page + i, 0); xas_next(&xas); } _ Patches currently in -mm which might be from willy@xxxxxxxxxxxxx are page-cache-store-only-head-pages-in-i_pages.patch