On Mon, Feb 06, 2023 at 08:28:56PM +0900, David Stevens wrote:
> This change first makes sure that the intermediate page cache state
> during collapse is not visible by moving when gaps are filled to after
> the page cache lock is acquired for the final time. This is necessary
> because the synchronization provided by locking hpage is insufficient
> for functions which operate on the page cache without actually locking
> individual pages to examine their content (e.g. shmem_mfill_atomic_pte).

I've been a little scared of touching khugepaged because, well, look
at that function.  But if we are going to touch it, how about this
patch first?  It does _part_ of what you need by not filling in the
holes, but obviously not the part that looks at uffd.  It leaves the
old pages in-place and frozen.

I think this should be safe, but I haven't booted it (not entirely
sure what test I'd run to prove that it's not broken)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index eb38bd1b1b2f..cfd33dff7253 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1845,15 +1845,14 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
  *  - allocate and lock a new huge page;
  *  - scan page cache replacing old pages with the new one
  *    + swap/gup in pages if necessary;
- *    + fill in gaps;
+ *    + freeze the old pages
  *    + keep old pages around in case rollback is required;
  *  - if replacing succeeds:
  *    + copy data over;
  *    + free old pages;
  *    + unlock huge page;
  *  - if replacing failed;
- *    + put all pages back and unfreeze them;
- *    + restore gaps in the page cache;
+ *    + unfreeze old pages;
  *    + unlock and free huge page;
  */
 static int collapse_file(struct mm_struct *mm, unsigned long addr,
@@ -1930,7 +1929,6 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 				result = SCAN_FAIL;
 				goto xa_locked;
 			}
-			xas_store(&xas, hpage);
 			nr_none++;
 			continue;
 		}
@@ -2081,8 +2079,6 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 		 */
 		list_add_tail(&page->lru, &pagelist);
 
-		/* Finally, replace with the new page. */
-		xas_store(&xas, hpage);
 		continue;
 out_unlock:
 		unlock_page(page);
@@ -2195,32 +2191,17 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 			shmem_uncharge(mapping->host, nr_none);
 		}
 
-		xas_set(&xas, start);
-		xas_for_each(&xas, page, end - 1) {
+		list_for_each_entry_safe(page, tmp, &pagelist, lru) {
+			list_del(&page->lru);
 			page = list_first_entry_or_null(&pagelist,
 					struct page, lru);
-			if (!page || xas.xa_index < page->index) {
-				if (!nr_none)
-					break;
-				nr_none--;
-				/* Put holes back where they were */
-				xas_store(&xas, NULL);
-				continue;
-			}
-
-			VM_BUG_ON_PAGE(page->index != xas.xa_index, page);
 
 			/* Unfreeze the page. */
 			list_del(&page->lru);
 			page_ref_unfreeze(page, 2);
-			xas_store(&xas, page);
-			xas_pause(&xas);
-			xas_unlock_irq(&xas);
 			unlock_page(page);
 			putback_lru_page(page);
-			xas_lock_irq(&xas);
 		}
-		VM_BUG_ON(nr_none);
 
 		/*
 		 * Undo the updates of filemap_nr_thps_inc for non-SHMEM file only.
 		 * This undo is not needed unless failure is due to SCAN_COPY_MC.
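
Caveat before anyone applies that: the last hunk still carries
leftovers from the old loop.  With list_for_each_entry_safe() doing
the iteration, the stale "page = list_first_entry_or_null(...)"
assignment and the duplicate list_del() would skip every other page
and step the iterator onto a poisoned list entry.  Equally untested,
but the rollback loop presumably wants to end up as just:

		list_for_each_entry_safe(page, tmp, &pagelist, lru) {
			/* Unfreeze: page cache ref + our pin remain. */
			list_del(&page->lru);
			page_ref_unfreeze(page, 2);
			unlock_page(page);
			putback_lru_page(page);
		}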
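
And in case it's not obvious why leaving the pages frozen is enough to
hide the intermediate state: page_ref_freeze(page, n) only succeeds if
we hold all n references, atomically dropping the refcount to zero,
and a speculative page cache lookup that finds a zero-refcount page
fails its try-get and retries, so it never starts operating on a
half-collapsed page.  Here is a rough standalone model of that
protocol in C11 atomics -- explicitly not kernel code: fake_page and
the ref_* helpers are invented for illustration, and RCU plus the
lookup's re-check of the page cache slot are elided.

	/*
	 * Standalone model of the page_ref_freeze()/page_ref_unfreeze()
	 * protocol.  Not kernel code; build with: cc -std=c11 freeze.c
	 */
	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct fake_page {
		atomic_int refcount;
	};

	/* Modelled on page_ref_freeze(): succeeds only if we hold all
	 * @count references; the refcount then drops to 0 atomically. */
	static bool ref_freeze(struct fake_page *page, int count)
	{
		int expected = count;

		return atomic_compare_exchange_strong(&page->refcount,
						      &expected, 0);
	}

	/* Modelled on page_ref_unfreeze(): republish with @count refs. */
	static void ref_unfreeze(struct fake_page *page, int count)
	{
		atomic_store(&page->refcount, count);
	}

	/* Modelled on a speculative lookup's try-get: never take a
	 * reference on a frozen (refcount == 0) page; the real lookup
	 * path would retry the walk. */
	static bool ref_try_get(struct fake_page *page)
	{
		int old = atomic_load(&page->refcount);

		while (old != 0) {
			if (atomic_compare_exchange_weak(&page->refcount,
							 &old, old + 1))
				return true;
		}
		return false;
	}

	int main(void)
	{
		/* Collapse holds 3 refs: our pin, page cache, LRU isolation. */
		struct fake_page page = { .refcount = 3 };

		printf("freeze(3): %d\n", ref_freeze(&page, 3));	  /* 1 */
		printf("lookup while frozen: %d\n", ref_try_get(&page));  /* 0 */

		/* Rollback: unfreeze to 2 (page cache ref + our pin). */
		ref_unfreeze(&page, 2);
		printf("lookup after unfreeze: %d\n", ref_try_get(&page));/* 1 */
		return 0;
	}

The property that matters is that the freeze can only win when nobody
else holds a reference, so there is no window in which another thread
already has the page pinned while we treat it as unreachable.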