Support splitting pages during THP zone device migration as needed.

The common case is that, after setup, the destination is unable to
allocate MIGRATE_PFN_COMPOUND pages during the migrate phase.

Add a new routine, migrate_vma_split_pages(), to support splitting
pages that have already been isolated. The pages being migrated have
already been unmapped and marked for migration during setup (via
unmap).

folio_split() and __split_unmapped_folio() take an additional
"isolated" argument so that these code paths avoid unmapping and
remapping the pages and unlocking/putting the folio. Since unmap/remap
is avoided, an extra reference count is taken on the split folio
pages; it is dropped in the finalize phase.

Signed-off-by: Balbir Singh <balbirs@xxxxxxxxxx>
---
A sketch of the expected driver-side fallback is appended after the
diff for context.

 include/linux/huge_mm.h | 11 ++++++--
 mm/huge_memory.c        | 53 +++++++++++++++++++++++++-----------
 mm/migrate_device.c     | 60 ++++++++++++++++++++++++++++++++---------
 3 files changed, 94 insertions(+), 30 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index ad0c0ccfcbc2..abb8debfb362 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -341,8 +341,8 @@ unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long add
 		vm_flags_t vm_flags);
 
 bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins);
-int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
-		unsigned int new_order);
+int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
+		unsigned int new_order, bool isolated);
 int min_order_for_split(struct folio *folio);
 int split_folio_to_list(struct folio *folio, struct list_head *list);
 bool uniform_split_supported(struct folio *folio, unsigned int new_order,
@@ -351,6 +351,13 @@ bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
 		bool warns);
 int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
 		struct list_head *list);
+
+static inline int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
+		unsigned int new_order)
+{
+	return __split_huge_page_to_list_to_order(page, list, new_order, false);
+}
+
 /*
  * try_folio_split - try to split a @folio at @page using non uniform split.
  * @folio: folio to be split
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 518a70d1b58a..1a6f0e70acee 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3544,7 +3544,7 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
 		struct page *split_at, struct page *lock_at,
 		struct list_head *list, pgoff_t end,
 		struct xa_state *xas, struct address_space *mapping,
-		bool uniform_split)
+		bool uniform_split, bool isolated)
 {
 	struct lruvec *lruvec;
 	struct address_space *swap_cache = NULL;
@@ -3586,6 +3586,7 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
 		int old_order = folio_order(folio);
 		struct folio *release;
 		struct folio *end_folio = folio_next(folio);
+		int extra_count = 1;
 
 		/* order-1 anonymous folio is not supported */
 		if (folio_test_anon(folio) && split_order == 1)
@@ -3629,6 +3630,14 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
 			__split_folio_to_order(folio, old_order, split_order);
 
 after_split:
+		/*
+		 * When a folio is isolated, the split folios will
+		 * not go through unmap/remap, so add the extra
+		 * count here
+		 */
+		if (isolated)
+			extra_count++;
+
 		/*
 		 * Iterate through after-split folios and perform related
 		 * operations. But in buddy allocator like split, the folio
@@ -3665,7 +3674,7 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
 			 * page cache.
 			 */
 			folio_ref_unfreeze(release,
-				1 + ((!folio_test_anon(origin_folio) ||
+				extra_count + ((!folio_test_anon(origin_folio) ||
 				     folio_test_swapcache(origin_folio)) ?
 					     folio_nr_pages(release) : 0));
 
@@ -3676,7 +3685,7 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
 			if (release == origin_folio)
 				continue;
 
-			if (!folio_is_device_private(origin_folio))
+			if (!isolated && !folio_is_device_private(origin_folio))
 				lru_add_page_tail(origin_folio, &release->page,
 						lruvec, list);
 
@@ -3714,6 +3723,12 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
 	if (nr_dropped)
 		shmem_uncharge(mapping->host, nr_dropped);
 
+	/*
+	 * Don't remap and unlock isolated folios
+	 */
+	if (isolated)
+		return ret;
+
 	remap_page(origin_folio, 1 << order,
 		   folio_test_anon(origin_folio) ?
 			RMP_USE_SHARED_ZEROPAGE : 0);
@@ -3808,6 +3823,7 @@ bool uniform_split_supported(struct folio *folio, unsigned int new_order,
  * @lock_at: a page within @folio to be left locked to caller
  * @list: after-split folios will be put on it if non NULL
  * @uniform_split: perform uniform split or not (non-uniform split)
+ * @isolated: The pages are already unmapped
  *
  * It calls __split_unmapped_folio() to perform uniform and non-uniform split.
  * It is in charge of checking whether the split is supported or not and
@@ -3818,7 +3834,7 @@ bool uniform_split_supported(struct folio *folio, unsigned int new_order,
  */
 static int __folio_split(struct folio *folio, unsigned int new_order,
 		struct page *split_at, struct page *lock_at,
-		struct list_head *list, bool uniform_split)
+		struct list_head *list, bool uniform_split, bool isolated)
 {
 	struct deferred_split *ds_queue = get_deferred_split_queue(folio);
 	XA_STATE(xas, &folio->mapping->i_pages, folio->index);
@@ -3864,14 +3880,16 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 		 * is taken to serialise against parallel split or collapse
 		 * operations.
 		 */
-		anon_vma = folio_get_anon_vma(folio);
-		if (!anon_vma) {
-			ret = -EBUSY;
-			goto out;
+		if (!isolated) {
+			anon_vma = folio_get_anon_vma(folio);
+			if (!anon_vma) {
+				ret = -EBUSY;
+				goto out;
+			}
+			anon_vma_lock_write(anon_vma);
 		}
 		end = -1;
 		mapping = NULL;
-		anon_vma_lock_write(anon_vma);
 	} else {
 		unsigned int min_order;
 		gfp_t gfp;
@@ -3933,7 +3951,8 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 		goto out_unlock;
 	}
 
-	unmap_folio(folio);
+	if (!isolated)
+		unmap_folio(folio);
 
 	/* block interrupt reentry in xa_lock and spinlock */
 	local_irq_disable();
@@ -3986,14 +4005,15 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 
 		ret = __split_unmapped_folio(folio, new_order,
 				split_at, lock_at, list, end, &xas, mapping,
-				uniform_split);
+				uniform_split, isolated);
 	} else {
 		spin_unlock(&ds_queue->split_queue_lock);
 fail:
 		if (mapping)
 			xas_unlock(&xas);
 		local_irq_enable();
-		remap_page(folio, folio_nr_pages(folio), 0);
+		if (!isolated)
+			remap_page(folio, folio_nr_pages(folio), 0);
 		ret = -EAGAIN;
 	}
 
@@ -4059,12 +4079,13 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
  * Returns -EINVAL when trying to split to an order that is incompatible
  * with the folio. Splitting to order 0 is compatible with all folios.
  */
-int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
-		unsigned int new_order)
+int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
+		unsigned int new_order, bool isolated)
 {
 	struct folio *folio = page_folio(page);
 
-	return __folio_split(folio, new_order, &folio->page, page, list, true);
+	return __folio_split(folio, new_order, &folio->page, page, list, true,
+			isolated);
 }
 
 /*
@@ -4093,7 +4114,7 @@ int folio_split(struct folio *folio, unsigned int new_order,
 		struct page *split_at, struct list_head *list)
 {
 	return __folio_split(folio, new_order, split_at, &folio->page, list,
-			false);
+			false, false);
 }
 
 int min_order_for_split(struct folio *folio)
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index f3fff5d705bd..e4510bb86b3c 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -804,6 +804,24 @@ static int migrate_vma_insert_huge_pmd_page(struct migrate_vma *migrate,
 	src[i] &= ~MIGRATE_PFN_MIGRATE;
 	return 0;
 }
+
+static void migrate_vma_split_pages(struct migrate_vma *migrate,
+				    unsigned long idx, unsigned long addr,
+				    struct folio *folio)
+{
+	unsigned long i;
+	unsigned long pfn;
+	unsigned long flags;
+
+	folio_get(folio);
+	split_huge_pmd_address(migrate->vma, addr, true, folio);
+	__split_huge_page_to_list_to_order(folio_page(folio, 0), NULL, 0, true);
+	migrate->src[idx] &= ~MIGRATE_PFN_COMPOUND;
+	flags = migrate->src[idx] & ((1UL << MIGRATE_PFN_SHIFT) - 1);
+	pfn = migrate->src[idx] >> MIGRATE_PFN_SHIFT;
+	for (i = 1; i < HPAGE_PMD_NR; i++)
+		migrate->src[i+idx] = migrate_pfn(pfn + i) | flags;
+}
 #else /* !CONFIG_ARCH_ENABLE_THP_MIGRATION */
 static int migrate_vma_insert_huge_pmd_page(struct migrate_vma *migrate,
 					    unsigned long addr,
@@ -813,6 +831,11 @@ static int migrate_vma_insert_huge_pmd_page(struct migrate_vma *migrate,
 {
 	return 0;
 }
+
+static void migrate_vma_split_pages(struct migrate_vma *migrate,
+				    unsigned long idx, unsigned long addr,
+				    struct folio *folio)
+{}
 #endif
 
 /*
@@ -962,8 +985,9 @@ static void __migrate_device_pages(unsigned long *src_pfns,
 		struct migrate_vma *migrate)
 {
 	struct mmu_notifier_range range;
-	unsigned long i;
+	unsigned long i, j;
 	bool notified = false;
+	unsigned long addr;
 
 	for (i = 0; i < npages; ) {
 		struct page *newpage = migrate_pfn_to_page(dst_pfns[i]);
@@ -1005,12 +1029,16 @@ static void __migrate_device_pages(unsigned long *src_pfns,
 			    (!(dst_pfns[i] & MIGRATE_PFN_COMPOUND))) {
 				nr = HPAGE_PMD_NR;
 				src_pfns[i] &= ~MIGRATE_PFN_COMPOUND;
-				src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
-				goto next;
+			} else {
+				nr = 1;
 			}
 
-			migrate_vma_insert_page(migrate, addr, &dst_pfns[i],
-						&src_pfns[i]);
+			for (j = 0; j < nr && i + j < npages; j++) {
+				src_pfns[i+j] |= MIGRATE_PFN_MIGRATE;
+				migrate_vma_insert_page(migrate,
+					addr + j * PAGE_SIZE,
+					&dst_pfns[i+j], &src_pfns[i+j]);
+			}
 			goto next;
 		}
 
@@ -1032,7 +1060,10 @@ static void __migrate_device_pages(unsigned long *src_pfns,
 							MIGRATE_PFN_COMPOUND);
 					goto next;
 				}
-				src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
+				nr = 1 << folio_order(folio);
+				addr = migrate->start + i * PAGE_SIZE;
+				migrate_vma_split_pages(migrate, i, addr, folio);
+				extra_cnt++;
 			} else if ((src_pfns[i] & MIGRATE_PFN_MIGRATE) &&
 				   (dst_pfns[i] & MIGRATE_PFN_COMPOUND) &&
 				   !(src_pfns[i] & MIGRATE_PFN_COMPOUND)) {
@@ -1067,12 +1098,17 @@ static void __migrate_device_pages(unsigned long *src_pfns,
 		BUG_ON(folio_test_writeback(folio));
 
 		if (migrate && migrate->fault_page == page)
-			extra_cnt = 1;
-		r = folio_migrate_mapping(mapping, newfolio, folio, extra_cnt);
-		if (r != MIGRATEPAGE_SUCCESS)
-			src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
-		else
-			folio_migrate_flags(newfolio, folio);
+			extra_cnt++;
+		for (j = 0; j < nr && i + j < npages; j++) {
+			folio = page_folio(migrate_pfn_to_page(src_pfns[i+j]));
+			newfolio = page_folio(migrate_pfn_to_page(dst_pfns[i+j]));
+
+			r = folio_migrate_mapping(mapping, newfolio, folio, extra_cnt);
+			if (r != MIGRATEPAGE_SUCCESS)
+				src_pfns[i+j] &= ~MIGRATE_PFN_MIGRATE;
+			else
+				folio_migrate_flags(newfolio, folio);
+		}
 next:
 		i += nr;
 	}
-- 
2.48.1
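
For context, below is a sketch of the driver-side fallback this patch
enables. It is illustrative only and not part of the patch: it assumes
the MIGRATE_PFN_COMPOUND collection semantics introduced earlier in
this series, and my_dev_alloc_huge_page()/my_dev_alloc_page() are
hypothetical stand-ins for a driver's device-private allocator. When
the huge allocation fails, the driver backs the range with order-0
destination pages and leaves MIGRATE_PFN_COMPOUND clear on the dst
entry, which is what makes __migrate_device_pages() call
migrate_vma_split_pages() on the already-isolated source THP.

static void my_dev_fill_dst(struct migrate_vma *args)
{
	unsigned long i, j;

	for (i = 0; i < args->npages; i++) {
		struct page *dpage;

		if (!(args->src[i] & MIGRATE_PFN_MIGRATE))
			continue;

		if (args->src[i] & MIGRATE_PFN_COMPOUND) {
			dpage = my_dev_alloc_huge_page(args);
			if (dpage) {
				/* Compound dst: migrate the THP as one unit. */
				args->dst[i] = migrate_pfn(page_to_pfn(dpage)) |
					       MIGRATE_PFN_COMPOUND;
				i += HPAGE_PMD_NR - 1;
				continue;
			}
			/*
			 * No huge page available: back each subpage with an
			 * order-0 page, without MIGRATE_PFN_COMPOUND, so the
			 * core splits the isolated source folio.
			 */
			for (j = 0; j < HPAGE_PMD_NR; j++) {
				dpage = my_dev_alloc_page(args);
				if (!dpage)
					break;
				args->dst[i + j] =
					migrate_pfn(page_to_pfn(dpage));
			}
			i += HPAGE_PMD_NR - 1;
			continue;
		}

		dpage = my_dev_alloc_page(args);
		if (dpage)
			args->dst[i] = migrate_pfn(page_to_pfn(dpage));
	}
}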