This is a preparatory patch that enables batch copying for folios
undergoing migration. By batch copying the folio content, we can
efficiently utilize the capabilities of DMA hardware or multi-threaded
folio copy. It also adds MIGRATE_NO_COPY back to migrate_mode, so that
the folio copy is skipped during the metadata copy process and performed
in a batch later.

Currently, the folio move operation is performed individually for each
folio in a sequential manner:

for_each_folio() {
	Copy folio metadata like flags and mappings
	Copy the folio content from src to dst
	Update page tables with dst folio
}

With this patch, we transition to a batch processing approach as shown
below:

for_each_folio() {
	Copy folio metadata like flags and mappings
}
Batch copy all src folios to dst
for_each_folio() {
	Update page tables with dst folios
}

dst->private is used to store page states and a possible anon_vma value,
and thus needs to be cleared during the metadata copy process. To avoid
an additional memory allocation to store this data during the batch copy
process, src->private is used to store it after the metadata copy, since
src->private is no longer used at that point.

Originally-by: Shivank Garg <shivankg@xxxxxxx>
Signed-off-by: Zi Yan <ziy@xxxxxxxxxx>
---
 include/linux/migrate_mode.h |   2 +
 mm/migrate.c                 | 207 +++++++++++++++++++++++++++++++++--
 2 files changed, 201 insertions(+), 8 deletions(-)

diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h
index 265c4328b36a..9af6c949a057 100644
--- a/include/linux/migrate_mode.h
+++ b/include/linux/migrate_mode.h
@@ -7,11 +7,13 @@
  * on most operations but not ->writepage as the potential stall time
  * is too significant
  * MIGRATE_SYNC will block when migrating pages
+ * MIGRATE_NO_COPY will not copy page content
  */
 enum migrate_mode {
 	MIGRATE_ASYNC,
 	MIGRATE_SYNC_LIGHT,
 	MIGRATE_SYNC,
+	MIGRATE_NO_COPY,
 };
 
 enum migrate_reason {
diff --git a/mm/migrate.c b/mm/migrate.c
index a83508f94c57..95c4cc4a7823 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -51,6 +51,7 @@
 
 #include "internal.h"
 
+
 bool isolate_movable_page(struct page *page, isolate_mode_t mode)
 {
 	struct folio *folio = folio_get_nontail_page(page);
@@ -752,14 +753,19 @@ static int __migrate_folio(struct address_space *mapping, struct folio *dst,
 		enum migrate_mode mode)
 {
 	int rc, expected_count = folio_expected_refs(mapping, src);
+	unsigned long dst_private = (unsigned long)dst->private;
 
 	/* Check whether src does not have extra refs before we do more work */
 	if (folio_ref_count(src) != expected_count)
 		return -EAGAIN;
 
-	rc = folio_mc_copy(dst, src);
-	if (unlikely(rc))
-		return rc;
+	if (mode == MIGRATE_NO_COPY)
+		dst->private = NULL;
+	else {
+		rc = folio_mc_copy(dst, src);
+		if (unlikely(rc))
+			return rc;
+	}
 
 	rc = __folio_migrate_mapping(mapping, dst, src, expected_count);
 	if (rc != MIGRATEPAGE_SUCCESS)
@@ -769,6 +775,10 @@ static int __migrate_folio(struct address_space *mapping, struct folio *dst,
 		folio_attach_private(dst, folio_detach_private(src));
 
 	folio_migrate_flags(dst, src);
+
+	if (mode == MIGRATE_NO_COPY)
+		src->private = (void *)dst_private;
+
 	return MIGRATEPAGE_SUCCESS;
 }
 
@@ -1042,7 +1052,7 @@ static int _move_to_new_folio_prep(struct folio *dst, struct folio *src,
 					mode);
 		else
 			rc = fallback_migrate_folio(mapping, dst, src, mode);
-	} else {
+	} else if (mode != MIGRATE_NO_COPY) {
 		const struct movable_operations *mops;
 
 		/*
@@ -1060,7 +1070,8 @@ static int _move_to_new_folio_prep(struct folio *dst, struct folio *src,
 		rc = mops->migrate_page(&dst->page, &src->page, mode);
 		WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS &&
				!folio_test_isolated(src));
-	}
+	} else
+		rc = -EAGAIN;
 out:
 	return rc;
 }
@@ -1138,7 +1149,7 @@ static void __migrate_folio_record(struct folio *dst,
 	dst->private = (void *)anon_vma + old_page_state;
 }
 
-static void __migrate_folio_extract(struct folio *dst,
+static void __migrate_folio_read(struct folio *dst,
 				   int *old_page_state,
 				   struct anon_vma **anon_vmap)
 {
@@ -1146,6 +1157,13 @@ static void __migrate_folio_extract(struct folio *dst,
 
 	*anon_vmap = (struct anon_vma *)(private & ~PAGE_OLD_STATES);
 	*old_page_state = private & PAGE_OLD_STATES;
+}
+
+static void __migrate_folio_extract(struct folio *dst,
+				   int *old_page_state,
+				   struct anon_vma **anon_vmap)
+{
+	__migrate_folio_read(dst, old_page_state, anon_vmap);
 	dst->private = NULL;
 }
 
@@ -1771,6 +1789,174 @@ static void migrate_folios_move(struct list_head *src_folios,
 	}
 }
 
+static void migrate_folios_batch_move(struct list_head *src_folios,
+		struct list_head *dst_folios,
+		free_folio_t put_new_folio, unsigned long private,
+		enum migrate_mode mode, int reason,
+		struct list_head *ret_folios,
+		struct migrate_pages_stats *stats,
+		int *retry, int *thp_retry, int *nr_failed,
+		int *nr_retry_pages)
+{
+	struct folio *folio, *folio2, *dst, *dst2;
+	int rc, nr_pages = 0, nr_mig_folios = 0;
+	int old_page_state = 0;
+	struct anon_vma *anon_vma = NULL;
+	bool is_lru;
+	int is_thp = 0;
+	LIST_HEAD(err_src);
+	LIST_HEAD(err_dst);
+
+	if (mode != MIGRATE_ASYNC) {
+		*retry += 1;
+		return;
+	}
+
+	/*
+	 * Iterate over the list of locked src/dst folios to copy the metadata
+	 */
+	dst = list_first_entry(dst_folios, struct folio, lru);
+	dst2 = list_next_entry(dst, lru);
+	list_for_each_entry_safe(folio, folio2, src_folios, lru) {
+		is_thp = folio_test_large(folio) && folio_test_pmd_mappable(folio);
+		nr_pages = folio_nr_pages(folio);
+		is_lru = !__folio_test_movable(folio);
+
+		/*
+		 * dst->private is not cleared here. It is cleared and moved to
+		 * src->private in __migrate_folio().
+		 */
+		__migrate_folio_read(dst, &old_page_state, &anon_vma);
+
+		/*
+		 * Use MIGRATE_NO_COPY mode in migrate_folio family functions
+		 * to copy the flags, mapping and some other ancillary information.
+		 * This does everything except the page copy. The actual page copy
+		 * is handled later in a batch manner.
+		 */
+		rc = _move_to_new_folio_prep(dst, folio, MIGRATE_NO_COPY);
+
+		/*
+		 * -EAGAIN: Move src/dst folios to tmp lists for retry
+		 * Other Errno: Put src folio on ret_folios list, remove the dst folio
+		 * Success: Copy the folio bytes, restore working pte, unlock and
+		 *	    decrement refcounter
+		 */
+		if (rc == -EAGAIN) {
+			*retry += 1;
+			*thp_retry += is_thp;
+			*nr_retry_pages += nr_pages;
+
+			list_move_tail(&folio->lru, &err_src);
+			list_move_tail(&dst->lru, &err_dst);
+			__migrate_folio_record(dst, old_page_state, anon_vma);
+		} else if (rc != MIGRATEPAGE_SUCCESS) {
+			*nr_failed += 1;
+			stats->nr_thp_failed += is_thp;
+			stats->nr_failed_pages += nr_pages;
+
+			list_del(&dst->lru);
+			migrate_folio_undo_src(folio, old_page_state & PAGE_WAS_MAPPED,
+					anon_vma, true, ret_folios);
+			migrate_folio_undo_dst(dst, true, put_new_folio, private);
+		} else /* MIGRATEPAGE_SUCCESS */
+			nr_mig_folios++;
+
+		dst = dst2;
+		dst2 = list_next_entry(dst, lru);
+	}
+
+	/* Exit if folio list for batch migration is empty */
+	if (!nr_mig_folios)
+		goto out;
+
+	/* Batch copy the folios */
+	{
+		dst = list_first_entry(dst_folios, struct folio, lru);
+		dst2 = list_next_entry(dst, lru);
+		list_for_each_entry_safe(folio, folio2, src_folios, lru) {
+			is_thp = folio_test_large(folio) &&
+				 folio_test_pmd_mappable(folio);
+			nr_pages = folio_nr_pages(folio);
+			rc = folio_mc_copy(dst, folio);
+
+			if (rc) {
+				int old_page_state = 0;
+				struct anon_vma *anon_vma = NULL;
+
+				/*
+				 * dst->private is moved to src->private in
+				 * __migrate_folio(), so page state and anon_vma
+				 * values can be extracted from the (src) folio.
+				 */
+				__migrate_folio_extract(folio, &old_page_state,
+						&anon_vma);
+				migrate_folio_undo_src(folio,
+						old_page_state & PAGE_WAS_MAPPED,
+						anon_vma, true, ret_folios);
+				list_del(&dst->lru);
+				migrate_folio_undo_dst(dst, true, put_new_folio,
+						private);
+			}
+
+			switch (rc) {
+			case MIGRATEPAGE_SUCCESS:
+				stats->nr_succeeded += nr_pages;
+				stats->nr_thp_succeeded += is_thp;
+				break;
+			default:
+				*nr_failed += 1;
+				stats->nr_thp_failed += is_thp;
+				stats->nr_failed_pages += nr_pages;
+				break;
+			}
+
+			dst = dst2;
+			dst2 = list_next_entry(dst, lru);
+		}
+	}
+
+	/*
+	 * Iterate the folio lists to remove migration ptes and restore them
+	 * as working ptes. Unlock the folios, add/remove them to/from LRU
+	 * lists (if applicable) and release the src folios.
+	 */
+	dst = list_first_entry(dst_folios, struct folio, lru);
+	dst2 = list_next_entry(dst, lru);
+	list_for_each_entry_safe(folio, folio2, src_folios, lru) {
+		is_thp = folio_test_large(folio) && folio_test_pmd_mappable(folio);
+		nr_pages = folio_nr_pages(folio);
+		/*
+		 * dst->private is moved to src->private in __migrate_folio(),
+		 * so page state and anon_vma values can be extracted from the
+		 * (src) folio.
+		 */
+		__migrate_folio_extract(folio, &old_page_state, &anon_vma);
+		list_del(&dst->lru);
+
+		_move_to_new_folio_finalize(dst, folio, MIGRATEPAGE_SUCCESS);
+
+		/*
+		 * The few steps below only apply to lru pages, which is
+		 * ensured as we have removed the non-lru pages from our list.
+		 */
+		_migrate_folio_move_finalize1(folio, dst, old_page_state);
+
+		_migrate_folio_move_finalize2(folio, dst, reason, anon_vma);
+
+		/* Page migration successful, increase stat counter */
+		stats->nr_succeeded += nr_pages;
+		stats->nr_thp_succeeded += is_thp;
+
+		dst = dst2;
+		dst2 = list_next_entry(dst, lru);
+	}
+out:
+	/* Add tmp folios back to the list to let the CPU re-attempt migration. */
+	list_splice(&err_src, src_folios);
+	list_splice(&err_dst, dst_folios);
+}
+
 static void migrate_folios_undo(struct list_head *src_folios,
 		struct list_head *dst_folios,
 		free_folio_t put_new_folio, unsigned long private,
@@ -1981,13 +2167,18 @@ static int migrate_pages_batch(struct list_head *from,
 	/* Flush TLBs for all unmapped folios */
 	try_to_unmap_flush();
 
-	retry = 1;
+	retry = 0;
+	/* Batch move the unmapped folios */
+	migrate_folios_batch_move(&unmap_folios, &dst_folios, put_new_folio,
+			private, mode, reason, ret_folios, stats, &retry,
+			&thp_retry, &nr_failed, &nr_retry_pages);
+
 	for (pass = 0; pass < nr_pass && retry; pass++) {
 		retry = 0;
 		thp_retry = 0;
 		nr_retry_pages = 0;
 
-		/* Move the unmapped folios */
+		/* Move the remaining unmapped folios */
 		migrate_folios_move(&unmap_folios, &dst_folios,
 				put_new_folio, private, mode, reason,
 				ret_folios, stats, &retry, &thp_retry,
-- 
2.45.2
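
As an aside for readers new to this area, below is a minimal userspace C
sketch of the three-phase flow the commit message describes (metadata copy,
batched content copy, page-table update), including the trick of stashing the
per-folio migration state in src->private once dst->private must be cleared.
All names here (toy_folio, copy_metadata, batch_copy_contents, remap_to_dst)
are hypothetical stand-ins invented for illustration, not kernel APIs; the
real work happens in __migrate_folio(), folio_mc_copy() and the finalize
helpers in the patch above. It is a sketch of the idea only, not the patch's
implementation.

#include <stdio.h>
#include <string.h>

#define NR_FOLIOS	4
#define FOLIO_BYTES	16

struct toy_folio {
	unsigned long flags;		/* stands in for folio metadata */
	void *private;			/* stashed migration state */
	char data[FOLIO_BYTES];		/* stands in for page contents */
};

/* Phase 1: copy metadata only; move the stashed state to the idle src. */
static void copy_metadata(struct toy_folio *dst, struct toy_folio *src)
{
	dst->flags = src->flags;
	/* dst->private held migration state; src->private is free now. */
	src->private = dst->private;
	dst->private = NULL;
}

/* Phase 2: batch copy contents; DMA hardware or a worker pool could
 * process this whole array at once instead of one folio at a time. */
static void batch_copy_contents(struct toy_folio *dst, struct toy_folio *src,
				int nr)
{
	for (int i = 0; i < nr; i++)
		memcpy(dst[i].data, src[i].data, FOLIO_BYTES);
}

/* Phase 3: "update page tables": read back the stashed state from src
 * and publish dst in the mapping slot. */
static void remap_to_dst(struct toy_folio **slot, struct toy_folio *dst,
			 struct toy_folio *src)
{
	void *state = src->private;	/* stashed in phase 1 */

	(void)state;		/* the kernel uses this to restore ptes */
	src->private = NULL;
	*slot = dst;
}

int main(void)
{
	static struct toy_folio src[NR_FOLIOS], dst[NR_FOLIOS];
	struct toy_folio *pte[NR_FOLIOS];	/* toy "page table" */

	for (int i = 0; i < NR_FOLIOS; i++) {
		src[i].flags = 0x100 + i;
		snprintf(src[i].data, FOLIO_BYTES, "folio-%d", i);
		pte[i] = &src[i];
	}

	for (int i = 0; i < NR_FOLIOS; i++)		/* phase 1 */
		copy_metadata(&dst[i], &src[i]);
	batch_copy_contents(dst, src, NR_FOLIOS);	/* phase 2 */
	for (int i = 0; i < NR_FOLIOS; i++)		/* phase 3 */
		remap_to_dst(&pte[i], &dst[i], &src[i]);

	for (int i = 0; i < NR_FOLIOS; i++)
		printf("pte[%d] -> flags=0x%lx data=%s\n", i,
		       pte[i]->flags, pte[i]->data);
	return 0;
}

The point of the split is visible in phase 2: because no metadata or
page-table work is interleaved, the contents loop is a single dense copy
job that an offload engine or multiple threads can consume wholesale.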