Huang Ying <ying.huang@xxxxxxxxx> writes:

> This is a preparation patch to batch the folio unmapping and moving
> for the non-hugetlb folios. Based on that we can batch the TLB
> shootdown during the folio migration and make it possible to use some
> hardware accelerator for the folio copying.
>
> In this patch the hugetlb folios and non-hugetlb folios migration is
> separated in migrate_pages() to make it easy to change the non-hugetlb
> folios migration implementation.
>
> Signed-off-by: "Huang, Ying" <ying.huang@xxxxxxxxx>
> Cc: Zi Yan <ziy@xxxxxxxxxx>
> Cc: Yang Shi <shy828301@xxxxxxxxx>
> Cc: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx>
> Cc: Oscar Salvador <osalvador@xxxxxxx>
> Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
> Cc: Bharata B Rao <bharata@xxxxxxx>
> Cc: Alistair Popple <apopple@xxxxxxxxxx>
> Cc: haoxin <xhao@xxxxxxxxxxxxxxxxx>
> ---
>  mm/migrate.c | 114 ++++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 99 insertions(+), 15 deletions(-)
>
> diff --git a/mm/migrate.c b/mm/migrate.c
> index ec9263a33d38..bdbe73fe2eb7 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -1404,6 +1404,87 @@ struct migrate_pages_stats {
>          int nr_thp_split;
>  };
>
> +static int migrate_hugetlbs(struct list_head *from, new_page_t get_new_page,
> +                            free_page_t put_new_page, unsigned long private,
> +                            enum migrate_mode mode, int reason,
> +                            struct migrate_pages_stats *stats,
> +                            struct list_head *ret_folios)
> +{
> +        int retry = 1;
> +        int nr_failed = 0;
> +        int nr_retry_pages = 0;
> +        int pass = 0;
> +        struct folio *folio, *folio2;
> +        int rc = 0, nr_pages;
> +
> +        for (pass = 0; pass < 10 && retry; pass++) {
> +                retry = 0;
> +                nr_retry_pages = 0;
> +
> +                list_for_each_entry_safe(folio, folio2, from, lru) {
> +                        if (!folio_test_hugetlb(folio))
> +                                continue;
> +
> +                        nr_pages = folio_nr_pages(folio);
> +
> +                        cond_resched();
> +
> +                        rc = unmap_and_move_huge_page(get_new_page,
> +                                                      put_new_page, private,
> +                                                      &folio->page, pass > 2, mode,
> +                                                      reason, ret_folios);
> +                        /*
> +                         * The rules are:
> +                         *      Success: hugetlb folio will be put back
> +                         *      -EAGAIN: stay on the from list
> +                         *      -ENOMEM: stay on the from list
> +                         *      -ENOSYS: stay on the from list
> +                         *      Other errno: put on ret_folios list
> +                         */
> +                        switch(rc) {
> +                        case -ENOSYS:
> +                                /* Hugetlb migration is unsupported */
> +                                nr_failed++;
> +                                stats->nr_failed_pages += nr_pages;
> +                                list_move_tail(&folio->lru, ret_folios);
> +                                break;
> +                        case -ENOMEM:
> +                                /*
> +                                 * When memory is low, don't bother to try to migrate
> +                                 * other folios, just exit.
> +                                 */
> +                                nr_failed++;

This currently isn't relevant for -ENOMEM and I think it would be
clearer if it was dropped.

> +                                stats->nr_failed_pages += nr_pages;

Makes sense not to continue migration with low memory, but shouldn't we
add the remaining unmigrated hugetlb folios to stats->nr_failed_pages as
well? I.e. don't we still have to continue the iteration to find and
account for these?

> +                                goto out;

Given this is the only use of the out label, and that there is a
special case for -ENOMEM there anyway, I think it would be clearer to
return directly. Something like the (untested) sketch at the end of
this mail, perhaps.

> +                        case -EAGAIN:
> +                                retry++;
> +                                nr_retry_pages += nr_pages;
> +                                break;
> +                        case MIGRATEPAGE_SUCCESS:
> +                                stats->nr_succeeded += nr_pages;
> +                                break;
> +                        default:
> +                                /*
> +                                 * Permanent failure (-EBUSY, etc.):
> +                                 * unlike -EAGAIN case, the failed folio is
> +                                 * removed from migration folio list and not
> +                                 * retried in the next outer loop.
> +                                 */
> +                                nr_failed++;
> +                                stats->nr_failed_pages += nr_pages;
> +                                break;
> +                        }
> +                }
> +        }
> +out:
> +        nr_failed += retry;
> +        stats->nr_failed_pages += nr_retry_pages;
> +        if (rc != -ENOMEM)
> +                rc = nr_failed;
> +
> +        return rc;
> +}
> +
>  /*
>   * migrate_pages - migrate the folios specified in a list, to the free folios
>   * supplied as the target for the page migration
> @@ -1437,7 +1518,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
>          int retry = 1;
>          int large_retry = 1;
>          int thp_retry = 1;
> -        int nr_failed = 0;
> +        int nr_failed;
>          int nr_retry_pages = 0;
>          int nr_large_failed = 0;
>          int pass = 0;
> @@ -1454,6 +1535,12 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
>          trace_mm_migrate_pages_start(mode, reason);
>
>          memset(&stats, 0, sizeof(stats));
> +        rc = migrate_hugetlbs(from, get_new_page, put_new_page, private, mode, reason,
> +                              &stats, &ret_folios);
> +        if (rc < 0)
> +                goto out;
> +        nr_failed = rc;
> +
>  split_folio_migration:
>          for (pass = 0; pass < 10 && (retry || large_retry); pass++) {
>                  retry = 0;
> @@ -1462,30 +1549,28 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
>                  nr_retry_pages = 0;
>
>                  list_for_each_entry_safe(folio, folio2, from, lru) {
> +                        if (folio_test_hugetlb(folio)) {

How do we hit this case? Shouldn't migrate_hugetlbs() have already
moved any hugetlb folios off the from list?

> +                                list_move_tail(&folio->lru, &ret_folios);
> +                                continue;
> +                        }
> +
>                          /*
>                           * Large folio statistics is based on the source large
>                           * folio. Capture required information that might get
>                           * lost during migration.
>                           */
> -                        is_large = folio_test_large(folio) && !folio_test_hugetlb(folio);
> +                        is_large = folio_test_large(folio);
>                          is_thp = is_large && folio_test_pmd_mappable(folio);
>                          nr_pages = folio_nr_pages(folio);
> +
>                          cond_resched();
>
> -                        if (folio_test_hugetlb(folio))
> -                                rc = unmap_and_move_huge_page(get_new_page,
> -                                                put_new_page, private,
> -                                                &folio->page, pass > 2, mode,
> -                                                reason,
> -                                                &ret_folios);
> -                        else
> -                                rc = unmap_and_move(get_new_page, put_new_page,
> -                                                private, folio, pass > 2, mode,
> -                                                reason, &ret_folios);
> +                        rc = unmap_and_move(get_new_page, put_new_page,
> +                                            private, folio, pass > 2, mode,
> +                                            reason, &ret_folios);
>                          /*
>                           * The rules are:
> -                         *      Success: non hugetlb folio will be freed, hugetlb
> -                         *      folio will be put back
> +                         *      Success: folio will be freed
>                           *      -EAGAIN: stay on the from list
>                           *      -ENOMEM: stay on the from list
>                           *      -ENOSYS: stay on the from list
> @@ -1512,7 +1597,6 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
>                                          stats.nr_thp_split += is_thp;
>                                          break;
>                                  }
> -                                /* Hugetlb migration is unsupported */
>                          } else if (!no_split_folio_counting) {
>                                  nr_failed++;
>                          }
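
For the -ENOMEM case I was thinking of something along these lines.
Completely untested, and the explicit walk over the from list is only my
sketch of how the remaining hugetlb folios could be accounted for, not
code from this patch:

                        case -ENOMEM:
                                /*
                                 * When memory is low, don't bother to
                                 * try to migrate other folios, just
                                 * abort the whole operation.
                                 *
                                 * The folio that just failed and every
                                 * not yet attempted hugetlb folio are
                                 * still on the from list, so one walk
                                 * over it accounts them all as failed
                                 * pages. Reusing folio2 as the iterator
                                 * is fine because we return straight
                                 * away.
                                 */
                                list_for_each_entry(folio2, from, lru) {
                                        if (folio_test_hugetlb(folio2))
                                                stats->nr_failed_pages +=
                                                        folio_nr_pages(folio2);
                                }
                                return -ENOMEM;

This also drops the nr_failed++, which as noted above is dead on this
path because the caller sees the -ENOMEM return value, not nr_failed.
And it subsumes the stats->nr_failed_pages += nr_retry_pages from the
out label, since folios that previously got -EAGAIN are still on the
from list and are picked up by the walk.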