check_and_migrate_movable_pages() does not honor isolation errors, and also retries migration failures indefinably. Fix both of the above issues: add a new function that checks and unpins pages range check_and_unpin_pages(). Move the retry loop from check_and_migrate_movable_pages() to __gup_longterm_locked(). Rename check_and_migrate_movable_pages() as migrate_movable_pages() and make this function accept already unpinned pages. Also, track the errors during isolation, so they can be re-tried with a different maximum limit, the isolation errors should be ephemeral. Signed-off-by: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx> --- mm/gup.c | 179 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 111 insertions(+), 68 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 1ebb7cc2fbe4..70cc8b8f67c4 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1550,27 +1550,57 @@ struct page *get_dump_page(unsigned long addr) } #endif /* CONFIG_ELF_CORE */ -static long check_and_migrate_movable_pages(struct mm_struct *mm, - unsigned long start, - unsigned long nr_pages, - struct page **pages, - struct vm_area_struct **vmas, - unsigned int gup_flags) -{ - unsigned long i; - unsigned long step; - bool drain_allow = true; - bool migrate_allow = true; +/* + * Verify that there are no unpinnable (movable) pages, if so return true. + * Otherwise an unpinnable pages is found return false, and unpin all pages. + */ +static bool check_and_unpin_pages(unsigned long nr_pages, struct page **pages, + unsigned int gup_flags) +{ + unsigned long i, step; + + for (i = 0; i < nr_pages; i += step) { + struct page *head = compound_head(pages[i]); + + step = compound_nr(head) - (pages[i] - head); + if (!is_pinnable_page(head)) + break; + } + + if (i >= nr_pages) + return true; + + if (gup_flags & FOLL_PIN) { + unpin_user_pages(pages, nr_pages); + } else { + for (i = 0; i < nr_pages; i++) + put_page(pages[i]); + } + + return false; +} + +#define PINNABLE_MIGRATE_MAX 10 +#define PINNABLE_ISOLATE_MAX 100 + +/* + * Migrate pages that cannot be pinned. Return zero on success and error code + * on migration failure. If migration was successful but page isolation had + * failures return number of pages that failed to be isolated. + */ +static long migrate_movable_pages(unsigned long nr_pages, struct page **pages) +{ + unsigned long i, step; LIST_HEAD(movable_page_list); - long ret = nr_pages; + long ret = 0; + long error_count = 0; struct migration_target_control mtc = { .nid = NUMA_NO_NODE, .gfp_mask = GFP_USER | __GFP_NOWARN, }; -check_again: - for (i = 0; i < nr_pages;) { - + lru_add_drain_all(); + for (i = 0; i < nr_pages; i += step) { struct page *head = compound_head(pages[i]); /* @@ -1583,62 +1613,42 @@ static long check_and_migrate_movable_pages(struct mm_struct *mm, * these entries, try to move them out if possible. */ if (!is_pinnable_page(head)) { - if (PageHuge(head)) - isolate_huge_page(head, &movable_page_list); - else { - if (!PageLRU(head) && drain_allow) { - lru_add_drain_all(); - drain_allow = false; - } - + if (PageHuge(head)) { + if (!isolate_huge_page(head, &movable_page_list)) + error_count += step; + } else { if (!isolate_lru_page(head)) { list_add_tail(&head->lru, &movable_page_list); mod_node_page_state(page_pgdat(head), NR_ISOLATED_ANON + page_is_file_lru(head), thp_nr_pages(head)); + } else { + error_count += step; } } } - - i += step; } if (!list_empty(&movable_page_list)) { - /* - * drop the above get_user_pages reference. - */ - if (gup_flags & FOLL_PIN) - unpin_user_pages(pages, nr_pages); - else - for (i = 0; i < nr_pages; i++) - put_page(pages[i]); + ret = migrate_pages(&movable_page_list, alloc_migration_target, + NULL, (unsigned long)&mtc, MIGRATE_SYNC, + MR_LONGTERM_PIN); + /* Assume -EBUSY failure if some pages were not migrated */ + if (ret > 0) + ret = -EBUSY; + } - if (migrate_pages(&movable_page_list, alloc_migration_target, NULL, - (unsigned long)&mtc, MIGRATE_SYNC, MR_LONGTERM_PIN)) { - /* - * some of the pages failed migration. Do get_user_pages - * without migration. - */ - migrate_allow = false; + if (ret && !list_empty(&movable_page_list)) + putback_movable_pages(&movable_page_list); - if (!list_empty(&movable_page_list)) - putback_movable_pages(&movable_page_list); - } - /* - * We did migrate all the pages, Try to get the page references - * again migrating any pages which we failed to isolate earlier. - */ - ret = __get_user_pages_locked(mm, start, nr_pages, - pages, vmas, NULL, - gup_flags); - - if ((ret > 0) && migrate_allow) { - nr_pages = ret; - drain_allow = true; - goto check_again; - } - } + /* + * Check if there were isolation errors, if so they should not be + * counted toward PINNABLE_MIGRATE_MAX, so separate them, by + * returning number of pages failed to isolate. + */ + if (!ret && error_count) + ret = error_count; return ret; } @@ -1654,22 +1664,55 @@ static long __gup_longterm_locked(struct mm_struct *mm, struct vm_area_struct **vmas, unsigned int gup_flags) { - unsigned long flags = 0; + int migrate_retry = 0; + int isolate_retry = 0; + unsigned int flags; long rc; - if (gup_flags & FOLL_LONGTERM) - flags = memalloc_pin_save(); + if (!(gup_flags & FOLL_LONGTERM)) + return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, + NULL, gup_flags); - rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas, NULL, - gup_flags); + /* + * Without FOLL_WRITE fault handler may return zero page, which can + * be in a movable zone, and also will fail to isolate during migration, + * thus the longterm pin will fail. + */ + gup_flags &= FOLL_WRITE; - if (gup_flags & FOLL_LONGTERM) { - if (rc > 0) - rc = check_and_migrate_movable_pages(mm, start, rc, - pages, vmas, - gup_flags); - memalloc_pin_restore(flags); + flags = memalloc_pin_save(); + /* + * Migration may fail, we retry before giving up. Also, because after + * migration pages[] becomes outdated, we unpin and repin all pages + * in the range, so pages array is repopulated with new values. + * Also, because of this we cannot retry migration failures in a loop + * without pinning/unpinnig pages. + */ + for (; ; ) { + rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas, + NULL, gup_flags); + + /* Return if error or if all pages are pinnable */ + if (rc <= 0 || check_and_unpin_pages(rc, pages, gup_flags)) + break; + + /* Some pages are not pinnable, migrate them */ + rc = migrate_movable_pages(rc, pages); + + /* + * If there is an error, and we tried maximum number of times + * bail out. Notice: we return an error code, and all pages are + * unpinned + */ + if (rc < 0 && migrate_retry++ >= PINNABLE_MIGRATE_MAX) { + break; + } else if (rc > 0 && isolate_retry++ >= PINNABLE_ISOLATE_MAX) { + rc = -EBUSY; + break; + } } + memalloc_pin_restore(flags); + return rc; } -- 2.25.1