The patch titled Subject: selftests/vm: add selftest to verify multi THP collapse has been added to the -mm mm-unstable branch. Its filename is selftests-vm-add-selftest-to-verify-multi-thp-collapse.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/selftests-vm-add-selftest-to-verify-multi-thp-collapse.patch This patch will later appear in the mm-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: "Zach O'Keefe" <zokeefe@xxxxxxxxxx> Subject: selftests/vm: add selftest to verify multi THP collapse Date: Wed, 6 Jul 2022 16:59:36 -0700 Add support to allocate and verify collapse of multiple hugepage-sized regions into multiple THPs. Add "nr" argument to check_huge() that instructs check_huge() to check for exactly "nr_hpages" THPs. This has the added benefit of now being able to check for exactly 0 THPs, and so callsites that previously checked the negation of exactly 1 THP are now more correct. ->collapse struct collapse_context hook has been expanded with a "nr_hpages" argument to collapse "nr_hpages" hugepages. The collapse_full() test has been repurposed to collapse 4 THPs at once. It is expected more tests will want to test multi THP collapse (e.g. file/shmem). This is of particular benefit to madvise collapse context given that it may do many THP collapses during a single syscall. Link: https://lkml.kernel.org/r/20220706235936.2197195-19-zokeefe@xxxxxxxxxx Signed-off-by: Zach O'Keefe <zokeefe@xxxxxxxxxx> Cc: Alex Shi <alex.shi@xxxxxxxxxxxxxxxxx> Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx> Cc: Arnd Bergmann <arnd@xxxxxxxx> Cc: Axel Rasmussen <axelrasmussen@xxxxxxxxxx> Cc: Chris Kennelly <ckennelly@xxxxxxxxxx> Cc: Chris Zankel <chris@xxxxxxxxxx> Cc: David Hildenbrand <david@xxxxxxxxxx> Cc: David Rientjes <rientjes@xxxxxxxxxx> Cc: Helge Deller <deller@xxxxxx> Cc: Hugh Dickins <hughd@xxxxxxxxxx> Cc: Ivan Kokshaysky <ink@xxxxxxxxxxxxxxxxxxxx> Cc: James Bottomley <James.Bottomley@xxxxxxxxxxxxxxxxxxxxx> Cc: Jens Axboe <axboe@xxxxxxxxx> Cc: "Kirill A. Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx> Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx> Cc: Matt Turner <mattst88@xxxxxxxxx> Cc: Max Filippov <jcmvbkbc@xxxxxxxxx> Cc: Miaohe Lin <linmiaohe@xxxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxx> Cc: Minchan Kim <minchan@xxxxxxxxxx> Cc: Pasha Tatashin <pasha.tatashin@xxxxxxxxxx> Cc: Pavel Begunkov <asml.silence@xxxxxxxxx> Cc: Peter Xu <peterx@xxxxxxxxxx> Cc: Rongwei Wang <rongwei.wang@xxxxxxxxxxxxxxxxx> Cc: SeongJae Park <sj@xxxxxxxxxx> Cc: Song Liu <songliubraving@xxxxxx> Cc: Thomas Bogendoerfer <tsbogend@xxxxxxxxxxxxxxxx> Cc: Vlastimil Babka <vbabka@xxxxxxx> Cc: Yang Shi <shy828301@xxxxxxxxx> Cc: Zi Yan <ziy@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- tools/testing/selftests/vm/khugepaged.c | 140 +++++++++++----------- 1 file changed, 73 insertions(+), 67 deletions(-) --- a/tools/testing/selftests/vm/khugepaged.c~selftests-vm-add-selftest-to-verify-multi-thp-collapse +++ a/tools/testing/selftests/vm/khugepaged.c @@ -27,7 +27,7 @@ static int hpage_pmd_nr; #define PID_SMAPS "/proc/self/smaps" struct collapse_context { - void (*collapse)(const char *msg, char *p, bool expect); + void (*collapse)(const char *msg, char *p, int nr_hpages, bool expect); bool enforce_pte_scan_limits; }; @@ -362,7 +362,7 @@ static bool check_for_pattern(FILE *fp, return false; } -static bool check_huge(void *addr) +static bool check_huge(void *addr, int nr_hpages) { bool thp = false; int ret; @@ -387,7 +387,7 @@ static bool check_huge(void *addr) goto err_out; ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10ld kB", - hpage_pmd_size >> 10); + nr_hpages * (hpage_pmd_size >> 10)); if (ret >= MAX_LINE_LENGTH) { printf("%s: Pattern is too long\n", __func__); exit(EXIT_FAILURE); @@ -455,12 +455,12 @@ err_out: return swap; } -static void *alloc_mapping(void) +static void *alloc_mapping(int nr) { void *p; - p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + p = mmap(BASE_ADDR, nr * hpage_pmd_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (p != BASE_ADDR) { printf("Failed to allocate VMA at %p\n", BASE_ADDR); exit(EXIT_FAILURE); @@ -485,11 +485,11 @@ static void *alloc_hpage(void) { void *p; - p = alloc_mapping(); + p = alloc_mapping(1); printf("Allocate huge page..."); madvise(p, hpage_pmd_size, MADV_HUGEPAGE); fill_memory(p, 0, hpage_pmd_size); - if (check_huge(p)) + if (check_huge(p, 1)) success("OK"); else fail("Fail"); @@ -509,14 +509,15 @@ static void validate_memory(int *p, unsi } } -static void madvise_collapse(const char *msg, char *p, bool expect) +static void madvise_collapse(const char *msg, char *p, int nr_hpages, + bool expect) { int ret; struct settings settings = *current_settings(); printf("%s...", msg); /* Sanity check */ - if (check_huge(p)) { + if (!check_huge(p, 0)) { printf("Unexpected huge page\n"); exit(EXIT_FAILURE); } @@ -529,11 +530,11 @@ static void madvise_collapse(const char push_settings(&settings); /* Clear VM_NOHUGEPAGE */ - madvise(p, hpage_pmd_size, MADV_HUGEPAGE); - ret = madvise(p, hpage_pmd_size, MADV_COLLAPSE); + madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE); + ret = madvise(p, nr_hpages * hpage_pmd_size, MADV_COLLAPSE); if (((bool)ret) == expect) fail("Fail: Bad return value"); - else if (check_huge(p) != expect) + else if (check_huge(p, nr_hpages) != expect) fail("Fail: check_huge()"); else success("OK"); @@ -542,25 +543,25 @@ static void madvise_collapse(const char } #define TICK 500000 -static bool wait_for_scan(const char *msg, char *p) +static bool wait_for_scan(const char *msg, char *p, int nr_hpages) { int full_scans; int timeout = 6; /* 3 seconds */ /* Sanity check */ - if (check_huge(p)) { + if (!check_huge(p, 0)) { printf("Unexpected huge page\n"); exit(EXIT_FAILURE); } - madvise(p, hpage_pmd_size, MADV_HUGEPAGE); + madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE); /* Wait until the second full_scan completed */ full_scans = read_num("khugepaged/full_scans") + 2; printf("%s...", msg); while (timeout--) { - if (check_huge(p)) + if (check_huge(p, nr_hpages)) break; if (read_num("khugepaged/full_scans") >= full_scans) break; @@ -568,20 +569,21 @@ static bool wait_for_scan(const char *ms usleep(TICK); } - madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); + madvise(p, nr_hpages * hpage_pmd_size, MADV_NOHUGEPAGE); return timeout == -1; } -static void khugepaged_collapse(const char *msg, char *p, bool expect) +static void khugepaged_collapse(const char *msg, char *p, int nr_hpages, + bool expect) { - if (wait_for_scan(msg, p)) { + if (wait_for_scan(msg, p, nr_hpages)) { if (expect) fail("Timeout"); else success("OK"); return; - } else if (check_huge(p) == expect) { + } else if (check_huge(p, nr_hpages) == expect) { success("OK"); } else { fail("Fail"); @@ -596,10 +598,10 @@ static void alloc_at_fault(void) settings.thp_enabled = THP_ALWAYS; push_settings(&settings); - p = alloc_mapping(); + p = alloc_mapping(1); *p = 1; printf("Allocate huge page on fault..."); - if (check_huge(p)) + if (check_huge(p, 1)) success("OK"); else fail("Fail"); @@ -608,7 +610,7 @@ static void alloc_at_fault(void) madvise(p, page_size, MADV_DONTNEED); printf("Split huge PMD on MADV_DONTNEED..."); - if (!check_huge(p)) + if (check_huge(p, 0)) success("OK"); else fail("Fail"); @@ -618,20 +620,23 @@ static void alloc_at_fault(void) static void collapse_full(struct collapse_context *c) { void *p; + int nr_hpages = 4; + unsigned long size = nr_hpages * hpage_pmd_size; - p = alloc_mapping(); - fill_memory(p, 0, hpage_pmd_size); - c->collapse("Collapse fully populated PTE table", p, true); - validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); + p = alloc_mapping(nr_hpages); + fill_memory(p, 0, size); + c->collapse("Collapse multiple fully populated PTE table", p, nr_hpages, + true); + validate_memory(p, 0, size); + munmap(p, size); } static void collapse_empty(struct collapse_context *c) { void *p; - p = alloc_mapping(); - c->collapse("Do not collapse empty PTE table", p, false); + p = alloc_mapping(1); + c->collapse("Do not collapse empty PTE table", p, 1, false); munmap(p, hpage_pmd_size); } @@ -639,10 +644,10 @@ static void collapse_single_pte_entry(st { void *p; - p = alloc_mapping(); + p = alloc_mapping(1); fill_memory(p, 0, page_size); c->collapse("Collapse PTE table with single PTE entry present", p, - true); + 1, true); validate_memory(p, 0, page_size); munmap(p, hpage_pmd_size); } @@ -656,16 +661,17 @@ static void collapse_max_ptes_none(struc settings.khugepaged.max_ptes_none = max_ptes_none; push_settings(&settings); - p = alloc_mapping(); + p = alloc_mapping(1); fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); - c->collapse("Maybe collapse with max_ptes_none exceeded", p, + c->collapse("Maybe collapse with max_ptes_none exceeded", p, 1, !c->enforce_pte_scan_limits); validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); if (c->enforce_pte_scan_limits) { fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); - c->collapse("Collapse with max_ptes_none PTEs empty", p, true); + c->collapse("Collapse with max_ptes_none PTEs empty", p, 1, + true); validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); } @@ -677,7 +683,7 @@ static void collapse_max_ptes_none(struc static void collapse_swapin_single_pte(struct collapse_context *c) { void *p; - p = alloc_mapping(); + p = alloc_mapping(1); fill_memory(p, 0, hpage_pmd_size); printf("Swapout one page..."); @@ -692,7 +698,7 @@ static void collapse_swapin_single_pte(s goto out; } - c->collapse("Collapse with swapping in single PTE entry", p, true); + c->collapse("Collapse with swapping in single PTE entry", p, 1, true); validate_memory(p, 0, hpage_pmd_size); out: munmap(p, hpage_pmd_size); @@ -703,7 +709,7 @@ static void collapse_max_ptes_swap(struc int max_ptes_swap = read_num("khugepaged/max_ptes_swap"); void *p; - p = alloc_mapping(); + p = alloc_mapping(1); fill_memory(p, 0, hpage_pmd_size); printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr); @@ -718,7 +724,7 @@ static void collapse_max_ptes_swap(struc goto out; } - c->collapse("Maybe collapse with max_ptes_swap exceeded", p, + c->collapse("Maybe collapse with max_ptes_swap exceeded", p, 1, !c->enforce_pte_scan_limits); validate_memory(p, 0, hpage_pmd_size); @@ -738,7 +744,7 @@ static void collapse_max_ptes_swap(struc } c->collapse("Collapse with max_ptes_swap pages swapped out", p, - true); + 1, true); validate_memory(p, 0, hpage_pmd_size); } out: @@ -753,13 +759,13 @@ static void collapse_single_pte_entry_co madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); printf("Split huge page leaving single PTE mapping compound page..."); madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED); - if (!check_huge(p)) + if (check_huge(p, 0)) success("OK"); else fail("Fail"); c->collapse("Collapse PTE table with single PTE mapping compound page", - p, true); + p, 1, true); validate_memory(p, 0, page_size); munmap(p, hpage_pmd_size); } @@ -772,12 +778,12 @@ static void collapse_full_of_compound(st printf("Split huge page leaving single PTE page table full of compound pages..."); madvise(p, page_size, MADV_NOHUGEPAGE); madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); - if (!check_huge(p)) + if (check_huge(p, 0)) success("OK"); else fail("Fail"); - c->collapse("Collapse PTE table full of compound pages", p, true); + c->collapse("Collapse PTE table full of compound pages", p, 1, true); validate_memory(p, 0, hpage_pmd_size); munmap(p, hpage_pmd_size); } @@ -787,14 +793,14 @@ static void collapse_compound_extreme(st void *p; int i; - p = alloc_mapping(); + p = alloc_mapping(1); for (i = 0; i < hpage_pmd_nr; i++) { printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...", i + 1, hpage_pmd_nr); madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE); fill_memory(BASE_ADDR, 0, hpage_pmd_size); - if (!check_huge(BASE_ADDR)) { + if (!check_huge(BASE_ADDR, 1)) { printf("Failed to allocate huge page\n"); exit(EXIT_FAILURE); } @@ -823,12 +829,12 @@ static void collapse_compound_extreme(st munmap(BASE_ADDR, hpage_pmd_size); fill_memory(p, 0, hpage_pmd_size); - if (!check_huge(p)) + if (check_huge(p, 0)) success("OK"); else fail("Fail"); - c->collapse("Collapse PTE table full of different compound pages", p, + c->collapse("Collapse PTE table full of different compound pages", p, 1, true); validate_memory(p, 0, hpage_pmd_size); @@ -840,11 +846,11 @@ static void collapse_fork(struct collaps int wstatus; void *p; - p = alloc_mapping(); + p = alloc_mapping(1); printf("Allocate small page..."); fill_memory(p, 0, page_size); - if (!check_huge(p)) + if (check_huge(p, 0)) success("OK"); else fail("Fail"); @@ -855,14 +861,14 @@ static void collapse_fork(struct collaps skip_settings_restore = true; exit_status = 0; - if (!check_huge(p)) + if (check_huge(p, 0)) success("OK"); else fail("Fail"); fill_memory(p, page_size, 2 * page_size); c->collapse("Collapse PTE table with single page shared with parent process", - p, true); + p, 1, true); validate_memory(p, 0, page_size); munmap(p, hpage_pmd_size); @@ -873,7 +879,7 @@ static void collapse_fork(struct collaps exit_status += WEXITSTATUS(wstatus); printf("Check if parent still has small page..."); - if (!check_huge(p)) + if (check_huge(p, 0)) success("OK"); else fail("Fail"); @@ -893,7 +899,7 @@ static void collapse_fork_compound(struc skip_settings_restore = true; exit_status = 0; - if (check_huge(p)) + if (check_huge(p, 1)) success("OK"); else fail("Fail"); @@ -901,7 +907,7 @@ static void collapse_fork_compound(struc printf("Split huge page PMD in child process..."); madvise(p, page_size, MADV_NOHUGEPAGE); madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); - if (!check_huge(p)) + if (check_huge(p, 0)) success("OK"); else fail("Fail"); @@ -909,7 +915,7 @@ static void collapse_fork_compound(struc write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1); c->collapse("Collapse PTE table full of compound pages in child", - p, true); + p, 1, true); write_num("khugepaged/max_ptes_shared", current_settings()->khugepaged.max_ptes_shared); @@ -922,7 +928,7 @@ static void collapse_fork_compound(struc exit_status += WEXITSTATUS(wstatus); printf("Check if parent still has huge page..."); - if (check_huge(p)) + if (check_huge(p, 1)) success("OK"); else fail("Fail"); @@ -943,7 +949,7 @@ static void collapse_max_ptes_shared(str skip_settings_restore = true; exit_status = 0; - if (check_huge(p)) + if (check_huge(p, 1)) success("OK"); else fail("Fail"); @@ -951,26 +957,26 @@ static void collapse_max_ptes_shared(str printf("Trigger CoW on page %d of %d...", hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr); fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size); - if (!check_huge(p)) + if (check_huge(p, 0)) success("OK"); else fail("Fail"); c->collapse("Maybe collapse with max_ptes_shared exceeded", p, - !c->enforce_pte_scan_limits); + 1, !c->enforce_pte_scan_limits); if (c->enforce_pte_scan_limits) { printf("Trigger CoW on page %d of %d...", hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr); fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size); - if (!check_huge(p)) + if (check_huge(p, 0)) success("OK"); else fail("Fail"); c->collapse("Collapse with max_ptes_shared PTEs shared", - p, true); + p, 1, true); } validate_memory(p, 0, hpage_pmd_size); @@ -982,7 +988,7 @@ static void collapse_max_ptes_shared(str exit_status += WEXITSTATUS(wstatus); printf("Check if parent still has huge page..."); - if (check_huge(p)) + if (check_huge(p, 1)) success("OK"); else fail("Fail"); @@ -995,7 +1001,7 @@ static void madvise_collapse_existing_th void *p; int err; - p = alloc_mapping(); + p = alloc_mapping(1); fill_memory(p, 0, hpage_pmd_size); printf("Collapse fully populated PTE table..."); @@ -1005,11 +1011,11 @@ static void madvise_collapse_existing_th * MADV_COLLAPSE in "madvise" mode. */ err = madvise(p, hpage_pmd_size, MADV_COLLAPSE); - if (err == 0 && check_huge(p)) { + if (err == 0 && check_huge(p, 1)) { success("OK"); printf("Re-collapse PMD-mapped hugepage"); err = madvise(p, hpage_pmd_size, MADV_COLLAPSE); - if (err == 0 && check_huge(p)) + if (err == 0 && check_huge(p, 1)) success("OK"); else fail("Fail"); _ Patches currently in -mm which might be from zokeefe@xxxxxxxxxx are mm-khugepaged-remove-redundant-transhuge_vma_suitable-check.patch mm-khugepaged-add-struct-collapse_control.patch mm-khugepaged-dedup-and-simplify-hugepage-alloc-and-charging.patch mm-khugepaged-pipe-enum-scan_result-codes-back-to-callers.patch mm-khugepaged-add-flag-to-predicate-khugepaged-only-behavior.patch mm-thp-add-flag-to-enforce-sysfs-thp-in-hugepage_vma_check.patch mm-khugepaged-record-scan_pmd_mapped-when-scan_pmd-finds-hugepage.patch mm-madvise-introduce-madv_collapse-sync-hugepage-collapse.patch mm-khugepaged-rename-prefix-of-shared-collapse-functions.patch mm-madvise-add-huge_memory-mm_madvise_collapse-tracepoint.patch mm-madvise-add-madv_collapse-to-process_madvise.patch proc-smaps-add-pmdmappable-field-to-smaps.patch selftests-vm-modularize-collapse-selftests.patch selftests-vm-dedup-hugepage-allocation-logic.patch selftests-vm-add-madv_collapse-collapse-context-to-selftests.patch selftests-vm-add-selftest-to-verify-recollapse-of-thps.patch selftests-vm-add-selftest-to-verify-multi-thp-collapse.patch