A large folio is mapped at a folio-size-aligned virtual address during the page fault, e.g. 'addr = ALIGN_DOWN(vmf->address, nr_pages * PAGE_SIZE)' in do_anonymous_page(). After mremap(), however, the virtual address is only required to be PAGE_SIZE aligned, and the PTEs are moved to the new location in move_page_tables(). Traversing the new PTEs in numa_rebuild_large_mapping() then hits the following issue: Unable to handle kernel paging request at virtual address 00000a80c021a788 Mem abort info: ESR = 0x0000000096000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault Data abort info: ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=00002040341a6000 [00000a80c021a788] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 0000000096000004 [#1] SMP ... CPU: 76 PID: 15187 Comm: git Kdump: loaded Tainted: G W 6.10.0-rc2+ #209 Hardware name: Huawei TaiShan 2280 V2/BC82AMDD, BIOS 1.79 08/21/2021 pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : numa_rebuild_large_mapping+0x338/0x638 lr : numa_rebuild_large_mapping+0x320/0x638 sp : ffff8000b41c3b00 x29: ffff8000b41c3b30 x28: ffff8000812a0000 x27: 00000000000a8000 x26: 00000000000000a8 x25: 0010000000000001 x24: ffff20401c7170f0 x23: 0000ffff33a1e000 x22: 0000ffff33a76000 x21: ffff20400869eca0 x20: 0000ffff33976000 x19: 00000000000000a8 x18: ffffffffffffffff x17: 0000000000000000 x16: 0000000000000020 x15: ffff8000b41c36a8 x14: 0000000000000000 x13: 205d373831353154 x12: 5b5d333331363732 x11: 000000000011ff78 x10: 000000000011ff10 x9 : ffff800080273f30 x8 : 000000320400869e x7 : c0000000ffffd87f x6 : 00000000001e6ba8 x5 : ffff206f3fb5af88 x4 : 0000000000000000 x3 : 0000000000000000 x2 : 0000000000000000 x1 : fffffdffc0000000 x0 : 00000a80c021a780 Call trace: numa_rebuild_large_mapping+0x338/0x638 do_numa_page+0x3e4/0x4e0 
handle_pte_fault+0x1bc/0x238 __handle_mm_fault+0x20c/0x400 handle_mm_fault+0xa8/0x288 do_page_fault+0x124/0x498 do_translation_fault+0x54/0x80 do_mem_abort+0x4c/0xa8 el0_da+0x40/0x110 el0t_64_sync_handler+0xe4/0x158 el0t_64_sync+0x188/0x190 Fix it by correcting the start and end so that they are also clamped to the folio-size-aligned range containing the faulting address. This may lead to only part of the large mapping being rebuilt in one NUMA page fault, which is not an issue since the other part can be rebuilt by another page fault. Fixes: d2136d749d76 ("mm: support multi-size THP numa balancing") Signed-off-by: Kefeng Wang <wangkefeng.wang@xxxxxxxxxx> --- mm/memory.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index db9130488231..0ad57b6485ca 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5223,15 +5223,21 @@ static void numa_rebuild_single_mapping(struct vm_fault *vmf, struct vm_area_str update_mmu_cache_range(vmf, vma, fault_addr, fault_pte, 1); } -static void numa_rebuild_large_mapping(struct vm_fault *vmf, struct vm_area_struct *vma, - struct folio *folio, pte_t fault_pte, - bool ignore_writable, bool pte_write_upgrade) +static void numa_rebuild_large_mapping(struct vm_fault *vmf, + struct vm_area_struct *vma, struct folio *folio, int nr_pages, + pte_t fault_pte, bool ignore_writable, bool pte_write_upgrade) { int nr = pte_pfn(fault_pte) - folio_pfn(folio); - unsigned long start = max(vmf->address - nr * PAGE_SIZE, vma->vm_start); - unsigned long end = min(vmf->address + (folio_nr_pages(folio) - nr) * PAGE_SIZE, vma->vm_end); - pte_t *start_ptep = vmf->pte - (vmf->address - start) / PAGE_SIZE; - unsigned long addr; + unsigned long folio_size = nr_pages * PAGE_SIZE; + unsigned long addr = vmf->address; + unsigned long start, end, align_addr; + pte_t *start_ptep; + + align_addr = ALIGN_DOWN(addr, folio_size); + start = max3(addr - nr * PAGE_SIZE, align_addr, vma->vm_start); + end = min3(addr + (nr_pages - nr) * PAGE_SIZE, align_addr + folio_size, + vma->vm_end); + start_ptep = vmf->pte - (addr - 
start) / PAGE_SIZE; /* Restore all PTEs' mapping of the large folio */ for (addr = start; addr != end; start_ptep++, addr += PAGE_SIZE) { @@ -5361,8 +5367,8 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) * non-accessible ptes, some can allow access by kernel mode. */ if (folio && folio_test_large(folio)) - numa_rebuild_large_mapping(vmf, vma, folio, pte, ignore_writable, - pte_write_upgrade); + numa_rebuild_large_mapping(vmf, vma, folio, nr_pages, pte, + ignore_writable, pte_write_upgrade); else numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte, writable); -- 2.27.0