Sachin Sant <sachinp@xxxxxxxxxxxxx> writes: >> 1. First try to map things using PMD (2M) >> 2. With altmap if altmap cross-boundary check returns true, fall back to PAGE_SIZE >> 3. IF we can't allocate PMD_SIZE backing memory for vmemmap, fallback to PAGE_SIZE >> >> On removing vmemmap mapping, check if every subsection that is using the vmemmap >> area is invalid. If found to be invalid, that implies we can safely free the >> vmemmap area. We don't use the PAGE_UNUSED pattern used by x86 because with 64K >> page size, we need to do the above check even at the PAGE_SIZE granularity. >> >> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxx> >> --- > > With this patch series applied I see the following warning > > [ OK ] Started Monitoring of LVM2 mirrors,…sing dmeventd or progress polling. > [ 3.283884] papr_scm ibm,persistent-memory:ibm,pmemory@44104001: nvdimm pmu didn't register rc=-2 > [ 3.284212] papr_scm ibm,persistent-memory:ibm,pmemory@44104002: nvdimm pmu didn't register rc=-2 > [ 3.563890] radix-mmu: Mapped 0x0000040010000000-0x0000040c90000000 with 64.0 KiB pages > [ 3.703227] ------------[ cut here ]------------ > [ 3.703236] failed to free all reserved pages > [ 3.703244] WARNING: CPU: 41 PID: 923 at mm/memremap.c:152 memunmap_pages+0x37c/0x3a0 > [ 3.703252] Modules linked in: device_dax(+) nd_pmem nd_btt dax_pmem papr_scm libnvdimm pseries_rng vmx_crypto aes_gcm_p10_crypto ext4 mbcache jbd2 sd_mod t10_pi crc64_rocksoft crc64 sg ibmvscsi scsi_transport_srp ibmveth fuse > [ 3.703272] CPU: 41 PID: 923 Comm: systemd-udevd Not tainted 6.4.0-rc6-00037-gb6dad5178cea-dirty #1 > [ 3.703276] Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200 0xf000006 of:IBM,FW1030.20 (NH1030_058) hv:phyp pSeries > [ 3.703280] NIP: c00000000057a18c LR: c00000000057a188 CTR: 00000000005ca81c > [ 3.703283] REGS: c000000032a170d0 TRAP: 0700 Not tainted (6.4.0-rc6-00037-gb6dad5178cea-dirty) > [ 3.703286] MSR: 800000000282b033 <SF,VEC,VSX,EE,FP,ME,IR,DR,RI,LE> CR: 48248824 
XER: 00000002 > [ 3.703296] CFAR: c00000000015f0c0 IRQMASK: 0 [ 3.703296] GPR00: c00000000057a188 c000000032a17370 c000000001421500 0000000000000021 [ 3.703296] GPR04: 00000000ffff7fff c000000032a17140 c000000032a17138 0000000000000027 [ 3.703296] GPR08: c0000015c91a7c10 0000000000000001 0000000000000027 c000000002a18a20 [ 3.703296] GPR12: 0000000048248824 c0000015cb9f4300 c000000032a17d68 c000000001262b20 [ 3.703296] GPR16: c008000001310000 000000000000ff20 000000000000fff2 c0080000012d7418 [ 3.703296] GPR20: c000000032a17c30 0000000000000004 ffffffffffffc005 0000000001000200 [ 3.703296] GPR24: c000000002f11570 c00000000e376870 0000000000000001 0000000000000001 [ 3.703296] GPR28: c00000000e376840 c00000000e3768c8 0000000000000000 c00000000e376840 [ 3.703333] NIP [c00000000057a18c] memunmap_pages+0x37c/0x3a0 > [ 3.703338] LR [c00000000057a188] memunmap_pages+0x378/0x3a0 > [ 3.703342] Call Trace: > [ 3.703344] [c000000032a17370] [c00000000057a188] memunmap_pages+0x378/0x3a0 (unreliable) > [ 3.703349] [c000000032a17420] [c00000000057a928] memremap_pages+0x4a8/0x890 > [ 3.703355] [c000000032a17500] [c00000000057ad4c] devm_memremap_pages+0x3c/0xd0 > [ 3.703359] [c000000032a17540] [c0080000011c084c] dev_dax_probe+0x134/0x3a0 [device_dax] > [ 3.703366] [c000000032a175e0] [c0000000009f7e8c] dax_bus_probe+0xac/0x140 > [ 3.703371] [c000000032a17610] [c0000000009b5828] really_probe+0x108/0x530 > [ 3.703375] [c000000032a176a0] [c0000000009b5d04] __driver_probe_device+0xb4/0x200 > [ 3.703379] [c000000032a17720] [c0000000009b5ea8] driver_probe_device+0x58/0x120 > [ 3.703383] [c000000032a17760] [c0000000009b6298] __driver_attach+0x148/0x250 > [ 3.703387] [c000000032a177e0] [c0000000009b1a58] bus_for_each_dev+0xa8/0x130 > [ 3.703392] [c000000032a17840] [c0000000009b4b34] driver_attach+0x34/0x50 > [ 3.703396] [c000000032a17860] [c0000000009b3b98] bus_add_driver+0x258/0x300 > [ 3.703400] [c000000032a178f0] [c0000000009b78d4] driver_register+0xa4/0x1b0 > [ 3.703404] 
[c000000032a17960] [c0000000009f9530] __dax_driver_register+0x50/0x70 > [ 3.703409] [c000000032a17980] [c0080000011c1374] dax_init+0x3c/0x58 [device_dax] > [ 3.703414] [c000000032a179a0] [c000000000013260] do_one_initcall+0x60/0x2f0 > [ 3.703418] [c000000032a17a70] [c000000000248af8] do_init_module+0x78/0x310 > [ 3.703424] [c000000032a17af0] [c00000000024bcac] load_module+0x2a7c/0x2f30 > [ 3.703429] [c000000032a17d00] [c00000000024c4f0] __do_sys_finit_module+0xe0/0x180 > [ 3.703434] [c000000032a17e10] [c0000000000374c0] system_call_exception+0x140/0x350 > [ 3.703439] [c000000032a17e50] [c00000000000d6a0] system_call_common+0x160/0x2e4 > [ 3.703444] --- interrupt: c00 at 0x7fff9af2fb34 > [ 3.703447] NIP: 00007fff9af2fb34 LR: 00007fff9b6dea9c CTR: 0000000000000000 > [ 3.703450] REGS: c000000032a17e80 TRAP: 0c00 Not tainted (6.4.0-rc6-00037-gb6dad5178cea-dirty) > [ 3.703453] MSR: 800000000280f033 <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE> CR: 28222204 XER: 00000000 > [ 3.703462] IRQMASK: 0 [ 3.703462] GPR00: 0000000000000161 00007fffed351350 00007fff9b007300 000000000000000f [ 3.703462] GPR04: 00007fff9b6ead30 0000000000000000 000000000000000f 0000000000000000 [ 3.703462] GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 3.703462] GPR12: 0000000000000000 00007fff9b7c6610 0000000000020000 000000011057db18 [ 3.703462] GPR16: 00000001105c0108 0000000110585f48 0000000000000000 0000000000000000 [ 3.703462] GPR20: 0000000000000000 0000000110585f80 0000000147985200 00007fffed351570 [ 3.703462] GPR24: 00000001105c0128 0000000000020000 0000000000000000 0000000147981010 [ 3.703462] GPR28: 00007fff9b6ead30 0000000000020000 0000000000000000 0000000147985200 [ 3.703497] NIP [00007fff9af2fb34] 0x7fff9af2fb34 > [ 3.703499] LR [00007fff9b6dea9c] 0x7fff9b6dea9c > [ 3.703502] --- interrupt: c00 > [ 3.703504] Code: 60000000 3d220170 8929b2b7 2f890000 409eff28 3c62ffe7 39200001 3d420170 3863c518 992ab2b7 4bbe4e55 60000000 <0fe00000> fac10060 fae10068 fb010070 [ 
3.703516] ---[ end trace 0000000000000000 ]--- > [ 3.703520] device_dax: probe of dax0.0 failed with error -12 > [ OK ] Created slice system-daxdev\x2dreconfigure.slice. > [ OK ] Started udev Wait for Complete Device Initialization. > [ OK ] Reached target Local File Systems (Pre). > [ OK ] Reached target Local File Systems. > The below change fixed the warning on the test machine you shared. diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 1c49af91fd9c..d884c1b39128 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -994,6 +994,7 @@ void __meminit vmemmap_set_pmd(pmd_t *pmdp, void *p, int node, pte_t entry; pte_t *ptep = pmdp_ptep(pmdp); + VM_BUG_ON(!IS_ALIGNED((unsigned long)(addr), PMD_SIZE)); entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); set_pte_at(&init_mm, addr, ptep, entry); asm volatile("ptesync": : :"memory"); @@ -1012,6 +1013,10 @@ static pte_t * __meminit radix__vmemmap_pte_populate(pmd_t *pmd, unsigned long a void *p; if (!reuse) { + + if (altmap && altmap_cross_boundary(altmap, addr, PAGE_SIZE)) + altmap = NULL; + p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap); if (!p) return NULL; @@ -1028,6 +1033,8 @@ static pte_t * __meminit radix__vmemmap_pte_populate(pmd_t *pmd, unsigned long a get_page(reuse); p = page_to_virt(reuse); } + + VM_BUG_ON(!PAGE_ALIGNED(addr)); entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); set_pte_at(&init_mm, addr, pte, entry); asm volatile("ptesync": : :"memory"); @@ -1108,10 +1115,14 @@ int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, in pmd = vmemmap_pmd_alloc(pud, node, addr); if (!pmd) return -ENOMEM; + if (pmd_none(READ_ONCE(*pmd))) { void *p; - if (altmap && altmap_cross_boundary(altmap, start, PMD_SIZE)) { + if (!IS_ALIGNED(addr, PMD_SIZE)) + goto base_mapping; + + if (altmap && altmap_cross_boundary(altmap, addr, PMD_SIZE)) { /* make sure we don't create altmap mappings 
covering things outside. */ goto base_mapping;