Alexandre Ghiti <alexghiti@xxxxxxxxxxxx> writes: > On Tue, May 21, 2024 at 1:49 PM Björn Töpel <bjorn@xxxxxxxxxx> wrote: >> >> From: Björn Töpel <bjorn@xxxxxxxxxxxx> >> >> For an architecture to support memory hotplugging, a couple of >> callbacks needs to be implemented: >> >> arch_add_memory() >> This callback is responsible for adding the physical memory into the >> direct map, and call into the memory hotplugging generic code via >> __add_pages() that adds the corresponding struct page entries, and >> updates the vmemmap mapping. >> >> arch_remove_memory() >> This is the inverse of the callback above. >> >> vmemmap_free() >> This function tears down the vmemmap mappings (if >> CONFIG_SPARSEMEM_VMEMMAP is enabled), and also deallocates the >> backing vmemmap pages. Note that for persistent memory, an >> alternative allocator for the backing pages can be used; The >> vmem_altmap. This means that when the backing pages are cleared, >> extra care is needed so that the correct deallocation method is >> used. >> >> arch_get_mappable_range() >> This functions returns the PA range that the direct map can map. >> Used by the MHP internals for sanity checks. >> >> The page table unmap/teardown functions are heavily based on code from >> the x86 tree. The same remove_pgd_mapping() function is used in both >> vmemmap_free() and arch_remove_memory(), but in the latter function >> the backing pages are not removed. >> >> Signed-off-by: Björn Töpel <bjorn@xxxxxxxxxxxx> >> --- >> arch/riscv/mm/init.c | 261 +++++++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 261 insertions(+) >> >> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c >> index 6f72b0b2b854..6693b742bf2f 100644 >> --- a/arch/riscv/mm/init.c >> +++ b/arch/riscv/mm/init.c >> @@ -1493,3 +1493,264 @@ void __init pgtable_cache_init(void) >> } >> } >> #endif >> + >> +#ifdef CONFIG_MEMORY_HOTPLUG >> +static void __meminit free_pagetable(struct page *page, int order) >> +{ >> + unsigned int nr_pages = 1 << order; >> + >> + /* >> + * vmemmap/direct page tables can be reserved, if added at >> + * boot. >> + */ >> + if (PageReserved(page)) { >> + __ClearPageReserved(page); > > What's the difference between __ClearPageReserved() and > ClearPageReserved()? Because it seems like free_reserved_page() calls > the latter already, so why would you need to call > __ClearPageReserved() on the first page? Indeed! x86 copy pasta (which uses bootmem info page that RV doesn't). >> + while (nr_pages--) >> + free_reserved_page(page++); >> + return; >> + } >> + >> + free_pages((unsigned long)page_address(page), order); >> +} >> + >> +static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) >> +{ >> + pte_t *pte; >> + int i; >> + >> + for (i = 0; i < PTRS_PER_PTE; i++) { >> + pte = pte_start + i; >> + if (!pte_none(*pte)) >> + return; >> + } >> + >> + free_pagetable(pmd_page(*pmd), 0); >> + pmd_clear(pmd); >> +} >> + >> +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) >> +{ >> + pmd_t *pmd; >> + int i; >> + >> + for (i = 0; i < PTRS_PER_PMD; i++) { >> + pmd = pmd_start + i; >> + if (!pmd_none(*pmd)) >> + return; >> + } >> + >> + free_pagetable(pud_page(*pud), 0); >> + pud_clear(pud); >> +} >> + >> +static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) >> +{ >> + pud_t *pud; >> + int i; >> + >> + for (i = 0; i < PTRS_PER_PUD; i++) { >> + pud = pud_start + i; >> + if (!pud_none(*pud)) >> + return; >> + } >> + >> + free_pagetable(p4d_page(*p4d), 0); >> + p4d_clear(p4d); >> +} >> + >> +static void __meminit free_vmemmap_storage(struct page *page, size_t size, >> + struct vmem_altmap *altmap) >> +{ >> + if (altmap) >> + vmem_altmap_free(altmap, size >> PAGE_SHIFT); >> + else >> + free_pagetable(page, get_order(size)); >> +} >> + >> +static void __meminit remove_pte_mapping(pte_t *pte_base, unsigned long addr, unsigned long end, >> + bool is_vmemmap, struct vmem_altmap *altmap) >> +{ >> + unsigned long next; >> + pte_t *ptep, pte; >> + >> + for (; addr < end; addr = next) { >> + next = (addr + PAGE_SIZE) & PAGE_MASK; > > Nit: use ALIGN() instead. > >> + if (next > end) >> + next = end; >> + >> + ptep = pte_base + pte_index(addr); >> + pte = READ_ONCE(*ptep); > > Nit: Use ptep_get() > >> + >> + if (!pte_present(*ptep)) >> + continue; >> + >> + pte_clear(&init_mm, addr, ptep); >> + if (is_vmemmap) >> + free_vmemmap_storage(pte_page(pte), PAGE_SIZE, altmap); >> + } >> +} >> + >> +static void __meminit remove_pmd_mapping(pmd_t *pmd_base, unsigned long addr, unsigned long end, >> + bool is_vmemmap, struct vmem_altmap *altmap) >> +{ >> + unsigned long next; >> + pte_t *pte_base; >> + pmd_t *pmdp, pmd; >> + >> + for (; addr < end; addr = next) { >> + next = pmd_addr_end(addr, end); >> + pmdp = pmd_base + pmd_index(addr); >> + pmd = READ_ONCE(*pmdp); > > Nit: Use pmdp_get() > >> + >> + if (!pmd_present(pmd)) >> + continue; >> + >> + if (pmd_leaf(pmd)) { >> + pmd_clear(pmdp); >> + if (is_vmemmap) >> + free_vmemmap_storage(pmd_page(pmd), PMD_SIZE, altmap); >> + continue; >> + } >> + >> + pte_base = (pte_t *)pmd_page_vaddr(*pmdp); >> + remove_pte_mapping(pte_base, addr, next, is_vmemmap, altmap); >> + free_pte_table(pte_base, pmdp); >> + } >> +} >> + >> +static void __meminit remove_pud_mapping(pud_t *pud_base, unsigned long addr, unsigned long end, >> + bool is_vmemmap, struct vmem_altmap *altmap) >> +{ >> + unsigned long next; >> + pud_t *pudp, pud; >> + pmd_t *pmd_base; >> + >> + for (; addr < end; addr = next) { >> + next = pud_addr_end(addr, end); >> + pudp = pud_base + pud_index(addr); >> + pud = READ_ONCE(*pudp); > > Nit: Use pudp_get() > >> + >> + if (!pud_present(pud)) >> + continue; >> + >> + if (pud_leaf(pud)) { >> + if (pgtable_l4_enabled) { >> + pud_clear(pudp); >> + if (is_vmemmap) >> + free_vmemmap_storage(pud_page(pud), PUD_SIZE, altmap); >> + } >> + continue; >> + } >> + >> + pmd_base = pmd_offset(pudp, 0); >> + remove_pmd_mapping(pmd_base, addr, next, is_vmemmap, altmap); >> + >> + if (pgtable_l4_enabled) >> + free_pmd_table(pmd_base, pudp); >> + } >> +} >> + >> +static void __meminit remove_p4d_mapping(p4d_t *p4d_base, unsigned long addr, unsigned long end, >> + bool is_vmemmap, struct vmem_altmap *altmap) >> +{ >> + unsigned long next; >> + p4d_t *p4dp, p4d; >> + pud_t *pud_base; >> + >> + for (; addr < end; addr = next) { >> + next = p4d_addr_end(addr, end); >> + p4dp = p4d_base + p4d_index(addr); >> + p4d = READ_ONCE(*p4dp); > > Nit: Use p4dp_get() ...and I'll make sure to address these nits as well. Thanks! Björn