On x86_64, vmemmap is always PMD mapped if the machine has hugepage support and if we have 2MB contiguous pages that are PMD aligned. If we want to free the unused vmemmap pages, we have to split the huge PMD first. So we should pre-allocate page tables to split the PMD into PTEs. Signed-off-by: Muchun Song <songmuchun@xxxxxxxxxxxxx> --- mm/hugetlb_vmemmap.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++ mm/hugetlb_vmemmap.h | 12 +++++++++ 2 files changed, 85 insertions(+) diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index a6c9948302e2..b7dfa97b4ea9 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -71,6 +71,8 @@ */ #define pr_fmt(fmt) "HugeTLB Vmemmap: " fmt +#include <linux/list.h> +#include <asm/pgalloc.h> #include "hugetlb_vmemmap.h" /* @@ -83,6 +85,77 @@ */ #define RESERVE_VMEMMAP_NR 2U +#ifndef VMEMMAP_HPAGE_SHIFT +#define VMEMMAP_HPAGE_SHIFT HPAGE_SHIFT +#endif +#define VMEMMAP_HPAGE_ORDER (VMEMMAP_HPAGE_SHIFT - PAGE_SHIFT) +#define VMEMMAP_HPAGE_NR (1 << VMEMMAP_HPAGE_ORDER) +#define VMEMMAP_HPAGE_SIZE ((1UL) << VMEMMAP_HPAGE_SHIFT) +#define VMEMMAP_HPAGE_MASK (~(VMEMMAP_HPAGE_SIZE - 1)) + +#define page_huge_pte(page) ((page)->pmd_huge_pte) + +static inline unsigned int free_vmemmap_pages_per_hpage(struct hstate *h) +{ + return h->nr_free_vmemmap_pages; +} + +static inline unsigned int vmemmap_pages_per_hpage(struct hstate *h) +{ + return free_vmemmap_pages_per_hpage(h) + RESERVE_VMEMMAP_NR; +} + +static inline unsigned long vmemmap_pages_size_per_hpage(struct hstate *h) +{ + return (unsigned long)vmemmap_pages_per_hpage(h) << PAGE_SHIFT; +} + +static inline unsigned int pgtable_pages_to_prealloc_per_hpage(struct hstate *h) +{ + unsigned long vmemmap_size = vmemmap_pages_size_per_hpage(h); + + /* + * No need to pre-allocate page tables when there are no vmemmap pages + * to free.
+ */ + if (!free_vmemmap_pages_per_hpage(h)) + return 0; + + return ALIGN(vmemmap_size, VMEMMAP_HPAGE_SIZE) >> VMEMMAP_HPAGE_SHIFT; +} + +void vmemmap_pgtable_free(struct page *page) +{ + struct page *pte_page, *t_page; + + list_for_each_entry_safe(pte_page, t_page, &page->lru, lru) { + list_del(&pte_page->lru); + pte_free_kernel(&init_mm, page_to_virt(pte_page)); + } +} + +int vmemmap_pgtable_prealloc(struct hstate *h, struct page *page) +{ + unsigned int nr = pgtable_pages_to_prealloc_per_hpage(h); + + /* Store preallocated pages on huge page lru list */ + INIT_LIST_HEAD(&page->lru); + + while (nr--) { + pte_t *pte_p; + + pte_p = pte_alloc_one_kernel(&init_mm); + if (!pte_p) + goto out; + list_add(&virt_to_page(pte_p)->lru, &page->lru); + } + + return 0; +out: + vmemmap_pgtable_free(page); + return -ENOMEM; +} + void __init hugetlb_vmemmap_init(struct hstate *h) { unsigned int order = huge_page_order(h); diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h index 40c0c7dfb60d..2a72d2f62411 100644 --- a/mm/hugetlb_vmemmap.h +++ b/mm/hugetlb_vmemmap.h @@ -9,12 +9,24 @@ #ifndef _LINUX_HUGETLB_VMEMMAP_H #define _LINUX_HUGETLB_VMEMMAP_H #include <linux/hugetlb.h> +#include <linux/mm.h> #ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP void __init hugetlb_vmemmap_init(struct hstate *h); +int vmemmap_pgtable_prealloc(struct hstate *h, struct page *page); +void vmemmap_pgtable_free(struct page *page); #else static inline void hugetlb_vmemmap_init(struct hstate *h) { } + +static inline int vmemmap_pgtable_prealloc(struct hstate *h, struct page *page) +{ + return 0; +} + +static inline void vmemmap_pgtable_free(struct page *page) +{ +} #endif /* CONFIG_HUGETLB_PAGE_FREE_VMEMMAP */ #endif /* _LINUX_HUGETLB_VMEMMAP_H */ -- 2.11.0