On Mon, Aug 5, 2024 at 8:21 PM Yu Zhao <yuzhao@xxxxxxxxxx> wrote: > > Pause remote CPUs so that the local CPU can follow the proper BBM > sequence to safely update the vmemmap mapping `struct page` areas. > > While updating the vmemmap, it is guaranteed that neither the local > CPU nor the remote ones will access the `struct page` area being > updated, and therefore they will not trigger kernel PFs. > > Signed-off-by: Yu Zhao <yuzhao@xxxxxxxxxx> > --- > arch/arm64/include/asm/pgalloc.h | 55 ++++++++++++++++++++++++++++++++ > mm/hugetlb_vmemmap.c | 14 ++++++++ > 2 files changed, 69 insertions(+) > > diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h > index 8ff5f2a2579e..1af1aa34a351 100644 > --- a/arch/arm64/include/asm/pgalloc.h > +++ b/arch/arm64/include/asm/pgalloc.h > @@ -12,6 +12,7 @@ > #include <asm/processor.h> > #include <asm/cacheflush.h> > #include <asm/tlbflush.h> > +#include <asm/cpu.h> > > #define __HAVE_ARCH_PGD_FREE > #define __HAVE_ARCH_PUD_FREE > @@ -137,4 +138,58 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep) > __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE | PMD_TABLE_PXN); > } > > +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP > + > +#define vmemmap_update_lock vmemmap_update_lock > +static inline void vmemmap_update_lock(void) > +{ > + cpus_read_lock(); > +} > + > +#define vmemmap_update_unlock vmemmap_update_unlock > +static inline void vmemmap_update_unlock(void) > +{ > + cpus_read_unlock(); > +} > + > +#define vmemmap_update_pte vmemmap_update_pte > +static inline void vmemmap_update_pte(unsigned long addr, pte_t *ptep, pte_t pte) > +{ > + preempt_disable(); > + pause_remote_cpus(); > + > + pte_clear(&init_mm, addr, ptep); > + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); > + set_pte_at(&init_mm, addr, ptep, pte); > + > + resume_remote_cpus(); > + preempt_enable(); > +} Note that I kept this API from Nanyong for the sake of discussion. What I actually plan to test in our production is: #define vmemmap_update_pte_range_start vmemmap_update_pte_range_start static inline void vmemmap_update_pte_range_start(pte_t *pte, unsigned long start, unsigned long end) { preempt_disable(); pause_remote_cpus(); for (; start != end; start += PAGE_SIZE, pte++) pte_clear(&init_mm, start, pte); flush_tlb_kernel_range(start, end); } #define vmemmap_update_pte_range_end vmemmap_update_pte_range_end static inline void vmemmap_update_pte_range_end(void) { resume_remote_cpus(); preempt_enable(); } > +#define vmemmap_update_pmd vmemmap_update_pmd > +static inline void vmemmap_update_pmd(unsigned long addr, pmd_t *pmdp, pte_t *ptep) > +{ > + preempt_disable(); > + pause_remote_cpus(); > + > + pmd_clear(pmdp); > + flush_tlb_kernel_range(addr, addr + PMD_SIZE); > + pmd_populate_kernel(&init_mm, pmdp, ptep); > + > + resume_remote_cpus(); > + preempt_enable(); > +} > + > +#define vmemmap_flush_tlb_all vmemmap_flush_tlb_all > +static inline void vmemmap_flush_tlb_all(void) > +{ > +} > + > +#define vmemmap_flush_tlb_range vmemmap_flush_tlb_range > +static inline void vmemmap_flush_tlb_range(unsigned long start, unsigned long end) > +{ > +} > + > +#endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */ > + > #endif > diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c > index 2dd92e58f304..893c73493d9c 100644 > --- a/mm/hugetlb_vmemmap.c > +++ b/mm/hugetlb_vmemmap.c > @@ -46,6 +46,18 @@ struct vmemmap_remap_walk { > unsigned long flags; > }; > > +#ifndef vmemmap_update_lock > +static void vmemmap_update_lock(void) > +{ > +} > +#endif > + > +#ifndef vmemmap_update_unlock > +static void vmemmap_update_unlock(void) > +{ > +} > +#endif > + > #ifndef vmemmap_update_pmd > static inline void vmemmap_update_pmd(unsigned long addr, > pmd_t *pmdp, pte_t *ptep) > @@ -194,10 +206,12 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end, > > VM_BUG_ON(!PAGE_ALIGNED(start | end)); > > + vmemmap_update_lock(); > mmap_read_lock(&init_mm); > ret = walk_page_range_novma(&init_mm, start, end, &vmemmap_remap_ops, > NULL, walk); > mmap_read_unlock(&init_mm); > + vmemmap_update_unlock(); > if (ret) > return ret; > > -- > 2.46.0.rc2.264.g509ed76dc8-goog >