On Fri, Oct 21, 2022 at 04:36:24PM +0000, James Houghton wrote: > Currently it is possible for all shared VMAs to use HGM, but it must be > enabled first. This is because with HGM, we lose PMD sharing, and page > table walks require additional synchronization (we need to take the VMA > lock). > > Signed-off-by: James Houghton <jthoughton@xxxxxxxxxx> > --- > include/linux/hugetlb.h | 22 +++++++++++++ > mm/hugetlb.c | 69 +++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 91 insertions(+) > > diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h > index 534958499ac4..6e0c36b08a0c 100644 > --- a/include/linux/hugetlb.h > +++ b/include/linux/hugetlb.h > @@ -123,6 +123,9 @@ struct hugetlb_vma_lock { > > struct hugetlb_shared_vma_data { > struct hugetlb_vma_lock vma_lock; > +#ifdef CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING > + bool hgm_enabled; > +#endif > }; > > extern struct resv_map *resv_map_alloc(void); > @@ -1179,6 +1182,25 @@ static inline void hugetlb_unregister_node(struct node *node) > } > #endif /* CONFIG_HUGETLB_PAGE */ > > +#ifdef CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING > +bool hugetlb_hgm_enabled(struct vm_area_struct *vma); > +bool hugetlb_hgm_eligible(struct vm_area_struct *vma); > +int enable_hugetlb_hgm(struct vm_area_struct *vma); > +#else > +static inline bool hugetlb_hgm_enabled(struct vm_area_struct *vma) > +{ > + return false; > +} > +static inline bool hugetlb_hgm_eligible(struct vm_area_struct *vma) > +{ > + return false; > +} > +static inline int enable_hugetlb_hgm(struct vm_area_struct *vma) > +{ > + return -EINVAL; > +} > +#endif > + > static inline spinlock_t *huge_pte_lock(struct hstate *h, > struct mm_struct *mm, pte_t *pte) > { > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > index 5ae8bc8c928e..a18143add956 100644 > --- a/mm/hugetlb.c > +++ b/mm/hugetlb.c > @@ -6840,6 +6840,10 @@ static bool pmd_sharing_possible(struct vm_area_struct *vma) > #ifdef CONFIG_USERFAULTFD > if (uffd_disable_huge_pmd_share(vma)) > return false; > 
+#endif > +#ifdef CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING > + if (hugetlb_hgm_enabled(vma)) > + return false; > #endif > /* > * Only shared VMAs can share PMDs. > @@ -7033,6 +7037,9 @@ static int hugetlb_vma_data_alloc(struct vm_area_struct *vma) > kref_init(&data->vma_lock.refs); > init_rwsem(&data->vma_lock.rw_sema); > data->vma_lock.vma = vma; > +#ifdef CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING > + data->hgm_enabled = false; > +#endif > vma->vm_private_data = data; > return 0; > } > @@ -7290,6 +7297,68 @@ __weak unsigned long hugetlb_mask_last_page(struct hstate *h) > > #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */ > > +#ifdef CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING > +bool hugetlb_hgm_eligible(struct vm_area_struct *vma) > +{ > + /* > + * All shared VMAs may have HGM. > + * > + * HGM requires using the VMA lock, which only exists for shared VMAs. > + * To make HGM work for private VMAs, we would need to use another > + * scheme to prevent collapsing/splitting from invalidating other > + * threads' page table walks. > + */ > + return vma && (vma->vm_flags & VM_MAYSHARE); > +} > +bool hugetlb_hgm_enabled(struct vm_area_struct *vma) > +{ > + struct hugetlb_shared_vma_data *data = vma->vm_private_data; > + > + if (!vma || !(vma->vm_flags & VM_MAYSHARE)) > + return false; Nit: smells like an open-coded hugetlb_hgm_eligible(). > + > + return data && data->hgm_enabled; > + } > + > +/* > + * Enable high-granularity mapping (HGM) for this VMA. Once enabled, HGM > + * cannot be turned off. > + * > + * PMDs cannot be shared in HGM VMAs. > + */ > +int enable_hugetlb_hgm(struct vm_area_struct *vma) > +{ > + int ret; > + struct hugetlb_shared_vma_data *data; > + > + if (!hugetlb_hgm_eligible(vma)) > + return -EINVAL; > + > + if (hugetlb_hgm_enabled(vma)) > + return 0; > + > + /* > + * We must hold the mmap lock for writing so that callers can rely on > + * hugetlb_hgm_enabled returning a consistent result while holding > + * the mmap lock for reading. 
> + */ > + mmap_assert_write_locked(vma->vm_mm); > + > + /* HugeTLB HGM requires the VMA lock to synchronize collapsing. */ > + ret = hugetlb_vma_data_alloc(vma); > + if (ret) > + return ret; > + > + data = vma->vm_private_data; > + BUG_ON(!data); Let's avoid BUG_ON(), as AFAIU it's generally not welcome unless extremely necessary. In this case it'll crash immediately on the next dereference anyway, with the whole stack dumped, so we won't miss anything important. :) > + data->hgm_enabled = true; > + > + /* We don't support PMD sharing with HGM. */ > + hugetlb_unshare_all_pmds(vma); > + return 0; > +} > +#endif /* CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING */ > + > /* > * These functions are overwritable if your architecture needs its own > * behavior. > -- > 2.38.0.135.g90850a2211-goog > -- Peter Xu