On 10/11/21 6:17 PM, Mina Almasry wrote: > Support mremap() for hugepage backed vma segment by simply repositioning > page table entries. The page table entries are repositioned to the new > virtual address on mremap(). > > Hugetlb mremap() support is of course generic; my motivating use case > is a library (hugepage_text), which reloads the ELF text of executables > in hugepages. This significantly increases the execution performance of > said executables. > > Restricts the mremap operation on hugepages to up to the size of the > original mapping as the underlying hugetlb reservation is not yet > capable of handling remapping to a larger size. > > During the mremap() operation we detect pmd_share'd mappings and we > unshare those during the mremap(). On access and fault the sharing is > established again. > > Signed-off-by: Mina Almasry <almasrymina@xxxxxxxxxx> Thanks! Just some minor nits below. If you agree with the suggestions and make the changes, you can add: Reviewed-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx> > diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h > index ebaba02706c87..c6b70f1ede6bf 100644 > --- a/include/linux/hugetlb.h > +++ b/include/linux/hugetlb.h > @@ -124,6 +124,7 @@ struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages, > void hugepage_put_subpool(struct hugepage_subpool *spool); > > void reset_vma_resv_huge_pages(struct vm_area_struct *vma); > +void clear_vma_resv_huge_pages(struct vm_area_struct *vma); > int hugetlb_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *); > int hugetlb_overcommit_handler(struct ctl_table *, int, void *, size_t *, > loff_t *); > @@ -132,6 +133,10 @@ int hugetlb_treat_movable_handler(struct ctl_table *, int, void *, size_t *, > int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, void *, size_t *, > loff_t *); > > +int move_hugetlb_page_tables(struct vm_area_struct *vma, > + struct vm_area_struct *new_vma, > + unsigned long old_addr, unsigned long new_addr, > + unsigned long len); > int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); > long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, > struct page **, struct vm_area_struct **, > @@ -215,6 +220,10 @@ static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma) > { > } > > +static inline void clear_vma_resv_huge_pages(struct vm_area_struct *vma) > +{ > +} > + > static inline unsigned long hugetlb_total_pages(void) > { > return 0; > @@ -262,6 +271,12 @@ static inline int copy_hugetlb_page_range(struct mm_struct *dst, > return 0; > } > > +#define move_hugetlb_page_tables(vma, new_vma, old_addr, new_addr, len) \ > + ({ \ > + BUG(); \ > + 0; \ > + }) > + Any reason why you did not make this a static inline? Trying to save code in the !CONFIG_HUGETLB case? macros seem to end up causing more issues down the line. I would suggest making this a static inline unless there is a good reason to keep as a macro. > static inline void hugetlb_report_meminfo(struct seq_file *m) > { > } > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > index 6d2f4c25dd9fb..6e91cd3905e73 100644 > --- a/mm/hugetlb.c > +++ b/mm/hugetlb.c ... > +int move_hugetlb_page_tables(struct vm_area_struct *vma, > + struct vm_area_struct *new_vma, > + unsigned long old_addr, unsigned long new_addr, > + unsigned long len) > +{ > + struct hstate *h = hstate_vma(vma); > + struct address_space *mapping = vma->vm_file->f_mapping; > + unsigned long sz = huge_page_size(h); > + struct mm_struct *mm = vma->vm_mm; > + unsigned long old_end = old_addr + len; > + unsigned long old_addr_copy; > + pte_t *src_pte, *dst_pte; > + struct mmu_notifier_range range; > + > + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, old_addr, > + old_end); > + adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); > + mmu_notifier_invalidate_range_start(&range); > + /* Prevent race with file truncation */ > + i_mmap_lock_write(mapping); > + for (; old_addr < old_end; old_addr += sz, new_addr += sz) { > + src_pte = huge_pte_offset(mm, old_addr, sz); > + if (!src_pte) > + continue; > + if (huge_pte_none(huge_ptep_get(src_pte))) > + continue; > + > + /* old_addr arg to huge_pmd_unshare() is a pointer and so the > + * arg may be modified. Pass a copy instead to preserve the > + * value in old_arg. value in old_addr. > + */ > + old_addr_copy = old_addr; > + > + if (huge_pmd_unshare(mm, vma, &old_addr_copy, src_pte)) > + continue; > + > + dst_pte = huge_pte_alloc(mm, new_vma, new_addr, sz); > + if (!dst_pte) > + break; > + > + move_huge_pte(vma, old_addr, new_addr, src_pte); > + } > + i_mmap_unlock_write(mapping); > + flush_tlb_range(vma, old_end - len, old_end); > + mmu_notifier_invalidate_range_end(&range); > + > + return len + old_addr - old_end; > +} > + > static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, > unsigned long start, unsigned long end, > struct page *ref_page) > @@ -6280,7 +6385,8 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma, > return saddr; > } > > -static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr) > +static bool hugetlb_vma_shareable(struct vm_area_struct *vma, > + unsigned long addr) > { > unsigned long base = addr & PUD_MASK; > unsigned long end = base + PUD_SIZE; > @@ -6299,7 +6405,7 @@ bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr) > if (uffd_disable_huge_pmd_share(vma)) > return false; > #endif > - return vma_shareable(vma, addr); > + return hugetlb_vma_shareable(vma, addr); > } > > /* In an earlier version of the patch, vma_shareable was renamed hugetlb_vma_shareable because it was going to be used outside hugetlb.c. Therefore the hugetlb_* name would provide some context. That is no longer the case. So, there really is no need to change the name. In fact, none of the remap code even calls this routine. Sugggest you drop the name change. -- Mike Kravetz