The patch titled Subject: hugetlb: really allocate vma lock for all sharable vmas has been added to the -mm mm-hotfixes-unstable branch. Its filename is hugetlb-really-allocate-vma-lock-for-all-sharable-vmas.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/hugetlb-really-allocate-vma-lock-for-all-sharable-vmas.patch This patch will later appear in the mm-hotfixes-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Mike Kravetz <mike.kravetz@xxxxxxxxxx> Subject: hugetlb: really allocate vma lock for all sharable vmas Date: Mon, 12 Dec 2022 15:50:41 -0800 Commit bbff39cc6cbc ("hugetlb: allocate vma lock for all sharable vmas") removed the pmd sharable checks in the vma lock helper routines. However, it left the functional version of helper routines behind #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE. Therefore, the vma lock is not being used for sharable vmas on architectures that do not support pmd sharing. On these architectures, a potential fault/truncation race is exposed that could leave pages in a hugetlb file past i_size until the file is removed. Move the functional vma lock helpers outside the ifdef, and remove the non-functional stubs. Since the vma lock is not just for pmd sharing, rename the routine __vma_shareable_flags_pmd. Link: https://lkml.kernel.org/r/20221212235042.178355-1-mike.kravetz@xxxxxxxxxx Fixes: bbff39cc6cbc ("hugetlb: allocate vma lock for all sharable vmas") Signed-off-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx> Cc: "Aneesh Kumar K.V" <aneesh.kumar@xxxxxxxxxxxxxxxxxx> Cc: David Hildenbrand <david@xxxxxxxxxx> Cc: James Houghton <jthoughton@xxxxxxxxxx> Cc: Miaohe Lin <linmiaohe@xxxxxxxxxx> Cc: Mina Almasry <almasrymina@xxxxxxxxxx> Cc: Muchun Song <songmuchun@xxxxxxxxxxxxx> Cc: Naoya Horiguchi <naoya.horiguchi@xxxxxxxxx> Cc: Peter Xu <peterx@xxxxxxxxxx> Cc: <stable@xxxxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- mm/hugetlb.c | 333 +++++++++++++++++++++---------------------------- 1 file changed, 148 insertions(+), 185 deletions(-) --- a/mm/hugetlb.c~hugetlb-really-allocate-vma-lock-for-all-sharable-vmas +++ a/mm/hugetlb.c @@ -255,6 +255,152 @@ static inline struct hugepage_subpool *s return subpool_inode(file_inode(vma->vm_file)); } +/* + * hugetlb vma_lock helper routines + */ +static bool __vma_shareable_lock(struct vm_area_struct *vma) +{ + return vma->vm_flags & (VM_MAYSHARE | VM_SHARED) && + vma->vm_private_data; +} + +void hugetlb_vma_lock_read(struct vm_area_struct *vma) +{ + if (__vma_shareable_lock(vma)) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + down_read(&vma_lock->rw_sema); + } +} + +void hugetlb_vma_unlock_read(struct vm_area_struct *vma) +{ + if (__vma_shareable_lock(vma)) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + up_read(&vma_lock->rw_sema); + } +} + +void hugetlb_vma_lock_write(struct vm_area_struct *vma) +{ + if (__vma_shareable_lock(vma)) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + down_write(&vma_lock->rw_sema); + } +} + +void hugetlb_vma_unlock_write(struct vm_area_struct *vma) +{ + if (__vma_shareable_lock(vma)) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + up_write(&vma_lock->rw_sema); + } +} + +int hugetlb_vma_trylock_write(struct vm_area_struct *vma) +{ + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + if (!__vma_shareable_lock(vma)) + return 1; + + return down_write_trylock(&vma_lock->rw_sema); +} + +void hugetlb_vma_assert_locked(struct vm_area_struct *vma) +{ + if (__vma_shareable_lock(vma)) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + lockdep_assert_held(&vma_lock->rw_sema); + } +} + +void hugetlb_vma_lock_release(struct kref *kref) +{ + struct hugetlb_vma_lock *vma_lock = container_of(kref, + struct hugetlb_vma_lock, refs); + + kfree(vma_lock); +} + +static void __hugetlb_vma_unlock_write_put(struct hugetlb_vma_lock *vma_lock) +{ + struct vm_area_struct *vma = vma_lock->vma; + + /* + * vma_lock structure may or not be released as a result of put, + * it certainly will no longer be attached to vma so clear pointer. + * Semaphore synchronizes access to vma_lock->vma field. + */ + vma_lock->vma = NULL; + vma->vm_private_data = NULL; + up_write(&vma_lock->rw_sema); + kref_put(&vma_lock->refs, hugetlb_vma_lock_release); +} + +static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma) +{ + if (__vma_shareable_lock(vma)) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + __hugetlb_vma_unlock_write_put(vma_lock); + } +} + +static void hugetlb_vma_lock_free(struct vm_area_struct *vma) +{ + /* + * Only present in sharable vmas. + */ + if (!vma || !__vma_shareable_lock(vma)) + return; + + if (vma->vm_private_data) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + down_write(&vma_lock->rw_sema); + __hugetlb_vma_unlock_write_put(vma_lock); + } +} + +static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma) +{ + struct hugetlb_vma_lock *vma_lock; + + /* Only establish in (flags) sharable vmas */ + if (!vma || !(vma->vm_flags & VM_MAYSHARE)) + return; + + /* Should never get here with non-NULL vm_private_data */ + if (vma->vm_private_data) + return; + + vma_lock = kmalloc(sizeof(*vma_lock), GFP_KERNEL); + if (!vma_lock) { + /* + * If we can not allocate structure, then vma can not + * participate in pmd sharing. This is only a possible + * performance enhancement and memory saving issue. + * However, the lock is also used to synchronize page + * faults with truncation. If the lock is not present, + * unlikely races could leave pages in a file past i_size + * until the file is removed. Warn in the unlikely case of + * allocation failure. + */ + pr_warn_once("HugeTLB: unable to allocate vma specific lock\n"); + return; + } + + kref_init(&vma_lock->refs); + init_rwsem(&vma_lock->rw_sema); + vma_lock->vma = vma; + vma->vm_private_data = vma_lock; +} + /* Helper that removes a struct file_region from the resv_map cache and returns * it for use. */ @@ -6616,7 +6762,8 @@ bool hugetlb_reserve_pages(struct inode } /* - * vma specific semaphore used for pmd sharing synchronization + * vma specific semaphore used for pmd sharing and fault/truncation + * synchronization */ hugetlb_vma_lock_alloc(vma); @@ -6872,149 +7019,6 @@ void adjust_range_if_pmd_sharing_possibl *end = ALIGN(*end, PUD_SIZE); } -static bool __vma_shareable_flags_pmd(struct vm_area_struct *vma) -{ - return vma->vm_flags & (VM_MAYSHARE | VM_SHARED) && - vma->vm_private_data; -} - -void hugetlb_vma_lock_read(struct vm_area_struct *vma) -{ - if (__vma_shareable_flags_pmd(vma)) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - down_read(&vma_lock->rw_sema); - } -} - -void hugetlb_vma_unlock_read(struct vm_area_struct *vma) -{ - if (__vma_shareable_flags_pmd(vma)) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - up_read(&vma_lock->rw_sema); - } -} - -void hugetlb_vma_lock_write(struct vm_area_struct *vma) -{ - if (__vma_shareable_flags_pmd(vma)) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - down_write(&vma_lock->rw_sema); - } -} - -void hugetlb_vma_unlock_write(struct vm_area_struct *vma) -{ - if (__vma_shareable_flags_pmd(vma)) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - up_write(&vma_lock->rw_sema); - } -} - -int hugetlb_vma_trylock_write(struct vm_area_struct *vma) -{ - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - if (!__vma_shareable_flags_pmd(vma)) - return 1; - - return down_write_trylock(&vma_lock->rw_sema); -} - -void hugetlb_vma_assert_locked(struct vm_area_struct *vma) -{ - if (__vma_shareable_flags_pmd(vma)) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - lockdep_assert_held(&vma_lock->rw_sema); - } -} - -void hugetlb_vma_lock_release(struct kref *kref) -{ - struct hugetlb_vma_lock *vma_lock = container_of(kref, - struct hugetlb_vma_lock, refs); - - kfree(vma_lock); -} - -static void __hugetlb_vma_unlock_write_put(struct hugetlb_vma_lock *vma_lock) -{ - struct vm_area_struct *vma = vma_lock->vma; - - /* - * vma_lock structure may or not be released as a result of put, - * it certainly will no longer be attached to vma so clear pointer. - * Semaphore synchronizes access to vma_lock->vma field. - */ - vma_lock->vma = NULL; - vma->vm_private_data = NULL; - up_write(&vma_lock->rw_sema); - kref_put(&vma_lock->refs, hugetlb_vma_lock_release); -} - -static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma) -{ - if (__vma_shareable_flags_pmd(vma)) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - __hugetlb_vma_unlock_write_put(vma_lock); - } -} - -static void hugetlb_vma_lock_free(struct vm_area_struct *vma) -{ - /* - * Only present in sharable vmas. - */ - if (!vma || !__vma_shareable_flags_pmd(vma)) - return; - - if (vma->vm_private_data) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - down_write(&vma_lock->rw_sema); - __hugetlb_vma_unlock_write_put(vma_lock); - } -} - -static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma) -{ - struct hugetlb_vma_lock *vma_lock; - - /* Only establish in (flags) sharable vmas */ - if (!vma || !(vma->vm_flags & VM_MAYSHARE)) - return; - - /* Should never get here with non-NULL vm_private_data */ - if (vma->vm_private_data) - return; - - vma_lock = kmalloc(sizeof(*vma_lock), GFP_KERNEL); - if (!vma_lock) { - /* - * If we can not allocate structure, then vma can not - * participate in pmd sharing. This is only a possible - * performance enhancement and memory saving issue. - * However, the lock is also used to synchronize page - * faults with truncation. If the lock is not present, - * unlikely races could leave pages in a file past i_size - * until the file is removed. Warn in the unlikely case of - * allocation failure. - */ - pr_warn_once("HugeTLB: unable to allocate vma specific lock\n"); - return; - } - - kref_init(&vma_lock->refs); - init_rwsem(&vma_lock->rw_sema); - vma_lock->vma = vma; - vma->vm_private_data = vma_lock; -} - /* * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() * and returns the corresponding pte. While this is not necessary for the @@ -7103,47 +7107,6 @@ int huge_pmd_unshare(struct mm_struct *m #else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ -void hugetlb_vma_lock_read(struct vm_area_struct *vma) -{ -} - -void hugetlb_vma_unlock_read(struct vm_area_struct *vma) -{ -} - -void hugetlb_vma_lock_write(struct vm_area_struct *vma) -{ -} - -void hugetlb_vma_unlock_write(struct vm_area_struct *vma) -{ -} - -int hugetlb_vma_trylock_write(struct vm_area_struct *vma) -{ - return 1; -} - -void hugetlb_vma_assert_locked(struct vm_area_struct *vma) -{ -} - -void hugetlb_vma_lock_release(struct kref *kref) -{ -} - -static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma) -{ -} - -static void hugetlb_vma_lock_free(struct vm_area_struct *vma) -{ -} - -static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma) -{ -} - pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pud_t *pud) { _ Patches currently in -mm which might be from mike.kravetz@xxxxxxxxxx are hugetlb-really-allocate-vma-lock-for-all-sharable-vmas.patch