* Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx> [241101 14:46]:
> Ever since commit 8d7071af8907 ("mm: always expand the stack with the mmap
> write lock held") we have been expanding the stack with the mmap write lock
> held.
>
> This is true in all code paths:
>
> get_arg_page()
>   -> expand_downwards()
> setup_arg_pages()
>   -> expand_stack_locked()
>     -> expand_downwards() / expand_upwards()
> lock_mm_and_find_vma()
>   -> expand_stack_locked()
>     -> expand_downwards() / expand_upwards()
> create_elf_tables()
>   -> find_extend_vma_locked()
>     -> expand_stack_locked()
> expand_stack()
>   -> vma_expand_down()
>     -> expand_downwards()
> expand_stack()
>   -> vma_expand_up()
>     -> expand_upwards()
>
> Each of these acquires the mmap write lock before doing so. Despite this, we
> maintain code that acquires a page table lock in the expand_upwards() and
> expand_downwards() code, stating that we hold a shared mmap lock and thus
> this is necessary.
>
> It is not; we do not have to worry about concurrent VMA expansions, so we
> can simply drop this and update the comments accordingly.
>
> We do not even need to be concerned with racing page faults, as
> vma_start_write() is invoked in both cases.
>
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx>

Reviewed-by: Liam R. Howlett <Liam.Howlett@xxxxxxxxxx>

> ---
>  mm/mmap.c | 38 ++++++--------------------------------
>  1 file changed, 6 insertions(+), 32 deletions(-)
>
> diff --git a/mm/mmap.c b/mm/mmap.c
> index f904b3bba962..386429f7db5a 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -1039,6 +1039,8 @@ static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
>  	if (!(vma->vm_flags & VM_GROWSUP))
>  		return -EFAULT;
>
> +	mmap_assert_write_locked(mm);
> +
>  	/* Guard against exceeding limits of the address space. */
>  	address &= PAGE_MASK;
>  	if (address >= (TASK_SIZE & PAGE_MASK))
> @@ -1074,11 +1076,7 @@ static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
>
>  	/* Lock the VMA before expanding to prevent concurrent page faults */
>  	vma_start_write(vma);
> -	/*
> -	 * vma->vm_start/vm_end cannot change under us because the caller
> -	 * is required to hold the mmap_lock in read mode. We need the
> -	 * anon_vma lock to serialize against concurrent expand_stacks.
> -	 */
> +	/* We update the anon VMA tree. */
>  	anon_vma_lock_write(vma->anon_vma);
>
>  	/* Somebody else might have raced and expanded it already */
> @@ -1092,16 +1090,6 @@ static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
>  		if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
>  			error = acct_stack_growth(vma, size, grow);
>  			if (!error) {
> -				/*
> -				 * We only hold a shared mmap_lock lock here, so
> -				 * we need to protect against concurrent vma
> -				 * expansions. anon_vma_lock_write() doesn't
> -				 * help here, as we don't guarantee that all
> -				 * growable vmas in a mm share the same root
> -				 * anon vma. So, we reuse mm->page_table_lock
> -				 * to guard against concurrent vma expansions.
> -				 */
> -				spin_lock(&mm->page_table_lock);
>  				if (vma->vm_flags & VM_LOCKED)
>  					mm->locked_vm += grow;
>  				vm_stat_account(mm, vma->vm_flags, grow);
> @@ -1110,7 +1098,6 @@ static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
>  				/* Overwrite old entry in mtree. */
>  				vma_iter_store(&vmi, vma);
>  				anon_vma_interval_tree_post_update_vma(vma);
> -				spin_unlock(&mm->page_table_lock);
>
>  				perf_event_mmap(vma);
>  			}
> @@ -1137,6 +1124,8 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
>  	if (!(vma->vm_flags & VM_GROWSDOWN))
>  		return -EFAULT;
>
> +	mmap_assert_write_locked(mm);
> +
>  	address &= PAGE_MASK;
>  	if (address < mmap_min_addr || address < FIRST_USER_ADDRESS)
>  		return -EPERM;
> @@ -1166,11 +1155,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
>
>  	/* Lock the VMA before expanding to prevent concurrent page faults */
>  	vma_start_write(vma);
> -	/*
> -	 * vma->vm_start/vm_end cannot change under us because the caller
> -	 * is required to hold the mmap_lock in read mode. We need the
> -	 * anon_vma lock to serialize against concurrent expand_stacks.
> -	 */
> +	/* We update the anon VMA tree. */
>  	anon_vma_lock_write(vma->anon_vma);
>
>  	/* Somebody else might have raced and expanded it already */
> @@ -1184,16 +1169,6 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
>  		if (grow <= vma->vm_pgoff) {
>  			error = acct_stack_growth(vma, size, grow);
>  			if (!error) {
> -				/*
> -				 * We only hold a shared mmap_lock lock here, so
> -				 * we need to protect against concurrent vma
> -				 * expansions. anon_vma_lock_write() doesn't
> -				 * help here, as we don't guarantee that all
> -				 * growable vmas in a mm share the same root
> -				 * anon vma. So, we reuse mm->page_table_lock
> -				 * to guard against concurrent vma expansions.
> -				 */
> -				spin_lock(&mm->page_table_lock);
>  				if (vma->vm_flags & VM_LOCKED)
>  					mm->locked_vm += grow;
>  				vm_stat_account(mm, vma->vm_flags, grow);
> @@ -1203,7 +1178,6 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
>  				/* Overwrite old entry in mtree. */
>  				vma_iter_store(&vmi, vma);
>  				anon_vma_interval_tree_post_update_vma(vma);
> -				spin_unlock(&mm->page_table_lock);
>
>  				perf_event_mmap(vma);
>  			}
> --
> 2.47.0
>
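
As an aside, for anyone following along: the invariant the patch relies on is
simply that every caller of the expansion helpers already holds the mmap lock
exclusively, so the helpers can assert that rather than take an inner spinlock
around the counter updates. Below is a minimal userspace-style sketch of that
pattern, purely for illustration; every identifier in it is made up, and none
of it is kernel code.

/* Build with: cc -pthread lock_invariant.c -o lock_invariant */
#include <assert.h>
#include <pthread.h>
#include <stdio.h>

/* Illustrative stand-ins only; none of these names exist in the kernel. */
struct fake_mm {
	pthread_rwlock_t mmap_lock;	/* plays the role of mm->mmap_lock */
	int write_locked;		/* crude stand-in for the lockdep state */
	unsigned long locked_vm;	/* counter formerly guarded by page_table_lock */
};

/*
 * Plays the role of expand_upwards()/expand_downwards() after this patch:
 * no inner spinlock, only an assertion that the caller holds the write lock.
 */
static void fake_expand(struct fake_mm *mm, unsigned long grow)
{
	assert(mm->write_locked);	/* ~ mmap_assert_write_locked(mm) */
	mm->locked_vm += grow;		/* serialized by the exclusive outer lock */
}

/*
 * Plays the role of the callers listed in the commit message (expand_stack()
 * and friends): each takes the write lock before calling the expansion helper.
 */
static void fake_expand_stack(struct fake_mm *mm, unsigned long grow)
{
	pthread_rwlock_wrlock(&mm->mmap_lock);
	mm->write_locked = 1;
	fake_expand(mm, grow);
	mm->write_locked = 0;
	pthread_rwlock_unlock(&mm->mmap_lock);
}

int main(void)
{
	struct fake_mm mm = { .mmap_lock = PTHREAD_RWLOCK_INITIALIZER };

	fake_expand_stack(&mm, 4);
	printf("locked_vm = %lu\n", mm.locked_vm);
	return 0;
}

With the outer lock always held for write across the whole update, the inner
lock was pure overhead, which is exactly what the patch removes.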