On 04/23/2015 11:03 PM, Kirill A. Shutemov wrote:
> With the new refcounting a THP can belong to several VMAs. This makes
> it tricky to track THP pages when they are partially mlocked. It can
> lead to leaking mlocked pages to non-VM_LOCKED vmas and other problems.
>
> With this patch we split all pages on mlock and avoid faulting in or
> collapsing new THP in VM_LOCKED vmas.
>
> I've tried an alternative approach: do not mark THP pages mlocked and
> keep them on normal LRUs. This way vmscan could try to split huge pages
> under memory pressure and free up the subpages which don't belong to
> VM_LOCKED vmas. But this is a user-visible change: we screw up the
> Mlocked accounting reported in meminfo, so I had to leave this approach
> aside.
>
> We can bring something better later, but this should be good enough for
> now.
>
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
> Tested-by: Sasha Levin <sasha.levin@xxxxxxxxxx>

Acked-by: Jerome Marchand <jmarchan@xxxxxxxxxx>

> ---
>  mm/gup.c         |  2 ++
>  mm/huge_memory.c |  5 ++++-
>  mm/memory.c      |  3 ++-
>  mm/mlock.c       | 51 +++++++++++++++++++--------------------------------
>  4 files changed, 27 insertions(+), 34 deletions(-)
>
> diff --git a/mm/gup.c b/mm/gup.c
> index eaeeae15006b..7334eb24f414 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -882,6 +882,8 @@ long populate_vma_page_range(struct vm_area_struct *vma,
>  	VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);
>
>  	gup_flags = FOLL_TOUCH | FOLL_POPULATE;
> +	if (vma->vm_flags & VM_LOCKED)
> +		gup_flags |= FOLL_SPLIT;
>  	/*
>  	 * We want to touch writable mappings with a write fault in order
>  	 * to break COW, except for shared mappings because these don't COW
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index fd8af5b9917f..fa3d4f78b716 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -796,6 +796,8 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
>
>  	if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
>  		return VM_FAULT_FALLBACK;
> +	if (vma->vm_flags & VM_LOCKED)
> +		return VM_FAULT_FALLBACK;
>  	if (unlikely(anon_vma_prepare(vma)))
>  		return VM_FAULT_OOM;
>  	if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
> @@ -2467,7 +2469,8 @@ static bool hugepage_vma_check(struct vm_area_struct *vma)
>  	if ((!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
>  	    (vma->vm_flags & VM_NOHUGEPAGE))
>  		return false;
> -
> +	if (vma->vm_flags & VM_LOCKED)
> +		return false;
>  	if (!vma->anon_vma || vma->vm_ops)
>  		return false;
>  	if (is_vma_temporary_stack(vma))
> diff --git a/mm/memory.c b/mm/memory.c
> index 559c6651d6b6..8bbd3f88544b 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2156,7 +2156,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
>
>  	pte_unmap_unlock(page_table, ptl);
>  	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
> -	if (old_page) {
> +	/* THP pages are never mlocked */
> +	if (old_page && !PageTransCompound(old_page)) {
>  		/*
>  		 * Don't let another task, with possibly unlocked vma,
>  		 * keep the mlocked page.
> diff --git a/mm/mlock.c b/mm/mlock.c
> index 6fd2cf15e868..76cde3967483 100644
> --- a/mm/mlock.c
> +++ b/mm/mlock.c
> @@ -443,39 +443,26 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
>  		page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
>  				&page_mask);
>
> -		if (page && !IS_ERR(page)) {
> -			if (PageTransHuge(page)) {
> -				lock_page(page);
> -				/*
> -				 * Any THP page found by follow_page_mask() may
> -				 * have gotten split before reaching
> -				 * munlock_vma_page(), so we need to recompute
> -				 * the page_mask here.
> -				 */
> -				page_mask = munlock_vma_page(page);
> -				unlock_page(page);
> -				put_page(page); /* follow_page_mask() */
> -			} else {
> -				/*
> -				 * Non-huge pages are handled in batches via
> -				 * pagevec. The pin from follow_page_mask()
> -				 * prevents them from collapsing by THP.
> -				 */
> -				pagevec_add(&pvec, page);
> -				zone = page_zone(page);
> -				zoneid = page_zone_id(page);
> +		if (page && !IS_ERR(page) && !PageTransCompound(page)) {
> +			/*
> +			 * Non-huge pages are handled in batches via
> +			 * pagevec. The pin from follow_page_mask()
> +			 * prevents them from collapsing by THP.
> +			 */
> +			pagevec_add(&pvec, page);
> +			zone = page_zone(page);
> +			zoneid = page_zone_id(page);
>
> -				/*
> -				 * Try to fill the rest of pagevec using fast
> -				 * pte walk. This will also update start to
> -				 * the next page to process. Then munlock the
> -				 * pagevec.
> -				 */
> -				start = __munlock_pagevec_fill(&pvec, vma,
> -						zoneid, start, end);
> -				__munlock_pagevec(&pvec, zone);
> -				goto next;
> -			}
> +			/*
> +			 * Try to fill the rest of pagevec using fast
> +			 * pte walk. This will also update start to
> +			 * the next page to process. Then munlock the
> +			 * pagevec.
> +			 */
> +			start = __munlock_pagevec_fill(&pvec, vma,
> +					zoneid, start, end);
> +			__munlock_pagevec(&pvec, zone);
> +			goto next;
>  		}
>  		/* It's a bug to munlock in the middle of a THP page */
>  		VM_BUG_ON((start >> PAGE_SHIFT) & page_mask);
>
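For anyone who wants to poke at the fault-in side from userspace, here is a
minimal test sketch (untested here, not part of the patch; it assumes x86-64
with 2M huge pages and transparent_hugepage set to "always" or "madvise").
mlock() populates the range through populate_vma_page_range(), so with the
patch applied the AnonHugePages line in /proc/self/smaps for the locked vma
should stay at 0 kB:

/*
 * thp-mlock-test.c -- mlock an anonymous region and check that no THP
 * backs it.  Build: gcc -O2 -o thp-mlock-test thp-mlock-test.c
 * Prints every "AnonHugePages:" line from smaps; the vma covering the
 * mapping is the interesting one.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#define LEN (8UL << 20)			/* 8M: room for several 2M THPs */

int main(void)
{
	char line[256];
	FILE *f;
	char *p;

	p = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	madvise(p, LEN, MADV_HUGEPAGE);	/* ask for THP */
	if (mlock(p, LEN)) {		/* populate; must not fault in THP */
		perror("mlock");
		return 1;
	}
	memset(p, 1, LEN);		/* touch every page */

	f = fopen("/proc/self/smaps", "r");
	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "AnonHugePages:", 14))
			fputs(line, stdout);
	fclose(f);
	return 0;
}

On an unpatched kernel with THP enabled I'd expect a non-zero AnonHugePages
value for that vma; with the patch it should read 0 kB. Treat the test as
illustrative only.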