On Wed, Jul 12, 2023 at 12:02 AM Yin Fengwei <fengwei.yin@xxxxxxxxx> wrote:
>
> If a large folio is in the range of a VM_LOCKED VMA, it should be
> mlocked to avoid being picked by page reclaim, which may split
> the large folio and then mlock each page again.
>
> Mlock this kind of large folio to prevent it from being picked
> by page reclaim.
>
> For a large folio which crosses the boundary of a VM_LOCKED VMA,
> we'd better not mlock it. So if the system is under memory
> pressure, this kind of large folio will be split and the pages
> outside the VM_LOCKED VMA can be reclaimed.
>
> Signed-off-by: Yin Fengwei <fengwei.yin@xxxxxxxxx>
> ---
>  mm/internal.h | 11 ++++++++---
>  mm/rmap.c     | 34 +++++++++++++++++++++++++++-------
>  2 files changed, 35 insertions(+), 10 deletions(-)
>
> diff --git a/mm/internal.h b/mm/internal.h
> index c7dd15d8de3ef..776141de2797a 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -643,7 +643,8 @@ static inline void mlock_vma_folio(struct folio *folio,
>  	 * still be set while VM_SPECIAL bits are added: so ignore it then.
>  	 */
>  	if (unlikely((vma->vm_flags & (VM_LOCKED|VM_SPECIAL)) == VM_LOCKED) &&
> -	    (compound || !folio_test_large(folio)))
> +	    (compound || !folio_test_large(folio) ||
> +	    folio_in_range(folio, vma, vma->vm_start, vma->vm_end)))
>  		mlock_folio(folio);
>  }

This can be simplified:
1. remove the compound parameter
2. make the "if":

	if (unlikely((vma->vm_flags & (VM_LOCKED|VM_SPECIAL)) == VM_LOCKED) &&
	    folio_within_vma())
		mlock_folio(folio);

(A rough sketch of the resulting helpers is at the bottom of this mail.)

> @@ -651,8 +652,12 @@ void munlock_folio(struct folio *folio);
>  static inline void munlock_vma_folio(struct folio *folio,
>  		struct vm_area_struct *vma, bool compound)

Remove the compound parameter here too.

>  {
> -	if (unlikely(vma->vm_flags & VM_LOCKED) &&
> -	    (compound || !folio_test_large(folio)))
> +	/*
> +	 * To handle the case where a mlocked large folio is unmapped from
> +	 * the VMA piece by piece, allow munlocking a large folio which is
> +	 * only partially mapped to the VMA.
> +	 */
> +	if (unlikely(vma->vm_flags & VM_LOCKED))
>  		munlock_folio(folio);
>  }
>
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 2668f5ea35342..455f415d8d9ca 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -803,6 +803,14 @@ struct folio_referenced_arg {
>  	unsigned long vm_flags;
>  	struct mem_cgroup *memcg;
>  };
> +
> +static inline bool should_restore_mlock(struct folio *folio,
> +		struct vm_area_struct *vma, bool pmd_mapped)
> +{
> +	return !folio_test_large(folio) ||
> +		pmd_mapped || folio_within_vma(folio, vma);
> +}

This is just folio_within_vma() :)

>  /*
>   * arg: folio_referenced_arg will be passed
>   */
> @@ -816,13 +824,25 @@ static bool folio_referenced_one(struct folio *folio,
>  	while (page_vma_mapped_walk(&pvmw)) {
>  		address = pvmw.address;
>
> -		if ((vma->vm_flags & VM_LOCKED) &&
> -		    (!folio_test_large(folio) || !pvmw.pte)) {
> -			/* Restore the mlock which got missed */
> -			mlock_vma_folio(folio, vma, !pvmw.pte);
> -			page_vma_mapped_walk_done(&pvmw);
> -			pra->vm_flags |= VM_LOCKED;
> -			return false; /* To break the loop */
> +		if (vma->vm_flags & VM_LOCKED) {
> +			if (should_restore_mlock(folio, vma, !pvmw.pte)) {
> +				/* Restore the mlock which got missed */
> +				mlock_vma_folio(folio, vma, !pvmw.pte);
> +				page_vma_mapped_walk_done(&pvmw);
> +				pra->vm_flags |= VM_LOCKED;
> +				return false; /* To break the loop */
> +			} else {

There is no need for "else", or just:

	if (!folio_within_vma())
		goto dec_pra_mapcount;
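Something like this (untested; assuming the compound parameter of
mlock_vma_folio() is dropped as suggested above, that folio_within_vma()
takes (folio, vma), and that dec_pra_mapcount is a new label placed at
the existing pra->mapcount-- at the bottom of the loop):

	while (page_vma_mapped_walk(&pvmw)) {
		address = pvmw.address;

		if (vma->vm_flags & VM_LOCKED) {
			/*
			 * A large folio crossing the VMA boundary is
			 * left for page reclaim to split; only folios
			 * fully within the VMA get the missed mlock
			 * restored.
			 */
			if (!folio_within_vma(folio, vma))
				goto dec_pra_mapcount;

			/* Restore the mlock which got missed */
			mlock_vma_folio(folio, vma);
			page_vma_mapped_walk_done(&pvmw);
			pra->vm_flags |= VM_LOCKED;
			return false; /* To break the loop */
		}

		/* ... the existing referenced checks ... */

dec_pra_mapcount:
		pra->mapcount--;
	}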
> +				/*
> +				 * For a large folio which crosses VMA
> +				 * boundaries, it's expected to be picked
> +				 * by page reclaim. But skip the references
> +				 * from pages which are in the range of the
> +				 * VM_LOCKED VMA, as page reclaim should
> +				 * only count references from pages outside
> +				 * the VM_LOCKED VMA.
> +				 */
> +				pra->mapcount--;
> +				continue;
> +			}
>  		}
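For reference, after removing both compound parameters, the two helpers
in mm/internal.h would end up roughly like this (untested; I'm assuming
folio_within_vma(folio, vma) is a small new wrapper around the
folio_in_range() call from the patch):

	static inline bool folio_within_vma(struct folio *folio,
			struct vm_area_struct *vma)
	{
		return folio_in_range(folio, vma, vma->vm_start, vma->vm_end);
	}

	static inline void mlock_vma_folio(struct folio *folio,
			struct vm_area_struct *vma)
	{
		/*
		 * The VM_SPECIAL bits may still be set while VM_LOCKED is:
		 * ignore the folio then, and only mlock folios which sit
		 * fully within the VM_LOCKED VMA.
		 */
		if (unlikely((vma->vm_flags & (VM_LOCKED|VM_SPECIAL)) == VM_LOCKED) &&
		    folio_within_vma(folio, vma))
			mlock_folio(folio);
	}

	static inline void munlock_vma_folio(struct folio *folio,
			struct vm_area_struct *vma)
	{
		/*
		 * A mlocked large folio may be unmapped from the VMA piece
		 * by piece, so munlock even a partially mapped large folio.
		 */
		if (unlikely(vma->vm_flags & VM_LOCKED))
			munlock_folio(folio);
	}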