On Wed, Mar 20, 2019 at 02:35:56AM +0800, Yang Shi wrote: > When MPOL_MF_STRICT was specified and an existing page was already > on a node that does not follow the policy, mbind() should return -EIO. > But commit 6f4576e3687b ("mempolicy: apply page table walker on > queue_pages_range()") broke the rule. > > And, commit c8633798497c ("mm: mempolicy: mbind and migrate_pages > support thp migration") didn't return the correct value for THP mbind() > either. > > If MPOL_MF_STRICT is set, ignore vma_migratable() to make sure it reaches > queue_pages_pte_range() or queue_pages_pmd() to check if an existing > page was already on a node that does not follow the policy. And, > a non-migratable vma may be used, so return -EIO too if MPOL_MF_MOVE or > MPOL_MF_MOVE_ALL was specified. > > Tested with https://github.com/metan-ucw/ltp/blob/master/testcases/kernel/syscalls/mbind/mbind02.c > > Fixes: 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()") > Reported-by: Cyril Hrubis <chrubis@xxxxxxx> > Cc: Vlastimil Babka <vbabka@xxxxxxx> > Cc: stable@xxxxxxxxxxxxxxx > Suggested-by: Kirill A. Shutemov <kirill@xxxxxxxxxxxxx> > Signed-off-by: Yang Shi <yang.shi@xxxxxxxxxxxxxxxxx> > Signed-off-by: Oscar Salvador <osalvador@xxxxxxx> > --- > mm/mempolicy.c | 40 +++++++++++++++++++++++++++++++++------- > 1 file changed, 33 insertions(+), 7 deletions(-) > > diff --git a/mm/mempolicy.c b/mm/mempolicy.c > index abe7a67..401c817 100644 > --- a/mm/mempolicy.c > +++ b/mm/mempolicy.c > @@ -447,6 +447,13 @@ static inline bool queue_pages_required(struct page *page, > return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT); > } > > +/* > + * The queue_pages_pmd() may have three kinds of return values. > + * 1 - pages are placed on the right node or queued successfully. > + * 0 - THP was split. > + * -EIO - is a migration entry, or MPOL_MF_STRICT was specified and an existing > + * page was already on a node that does not follow the policy. 
> + */ > static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, > unsigned long end, struct mm_walk *walk) > { > @@ -456,7 +463,7 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, > unsigned long flags; > > if (unlikely(is_pmd_migration_entry(*pmd))) { > - ret = 1; > + ret = -EIO; > goto unlock; > } > page = pmd_page(*pmd); > @@ -473,8 +480,15 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, > ret = 1; > flags = qp->flags; > /* go to thp migration */ > - if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) > + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { > + if (!vma_migratable(walk->vma)) { > + ret = -EIO; > + goto unlock; > + } > + > migrate_page_add(page, qp->pagelist, flags); > + } else > + ret = -EIO; > unlock: > spin_unlock(ptl); > out: > @@ -499,8 +513,10 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, > ptl = pmd_trans_huge_lock(pmd, vma); > if (ptl) { > ret = queue_pages_pmd(pmd, ptl, addr, end, walk); > - if (ret) > + if (ret > 0) > return 0; > + else if (ret < 0) > + return ret; > } > > if (pmd_trans_unstable(pmd)) > @@ -521,11 +537,16 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, > continue; > if (!queue_pages_required(page, qp)) > continue; > - migrate_page_add(page, qp->pagelist, flags); > + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { > + if (!vma_migratable(vma)) > + break; > + migrate_page_add(page, qp->pagelist, flags); > + } else > + break; > } > pte_unmap_unlock(pte - 1, ptl); > cond_resched(); > - return 0; > + return addr != end ? 
-EIO : 0; > } > > static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask, > @@ -595,7 +616,12 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, > unsigned long endvma = vma->vm_end; > unsigned long flags = qp->flags; > > - if (!vma_migratable(vma)) > + /* > + * Need to check MPOL_MF_STRICT to return -EIO if possible > + * regardless of vma_migratable() > + */ > + if (!vma_migratable(vma) && > + !(flags & MPOL_MF_STRICT)) > return 1; > > if (endvma > end) > @@ -622,7 +648,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, > } > > /* queue pages from current vma */ > - if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) > + if (flags & MPOL_MF_VALID) > return 0; > return 1; > } > -- > 1.8.3.1 > Acked-by: Rafael Aquini <aquini@xxxxxxxxxx>