Re: [PATCH] mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Mar 20, 2019 at 12:06 AM Yang Shi <yang.shi@xxxxxxxxxxxxxxxxx> wrote:
>
> When MPOL_MF_STRICT was specified and an existing page was already
> on a node that does not follow the policy, mbind() should return -EIO.
> But commit 6f4576e3687b ("mempolicy: apply page table walker on
> queue_pages_range()") broke the rule.
>
> Also, commit c8633798497c ("mm: mempolicy: mbind and migrate_pages
> support thp migration") didn't return the correct value for THP mbind()
> either.
>
> If MPOL_MF_STRICT is set, ignore vma_migratable() to make sure it reaches
> queue_pages_pte_range() or queue_pages_pmd() to check if an existing
> page was already on a node that does not follow the policy.  Also, a
> non-migratable vma may be used; return -EIO too if MPOL_MF_MOVE or
> MPOL_MF_MOVE_ALL was specified.
>
> Tested with https://github.com/metan-ucw/ltp/blob/master/testcases/kernel/syscalls/mbind/mbind02.c
>
> Fixes: 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()")
> Reported-by: Cyril Hrubis <chrubis@xxxxxxx>
> Cc: Vlastimil Babka <vbabka@xxxxxxx>
> Cc: stable@xxxxxxxxxxxxxxx
> Suggested-by: Kirill A. Shutemov <kirill@xxxxxxxxxxxxx>
> Signed-off-by: Yang Shi <yang.shi@xxxxxxxxxxxxxxxxx>
> Signed-off-by: Oscar Salvador <osalvador@xxxxxxx>
> ---
>  mm/mempolicy.c | 40 +++++++++++++++++++++++++++++++++-------
>  1 file changed, 33 insertions(+), 7 deletions(-)
>
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index abe7a67..401c817 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -447,6 +447,13 @@ static inline bool queue_pages_required(struct page *page,
>         return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT);
>  }
>
> +/*
> + * queue_pages_pmd() may have three kinds of return values.
> + * 1 - pages are placed on he right node or queued successfully.

Minor typo -> s/he/the ?

> + * 0 - THP was split.
> + * -EIO - the pmd is a migration entry, or MPOL_MF_STRICT was specified and an
> + *        existing page was already on a node that does not follow the policy.
> + */
>  static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
>                                 unsigned long end, struct mm_walk *walk)
>  {
> @@ -456,7 +463,7 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
>         unsigned long flags;
>
>         if (unlikely(is_pmd_migration_entry(*pmd))) {
> -               ret = 1;
> +               ret = -EIO;
>                 goto unlock;
>         }
>         page = pmd_page(*pmd);
> @@ -473,8 +480,15 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
>         ret = 1;
>         flags = qp->flags;
>         /* go to thp migration */
> -       if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
> +       if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
> +               if (!vma_migratable(walk->vma)) {
> +                       ret = -EIO;
> +                       goto unlock;
> +               }
> +
>                 migrate_page_add(page, qp->pagelist, flags);
> +       } else
> +               ret = -EIO;
>  unlock:
>         spin_unlock(ptl);
>  out:
> @@ -499,8 +513,10 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
>         ptl = pmd_trans_huge_lock(pmd, vma);
>         if (ptl) {
>                 ret = queue_pages_pmd(pmd, ptl, addr, end, walk);
> -               if (ret)
> +               if (ret > 0)
>                         return 0;
> +               else if (ret < 0)
> +                       return ret;
>         }
>
>         if (pmd_trans_unstable(pmd))
> @@ -521,11 +537,16 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
>                         continue;
>                 if (!queue_pages_required(page, qp))
>                         continue;
> -               migrate_page_add(page, qp->pagelist, flags);
> +               if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
> +                       if (!vma_migratable(vma))
> +                               break;
> +                       migrate_page_add(page, qp->pagelist, flags);
> +               } else
> +                       break;
>         }
>         pte_unmap_unlock(pte - 1, ptl);
>         cond_resched();
> -       return 0;
> +       return addr != end ? -EIO : 0;
>  }
>
>  static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
> @@ -595,7 +616,12 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
>         unsigned long endvma = vma->vm_end;
>         unsigned long flags = qp->flags;
>
> -       if (!vma_migratable(vma))
> +       /*
> +        * Need to check MPOL_MF_STRICT to return -EIO if possible,
> +        * regardless of vma_migratable().
> +        */
> +       if (!vma_migratable(vma) &&
> +           !(flags & MPOL_MF_STRICT))
>                 return 1;
>
>         if (endvma > end)
> @@ -622,7 +648,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
>         }
>
>         /* queue pages from current vma */
> -       if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
> +       if (flags & MPOL_MF_VALID)
>                 return 0;
>         return 1;
>  }
> --
> 1.8.3.1
>



[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux