queue_pages_range() checks for unmapped holes in addition to queueing
pages for migration. The rules for checking unmapped holes are:

1 Unmapped holes at any part of the specified range should be reported
  as EFAULT when mbind() is called for non-MPOL_DEFAULT cases;
2 Unmapped holes at any part of the specified range should be ignored
  when mbind() is called for the MPOL_DEFAULT case.

Note that the second rule is the current implementation, but it seems
to conflict with the Linux API definition.

queue_pages_test_walk() is fixed by introducing new fields in struct
queue_pages which help to check:

1 holes at the head and tail sides of the specified range;
2 whether the whole range falls in a hole.

Besides, queue_pages_test_walk() must update the previous vma record
regardless of whether the current vma should be considered for queueing
pages.

Fixes: 9d8cebd4bcd7 ("mm: fix mbind vma merge problem")
Fixes: 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()")
Fixes: 48684a65b4e3 ("mm: pagewalk: fix misbehavior of walk_page_range for vma(VM_PFNMAP)")
Signed-off-by: Li Xinhai <lixinhai.li@xxxxxxxxx>
---
Changes in v2:
  - Fix the unmapped holes checking in queue_pages_test_walk() instead
    of mbind_range().

Two illustrative userspace sketches (not for merging) are appended
after the patch.

 mm/mempolicy.c | 44 +++++++++++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 15 deletions(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 4ae967bcf954..24087dfa4dcd 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -411,6 +411,9 @@ struct queue_pages {
 	unsigned long flags;
 	nodemask_t *nmask;
 	struct vm_area_struct *prev;
+	unsigned long start;
+	unsigned long end;
+	int in_hole;
 };
 
 /*
@@ -618,28 +621,31 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
 	unsigned long endvma = vma->vm_end;
 	unsigned long flags = qp->flags;
 
-	/*
-	 * Need check MPOL_MF_STRICT to return -EIO if possible
-	 * regardless of vma_migratable
-	 */
-	if (!vma_migratable(vma) &&
-	    !(flags & MPOL_MF_STRICT))
-		return 1;
-
+	/* range check first */
 	if (endvma > end)
 		endvma = end;
-	if (vma->vm_start > start)
-		start = vma->vm_start;
+	BUG_ON((vma->vm_start > start) || (vma->vm_end < end));
 
+	qp->in_hole = 0;
 	if (!(flags & MPOL_MF_DISCONTIG_OK)) {
-		if (!vma->vm_next && vma->vm_end < end)
+		if ((vma->vm_end < qp->end) &&
+			(!vma->vm_next || vma->vm_end < vma->vm_next->vm_start))
 			return -EFAULT;
-		if (qp->prev && qp->prev->vm_end < vma->vm_start)
+		if ((qp->prev && qp->prev->vm_end < vma->vm_start) ||
+			(!qp->prev && qp->start < vma->vm_start))
 			return -EFAULT;
 	}
 
 	qp->prev = vma;
 
+	/*
+	 * Need check MPOL_MF_STRICT to return -EIO if possible
+	 * regardless of vma_migratable
+	 */
+	if (!vma_migratable(vma) &&
+	    !(flags & MPOL_MF_STRICT))
+		return 1;
+
 	if (flags & MPOL_MF_LAZY) {
 		/* Similar to task_numa_work, skip inaccessible VMAs */
 		if (!is_vm_hugetlb_page(vma) &&
@@ -679,14 +685,23 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		nodemask_t *nodes, unsigned long flags,
 		struct list_head *pagelist)
 {
+	int err;
 	struct queue_pages qp = {
 		.pagelist = pagelist,
 		.flags = flags,
 		.nmask = nodes,
 		.prev = NULL,
+		.start = start,
+		.end = end,
+		.in_hole = 1,
 	};
 
-	return walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp);
+	err = walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp);
+	/* whole range in unmapped hole */
+	if (qp.in_hole && !(flags & MPOL_MF_DISCONTIG_OK))
+		err = -EFAULT;
+
+	return err;
 }
 
 /*
@@ -738,8 +753,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
 	unsigned long vmend;
 
 	vma = find_vma(mm, start);
-	if (!vma || vma->vm_start > start)
-		return -EFAULT;
+	BUG_ON(!vma);
 
 	prev = vma->vm_prev;
 	if (start > vma->vm_start)
-- 
2.22.0
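
Appendix 1 (illustration only, not part of the patch): a minimal
userspace sketch of the mbind() hole semantics described above. It maps
three pages, punches a hole in the middle, and calls mbind() through
the raw syscall; the MPOL_DEFAULT/MPOL_BIND values mirror <numaif.h>,
and a CONFIG_NUMA kernel with node 0 online is assumed.

/* mbind-holes.c: exercise the hole-checking rules for mbind() */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#define MPOL_DEFAULT	0	/* values mirror <numaif.h> */
#define MPOL_BIND	2

static const char *try_mbind(void *addr, unsigned long len, int mode,
			     unsigned long *nmask, unsigned long maxnode)
{
	long ret = syscall(SYS_mbind, addr, len, mode, nmask, maxnode, 0);

	return ret ? strerror(errno) : "ok";
}

int main(void)
{
	long pg = sysconf(_SC_PAGESIZE);
	unsigned long nodemask = 1;	/* bind to node 0 only */
	unsigned long maxnode = 8 * sizeof(nodemask);
	char *p;

	p = mmap(NULL, 3 * pg, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;

	/* punch an unmapped hole in the middle page */
	munmap(p + pg, pg);

	/* rule 1: non-MPOL_DEFAULT over a hole, expect EFAULT */
	printf("hole, MPOL_BIND:     %s\n",
	       try_mbind(p, 3 * pg, MPOL_BIND, &nodemask, maxnode));

	/* rule 2: MPOL_DEFAULT ignores the hole, expect ok */
	printf("hole, MPOL_DEFAULT:  %s\n",
	       try_mbind(p, 3 * pg, MPOL_DEFAULT, NULL, 0));

	/* whole range in a hole: caught via qp.in_hole, expect EFAULT */
	munmap(p, pg);
	munmap(p + 2 * pg, pg);
	printf("all-hole, MPOL_BIND: %s\n",
	       try_mbind(p, 3 * pg, MPOL_BIND, &nodemask, maxnode));

	return 0;
}

The first and third calls exercise the EFAULT paths this patch
consolidates in queue_pages_range(); the second succeeds because
do_mbind() sets MPOL_MF_DISCONTIG_OK when no new policy is installed.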
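
Appendix 2 (illustration only): the same hole detection modeled over a
sorted interval array standing in for the vma list. The names (struct
ivl, check_holes) are made up for this sketch; the three checks mirror
the in_hole, prev and vm_next tests added by the patch.

/* holes-model.c: userspace model of the test_walk() hole checks */
#include <assert.h>
#include <stddef.h>

struct ivl { unsigned long start, end; };	/* stands in for a vma */

/* Return 0 if [start, end) is fully covered, -1 if any hole is hit. */
static int check_holes(const struct ivl *v, size_t n,
		       unsigned long start, unsigned long end)
{
	const struct ivl *prev = NULL;	/* mirrors qp->prev */
	int in_hole = 1;		/* mirrors qp->in_hole */
	size_t i;

	for (i = 0; i < n; i++) {
		/* the page walk only visits intervals overlapping the range */
		if (v[i].end <= start || end <= v[i].start)
			continue;
		in_hole = 0;

		/* hole at the middle or tail side of the range */
		if (v[i].end < end &&
		    (i == n - 1 || v[i].end < v[i + 1].start))
			return -1;
		/* hole at the head side, or between prev and this one */
		if ((prev && prev->end < v[i].start) ||
		    (!prev && start < v[i].start))
			return -1;

		prev = &v[i];	/* updated for every visited interval */
	}

	return in_hole ? -1 : 0;	/* whole range inside a hole */
}

int main(void)
{
	const struct ivl v[] = { { 0, 4 }, { 6, 10 } };	/* hole at [4,6) */

	assert(check_holes(v, 2, 0, 4) == 0);	/* fully covered */
	assert(check_holes(v, 2, 2, 8) == -1);	/* hole in the middle */
	assert(check_holes(v, 2, 5, 8) == -1);	/* hole at the head */
	assert(check_holes(v, 2, 8, 12) == -1);	/* hole at the tail */
	assert(check_holes(v, 2, 4, 6) == -1);	/* whole range in hole */
	return 0;
}

Updating prev for every visited interval, not only migratable ones, is
what keeps the head and middle checks correct; that corresponds to the
qp->prev update the patch moves ahead of the vma_migratable() test.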