[PATCH v4] mm: Fix checking unmapped holes for mbind

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Li Xinhai <lixinhai.lxh@xxxxxxxxx>

mbind() is required to report EFAULT if range, specified by addr and len,
contains unmapped holes. In current implementation, below rules are applied
for this checking:
1 Unmapped holes at any part of the specified range should be reported as
  EFAULT if mbind() for none MPOL_DEFAULT cases;
2 Unmapped holes at any part of the specified range should be ignored (do
  not reprot EFAULT) if mbind() for MPOL_DEFAULT case;
3 The whole range in an unmapped hole should be reported as EFAULT;
Note that rule 2 does not fullfill the mbind() API definition, but since
that behavior has existed for long days (refer the usage of internal flag
MPOL_MF_DISCONTIG_OK), this patch does not plan to change it.

Cases do not follow those rules and been fixed by this patch are:
case_1: unmapped hole at tail side of the sepcified range when mbind() for
        non MPOL_DEFAULT cases, EFAULT is not reported (conflicts rule 1).
        [  hole  ][  vma  ][  hole  ]
                       [  range  ]
case_2: unmapped hole at head side of the specified range when mbind() for
        MPOL_DEFAULT case, EFAULT is reported (conflicts rule 2).
        [  hole  ][  vma  ][  hole  ]
             [  range  ]

Fixes: 9d8cebd4bcd7 ("mm: fix mbind vma merge problem")
Fixes: 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()")
Fixes: 48684a65b4e3 ("mm: pagewalk: fix misbehavior of walk_page_range for vma(VM_PFNMAP)")
Signed-off-by: Li Xinhai <lixinhai.lxh@xxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Naoya Horiguchi<n-horiguchi@xxxxxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
Cc: Vlastimil Babka <vbabka@xxxxxxx>
Cc: linux-man <linux-man@xxxxxxxxxxxxxxx>
---
Changes v3->v4:
  - fix my email address;

Changes v2->v3:
  - Add more details in change log;
  - Check holes in .test_walk() and after call walk_page_range();


 mm/mempolicy.c | 52 ++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 16 deletions(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 4ae967b..b2e10bf 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -410,7 +410,9 @@ struct queue_pages {
 	struct list_head *pagelist;
 	unsigned long flags;
 	nodemask_t *nmask;
-	struct vm_area_struct *prev;
+	unsigned long start;
+	struct vm_area_struct *first;
+	struct vm_area_struct *last;
 };
 
 /*
@@ -618,6 +620,21 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
 	unsigned long endvma = vma->vm_end;
 	unsigned long flags = qp->flags;
 
+	/* range check first */
+	VM_BUG_ON((vma->vm_start > start) || (vma->vm_end < end));
+
+	if (!qp->first) {
+		qp->first = vma;
+		if (!(flags & MPOL_MF_DISCONTIG_OK) &&
+			(qp->start < vma->vm_start))
+			/* hole at head side of range */
+			return -EFAULT;
+	} else if (!(flags & MPOL_MF_DISCONTIG_OK) &&
+		(vma->vm_prev->vm_end < vma->vm_start))
+		/* hole at middle of range */
+		return -EFAULT;
+	qp->last = vma;
+
 	/*
 	 * Need check MPOL_MF_STRICT to return -EIO if possible
 	 * regardless of vma_migratable
@@ -628,17 +645,6 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
 
 	if (endvma > end)
 		endvma = end;
-	if (vma->vm_start > start)
-		start = vma->vm_start;
-
-	if (!(flags & MPOL_MF_DISCONTIG_OK)) {
-		if (!vma->vm_next && vma->vm_end < end)
-			return -EFAULT;
-		if (qp->prev && qp->prev->vm_end < vma->vm_start)
-			return -EFAULT;
-	}
-
-	qp->prev = vma;
 
 	if (flags & MPOL_MF_LAZY) {
 		/* Similar to task_numa_work, skip inaccessible VMAs */
@@ -679,14 +685,29 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
 		nodemask_t *nodes, unsigned long flags,
 		struct list_head *pagelist)
 {
+	int err;
 	struct queue_pages qp = {
 		.pagelist = pagelist,
 		.flags = flags,
 		.nmask = nodes,
-		.prev = NULL,
+		.start = start,
+		.first = NULL,
+		.last = NULL,
 	};
 
-	return walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp);
+	err = walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp);
+
+	if (err != -EFAULT) {
+		if (!qp.first)
+			/* whole range in hole */
+			err = -EFAULT;
+		else if (!(flags & MPOL_MF_DISCONTIG_OK) &&
+			(qp.last->vm_end < end))
+			/* hole at tail side of range */
+			err = -EFAULT;
+	}
+
+	return err;
 }
 
 /*
@@ -738,8 +759,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
 	unsigned long vmend;
 
 	vma = find_vma(mm, start);
-	if (!vma || vma->vm_start > start)
-		return -EFAULT;
+	VM_BUG_ON(!vma);
 
 	prev = vma->vm_prev;
 	if (start > vma->vm_start)
-- 
1.8.3.1




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux