[RFC][PATCH 01/26] mm, mpol: Re-implement check_*_range() using walk_page_range()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Fixes-by: Dan Smith <danms@xxxxxxxxxx>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
---
 mm/mempolicy.c |  141 ++++++++++++++++++---------------------------------------
 1 file changed, 45 insertions(+), 96 deletions(-)
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -460,105 +460,45 @@ static const struct mempolicy_operations
 static void migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags);
 
-/* Scan through pages checking if pages follow certain conditions. */
-static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
-		unsigned long addr, unsigned long end,
-		const nodemask_t *nodes, unsigned long flags,
-		void *private)
-{
-	pte_t *orig_pte;
-	pte_t *pte;
-	spinlock_t *ptl;
-
-	orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-	do {
-		struct page *page;
-		int nid;
-
-		if (!pte_present(*pte))
-			continue;
-		page = vm_normal_page(vma, addr, *pte);
-		if (!page)
-			continue;
-		/*
-		 * vm_normal_page() filters out zero pages, but there might
-		 * still be PageReserved pages to skip, perhaps in a VDSO.
-		 * And we cannot move PageKsm pages sensibly or safely yet.
-		 */
-		if (PageReserved(page) || PageKsm(page))
-			continue;
-		nid = page_to_nid(page);
-		if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
-			continue;
-
-		if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
-			migrate_page_add(page, private, flags);
-		else
-			break;
-	} while (pte++, addr += PAGE_SIZE, addr != end);
-	pte_unmap_unlock(orig_pte, ptl);
-	return addr != end;
-}
+struct mempol_walk_data {
+	struct vm_area_struct *vma;
+	const nodemask_t *nodes;
+	unsigned long flags;
+	void *private;
+};
 
-static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
-		unsigned long addr, unsigned long end,
-		const nodemask_t *nodes, unsigned long flags,
-		void *private)
+static int check_pte_entry(pte_t *pte, unsigned long addr,
+			   unsigned long end, struct mm_walk *walk)
 {
-	pmd_t *pmd;
-	unsigned long next;
+	struct mempol_walk_data *data = walk->private;
+	struct page *page;
+	int nid;
 
-	pmd = pmd_offset(pud, addr);
-	do {
-		next = pmd_addr_end(addr, end);
-		split_huge_page_pmd(vma->vm_mm, pmd);
-		if (pmd_none_or_clear_bad(pmd))
-			continue;
-		if (check_pte_range(vma, pmd, addr, next, nodes,
-				    flags, private))
-			return -EIO;
-	} while (pmd++, addr = next, addr != end);
-	return 0;
-}
+	if (!pte_present(*pte))
+		return 0;
 
-static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
-		unsigned long addr, unsigned long end,
-		const nodemask_t *nodes, unsigned long flags,
-		void *private)
-{
-	pud_t *pud;
-	unsigned long next;
+	page = vm_normal_page(data->vma, addr, *pte);
+	if (!page)
+		return 0;
 
-	pud = pud_offset(pgd, addr);
-	do {
-		next = pud_addr_end(addr, end);
-		if (pud_none_or_clear_bad(pud))
-			continue;
-		if (check_pmd_range(vma, pud, addr, next, nodes,
-				    flags, private))
-			return -EIO;
-	} while (pud++, addr = next, addr != end);
-	return 0;
-}
+	/*
+	 * vm_normal_page() filters out zero pages, but there might
+	 * still be PageReserved pages to skip, perhaps in a VDSO.
+	 * And we cannot move PageKsm pages sensibly or safely yet.
+	 */
+	if (PageReserved(page) || PageKsm(page))
+		return 0;
 
-static inline int check_pgd_range(struct vm_area_struct *vma,
-		unsigned long addr, unsigned long end,
-		const nodemask_t *nodes, unsigned long flags,
-		void *private)
-{
-	pgd_t *pgd;
-	unsigned long next;
+	nid = page_to_nid(page);
+	if (node_isset(nid, *data->nodes) == !!(data->flags & MPOL_MF_INVERT))
+		return 0;
 
-	pgd = pgd_offset(vma->vm_mm, addr);
-	do {
-		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd))
-			continue;
-		if (check_pud_range(vma, pgd, addr, next, nodes,
-				    flags, private))
-			return -EIO;
-	} while (pgd++, addr = next, addr != end);
-	return 0;
+	if (data->flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+		migrate_page_add(page, data->private, data->flags);
+		return 0;
+	}
+
+	return -EIO;
 }
 
 /*
@@ -570,9 +510,18 @@ static struct vm_area_struct *
 check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		const nodemask_t *nodes, unsigned long flags, void *private)
 {
-	int err;
 	struct vm_area_struct *first, *vma, *prev;
-
+	struct mempol_walk_data data = {
+		.nodes = nodes,
+		.flags = flags,
+		.private = private,
+	};
+	struct mm_walk walk = {
+		.pte_entry = check_pte_entry,
+		.mm = mm,
+		.private = &data,
+	};
+	int err;
 
 	first = find_vma(mm, start);
 	if (!first)
@@ -595,8 +544,8 @@ check_range(struct mm_struct *mm, unsign
 				endvma = end;
 			if (vma->vm_start > start)
 				start = vma->vm_start;
-			err = check_pgd_range(vma, start, endvma, nodes,
-						flags, private);
+			data.vma = vma;
+			err = walk_page_range(start, endvma, &walk);
 			if (err) {
 				first = ERR_PTR(err);
 				break;


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>


[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]