Re: AutoNUMA15

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Jun 01, 2012 at 12:54:06AM +0200, Andrea Arcangeli wrote:
> I'll push a fix in the origin/autonuma branch as soon as I figure it
> out...

7f9729f89000-7f9729faa000 rw-p 00000000 00:00 0
7f9729faa000-7f9729fea000 rw-s 000c0000 00:15 15364 /dev/mem

addr:00007f9729fc5000 vm_flags:08000875 anon_vma:          (null)
mapping:ffff880430025410 index:b8

The reason for the false positive, was that there are multiple vmas in
the same pmd range, and I was passing the single vma belonging to the
page fault address, for all ptes in that pmd.

The vma is only used for that check, this is why it was harmless.

The vma found during page fault would have been valid for the pmd huge
numa fixup and the pte numa fixup, but not for the less granular pmd
numa fixup (not huge).

This also explains why echo 0 >/sys/kernel/mm/autonuma/knuma_scand/pmd
avoided the warnings.

Can you test the below? I'll push the fix in the origin/autonuma branch.

Thanks!

---
 include/linux/autonuma.h |    4 ++--
 mm/autonuma.c            |   19 +++++++++++++++++--
 mm/memory.c              |    5 ++---
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/include/linux/autonuma.h b/include/linux/autonuma.h
index b0a8d87..67af86a 100644
--- a/include/linux/autonuma.h
+++ b/include/linux/autonuma.h
@@ -46,8 +46,8 @@ static inline void autonuma_free_page(struct page *page) {}
 
 extern pte_t __pte_numa_fixup(struct mm_struct *mm, struct vm_area_struct *vma,
 			      unsigned long addr, pte_t pte, pte_t *ptep);
-extern void __pmd_numa_fixup(struct mm_struct *mm, struct vm_area_struct *vma,
-			     unsigned long addr, pmd_t *pmd);
+extern void __pmd_numa_fixup(struct mm_struct *mm, unsigned long addr,
+			     pmd_t *pmd);
 extern void numa_hinting_fault(struct page *page, int numpages);
 
 #endif /* _LINUX_AUTONUMA_H */
diff --git a/mm/autonuma.c b/mm/autonuma.c
index d37647a..ca4c189 100644
--- a/mm/autonuma.c
+++ b/mm/autonuma.c
@@ -349,14 +349,16 @@ pte_t __pte_numa_fixup(struct mm_struct *mm, struct vm_area_struct *vma,
 	return pte;
 }
 
-void __pmd_numa_fixup(struct mm_struct *mm, struct vm_area_struct *vma,
+void __pmd_numa_fixup(struct mm_struct *mm,
 		      unsigned long addr, pmd_t *pmdp)
 {
 	pmd_t pmd;
 	pte_t *pte;
 	unsigned long _addr = addr & PMD_MASK;
+	unsigned long offset;
 	spinlock_t *ptl;
 	bool numa = false;
+	struct vm_area_struct *vma;
 
 	spin_lock(&mm->page_table_lock);
 	pmd = *pmdp;
@@ -369,12 +371,25 @@ void __pmd_numa_fixup(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (!numa)
 		return;
 
+	vma = find_vma(mm, _addr);
+	/* we're in a page fault so some vma must be in the range */
+	BUG_ON(!vma);
+	BUG_ON(vma->vm_start >= _addr + PMD_SIZE);
+	offset = max(_addr, vma->vm_start) & ~PMD_MASK;
+	VM_BUG_ON(offset >= PMD_SIZE);
 	pte = pte_offset_map_lock(mm, pmdp, _addr, &ptl);
-	for (addr = _addr; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
+	pte += offset >> PAGE_SHIFT;
+	for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
 		pte_t pteval = *pte;
 		struct page * page;
 		if (!pte_present(pteval))
 			continue;
+		if (addr >= vma->vm_end) {
+			vma = find_vma(mm, addr);
+			/* there's a pte present so there must be a vma */
+			BUG_ON(!vma);
+			BUG_ON(addr < vma->vm_start);
+		}
 		if (pte_numa(pteval)) {
 			pteval = pte_mknotnuma(pteval);
 			set_pte_at(mm, addr, pte, pteval);
diff --git a/mm/memory.c b/mm/memory.c
index f46cf8a..bbf10c7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3409,11 +3409,10 @@ static inline pte_t pte_numa_fixup(struct mm_struct *mm,
 }
 
 static inline void pmd_numa_fixup(struct mm_struct *mm,
-				  struct vm_area_struct *vma,
 				  unsigned long addr, pmd_t *pmd)
 {
 	if (pmd_numa(*pmd))
-		__pmd_numa_fixup(mm, vma, addr, pmd);
+		__pmd_numa_fixup(mm, addr, pmd);
 }
 
 static inline pmd_t huge_pmd_numa_fixup(struct mm_struct *mm,
@@ -3552,7 +3551,7 @@ retry:
 		}
 	}
 
-	pmd_numa_fixup(mm, vma, address, pmd);
+	pmd_numa_fixup(mm, address, pmd);
 
 	/*
 	 * Use __pte_alloc instead of pte_alloc_map, because we can't

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>


[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]