[RFC 2/2] Prevent promotion of page in madvise_dontneed

Minchan Kim <minchan.kim@xxxxxxxxx> · Sun, 21 Nov 2010 23:30:24 +0900

Now zap_pte_range alwayas promotes pages which are pte_young &&
!VM_SequentialReadHint(vma). But in case of calling MADV_DONTNEED,
it's unnecessary since the page wouldn't use any more.

If the page is sharred by other processes and it's real working set

Signed-off-by: Minchan Kim <minchan.kim@xxxxxxxxx>
Cc: Rik van Riel <riel@xxxxxxxxxx>
Cc: KOSAKI Motohiro <kosaki.motohiro@xxxxxxxxxxxxxx>
Cc: Johannes Weiner <hannes@xxxxxxxxxxx>
Cc: Nick Piggin <npiggin@xxxxxxxxx>
---
 include/linux/mm.h |    4 ++--
 mm/madvise.c       |    4 ++--
 mm/memory.c        |    9 ++++++---
 mm/mmap.c          |    4 ++--
 4 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 721f451..1555abe 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -778,11 +778,11 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size);
 unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
-		unsigned long size, struct zap_details *);
+		unsigned long size, struct zap_details *, int promote);
 unsigned long unmap_vmas(struct mmu_gather **tlb,
 		struct vm_area_struct *start_vma, unsigned long start_addr,
 		unsigned long end_addr, unsigned long *nr_accounted,
-		struct zap_details *);
+		struct zap_details *, int promote);
 
 /**
  * mm_walk - callbacks for walk_page_range
diff --git a/mm/madvise.c b/mm/madvise.c
index 319528b..247e5fd 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -171,9 +171,9 @@ static long madvise_dontneed(struct vm_area_struct * vma,
 			.nonlinear_vma = vma,
 			.last_index = ULONG_MAX,
 		};
-		zap_page_range(vma, start, end - start, &details);
+		zap_page_range(vma, start, end - start, &details, 0);
 	} else
-		zap_page_range(vma, start, end - start, NULL);
+		zap_page_range(vma, start, end - start, NULL, 0);
 	return 0;
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index 02e48aa..276abdb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1075,6 +1075,7 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
  * @end_addr: virtual address at which to end unmapping
  * @nr_accounted: Place number of unmapped pages in vm-accountable vma's here
  * @details: details of nonlinear truncation or shared cache invalidation
+ * @promote: whether pages inclued vma would be promoted or not
  *
  * Returns the end address of the unmapping (restart addr if interrupted).
  *
@@ -1096,7 +1097,7 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
 unsigned long unmap_vmas(struct mmu_gather **tlbp,
 		struct vm_area_struct *vma, unsigned long start_addr,
 		unsigned long end_addr, unsigned long *nr_accounted,
-		struct zap_details *details)
+		struct zap_details *details, int promote)
 {
 	long zap_work = ZAP_BLOCK_SIZE;
 	unsigned long tlb_start = 0;	/* For tlb_finish_mmu */
@@ -1184,9 +1185,10 @@ out:
  * @address: starting address of pages to zap
  * @size: number of bytes to zap
  * @details: details of nonlinear truncation or shared cache invalidation
+ * @promote: whether the page would be promoted or not
  */
 unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
-		unsigned long size, struct zap_details *details)
+		unsigned long size, struct zap_details *details, int promote)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct mmu_gather *tlb;
@@ -1196,7 +1198,8 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
 	lru_add_drain();
 	tlb = tlb_gather_mmu(mm, 0);
 	update_hiwater_rss(mm);
-	end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
+	end = unmap_vmas(&tlb, vma, address, end,
+			&nr_accounted, details, promote);
 	if (tlb)
 		tlb_finish_mmu(tlb, address, end);
 	return end;
diff --git a/mm/mmap.c b/mm/mmap.c
index b179abb..0d42c08 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1904,7 +1904,7 @@ static void unmap_region(struct mm_struct *mm,
 	lru_add_drain();
 	tlb = tlb_gather_mmu(mm, 0);
 	update_hiwater_rss(mm);
-	unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
+	unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL, 1);
 	vm_unacct_memory(nr_accounted);
 	free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
 				 next? next->vm_start: 0);
@@ -2278,7 +2278,7 @@ void exit_mmap(struct mm_struct *mm)
 	tlb = tlb_gather_mmu(mm, 1);
 	/* update_hiwater_rss(mm) here? but nobody should be looking */
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
-	end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
+	end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL, 1);
 	vm_unacct_memory(nr_accounted);
 
 	free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
-- 
1.7.0.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxxx  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>