[patch 101/119] mm: fix races between swapoff and flush dcache

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Huang Ying <ying.huang@xxxxxxxxx>
Subject: mm: fix races between swapoff and flush dcache

Thanks to 4b3ef9daa4fc ("mm/swap: split swap cache into 64MB trunks"),
after swapoff the address_space associated with the swap device will be
freed.  So page_mapping() users which may touch the address_space need
some kind of mechanism to prevent the address_space from being freed
while it is being accessed.

The dcache flushing functions (flush_dcache_page(), etc) in architecture
specific code may access the address_space of swap device for anonymous
pages in swap cache via page_mapping() function.  But in some cases there
are no mechanisms to prevent the swap device from being swapoff, for
example,

CPU1					CPU2
__get_user_pages()			swapoff()
  flush_dcache_page()
    mapping = page_mapping()
      ...				  exit_swap_address_space()
      ...				    kvfree(spaces)
      mapping_mapped(mapping)

The address space may be accessed after being freed.

But from cachetlb.txt and Russell King, flush_dcache_page() only cares
about file cache pages, for anonymous pages, flush_anon_page() should be
used.  The implementation of flush_dcache_page() in all architectures
follows this too.  They will check whether page_mapping() is NULL and
whether mapping_mapped() is true to determine whether to flush the dcache
immediately.  And they will use interval tree (mapping->i_mmap) to find
all user space mappings.  Meanwhile, mapping_mapped() and mapping->i_mmap
aren't used by anonymous pages in swap cache at all.

So, to fix the race between swapoff and flush dcache, page_mapping_file() is
added to return the address_space for file cache pages and NULL otherwise.
All page_mapping() invocations in flush dcache functions are replaced with
page_mapping_file().

[akpm@xxxxxxxxxxxxxxxxxxxx: simplify page_mapping_file(), per Mike]
Link: http://lkml.kernel.org/r/20180305083634.15174-1-ying.huang@xxxxxxxxx
Signed-off-by: "Huang, Ying" <ying.huang@xxxxxxxxx>
Reviewed-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Minchan Kim <minchan@xxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
Cc: Johannes Weiner <hannes@xxxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxx>
Cc: Chen Liqin <liqin.linux@xxxxxxxxx>
Cc: Russell King <linux@xxxxxxxxxxxxxxx>
Cc: Yoshinori Sato <ysato@xxxxxxxxxxxxxxxxxxxx>
Cc: "James E.J. Bottomley" <jejb@xxxxxxxxxxxxxxxx>
Cc: Guan Xuetao <gxt@xxxxxxxxxxxxxxx>
Cc: "David S. Miller" <davem@xxxxxxxxxxxxx>
Cc: Chris Zankel <chris@xxxxxxxxxx>
Cc: Vineet Gupta <vgupta@xxxxxxxxxxxx>
Cc: Ley Foon Tan <lftan@xxxxxxxxxx>
Cc: Ralf Baechle <ralf@xxxxxxxxxxxxxx>
Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx>
Cc: Mike Rapoport <rppt@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 arch/arc/mm/cache.c           |    2 +-
 arch/arm/mm/copypage-v4mc.c   |    2 +-
 arch/arm/mm/copypage-v6.c     |    2 +-
 arch/arm/mm/copypage-xscale.c |    2 +-
 arch/arm/mm/fault-armv.c      |    2 +-
 arch/arm/mm/flush.c           |    6 +++---
 arch/mips/mm/cache.c          |    2 +-
 arch/nios2/mm/cacheflush.c    |    4 ++--
 arch/parisc/kernel/cache.c    |    5 +++--
 arch/sh/mm/cache-sh4.c        |    2 +-
 arch/sh/mm/cache-sh7705.c     |    2 +-
 arch/sparc/kernel/smp_64.c    |    8 ++++----
 arch/sparc/mm/init_64.c       |    6 +++---
 arch/sparc/mm/tlb.c           |    2 +-
 arch/unicore32/mm/flush.c     |    2 +-
 arch/unicore32/mm/mmu.c       |    2 +-
 arch/xtensa/mm/cache.c        |    2 +-
 include/linux/mm.h            |    1 +
 mm/util.c                     |   10 ++++++++++
 19 files changed, 38 insertions(+), 26 deletions(-)

diff -puN arch/arc/mm/cache.c~mm-fix-races-between-swapoff-and-flush-dcache arch/arc/mm/cache.c
--- a/arch/arc/mm/cache.c~mm-fix-races-between-swapoff-and-flush-dcache
+++ a/arch/arc/mm/cache.c
@@ -833,7 +833,7 @@ void flush_dcache_page(struct page *page
 	}
 
 	/* don't handle anon pages here */
-	mapping = page_mapping(page);
+	mapping = page_mapping_file(page);
 	if (!mapping)
 		return;
 
diff -puN arch/arm/mm/copypage-v4mc.c~mm-fix-races-between-swapoff-and-flush-dcache arch/arm/mm/copypage-v4mc.c
--- a/arch/arm/mm/copypage-v4mc.c~mm-fix-races-between-swapoff-and-flush-dcache
+++ a/arch/arm/mm/copypage-v4mc.c
@@ -70,7 +70,7 @@ void v4_mc_copy_user_highpage(struct pag
 	void *kto = kmap_atomic(to);
 
 	if (!test_and_set_bit(PG_dcache_clean, &from->flags))
-		__flush_dcache_page(page_mapping(from), from);
+		__flush_dcache_page(page_mapping_file(from), from);
 
 	raw_spin_lock(&minicache_lock);
 
diff -puN arch/arm/mm/copypage-v6.c~mm-fix-races-between-swapoff-and-flush-dcache arch/arm/mm/copypage-v6.c
--- a/arch/arm/mm/copypage-v6.c~mm-fix-races-between-swapoff-and-flush-dcache
+++ a/arch/arm/mm/copypage-v6.c
@@ -76,7 +76,7 @@ static void v6_copy_user_highpage_aliasi
 	unsigned long kfrom, kto;
 
 	if (!test_and_set_bit(PG_dcache_clean, &from->flags))
-		__flush_dcache_page(page_mapping(from), from);
+		__flush_dcache_page(page_mapping_file(from), from);
 
 	/* FIXME: not highmem safe */
 	discard_old_kernel_data(page_address(to));
diff -puN arch/arm/mm/copypage-xscale.c~mm-fix-races-between-swapoff-and-flush-dcache arch/arm/mm/copypage-xscale.c
--- a/arch/arm/mm/copypage-xscale.c~mm-fix-races-between-swapoff-and-flush-dcache
+++ a/arch/arm/mm/copypage-xscale.c
@@ -90,7 +90,7 @@ void xscale_mc_copy_user_highpage(struct
 	void *kto = kmap_atomic(to);
 
 	if (!test_and_set_bit(PG_dcache_clean, &from->flags))
-		__flush_dcache_page(page_mapping(from), from);
+		__flush_dcache_page(page_mapping_file(from), from);
 
 	raw_spin_lock(&minicache_lock);
 
diff -puN arch/arm/mm/fault-armv.c~mm-fix-races-between-swapoff-and-flush-dcache arch/arm/mm/fault-armv.c
--- a/arch/arm/mm/fault-armv.c~mm-fix-races-between-swapoff-and-flush-dcache
+++ a/arch/arm/mm/fault-armv.c
@@ -195,7 +195,7 @@ void update_mmu_cache(struct vm_area_str
 	if (page == ZERO_PAGE(0))
 		return;
 
-	mapping = page_mapping(page);
+	mapping = page_mapping_file(page);
 	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
 		__flush_dcache_page(mapping, page);
 	if (mapping) {
diff -puN arch/arm/mm/flush.c~mm-fix-races-between-swapoff-and-flush-dcache arch/arm/mm/flush.c
--- a/arch/arm/mm/flush.c~mm-fix-races-between-swapoff-and-flush-dcache
+++ a/arch/arm/mm/flush.c
@@ -285,7 +285,7 @@ void __sync_icache_dcache(pte_t pteval)
 
 	page = pfn_to_page(pfn);
 	if (cache_is_vipt_aliasing())
-		mapping = page_mapping(page);
+		mapping = page_mapping_file(page);
 	else
 		mapping = NULL;
 
@@ -333,7 +333,7 @@ void flush_dcache_page(struct page *page
 		return;
 	}
 
-	mapping = page_mapping(page);
+	mapping = page_mapping_file(page);
 
 	if (!cache_ops_need_broadcast() &&
 	    mapping && !page_mapcount(page))
@@ -363,7 +363,7 @@ void flush_kernel_dcache_page(struct pag
 	if (cache_is_vivt() || cache_is_vipt_aliasing()) {
 		struct address_space *mapping;
 
-		mapping = page_mapping(page);
+		mapping = page_mapping_file(page);
 
 		if (!mapping || mapping_mapped(mapping)) {
 			void *addr;
diff -puN arch/mips/mm/cache.c~mm-fix-races-between-swapoff-and-flush-dcache arch/mips/mm/cache.c
--- a/arch/mips/mm/cache.c~mm-fix-races-between-swapoff-and-flush-dcache
+++ a/arch/mips/mm/cache.c
@@ -86,7 +86,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned lon
 
 void __flush_dcache_page(struct page *page)
 {
-	struct address_space *mapping = page_mapping(page);
+	struct address_space *mapping = page_mapping_file(page);
 	unsigned long addr;
 
 	if (mapping && !mapping_mapped(mapping)) {
diff -puN arch/nios2/mm/cacheflush.c~mm-fix-races-between-swapoff-and-flush-dcache arch/nios2/mm/cacheflush.c
--- a/arch/nios2/mm/cacheflush.c~mm-fix-races-between-swapoff-and-flush-dcache
+++ a/arch/nios2/mm/cacheflush.c
@@ -180,7 +180,7 @@ void flush_dcache_page(struct page *page
 	if (page == ZERO_PAGE(0))
 		return;
 
-	mapping = page_mapping(page);
+	mapping = page_mapping_file(page);
 
 	/* Flush this page if there are aliases. */
 	if (mapping && !mapping_mapped(mapping)) {
@@ -215,7 +215,7 @@ void update_mmu_cache(struct vm_area_str
 	if (page == ZERO_PAGE(0))
 		return;
 
-	mapping = page_mapping(page);
+	mapping = page_mapping_file(page);
 	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
 		__flush_dcache_page(mapping, page);
 
diff -puN arch/parisc/kernel/cache.c~mm-fix-races-between-swapoff-and-flush-dcache arch/parisc/kernel/cache.c
--- a/arch/parisc/kernel/cache.c~mm-fix-races-between-swapoff-and-flush-dcache
+++ a/arch/parisc/kernel/cache.c
@@ -88,7 +88,8 @@ update_mmu_cache(struct vm_area_struct *
 		return;
 
 	page = pfn_to_page(pfn);
-	if (page_mapping(page) && test_bit(PG_dcache_dirty, &page->flags)) {
+	if (page_mapping_file(page) &&
+	    test_bit(PG_dcache_dirty, &page->flags)) {
 		flush_kernel_dcache_page_addr(pfn_va(pfn));
 		clear_bit(PG_dcache_dirty, &page->flags);
 	} else if (parisc_requires_coherency())
@@ -304,7 +305,7 @@ __flush_cache_page(struct vm_area_struct
 
 void flush_dcache_page(struct page *page)
 {
-	struct address_space *mapping = page_mapping(page);
+	struct address_space *mapping = page_mapping_file(page);
 	struct vm_area_struct *mpnt;
 	unsigned long offset;
 	unsigned long addr, old_addr = 0;
diff -puN arch/sh/mm/cache-sh4.c~mm-fix-races-between-swapoff-and-flush-dcache arch/sh/mm/cache-sh4.c
--- a/arch/sh/mm/cache-sh4.c~mm-fix-races-between-swapoff-and-flush-dcache
+++ a/arch/sh/mm/cache-sh4.c
@@ -112,7 +112,7 @@ static void sh4_flush_dcache_page(void *
 	struct page *page = arg;
 	unsigned long addr = (unsigned long)page_address(page);
 #ifndef CONFIG_SMP
-	struct address_space *mapping = page_mapping(page);
+	struct address_space *mapping = page_mapping_file(page);
 
 	if (mapping && !mapping_mapped(mapping))
 		clear_bit(PG_dcache_clean, &page->flags);
diff -puN arch/sh/mm/cache-sh7705.c~mm-fix-races-between-swapoff-and-flush-dcache arch/sh/mm/cache-sh7705.c
--- a/arch/sh/mm/cache-sh7705.c~mm-fix-races-between-swapoff-and-flush-dcache
+++ a/arch/sh/mm/cache-sh7705.c
@@ -136,7 +136,7 @@ static void __flush_dcache_page(unsigned
 static void sh7705_flush_dcache_page(void *arg)
 {
 	struct page *page = arg;
-	struct address_space *mapping = page_mapping(page);
+	struct address_space *mapping = page_mapping_file(page);
 
 	if (mapping && !mapping_mapped(mapping))
 		clear_bit(PG_dcache_clean, &page->flags);
diff -puN arch/sparc/kernel/smp_64.c~mm-fix-races-between-swapoff-and-flush-dcache arch/sparc/kernel/smp_64.c
--- a/arch/sparc/kernel/smp_64.c~mm-fix-races-between-swapoff-and-flush-dcache
+++ a/arch/sparc/kernel/smp_64.c
@@ -929,9 +929,9 @@ static inline void __local_flush_dcache_
 #ifdef DCACHE_ALIASING_POSSIBLE
 	__flush_dcache_page(page_address(page),
 			    ((tlb_type == spitfire) &&
-			     page_mapping(page) != NULL));
+			     page_mapping_file(page) != NULL));
 #else
-	if (page_mapping(page) != NULL &&
+	if (page_mapping_file(page) != NULL &&
 	    tlb_type == spitfire)
 		__flush_icache_page(__pa(page_address(page)));
 #endif
@@ -958,7 +958,7 @@ void smp_flush_dcache_page_impl(struct p
 
 		if (tlb_type == spitfire) {
 			data0 = ((u64)&xcall_flush_dcache_page_spitfire);
-			if (page_mapping(page) != NULL)
+			if (page_mapping_file(page) != NULL)
 				data0 |= ((u64)1 << 32);
 		} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
 #ifdef DCACHE_ALIASING_POSSIBLE
@@ -994,7 +994,7 @@ void flush_dcache_page_all(struct mm_str
 	pg_addr = page_address(page);
 	if (tlb_type == spitfire) {
 		data0 = ((u64)&xcall_flush_dcache_page_spitfire);
-		if (page_mapping(page) != NULL)
+		if (page_mapping_file(page) != NULL)
 			data0 |= ((u64)1 << 32);
 	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
 #ifdef DCACHE_ALIASING_POSSIBLE
diff -puN arch/sparc/mm/init_64.c~mm-fix-races-between-swapoff-and-flush-dcache arch/sparc/mm/init_64.c
--- a/arch/sparc/mm/init_64.c~mm-fix-races-between-swapoff-and-flush-dcache
+++ a/arch/sparc/mm/init_64.c
@@ -206,9 +206,9 @@ inline void flush_dcache_page_impl(struc
 #ifdef DCACHE_ALIASING_POSSIBLE
 	__flush_dcache_page(page_address(page),
 			    ((tlb_type == spitfire) &&
-			     page_mapping(page) != NULL));
+			     page_mapping_file(page) != NULL));
 #else
-	if (page_mapping(page) != NULL &&
+	if (page_mapping_file(page) != NULL &&
 	    tlb_type == spitfire)
 		__flush_icache_page(__pa(page_address(page)));
 #endif
@@ -490,7 +490,7 @@ void flush_dcache_page(struct page *page
 
 	this_cpu = get_cpu();
 
-	mapping = page_mapping(page);
+	mapping = page_mapping_file(page);
 	if (mapping && !mapping_mapped(mapping)) {
 		int dirty = test_bit(PG_dcache_dirty, &page->flags);
 		if (dirty) {
diff -puN arch/sparc/mm/tlb.c~mm-fix-races-between-swapoff-and-flush-dcache arch/sparc/mm/tlb.c
--- a/arch/sparc/mm/tlb.c~mm-fix-races-between-swapoff-and-flush-dcache
+++ a/arch/sparc/mm/tlb.c
@@ -128,7 +128,7 @@ void tlb_batch_add(struct mm_struct *mm,
 			goto no_cache_flush;
 
 		/* A real file page? */
-		mapping = page_mapping(page);
+		mapping = page_mapping_file(page);
 		if (!mapping)
 			goto no_cache_flush;
 
diff -puN arch/unicore32/mm/flush.c~mm-fix-races-between-swapoff-and-flush-dcache arch/unicore32/mm/flush.c
--- a/arch/unicore32/mm/flush.c~mm-fix-races-between-swapoff-and-flush-dcache
+++ a/arch/unicore32/mm/flush.c
@@ -83,7 +83,7 @@ void flush_dcache_page(struct page *page
 	if (page == ZERO_PAGE(0))
 		return;
 
-	mapping = page_mapping(page);
+	mapping = page_mapping_file(page);
 
 	if (mapping && !mapping_mapped(mapping))
 		clear_bit(PG_dcache_clean, &page->flags);
diff -puN arch/unicore32/mm/mmu.c~mm-fix-races-between-swapoff-and-flush-dcache arch/unicore32/mm/mmu.c
--- a/arch/unicore32/mm/mmu.c~mm-fix-races-between-swapoff-and-flush-dcache
+++ a/arch/unicore32/mm/mmu.c
@@ -503,7 +503,7 @@ void update_mmu_cache(struct vm_area_str
 	if (page == ZERO_PAGE(0))
 		return;
 
-	mapping = page_mapping(page);
+	mapping = page_mapping_file(page);
 	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
 		__flush_dcache_page(mapping, page);
 	if (mapping)
diff -puN arch/xtensa/mm/cache.c~mm-fix-races-between-swapoff-and-flush-dcache arch/xtensa/mm/cache.c
--- a/arch/xtensa/mm/cache.c~mm-fix-races-between-swapoff-and-flush-dcache
+++ a/arch/xtensa/mm/cache.c
@@ -127,7 +127,7 @@ EXPORT_SYMBOL(copy_user_highpage);
 
 void flush_dcache_page(struct page *page)
 {
-	struct address_space *mapping = page_mapping(page);
+	struct address_space *mapping = page_mapping_file(page);
 
 	/*
 	 * If we have a mapping but the page is not mapped to user-space
diff -puN include/linux/mm.h~mm-fix-races-between-swapoff-and-flush-dcache include/linux/mm.h
--- a/include/linux/mm.h~mm-fix-races-between-swapoff-and-flush-dcache
+++ a/include/linux/mm.h
@@ -1155,6 +1155,7 @@ static inline pgoff_t page_index(struct
 
 bool page_mapped(struct page *page);
 struct address_space *page_mapping(struct page *page);
+struct address_space *page_mapping_file(struct page *page);
 
 /*
  * Return true only if the page has been allocated with
diff -puN mm/util.c~mm-fix-races-between-swapoff-and-flush-dcache mm/util.c
--- a/mm/util.c~mm-fix-races-between-swapoff-and-flush-dcache
+++ a/mm/util.c
@@ -515,6 +515,16 @@ struct address_space *page_mapping(struc
 }
 EXPORT_SYMBOL(page_mapping);
 
+/*
+ * For file cache pages, return the address_space, otherwise return NULL
+ */
+struct address_space *page_mapping_file(struct page *page)
+{
+	if (unlikely(PageSwapCache(page)))
+		return NULL;
+	return page_mapping(page);
+}
+
 /* Slow path of page_mapcount() for compound pages */
 int __page_mapcount(struct page *page)
 {
_
--
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Kernel Archive]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux