[patch 041/118] lockdep: fix fs_reclaim annotation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Omar Sandoval <osandov@xxxxxx>
Subject: lockdep: fix fs_reclaim annotation

While revisiting my Btrfs swapfile series [1], I introduced a situation in
which reclaim would lock i_rwsem, and even though the swapon() path
clearly made GFP_KERNEL allocations while holding i_rwsem, I got no
complaints from lockdep.  It turns out that the rework of the fs_reclaim
annotation was broken: if the current task has PF_MEMALLOC set, we don't
acquire the dummy fs_reclaim lock, but when reclaiming we always check
this _after_ we've just set the PF_MEMALLOC flag.  In most cases, we can
fix this by moving the fs_reclaim_{acquire,release}() outside of the
memalloc_noreclaim_{save,restore}(), althought kswapd is slightly
different.  After applying this, I got the expected lockdep splats.

1: https://lwn.net/Articles/625412/

Link: http://lkml.kernel.org/r/9f8aa70652a98e98d7c4de0fc96a4addcee13efe.1523778026.git.osandov@xxxxxx
Fixes: d92a8cfcb37e ("locking/lockdep: Rework FS_RECLAIM annotation")
Signed-off-by: Omar Sandoval <osandov@xxxxxx>
Reviewed-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 include/linux/sched/mm.h |    4 ++++
 mm/page_alloc.c          |   20 +++++++++++++++-----
 mm/vmscan.c              |   20 +++++++++++++-------
 3 files changed, 32 insertions(+), 12 deletions(-)

diff -puN include/linux/sched/mm.h~lockdep-fix-fs_reclaim-annotation include/linux/sched/mm.h
--- a/include/linux/sched/mm.h~lockdep-fix-fs_reclaim-annotation
+++ a/include/linux/sched/mm.h
@@ -163,9 +163,13 @@ static inline gfp_t current_gfp_context(
 }
 
 #ifdef CONFIG_LOCKDEP
+extern void __fs_reclaim_acquire(void);
+extern void __fs_reclaim_release(void);
 extern void fs_reclaim_acquire(gfp_t gfp_mask);
 extern void fs_reclaim_release(gfp_t gfp_mask);
 #else
+static inline void __fs_reclaim_acquire(void) { }
+static inline void __fs_reclaim_release(void) { }
 static inline void fs_reclaim_acquire(gfp_t gfp_mask) { }
 static inline void fs_reclaim_release(gfp_t gfp_mask) { }
 #endif
diff -puN mm/page_alloc.c~lockdep-fix-fs_reclaim-annotation mm/page_alloc.c
--- a/mm/page_alloc.c~lockdep-fix-fs_reclaim-annotation
+++ a/mm/page_alloc.c
@@ -3701,7 +3701,7 @@ should_compact_retry(struct alloc_contex
 #endif /* CONFIG_COMPACTION */
 
 #ifdef CONFIG_LOCKDEP
-struct lockdep_map __fs_reclaim_map =
+static struct lockdep_map __fs_reclaim_map =
 	STATIC_LOCKDEP_MAP_INIT("fs_reclaim", &__fs_reclaim_map);
 
 static bool __need_fs_reclaim(gfp_t gfp_mask)
@@ -3726,17 +3726,27 @@ static bool __need_fs_reclaim(gfp_t gfp_
 	return true;
 }
 
+void __fs_reclaim_acquire(void)
+{
+	lock_map_acquire(&__fs_reclaim_map);
+}
+
+void __fs_reclaim_release(void)
+{
+	lock_map_release(&__fs_reclaim_map);
+}
+
 void fs_reclaim_acquire(gfp_t gfp_mask)
 {
 	if (__need_fs_reclaim(gfp_mask))
-		lock_map_acquire(&__fs_reclaim_map);
+		__fs_reclaim_acquire();
 }
 EXPORT_SYMBOL_GPL(fs_reclaim_acquire);
 
 void fs_reclaim_release(gfp_t gfp_mask)
 {
 	if (__need_fs_reclaim(gfp_mask))
-		lock_map_release(&__fs_reclaim_map);
+		__fs_reclaim_release();
 }
 EXPORT_SYMBOL_GPL(fs_reclaim_release);
 #endif
@@ -3754,8 +3764,8 @@ __perform_reclaim(gfp_t gfp_mask, unsign
 
 	/* We now go into synchronous reclaim */
 	cpuset_memory_pressure_bump();
-	noreclaim_flag = memalloc_noreclaim_save();
 	fs_reclaim_acquire(gfp_mask);
+	noreclaim_flag = memalloc_noreclaim_save();
 	reclaim_state.reclaimed_slab = 0;
 	current->reclaim_state = &reclaim_state;
 
@@ -3763,8 +3773,8 @@ __perform_reclaim(gfp_t gfp_mask, unsign
 								ac->nodemask);
 
 	current->reclaim_state = NULL;
-	fs_reclaim_release(gfp_mask);
 	memalloc_noreclaim_restore(noreclaim_flag);
+	fs_reclaim_release(gfp_mask);
 
 	cond_resched();
 
diff -puN mm/vmscan.c~lockdep-fix-fs_reclaim-annotation mm/vmscan.c
--- a/mm/vmscan.c~lockdep-fix-fs_reclaim-annotation
+++ a/mm/vmscan.c
@@ -3318,11 +3318,15 @@ static int balance_pgdat(pg_data_t *pgda
 		.may_unmap = 1,
 		.may_swap = 1,
 	};
+
+	__fs_reclaim_acquire();
+
 	count_vm_event(PAGEOUTRUN);
 
 	do {
 		unsigned long nr_reclaimed = sc.nr_reclaimed;
 		bool raise_priority = true;
+		bool ret;
 
 		sc.reclaim_idx = classzone_idx;
 
@@ -3395,7 +3399,10 @@ static int balance_pgdat(pg_data_t *pgda
 			wake_up_all(&pgdat->pfmemalloc_wait);
 
 		/* Check if kswapd should be suspending */
-		if (try_to_freeze() || kthread_should_stop())
+		__fs_reclaim_release();
+		ret = try_to_freeze();
+		__fs_reclaim_acquire();
+		if (ret || kthread_should_stop())
 			break;
 
 		/*
@@ -3412,6 +3419,7 @@ static int balance_pgdat(pg_data_t *pgda
 
 out:
 	snapshot_refaults(NULL, pgdat);
+	__fs_reclaim_release();
 	/*
 	 * Return the order kswapd stopped reclaiming at as
 	 * prepare_kswapd_sleep() takes it into account. If another caller
@@ -3600,9 +3608,7 @@ kswapd_try_sleep:
 		 */
 		trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx,
 						alloc_order);
-		fs_reclaim_acquire(GFP_KERNEL);
 		reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx);
-		fs_reclaim_release(GFP_KERNEL);
 		if (reclaim_order < alloc_order)
 			goto kswapd_try_sleep;
 	}
@@ -3684,16 +3690,16 @@ unsigned long shrink_all_memory(unsigned
 	unsigned long nr_reclaimed;
 	unsigned int noreclaim_flag;
 
-	noreclaim_flag = memalloc_noreclaim_save();
 	fs_reclaim_acquire(sc.gfp_mask);
+	noreclaim_flag = memalloc_noreclaim_save();
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
 	nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
 
 	p->reclaim_state = NULL;
-	fs_reclaim_release(sc.gfp_mask);
 	memalloc_noreclaim_restore(noreclaim_flag);
+	fs_reclaim_release(sc.gfp_mask);
 
 	return nr_reclaimed;
 }
@@ -3870,6 +3876,7 @@ static int __node_reclaim(struct pglist_
 	};
 
 	cond_resched();
+	fs_reclaim_acquire(sc.gfp_mask);
 	/*
 	 * We need to be able to allocate from the reserves for RECLAIM_UNMAP
 	 * and we also need to be able to write out pages for RECLAIM_WRITE
@@ -3877,7 +3884,6 @@ static int __node_reclaim(struct pglist_
 	 */
 	noreclaim_flag = memalloc_noreclaim_save();
 	p->flags |= PF_SWAPWRITE;
-	fs_reclaim_acquire(sc.gfp_mask);
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
@@ -3892,9 +3898,9 @@ static int __node_reclaim(struct pglist_
 	}
 
 	p->reclaim_state = NULL;
-	fs_reclaim_release(gfp_mask);
 	current->flags &= ~PF_SWAPWRITE;
 	memalloc_noreclaim_restore(noreclaim_flag);
+	fs_reclaim_release(sc.gfp_mask);
 	return sc.nr_reclaimed >= nr_pages;
 }
 
_
--
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Kernel Archive]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux