The patch titled Subject: lockdep: fix fs_reclaim annotation has been added to the -mm tree. Its filename is lockdep-fix-fs_reclaim-annotation.patch This patch should soon appear at http://ozlabs.org/~akpm/mmots/broken-out/lockdep-fix-fs_reclaim-annotation.patch and later at http://ozlabs.org/~akpm/mmotm/broken-out/lockdep-fix-fs_reclaim-annotation.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: Omar Sandoval <osandov@xxxxxx> Subject: lockdep: fix fs_reclaim annotation While revisiting my Btrfs swapfile series [1], I introduced a situation in which reclaim would lock i_rwsem, and even though the swapon() path clearly made GFP_KERNEL allocations while holding i_rwsem, I got no complaints from lockdep. It turns out that the rework of the fs_reclaim annotation was broken: if the current task has PF_MEMALLOC set, we don't acquire the dummy fs_reclaim lock, but when reclaiming we always check this _after_ we've just set the PF_MEMALLOC flag. In most cases, we can fix this by moving the fs_reclaim_{acquire,release}() outside of the memalloc_noreclaim_{save,restore}(), although kswapd is slightly different. After applying this, I got the expected lockdep splats. 
1: https://lwn.net/Articles/625412/ Link: http://lkml.kernel.org/r/9f8aa70652a98e98d7c4de0fc96a4addcee13efe.1523778026.git.osandov@xxxxxx Fixes: d92a8cfcb37e ("locking/lockdep: Rework FS_RECLAIM annotation") Signed-off-by: Omar Sandoval <osandov@xxxxxx> Reviewed-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx> Cc: Ingo Molnar <mingo@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- include/linux/sched/mm.h | 4 ++++ mm/page_alloc.c | 20 +++++++++++++++----- mm/vmscan.c | 20 +++++++++++++------- 3 files changed, 32 insertions(+), 12 deletions(-) diff -puN include/linux/sched/mm.h~lockdep-fix-fs_reclaim-annotation include/linux/sched/mm.h --- a/include/linux/sched/mm.h~lockdep-fix-fs_reclaim-annotation +++ a/include/linux/sched/mm.h @@ -163,9 +163,13 @@ static inline gfp_t current_gfp_context( } #ifdef CONFIG_LOCKDEP +extern void __fs_reclaim_acquire(void); +extern void __fs_reclaim_release(void); extern void fs_reclaim_acquire(gfp_t gfp_mask); extern void fs_reclaim_release(gfp_t gfp_mask); #else +static inline void __fs_reclaim_acquire(void) { } +static inline void __fs_reclaim_release(void) { } static inline void fs_reclaim_acquire(gfp_t gfp_mask) { } static inline void fs_reclaim_release(gfp_t gfp_mask) { } #endif diff -puN mm/page_alloc.c~lockdep-fix-fs_reclaim-annotation mm/page_alloc.c --- a/mm/page_alloc.c~lockdep-fix-fs_reclaim-annotation +++ a/mm/page_alloc.c @@ -3708,7 +3708,7 @@ should_compact_retry(struct alloc_contex #endif /* CONFIG_COMPACTION */ #ifdef CONFIG_LOCKDEP -struct lockdep_map __fs_reclaim_map = +static struct lockdep_map __fs_reclaim_map = STATIC_LOCKDEP_MAP_INIT("fs_reclaim", &__fs_reclaim_map); static bool __need_fs_reclaim(gfp_t gfp_mask) @@ -3733,17 +3733,27 @@ static bool __need_fs_reclaim(gfp_t gfp_ return true; } +void __fs_reclaim_acquire(void) +{ + lock_map_acquire(&__fs_reclaim_map); +} + +void 
__fs_reclaim_release(void) +{ + lock_map_release(&__fs_reclaim_map); +} + void fs_reclaim_acquire(gfp_t gfp_mask) { if (__need_fs_reclaim(gfp_mask)) - lock_map_acquire(&__fs_reclaim_map); + __fs_reclaim_acquire(); } EXPORT_SYMBOL_GPL(fs_reclaim_acquire); void fs_reclaim_release(gfp_t gfp_mask) { if (__need_fs_reclaim(gfp_mask)) - lock_map_release(&__fs_reclaim_map); + __fs_reclaim_release(); } EXPORT_SYMBOL_GPL(fs_reclaim_release); #endif @@ -3761,8 +3771,8 @@ __perform_reclaim(gfp_t gfp_mask, unsign /* We now go into synchronous reclaim */ cpuset_memory_pressure_bump(); - noreclaim_flag = memalloc_noreclaim_save(); fs_reclaim_acquire(gfp_mask); + noreclaim_flag = memalloc_noreclaim_save(); reclaim_state.reclaimed_slab = 0; current->reclaim_state = &reclaim_state; @@ -3770,8 +3780,8 @@ __perform_reclaim(gfp_t gfp_mask, unsign ac->nodemask); current->reclaim_state = NULL; - fs_reclaim_release(gfp_mask); memalloc_noreclaim_restore(noreclaim_flag); + fs_reclaim_release(gfp_mask); cond_resched(); diff -puN mm/vmscan.c~lockdep-fix-fs_reclaim-annotation mm/vmscan.c --- a/mm/vmscan.c~lockdep-fix-fs_reclaim-annotation +++ a/mm/vmscan.c @@ -3318,11 +3318,15 @@ static int balance_pgdat(pg_data_t *pgda .may_unmap = 1, .may_swap = 1, }; + + __fs_reclaim_acquire(); + count_vm_event(PAGEOUTRUN); do { unsigned long nr_reclaimed = sc.nr_reclaimed; bool raise_priority = true; + bool ret; sc.reclaim_idx = classzone_idx; @@ -3395,7 +3399,10 @@ static int balance_pgdat(pg_data_t *pgda wake_up_all(&pgdat->pfmemalloc_wait); /* Check if kswapd should be suspending */ - if (try_to_freeze() || kthread_should_stop()) + __fs_reclaim_release(); + ret = try_to_freeze(); + __fs_reclaim_acquire(); + if (ret || kthread_should_stop()) break; /* @@ -3412,6 +3419,7 @@ static int balance_pgdat(pg_data_t *pgda out: snapshot_refaults(NULL, pgdat); + __fs_reclaim_release(); /* * Return the order kswapd stopped reclaiming at as * prepare_kswapd_sleep() takes it into account. 
If another caller @@ -3600,9 +3608,7 @@ kswapd_try_sleep: */ trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx, alloc_order); - fs_reclaim_acquire(GFP_KERNEL); reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx); - fs_reclaim_release(GFP_KERNEL); if (reclaim_order < alloc_order) goto kswapd_try_sleep; } @@ -3684,16 +3690,16 @@ unsigned long shrink_all_memory(unsigned unsigned long nr_reclaimed; unsigned int noreclaim_flag; - noreclaim_flag = memalloc_noreclaim_save(); fs_reclaim_acquire(sc.gfp_mask); + noreclaim_flag = memalloc_noreclaim_save(); reclaim_state.reclaimed_slab = 0; p->reclaim_state = &reclaim_state; nr_reclaimed = do_try_to_free_pages(zonelist, &sc); p->reclaim_state = NULL; - fs_reclaim_release(sc.gfp_mask); memalloc_noreclaim_restore(noreclaim_flag); + fs_reclaim_release(sc.gfp_mask); return nr_reclaimed; } @@ -3870,6 +3876,7 @@ static int __node_reclaim(struct pglist_ }; cond_resched(); + fs_reclaim_acquire(sc.gfp_mask); /* * We need to be able to allocate from the reserves for RECLAIM_UNMAP * and we also need to be able to write out pages for RECLAIM_WRITE @@ -3877,7 +3884,6 @@ static int __node_reclaim(struct pglist_ */ noreclaim_flag = memalloc_noreclaim_save(); p->flags |= PF_SWAPWRITE; - fs_reclaim_acquire(sc.gfp_mask); reclaim_state.reclaimed_slab = 0; p->reclaim_state = &reclaim_state; @@ -3892,9 +3898,9 @@ static int __node_reclaim(struct pglist_ } p->reclaim_state = NULL; - fs_reclaim_release(gfp_mask); current->flags &= ~PF_SWAPWRITE; memalloc_noreclaim_restore(noreclaim_flag); + fs_reclaim_release(sc.gfp_mask); return sc.nr_reclaimed >= nr_pages; } _ Patches currently in -mm which might be from osandov@xxxxxx are lockdep-fix-fs_reclaim-annotation.patch -- To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html