On Wed, Jul 27, 2016 at 03:22:26PM +0100, Mel Gorman wrote: > ---8<--- > From: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx> > Subject: [PATCH] mm, vmscan: Wait on a waitqueue when too many pages are > isolated > This is potentially a much better version as it avoids wakeup storms and do a better job of handling the case where pages could not be reclaimed. ---8<--- mm, vmscan: Wait on a waitqueue when too many pages are isolated When too many pages are isolated, direct reclaim waits on congestion to clear for up to a tenth of a second. There is no reason to believe that too many pages are isolated due to dirty pages, reclaim efficiency or congestion. It may simply be because an extremely large number of processes have entered direct reclaim at the same time. This patch has processes wait on a waitqueue when too many pages are isolated. When parallel reclaimers finish shrink_page_list, they wake the waiters to recheck whether too many pages are isolated. While it is difficult to trigger this corner case, it's possible by lauching an extremely large number of hackbench processes on a 32-bit system with limited memory. Without the patch, a large number of processes wait uselessly and with the patch applied, I was unable to stall the system. Signed-off-by: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx> --- include/linux/mmzone.h | 1 + mm/page_alloc.c | 1 + mm/vmscan.c | 24 +++++++++++++++--------- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d572b78b65e1..467878d7af33 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -653,6 +653,7 @@ typedef struct pglist_data { int node_id; wait_queue_head_t kswapd_wait; wait_queue_head_t pfmemalloc_wait; + wait_queue_head_t isolated_wait; struct task_struct *kswapd; /* Protected by mem_hotplug_begin/end() */ int kswapd_order; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fbd329e61bf6..3800972f240e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5859,6 +5859,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) #endif init_waitqueue_head(&pgdat->kswapd_wait); init_waitqueue_head(&pgdat->pfmemalloc_wait); + init_waitqueue_head(&pgdat->isolated_wait); #ifdef CONFIG_COMPACTION init_waitqueue_head(&pgdat->kcompactd_wait); #endif diff --git a/mm/vmscan.c b/mm/vmscan.c index 9c0b2b0fc164..e264fcb7556b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1554,16 +1554,16 @@ int isolate_lru_page(struct page *page) * the LRU list will go small and be scanned faster than necessary, leading to * unnecessary swapping, thrashing and OOM. */ -static int too_many_isolated(struct pglist_data *pgdat, int file, +static bool safe_to_isolate(struct pglist_data *pgdat, int file, struct scan_control *sc) { unsigned long inactive, isolated; if (current_is_kswapd()) - return 0; + return true; - if (!sane_reclaim(sc)) - return 0; + if (sane_reclaim(sc)) + return true; if (file) { inactive = node_page_state(pgdat, NR_INACTIVE_FILE); @@ -1581,7 +1581,7 @@ static int too_many_isolated(struct pglist_data *pgdat, int file, if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) inactive >>= 3; - return isolated > inactive; + return isolated < inactive; } static noinline_for_stack void @@ -1701,12 +1701,15 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, if (!inactive_reclaimable_pages(lruvec, sc, lru)) return 0; - while (unlikely(too_many_isolated(pgdat, file, sc))) { - congestion_wait(BLK_RW_ASYNC, HZ/10); + if (!safe_to_isolate(pgdat, file, sc)) { + wait_event_killable(pgdat->isolated_wait, + safe_to_isolate(pgdat, file, sc)); /* We are about to die and free our memory. Return now. */ - if (fatal_signal_pending(current)) - return SWAP_CLUSTER_MAX; + if (fatal_signal_pending(current)) { + nr_reclaimed = SWAP_CLUSTER_MAX; + goto out; + } } lru_add_drain(); @@ -1819,6 +1822,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id, nr_scanned, nr_reclaimed, sc->priority, file); + +out: + wake_up(&pgdat->isolated_wait); return nr_reclaimed; } -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>