Tetsuo Handa wrote: > Uptime between 101 and 300 is a silent hang up (i.e. no OOM killer messages, > no SIGKILL pending tasks, no TIF_MEMDIE tasks) which I solved using SysRq-f > at uptime = 289. I don't know the reason of this silent hang up, but the > memory unzapping kernel thread will not help because there is no OOM victim. > > ---------- > [ 101.438951] MemAlloc-Info: 10 stalling task, 0 dying task, 0 victim task. > (...snipped...) > [ 111.817922] MemAlloc-Info: 12 stalling task, 0 dying task, 0 victim task. > (...snipped...) > [ 122.281828] MemAlloc-Info: 13 stalling task, 0 dying task, 0 victim task. > (...snipped...) > [ 132.793724] MemAlloc-Info: 14 stalling task, 0 dying task, 0 victim task. > (...snipped...) > [ 143.336154] MemAlloc-Info: 16 stalling task, 0 dying task, 0 victim task. > (...snipped...) > [ 289.343187] sysrq: SysRq : Manual OOM execution > (...snipped...) > [ 292.065650] MemAlloc-Info: 16 stalling task, 0 dying task, 0 victim task. > (...snipped...) > [ 302.590736] kworker/3:2 invoked oom-killer: gfp_mask=0x24000c0, order=-1, oom_score_adj=0 > (...snipped...) > [ 302.690047] MemAlloc-Info: 4 stalling task, 0 dying task, 0 victim task. > ---------- I examined this hang up using additional debug printk() patch. And it was observed that when this silent hang up occurs, zone_reclaimable() called from shrink_zones() called from a __GFP_FS memory allocation request is returning true forever. Since the __GFP_FS memory allocation request can never call out_of_memory() due to did_some_progree > 0, the system will silently hang up with 100% CPU usage. ---------- diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0473eec..fda0bb5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2821,6 +2821,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, } #endif /* CONFIG_COMPACTION */ +pid_t dump_target_pid; + /* Perform direct synchronous page reclaim */ static int __perform_reclaim(gfp_t gfp_mask, unsigned int order, @@ -2847,6 +2849,9 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, cond_resched(); + if (dump_target_pid == current->pid) + printk(KERN_INFO "__perform_reclaim returned %u at line %u\n", + progress, __LINE__); return progress; } @@ -3007,6 +3012,7 @@ static int malloc_watchdog(void *unused) unsigned int memdie_pending; unsigned int stalling_tasks; u8 index; + pid_t pid; not_stalling: /* Healty case. */ /* @@ -3025,12 +3031,16 @@ static int malloc_watchdog(void *unused) * and stop_memalloc_timer() within timeout duration. */ if (likely(!memalloc_counter[index])) + { + dump_target_pid = 0; goto not_stalling; + } maybe_stalling: /* Maybe something is wrong. Let's check. */ /* First, report whether there are SIGKILL tasks and/or OOM victims. */ sigkill_pending = 0; memdie_pending = 0; stalling_tasks = 0; + pid = 0; preempt_disable(); rcu_read_lock(); for_each_process_thread(g, p) { @@ -3062,8 +3072,11 @@ static int malloc_watchdog(void *unused) (fatal_signal_pending(p) ? "-dying" : ""), p->comm, p->pid, m->gfp, m->order, spent); show_stack(p, NULL); + if (!pid && (m->gfp & __GFP_FS)) + pid = p->pid; } spin_unlock(&memalloc_list_lock); + dump_target_pid = -pid; /* Wait until next timeout duration. */ schedule_timeout_interruptible(timeout); if (memalloc_counter[index]) @@ -3155,6 +3168,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, goto nopage; retry: + if (dump_target_pid == -current->pid) + dump_target_pid = -dump_target_pid; + if (gfp_mask & __GFP_KSWAPD_RECLAIM) wake_all_kswapds(order, ac); @@ -3280,6 +3296,11 @@ retry: goto noretry; /* Keep reclaiming pages as long as there is reasonable progress */ + if (dump_target_pid == current->pid) { + printk(KERN_INFO "did_some_progress=%lu at line %u\n", + did_some_progress, __LINE__); + dump_target_pid = 0; + } pages_reclaimed += did_some_progress; if ((did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER) || ((gfp_mask & __GFP_REPEAT) && pages_reclaimed < (1 << order))) { diff --git a/mm/vmscan.c b/mm/vmscan.c index 27d580b..cb0c22e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2527,6 +2527,8 @@ static inline bool compaction_ready(struct zone *zone, int order) return watermark_ok; } +extern pid_t dump_target_pid; + /* * This is the direct reclaim path, for page-allocating processes. We only * try to reclaim pages from zones which will satisfy the caller's allocation @@ -2619,16 +2621,41 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) sc->nr_reclaimed += nr_soft_reclaimed; sc->nr_scanned += nr_soft_scanned; if (nr_soft_reclaimed) + { + if (dump_target_pid == current->pid) + printk(KERN_INFO "nr_soft_reclaimed=%lu at line %u\n", + nr_soft_reclaimed, __LINE__); reclaimable = true; + } /* need some check for avoid more shrink_zone() */ } if (shrink_zone(zone, sc, zone_idx(zone) == classzone_idx)) + { + if (dump_target_pid == current->pid) + printk(KERN_INFO "shrink_zone returned 1 at line %u\n", + __LINE__); reclaimable = true; + } if (global_reclaim(sc) && !reclaimable && zone_reclaimable(zone)) + { + if (dump_target_pid == current->pid) { + printk(KERN_INFO "zone_reclaimable returned 1 at line %u\n", + __LINE__); + printk(KERN_INFO "(ACTIVE_FILE=%lu+INACTIVE_FILE=%lu", + zone_page_state(zone, NR_ACTIVE_FILE), + zone_page_state(zone, NR_INACTIVE_FILE)); + if (get_nr_swap_pages() > 0) + printk(KERN_CONT "+ACTIVE_ANON=%lu+INACTIVE_ANON=%lu", + zone_page_state(zone, NR_ACTIVE_ANON), + zone_page_state(zone, NR_INACTIVE_ANON)); + printk(KERN_CONT ") * 6 > PAGES_SCANNED=%lu\n", + zone_page_state(zone, NR_PAGES_SCANNED)); + } reclaimable = true; + } } /* @@ -2674,6 +2701,9 @@ retry: sc->priority); sc->nr_scanned = 0; zones_reclaimable = shrink_zones(zonelist, sc); + if (dump_target_pid == current->pid) + printk(KERN_INFO "shrink_zones returned %u at line %u\n", + zones_reclaimable, __LINE__); total_scanned += sc->nr_scanned; if (sc->nr_reclaimed >= sc->nr_to_reclaim) @@ -2707,11 +2737,21 @@ retry: delayacct_freepages_end(); if (sc->nr_reclaimed) + { + if (dump_target_pid == current->pid) + printk(KERN_INFO "sc->nr_reclaimed=%lu at line %u\n", + sc->nr_reclaimed, __LINE__); return sc->nr_reclaimed; + } /* Aborted reclaim to try compaction? don't OOM, then */ if (sc->compaction_ready) + { + if (dump_target_pid == current->pid) + printk(KERN_INFO "sc->compaction_ready=%u at line %u\n", + sc->compaction_ready, __LINE__); return 1; + } /* Untapped cgroup reserves? Don't OOM, retry. */ if (!sc->may_thrash) { @@ -2720,6 +2760,9 @@ retry: goto retry; } + if (dump_target_pid == current->pid) + printk(KERN_INFO "zones_reclaimable=%u at line %u\n", + zones_reclaimable, __LINE__); /* Any of the zones still reclaimable? Don't OOM. */ if (zones_reclaimable) return 1; @@ -2875,7 +2918,12 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, * point. */ if (throttle_direct_reclaim(gfp_mask, zonelist, nodemask)) + { + if (dump_target_pid == current->pid) + printk(KERN_INFO "throttle_direct_reclaim returned 1 at line %u\n", + __LINE__); return 1; + } trace_mm_vmscan_direct_reclaim_begin(order, sc.may_writepage, @@ -2885,6 +2933,9 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, trace_mm_vmscan_direct_reclaim_end(nr_reclaimed); + if (dump_target_pid == current->pid) + printk(KERN_INFO "do_try_to_free_pages returned %lu at line %u\n", + nr_reclaimed, __LINE__); return nr_reclaimed; } ---------- What is strange, the values printed by this debug printk() patch did not change as time went by. Thus, I think that this is not a problem of lack of CPU time for scanning pages. I suspect that there is a bug that nobody is scanning pages. ---------- [ 66.821450] zone_reclaimable returned 1 at line 2646 [ 66.823020] (ACTIVE_FILE=26+INACTIVE_FILE=10) * 6 > PAGES_SCANNED=32 [ 66.824935] shrink_zones returned 1 at line 2706 [ 66.826392] zones_reclaimable=1 at line 2765 [ 66.827865] do_try_to_free_pages returned 1 at line 2938 [ 67.102322] __perform_reclaim returned 1 at line 2854 [ 67.103968] did_some_progress=1 at line 3301 (...snipped...) [ 281.439977] zone_reclaimable returned 1 at line 2646 [ 281.439977] (ACTIVE_FILE=26+INACTIVE_FILE=10) * 6 > PAGES_SCANNED=32 [ 281.439978] shrink_zones returned 1 at line 2706 [ 281.439978] zones_reclaimable=1 at line 2765 [ 281.439979] do_try_to_free_pages returned 1 at line 2938 [ 281.439979] __perform_reclaim returned 1 at line 2854 [ 281.439980] did_some_progress=1 at line 3301 ---------- Complete log is at http://I-love.SAKURA.ne.jp/tmp/serial-20151013.txt.xz -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>