On Fri, 30 Apr 2021 13:33:57 +0800 Xing Zhengjun wrote: > > I use my compaction test case to test it, 1/10 ratio can reproduce 100ms > sleep. > > 60) @ 103942.6 us | shrink_node(); > > 60) @ 103795.8 us | shrink_node(); Thanks for your test. In a bid to cut the number of 100ms sleepers further down, add another place for them to nap by flushing the LRU cache before falling asleep, instead of mulling over whether 50ms or 10ms would be more adequate. Alternatively, and simpler IMHO, take a single 5ms nap at a time until !tmi. --- y/mm/vmscan.c +++ x/mm/vmscan.c @@ -118,6 +118,9 @@ struct scan_control { /* The file pages on the current node are dangerously low */ unsigned int file_is_tiny:1; + unsigned int file_tmi:1; /* too many isolated */ + unsigned int anon_tmi:1; + /* Allocation order */ s8 order; @@ -2092,6 +2095,22 @@ static int current_may_throttle(void) bdi_write_congested(current->backing_dev_info); } +static void set_sc_tmi(struct scan_control *sc, bool file, int tmi) +{ + if (file) + sc->file_tmi = tmi; + else + sc->anon_tmi = tmi; +} + +static bool is_sc_tmi(struct scan_control *sc, bool file) +{ + if (file) + return sc->file_tmi != 0; + else + return sc->anon_tmi != 0; +} + /* * shrink_inactive_list() is a helper for shrink_node(). It returns the number * of reclaimed pages @@ -2109,11 +2128,23 @@ shrink_inactive_list(unsigned long nr_to enum vm_event_item item; struct pglist_data *pgdat = lruvec_pgdat(lruvec); bool stalled = false; + bool drained = false; while (unlikely(too_many_isolated(pgdat, file, sc))) { if (stalled) return 0; + if (!is_sc_tmi(sc, file)) { + set_sc_tmi(sc, file, 1); + return 0; + } + + if (!drained) { + drained = true; + lru_add_drain_all(); + continue; + } + /* wait a bit for the reclaimer. */ msleep(100); stalled = true; @@ -2123,6 +2154,9 @@ shrink_inactive_list(unsigned long nr_to return SWAP_CLUSTER_MAX; } + if (is_sc_tmi(sc, file)) + set_sc_tmi(sc, file, 0); + lru_add_drain(); spin_lock_irq(&lruvec->lru_lock);