From: Kairui Song <kasong@xxxxxxxxxxx>

When lru_gen is aging, it updates mm counters page by page, which
causes a higher overhead if aging happens frequently or a lot of pages
in one generation are getting moved. Optimize this by doing the counter
updates in batches.

Although most __mod_*_state helpers have their own caches, the overhead
is still observable.

Tested in a 4G memcg on an EPYC 7K62 with:

  memcached -u nobody -m 16384 -s /tmp/memcached.socket \
    -a 0766 -t 16 -B binary &

  memtier_benchmark -S /tmp/memcached.socket \
    -P memcache_binary -n allkeys \
    --key-minimum=1 --key-maximum=16000000 -d 1024 \
    --ratio=1:0 --key-pattern=P:P -c 2 -t 16 --pipeline 8 -x 6

Average result of 18 test runs:

Before: 44017.78 Ops/sec
After:  44687.08 Ops/sec (+1.5%)

Signed-off-by: Kairui Song <kasong@xxxxxxxxxxx>
---
 mm/vmscan.c | 64 +++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 55 insertions(+), 9 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4f9c854ce6cc..185d53607c7e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3113,9 +3113,47 @@ static int folio_update_gen(struct folio *folio, int gen)
 	return ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
 }
 
+/*
+ * Update the LRU gen counters in batch for each lru_gen LRU list. The batch
+ * is limited to one gen / type / zone level LRU, and is applied after the
+ * scan of one LRU list finishes or is aborted.
+ */
+struct gen_update_batch {
+	int delta[MAX_NR_GENS];
+};
+
+static void lru_gen_update_batch(struct lruvec *lruvec, int type, int zone,
+				 struct gen_update_batch *batch)
+{
+	int gen;
+	int promoted = 0;
+	struct lru_gen_folio *lrugen = &lruvec->lrugen;
+	enum lru_list lru = type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
+
+	for (gen = 0; gen < MAX_NR_GENS; gen++) {
+		int delta = batch->delta[gen];
+
+		if (!delta)
+			continue;
+
+		WRITE_ONCE(lrugen->nr_pages[gen][type][zone],
+			   lrugen->nr_pages[gen][type][zone] + delta);
+
+		if (lru_gen_is_active(lruvec, gen))
+			promoted += delta;
+	}
+
+	if (promoted) {
+		__update_lru_size(lruvec, lru, zone, -promoted);
+		__update_lru_size(lruvec, lru + LRU_ACTIVE, zone, promoted);
+	}
+}
+
 /* protect pages accessed multiple times through file descriptors */
-static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
+static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio,
+			 bool reclaiming, struct gen_update_batch *batch)
 {
+	int delta = folio_nr_pages(folio);
 	int type = folio_is_file_lru(folio);
 	struct lru_gen_folio *lrugen = &lruvec->lrugen;
 	int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
@@ -3138,7 +3176,8 @@ static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclai
 		new_flags |= BIT(PG_reclaim);
 	} while (!try_cmpxchg(&folio->flags, &old_flags, new_flags));
 
-	lru_gen_update_size(lruvec, folio, old_gen, new_gen);
+	batch->delta[old_gen] -= delta;
+	batch->delta[new_gen] += delta;
 
 	return new_gen;
 }
@@ -3672,6 +3711,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
 {
 	int zone;
 	int remaining = MAX_LRU_BATCH;
+	struct gen_update_batch batch = { };
 	struct lru_gen_folio *lrugen = &lruvec->lrugen;
 	int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
 
@@ -3690,12 +3730,15 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
 			VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
 			VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
 
-			new_gen = folio_inc_gen(lruvec, folio, false);
+			new_gen = folio_inc_gen(lruvec, folio, false, &batch);
 			list_move_tail(&folio->lru, &lrugen->folios[new_gen][type][zone]);
 
-			if (!--remaining)
+			if (!--remaining) {
+				lru_gen_update_batch(lruvec, type, zone, &batch);
 				return false;
+			}
 		}
+		lru_gen_update_batch(lruvec, type, zone, &batch);
 	}
 done:
 	reset_ctrl_pos(lruvec, type, true);
@@ -4215,7 +4258,7 @@ void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid)
 ******************************************************************************/
 
 static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_control *sc,
-		       int tier_idx)
+		       int tier_idx, struct gen_update_batch *batch)
 {
 	bool success;
 	int gen = folio_lru_gen(folio);
@@ -4257,7 +4300,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
 	if (tier > tier_idx || refs == BIT(LRU_REFS_WIDTH)) {
 		int hist = lru_hist_from_seq(lrugen->min_seq[type]);
 
-		gen = folio_inc_gen(lruvec, folio, false);
+		gen = folio_inc_gen(lruvec, folio, false, batch);
 		list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
 
 		WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
@@ -4267,7 +4310,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
 
 	/* ineligible */
 	if (zone > sc->reclaim_idx || skip_cma(folio, sc)) {
-		gen = folio_inc_gen(lruvec, folio, false);
+		gen = folio_inc_gen(lruvec, folio, false, batch);
 		list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
 		return true;
 	}
@@ -4275,7 +4318,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
 	/* waiting for writeback */
 	if (folio_test_locked(folio) || folio_test_writeback(folio) ||
 	    (type == LRU_GEN_FILE && folio_test_dirty(folio))) {
-		gen = folio_inc_gen(lruvec, folio, true);
+		gen = folio_inc_gen(lruvec, folio, true, batch);
 		list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
 		return true;
 	}
@@ -4341,6 +4384,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 	for (i = MAX_NR_ZONES; i > 0; i--) {
 		LIST_HEAD(moved);
 		int skipped_zone = 0;
+		struct gen_update_batch batch = { };
 		int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
 		struct list_head *head = &lrugen->folios[gen][type][zone];
 
@@ -4355,7 +4399,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 
 			scanned += delta;
 
-			if (sort_folio(lruvec, folio, sc, tier))
+			if (sort_folio(lruvec, folio, sc, tier, &batch))
 				sorted += delta;
 			else if (isolate_folio(lruvec, folio, sc)) {
 				list_add(&folio->lru, list);
@@ -4375,6 +4419,8 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 			skipped += skipped_zone;
 		}
 
+		lru_gen_update_batch(lruvec, type, zone, &batch);
+
 		if (!remaining || isolated >= MIN_LRU_BATCH)
 			break;
 	}
-- 
2.43.0
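
For reference, here is a standalone, userspace-style sketch of the batching
pattern the patch applies. It is not part of the patch, and the names used
here (nr_pages, inc_gen_batched, flush_batch) are illustrative only: per-folio
work touches only an on-stack delta array, and the shared counters are written
once per LRU walk instead of once per folio.

/*
 * Minimal sketch of per-generation delta batching, assuming a toy
 * counter array instead of the real lruvec/lrugen structures.
 */
#include <stdio.h>

#define MAX_NR_GENS 4

struct gen_update_batch {
	int delta[MAX_NR_GENS];	/* net pages moved into/out of each gen */
};

static long nr_pages[MAX_NR_GENS];	/* shared per-gen counters (illustrative) */

/* Per-folio bookkeeping: only the cheap on-stack batch is touched. */
static void inc_gen_batched(struct gen_update_batch *batch,
			    int old_gen, int new_gen, int nr)
{
	batch->delta[old_gen] -= nr;
	batch->delta[new_gen] += nr;
}

/* One write per generation when the walk finishes or is aborted. */
static void flush_batch(struct gen_update_batch *batch)
{
	for (int gen = 0; gen < MAX_NR_GENS; gen++)
		nr_pages[gen] += batch->delta[gen];
}

int main(void)
{
	struct gen_update_batch batch = { };

	nr_pages[0] = 1000;

	/* pretend we aged 1000 single-page folios from gen 0 to gen 1 */
	for (int i = 0; i < 1000; i++)
		inc_gen_batched(&batch, 0, 1, 1);

	flush_batch(&batch);	/* 4 counter writes instead of 1000 */

	printf("gen0=%ld gen1=%ld\n", nr_pages[0], nr_pages[1]);
	return 0;
}

Compiled with any C99 compiler, this prints "gen0=0 gen1=1000" after a single
flush, which is the same end state the unbatched per-page updates would reach.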