On Fri, Dec 22, 2023 at 3:24 AM Kairui Song <ryncsn@xxxxxxxxx> wrote:
>
> From: Kairui Song <kasong@xxxxxxxxxxx>
>
> Another overhead of aging is page moving. Actually, in most cases,
> pages are being moved to the same gen after folio_inc_gen is called,
> especially the protected pages. So it's better to move them in bulk.
>
> This also has a good effect on LRU ordering. Currently when MGLRU
> ages, it walks the LRU backward, and the protected pages are moved to
> the tail of newer gen one by one, which reverses the order of pages in
> LRU. Moving them in batches can help keep their order, only in a small
> scope though due to the scan limit of MAX_LRU_BATCH pages.
>
> After this commit, we can see a performance gain:
>
> Tested in a 4G memcg on a EPYC 7K62 with:
>
>   memcached -u nobody -m 16384 -s /tmp/memcached.socket \
>     -a 0766 -t 16 -B binary &
>
>   memtier_benchmark -S /tmp/memcached.socket \
>     -P memcache_binary -n allkeys \
>     --key-minimum=1 --key-maximum=16000000 -d 1024 \
>     --ratio=1:0 --key-pattern=P:P -c 2 -t 16 --pipeline 8 -x 6
>
> Average result of 18 test runs:
>
> Before: 44017.78 Ops/sec
> After patch 1-2: 44810.01 Ops/sec (+1.8%)

Was it tested with CONFIG_DEBUG_LIST=y?

Also, the (44810.01 - 44687.08) / 44687.08 = 0.0027 (~0.3%) improvement
sounds like noise to me.

> Signed-off-by: Kairui Song <kasong@xxxxxxxxxxx>
> ---
>  mm/vmscan.c | 84 ++++++++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 71 insertions(+), 13 deletions(-)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index e3b4797b9729..af1266129c1b 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -3102,9 +3102,46 @@ static int folio_update_gen(struct folio *folio, int gen)
>   */
>  struct gen_update_batch {
>          int delta[MAX_NR_GENS];
> +        struct folio *head, *tail;
>  };
>
> -static void lru_gen_update_batch(struct lruvec *lruvec, bool type, int zone,
> +static void inline lru_gen_inc_bulk_finish(struct lru_gen_folio *lrugen,
> +                                           int bulk_gen, bool type, int zone,
> +                                           struct gen_update_batch *batch)
> +{
> +        if (!batch->head)
> +                return;
> +
> +        list_bulk_move_tail(&lrugen->folios[bulk_gen][type][zone],
> +                            &batch->head->lru,
> +                            &batch->tail->lru);
> +
> +        batch->head = NULL;
> +}
> +
> +/*
> + * When aging, protected pages will go to the tail of the same higher
> + * gen, so the can be moved in batches. Besides reduced overhead, this
> + * also avoids changing their LRU order in a small scope.
> + */
> +static void inline lru_gen_try_inc_bulk(struct lru_gen_folio *lrugen, struct folio *folio,
> +                                        int bulk_gen, int gen, bool type, int zone,
> +                                        struct gen_update_batch *batch)
> +{
> +        /*
> +         * If folio not moving to the bulk_gen, it's raced with promotion
> +         * so it need to go to the head of another LRU.
> +         */
> +        if (bulk_gen != gen)
> +                list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
> +
> +        if (!batch->head)
> +                batch->tail = folio;
> +
> +        batch->head = folio;
> +}
> +
> +static void lru_gen_update_batch(struct lruvec *lruvec, int bulk_gen, bool type, int zone,
>                                   struct gen_update_batch *batch)
>  {
>          int gen;
> @@ -3112,6 +3149,8 @@ static void lru_gen_update_batch(struct lruvec *lruvec, bool type, int zone,
>          struct lru_gen_folio *lrugen = &lruvec->lrugen;
>          enum lru_list lru = type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
>
> +        lru_gen_inc_bulk_finish(lrugen, bulk_gen, type, zone, batch);
> +
>          for (gen = 0; gen < MAX_NR_GENS; gen++) {
>                  int delta = batch->delta[gen];
>
> @@ -3705,6 +3744,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
>          struct gen_update_batch batch = { };
>          struct lru_gen_folio *lrugen = &lruvec->lrugen;
>          int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
> +        int bulk_gen = (old_gen + 1) % MAX_NR_GENS;
>
>          if (type == LRU_GEN_ANON && !can_swap)
>                  goto done;
>
>          /* prevent cold/hot inversion if force_scan is true */
>          for (zone = 0; zone < MAX_NR_ZONES; zone++) {
>                  struct list_head *head = &lrugen->folios[old_gen][type][zone];
> +                struct folio *prev = NULL;
>
> -                while (!list_empty(head)) {
> -                        struct folio *folio = lru_to_folio(head);
>
> +                if (!list_empty(head))
> +                        prev = lru_to_folio(head);
>
> +                while (prev) {
> +                        struct folio *folio = prev;
>                          VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
>                          VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
>                          VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
>                          VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
>
> +                        if (unlikely(list_is_first(&folio->lru, head)))
> +                                prev = NULL;
> +                        else
> +                                prev = lru_to_folio(&folio->lru);
> +
>                          new_gen = folio_inc_gen(lruvec, folio, false, &batch);
> -                        list_move_tail(&folio->lru, &lrugen->folios[new_gen][type][zone]);
> +                        lru_gen_try_inc_bulk(lrugen, folio, bulk_gen, new_gen, type, zone, &batch);
>
>                          if (!--remaining) {
> -                                lru_gen_update_batch(lruvec, type, zone, &batch);
> +                                lru_gen_update_batch(lruvec, bulk_gen, type, zone, &batch);
>                                  return false;
>                          }
>                  }
> -                lru_gen_update_batch(lruvec, type, zone, &batch);
> +
> +                lru_gen_update_batch(lruvec, bulk_gen, type, zone, &batch);
>          }
>  done:
>          reset_ctrl_pos(lruvec, type, true);
> @@ -4240,7 +4289,7 @@ static int lru_gen_memcg_seg(struct lruvec *lruvec)
>   ******************************************************************************/
>
>  static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_control *sc,
> -                       int tier_idx, struct gen_update_batch *batch)
> +                       int tier_idx, int bulk_gen, struct gen_update_batch *batch)
>  {
>          bool success;
>          int gen = folio_lru_gen(folio);
> @@ -4283,7 +4332,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
>                  int hist = lru_hist_from_seq(lrugen->min_seq[type]);
>
>                  gen = folio_inc_gen(lruvec, folio, false, batch);
> -                list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> +                lru_gen_try_inc_bulk(lrugen, folio, bulk_gen, gen, type, zone, batch);
>
>                  WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
>                             lrugen->protected[hist][type][tier - 1] + delta);
> @@ -4293,7 +4342,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
>          /* ineligible */
>          if (zone > sc->reclaim_idx || skip_cma(folio, sc)) {
>                  gen = folio_inc_gen(lruvec, folio, false, batch);
> -                list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> +                lru_gen_try_inc_bulk(lrugen, folio, bulk_gen, gen, type, zone, batch);
>                  return true;
>          }
>
> @@ -4367,11 +4416,16 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>                  LIST_HEAD(moved);
>                  int skipped_zone = 0;
>                  struct gen_update_batch batch = { };
> +                int bulk_gen = (gen + 1) % MAX_NR_GENS;
>                  int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
>                  struct list_head *head = &lrugen->folios[gen][type][zone];
> +                struct folio *prev = NULL;
>
> -                while (!list_empty(head)) {
> -                        struct folio *folio = lru_to_folio(head);
> +                if (!list_empty(head))
> +                        prev = lru_to_folio(head);
> +
> +                while (prev) {
> +                        struct folio *folio = prev;
>                          int delta = folio_nr_pages(folio);
>
>                          VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
> @@ -4380,8 +4434,12 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>                          VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
>
>                          scanned += delta;
> +                        if (unlikely(list_is_first(&folio->lru, head)))
> +                                prev = NULL;
> +                        else
> +                                prev = lru_to_folio(&folio->lru);
>
> -                        if (sort_folio(lruvec, folio, sc, tier, &batch))
> +                        if (sort_folio(lruvec, folio, sc, tier, bulk_gen, &batch))
>                                  sorted += delta;
>                          else if (isolate_folio(lruvec, folio, sc)) {
>                                  list_add(&folio->lru, list);
> @@ -4401,7 +4459,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>                          skipped += skipped_zone;
>                  }
>
> -                lru_gen_update_batch(lruvec, type, zone, &batch);
> +                lru_gen_update_batch(lruvec, bulk_gen, type, zone, &batch);
>
>                  if (!remaining || isolated >= MIN_LRU_BATCH)
>                          break;
> --
> 2.43.0
>
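As an aside for anyone following the ordering argument above, it is easy to
reproduce outside the kernel. The sketch below is illustrative only: a
hand-rolled circular doubly-linked list in plain C rather than the kernel's
<linux/list.h>, and its bulk_move_tail() merely mimics the semantics of the
list_bulk_move_tail() call the patch relies on. It shows that walking a list
backward and moving each node to the tail of another list one at a time
reverses the run, while splicing the whole run in one operation keeps its
order.

/*
 * Illustrative userspace sketch, not kernel code: a minimal circular
 * doubly-linked list in the style of struct list_head. It demonstrates
 * why moving nodes to the tail one by one while walking backward
 * reverses their relative order, whereas splicing the whole segment at
 * once (the semantics of list_bulk_move_tail()) preserves it.
 */
#include <stdio.h>

struct node {
        struct node *prev, *next;
        int id;                         /* stand-in for a folio */
};

static void list_init(struct node *head)
{
        head->prev = head->next = head;
}

static void list_del(struct node *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
}

static void list_add_tail(struct node *n, struct node *head)
{
        n->prev = head->prev;
        n->next = head;
        head->prev->next = n;
        head->prev = n;
}

/* Splice the closed segment [first, last] onto the tail of @head. */
static void bulk_move_tail(struct node *head, struct node *first, struct node *last)
{
        /* unlink the segment from its current list */
        first->prev->next = last->next;
        last->next->prev = first->prev;
        /* insert it just before @head, i.e. at the tail */
        first->prev = head->prev;
        last->next = head;
        head->prev->next = first;
        head->prev = last;
}

static void dump(const char *tag, struct node *head)
{
        printf("%-12s", tag);
        for (struct node *n = head->next; n != head; n = n->next)
                printf(" %d", n->id);
        printf("\n");
}

int main(void)
{
        struct node old_gen, new_gen, nodes[4];
        int i;

        /* Case 1: walk backward, move each node to the tail one by one. */
        list_init(&old_gen);
        list_init(&new_gen);
        for (i = 0; i < 4; i++) {
                nodes[i].id = i;
                list_add_tail(&nodes[i], &old_gen);
        }
        dump("old gen:", &old_gen);             /* 0 1 2 3 */

        for (struct node *n = old_gen.prev; n != &old_gen; ) {
                struct node *prev = n->prev;    /* remember before unlinking */

                list_del(n);
                list_add_tail(n, &new_gen);
                n = prev;
        }
        dump("one by one:", &new_gen);          /* 3 2 1 0: order reversed */

        /* Case 2: splice the whole segment with a single operation. */
        list_init(&old_gen);
        list_init(&new_gen);
        for (i = 0; i < 4; i++)
                list_add_tail(&nodes[i], &old_gen);

        bulk_move_tail(&new_gen, old_gen.next, old_gen.prev);
        dump("bulk splice:", &new_gen);         /* 0 1 2 3: order kept */

        return 0;
}

Built with any C99-or-later compiler, the one-by-one loop prints the moved
nodes as 3 2 1 0 while the single splice prints 0 1 2 3, which is the
ordering benefit the commit message claims, limited to one MAX_LRU_BATCH
scan window.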