On Fri, Dec 22, 2023 at 3:24 AM Kairui Song <ryncsn@xxxxxxxxx> wrote:
>
> From: Kairui Song <kasong@xxxxxxxxxxx>
>
> Another overhead of aging is page moving. Actually, in most cases,
> pages are being moved to the same gen after folio_inc_gen is called,
> especially the protected pages. So it's better to move them in bulk.
>
> This also has a good effect on LRU ordering. Currently when MGLRU
> ages, it walks the LRU backward, and the protected pages are moved to
> the tail of newer gen one by one, which reverses the order of pages in
> LRU. Moving them in batches can help keep their order, only in a small
> scope though due to the scan limit of MAX_LRU_BATCH pages.
>
> After this commit, we can see a performance gain:
>
> Tested in a 4G memcg on a EPYC 7K62 with:
>
>   memcached -u nobody -m 16384 -s /tmp/memcached.socket \
>     -a 0766 -t 16 -B binary &
>
>   memtier_benchmark -S /tmp/memcached.socket \
>     -P memcache_binary -n allkeys \
>     --key-minimum=1 --key-maximum=16000000 -d 1024 \
>     --ratio=1:0 --key-pattern=P:P -c 2 -t 16 --pipeline 8 -x 6
>
> Average result of 18 test runs:
>
> Before: 44017.78 Ops/sec
> After patch 1-2: 44810.01 Ops/sec (+1.8%)

Was it tested with CONFIG_DEBUG_LIST=y?

Also, the (44810.01 - 44687.08) / 44687.08 = 0.0027 (~0.3%) improvement
sounds like noise to me.

> Signed-off-by: Kairui Song <kasong@xxxxxxxxxxx>
> ---
>  mm/vmscan.c | 84 ++++++++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 71 insertions(+), 13 deletions(-)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index e3b4797b9729..af1266129c1b 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -3102,9 +3102,46 @@ static int folio_update_gen(struct folio *folio, int gen)
>   */
>  struct gen_update_batch {
>          int delta[MAX_NR_GENS];
> +        struct folio *head, *tail;
>  };
>
> -static void lru_gen_update_batch(struct lruvec *lruvec, bool type, int zone,
> +static void inline lru_gen_inc_bulk_finish(struct lru_gen_folio *lrugen,
> +                                           int bulk_gen, bool type, int zone,
> +                                           struct gen_update_batch *batch)
> +{
> +        if (!batch->head)
> +                return;
> +
> +        list_bulk_move_tail(&lrugen->folios[bulk_gen][type][zone],
> +                            &batch->head->lru,
> +                            &batch->tail->lru);
> +
> +        batch->head = NULL;
> +}
> +
> +/*
> + * When aging, protected pages will go to the tail of the same higher
> + * gen, so the can be moved in batches. Besides reduced overhead, this
> + * also avoids changing their LRU order in a small scope.
> + */
> +static void inline lru_gen_try_inc_bulk(struct lru_gen_folio *lrugen, struct folio *folio,
> +                                        int bulk_gen, int gen, bool type, int zone,
> +                                        struct gen_update_batch *batch)
> +{
> +        /*
> +         * If folio not moving to the bulk_gen, it's raced with promotion
> +         * so it need to go to the head of another LRU.
> +         */
> +        if (bulk_gen != gen)
> +                list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
> +
> +        if (!batch->head)
> +                batch->tail = folio;
> +
> +        batch->head = folio;
> +}
> +
> +static void lru_gen_update_batch(struct lruvec *lruvec, int bulk_gen, bool type, int zone,
>                                   struct gen_update_batch *batch)
>  {
>          int gen;
> @@ -3112,6 +3149,8 @@ static void lru_gen_update_batch(struct lruvec *lruvec, bool type, int zone,
>          struct lru_gen_folio *lrugen = &lruvec->lrugen;
>          enum lru_list lru = type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
>
> +        lru_gen_inc_bulk_finish(lrugen, bulk_gen, type, zone, batch);
> +
>          for (gen = 0; gen < MAX_NR_GENS; gen++) {
>                  int delta = batch->delta[gen];
>
> @@ -3705,6 +3744,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
>          struct gen_update_batch batch = { };
>          struct lru_gen_folio *lrugen = &lruvec->lrugen;
>          int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
> +        int bulk_gen = (old_gen + 1) % MAX_NR_GENS;
>
>          if (type == LRU_GEN_ANON && !can_swap)
>                  goto done;
>
>          /* prevent cold/hot inversion if force_scan is true */
>          for (zone = 0; zone < MAX_NR_ZONES; zone++) {
>                  struct list_head *head = &lrugen->folios[old_gen][type][zone];
> +                struct folio *prev = NULL;
>
> -                while (!list_empty(head)) {
> -                        struct folio *folio = lru_to_folio(head);
>
> +                if (!list_empty(head))
> +                        prev = lru_to_folio(head);
>
> +                while (prev) {
> +                        struct folio *folio = prev;
>                          VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
>                          VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
>                          VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
>                          VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
>
> +                        if (unlikely(list_is_first(&folio->lru, head)))
> +                                prev = NULL;
> +                        else
> +                                prev = lru_to_folio(&folio->lru);
> +
>                          new_gen = folio_inc_gen(lruvec, folio, false, &batch);
> -                        list_move_tail(&folio->lru, &lrugen->folios[new_gen][type][zone]);
> +                        lru_gen_try_inc_bulk(lrugen, folio, bulk_gen, new_gen, type, zone, &batch);
>
>                          if (!--remaining) {
> -                                lru_gen_update_batch(lruvec, type, zone, &batch);
> +                                lru_gen_update_batch(lruvec, bulk_gen, type, zone, &batch);
>                                  return false;
>                          }
>                  }
> -                lru_gen_update_batch(lruvec, type, zone, &batch);
> +
> +                lru_gen_update_batch(lruvec, bulk_gen, type, zone, &batch);
>          }
>  done:
>          reset_ctrl_pos(lruvec, type, true);
> @@ -4240,7 +4289,7 @@ static int lru_gen_memcg_seg(struct lruvec *lruvec)
>   ******************************************************************************/
>
>  static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_control *sc,
> -                       int tier_idx, struct gen_update_batch *batch)
> +                       int tier_idx, int bulk_gen, struct gen_update_batch *batch)
>  {
>          bool success;
>          int gen = folio_lru_gen(folio);
> @@ -4283,7 +4332,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
>                  int hist = lru_hist_from_seq(lrugen->min_seq[type]);
>
>                  gen = folio_inc_gen(lruvec, folio, false, batch);
> -                list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> +                lru_gen_try_inc_bulk(lrugen, folio, bulk_gen, gen, type, zone, batch);
>
>                  WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
>                             lrugen->protected[hist][type][tier - 1] + delta);
> @@ -4293,7 +4342,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
>          /* ineligible */
>          if (zone > sc->reclaim_idx || skip_cma(folio, sc)) {
>                  gen = folio_inc_gen(lruvec, folio, false, batch);
> -                list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> +                lru_gen_try_inc_bulk(lrugen, folio, bulk_gen, gen, type, zone, batch);
>                  return true;
>          }
>
> @@ -4367,11 +4416,16 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>                  LIST_HEAD(moved);
>                  int skipped_zone = 0;
>                  struct gen_update_batch batch = { };
> +                int bulk_gen = (gen + 1) % MAX_NR_GENS;
>                  int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
>                  struct list_head *head = &lrugen->folios[gen][type][zone];
> +                struct folio *prev = NULL;
>
> -                while (!list_empty(head)) {
> -                        struct folio *folio = lru_to_folio(head);
> +                if (!list_empty(head))
> +                        prev = lru_to_folio(head);
> +
> +                while (prev) {
> +                        struct folio *folio = prev;
>                          int delta = folio_nr_pages(folio);
>
>                          VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
> @@ -4380,8 +4434,12 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>                          VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
>
>                          scanned += delta;
> +                        if (unlikely(list_is_first(&folio->lru, head)))
> +                                prev = NULL;
> +                        else
> +                                prev = lru_to_folio(&folio->lru);
>
> -                        if (sort_folio(lruvec, folio, sc, tier, &batch))
> +                        if (sort_folio(lruvec, folio, sc, tier, bulk_gen, &batch))
>                                  sorted += delta;
>                          else if (isolate_folio(lruvec, folio, sc)) {
>                                  list_add(&folio->lru, list);
> @@ -4401,7 +4459,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>                          skipped += skipped_zone;
>                  }
>
> -                lru_gen_update_batch(lruvec, type, zone, &batch);
> +                lru_gen_update_batch(lruvec, bulk_gen, type, zone, &batch);
>
>                  if (!remaining || isolated >= MIN_LRU_BATCH)
>                          break;
> --
> 2.43.0
>
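As an aside for anyone following the ordering argument above, it is easy to
reproduce outside the kernel. The sketch below is illustrative only: a
hand-rolled circular doubly-linked list in plain C rather than the kernel's
<linux/list.h>, and its bulk_move_tail() merely mimics the semantics of the
list_bulk_move_tail() call the patch relies on. It shows that walking a list
backward and moving each node to the tail of another list one at a time
reverses the run, while splicing the whole run in one operation keeps its
order.

/*
 * Illustrative userspace sketch, not kernel code: a minimal circular
 * doubly-linked list in the style of struct list_head. It demonstrates
 * why moving nodes to the tail one by one while walking backward
 * reverses their relative order, whereas splicing the whole segment at
 * once (the semantics of list_bulk_move_tail()) preserves it.
 */
#include <stdio.h>

struct node {
        struct node *prev, *next;
        int id;                         /* stand-in for a folio */
};

static void list_init(struct node *head)
{
        head->prev = head->next = head;
}

static void list_del(struct node *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
}

static void list_add_tail(struct node *n, struct node *head)
{
        n->prev = head->prev;
        n->next = head;
        head->prev->next = n;
        head->prev = n;
}

/* Splice the closed segment [first, last] onto the tail of @head. */
static void bulk_move_tail(struct node *head, struct node *first, struct node *last)
{
        /* unlink the segment from its current list */
        first->prev->next = last->next;
        last->next->prev = first->prev;
        /* insert it just before @head, i.e. at the tail */
        first->prev = head->prev;
        last->next = head;
        head->prev->next = first;
        head->prev = last;
}

static void dump(const char *tag, struct node *head)
{
        printf("%-12s", tag);
        for (struct node *n = head->next; n != head; n = n->next)
                printf(" %d", n->id);
        printf("\n");
}

int main(void)
{
        struct node old_gen, new_gen, nodes[4];
        int i;

        /* Case 1: walk backward, move each node to the tail one by one. */
        list_init(&old_gen);
        list_init(&new_gen);
        for (i = 0; i < 4; i++) {
                nodes[i].id = i;
                list_add_tail(&nodes[i], &old_gen);
        }
        dump("old gen:", &old_gen);             /* 0 1 2 3 */

        for (struct node *n = old_gen.prev; n != &old_gen; ) {
                struct node *prev = n->prev;    /* remember before unlinking */

                list_del(n);
                list_add_tail(n, &new_gen);
                n = prev;
        }
        dump("one by one:", &new_gen);          /* 3 2 1 0: order reversed */

        /* Case 2: splice the whole segment with a single operation. */
        list_init(&old_gen);
        list_init(&new_gen);
        for (i = 0; i < 4; i++)
                list_add_tail(&nodes[i], &old_gen);

        bulk_move_tail(&new_gen, old_gen.next, old_gen.prev);
        dump("bulk splice:", &new_gen);         /* 0 1 2 3: order kept */

        return 0;
}

Built with any C99-or-later compiler, the one-by-one loop prints the moved
nodes as 3 2 1 0 while the single splice prints 0 1 2 3, which is the
ordering benefit the commit message claims, limited to one MAX_LRU_BATCH
scan window.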