Re: [RFC PATCH 2/3] mm/memory-tiers: Use page counter to track toptier memory usage

Wei Xu <weixugc@xxxxxxxxxx> · Tue, 14 Jun 2022 17:27:51 -0700

On Tue, Jun 14, 2022 at 3:26 PM Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> wrote:
If we need to restrict toptier memory usage for a cgroup,

we need to retrieve usage of toptier memory efficiently.

Add a page counter to track toptier memory usage directly

so its value can be returned right away.

---

 include/linux/memcontrol.h |  1 +

 mm/memcontrol.c            | 50 ++++++++++++++++++++++++++++++++------

 2 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h

index 9ecead1042b9..b4f727cba1de 100644

--- a/include/linux/memcontrol.h

+++ b/include/linux/memcontrol.h

@@ -241,6 +241,7 @@ struct mem_cgroup {



        /* Accounted resources */

        struct page_counter memory;             /* Both v1 & v2 */

+       struct page_counter toptier;



        union {

                struct page_counter swap;       /* v2 only */

diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index 2f6e95e6d200..2f20ec2712b8 100644

--- a/mm/memcontrol.c

+++ b/mm/memcontrol.c

@@ -848,6 +848,23 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,

        __this_cpu_add(memcg->vmstats_percpu->nr_page_events, nr_pages);

 }



+static inline void mem_cgroup_charge_toptier(struct mem_cgroup *memcg,

+                                            int nid,

+                                            int nr_pages)

+{

+       if (!node_is_toptier(nid) || !memcg)

+               return;

+

+       if (nr_pages >= 0) {

+               page_counter_charge(&memcg->toptier,

+                               (unsigned long) nr_pages);

+       } else {

+               nr_pages = -nr_pages;

+               page_counter_uncharge(&memcg->toptier,

+                               (unsigned long) nr_pages);

+       }

+}

+

When we don't know which pages are being charged, we should still charge the usage to toptier (assuming that toptier always includes the default tier), e.g. from try_charge_memcg().

The idea is that when lower-tier memory is not used, memcg->toptier and memcg->memory should have the same value. Otherwise, it can cause confusion about where the pages of (memcg->memory - memcg->toptier) go.


 static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,

                                       enum mem_cgroup_events_target target)

 {

@@ -3027,6 +3044,8 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)

                if (!ret) {

                        page->memcg_data = (unsigned long)objcg |

                                MEMCG_DATA_KMEM;

+                       mem_cgroup_charge_toptier(page_memcg(page),

+                                       page_to_nid(page), 1 << order);

                        return 0;

                }

                obj_cgroup_put(objcg);

@@ -3050,6 +3069,8 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)



        objcg = __folio_objcg(folio);

        obj_cgroup_uncharge_pages(objcg, nr_pages);

+       mem_cgroup_charge_toptier(page_memcg(page),

+                       page_to_nid(page), -nr_pages);

        folio->memcg_data = 0;

        obj_cgroup_put(objcg);

 }

@@ -3947,13 +3968,10 @@ unsigned long mem_cgroup_memtier_usage(struct mem_cgroup *memcg,



 unsigned long mem_cgroup_toptier_usage(struct mem_cgroup *memcg)

 {

-       struct memory_tier *top_tier;

-

-       top_tier = list_first_entry(&memory_tiers, struct memory_tier, list);

-       if (top_tier)

-               return mem_cgroup_memtier_usage(memcg, top_tier);

-       else

+       if (!memcg)

                return 0;

+

+       return page_counter_read(&memcg->toptier);

 }



 #endif /* CONFIG_NUMA */

@@ -5228,11 +5246,13 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)

                memcg->oom_kill_disable = parent->oom_kill_disable;



                page_counter_init(&memcg->memory, &parent->memory);

+               page_counter_init(&memcg->toptier, &parent->toptier);

                page_counter_init(&memcg->swap, &parent->swap);

                page_counter_init(&memcg->kmem, &parent->kmem);

                page_counter_init(&memcg->tcpmem, &parent->tcpmem);

        } else {

                page_counter_init(&memcg->memory, NULL);

+               page_counter_init(&memcg->toptier, NULL);

                page_counter_init(&memcg->swap, NULL);

                page_counter_init(&memcg->kmem, NULL);

                page_counter_init(&memcg->tcpmem, NULL);

@@ -5678,6 +5698,8 @@ static int mem_cgroup_move_account(struct page *page,

        memcg_check_events(to, nid);

        mem_cgroup_charge_statistics(from, -nr_pages);

        memcg_check_events(from, nid);

+       mem_cgroup_charge_toptier(to, nid, nr_pages);

+       mem_cgroup_charge_toptier(from, nid, -nr_pages);

        local_irq_enable();

 out_unlock:

        folio_unlock(folio);

@@ -6761,6 +6783,7 @@ static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,



        local_irq_disable();

        mem_cgroup_charge_statistics(memcg, nr_pages);

+       mem_cgroup_charge_toptier(memcg, folio_nid(folio), nr_pages);

        memcg_check_events(memcg, folio_nid(folio));

        local_irq_enable();

 out:

@@ -6853,6 +6876,7 @@ struct uncharge_gather {

        unsigned long nr_memory;

        unsigned long pgpgout;

        unsigned long nr_kmem;

+       unsigned long nr_toptier;

        int nid;

 };



@@ -6867,6 +6891,7 @@ static void uncharge_batch(const struct uncharge_gather *ug)



        if (ug->nr_memory) {

                page_counter_uncharge(&ug->memcg->memory, ug->nr_memory);

+               page_counter_uncharge(&ug->memcg->toptier, ug->nr_toptier);

                if (do_memsw_account())

                        page_counter_uncharge(&ug->memcg->memsw, ug->nr_memory);

                if (ug->nr_kmem)

@@ -6929,12 +6954,18 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)

                ug->nr_memory += nr_pages;

                ug->nr_kmem += nr_pages;



+               if (node_is_toptier(folio_nid(folio)))

+                       ug->nr_toptier += nr_pages;

+

                folio->memcg_data = 0;

                obj_cgroup_put(objcg);

        } else {

                /* LRU pages aren't accounted at the root level */

-               if (!mem_cgroup_is_root(memcg))

+               if (!mem_cgroup_is_root(memcg)) {

                        ug->nr_memory += nr_pages;

+                       if (node_is_toptier(folio_nid(folio)))

+                               ug->nr_toptier += nr_pages;

+               }

                ug->pgpgout++;



                folio->memcg_data = 0;

@@ -7011,6 +7042,7 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)

        /* Force-charge the new page. The old one will be freed soon */

        if (!mem_cgroup_is_root(memcg)) {

                page_counter_charge(&memcg->memory, nr_pages);

+               mem_cgroup_charge_toptier(memcg, folio_nid(new), nr_pages);

                if (do_memsw_account())

                        page_counter_charge(&memcg->memsw, nr_pages);

        }

@@ -7231,8 +7263,10 @@ void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)



        folio->memcg_data = 0;



-       if (!mem_cgroup_is_root(memcg))

+       if (!mem_cgroup_is_root(memcg)) {

                page_counter_uncharge(&memcg->memory, nr_entries);

+               mem_cgroup_charge_toptier(memcg, folio_nid(folio), -nr_entries);

+       }



        if (!cgroup_memory_noswap && memcg != swap_memcg) {

                if (!mem_cgroup_is_root(swap_memcg))

-- 

2.35.1