Re: [PATCH 4/4] Add more per memcg stats.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Nov 29, 2010 at 11:53 PM, KAMEZAWA Hiroyuki
<kamezawa.hiroyu@xxxxxxxxxxxxxx> wrote:
> On Mon, 29 Nov 2010 22:49:45 -0800
> Ying Han <yinghan@xxxxxxxxxx> wrote:
>
>> A bunch of statistics are added in memory.stat to monitor per cgroup
>> kswapd performance.
>>
>> Signed-off-by: Ying Han <yinghan@xxxxxxxxxx>
>
> No objections. But please update the documentation and add more comments.

Sure, will do.

Thanks

--Ying
>
> Thanks,
> -Kame
>
>> ---
>>  include/linux/memcontrol.h |   81 +++++++++++++++++++++++++
>>  mm/memcontrol.c            |  140 ++++++++++++++++++++++++++++++++++++++++++++
>>  mm/vmscan.c                |   33 +++++++++-
>>  3 files changed, 250 insertions(+), 4 deletions(-)
>>
>> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>> index dbed45d..893ca62 100644
>> --- a/include/linux/memcontrol.h
>> +++ b/include/linux/memcontrol.h
>> @@ -127,6 +127,19 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
>>                                               gfp_t gfp_mask);
>>  u64 mem_cgroup_get_limit(struct mem_cgroup *mem);
>>
>> +/* background reclaim stats */
>> +void mem_cgroup_kswapd_steal(struct mem_cgroup *memcg, int val);
>> +void mem_cgroup_pg_steal(struct mem_cgroup *memcg, int val);
>> +void mem_cgroup_kswapd_pgscan(struct mem_cgroup *memcg, int val);
>> +void mem_cgroup_pg_pgscan(struct mem_cgroup *memcg, int val);
>> +void mem_cgroup_pgrefill(struct mem_cgroup *memcg, int val);
>> +void mem_cgroup_pg_outrun(struct mem_cgroup *memcg, int val);
>> +void mem_cgroup_alloc_stall(struct mem_cgroup *memcg, int val);
>> +void mem_cgroup_balance_wmark_ok(struct mem_cgroup *memcg, int val);
>> +void mem_cgroup_balance_swap_max(struct mem_cgroup *memcg, int val);
>> +void mem_cgroup_kswapd_shrink_zone(struct mem_cgroup *memcg, int val);
>> +void mem_cgroup_kswapd_may_writepage(struct mem_cgroup *memcg, int val);
>> +
>>  void mem_cgroup_clear_unreclaimable(struct page *page, struct zone *zone);
>>  bool mem_cgroup_zone_reclaimable(struct mem_cgroup *mem, int nid, int zid);
>>  bool mem_cgroup_mz_unreclaimable(struct mem_cgroup *mem, struct zone *zone);
>> @@ -337,6 +350,74 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *mem)
>>       return 0;
>>  }
>>
>> +/* background reclaim stats */
>> +static inline void mem_cgroup_kswapd_steal(struct mem_cgroup *memcg,
>> +                                                             int val)
>> +{
>> +     return 0;
>> +}
>> +
>> +static inline void mem_cgroup_pg_steal(struct mem_cgroup *memcg,
>> +                                                             int val)
>> +{
>> +     return 0;
>> +}
>> +
>> +static inline void mem_cgroup_kswapd_pgscan(struct mem_cgroup *memcg,
>> +                                                             int val)
>> +{
>> +     return 0;
>> +}
>> +
>> +static inline void mem_cgroup_pg_pgscan(struct mem_cgroup *memcg,
>> +                                                             int val)
>> +{
>> +     return 0;
>> +}
>> +
>> +static inline void mem_cgroup_pgrefill(struct mem_cgroup *memcg,
>> +                                                             int val)
>> +{
>> +     return 0;
>> +}
>> +
>> +static inline void mem_cgroup_pg_outrun(struct mem_cgroup *memcg,
>> +                                                             int val)
>> +{
>> +     return 0;
>> +}
>> +
>> +static inline void mem_cgroup_alloc_stall(struct mem_cgroup *memcg,
>> +                                                             int val)
>> +{
>> +     return 0;
>> +}
>> +
>> +static inline void mem_cgroup_balance_wmark_ok(struct mem_cgroup *memcg,
>> +                                                             int val)
>> +{
>> +     return 0;
>> +}
>> +
>> +static inline void mem_cgroup_balance_swap_max(struct mem_cgroup *memcg,
>> +                                                             int val)
>> +{
>> +     return 0;
>> +}
>> +
>> +static inline void mem_cgroup_kswapd_shrink_zone(struct mem_cgroup *memcg,
>> +                                                             int val)
>> +{
>> +     return 0;
>> +}
>> +
>> +
>> +static inline void mem_cgroup_kswapd_may_writepage(struct mem_cgroup *memcg,
>> +                                                             int val)
>> +{
>> +     return 0;
>> +}
>> +
>>  static inline bool mem_cgroup_zone_reclaimable(struct mem_cgroup *mem, int nid,
>>                                                               int zid)
>>  {
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index 1d39b65..97df6dd 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -91,6 +91,21 @@ enum mem_cgroup_stat_index {
>>       MEM_CGROUP_STAT_PGPGIN_COUNT,   /* # of pages paged in */
>>       MEM_CGROUP_STAT_PGPGOUT_COUNT,  /* # of pages paged out */
>>       MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
>> +     MEM_CGROUP_STAT_KSWAPD_INVOKE, /* # of times invokes kswapd */
>> +     MEM_CGROUP_STAT_KSWAPD_STEAL, /* # of pages reclaimed from kswapd */
>> +     MEM_CGROUP_STAT_PG_PGSTEAL, /* # of pages reclaimed from ttfp */
>> +     MEM_CGROUP_STAT_KSWAPD_PGSCAN, /* # of pages scanned from kswapd */
>> +     MEM_CGROUP_STAT_PG_PGSCAN, /* # of pages scanned from ttfp */
>> +     MEM_CGROUP_STAT_PGREFILL, /* # of pages scanned on active list */
>> +     MEM_CGROUP_STAT_WMARK_LOW_OK,
>> +     MEM_CGROUP_STAT_KSWAP_CREAT,
>> +     MEM_CGROUP_STAT_PGOUTRUN,
>> +     MEM_CGROUP_STAT_ALLOCSTALL,
>> +     MEM_CGROUP_STAT_BALANCE_WMARK_OK,
>> +     MEM_CGROUP_STAT_BALANCE_SWAP_MAX,
>> +     MEM_CGROUP_STAT_WAITQUEUE,
>> +     MEM_CGROUP_STAT_KSWAPD_SHRINK_ZONE,
>> +     MEM_CGROUP_STAT_KSWAPD_MAY_WRITEPAGE,
>>       MEM_CGROUP_STAT_DATA, /* end of data requires synchronization */
>>       /* incremented at every  pagein/pageout */
>>       MEM_CGROUP_EVENTS = MEM_CGROUP_STAT_DATA,
>> @@ -619,6 +634,62 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
>>       this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_SWAPOUT], val);
>>  }
>>
>> +void mem_cgroup_kswapd_steal(struct mem_cgroup *mem, int val)
>> +{
>> +     this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_KSWAPD_STEAL], val);
>> +}
>> +
>> +void mem_cgroup_pg_steal(struct mem_cgroup *mem, int val)
>> +{
>> +     this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_PG_PGSTEAL], val);
>> +}
>> +
>> +void mem_cgroup_kswapd_pgscan(struct mem_cgroup *mem, int val)
>> +{
>> +     this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_KSWAPD_PGSCAN], val);
>> +}
>> +
>> +void mem_cgroup_pg_pgscan(struct mem_cgroup *mem, int val)
>> +{
>> +     this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_PG_PGSCAN], val);
>> +}
>> +
>> +void mem_cgroup_pgrefill(struct mem_cgroup *mem, int val)
>> +{
>> +     this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_PGREFILL], val);
>> +}
>> +
>> +void mem_cgroup_pg_outrun(struct mem_cgroup *mem, int val)
>> +{
>> +     this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_PGOUTRUN], val);
>> +}
>> +
>> +void mem_cgroup_alloc_stall(struct mem_cgroup *mem, int val)
>> +{
>> +     this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_ALLOCSTALL], val);
>> +}
>> +
>> +void mem_cgroup_balance_wmark_ok(struct mem_cgroup *mem, int val)
>> +{
>> +     this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_BALANCE_WMARK_OK], val);
>> +}
>> +
>> +void mem_cgroup_balance_swap_max(struct mem_cgroup *mem, int val)
>> +{
>> +     this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_BALANCE_SWAP_MAX], val);
>> +}
>> +
>> +void mem_cgroup_kswapd_shrink_zone(struct mem_cgroup *mem, int val)
>> +{
>> +     this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_KSWAPD_SHRINK_ZONE], val);
>> +}
>> +
>> +void mem_cgroup_kswapd_may_writepage(struct mem_cgroup *mem, int val)
>> +{
>> +     this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_KSWAPD_MAY_WRITEPAGE],
>> +                     val);
>> +}
>> +
>>  static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
>>                                        struct page_cgroup *pc,
>>                                        bool charge)
>> @@ -2000,8 +2071,14 @@ static int __mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
>>               ret = res_counter_charge(&mem->res, csize, CHARGE_WMARK_LOW,
>>                                       &fail_res);
>>               if (likely(!ret)) {
>> +                     this_cpu_add(
>> +                             mem->stat->count[MEM_CGROUP_STAT_WMARK_LOW_OK],
>> +                             1);
>>                       return CHARGE_OK;
>>               } else {
>> +                     this_cpu_add(
>> +                             mem->stat->count[MEM_CGROUP_STAT_KSWAPD_INVOKE],
>> +                             1);
>>                       mem_over_limit = mem_cgroup_from_res_counter(fail_res,
>>                                                                       res);
>>                       wake_memcg_kswapd(mem_over_limit);
>> @@ -3723,6 +3800,21 @@ enum {
>>       MCS_PGPGIN,
>>       MCS_PGPGOUT,
>>       MCS_SWAP,
>> +     MCS_KSWAPD_INVOKE,
>> +     MCS_KSWAPD_STEAL,
>> +     MCS_PG_PGSTEAL,
>> +     MCS_KSWAPD_PGSCAN,
>> +     MCS_PG_PGSCAN,
>> +     MCS_PGREFILL,
>> +     MCS_WMARK_LOW_OK,
>> +     MCS_KSWAP_CREAT,
>> +     MCS_PGOUTRUN,
>> +     MCS_ALLOCSTALL,
>> +     MCS_BALANCE_WMARK_OK,
>> +     MCS_BALANCE_SWAP_MAX,
>> +     MCS_WAITQUEUE,
>> +     MCS_KSWAPD_SHRINK_ZONE,
>> +     MCS_KSWAPD_MAY_WRITEPAGE,
>>       MCS_INACTIVE_ANON,
>>       MCS_ACTIVE_ANON,
>>       MCS_INACTIVE_FILE,
>> @@ -3745,6 +3837,21 @@ struct {
>>       {"pgpgin", "total_pgpgin"},
>>       {"pgpgout", "total_pgpgout"},
>>       {"swap", "total_swap"},
>> +     {"kswapd_invoke", "total_kswapd_invoke"},
>> +     {"kswapd_steal", "total_kswapd_steal"},
>> +     {"pg_pgsteal", "total_pg_pgsteal"},
>> +     {"kswapd_pgscan", "total_kswapd_pgscan"},
>> +     {"pg_scan", "total_pg_scan"},
>> +     {"pgrefill", "total_pgrefill"},
>> +     {"wmark_low_ok", "total_wmark_low_ok"},
>> +     {"kswapd_create", "total_kswapd_create"},
>> +     {"pgoutrun", "total_pgoutrun"},
>> +     {"allocstall", "total_allocstall"},
>> +     {"balance_wmark_ok", "total_balance_wmark_ok"},
>> +     {"balance_swap_max", "total_balance_swap_max"},
>> +     {"waitqueue", "total_waitqueue"},
>> +     {"kswapd_shrink_zone", "total_kswapd_shrink_zone"},
>> +     {"kswapd_may_writepage", "total_kswapd_may_writepage"},
>>       {"inactive_anon", "total_inactive_anon"},
>>       {"active_anon", "total_active_anon"},
>>       {"inactive_file", "total_inactive_file"},
>> @@ -3773,6 +3880,37 @@ mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
>>               val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_SWAPOUT);
>>               s->stat[MCS_SWAP] += val * PAGE_SIZE;
>>       }
>> +     /* kswapd stat */
>> +     val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_KSWAPD_INVOKE);
>> +     s->stat[MCS_KSWAPD_INVOKE] += val;
>> +     val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_KSWAPD_STEAL);
>> +     s->stat[MCS_KSWAPD_STEAL] += val;
>> +     val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_PG_PGSTEAL);
>> +     s->stat[MCS_PG_PGSTEAL] += val;
>> +     val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_KSWAPD_PGSCAN);
>> +     s->stat[MCS_KSWAPD_PGSCAN] += val;
>> +     val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_PG_PGSCAN);
>> +     s->stat[MCS_PG_PGSCAN] += val;
>> +     val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_PGREFILL);
>> +     s->stat[MCS_PGREFILL] += val;
>> +     val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_WMARK_LOW_OK);
>> +     s->stat[MCS_WMARK_LOW_OK] += val;
>> +     val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_KSWAP_CREAT);
>> +     s->stat[MCS_KSWAP_CREAT] += val;
>> +     val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_PGOUTRUN);
>> +     s->stat[MCS_PGOUTRUN] += val;
>> +     val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_ALLOCSTALL);
>> +     s->stat[MCS_ALLOCSTALL] += val;
>> +     val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_BALANCE_WMARK_OK);
>> +     s->stat[MCS_BALANCE_WMARK_OK] += val;
>> +     val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_BALANCE_SWAP_MAX);
>> +     s->stat[MCS_BALANCE_SWAP_MAX] += val;
>> +     val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_WAITQUEUE);
>> +     s->stat[MCS_WAITQUEUE] += val;
>> +     val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_KSWAPD_SHRINK_ZONE);
>> +     s->stat[MCS_KSWAPD_SHRINK_ZONE] += val;
>> +     val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_KSWAPD_MAY_WRITEPAGE);
>> +     s->stat[MCS_KSWAPD_MAY_WRITEPAGE] += val;
>>
>>       /* per zone stat */
>>       val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON);
>> @@ -4579,9 +4717,11 @@ void wake_memcg_kswapd(struct mem_cgroup *mem)
>>                               0);
>>               else
>>                       kswapd_p->kswapd_task = thr;
>> +             this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_KSWAP_CREAT], 1);
>>       }
>>
>>       if (!waitqueue_active(wait)) {
>> +             this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_WAITQUEUE], 1);
>>               return;
>>       }
>>       wake_up_interruptible(wait);
>> diff --git a/mm/vmscan.c b/mm/vmscan.c
>> index f8430c4..5b0c349 100644
>> --- a/mm/vmscan.c
>> +++ b/mm/vmscan.c
>> @@ -1389,10 +1389,15 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
>>                                       ISOLATE_INACTIVE : ISOLATE_BOTH,
>>                       zone, sc->mem_cgroup,
>>                       0, file);
>> +             mem_cgroup_mz_pages_scanned(sc->mem_cgroup, zone, nr_scanned);
>>               /*
>>                * mem_cgroup_isolate_pages() keeps track of
>>                * scanned pages on its own.
>>                */
>> +             if (current_is_kswapd())
>> +                     mem_cgroup_kswapd_pgscan(sc->mem_cgroup, nr_scanned);
>> +             else
>> +                     mem_cgroup_pg_pgscan(sc->mem_cgroup, nr_scanned);
>>       }
>>
>>       if (nr_taken == 0) {
>> @@ -1413,9 +1418,16 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
>>       }
>>
>>       local_irq_disable();
>> -     if (current_is_kswapd())
>> -             __count_vm_events(KSWAPD_STEAL, nr_reclaimed);
>> -     __count_zone_vm_events(PGSTEAL, zone, nr_reclaimed);
>> +     if (scanning_global_lru(sc)) {
>> +             if (current_is_kswapd())
>> +                     __count_vm_events(KSWAPD_STEAL, nr_reclaimed);
>> +             __count_zone_vm_events(PGSTEAL, zone, nr_reclaimed);
>> +     } else {
>> +             if (current_is_kswapd())
>> +                     mem_cgroup_kswapd_steal(sc->mem_cgroup, nr_reclaimed);
>> +             else
>> +                     mem_cgroup_pg_steal(sc->mem_cgroup, nr_reclaimed);
>> +     }
>>
>>       putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list);
>>
>> @@ -1508,11 +1520,16 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
>>                * mem_cgroup_isolate_pages() keeps track of
>>                * scanned pages on its own.
>>                */
>> +             mem_cgroup_mz_pages_scanned(sc->mem_cgroup, zone, pgscanned);
>>       }
>>
>>       reclaim_stat->recent_scanned[file] += nr_taken;
>>
>> -     __count_zone_vm_events(PGREFILL, zone, pgscanned);
>> +     if (scanning_global_lru(sc))
>> +             __count_zone_vm_events(PGREFILL, zone, pgscanned);
>> +     else
>> +             mem_cgroup_pgrefill(sc->mem_cgroup, pgscanned);
>> +
>>       if (file)
>>               __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
>>       else
>> @@ -1955,6 +1972,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
>>
>>       if (scanning_global_lru(sc))
>>               count_vm_event(ALLOCSTALL);
>> +     else
>> +             mem_cgroup_alloc_stall(sc->mem_cgroup, 1);
>>
>>       for (priority = DEF_PRIORITY; priority >= 0; priority--) {
>>               sc->nr_scanned = 0;
>> @@ -2444,6 +2463,8 @@ scan:
>>                       priority != DEF_PRIORITY)
>>                       continue;
>>
>> +             mem_cgroup_kswapd_shrink_zone(mem_cont, 1);
>> +
>>               sc->nr_scanned = 0;
>>               shrink_zone(priority, zone, sc);
>>               total_scanned += sc->nr_scanned;
>> @@ -2462,6 +2483,7 @@ scan:
>>               if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
>>                   total_scanned > sc->nr_reclaimed + sc->nr_reclaimed / 2) {
>>                       sc->may_writepage = 1;
>> +                     mem_cgroup_kswapd_may_writepage(mem_cont, 1);
>>               }
>>       }
>>
>> @@ -2504,6 +2526,8 @@ loop_again:
>>       sc.nr_reclaimed = 0;
>>       total_scanned = 0;
>>
>> +     mem_cgroup_pg_outrun(mem_cont, 1);
>> +
>>       for (priority = DEF_PRIORITY; priority >= 0; priority--) {
>>               sc.priority = priority;
>>
>> @@ -2544,6 +2568,7 @@ loop_again:
>>                               wmark_ok = 0;
>>
>>                       if (wmark_ok) {
>> +                             mem_cgroup_balance_wmark_ok(sc.mem_cgroup, 1);
>>                               goto out;
>>                       }
>>               }
>> --
>> 1.7.3.1
>>
>> --
>> To unsubscribe, send a message with 'unsubscribe linux-mm' in
>> the body to majordomo@xxxxxxxxxx  For more info on Linux MM,
>> see: http://www.linux-mm.org/ .
>> Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
>> Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>
>>
>
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxxx  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
Don't email: <a href


[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]