THIS IS ONLY FOR DEBUG.

Show more detail about the per-order page counts on each CPU in
/proc/zoneinfo, and add a new pcp_order_stat file in sysfs which shows
the total count of PCP-cached pages for each hugepage size.

  #cat /proc/zoneinfo
  ....
  cpu: 15
            count:  275
            high:   529
            batch:  63
            order0: 59
            order1: 28
            order2: 28
            order3: 6
            order4: 0
            order5: 0
            order6: 0
            order7: 0
            order8: 0
            order9: 0

  #cat /sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/pcp_order_stat
  10

Signed-off-by: Kefeng Wang <wangkefeng.wang@xxxxxxxxxx>
---
Note: a hypothetical userspace reader for the new counters is appended
below the patch; it is illustrative only and not part of the change.

 include/linux/mmzone.h |  6 ++++++
 include/linux/vmstat.h | 19 +++++++++++++++++++
 mm/Kconfig.debug       |  8 ++++++++
 mm/huge_memory.c       | 27 +++++++++++++++++++++++++++
 mm/page_alloc.c        |  4 ++++
 mm/vmstat.c            | 16 ++++++++++++++++
 6 files changed, 80 insertions(+)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c745e2f1a0f2..c32c01468a77 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -665,6 +665,9 @@ enum zone_watermarks {
 #define NR_LOWORDER_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1))
 #define HIGHORDER_PCP_LIST_INDEX (NR_LOWORDER_PCP_LISTS - (PAGE_ALLOC_COSTLY_ORDER + 1))
 #define NR_PCP_LISTS (NR_LOWORDER_PCP_LISTS + NR_PCP_THP)
+#ifdef CONFIG_PCP_ORDER_STATS
+#define NR_PCP_ORDER (PAGE_ALLOC_COSTLY_ORDER + NR_PCP_THP + 1)
+#endif
 
 #define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
 #define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
@@ -702,6 +705,9 @@ struct per_cpu_pages {
 
 	/* Lists of pages, one per migrate type stored on the pcp-lists */
 	struct list_head lists[NR_PCP_LISTS];
+#ifdef CONFIG_PCP_ORDER_STATS
+	int per_order_count[NR_PCP_ORDER];	/* per-order page counts */
+#endif
 } ____cacheline_aligned_in_smp;
 
 struct per_cpu_zonestat {
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 735eae6e272c..91843f2d327f 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -624,4 +624,23 @@ static inline void lruvec_stat_sub_folio(struct folio *folio,
 {
 	lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio));
 }
+
+static inline void pcp_order_stat_mod(struct per_cpu_pages *pcp, int order,
+				      int val)
+{
+#ifdef CONFIG_PCP_ORDER_STATS
+	pcp->per_order_count[order] += val;
+#endif
+}
+
+static inline void pcp_order_stat_inc(struct per_cpu_pages *pcp, int order)
+{
+	pcp_order_stat_mod(pcp, order, 1);
+}
+
+static inline void pcp_order_stat_dec(struct per_cpu_pages *pcp, int order)
+{
+	pcp_order_stat_mod(pcp, order, -1);
+}
+
 #endif /* _LINUX_VMSTAT_H */
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index afc72fde0f03..57eef0ce809b 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -276,3 +276,11 @@ config PER_VMA_LOCK_STATS
 	  overhead in the page fault path.
 
 	  If in doubt, say N.
+
+config PCP_ORDER_STATS
+	bool "Per-order statistics for Per-CPU pagesets (PCP)"
+	help
+	  Say Y to show per-order statistics of the Per-CPU pagesets in
+	  /proc/zoneinfo and via pcp_order_stat in sysfs.
+
+	  If in doubt, say N.
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9b8a8aa36526..0c6262bb8fe4 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -599,12 +599,39 @@ DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT);
 DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback, MTHP_STAT_ANON_SWPOUT_FALLBACK);
 DEFINE_MTHP_STAT_ATTR(anon_swpin_refault, MTHP_STAT_ANON_SWPIN_REFAULT);
 
+#ifdef CONFIG_PCP_ORDER_STATS
+static ssize_t pcp_order_stat_show(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *buf)
+{
+	int order = to_thpsize(kobj)->order;
+	unsigned int counts = 0;
+	struct zone *zone;
+
+	for_each_populated_zone(zone) {
+		struct per_cpu_pages *pcp;
+		int i;
+
+		for_each_online_cpu(i) {
+			pcp = per_cpu_ptr(zone->per_cpu_pageset, i);
+			counts += pcp->per_order_count[order];
+		}
+	}
+
+	return sysfs_emit(buf, "%u\n", counts);
+}
+
+static struct kobj_attribute pcp_order_stat_attr = __ATTR_RO(pcp_order_stat);
+#endif
+
 static struct attribute *stats_attrs[] = {
 	&anon_alloc_attr.attr,
 	&anon_alloc_fallback_attr.attr,
 	&anon_swpout_attr.attr,
 	&anon_swpout_fallback_attr.attr,
 	&anon_swpin_refault_attr.attr,
+#ifdef CONFIG_PCP_ORDER_STATS
+	&pcp_order_stat_attr.attr,
+#endif
 	NULL,
 };
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 25fd3fe30cb0..f44cdf8dec50 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1185,6 +1185,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 		list_del(&page->pcp_list);
 		count -= nr_pages;
 		pcp->count -= nr_pages;
+		pcp_order_stat_dec(pcp, order);
 
 		__free_one_page(page, pfn, zone, order, mt, FPI_NONE);
 		trace_mm_page_pcpu_drain(page, order, mt);
@@ -2560,6 +2561,7 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
 	pindex = order_to_pindex(migratetype, order);
 	list_add(&page->pcp_list, &pcp->lists[pindex]);
 	pcp->count += 1 << order;
+	pcp_order_stat_inc(pcp, order);
 
 	batch = READ_ONCE(pcp->batch);
 	/*
@@ -2957,6 +2959,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
 							migratetype, alloc_flags);
 
 		pcp->count += alloced << order;
+		pcp_order_stat_mod(pcp, order, alloced);
 		if (unlikely(list_empty(list)))
 			return NULL;
 	}
@@ -2964,6 +2967,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
 		page = list_first_entry(list, struct page, pcp_list);
 		list_del(&page->pcp_list);
 		pcp->count -= 1 << order;
+		pcp_order_stat_dec(pcp, order);
 	} while (check_new_pages(page, order));
 
 	return page;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index db79935e4a54..632bb1ed6a53 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1674,6 +1674,19 @@ static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
 	return false;
 }
 
+static void zoneinfo_show_pcp_order_stat(struct seq_file *m,
+					 struct per_cpu_pages *pcp)
+{
+#ifdef CONFIG_PCP_ORDER_STATS
+	int j;
+
+	for (j = 0; j < NR_PCP_ORDER; j++)
+		seq_printf(m,
+			   "\n              order%d: %i",
+			   j, pcp->per_order_count[j]);
+#endif
+}
+
 static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 						struct zone *zone)
 {
@@ -1748,6 +1761,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 			   pcp->count,
 			   pcp->high,
 			   pcp->batch);
+
+		zoneinfo_show_pcp_order_stat(m, pcp);
+
 #ifdef CONFIG_SMP
 		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
 		seq_printf(m, "\n  vm stats threshold: %d",
--
2.27.0
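
For testing convenience, here is a hypothetical userspace reader for the
new counters. It is a sketch, not part of the patch: the sysfs layout
(hugepages-<size>kB/stats/pcp_order_stat) is the one introduced above,
while the program itself (name, output format, error handling) is purely
illustrative.

/* pcpstat.c - hypothetical helper, NOT part of this patch. */
#include <dirent.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>

#define THP_DIR "/sys/kernel/mm/transparent_hugepage"

int main(void)
{
	DIR *dir = opendir(THP_DIR);
	struct dirent *de;

	if (!dir) {
		perror(THP_DIR);
		return 1;
	}

	while ((de = readdir(dir))) {
		char path[PATH_MAX];
		unsigned int count;
		FILE *f;

		/* Only the per-size subdirectories carry a stats/ group. */
		if (strncmp(de->d_name, "hugepages-", 10))
			continue;

		snprintf(path, sizeof(path), "%s/%s/stats/pcp_order_stat",
			 THP_DIR, de->d_name);
		f = fopen(path, "r");
		if (!f)
			continue;	/* kernel built without CONFIG_PCP_ORDER_STATS */
		if (fscanf(f, "%u", &count) == 1)
			printf("%s: %u pages on pcp lists\n", de->d_name, count);
		fclose(f);
	}

	closedir(dir);
	return 0;
}

Built with "cc -o pcpstat pcpstat.c", it would print one line per
hugepage size, e.g. "hugepages-64kB: 10" for the example above.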