>>On Mon, Sep 25, 2023 at 10:20 PM Jaewon Kim <jaewon31.kim@xxxxxxxxxxx> wrote: >>> >>> As the legacy lru provides, the lru_gen needs some trace events for >>> debugging. >>> >>> This commit introduces 2 trace events. >>> trace_mm_vmscan_lru_gen_scan >>> trace_mm_vmscan_lru_gen_evict >>> >>> Each event is similar to the following legacy events. >>> trace_mm_vmscan_lru_isolate, >>> trace_mm_vmscan_lru_shrink_[in]active >> >>We should just reuse trace_mm_vmscan_lru_isolate and >>trace_mm_vmscan_lru_shrink_inactive instead of adding new tracepoints. >> >>To reuse trace_mm_vmscan_lru_isolate, we'd just need to append two new >>names to LRU_NAMES. >> >>The naming of trace_mm_vmscan_lru_shrink_inactive might seem confusing >>but it's how MGLRU maintains the compatibility, e.g., the existing >>active/inactive counters in /proc/vmstat. > > >Hello > >Actually I had tried to reuse them. But some value was not that compatible. >Let me try that way again. >> Hello Yu Zhao, could you look into what I tried below? I reused the legacy trace events as you recommended. For the nr_scanned for trace_mm_vmscan_lru_shrink_inactive, I just used the scanned returned from isolate_folios. I thought this was right as scan_folios also uses its isolated. 
__count_vm_events(PGSCAN_ANON + type, isolated); But I guess the scanned in scan_folios is actually the one used in shrink_inactive_list I tested this on both 0 and 7 of /sys/kernel/mm/lru_gen/enabled diff --git a/mm/vmscan.c b/mm/vmscan.c index a4e44f1c97c1..b61a0156559c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4328,6 +4328,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, int sorted = 0; int scanned = 0; int isolated = 0; + int skipped = 0; int remaining = MAX_LRU_BATCH; struct lru_gen_folio *lrugen = &lruvec->lrugen; struct mem_cgroup *memcg = lruvec_memcg(lruvec); @@ -4341,7 +4342,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, for (i = MAX_NR_ZONES; i > 0; i--) { LIST_HEAD(moved); - int skipped = 0; + int skipped_zone = 0; int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES; struct list_head *head = &lrugen->folios[gen][type][zone]; @@ -4363,16 +4364,17 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, isolated += delta; } else { list_move(&folio->lru, &moved); - skipped += delta; + skipped_zone += delta; } - if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH) + if (!--remaining || max(isolated, skipped_zone) >= MIN_LRU_BATCH) break; } - if (skipped) { + if (skipped_zone) { list_splice(&moved, head); - __count_zid_vm_events(PGSCAN_SKIP, zone, skipped); + __count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone); + skipped += skipped_zone; } if (!remaining || isolated >= MIN_LRU_BATCH) @@ -4387,6 +4389,9 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, __count_memcg_events(memcg, item, isolated); __count_memcg_events(memcg, PGREFILL, sorted); __count_vm_events(PGSCAN_ANON + type, isolated); + trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, MAX_LRU_BATCH, + scanned, skipped, isolated, + type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON); /* * There might not be eligible folios due to reclaim_idx. 
Check the @@ -4517,6 +4522,9 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap retry: reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false); sc->nr_reclaimed += reclaimed; + trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id, + scanned, reclaimed, &stat, sc->priority, + type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON); list_for_each_entry_safe_reverse(folio, next, &list, lru) { if (!folio_evictable(folio)) { >>> Here's an example >>> mm_vmscan_lru_gen_scan: classzone=2 order=0 nr_requested=4096 nr_scanned=64 nr_skipped=0 nr_taken=64 lru=anon >>> mm_vmscan_lru_gen_evict: nid=0 nr_reclaimed=64 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 priority=2 flags=RECLAIM_WB_ANON|RECLAIM_WB_ASYNC >>> mm_vmscan_lru_gen_scan: classzone=1 order=0 nr_requested=4096 nr_scanned=64 nr_skipped=0 nr_taken=64 lru=file >>> mm_vmscan_lru_gen_evict: nid=0 nr_reclaimed=64 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 priority=12 flags=RECLAIM_WB_FILE|RECLAIM_WB_ASYNC >>> >>> Signed-off-by: Jaewon Kim <jaewon31.kim@xxxxxxxxxxx> >>> --- >>> v4: wrap with #ifdef CONFIG_LRU_GEN >>> v3: change printk format >>> v2: use condition and make it aligned >>> v1: introduce trace events >>> --- >>> include/trace/events/mmflags.h | 9 ++++ >>> include/trace/events/vmscan.h | 96 ++++++++++++++++++++++++++++++++++ >>> mm/vmscan.c | 20 +++++-- >>> 3 files changed, 120 insertions(+), 5 deletions(-) >>> >>> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h >>> index 1478b9dd05fa..6dfe85bd4e81 100644 >>> --- a/include/trace/events/mmflags.h >>> +++ b/include/trace/events/mmflags.h >>> @@ -274,6 +274,12 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \ >>> EM (LRU_ACTIVE_FILE, "active_file") \ >>> EMe(LRU_UNEVICTABLE, "unevictable") >>> >>> +#ifdef CONFIG_LRU_GEN >>> +#define LRU_GEN_NAMES 
\ >>> + EM (LRU_GEN_ANON, "anon") \ >>> + EMe(LRU_GEN_FILE, "file") >>> +#endif >>> + >>> /* >>> * First define the enums in the above macros to be exported to userspace >>> * via TRACE_DEFINE_ENUM(). >>> @@ -288,6 +294,9 @@ COMPACTION_PRIORITY >>> /* COMPACTION_FEEDBACK are defines not enums. Not needed here. */ >>> ZONE_TYPE >>> LRU_NAMES >>> +#ifdef CONFIG_LRU_GEN >>> +LRU_GEN_NAMES >>> +#endif >>> >>> /* >>> * Now redefine the EM() and EMe() macros to map the enums to the strings >>> diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h >>> index d2123dd960d5..2080ef742f89 100644 >>> --- a/include/trace/events/vmscan.h >>> +++ b/include/trace/events/vmscan.h >>> @@ -327,6 +327,102 @@ TRACE_EVENT(mm_vmscan_lru_isolate, >>> __print_symbolic(__entry->lru, LRU_NAMES)) >>> ); >>> >>> +#ifdef CONFIG_LRU_GEN >>> +TRACE_EVENT_CONDITION(mm_vmscan_lru_gen_scan, >>> + TP_PROTO(int highest_zoneidx, >>> + int order, >>> + unsigned long nr_requested, >>> + unsigned long nr_scanned, >>> + unsigned long nr_skipped, >>> + unsigned long nr_taken, >>> + int lru), >>> + >>> + TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, lru), >>> + >>> + TP_CONDITION(nr_scanned), >>> + >>> + TP_STRUCT__entry( >>> + __field(int, highest_zoneidx) >>> + __field(int, order) >>> + __field(unsigned long, nr_requested) >>> + __field(unsigned long, nr_scanned) >>> + __field(unsigned long, nr_skipped) >>> + __field(unsigned long, nr_taken) >>> + __field(int, lru) >>> + ), >>> + >>> + TP_fast_assign( >>> + __entry->highest_zoneidx = highest_zoneidx; >>> + __entry->order = order; >>> + __entry->nr_requested = nr_requested; >>> + __entry->nr_scanned = nr_scanned; >>> + __entry->nr_skipped = nr_skipped; >>> + __entry->nr_taken = nr_taken; >>> + __entry->lru = lru; >>> + ), >>> + >>> + /* >>> + * classzone is previous name of the highest_zoneidx. >>> + * Reason not to change it is the ABI requirement of the tracepoint. 
>>> + */ >>> + TP_printk("classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s", >>> + __entry->highest_zoneidx, >>> + __entry->order, >>> + __entry->nr_requested, >>> + __entry->nr_scanned, >>> + __entry->nr_skipped, >>> + __entry->nr_taken, >>> + __print_symbolic(__entry->lru, LRU_GEN_NAMES)) >>> +); >>> + >>> +TRACE_EVENT(mm_vmscan_lru_gen_evict, >>> + >>> + TP_PROTO(int nid, unsigned long nr_reclaimed, >>> + struct reclaim_stat *stat, int priority, int file), >>> + >>> + TP_ARGS(nid, nr_reclaimed, stat, priority, file), >>> + >>> + TP_STRUCT__entry( >>> + __field(unsigned long, nr_reclaimed) >>> + __field(unsigned long, nr_dirty) >>> + __field(unsigned long, nr_writeback) >>> + __field(unsigned long, nr_congested) >>> + __field(unsigned long, nr_immediate) >>> + __field(unsigned int, nr_activate0) >>> + __field(unsigned int, nr_activate1) >>> + __field(unsigned long, nr_ref_keep) >>> + __field(unsigned long, nr_unmap_fail) >>> + __field(int, nid) >>> + __field(int, priority) >>> + __field(int, reclaim_flags) >>> + ), >>> + >>> + TP_fast_assign( >>> + __entry->nid = nid; >>> + __entry->nr_reclaimed = nr_reclaimed; >>> + __entry->nr_dirty = stat->nr_dirty; >>> + __entry->nr_writeback = stat->nr_writeback; >>> + __entry->nr_congested = stat->nr_congested; >>> + __entry->nr_immediate = stat->nr_immediate; >>> + __entry->nr_activate0 = stat->nr_activate[0]; >>> + __entry->nr_activate1 = stat->nr_activate[1]; >>> + __entry->nr_ref_keep = stat->nr_ref_keep; >>> + __entry->nr_unmap_fail = stat->nr_unmap_fail; >>> + __entry->priority = priority; >>> + __entry->reclaim_flags = trace_reclaim_flags(file); >>> + ), >>> + >>> + TP_printk("nid=%d nr_reclaimed=%lu nr_dirty=%lu nr_writeback=%lu nr_congested=%lu nr_immediate=%lu nr_activate_anon=%u nr_activate_file=%u nr_ref_keep=%lu nr_unmap_fail=%lu priority=%d flags=%s", >>> + __entry->nid, __entry->nr_reclaimed, >>> + __entry->nr_dirty, __entry->nr_writeback, >>> + __entry->nr_congested, 
__entry->nr_immediate, >>> + __entry->nr_activate0, __entry->nr_activate1, >>> + __entry->nr_ref_keep, __entry->nr_unmap_fail, >>> + __entry->priority, >>> + show_reclaim_flags(__entry->reclaim_flags)) >>> +); >>> +#endif >>> + >>> TRACE_EVENT(mm_vmscan_write_folio, >>> >>> TP_PROTO(struct folio *folio), >>> diff --git a/mm/vmscan.c b/mm/vmscan.c >>> index 6f13394b112e..0c8b48bcb461 100644 >>> --- a/mm/vmscan.c >>> +++ b/mm/vmscan.c >>> @@ -5005,6 +5005,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, >>> int sorted = 0; >>> int scanned = 0; >>> int isolated = 0; >>> + int skipped = 0; >>> int remaining = MAX_LRU_BATCH; >>> struct lru_gen_folio *lrugen = &lruvec->lrugen; >>> struct mem_cgroup *memcg = lruvec_memcg(lruvec); >>> @@ -5018,7 +5019,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, >>> >>> for (i = MAX_NR_ZONES; i > 0; i--) { >>> LIST_HEAD(moved); >>> - int skipped = 0; >>> + int skipped_zone = 0; >>> int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES; >>> struct list_head *head = &lrugen->folios[gen][type][zone]; >>> >>> @@ -5040,16 +5041,17 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, >>> isolated += delta; >>> } else { >>> list_move(&folio->lru, &moved); >>> - skipped += delta; >>> + skipped_zone += delta; >>> } >>> >>> - if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH) >>> + if (!--remaining || max(isolated, skipped_zone) >= MIN_LRU_BATCH) >>> break; >>> } >>> >>> - if (skipped) { >>> + if (skipped_zone) { >>> list_splice(&moved, head); >>> - __count_zid_vm_events(PGSCAN_SKIP, zone, skipped); >>> + __count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone); >>> + skipped += skipped_zone; >>> } >>> >>> if (!remaining || isolated >= MIN_LRU_BATCH) >>> @@ -5065,6 +5067,10 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, >>> __count_memcg_events(memcg, PGREFILL, sorted); >>> __count_vm_events(PGSCAN_ANON + type, isolated); >>> >>> +#ifdef 
CONFIG_LRU_GEN >>> + trace_mm_vmscan_lru_gen_scan(sc->reclaim_idx, sc->order, MAX_LRU_BATCH, >>> + scanned, skipped, isolated, type); >>> +#endif >> >>These functions are already within CONFIG_LRU_GEN. >> >>> /* >>> * There might not be eligible folios due to reclaim_idx. Check the >>> * remaining to prevent livelock if it's not making progress. >>> @@ -5194,6 +5200,10 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap >>> retry: >>> reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false); >>> sc->nr_reclaimed += reclaimed; >>> +#ifdef CONFIG_LRU_GEN >>> + trace_mm_vmscan_lru_gen_evict(pgdat->node_id, reclaimed, &stat, >>> + sc->priority, type); >>> +#endif >> >>Ditto.