This is useful for tracking how RSS changes per TGID. Several Android
teams have been using this patch in various kernel trees for half a year
now, and many of them have reported to me that it is really useful.

The initial patch was developed by Tim Murray. Changes I made from the
original patch:

o Prevent any additional space from being consumed by mm_struct.
o Keep the overhead low by checking whether tracing is enabled.
o Reduce noise and overhead further by emitting the event only when the
  counter crosses a threshold boundary (see the sketch in the notes
  below).

Co-developed-by: Tim Murray <timmurray@xxxxxxxxxx>
Signed-off-by: Tim Murray <timmurray@xxxxxxxxxx>
Signed-off-by: Joel Fernandes (Google) <joel@xxxxxxxxxxxxxxxxx>
---
Cc: carmenjackson@xxxxxxxxxx
Cc: mayankgupta@xxxxxxxxxx
Cc: dancol@xxxxxxxxxx
Cc: rostedt@xxxxxxxxxxx
Cc: minchan@xxxxxxxxxx
Cc: akpm@xxxxxxxxxxxxxxxxxxxx
Cc: kernel-team@xxxxxxxxxxx
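
Notes: for reference, the threshold roll-over check in mm_trace_rss_stat()
(mm/memory.c hunk below) can be exercised with a standalone userspace
sketch like the one here. It is not part of the patch; the rss_update()
helper, the printf() standing in for the tracepoint, and the hardcoded
4 KiB page size are all illustrative assumptions:

#include <stdio.h>

#define TRACE_MM_COUNTER_THRESHOLD 128	/* pages; must be a power of 2 */

/* Mimics add_mm_counter() + mm_trace_rss_stat(): update, then check. */
static void rss_update(long *count, long value)
{
	long thresh_mask = ~(TRACE_MM_COUNTER_THRESHOLD - 1);
	long old = *count;

	*count += value;

	/* Fires only when the counter rolls over a 128-page boundary */
	if ((*count & thresh_mask) != (old & thresh_mask))
		printf("trace: size=%ldB\n", *count * 4096L);
}

int main(void)
{
	long rss = 0;
	int i;

	/* 200 single-page faults: one event at 128 pages, silence otherwise */
	for (i = 0; i < 200; i++)
		rss_update(&rss, 1);

	/* Unmapping 100 pages crosses back below 128: one more event */
	rss_update(&rss, -100);
	return 0;
}

Per-page churn that stays within a single 128-page window thus produces
no events at all, which is where the noise reduction comes from.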
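
With the patch applied, the event should be usable like any other
tracepoint, e.g. (assuming tracefs is mounted at /sys/kernel/tracing;
older trees use /sys/kernel/debug/tracing):

  echo 1 > /sys/kernel/tracing/events/kmem/rss_stat/enable

after which each record in the trace buffer is printed as
"member=%d size=%ldB", per the TP_printk() format in the kmem.h hunk
below.
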
 include/linux/mm.h          | 14 +++++++++++---
 include/trace/events/kmem.h | 21 +++++++++++++++++++++
 mm/memory.c                 | 20 ++++++++++++++++++++
 3 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0334ca97c584..823aaf759bdb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1671,19 +1671,27 @@ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
 	return (unsigned long)val;
 }
 
+void mm_trace_rss_stat(int member, long count, long value);
+
 static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
 {
-	atomic_long_add(value, &mm->rss_stat.count[member]);
+	long count = atomic_long_add_return(value, &mm->rss_stat.count[member]);
+
+	mm_trace_rss_stat(member, count, value);
 }
 
 static inline void inc_mm_counter(struct mm_struct *mm, int member)
 {
-	atomic_long_inc(&mm->rss_stat.count[member]);
+	long count = atomic_long_inc_return(&mm->rss_stat.count[member]);
+
+	mm_trace_rss_stat(member, count, 1);
 }
 
 static inline void dec_mm_counter(struct mm_struct *mm, int member)
 {
-	atomic_long_dec(&mm->rss_stat.count[member]);
+	long count = atomic_long_dec_return(&mm->rss_stat.count[member]);
+
+	mm_trace_rss_stat(member, count, -1);
 }
 
 /* Optimized variant when page is already known not to be PageAnon */
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index eb57e3037deb..8b88e04fafbf 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -315,6 +315,27 @@ TRACE_EVENT(mm_page_alloc_extfrag,
 		__entry->change_ownership)
 );
 
+TRACE_EVENT(rss_stat,
+
+	TP_PROTO(int member,
+		long count),
+
+	TP_ARGS(member, count),
+
+	TP_STRUCT__entry(
+		__field(int, member)
+		__field(long, size)
+	),
+
+	TP_fast_assign(
+		__entry->member = member;
+		__entry->size = (count << PAGE_SHIFT);
+	),
+
+	TP_printk("member=%d size=%ldB",
+		__entry->member,
+		__entry->size)
+	);
 #endif /* _TRACE_KMEM_H */
 
 /* This part must be outside protection */
diff --git a/mm/memory.c b/mm/memory.c
index e2bb51b6242e..9d81322c24a3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -72,6 +72,8 @@
 #include <linux/oom.h>
 #include <linux/numa.h>
 
+#include <trace/events/kmem.h>
+
 #include <asm/io.h>
 #include <asm/mmu_context.h>
 #include <asm/pgalloc.h>
@@ -140,6 +142,24 @@ static int __init init_zero_pfn(void)
 }
 core_initcall(init_zero_pfn);
 
+/*
+ * This threshold is the boundary in the counter's value space that it
+ * has to advance past before we trace it. It should be a power of 2, to
+ * reduce unwanted trace overhead. The counter is in units of pages.
+ */
+#define TRACE_MM_COUNTER_THRESHOLD 128
+
+void mm_trace_rss_stat(int member, long count, long value)
+{
+	long thresh_mask = ~(TRACE_MM_COUNTER_THRESHOLD - 1);
+
+	if (!trace_rss_stat_enabled())
+		return;
+
+	/* Threshold roll-over, trace it */
+	if ((count & thresh_mask) != ((count - value) & thresh_mask))
+		trace_rss_stat(member, count);
+}
 
 #if defined(SPLIT_RSS_COUNTING)
-- 
2.23.0.187.g17f5b7556c-goog