The following commit has been merged into the timers/core branch of tip:

Commit-ID:     36e40df35d2c1891fe58241640c7c95de4aa739b
Gitweb:        https://git.kernel.org/tip/36e40df35d2c1891fe58241640c7c95de4aa739b
Author:        Anna-Maria Behnsen <anna-maria@xxxxxxxxxxxxx>
AuthorDate:    Thu, 22 Feb 2024 11:34:03 +01:00
Committer:     Thomas Gleixner <tglx@xxxxxxxxxxxxx>
CommitterDate: Thu, 22 Feb 2024 17:52:32 +01:00

timer_migration: Add tracepoints

The timer pull logic needs proper debugging aids. Add tracepoints so the
hierarchical idle machinery can be diagnosed.

Signed-off-by: Anna-Maria Behnsen <anna-maria@xxxxxxxxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Link: https://lore.kernel.org/r/20240222103403.31923-1-anna-maria@xxxxxxxxxxxxx

---
 MAINTAINERS                            |   1 +-
 include/trace/events/timer_migration.h | 298 ++++++++++++++++++++++++-
 kernel/time/timer_migration.c          |  26 ++-
 3 files changed, 325 insertions(+)
 create mode 100644 include/trace/events/timer_migration.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 9ed4d38..70c07ae 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17499,6 +17499,7 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
 F:	fs/timerfd.c
 F:	include/linux/time_namespace.h
 F:	include/linux/timer*
+F:	include/trace/events/timer*
 F:	kernel/time/*timer*
 F:	kernel/time/namespace.c
diff --git a/include/trace/events/timer_migration.h b/include/trace/events/timer_migration.h
new file mode 100644
index 0000000..79f19e7
--- /dev/null
+++ b/include/trace/events/timer_migration.h
@@ -0,0 +1,298 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM timer_migration
+
+#if !defined(_TRACE_TIMER_MIGRATION_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_TIMER_MIGRATION_H
+
+#include <linux/tracepoint.h>
+
+/* Group events */
+TRACE_EVENT(tmigr_group_set,
+
+	TP_PROTO(struct tmigr_group *group),
+
+	TP_ARGS(group),
+
+	TP_STRUCT__entry(
+		__field( void *, group )
+		__field( unsigned int, lvl )
+		__field( unsigned int, numa_node )
+	),
+
+	TP_fast_assign(
+		__entry->group = group;
+		__entry->lvl = group->level;
+		__entry->numa_node = group->numa_node;
+	),
+
+	TP_printk("group=%p lvl=%d numa=%d",
+		  __entry->group, __entry->lvl, __entry->numa_node)
+);
+
+TRACE_EVENT(tmigr_connect_child_parent,
+
+	TP_PROTO(struct tmigr_group *child),
+
+	TP_ARGS(child),
+
+	TP_STRUCT__entry(
+		__field( void *, child )
+		__field( void *, parent )
+		__field( unsigned int, lvl )
+		__field( unsigned int, numa_node )
+		__field( unsigned int, num_children )
+		__field( u32, childmask )
+	),
+
+	TP_fast_assign(
+		__entry->child = child;
+		__entry->parent = child->parent;
+		__entry->lvl = child->parent->level;
+		__entry->numa_node = child->parent->numa_node;
+		__entry->num_children = child->parent->num_children;
+		__entry->childmask = child->childmask;
+	),
+
+	TP_printk("group=%p childmask=%0x parent=%p lvl=%d numa=%d num_children=%d",
+		  __entry->child, __entry->childmask, __entry->parent,
+		  __entry->lvl, __entry->numa_node, __entry->num_children)
+);
+
+TRACE_EVENT(tmigr_connect_cpu_parent,
+
+	TP_PROTO(struct tmigr_cpu *tmc),
+
+	TP_ARGS(tmc),
+
+	TP_STRUCT__entry(
+		__field( void *, parent )
+		__field( unsigned int, cpu )
+		__field( unsigned int, lvl )
+		__field( unsigned int, numa_node )
+		__field( unsigned int, num_children )
+		__field( u32, childmask )
+	),
+
+	TP_fast_assign(
+		__entry->parent = tmc->tmgroup;
+		__entry->cpu = tmc->cpuevt.cpu;
+		__entry->lvl = tmc->tmgroup->level;
+		__entry->numa_node = tmc->tmgroup->numa_node;
+		__entry->num_children = tmc->tmgroup->num_children;
+		__entry->childmask = tmc->childmask;
+	),
+
+	TP_printk("cpu=%d childmask=%0x parent=%p lvl=%d numa=%d num_children=%d",
+		  __entry->cpu, __entry->childmask, __entry->parent,
+		  __entry->lvl, __entry->numa_node, __entry->num_children)
+);
+
+DECLARE_EVENT_CLASS(tmigr_group_and_cpu,
+
+	TP_PROTO(struct tmigr_group *group, union tmigr_state state, u32 childmask),
+
+	TP_ARGS(group, state, childmask),
+
+	TP_STRUCT__entry(
+		__field( void *, group )
+		__field( void *, parent )
+		__field( unsigned int, lvl )
+		__field( unsigned int, numa_node )
+		__field( u32, childmask )
+		__field( u8, active )
+		__field( u8, migrator )
+	),
+
+	TP_fast_assign(
+		__entry->group = group;
+		__entry->parent = group->parent;
+		__entry->lvl = group->level;
+		__entry->numa_node = group->numa_node;
+		__entry->childmask = childmask;
+		__entry->active = state.active;
+		__entry->migrator = state.migrator;
+	),
+
+	TP_printk("group=%p lvl=%d numa=%d active=%0x migrator=%0x "
+		  "parent=%p childmask=%0x",
+		  __entry->group, __entry->lvl, __entry->numa_node,
+		  __entry->active, __entry->migrator,
+		  __entry->parent, __entry->childmask)
+);
+
+DEFINE_EVENT(tmigr_group_and_cpu, tmigr_group_set_cpu_inactive,
+
+	TP_PROTO(struct tmigr_group *group, union tmigr_state state, u32 childmask),
+
+	TP_ARGS(group, state, childmask)
+);
+
+DEFINE_EVENT(tmigr_group_and_cpu, tmigr_group_set_cpu_active,
+
+	TP_PROTO(struct tmigr_group *group, union tmigr_state state, u32 childmask),
+
+	TP_ARGS(group, state, childmask)
+);
+
+/* CPU events*/
+DECLARE_EVENT_CLASS(tmigr_cpugroup,
+
+	TP_PROTO(struct tmigr_cpu *tmc),
+
+	TP_ARGS(tmc),
+
+	TP_STRUCT__entry(
+		__field( u64, wakeup )
+		__field( void *, parent )
+		__field( unsigned int, cpu )
+
+	),
+
+	TP_fast_assign(
+		__entry->wakeup = tmc->wakeup;
+		__entry->parent = tmc->tmgroup;
+		__entry->cpu = tmc->cpuevt.cpu;
+	),
+
+	TP_printk("cpu=%d parent=%p wakeup=%llu", __entry->cpu, __entry->parent, __entry->wakeup)
+);
+
+DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_new_timer,
+
+	TP_PROTO(struct tmigr_cpu *tmc),
+
+	TP_ARGS(tmc)
+);
+
+DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_active,
+
+	TP_PROTO(struct tmigr_cpu *tmc),
+
+	TP_ARGS(tmc)
+);
+
+DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_online,
+
+	TP_PROTO(struct tmigr_cpu *tmc),
+
+	TP_ARGS(tmc)
+);
+
+DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_offline,
+
+	TP_PROTO(struct tmigr_cpu *tmc),
+
+	TP_ARGS(tmc)
+);
+
+DEFINE_EVENT(tmigr_cpugroup, tmigr_handle_remote_cpu,
+
+	TP_PROTO(struct tmigr_cpu *tmc),
+
+	TP_ARGS(tmc)
+);
+
+DECLARE_EVENT_CLASS(tmigr_idle,
+
+	TP_PROTO(struct tmigr_cpu *tmc, u64 nextevt),
+
+	TP_ARGS(tmc, nextevt),
+
+	TP_STRUCT__entry(
+		__field( u64, nextevt)
+		__field( u64, wakeup)
+		__field( void *, parent)
+		__field( unsigned int, cpu)
+	),
+
+	TP_fast_assign(
+		__entry->nextevt = nextevt;
+		__entry->wakeup = tmc->wakeup;
+		__entry->parent = tmc->tmgroup;
+		__entry->cpu = tmc->cpuevt.cpu;
+	),
+
+	TP_printk("cpu=%d parent=%p nextevt=%llu wakeup=%llu",
+		  __entry->cpu, __entry->parent, __entry->nextevt, __entry->wakeup)
+);
+
+DEFINE_EVENT(tmigr_idle, tmigr_cpu_idle,
+
+	TP_PROTO(struct tmigr_cpu *tmc, u64 nextevt),
+
+	TP_ARGS(tmc, nextevt)
+);
+
+DEFINE_EVENT(tmigr_idle, tmigr_cpu_new_timer_idle,
+
+	TP_PROTO(struct tmigr_cpu *tmc, u64 nextevt),
+
+	TP_ARGS(tmc, nextevt)
+);
+
+TRACE_EVENT(tmigr_update_events,
+
+	TP_PROTO(struct tmigr_group *child, struct tmigr_group *group,
+		 union tmigr_state childstate, union tmigr_state groupstate,
+		 u64 nextevt),
+
+	TP_ARGS(child, group, childstate, groupstate, nextevt),
+
+	TP_STRUCT__entry(
+		__field( void *, child )
+		__field( void *, group )
+		__field( u64, nextevt )
+		__field( u64, group_next_expiry )
+		__field( u64, child_evt_expiry )
+		__field( unsigned int, group_lvl )
+		__field( unsigned int, child_evtcpu )
+		__field( u8, child_active )
+		__field( u8, group_active )
+	),
+
+	TP_fast_assign(
+		__entry->child = child;
+		__entry->group = group;
+		__entry->nextevt = nextevt;
+		__entry->group_next_expiry = group->next_expiry;
+		__entry->child_evt_expiry = child ? child->groupevt.nextevt.expires : 0;
+		__entry->group_lvl = group->level;
+		__entry->child_evtcpu = child ? child->groupevt.cpu : 0;
+		__entry->child_active = childstate.active;
+		__entry->group_active = groupstate.active;
+	),
+
+	TP_printk("child=%p group=%p group_lvl=%d child_active=%0x group_active=%0x "
+		  "nextevt=%llu next_expiry=%llu child_evt_expiry=%llu child_evtcpu=%d",
+		  __entry->child, __entry->group, __entry->group_lvl, __entry->child_active,
+		  __entry->group_active,
+		  __entry->nextevt, __entry->group_next_expiry, __entry->child_evt_expiry,
+		  __entry->child_evtcpu)
+);
+
+TRACE_EVENT(tmigr_handle_remote,
+
+	TP_PROTO(struct tmigr_group *group),
+
+	TP_ARGS(group),
+
+	TP_STRUCT__entry(
+		__field( void * , group )
+		__field( unsigned int , lvl )
+	),
+
+	TP_fast_assign(
+		__entry->group = group;
+		__entry->lvl = group->level;
+	),
+
+	TP_printk("group=%p lvl=%d",
+		  __entry->group, __entry->lvl)
+);
+
+#endif /* _TRACE_TIMER_MIGRATION_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c
index 23cb6ea..d85aa2a 100644
--- a/kernel/time/timer_migration.c
+++ b/kernel/time/timer_migration.c
@@ -14,6 +14,9 @@
 #include "timer_migration.h"
 #include "tick-internal.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/timer_migration.h>
+
 /*
  * The timer migration mechanism is built on a hierarchy of groups. The
  * lowest level group contains CPUs, the next level groups of CPU groups
@@ -663,6 +666,8 @@ static bool tmigr_active_up(struct tmigr_group *group,
 	 */
 	group->groupevt.ignore = true;
 
+	trace_tmigr_group_set_cpu_active(group, newstate, childmask);
+
 	return walk_done;
 }
 
@@ -672,6 +677,8 @@ static void __tmigr_cpu_activate(struct tmigr_cpu *tmc)
 
 	data.childmask = tmc->childmask;
 
+	trace_tmigr_cpu_active(tmc);
+
 	tmc->cpuevt.ignore = true;
 	WRITE_ONCE(tmc->wakeup, KTIME_MAX);
 
@@ -829,6 +836,9 @@ check_toplvl:
 		data->firstexp = tmigr_next_groupevt_expires(group);
 	}
 
+	trace_tmigr_update_events(child, group, childstate, groupstate,
+				  nextexp);
+
 unlock:
 	raw_spin_unlock(&group->lock);
 
@@ -863,6 +873,8 @@ static u64 tmigr_new_timer(struct tmigr_cpu *tmc, u64 nextexp)
 	if (tmc->remote)
 		return KTIME_MAX;
 
+	trace_tmigr_cpu_new_timer(tmc);
+
 	tmc->cpuevt.ignore = false;
 	data.remote = false;
 
@@ -904,6 +916,8 @@ static void tmigr_handle_remote_cpu(unsigned int cpu, u64 now,
 		return;
 	}
 
+	trace_tmigr_handle_remote_cpu(tmc);
+
 	tmc->remote = true;
 	WRITE_ONCE(tmc->wakeup, KTIME_MAX);
 
@@ -984,6 +998,7 @@ static bool tmigr_handle_remote_up(struct tmigr_group *group,
 
 	childmask = data->childmask;
 
+	trace_tmigr_handle_remote(group);
 again:
 	/*
 	 * Handle the group only if @childmask is the migrator or if the
@@ -1206,6 +1221,7 @@ u64 tmigr_cpu_new_timer(u64 nextexp)
 	 */
 	WRITE_ONCE(tmc->wakeup, ret);
 
+	trace_tmigr_cpu_new_timer_idle(tmc, nextexp);
 	raw_spin_unlock(&tmc->lock);
 	return ret;
 }
@@ -1298,6 +1314,8 @@ static bool tmigr_inactive_up(struct tmigr_group *group,
 	 */
 	WARN_ON_ONCE(data->firstexp != KTIME_MAX && group->parent);
 
+	trace_tmigr_group_set_cpu_inactive(group, newstate, childmask);
+
 	return walk_done;
 }
 
@@ -1350,6 +1368,7 @@ u64 tmigr_cpu_deactivate(u64 nextexp)
 	 */
 	WRITE_ONCE(tmc->wakeup, ret);
 
+	trace_tmigr_cpu_idle(tmc, nextexp);
 	raw_spin_unlock(&tmc->lock);
 	return ret;
 }
@@ -1467,6 +1486,7 @@ static struct tmigr_group *tmigr_get_group(unsigned int cpu, int node,
 
 	/* Setup successful. Add it to the hierarchy */
 	list_add(&group->list, &tmigr_level_list[lvl]);
+	trace_tmigr_group_set(group);
 	return group;
 }
 
@@ -1484,6 +1504,8 @@ static void tmigr_connect_child_parent(struct tmigr_group *child,
 	raw_spin_unlock(&parent->lock);
 	raw_spin_unlock_irq(&child->lock);
 
+	trace_tmigr_connect_child_parent(child);
+
 	/*
 	 * To prevent inconsistent states, active children need to be active in
 	 * the new parent as well. Inactive children are already marked inactive
@@ -1575,6 +1597,8 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node)
 
 			raw_spin_unlock_irq(&group->lock);
 
+			trace_tmigr_connect_cpu_parent(tmc);
+
 			/* There are no children that need to be connected */
 			continue;
 		} else {
@@ -1642,6 +1666,7 @@ static int tmigr_cpu_online(unsigned int cpu)
 		WRITE_ONCE(tmc->wakeup, KTIME_MAX);
 	}
 	raw_spin_lock_irq(&tmc->lock);
+	trace_tmigr_cpu_online(tmc);
 	tmc->idle = timer_base_is_idle();
 	if (!tmc->idle)
 		__tmigr_cpu_activate(tmc);
@@ -1681,6 +1706,7 @@ static int tmigr_cpu_offline(unsigned int cpu)
 	 * offline; Therefore nextevt value is set to KTIME_MAX
 	 */
 	firstexp = __tmigr_cpu_deactivate(tmc, KTIME_MAX);
+	trace_tmigr_cpu_offline(tmc);
 	raw_spin_unlock_irq(&tmc->lock);
 
 	if (firstexp != KTIME_MAX) {
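
For reference, a short usage sketch (not part of the patch): with this commit applied,
the new trace events are registered under the "timer_migration" group in tracefs and
can be enabled at runtime, assuming tracefs is mounted at /sys/kernel/tracing:

  # enable all timer migration tracepoints
  echo 1 > /sys/kernel/tracing/events/timer_migration/enable

  # or enable a single event, e.g. the CPU idle transition
  echo 1 > /sys/kernel/tracing/events/timer_migration/tmigr_cpu_idle/enable

  # read the resulting trace output
  cat /sys/kernel/tracing/trace_pipe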