Changes in V2: - Introduce PWR_EVENT_EXIT instead of 0 to mark non-power state - Use u32 instead of u64 for cpuid and state, which is more than enough New power trace events: power:processor_idle power:processor_frequency power:machine_suspend C-state/idle accounting events: power:power_start power:power_end are replaced with: power:processor_idle and power:power_frequency is replaced with: power:processor_frequency power:machine_suspend is newly introduced; a first implementation comes from the ARM side, but it's easy to add these events on x86 as well if needed. The type= field got removed from both; it was never used, and the type is already distinguished by the event type itself. The perf timechart userspace tool gets adjusted in a separate patch. Signed-off-by: Thomas Renninger <trenn@xxxxxxx> CC: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> CC: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> CC: Thomas Gleixner <tglx@xxxxxxxxxxxxx> CC: Masami Hiramatsu <masami.hiramatsu.pt@xxxxxxxxxxx> CC: Frank Eigler <fche@xxxxxxxxxx> CC: Steven Rostedt <rostedt@xxxxxxxxxxx> CC: Kevin Hilman <khilman@xxxxxxxxxxxxxxxxxxx> CC: Peter Zijlstra <peterz@xxxxxxxxxxxxx> CC: linux-omap@xxxxxxxxxxxxxxx CC: rjw@xxxxxxx CC: linux-pm@xxxxxxxxxxxxxxxxxxxxxxxxxx CC: linux-trace-users@xxxxxxxxxxxxxxx CC: Jean Pihet <jean.pihet@xxxxxxxxxxxxxx> CC: Pierre Tardy <tardyp@xxxxxxxxx> CC: Frederic Weisbecker <fweisbec@xxxxxxxxx> CC: Tejun Heo <tj@xxxxxxxxxx> CC: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx> CC: Arjan van de Ven <arjan@xxxxxxxxxxxxxxx> CC: Ingo Molnar <mingo@xxxxxxx> --- arch/x86/kernel/process.c | 7 +++- arch/x86/kernel/process_64.c | 2 + drivers/cpufreq/cpufreq.c | 1 + drivers/cpuidle/cpuidle.c | 1 + drivers/idle/intel_idle.c | 1 + include/trace/events/power.h | 81 +++++++++++++++++++++++++++++++++++++++++- kernel/trace/Kconfig | 14 +++++++ kernel/trace/power-traces.c | 3 ++ 8 files changed, 108 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 
57d1868..6a98da3 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -374,6 +374,7 @@ void default_idle(void) { if (hlt_use_halt()) { trace_power_start(POWER_CSTATE, 1, smp_processor_id()); + trace_processor_idle(1, smp_processor_id()); current_thread_info()->status &= ~TS_POLLING; /* * TS_POLLING-cleared state must be visible before we @@ -444,6 +445,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); void mwait_idle_with_hints(unsigned long ax, unsigned long cx) { trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); + trace_processor_idle((ax>>4)+1, smp_processor_id()); if (!need_resched()) { if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)&current_thread_info()->flags); @@ -460,6 +462,7 @@ static void mwait_idle(void) { if (!need_resched()) { trace_power_start(POWER_CSTATE, 1, smp_processor_id()); + trace_processor_idle(1, smp_processor_id()); if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)&current_thread_info()->flags); @@ -481,10 +484,12 @@ static void mwait_idle(void) static void poll_idle(void) { trace_power_start(POWER_CSTATE, 0, smp_processor_id()); + trace_processor_idle(1, smp_processor_id()); local_irq_enable(); while (!need_resched()) cpu_relax(); - trace_power_end(0); + trace_power_end(smp_processor_id()); + trace_processor_idle(PWR_EVENT_EXIT, smp_processor_id()); } /* diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3d9ea53..5f2bb98 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -142,6 +142,8 @@ void cpu_idle(void) start_critical_timings(); trace_power_end(smp_processor_id()); + trace_processor_idle(PWR_EVENT_EXIT, + smp_processor_id()); /* In many cases the interrupt that ended idle has already called exit_idle. 
But some idle diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 199dcb9..33bdc41 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -355,6 +355,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) dprintk("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new, (unsigned long)freqs->cpu); trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu); + trace_processor_frequency(freqs->new, freqs->cpu); srcu_notifier_call_chain(&cpufreq_transition_notifier_list, CPUFREQ_POSTCHANGE, freqs); if (likely(policy) && likely(policy->cpu == freqs->cpu)) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index a507108..ec703e6 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -107,6 +107,7 @@ static void cpuidle_idle_call(void) if (cpuidle_curr_governor->reflect) cpuidle_curr_governor->reflect(dev); trace_power_end(smp_processor_id()); + trace_processor_idle(PWR_EVENT_EXIT, smp_processor_id()); } /** diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 21ac077..c78e496 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -202,6 +202,7 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state) stop_critical_timings(); trace_power_start(POWER_CSTATE, (eax >> 4) + 1, cpu); + trace_processor_idle((eax >> 4) + 1, smp_processor_id()); if (!need_resched()) { __monitor((void *)&current_thread_info()->flags, 0, 0); diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 35a2a6e..4b13414 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -7,6 +7,61 @@ #include <linux/ktime.h> #include <linux/tracepoint.h> +DECLARE_EVENT_CLASS(processor, + + TP_PROTO(unsigned int state, unsigned int cpu_id), + + TP_ARGS(state, cpu_id), + + TP_STRUCT__entry( + __field( u32, state ) + __field( u32, cpu_id ) + ), + + TP_fast_assign( + __entry->state = state; + __entry->cpu_id = cpu_id; + 
), + + TP_printk("state=%lu cpu_id=%lu", (unsigned long)__entry->state, + (unsigned long)__entry->cpu_id) +); + +DEFINE_EVENT(processor, processor_idle, + + TP_PROTO(unsigned int state, unsigned int cpu_id), + + TP_ARGS(state, cpu_id) +); + +#define PWR_EVENT_EXIT 0xFFFFFFFF + +DEFINE_EVENT(processor, processor_frequency, + + TP_PROTO(unsigned int frequency, unsigned int cpu_id), + + TP_ARGS(frequency, cpu_id) +); + +TRACE_EVENT(machine_suspend, + + TP_PROTO(unsigned int state), + + TP_ARGS(state), + + TP_STRUCT__entry( + __field( u32, state ) + ), + + TP_fast_assign( + __entry->state = state; + ), + + TP_printk("state=%lu", (unsigned long)__entry->state) +); + +#ifdef CONFIG_EVENT_POWER_TRACING_DEPRECATED + #ifndef _TRACE_POWER_ENUM_ #define _TRACE_POWER_ENUM_ enum { @@ -69,8 +124,32 @@ TRACE_EVENT(power_end, TP_printk("cpu_id=%lu", (unsigned long)__entry->cpu_id) ); - +#endif /* CONFIG_EVENT_POWER_TRACING_DEPRECATED */ #endif /* _TRACE_POWER_H */ +/* Deprecated dummy functions must be protected against multi-declaration */ +#ifndef EVENT_POWER_TRACING_DEPRECATED_PART_H +#define EVENT_POWER_TRACING_DEPRECATED_PART_H + +#ifndef CONFIG_EVENT_POWER_TRACING_DEPRECATED + +#ifndef _TRACE_POWER_ENUM_ +#define _TRACE_POWER_ENUM_ +enum { + POWER_NONE = 0, + POWER_CSTATE = 1, + POWER_PSTATE = 2, +}; +#endif + +static inline void trace_power_start(u64 type, u64 state, u64 cpuid) {}; +static inline void trace_power_end(u64 cpuid) {}; +static inline void trace_power_frequency(u64 type, u64 state, u64 cpuid) {}; +#endif /* CONFIG_EVENT_POWER_TRACING_DEPRECATED */ + +#endif /* EVENT_POWER_TRACING_DEPRECATED_PART_H */ + + + /* This part must be outside protection */ #include <trace/define_trace.h> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 538501c..0b5c841 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -64,6 +64,20 @@ config EVENT_TRACING select CONTEXT_SWITCH_TRACER bool +config EVENT_POWER_TRACING_DEPRECATED + depends on EVENT_TRACING + bool 
+ help + Provides old power event types: + C-state/idle accounting events: + power:power_start + power:power_end + and old cpufreq accounting event: + power:power_frequency + This is for userspace compatibility + and will vanish after 5 kernel iterations, + namely 2.6.41. + config CONTEXT_SWITCH_TRACER bool diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c index 0e0497d..6b6da42 100644 --- a/kernel/trace/power-traces.c +++ b/kernel/trace/power-traces.c @@ -13,5 +13,8 @@ #define CREATE_TRACE_POINTS #include <trace/events/power.h> +#ifdef EVENT_POWER_TRACING_DEPRECATED EXPORT_TRACEPOINT_SYMBOL_GPL(power_start); +#endif +EXPORT_TRACEPOINT_SYMBOL_GPL(processor_idle); -- 1.6.3 -- To unsubscribe from this list: send the line "unsubscribe linux-trace-users" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html