On Thu, Feb 26, 2009 at 05:45:48PM +0000, Ingo Molnar wrote: > Author: Ingo Molnar <mingo@xxxxxxx> > AuthorDate: Thu, 26 Feb 2009 18:47:11 +0100 > Commit: Ingo Molnar <mingo@xxxxxxx> > CommitDate: Thu, 26 Feb 2009 18:44:06 +0100 > > tracing: implement trace_clock_*() APIs > > Impact: implement new tracing timestamp APIs > > Add three trace clock variants, with differing scalability/precision > tradeoffs: > > - local: CPU-local trace clock > - medium: scalable global clock with some jitter > - global: globally monotonic, serialized clock > > Make the ring-buffer use the local trace clock internally. > > Acked-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx> > Acked-by: Steven Rostedt <rostedt@xxxxxxxxxxx> > Signed-off-by: Ingo Molnar <mingo@xxxxxxx> > > > --- > include/linux/trace_clock.h | 19 ++++++++ > kernel/trace/Makefile | 1 + > kernel/trace/ring_buffer.c | 5 +- > kernel/trace/trace_clock.c | 101 +++++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 123 insertions(+), 3 deletions(-) > > diff --git a/include/linux/trace_clock.h b/include/linux/trace_clock.h > new file mode 100644 > index 0000000..7a81303 > --- /dev/null > +++ b/include/linux/trace_clock.h > @@ -0,0 +1,19 @@ > +#ifndef _LINUX_TRACE_CLOCK_H > +#define _LINUX_TRACE_CLOCK_H > + > +/* > + * 3 trace clock variants, with differing scalability/precision > + * tradeoffs: > + * > + * - local: CPU-local trace clock > + * - medium: scalable global clock with some jitter > + * - global: globally monotonic, serialized clock > + */ > +#include <linux/compiler.h> > +#include <linux/types.h> > + > +extern u64 notrace trace_clock_local(void); > +extern u64 notrace trace_clock(void); > +extern u64 notrace trace_clock_global(void); > + > +#endif /* _LINUX_TRACE_CLOCK_H */ > diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile > index 664b6c0..c931fe0 100644 > --- a/kernel/trace/Makefile > +++ b/kernel/trace/Makefile > @@ -19,6 +19,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o > obj-$(CONFIG_RING_BUFFER) 
+= ring_buffer.o > > obj-$(CONFIG_TRACING) += trace.o > +obj-$(CONFIG_TRACING) += trace_clock.o > obj-$(CONFIG_TRACING) += trace_output.o > obj-$(CONFIG_TRACING) += trace_stat.o > obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o > diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c > index 8f19f1a..a8c275c 100644 > --- a/kernel/trace/ring_buffer.c > +++ b/kernel/trace/ring_buffer.c > @@ -4,6 +4,7 @@ > * Copyright (C) 2008 Steven Rostedt <srostedt@xxxxxxxxxx> > */ > #include <linux/ring_buffer.h> > +#include <linux/trace_clock.h> > #include <linux/ftrace_irq.h> > #include <linux/spinlock.h> > #include <linux/debugfs.h> > @@ -12,7 +13,6 @@ > #include <linux/module.h> > #include <linux/percpu.h> > #include <linux/mutex.h> > -#include <linux/sched.h> /* used for sched_clock() (for now) */ > #include <linux/init.h> > #include <linux/hash.h> > #include <linux/list.h> > @@ -112,14 +112,13 @@ EXPORT_SYMBOL_GPL(tracing_is_on); > /* Up this if you want to test the TIME_EXTENTS and normalization */ > #define DEBUG_SHIFT 0 > > -/* FIXME!!! */ > u64 ring_buffer_time_stamp(int cpu) > { > u64 time; > > preempt_disable_notrace(); > /* shift to debug/test normalization and TIME_EXTENTS */ > - time = sched_clock() << DEBUG_SHIFT; > + time = trace_clock_local() << DEBUG_SHIFT; > preempt_enable_no_resched_notrace(); > > return time; > diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c > new file mode 100644 > index 0000000..2d4953f > --- /dev/null > +++ b/kernel/trace/trace_clock.c > @@ -0,0 +1,101 @@ > +/* > + * tracing clocks > + * > + * Copyright (C) 2009 Red Hat, Inc., Ingo Molnar <mingo@xxxxxxxxxx> > + * > + * Implements 3 trace clock variants, with differing scalability/precision > + * tradeoffs: > + * > + * - local: CPU-local trace clock > + * - medium: scalable global clock with some jitter > + * - global: globally monotonic, serialized clock > + * > + * Tracer plugins will chose a default from these clocks. 
> + */ > +#include <linux/spinlock.h> > +#include <linux/hardirq.h> > +#include <linux/module.h> > +#include <linux/percpu.h> > +#include <linux/sched.h> > +#include <linux/ktime.h> > + > +/* > + * trace_clock_local(): the simplest and least coherent tracing clock. > + * > + * Useful for tracing that does not cross to other CPUs nor > + * does it go through idle events. > + */ > +u64 notrace trace_clock_local(void) > +{ > + /* > + * sched_clock() is an architecture implemented, fast, scalable, > + * lockless clock. It is not guaranteed to be coherent across > + * CPUs, nor across CPU idle events. > + */ > + return sched_clock(); > +} > + > +/* > + * trace_clock(): 'inbetween' trace clock. Not completely serialized, > + * but not completely incorrect when crossing CPUs either. > + * > + * This is based on cpu_clock(), which will allow at most ~1 jiffy of > + * jitter between CPUs. So it's a pretty scalable clock, but there > + * can be offsets in the trace data. > + */ > +u64 notrace trace_clock(void) > +{ > + return cpu_clock(raw_smp_processor_id()); > +} > + > + > +/* > + * trace_clock_global(): special globally coherent trace clock > + * > + * It has higher overhead than the other trace clocks but is still > + * an order of magnitude faster than GTOD derived hardware clocks. > + * > + * Used by plugins that need globally coherent timestamps. 
> + */ > + > +static u64 prev_trace_clock_time; > + > +static raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp = > + (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; > + > +u64 notrace trace_clock_global(void) > +{ > + unsigned long flags; > + int this_cpu; > + u64 now; > + > + raw_local_irq_save(flags); > + > + this_cpu = raw_smp_processor_id(); > + now = cpu_clock(this_cpu); > + /* > + * If in an NMI context then dont risk lockups and return the > + * cpu_clock() time: > + */ > + if (unlikely(in_nmi())) > + goto out; > + > + __raw_spin_lock(&trace_clock_lock); > + > + /* > + * TODO: if this happens often then maybe we should reset > + * my_scd->clock to prev_trace_clock_time+1, to make sure > + * we start ticking with the local clock from now on? > + */ > + if ((s64)(now - prev_trace_clock_time) < 0) > + now = prev_trace_clock_time + 1; > + > + prev_trace_clock_time = now; > + > + __raw_spin_unlock(&trace_clock_lock); > + > + out: > + raw_local_irq_restore(flags); > + > + return now; > +} Hi, I missed this one. Wouldn't your previous idea of a cmpxchg global clock be better? Perhaps it would scale better while tracing on many CPUs. Anyway, it's one less thing on my TODO list :-) > -- > To unsubscribe from this list: send the line "unsubscribe linux-tip-commits" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-tip-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html