Hi,

2009/9/29 Azraiyl <azraiyl@xxxxxxxxx>:
>> introduces inside the system. Look at the CPU-load this is
>> generating... High latencies would not surprise me since it is likely
>> that the processor just cannot keep up...
>
> With some simple work inside the loop, the max. CPU usage is 19%.
> Normally it's ca. 15%.

I would expect the load to be much higher... Do you have a proper
sched_clock() implementation, or the default fallback-to-jiffies-based
sched_clock()? The load statistics might be misleading...

See the attachment for a preliminary patch (I am using it on a sam9261
core).

>> Better use the TC-library to generate a dedicated interrupt if you
>> want some realtime responsiveness and somewhat reasonable CPU-load.
>> Personally I would not go beyond the 1 kHz boundary with this
>> processor...
>
> I'll try this. Thanks for the hint.
>
> Anyway, I'm still worried about these worst-case latencies and would
> like to know where they come from.

Is the ftrace infrastructure supposed to work on ARM? I used it for
tracing sched_switches on 2.6.31, and it works there. I have not tested
it on 2.6.29 myself, though.

Remy
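P.S. For reference, the generic fallback sched_clock() in the mainline
kernel has only jiffies resolution. If I remember correctly it looks
roughly like this (kernel/sched_clock.c):

/*
 * Default weak implementation: architectures without their own
 * sched_clock() get 1/HZ (jiffies) granularity only.
 */
unsigned long long __attribute__((weak)) sched_clock(void)
{
	return (unsigned long long)(jiffies - INITIAL_JIFFIES)
					* (NSEC_PER_SEC / HZ);
}

With only 1/HZ granularity, short-running tasks are easily accounted as
zero runtime, which is why the load figures look far too low.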
Add sched_clock() to the AT91 TC clocksource driver

Without this patch, tools like 'top' will display a far too low CPU load.
On AT91 there is no architecture-specific sched_clock() implementation,
so the default fallback is used. This fallback uses the jiffies counter
as sched_clock().

On AT91 there is no standard clocksource available that is accurate
enough, except the TC-based clocksource implementation. Therefore that
clocksource is used as the base for sched_clock(). It offers
sub-microsecond timestamping (< 200 ns).

Signed-off-by: Remy Bohmer <linux@xxxxxxxxxx>
---
 drivers/clocksource/tcb_clksrc.c |   68 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 61 insertions(+), 7 deletions(-)

Index: linux-2.6.31/drivers/clocksource/tcb_clksrc.c
===================================================================
--- linux-2.6.31.orig/drivers/clocksource/tcb_clksrc.c	2009-09-29 23:03:21.000000000 +0200
+++ linux-2.6.31/drivers/clocksource/tcb_clksrc.c	2009-09-29 23:09:19.000000000 +0200
@@ -11,6 +11,7 @@
 #include <linux/platform_device.h>
 #include <linux/atmel_tc.h>
+#include <linux/sched.h>	/* for sched_clock() prototype */
 
 /*
  * We're configured to use a specific TC block, one that's not hooked
@@ -38,19 +39,22 @@
  */
 
 static void __iomem *tcaddr;
+static int clocksource_initialised;
+static unsigned long long nsecs_per_clock;
+static DEFINE_ATOMIC_SPINLOCK(sched_clock_lock);
 
 static cycle_t tc_get_cycles(struct clocksource *cs)
 {
 	unsigned long	flags;
 	u32		lower, upper;
 
-	raw_local_irq_save(flags);
+	atomic_spin_lock_irqsave(&sched_clock_lock, flags);
 	do {
 		upper = __raw_readl(tcaddr + ATMEL_TC_REG(1, CV));
 		lower = __raw_readl(tcaddr + ATMEL_TC_REG(0, CV));
 	} while (upper != __raw_readl(tcaddr + ATMEL_TC_REG(1, CV)));
 
-	raw_local_irq_restore(flags);
+	atomic_spin_unlock_irqrestore(&sched_clock_lock, flags);
 	return (upper << 16) | lower;
 }
 
@@ -63,6 +67,53 @@ static struct clocksource clksrc = {
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
+/* Override the default sched_clock() implementation */
+unsigned long long sched_clock(void)
+{
+	unsigned long flags;
+	unsigned long long cycles;
+	u32 upper32, lower32;
+	u32 cycles32;
+	static u32 prev_cycles32;
+	static unsigned long long upper64;
+
+	if (clocksource_initialised) {
+		/* Extend the 32-bit cycle count to 64 bits. We assume
+		 * we are called more often than once every 5.726 minutes
+		 * (this time is derived from a 12.5 MHz clock rate). */
+		atomic_spin_lock_irqsave(&sched_clock_lock, flags);
+
+		do {
+			upper32 = __raw_readl(tcaddr + ATMEL_TC_REG(1, CV));
+			lower32 = __raw_readl(tcaddr + ATMEL_TC_REG(0, CV));
+			/* Make sure the low counter does not wrap while
+			 * reading the time */
+		} while (upper32 != __raw_readl(tcaddr + ATMEL_TC_REG(1, CV)));
+
+		cycles32 = (upper32 << 16) | lower32;
+
+		if (cycles32 < prev_cycles32) {
+			/* Wrap-around detected, or a backwards jump in time
+			 * of the lower 16-bit counter? Ignore the latter.
+			 * REVISIT: Unfortunately we have seen such jumps.
+			 * Without this check you will see the printk clock
+			 * make huge jumps forward in time during boot.
+			 */
+			if ((prev_cycles32 - cycles32) > (1 << 16))
+				upper64 += 1LLU << 32;	/* A full wrap-around */
+		}
+		prev_cycles32 = cycles32;
+
+		cycles = upper64 | (unsigned long long)cycles32;
+
+		atomic_spin_unlock_irqrestore(&sched_clock_lock, flags);
+
+		cycles *= nsecs_per_clock;
+		return cycles;
+	} else {
+		return 0;
+	}
+}
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 
 struct tc_clkevt_device {
@@ -212,9 +263,6 @@ static void __init setup_clkevents(struc
 
 static int __init tcb_clksrc_init(void)
 {
-	static char bootinfo[] __initdata
-		= KERN_DEBUG "%s: tc%d at %d.%03d MHz\n";
-
 	struct platform_device *pdev;
 	struct atmel_tc *tc;
 	struct clk *t0_clk;
@@ -258,9 +306,11 @@ static int __init tcb_clksrc_init(void)
 
 	clksrc.mult = clocksource_hz2mult(divided_rate, clksrc.shift);
 
-	printk(bootinfo, clksrc.name, CONFIG_ATMEL_TCB_CLKSRC_BLOCK,
+	printk(KERN_DEBUG "%s: tc%d at %d.%03d MHz\n",
+			clksrc.name, CONFIG_ATMEL_TCB_CLKSRC_BLOCK,
 			divided_rate / 1000000,
-			((divided_rate + 500000) % 1000000) / 1000);
+			(divided_rate -
+			((divided_rate / 1000000) * 1000000)) / 1000);
 
 	/* tclib will give us three clocks no matter what the
 	 * underlying platform supports.
@@ -297,6 +347,10 @@ static int __init tcb_clksrc_init(void)
 	/* channel 2: periodic and oneshot timer support */
 	setup_clkevents(tc, clk32k_divisor_idx);
 
+	/* Calculate the time per clock tick, needed for sched_clock() */
+	nsecs_per_clock = (1000 * 1000 * 1000) / divided_rate;
+	clocksource_initialised = 1;
+
 	return 0;
 }
 arch_initcall(tcb_clksrc_init);
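For what it is worth, a quick sanity check on the numbers used above,
assuming the 12.5 MHz divided rate mentioned in the code comment:

	nsecs_per_clock = 1000000000 / 12500000 = 80 ns per tick
	32-bit wrap     = 2^32 * 80 ns ~= 343.6 s ~= 5.73 minutes

So the resolution stays well below the 200 ns mentioned in the
changelog, and sched_clock() must indeed be called at least once every
~5.7 minutes for the wrap detection to work.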