Hi Marc, On 8/24/2012 10:52 AM, Marc Zyngier wrote: > At the moment, the arch_timer driver only uses the physical timer, > which can cause problem if PL2 hasn't enabled PL1 access in CNTHCTL, > which is likely in a virtualized environment. Instead, the virtual > timer is always available. > > This patch enables the use of the virtual timer, unless no > interrupt is provided in the DT for it, in which case it falls > back to the physical timer. > > Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx> Looks very nice... Minor comments below. > --- > arch/arm/kernel/arch_timer.c | 337 ++++++++++++++++++++++++++++++------------- > 1 file changed, 233 insertions(+), 104 deletions(-) > > diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c > index cf25880..3f63b90 100644 > --- a/arch/arm/kernel/arch_timer.c > +++ b/arch/arm/kernel/arch_timer.c > @@ -27,13 +27,23 @@ > #include <asm/sched_clock.h> > > static unsigned long arch_timer_rate; > -static int arch_timer_ppi; > -static int arch_timer_ppi2; > + > +enum ppi_nr { > + PHYS_SECURE_PPI, > + PHYS_NONSECURE_PPI, > + VIRT_PPI, > + HYP_PPI, > + MAX_TIMER_PPI > +}; > + > +static int arch_timer_ppi[MAX_TIMER_PPI]; > > static struct clock_event_device __percpu **arch_timer_evt; > > extern void init_current_timer_delay(unsigned long freq); > > +static bool arch_timer_use_virtual = true; > + > /* > * Architected system timer support. > */ > @@ -43,53 +53,98 @@ extern void init_current_timer_delay(unsigned long freq); > #define ARCH_TIMER_CTRL_IT_STAT (1 << 2) > > #define ARCH_TIMER_REG_CTRL 0 > -#define ARCH_TIMER_REG_FREQ 1 > -#define ARCH_TIMER_REG_TVAL 2 > +#define ARCH_TIMER_REG_TVAL 1 > > -static void arch_timer_reg_write(int reg, u32 val) > +#define ARCH_TIMER_PHYS_ACCESS 0 > +#define ARCH_TIMER_VIRT_ACCESS 1 > + > +/* > + * These register accessors are marked inline so the compiler can > + * nicely work out which register we want, and chuck away the rest of > + * the code. At least it does so with a recent GCC (4.6.3). > + */ > +static inline void arch_timer_reg_write(const int access, const int reg, u32 val) > { > - switch (reg) { > - case ARCH_TIMER_REG_CTRL: > - asm volatile("mcr p15, 0, %0, c14, c2, 1" : : "r" (val)); > - break; > - case ARCH_TIMER_REG_TVAL: > - asm volatile("mcr p15, 0, %0, c14, c2, 0" : : "r" (val)); > - break; > + if (access == ARCH_TIMER_PHYS_ACCESS) { > + switch (reg) { > + case ARCH_TIMER_REG_CTRL: > + asm volatile("mcr p15, 0, %0, c14, c2, 1" : : "r" (val)); > + break; > + case ARCH_TIMER_REG_TVAL: > + asm volatile("mcr p15, 0, %0, c14, c2, 0" : : "r" (val)); > + break; > + } > + } > + > + if (access == ARCH_TIMER_VIRT_ACCESS) { > + switch (reg) { > + case ARCH_TIMER_REG_CTRL: > + asm volatile("mcr p15, 0, %0, c14, c3, 1" : : "r" (val)); > + break; > + case ARCH_TIMER_REG_TVAL: > + asm volatile("mcr p15, 0, %0, c14, c3, 0" : : "r" (val)); > + break; > + } > } > > isb(); > } > > -static u32 arch_timer_reg_read(int reg) > +static inline u32 arch_timer_reg_read(const int access, const int reg) > { > - u32 val; > + u32 val = 0; > + > + if (access == ARCH_TIMER_PHYS_ACCESS) { > + switch (reg) { > + case ARCH_TIMER_REG_CTRL: > + asm volatile("mrc p15, 0, %0, c14, c2, 1" : "=r" (val)); > + break; > + case ARCH_TIMER_REG_TVAL: > + asm volatile("mrc p15, 0, %0, c14, c2, 0" : "=r" (val)); > + break; > + } > + } > > - switch (reg) { > - case ARCH_TIMER_REG_CTRL: > - asm volatile("mrc p15, 0, %0, c14, c2, 1" : "=r" (val)); > - break; > - case ARCH_TIMER_REG_FREQ: > - asm volatile("mrc p15, 0, %0, c14, c0, 0" : "=r" (val)); > - break; > - case ARCH_TIMER_REG_TVAL: > - asm volatile("mrc p15, 0, %0, c14, c2, 0" : "=r" (val)); > - break; > - default: > - BUG(); > + if (access == ARCH_TIMER_VIRT_ACCESS) { > + switch (reg) { > + case ARCH_TIMER_REG_CTRL: > + asm volatile("mrc p15, 0, %0, c14, c3, 1" : "=r" (val)); > + break; > + case ARCH_TIMER_REG_TVAL: > + asm volatile("mrc p15, 0, %0, c14, c3, 0" : "=r" (val)); > + break; > + } > } > > return val; > } > > -static irqreturn_t arch_timer_handler(int irq, void *dev_id) > +static inline cycle_t arch_counter_get_cntpct(void) > { > - struct clock_event_device *evt = *(struct clock_event_device **)dev_id; > - unsigned long ctrl; > + u32 cvall, cvalh; > + > + asm volatile("mrrc p15, 0, %0, %1, c14" : "=r" (cvall), "=r" (cvalh)); > + > + return ((cycle_t) cvalh << 32) | cvall; > +} > + > +static inline cycle_t arch_counter_get_cntvct(void) > +{ > + u32 cvall, cvalh; > + > + asm volatile("mrrc p15, 1, %0, %1, c14" : "=r" (cvall), "=r" (cvalh)); > + > + return ((cycle_t) cvalh << 32) | cvall; > +} We should use the Q and R qualifiers to avoid compiler misbehavior: u64 cval; asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r" (cvall)); The compiler generates sad looking code when constructing 64-bit quantities with shifts and ORs. We found this while implementing the phys-virt patching for 64-bit phys_addr_t. Is there value in unifying the physical and virtual counter read functions using the access specifier as you've done for the register read and write functions? > > - ctrl = arch_timer_reg_read(ARCH_TIMER_REG_CTRL); > +static irqreturn_t inline timer_handler(const int access, > + struct clock_event_device *evt) > +{ > + unsigned long ctrl; > + ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL); > if (ctrl & ARCH_TIMER_CTRL_IT_STAT) { > ctrl |= ARCH_TIMER_CTRL_IT_MASK; > - arch_timer_reg_write(ARCH_TIMER_REG_CTRL, ctrl); > + arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl); > evt->event_handler(evt); > return IRQ_HANDLED; > } > @@ -97,63 +152,100 @@ static irqreturn_t arch_timer_handler(int irq, void *dev_id) > return IRQ_NONE; > } > > -static void arch_timer_disable(void) > +static irqreturn_t arch_timer_handler_virt(int irq, void *dev_id) > { > - unsigned long ctrl; > + struct clock_event_device *evt = *(struct clock_event_device **)dev_id; > > - ctrl = arch_timer_reg_read(ARCH_TIMER_REG_CTRL); > - ctrl &= ~ARCH_TIMER_CTRL_ENABLE; > - arch_timer_reg_write(ARCH_TIMER_REG_CTRL, ctrl); > + return timer_handler(ARCH_TIMER_VIRT_ACCESS, evt); > } > > -static void arch_timer_set_mode(enum clock_event_mode mode, > - struct clock_event_device *clk) > +static irqreturn_t arch_timer_handler_phys(int irq, void *dev_id) > { > + struct clock_event_device *evt = *(struct clock_event_device **)dev_id; > + > + return timer_handler(ARCH_TIMER_PHYS_ACCESS, evt); > +} > + > +static inline void timer_set_mode(const int access, int mode) > +{ > + unsigned long ctrl; > switch (mode) { > case CLOCK_EVT_MODE_UNUSED: > case CLOCK_EVT_MODE_SHUTDOWN: > - arch_timer_disable(); > + ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL); > + ctrl &= ~ARCH_TIMER_CTRL_ENABLE; > + arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl); > break; > default: > break; > } > } > > -static int arch_timer_set_next_event(unsigned long evt, > - struct clock_event_device *unused) > +static void arch_timer_set_mode_virt(enum clock_event_mode mode, > + struct clock_event_device *clk) > { > - unsigned long ctrl; > + timer_set_mode(ARCH_TIMER_VIRT_ACCESS, mode); > +} > + > +static void arch_timer_set_mode_phys(enum clock_event_mode mode, > + struct clock_event_device *clk) > +{ > + timer_set_mode(ARCH_TIMER_PHYS_ACCESS, mode); > +} > > - ctrl = arch_timer_reg_read(ARCH_TIMER_REG_CTRL); > +static inline void set_next_event(const int access, unsigned long evt) > +{ > + unsigned long ctrl; > + ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL); > ctrl |= ARCH_TIMER_CTRL_ENABLE; > ctrl &= ~ARCH_TIMER_CTRL_IT_MASK; > + arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL, evt); > + arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl); > +} > > - arch_timer_reg_write(ARCH_TIMER_REG_TVAL, evt); > - arch_timer_reg_write(ARCH_TIMER_REG_CTRL, ctrl); > +static int arch_timer_set_next_event_virt(unsigned long evt, > + struct clock_event_device *unused) > +{ > + set_next_event(ARCH_TIMER_VIRT_ACCESS, evt); > + return 0; > +} > > +static int arch_timer_set_next_event_phys(unsigned long evt, > + struct clock_event_device *unused) > +{ > + set_next_event(ARCH_TIMER_PHYS_ACCESS, evt); > return 0; > } > > static int __cpuinit arch_timer_setup(struct clock_event_device *clk) > { > - /* Be safe... */ > - arch_timer_disable(); > - > clk->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP; > clk->name = "arch_sys_timer"; > clk->rating = 450; > - clk->set_mode = arch_timer_set_mode; > - clk->set_next_event = arch_timer_set_next_event; > - clk->irq = arch_timer_ppi; > + if (arch_timer_use_virtual) { > + arch_timer_set_mode_virt(CLOCK_EVT_MODE_SHUTDOWN, NULL); > + clk->set_mode = arch_timer_set_mode_virt; > + clk->set_next_event = arch_timer_set_next_event_virt; > + } else { > + arch_timer_set_mode_phys(CLOCK_EVT_MODE_SHUTDOWN, NULL); > + clk->set_mode = arch_timer_set_mode_phys; > + clk->set_next_event = arch_timer_set_next_event_phys; > + } > + > + clk->irq = arch_timer_ppi[PHYS_SECURE_PPI]; > > clockevents_config_and_register(clk, arch_timer_rate, > 0xf, 0x7fffffff); > > *__this_cpu_ptr(arch_timer_evt) = clk; > > - enable_percpu_irq(clk->irq, 0); > - if (arch_timer_ppi2) > - enable_percpu_irq(arch_timer_ppi2, 0); > + if (arch_timer_use_virtual) > + enable_percpu_irq(arch_timer_ppi[VIRT_PPI], 0); > + else { > + enable_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI], 0); > + if (arch_timer_ppi[PHYS_NONSECURE_PPI]) > + enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], 0); > + } > > return 0; > } > @@ -173,8 +265,7 @@ static int arch_timer_available(void) > return -ENXIO; > > if (arch_timer_rate == 0) { > - arch_timer_reg_write(ARCH_TIMER_REG_CTRL, 0); > - freq = arch_timer_reg_read(ARCH_TIMER_REG_FREQ); > + asm volatile("mrc p15, 0, %0, c14, c0, 0" : "=r" (freq)); > > /* Check the timer frequency. */ > if (freq == 0) { > @@ -185,43 +276,42 @@ static int arch_timer_available(void) > arch_timer_rate = freq; > } > > - pr_info_once("Architected local timer running at %lu.%02luMHz.\n", > - arch_timer_rate / 1000000, (arch_timer_rate / 10000) % 100); > + pr_info_once("Architected local timer running at %lu.%02luMHz (%s).\n", > + arch_timer_rate / 1000000, (arch_timer_rate / 10000) % 100, > + arch_timer_use_virtual ? "virt" : "phys"); > return 0; > } > > -static inline cycle_t arch_counter_get_cntpct(void) > +static u32 notrace arch_counter_get_cntpct32(void) > { > - u32 cvall, cvalh; > + cycle_t cnt = arch_counter_get_cntpct(); > > - asm volatile("mrrc p15, 0, %0, %1, c14" : "=r" (cvall), "=r" (cvalh)); > - > - return ((cycle_t) cvalh << 32) | cvall; > -} > - > -static inline cycle_t arch_counter_get_cntvct(void) > -{ > - u32 cvall, cvalh; > - > - asm volatile("mrrc p15, 1, %0, %1, c14" : "=r" (cvall), "=r" (cvalh)); > - > - return ((cycle_t) cvalh << 32) | cvall; > + /* > + * The sched_clock infrastructure only knows about counters > + * with at most 32bits. Forget about the upper 24 bits for the > + * time being... > + */ > + return (u32)(cnt & (u32)~0); Wouldn't a return (u32)cnt be sufficient here? > } > > static u32 notrace arch_counter_get_cntvct32(void) > { > - cycle_t cntvct = arch_counter_get_cntvct(); > + cycle_t cnt = arch_counter_get_cntvct(); > > /* > * The sched_clock infrastructure only knows about counters > * with at most 32bits. Forget about the upper 24 bits for the > * time being... > */ > - return (u32)(cntvct & (u32)~0); > + return (u32)(cnt & (u32)~0); > } > > static cycle_t arch_counter_read(struct clocksource *cs) > { > + /* > + * Always use the physical counter for the clocksource. > + * CNTHCTL.PL1PCTEN must be set to 1. > + */ > return arch_counter_get_cntpct(); > } > > @@ -245,10 +335,16 @@ static void __cpuinit arch_timer_stop(struct clock_event_device *clk) > { > pr_debug("arch_timer_teardown disable IRQ%d cpu #%d\n", > clk->irq, smp_processor_id()); > - disable_percpu_irq(clk->irq); > - if (arch_timer_ppi2) > - disable_percpu_irq(arch_timer_ppi2); > - arch_timer_set_mode(CLOCK_EVT_MODE_UNUSED, clk); > + > + if (arch_timer_use_virtual) { > + disable_percpu_irq(arch_timer_ppi[VIRT_PPI]); > + arch_timer_set_mode_virt(CLOCK_EVT_MODE_UNUSED, clk); > + } else { > + disable_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI]); > + if (arch_timer_ppi[PHYS_NONSECURE_PPI]) > + disable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI]); > + arch_timer_set_mode_phys(CLOCK_EVT_MODE_UNUSED, clk); > + } > } > > static struct local_timer_ops arch_timer_ops __cpuinitdata = { > @@ -261,36 +357,44 @@ static struct clock_event_device arch_timer_global_evt; > static int __init arch_timer_register(void) > { > int err; > + int ppi; > > err = arch_timer_available(); > if (err) > - return err; > + goto out; > > arch_timer_evt = alloc_percpu(struct clock_event_device *); > - if (!arch_timer_evt) > - return -ENOMEM; > + if (!arch_timer_evt) { > + err = -ENOMEM; > + goto out; > + } > > clocksource_register_hz(&clocksource_counter, arch_timer_rate); > > - err = request_percpu_irq(arch_timer_ppi, arch_timer_handler, > - "arch_timer", arch_timer_evt); > + if (arch_timer_use_virtual) { > + ppi = arch_timer_ppi[VIRT_PPI]; > + err = request_percpu_irq(ppi, arch_timer_handler_virt, > + "arch_timer", arch_timer_evt); > + } else { > + ppi = arch_timer_ppi[PHYS_SECURE_PPI]; > + err = request_percpu_irq(ppi, arch_timer_handler_phys, > + "arch_timer", arch_timer_evt); > + if (!err && arch_timer_ppi[PHYS_NONSECURE_PPI]) { > + ppi = arch_timer_ppi[PHYS_NONSECURE_PPI]; > + err = request_percpu_irq(ppi, arch_timer_handler_phys, > + "arch_timer", arch_timer_evt); > + if (err) > + free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI], > + arch_timer_evt); > + } > + } > + > if (err) { > pr_err("arch_timer: can't register interrupt %d (%d)\n", > - arch_timer_ppi, err); > + ppi, err); > goto out_free; > } > > - if (arch_timer_ppi2) { > - err = request_percpu_irq(arch_timer_ppi2, arch_timer_handler, > - "arch_timer", arch_timer_evt); > - if (err) { > - pr_err("arch_timer: can't register interrupt %d (%d)\n", > - arch_timer_ppi2, err); > - arch_timer_ppi2 = 0; > - goto out_free_irq; > - } > - } > - > err = local_timer_register(&arch_timer_ops); > if (err) { > /* > @@ -310,13 +414,19 @@ static int __init arch_timer_register(void) > return 0; > > out_free_irq: > - free_percpu_irq(arch_timer_ppi, arch_timer_evt); > - if (arch_timer_ppi2) > - free_percpu_irq(arch_timer_ppi2, arch_timer_evt); > + if (arch_timer_use_virtual) > + free_percpu_irq(arch_timer_ppi[VIRT_PPI], arch_timer_evt); > + else { > + free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI], > + arch_timer_evt); > + if (arch_timer_ppi[PHYS_NONSECURE_PPI]) > + free_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], > + arch_timer_evt); > + } > > out_free: > free_percpu(arch_timer_evt); > - > +out: > return err; > } > > @@ -329,6 +439,7 @@ int __init arch_timer_of_register(void) > { > struct device_node *np; > u32 freq; > + int i; > > np = of_find_matching_node(NULL, arch_timer_of_match); > if (!np) { > @@ -340,22 +451,40 @@ int __init arch_timer_of_register(void) > if (!of_property_read_u32(np, "clock-frequency", &freq)) > arch_timer_rate = freq; > > - arch_timer_ppi = irq_of_parse_and_map(np, 0); > - arch_timer_ppi2 = irq_of_parse_and_map(np, 1); > - pr_info("arch_timer: found %s irqs %d %d\n", > - np->name, arch_timer_ppi, arch_timer_ppi2); > + for (i = PHYS_SECURE_PPI; i < MAX_TIMER_PPI; i++) > + arch_timer_ppi[i] = irq_of_parse_and_map(np, i); > + > + /* > + * If no interrupt provided for virtual timer, we'll have to > + * stick to the physical timer. It'd better be accessible... > + */ > + if (!arch_timer_ppi[VIRT_PPI]) { > + arch_timer_use_virtual = false; > + > + if (!arch_timer_ppi[PHYS_SECURE_PPI] || > + !arch_timer_ppi[PHYS_NONSECURE_PPI]) { > + pr_warn("arch_timer: No interrupt available, giving up\n"); > + return -EINVAL; > + } > + } > > return arch_timer_register(); > } > > int __init arch_timer_sched_clock_init(void) > { > + u32 (*cnt32)(void); > int err; > > err = arch_timer_available(); > if (err) > return err; > > - setup_sched_clock(arch_counter_get_cntvct32, 32, arch_timer_rate); > + if (arch_timer_use_virtual) > + cnt32 = arch_counter_get_cntvct32; > + else > + cnt32 = arch_counter_get_cntpct32; > + > + setup_sched_clock(cnt32, 32, arch_timer_rate); > return 0; > } > -- Thanks - Cyril _______________________________________________ kvmarm mailing list kvmarm@xxxxxxxxxxxxxxxxxxxxx https://lists.cs.columbia.edu/cucslists/listinfo/kvmarm