On Wed, Jan 27, 2021 at 12:23:45PM -0800, Michael Kelley wrote: > STIMER0 interrupts are most naturally modeled as per-cpu IRQs. But > because x86/x64 doesn't have per-cpu IRQs, the core STIMER0 interrupt > handling machinery is done in code under arch/x86 and Linux IRQs are > not used. Adding support for ARM64 means adding equivalent code > using per-cpu IRQs under arch/arm64. > > A better model is to treat per-cpu IRQs as the normal path (which it is > for modern architectures), and the x86/x64 path as the exception. Do this > by incorporating standard Linux per-cpu IRQ allocation into the main > SITMER0 driver code, and bypass it in the x86/x64 exception case. For > x86/x64, special case code is retained under arch/x86, but no STIMER0 > interrupt handling code is needed under arch/arm64. > > No functional change. > > Signed-off-by: Michael Kelley <mikelley@xxxxxxxxxxxxx> > --- > arch/x86/hyperv/hv_init.c | 2 +- > arch/x86/include/asm/mshyperv.h | 4 - > arch/x86/kernel/cpu/mshyperv.c | 10 +-- > drivers/clocksource/hyperv_timer.c | 170 +++++++++++++++++++++++++------------ > include/asm-generic/mshyperv.h | 5 -- > include/clocksource/hyperv_timer.h | 3 +- > 6 files changed, 123 insertions(+), 71 deletions(-) > > diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c > index 22e9557..fe37546 100644 > --- a/arch/x86/hyperv/hv_init.c > +++ b/arch/x86/hyperv/hv_init.c > @@ -371,7 +371,7 @@ void __init hyperv_init(void) > * Ignore any errors in setting up stimer clockevents > * as we can run with the LAPIC timer as a fallback. > */ > - (void)hv_stimer_alloc(); > + (void)hv_stimer_alloc(false); > > hv_apic_init(); > > diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h > index 5ccbba8..941dd55 100644 > --- a/arch/x86/include/asm/mshyperv.h > +++ b/arch/x86/include/asm/mshyperv.h > @@ -31,10 +31,6 @@ static inline u64 hv_get_register(unsigned int reg) > > void hyperv_vector_handler(struct pt_regs *regs); > > -static inline void hv_enable_stimer0_percpu_irq(int irq) {} > -static inline void hv_disable_stimer0_percpu_irq(int irq) {} > - > - > #if IS_ENABLED(CONFIG_HYPERV) > extern void *hv_hypercall_pg; > extern void __percpu **hyperv_pcpu_input_arg; > diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c > index 5679100a1..440507e 100644 > --- a/arch/x86/kernel/cpu/mshyperv.c > +++ b/arch/x86/kernel/cpu/mshyperv.c > @@ -85,21 +85,17 @@ void hv_remove_vmbus_handler(void) > set_irq_regs(old_regs); > } > > -int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void)) > +/* For x86/x64, override weak placeholders in hyperv_timer.c */ > +void hv_setup_stimer0_handler(void (*handler)(void)) > { > - *vector = HYPERV_STIMER0_VECTOR; > - *irq = -1; /* Unused on x86/x64 */ > hv_stimer0_handler = handler; > - return 0; > } > -EXPORT_SYMBOL_GPL(hv_setup_stimer0_irq); > > -void hv_remove_stimer0_irq(int irq) > +void hv_remove_stimer0_handler(void) > { > /* We have no way to deallocate the interrupt gate */ > hv_stimer0_handler = NULL; > } > -EXPORT_SYMBOL_GPL(hv_remove_stimer0_irq); > > void hv_setup_kexec_handler(void (*handler)(void)) > { > diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c > index edf2d43..c553b8c 100644 > --- a/drivers/clocksource/hyperv_timer.c > +++ b/drivers/clocksource/hyperv_timer.c > @@ -18,6 +18,9 @@ > #include <linux/sched_clock.h> > #include <linux/mm.h> > #include <linux/cpuhotplug.h> > +#include <linux/interrupt.h> > +#include <linux/irq.h> > +#include <linux/acpi.h> > #include <clocksource/hyperv_timer.h> > #include <asm/hyperv-tlfs.h> > #include <asm/mshyperv.h> > @@ -43,14 +46,13 @@ > */ > static bool direct_mode_enabled; > > -static int stimer0_irq; > -static int stimer0_vector; > +static int stimer0_irq = -1; > +static long __percpu *stimer0_evt; > static int stimer0_message_sint; > > /* > - * ISR for when stimer0 is operating in Direct Mode. Direct Mode > - * does not use VMbus or any VMbus messages, so process here and not > - * in the VMbus driver code. > + * Common code for stimer0 interrupts coming via Direct Mode or > + * as a VMbus message. > */ > void hv_stimer0_isr(void) > { > @@ -61,6 +63,16 @@ void hv_stimer0_isr(void) > } > EXPORT_SYMBOL_GPL(hv_stimer0_isr); > > +/* > + * stimer0 interrupt handler for architectures that support > + * per-cpu interrupts, which also implies Direct Mode. > + */ > +static irqreturn_t hv_stimer0_percpu_isr(int irq, void *dev_id) > +{ > + hv_stimer0_isr(); > + return IRQ_HANDLED; > +} > + > static int hv_ce_set_next_event(unsigned long delta, > struct clock_event_device *evt) > { > @@ -76,8 +88,8 @@ static int hv_ce_shutdown(struct clock_event_device *evt) > { > hv_set_register(HV_REGISTER_STIMER0_COUNT, 0); > hv_set_register(HV_REGISTER_STIMER0_CONFIG, 0); > - if (direct_mode_enabled) > - hv_disable_stimer0_percpu_irq(stimer0_irq); > + if (direct_mode_enabled && stimer0_irq >= 0) > + disable_percpu_irq(stimer0_irq); > > return 0; > } > @@ -95,8 +107,9 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt) > * on the specified hardware vector/IRQ. > */ > timer_cfg.direct_mode = 1; > - timer_cfg.apic_vector = stimer0_vector; > - hv_enable_stimer0_percpu_irq(stimer0_irq); > + timer_cfg.apic_vector = HYPERV_STIMER0_VECTOR; > + if (stimer0_irq >= 0) > + enable_percpu_irq(stimer0_irq, IRQ_TYPE_NONE); > } else { > /* > * When it expires, the timer will generate a VMbus message, > @@ -169,10 +182,67 @@ int hv_stimer_cleanup(unsigned int cpu) > } > EXPORT_SYMBOL_GPL(hv_stimer_cleanup); > > +/* > + * These placeholders are overridden by arch specific code on > + * architectures that need special setup of the stimer0 IRQ because > + * they don't support per-cpu IRQs (such as x86/x64). > + */ > +void __weak hv_setup_stimer0_handler(void (*handler)(void)) > +{ > +}; > + > +void __weak hv_remove_stimer0_handler(void) > +{ > +}; > + > +static int hv_setup_stimer0_irq(void) > +{ > + int ret; > + > + ret = acpi_register_gsi(NULL, HYPERV_STIMER0_VECTOR, > + ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_HIGH); > + if (ret < 0) { > + pr_err("Can't register Hyper-V stimer0 GSI. Error %d", ret); > + return ret; > + } > + stimer0_irq = ret; > + > + stimer0_evt = alloc_percpu(long); > + if (!stimer0_evt) { > + ret = -ENOMEM; > + goto unregister_gsi; > + } > + ret = request_percpu_irq(stimer0_irq, hv_stimer0_percpu_isr, > + "Hyper-V stimer0", stimer0_evt); > + if (ret) { > + pr_err("Can't request Hyper-V stimer0 IRQ %d. Error %d", > + stimer0_irq, ret); > + goto free_stimer0_evt; > + } > + return ret; > + > +free_stimer0_evt: > + free_percpu(stimer0_evt); > +unregister_gsi: > + acpi_unregister_gsi(stimer0_irq); > + stimer0_irq = -1; > + return ret; > +} > + > +static void hv_remove_stimer0_irq(void) > +{ > + if (stimer0_irq != -1) { > + free_percpu_irq(stimer0_irq, stimer0_evt); > + free_percpu(stimer0_evt); > + acpi_unregister_gsi(stimer0_irq); > + stimer0_irq = -1; > + } I think we need: else { hv_remove_stimer0_handler(); } here? Because previously, on x86 we set hv_stimer0_handler to NULL in hv_remove_stimer0_irq(), however, this patch doesn't keep this behavior any more. Thoughts? Regards, Boqun > +} > + > /* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */ > -int hv_stimer_alloc(void) > +int hv_stimer_alloc(bool have_percpu_irqs) > { > - int ret = 0; > + int ret; > > /* > * Synthetic timers are always available except on old versions of > @@ -188,29 +258,37 @@ int hv_stimer_alloc(void) > > direct_mode_enabled = ms_hyperv.misc_features & > HV_STIMER_DIRECT_MODE_AVAILABLE; > - if (direct_mode_enabled) { > - ret = hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector, > - hv_stimer0_isr); > + > + /* > + * If Direct Mode isn't enabled, the remainder of the initialization > + * is done later by hv_stimer_legacy_init() > + */ > + if (!direct_mode_enabled) > + return 0; > + > + if (have_percpu_irqs) { > + ret = hv_setup_stimer0_irq(); > if (ret) > - goto free_percpu; > + goto free_clock_event; > + } else { > + hv_setup_stimer0_handler(hv_stimer0_isr); > + } > > - /* > - * Since we are in Direct Mode, stimer initialization > - * can be done now with a CPUHP value in the same range > - * as other clockevent devices. > - */ > - ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING, > - "clockevents/hyperv/stimer:starting", > - hv_stimer_init, hv_stimer_cleanup); > - if (ret < 0) > - goto free_stimer0_irq; > + /* > + * Since we are in Direct Mode, stimer initialization > + * can be done now with a CPUHP value in the same range > + * as other clockevent devices. > + */ > + ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING, > + "clockevents/hyperv/stimer:starting", > + hv_stimer_init, hv_stimer_cleanup); > + if (ret < 0) { > + hv_remove_stimer0_irq(); > + goto free_clock_event; > } > return ret; > > -free_stimer0_irq: > - hv_remove_stimer0_irq(stimer0_irq); > - stimer0_irq = 0; > -free_percpu: > +free_clock_event: > free_percpu(hv_clock_event); > hv_clock_event = NULL; > return ret; > @@ -254,23 +332,6 @@ void hv_stimer_legacy_cleanup(unsigned int cpu) > } > EXPORT_SYMBOL_GPL(hv_stimer_legacy_cleanup); > > - > -/* hv_stimer_free - Free global resources allocated by hv_stimer_alloc() */ > -void hv_stimer_free(void) > -{ > - if (!hv_clock_event) > - return; > - > - if (direct_mode_enabled) { > - cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING); > - hv_remove_stimer0_irq(stimer0_irq); > - stimer0_irq = 0; > - } > - free_percpu(hv_clock_event); > - hv_clock_event = NULL; > -} > -EXPORT_SYMBOL_GPL(hv_stimer_free); > - > /* > * Do a global cleanup of clockevents for the cases of kexec and > * vmbus exit > @@ -287,12 +348,17 @@ void hv_stimer_global_cleanup(void) > hv_stimer_legacy_cleanup(cpu); > } > > - /* > - * If Direct Mode is enabled, the cpuhp teardown callback > - * (hv_stimer_cleanup) will be run on all CPUs to stop the > - * stimers. > - */ > - hv_stimer_free(); > + if (!hv_clock_event) > + return; > + > + if (direct_mode_enabled) { > + cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING); > + hv_remove_stimer0_irq(); > + stimer0_irq = -1; > + } > + free_percpu(hv_clock_event); > + hv_clock_event = NULL; > + > } > EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup); > > diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h > index 9f4089b..c271870 100644 > --- a/include/asm-generic/mshyperv.h > +++ b/include/asm-generic/mshyperv.h > @@ -178,9 +178,4 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset, > static inline void hyperv_cleanup(void) {} > #endif /* CONFIG_HYPERV */ > > -#if IS_ENABLED(CONFIG_HYPERV) > -extern int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void)); > -extern void hv_remove_stimer0_irq(int irq); > -#endif > - > #endif > diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h > index 34eef083..b6774aa 100644 > --- a/include/clocksource/hyperv_timer.h > +++ b/include/clocksource/hyperv_timer.h > @@ -21,8 +21,7 @@ > #define HV_MIN_DELTA_TICKS 1 > > /* Routines called by the VMbus driver */ > -extern int hv_stimer_alloc(void); > -extern void hv_stimer_free(void); > +extern int hv_stimer_alloc(bool have_percpu_irqs); > extern int hv_stimer_cleanup(unsigned int cpu); > extern void hv_stimer_legacy_init(unsigned int cpu, int sint); > extern void hv_stimer_legacy_cleanup(unsigned int cpu); > -- > 1.8.3.1 >