RE: [PATCH char-misc 1/1] Drivers: hv: vmbus: Implement Direct Mode for stimer0

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> writes:

> Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> writes:
> 
> > mikelley@xxxxxxxxxxxxxxxxxxxxxx writes:
> >
> >> From: Michael Kelley <mikelley@xxxxxxxxxxxxx>
> >>
> >> The 2016 version of Hyper-V offers the option to operate the guest VM
> >> per-vcpu stimer's in Direct Mode, which means the timer interupts on
> >> its own vector rather than queueing a VMbus message.  Direct Mode
> >> reduces timer processing overhead in both the hypervisor and the
> >> guest, and avoids having timer interrupts pollute the VMbus interrupt
> >> stream for the synthetic NIC and storage.  This patch enables Direct
> >> Mode by default on stimer0 (which is the only stimer used in Linux on
> >> Hyper-V) when running on a version of Hyper-V that supports it, with
> >> a hv_vmbus module parameter for disabling Direct Mode and reverting
> >> to the old behavior.
> >>
> >> Signed-off-by: Michael Kelley <mikelley@xxxxxxxxxxxxx>
> >> ---
> >>  arch/x86/include/asm/mshyperv.h    | 14 ++++++++
> >>  arch/x86/include/uapi/asm/hyperv.h | 26 ++++++++++++++
> >>  arch/x86/kernel/cpu/mshyperv.c     | 64 +++++++++++++++++++++++++++++++++-
> >>  drivers/hv/hv.c                    | 71 ++++++++++++++++++++++++++++++++++++--
> >>  drivers/hv/hyperv_vmbus.h          |  4 ++-
> >>  5 files changed, 175 insertions(+), 4 deletions(-)
> >>
> 
> (in addition to my previous comment)
> 
> This patch is also x86-heavy so it probably makes sense to make x86 maintainers (Thomas,
> Peter, Ingo) aware no matter which tree it will go through.
> 
> CC: x86@xxxxxxxxxx
> 
> >> diff --git a/arch/x86/include/asm/mshyperv.h
> >> b/arch/x86/include/asm/mshyperv.h index 740dc97..1bba1d7 100644
> >> --- a/arch/x86/include/asm/mshyperv.h
> >> +++ b/arch/x86/include/asm/mshyperv.h
> >> @@ -4,6 +4,8 @@
> >>  #include <linux/types.h>
> >>  #include <linux/atomic.h>
> >>  #include <linux/nmi.h>
> >> +#include <linux/irq.h>
> >> +#include <linux/irqdesc.h>
> >>  #include <asm/io.h>
> >>  #include <asm/hyperv.h>
> >>
> >> @@ -374,4 +376,16 @@ static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
> >>  	return NULL;
> >>  }
> >>  #endif
> >> +
> >> +/* Per architecture routines for stimer0 Direct Mode handling.  On
> >> +x86/x64
> >> + * there are no percpu actions to take.
> >> + */
> >> +#if IS_ENABLED(CONFIG_HYPERV)
> >> +static inline void hv_enable_stimer0_percpu_irq(int irq) { } static
> >> +inline void hv_disable_stimer0_percpu_irq(int irq) { } extern int
> >> +hv_allocate_stimer0_irq(int *irq, int *vector); extern void
> >> +hv_deallocate_stimer0_irq(int irq); extern void
> >> +hv_ack_stimer0_interrupt(struct irq_desc *desc); #endif
> >> +
> >>  #endif
> >> diff --git a/arch/x86/include/uapi/asm/hyperv.h
> >> b/arch/x86/include/uapi/asm/hyperv.h
> >> index f65d125..408cf3e 100644
> >> --- a/arch/x86/include/uapi/asm/hyperv.h
> >> +++ b/arch/x86/include/uapi/asm/hyperv.h
> >> @@ -112,6 +112,22 @@
> >>  #define HV_X64_GUEST_IDLE_STATE_AVAILABLE		(1 << 5)
> >>  /* Guest crash data handler available */
> >>  #define HV_X64_GUEST_CRASH_MSR_AVAILABLE		(1 << 10)
> >> +/* Debug MSRs available */
> >> +#define HV_X64_DEBUG_MSR_AVAILABLE			(1 << 11)
> >> +/* Support for Non-Privileged Instruction Execution Prevention is available */
> >> +#define HV_X64_NPIEP_AVAILABLE				(1 << 12)
> >> +/* Support for DisableHypervisor is available */
> >> +#define HV_X64_DISABLE_HYPERVISOR_AVAILABLE		(1 << 13)
> >> +/* Extended GVA Ranges for Flush Virtual Address list is available */
> >> +#define HV_X64_EXTENDED_GVA_RANGE_AVAILABLE		(1 << 14)
> >> +/* Return Hypercall output via XMM registers is available */
> >> +#define HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE		(1 << 15)
> >> +/* SINT polling mode available */
> >> +#define HV_X64_SINT_POLLING_MODE_AVAILABLE		(1 << 17)
> >> +/* Hypercall MSR lock is available */
> >> +#define HV_X64_HYPERCALL_MSR_LOCK_AVAILABLE		(1 << 18)
> >> +/* stimer direct mode is available */
> >> +#define HV_X64_STIMER_DIRECT_MODE_AVAILABLE		(1 << 19)
> >>
> >>  /*
> >>   * Implementation recommendations. Indicates which behaviors the
> >> hypervisor @@ -300,6 +316,16 @@ enum HV_GENERIC_SET_FORMAT {
> >>
> >>  #define HV_SYNIC_STIMER_COUNT		(4)
> >>
> >> +/* Hardware IRQ number to use for stimer0 in Direct Mode.  This IRQ
> >> +is a fake
> >> + * because stimer's in Direct Mode simply interrupt on the specified
> >> +vector,
> >> + * without using a particular IOAPIC pin. But we use the IRQ
> >> +allocation
> >> + * machinery, so we need a hardware IRQ #.  This value is somewhat
> >> +arbitrary,
> >> + * but it should not be a legacy IRQ (0 to 15), and should fit
> >> +within the
> >> + * single IOAPIC (0 to 23) that Hyper-V provides to a guest VM. So
> >> +any value
> >> + * between 16 and 23 should be good.
> >> + */
> >
> > (nitpick): all comments in the patch are in 'net' style:
> >
> >  /* This is a
> >   * comment.
> >   */
> >
> > While in Hyper-V files (and x86 in general) the 'normal' style is used:
> >
> >  /*
> >   * This is a
> >   * comment.
> >   */
> >
> > But checkpatch.pl doesn't complain.

Will fix in v2.

> >
> >> +#define HV_STIMER0_IRQNR		18
> >> +
> >>  /* Define synthetic interrupt controller message constants. */
> >>  #define HV_MESSAGE_SIZE			(256)
> >>  #define HV_MESSAGE_PAYLOAD_BYTE_COUNT	(240)
> >> diff --git a/arch/x86/kernel/cpu/mshyperv.c
> >> b/arch/x86/kernel/cpu/mshyperv.c index 236324e8..88dc243 100644
> >> --- a/arch/x86/kernel/cpu/mshyperv.c
> >> +++ b/arch/x86/kernel/cpu/mshyperv.c
> >> @@ -19,7 +19,10 @@
> >>  #include <linux/efi.h>
> >>  #include <linux/interrupt.h>
> >>  #include <linux/irq.h>
> >> +#include <linux/irqdomain.h>
> >> +#include <linux/irqdesc.h>
> >>  #include <linux/kexec.h>
> >> +#include <linux/acpi.h>
> >>  #include <asm/processor.h>
> >>  #include <asm/hypervisor.h>
> >>  #include <asm/hyperv.h>
> >> @@ -27,6 +30,7 @@
> >>  #include <asm/desc.h>
> >>  #include <asm/irq_regs.h>
> >>  #include <asm/i8259.h>
> >> +#include <asm/irqdomain.h>
> >>  #include <asm/apic.h>
> >>  #include <asm/timer.h>
> >>  #include <asm/reboot.h>
> >> @@ -69,6 +73,64 @@ void hv_remove_vmbus_irq(void)
> >> EXPORT_SYMBOL_GPL(hv_setup_vmbus_irq);
> >>  EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq);
> >>
> >> +
> >> +/* Routines to do per-architecture handling of stimer0 when in
> >> +Direct Mode */
> >> +
> >> +void hv_ack_stimer0_interrupt(struct irq_desc *desc) {
> >> +	ack_APIC_irq();
> >> +}
> >> +
> >> +static void allonline_vector_allocation_domain(int cpu, struct cpumask *retmask,
> >> +				const struct cpumask *mask)
> >> +{
> >> +	cpumask_copy(retmask, cpu_online_mask); }
> >> +
> >> +int hv_allocate_stimer0_irq(int *irq, int *vector) {
> >> +	int		localirq;
> >> +	int		result;
> >> +	struct irq_data *irq_data;
> >> +
> >> +	/* The normal APIC vector allocation domain allows allocation of vectors
> >> +	 * only for the calling CPU.  So we change the allocation domain to one
> >> +	 * that allows vectors to be allocated in all online CPUs.  This
> >> +	 * change is fine in a Hyper-V VM because VMs don't have the usual
> >> +	 * complement of interrupting devices.
> >> +	 */
> >> +	apic->vector_allocation_domain = allonline_vector_allocation_domain;
> >> +	localirq = acpi_register_gsi(NULL, HV_STIMER0_IRQNR,
> >> +				ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH);
> >> +	if (localirq < 0) {
> >> +		pr_err("Cannot register stimer0 gsi. Error %d", localirq);
> >> +		return -1;
> >> +	}
> >> +
> >> +	/* We pass in a dummy IRQ handler because architecture independent code
> >> +	 * will later override the IRQ domain interrupt handler and set it to a
> >> +	 * Hyper-V specific handler.
> >> +	 */
> >> +	result = request_irq(localirq, (irq_handler_t)(-1), 0,
> >> +					"Hyper-V stimer0", NULL);
> >
> > I remember K. Y. told me he has patches in his stash to implement
> > Hyper-V-specific APIC. Is this still in works or there is a reason to
> > not do it? Just curious.
> >

I've looked at KY's pending patch.  It's only a partial APIC implementation that overrides certain performance sensitive functions and doesn't really help what's needed here.   v2 of the patch will take a different approach that makes this moot.

> >> +	if (result) {
> >> +		pr_err("Cannot request stimer0 irq. Error %d", result);
> >> +		acpi_unregister_gsi(localirq);
> >> +		return -1;
> >> +	}
> >> +	irq_data = irq_domain_get_irq_data(x86_vector_domain, localirq);
> >
> > In theory irq_domain_get_irq_data() can return NULL.
> >

Per separate comments from Thomas Gleixner, shouldn't be doing this anyway.  Will take a different approach in v2 of the patch. 

> >> +	*vector = irqd_cfg(irq_data)->vector;
> >> +	*irq = localirq;
> >> +	return 0;
> >> +}
> >> +
> >> +void hv_deallocate_stimer0_irq(int irq) {
> >> +	free_irq(irq, NULL);
> >> +	acpi_unregister_gsi(irq);
> >> +}
> >> +
> >> +
> >>  void hv_setup_kexec_handler(void (*handler)(void))  {
> >>  	hv_kexec_handler = handler;
> >> @@ -195,7 +257,7 @@ static void __init ms_hyperv_init_platform(void)
> >>  		hv_host_info_ecx = cpuid_ecx(HVCPUID_VERSION);
> >>  		hv_host_info_edx = cpuid_edx(HVCPUID_VERSION);
> >>
> >> -		pr_info("Hyper-V Host Build:%d-%d.%d-%d-%d.%d\n",
> >> +		pr_info("Hyper-V: Host Build %d-%d.%d-%d-%d.%d\n",
> >
> > While this definitely makes logs nicer uhe change is probably not
> > needed in this particular patch.

True.  Will move this to a separate patch.

> >
> >>  			hv_host_info_eax, hv_host_info_ebx >> 16,
> >>  			hv_host_info_ebx & 0xFFFF, hv_host_info_ecx,
> >>  			hv_host_info_edx >> 24, hv_host_info_edx & 0xFFFFFF); diff --git
> >> a/drivers/hv/hv.c b/drivers/hv/hv.c index fe96aab..68ac474 100644
> >> --- a/drivers/hv/hv.c
> >> +++ b/drivers/hv/hv.c
> >> @@ -27,8 +27,12 @@
> >>  #include <linux/vmalloc.h>
> >>  #include <linux/hyperv.h>
> >>  #include <linux/version.h>
> >> -#include <linux/interrupt.h>
> >> +#include <linux/irq.h>
> >> +#include <linux/irqdesc.h>
> >> +#include <linux/random.h>
> >> +#include <linux/kernel_stat.h>
> >>  #include <linux/clockchips.h>
> >> +#include <linux/module.h>
> >>  #include <asm/hyperv.h>
> >>  #include <asm/mshyperv.h>
> >>  #include "hyperv_vmbus.h"
> >> @@ -38,6 +42,21 @@ struct hv_context hv_context = {
> >>  	.synic_initialized	= false,
> >>  };
> >>
> >> +/* If true, we're using Direct Mode for stimer0, and the timer will
> >> +do it own
> >> + * interrupt when it expires. If false, stimer0 is not using Direct
> >> +Mode and
> >> + * will send a VMbus message when it expires. We prefer to use
> >> +Direct Mode,
> >> + * but not all versions of Hyper-V support Direct Mode.
> >> + *
> >> + * While Hyper-V provides a total of four stimer's per CPU, Linux
> >> +use only
> >> + * stimer0.
> >> + */
> >> +static bool	stimer_direct_mode;
> >> +static int	stimer0_irq;
> >> +static int	stimer0_vector;
> >> +static bool	direct_mode_disable;
> >> +module_param(direct_mode_disable, bool, 0444);
> >> +MODULE_PARM_DESC(direct_mode_disable, "Set to Y to disable Direct
> >> +Mode.");
> >> +
> >>  #define HV_TIMER_FREQUENCY (10 * 1000 * 1000) /* 100ns period */
> >> #define HV_MAX_MAX_DELTA_TICKS 0xffffffff  #define HV_MIN_DELTA_TICKS
> >> 1 @@ -52,7 +71,12 @@ int hv_init(void)
> >>  	hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context);
> >>  	if (!hv_context.cpu_context)
> >>  		return -ENOMEM;
> >> +	stimer_direct_mode = (ms_hyperv.misc_features &
> >> +		HV_X64_STIMER_DIRECT_MODE_AVAILABLE) ? true : false;
> >>
> >> +	/* Apply boot command line override to the Direct Mode setting */
> >> +	if (direct_mode_disable)
> >> +		stimer_direct_mode = false;
> >
> > Having both direct_mode_disable and stimer_direct_mode seems redundant.
> >
> > we may write something like
> >
> >      if (!direct_mode_disable)
> >          direct_mode_disable = (ms_hyperv.misc_features &
> > HV_X64_STIMER_DIRECT_MODE_AVAILABLE) ? false : true;
> >
> > and use it everywhere.

Got it.  Makes sense.

> >
> >>  	return 0;
> >>  }
> >>
> >> @@ -91,6 +115,23 @@ int hv_post_message(union hv_connection_id connection_id,
> >>  	return status & 0xFFFF;
> >>  }
> >>
> >> +/* ISR for when stimer0 is operating in Direct Mode.  Direct Mode
> >> +does
> >> + * not use VMBus or any VMBus messages, so process here and not in
> >> +the
> >> + * VMBus driver code.
> >> + */
> >> +
> >> +static void hv_stimer0_isr(struct irq_desc *desc) {
> >> +	struct hv_per_cpu_context *hv_cpu;
> >> +
> >> +	__this_cpu_inc(*desc->kstat_irqs);
> >> +	__this_cpu_inc(kstat.irqs_sum);
> >> +	hv_ack_stimer0_interrupt(desc);
> >> +	hv_cpu = this_cpu_ptr(hv_context.cpu_context);
> >> +	hv_cpu->clk_evt->event_handler(hv_cpu->clk_evt);
> >> +	add_interrupt_randomness(desc->irq_data.irq, 0);
> >
> > AFAIU implementing Hyper-V specific IRQ chip would allow us to drop
> > this as we would do this on 'normal' Linux IRQ path.
> >
> >> +}
> >> +
> >>  static int hv_ce_set_next_event(unsigned long delta,
> >>  				struct clock_event_device *evt)
> >>  {
> >> @@ -108,6 +149,8 @@ static int hv_ce_shutdown(struct
> >> clock_event_device *evt)  {
> >>  	hv_init_timer(HV_X64_MSR_STIMER0_COUNT, 0);
> >>  	hv_init_timer_config(HV_X64_MSR_STIMER0_CONFIG, 0);
> >> +	if (stimer_direct_mode)
> >> +		hv_disable_stimer0_percpu_irq(stimer0_irq);
> >>
> >
> > Both hv_disable_stimer0_percpu_irq() and
> > hv_enable_stimer0_percpu_irq() are no-ops, right? Why don't we
> > mask/unmask the IRQ? I may have missed something...

Yes, these functions are no-ops on x86 as there's really no need to mask/unmask.  But we have code in the works to enable Linux guests on Hyper-V on ARM64 hardware.  On the ARM64 side, stimer Direct Mode fits nicely with percpu IRQs, and these functions are needed to call enable_percpu_irq() and disable_percpu_irq().

> >
> >>  	return 0;
> >>  }
> >> @@ -116,9 +159,24 @@ static int hv_ce_set_oneshot(struct
> >> clock_event_device *evt)  {
> >>  	union hv_timer_config timer_cfg;
> >>
> >> +	timer_cfg.as_uint64 = 0; /* Zero everything */
> >>  	timer_cfg.enable = 1;
> >>  	timer_cfg.auto_enable = 1;
> >> -	timer_cfg.sintx = VMBUS_MESSAGE_SINT;
> >> +	if (stimer_direct_mode) {
> >> +
> >> +		/* When it expires, the timer will directly interrupt
> >> +		 * on the specific hardware vector.
> >> +		 */
> >> +		timer_cfg.direct_mode = 1;
> >> +		timer_cfg.apic_vector = stimer0_vector;
> >> +		hv_enable_stimer0_percpu_irq(stimer0_irq);
> >> +	} else {
> >> +		/* When it expires, the timer will generate a VMbus message,
> >> +		 * to be handled by the normal VMbus interrupt handler.
> >> +		 */
> >> +		timer_cfg.direct_mode = 0;
> >> +		timer_cfg.sintx = VMBUS_MESSAGE_SINT;
> >> +	}
> >>  	hv_init_timer_config(HV_X64_MSR_STIMER0_CONFIG,
> >> timer_cfg.as_uint64);
> >>
> >>  	return 0;
> >> @@ -191,6 +249,12 @@ int hv_synic_alloc(void)
> >>  		INIT_LIST_HEAD(&hv_cpu->chan_list);
> >>  	}
> >>
> >> +	if (stimer_direct_mode) {
> >> +		if (hv_allocate_stimer0_irq(&stimer0_irq, &stimer0_vector))
> >> +			goto err;
> >> +		irq_set_handler(stimer0_irq, hv_stimer0_isr);
> >> +	}
> >> +
> >>  	return 0;
> >>  err:
> >>  	return -ENOMEM;
> >> @@ -292,6 +356,9 @@ void hv_synic_clockevents_cleanup(void)
> >>  	if (!(ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE))
> >>  		return;
> >>
> >> +	if (stimer_direct_mode)
> >> +		hv_deallocate_stimer0_irq(stimer0_irq);
> >> +
> >>  	for_each_present_cpu(cpu) {
> >>  		struct hv_per_cpu_context *hv_cpu
> >>  			= per_cpu_ptr(hv_context.cpu_context, cpu); diff --git
> >> a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index
> >> de6f01d..ee8c89b 100644
> >> --- a/drivers/hv/hyperv_vmbus.h
> >> +++ b/drivers/hv/hyperv_vmbus.h
> >> @@ -55,7 +55,9 @@
> >>  		u64 periodic:1;
> >>  		u64 lazy:1;
> >>  		u64 auto_enable:1;
> >> -		u64 reserved_z0:12;
> >> +		u64 apic_vector:8;
> >> +		u64 direct_mode:1;
> >> +		u64 reserved_z0:3;
> >>  		u64 sintx:4;
> >>  		u64 reserved_z1:44;
> >>  	};
> 
> --
>   Vitaly
_______________________________________________
devel mailing list
devel@xxxxxxxxxxxxxxxxxxxxxx
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel



[Index of Archives]     [Linux Driver Backports]     [DMA Engine]     [Linux GPIO]     [Linux SPI]     [Video for Linux]     [Linux USB Devel]     [Linux Coverity]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [Yosemite Backpacking]
  Powered by Linux