>>> On 23.02.12 at 23:31, Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx> wrote: > For the hypervisor to take advantage of the MWAIT support it needs > to extract from the ACPI _CST the register address. But the > hypervisor does not have the support to parse DSDT so it relies on > the initial domain (dom0) to parse the ACPI Power Management information > and push it up to the hypervisor. The pushing of the data is done > by the processor_harveset_xen module which parses the information that > the ACPI parser has graciously exposed in 'struct acpi_processor'. > > For the ACPI parser to also expose the Cx states for MWAIT, we need > to expose the MWAIT capability (leaf 1). Furthermore we also need to > expose the MWAIT_LEAF capability (leaf 5) for cstate.c to properly > function. > > The hypervisor could expose these flags when it traps the XEN_EMULATE_PREFIX > operations, but it can't do it since it needs to be backwards compatible. > Instead we choose to use the native CPUID to figure out if the MWAIT > capability exists and use the XEN_SET_PDC query hypercall to figure out > if the hypervisor wants us to expose the MWAIT_LEAF capability or not. > > Note: The XEN_SET_PDC query was implemented in c/s 23783: > "ACPI: add _PDC input override mechanism". > > With this in place, instead of > C3 ACPI IOPORT 415 > we get now > C3:ACPI FFH INTEL MWAIT 0x20 > > Note: The cpu_idle which would be calling the mwait variants for idling > never gets set b/c we set the default pm_idle to be the hypercall variant. > > Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx> > --- > arch/x86/xen/enlighten.c | 92 > +++++++++++++++++++++++++++++++++++++- > include/xen/interface/platform.h | 4 +- > 2 files changed, 94 insertions(+), 2 deletions(-) > > diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c > index 12eb07b..4c82936 100644 > --- a/arch/x86/xen/enlighten.c > +++ b/arch/x86/xen/enlighten.c > @@ -62,6 +62,14 @@ > #include <asm/reboot.h> > #include <asm/stackprotector.h> > #include <asm/hypervisor.h> > +#include <asm/mwait.h> > + > +#ifdef CONFIG_ACPI > +#include <asm/acpi.h> > +#include <acpi/pdc_intel.h> > +#include <acpi/processor.h> > +#include <xen/interface/platform.h> > +#endif > > #include "xen-ops.h" > #include "mmu.h" > @@ -200,13 +208,17 @@ static void __init xen_banner(void) > static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; > static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; > > +static __read_mostly unsigned int cpuid_leaf1_ecx_set_mask; > +static __read_mostly unsigned int cpuid_leaf5_ecx_val; > +static __read_mostly unsigned int cpuid_leaf5_edx_val; > + > static void xen_cpuid(unsigned int *ax, unsigned int *bx, > unsigned int *cx, unsigned int *dx) > { > unsigned maskebx = ~0; > unsigned maskecx = ~0; > unsigned maskedx = ~0; > - > + unsigned setecx = 0; > /* > * Mask out inconvenient features, to try and disable as many > * unsupported kernel subsystems as possible. > @@ -214,9 +226,18 @@ static void xen_cpuid(unsigned int *ax, unsigned int > *bx, > switch (*ax) { > case 1: > maskecx = cpuid_leaf1_ecx_mask; > + setecx = cpuid_leaf1_ecx_set_mask; > maskedx = cpuid_leaf1_edx_mask; > break; > > + case CPUID_MWAIT_LEAF: > + /* Synthesize the values.. */ > + *ax = 0; > + *bx = 0; > + *cx = cpuid_leaf5_ecx_val; > + *dx = cpuid_leaf5_edx_val; > + return; > + > case 0xb: > /* Suppress extended topology stuff */ > maskebx = 0; > @@ -232,9 +253,75 @@ static void xen_cpuid(unsigned int *ax, unsigned int > *bx, > > *bx &= maskebx; > *cx &= maskecx; > + *cx |= setecx; > *dx &= maskedx; > + > } > > +static bool __init xen_check_mwait(void) > +{ > +#if CONFIG_ACPI #ifdef > + struct xen_platform_op op = { > + .cmd = XENPF_set_processor_pminfo, > + .u.set_pminfo.id = -1, > + .u.set_pminfo.type = XEN_PM_PDC, > + }; > + uint32_t buf[3]; > + unsigned int ax, bx, cx, dx; > + unsigned int mwait_mask; > + > + /* We need to determine whether it is OK to expose the MWAIT > + * capability to the kernel to harvest deeper than C3 states from ACPI > + * _CST using the processor_harvest_xen.c module. For this to work, we > + * need to gather the MWAIT_LEAF values (which the cstate.c code > + * checks against). The hypervisor won't expose the MWAIT flag because > + * it would break backwards compatibility; so we will find out directly > + * from the hardware and hypercall. > + */ > + if (!xen_initial_domain()) > + return false; > + > + ax = 1; > + cx = 0; > + > + native_cpuid(&ax, &bx, &cx, &dx); > + > + mwait_mask = (1 << (X86_FEATURE_EST % 32)) | > + (1 << (X86_FEATURE_MWAIT % 32)); > + > + if ((cx & mwait_mask) != mwait_mask) > + return false; > + > + /* We need to emulate the MWAIT_LEAF and for that we need both > + * ecx and edx. The hypercall provides only partial information. > + */ > + > + ax = CPUID_MWAIT_LEAF; > + bx = 0; > + cx = 0; > + dx = 0; > + > + native_cpuid(&ax, &bx, &cx, &dx); > + > + /* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so, > + * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3. > + */ > + buf[0] = ACPI_PDC_REVISION_ID; > + buf[1] = 1; > + buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP); > + > + set_xen_guest_handle(op.u.set_pminfo.pdc, buf); > + > + if ((HYPERVISOR_dom0_op(&op) == 0) && > + (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) { > + cpuid_leaf5_ecx_val = cx; > + cpuid_leaf5_edx_val = dx; > + } > + return true; > +#else > + return false; > +#endif > +} > static void __init xen_init_cpuid_mask(void) > { > unsigned int ax, bx, cx, dx; > @@ -261,6 +348,9 @@ static void __init xen_init_cpuid_mask(void) > /* Xen will set CR4.OSXSAVE if supported and not disabled by force */ > if ((cx & xsave_mask) != xsave_mask) > cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */ > + > + if (xen_check_mwait()) > + cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32)); > } > > static void xen_set_debugreg(int reg, unsigned long val) > diff --git a/include/xen/interface/platform.h > b/include/xen/interface/platform.h > index c168468..6220b98 100644 > --- a/include/xen/interface/platform.h > +++ b/include/xen/interface/platform.h > @@ -200,7 +200,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xenpf_getidletime_t); > #define XEN_PM_CX 0 > #define XEN_PM_PX 1 > #define XEN_PM_TX 2 > - > +#define XEN_PM_PDC 3 > /* Px sub info type */ > #define XEN_PX_PCT 1 > #define XEN_PX_PSS 2 > @@ -286,6 +286,7 @@ struct xen_processor_performance { > }; > DEFINE_GUEST_HANDLE_STRUCT(xen_processor_performance); > > +DEFINE_GUEST_HANDLE(uint32_t); Do you really need to introduce (step by step) handles for all those uintNN_t types in a way different from Xen's (__DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t))? Looks good to me otherwise. Jan > struct xenpf_set_processor_pminfo { > /* IN variables */ > uint32_t id; /* ACPI CPU ID */ > @@ -293,6 +294,7 @@ struct xenpf_set_processor_pminfo { > union { > struct xen_processor_power power;/* Cx: _CST/_CSD */ > struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */ > + GUEST_HANDLE(uint32_t) pdc; > }; > }; > DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo); > -- > 1.7.9.48.g85da4d -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html