Re: [Qemu-devel] [Patch V2 1/2] x86, mce: Basic support to add LMCE support to QEMU

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

Comments below:

On Thu, Dec 10, 2015 at 02:41:21PM -0500, Ashok Raj wrote:
> This patch adds basic enumeration, control msr's required to support
> Local Machine Check Exception Support (LMCE).
> 
> - Added Local Machine Check definitions, changed MCG_CAP
> - Added support for IA32_FEATURE_CONTROL.
> - When delivering MCE to guest, we deliver to just a single CPU
>   when guest OS has opted in to Local delivery.
> 
> Signed-off-by: Ashok Raj <ashok.raj@xxxxxxxxx>
> Tested-by: Gong Chen <gong.chen@xxxxxxxxx>
> ---
> Resending with proper commit message for second patch
> 
>  target-i386/cpu.c |  8 ++++++++
>  target-i386/cpu.h |  8 ++++++--
>  target-i386/kvm.c | 38 +++++++++++++++++++++++++++++++-------
>  3 files changed, 45 insertions(+), 9 deletions(-)
> 
> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> index 11e5e39..167669a 100644
> --- a/target-i386/cpu.c
> +++ b/target-i386/cpu.c
> @@ -2737,6 +2737,13 @@ static void mce_init(X86CPU *cpu)
>      }
>  }
>  
> +static void feature_control_init(X86CPU *cpu)
> +{
> +	CPUX86State *cenv = &cpu->env;
> +
> +	cenv->msr_ia32_feature_control = ((1<<20) | (1<<0));

FEATURE_CONTROL_LOCKED and FEATURE_CONTROL_LMCE macros would make
the code clearer.

> +}
> +
>  #ifndef CONFIG_USER_ONLY
>  static void x86_cpu_apic_create(X86CPU *cpu, Error **errp)
>  {
> @@ -2858,6 +2865,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
>  #endif
>  
>      mce_init(cpu);
> +    feature_control_init(cpu);
>  
>  #ifndef CONFIG_USER_ONLY
>      if (tcg_enabled()) {
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index 84edfd0..a567d7a 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -282,8 +282,9 @@
>  
>  #define MCG_CTL_P       (1ULL<<8)   /* MCG_CAP register available */
>  #define MCG_SER_P       (1ULL<<24) /* MCA recovery/new status bits */
> +#define MCG_LMCE_P	(1ULL<<27) /* Local Machine Check Supported */

Please use spaces instead of tabs. You can detect this and other
coding style issues in this patch with checkpatch.pl.


>  
> -#define MCE_CAP_DEF     (MCG_CTL_P|MCG_SER_P)
> +#define MCE_CAP_DEF     (MCG_CTL_P|MCG_SER_P|MCG_LMCE_P)

This makes mcg_cap change when upgrading QEMU.

VMs with MCG_LMCE_P enabled shouldn't be migratable to hosts
running older kernels, or the guest may try to read or write
MSR_IA32_MCG_EXT_CTL after miration and get a #GP. That means:

1) Older machine-types (pc-2.5 and older) should keep the
   old (MCG_CTL_P|MCG_SER_P) default.
2) We can't make pc-2.6 enable LMCE by default, either, because
   QEMU guarantees that just changing the machine-type shouldn't
   introduce new host requirements (see:
   http://article.gmane.org/gmane.comp.emulators.qemu/346651)

It looks like we need a new -cpu option to enable the feature,
then. At least until we raise the minimum kernel version
requirements of QEMU.

(I didn't review the kvm_mce_inject() changes as I am not
familiar with the details of how LMCE is implemented.)


>  #define MCE_BANKS_DEF   10
>  
>  #define MCG_CAP_BANKS_MASK 0xff
> @@ -291,6 +292,7 @@
>  #define MCG_STATUS_RIPV (1ULL<<0)   /* restart ip valid */
>  #define MCG_STATUS_EIPV (1ULL<<1)   /* ip points to correct instruction */
>  #define MCG_STATUS_MCIP (1ULL<<2)   /* machine check in progress */
> +#define MCG_STATUS_LMCE (1ULL<<3)   /* Local MCE signaled */
>  
>  #define MCI_STATUS_VAL   (1ULL<<63)  /* valid error */
>  #define MCI_STATUS_OVER  (1ULL<<62)  /* previous errors lost */
> @@ -333,6 +335,7 @@
>  #define MSR_MCG_CAP                     0x179
>  #define MSR_MCG_STATUS                  0x17a
>  #define MSR_MCG_CTL                     0x17b
> +#define MSR_MCG_EXT_CTL			0x4d0
>  
>  #define MSR_P6_EVNTSEL0                 0x186
>  
> @@ -892,7 +895,6 @@ typedef struct CPUX86State {
>  
>      uint64_t mcg_status;
>      uint64_t msr_ia32_misc_enable;
> -    uint64_t msr_ia32_feature_control;
>  
>      uint64_t msr_fixed_ctr_ctrl;
>      uint64_t msr_global_ctrl;
> @@ -977,8 +979,10 @@ typedef struct CPUX86State {
>      int64_t tsc_khz;
>      void *kvm_xsave_buf;
>  
> +    uint64_t msr_ia32_feature_control;
>      uint64_t mcg_cap;
>      uint64_t mcg_ctl;
> +    uint64_t mcg_ext_ctl;
>      uint64_t mce_banks[MCE_BANKS_DEF*4];
>  
>      uint64_t tsc_aux;
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index 6dc9846..c61fe1f 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -72,6 +72,7 @@ static bool has_msr_tsc_aux;
>  static bool has_msr_tsc_adjust;
>  static bool has_msr_tsc_deadline;
>  static bool has_msr_feature_control;
> +static bool has_msr_ext_mcg_ctl;
>  static bool has_msr_async_pf_en;
>  static bool has_msr_pv_eoi_en;
>  static bool has_msr_misc_enable;
> @@ -370,18 +371,30 @@ static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code)
>      uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
>                        MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S;
>      uint64_t mcg_status = MCG_STATUS_MCIP;
> +    int flags = 0;
> +    CPUState *cs = CPU(cpu);
> +
> +    /*
> +     * We need to read back the value of MSR_EXT_MCG_CTL that was set by the
> +     * guest kernel back into Qemu
> +     */
> +    cpu_synchronize_state(cs);
> +
> +    flags = cpu_x86_support_mca_broadcast(env) ? MCE_INJECT_BROADCAST : 0;
>  
>      if (code == BUS_MCEERR_AR) {
> -        status |= MCI_STATUS_AR | 0x134;
> -        mcg_status |= MCG_STATUS_EIPV;
> +	status |= MCI_STATUS_AR | 0x134;
> +	mcg_status |= MCG_STATUS_EIPV;
> +	if (env->mcg_ext_ctl & 0x1) {
> +		mcg_status |= MCG_STATUS_LMCE;
> +		flags = 0; /* No Broadcast when LMCE is opted by guest */
> +	}
>      } else {
>          status |= 0xc0;
>          mcg_status |= MCG_STATUS_RIPV;
>      }
>      cpu_x86_inject_mce(NULL, cpu, 9, status, mcg_status, paddr,
> -                       (MCM_ADDR_PHYS << 6) | 0xc,
> -                       cpu_x86_support_mca_broadcast(env) ?
> -                       MCE_INJECT_BROADCAST : 0);
> +		       (MCM_ADDR_PHYS << 6) | 0xc, flags);
>  }
>  
>  static void hardware_memory_error(void)
> @@ -808,10 +821,14 @@ int kvm_arch_init_vcpu(CPUState *cs)
>  
>      c = cpuid_find_entry(&cpuid_data.cpuid, 1, 0);
>      if (c) {
> -        has_msr_feature_control = !!(c->ecx & CPUID_EXT_VMX) ||
> -                                  !!(c->ecx & CPUID_EXT_SMX);
> +        has_msr_feature_control = !!((c->ecx & CPUID_EXT_VMX) ||
> +                                  !!(c->ecx & CPUID_EXT_SMX) || 
> +				  !!(env->mcg_cap & MCG_LMCE_P));
>      }
>  
> +    if (has_msr_feature_control && (env->mcg_cap & MCG_LMCE_P))
> +        has_msr_ext_mcg_ctl = true;
> +
>      c = cpuid_find_entry(&cpuid_data.cpuid, 0x80000007, 0);
>      if (c && (c->edx & 1<<8) && invtsc_mig_blocker == NULL) {
>          /* for migration */
> @@ -1557,6 +1574,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
>  
>          kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
>          kvm_msr_entry_set(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl);
> +	kvm_msr_entry_set(&msrs[n++], MSR_MCG_EXT_CTL, env->mcg_ext_ctl);
>          for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) {
>              kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]);
>          }
> @@ -1811,6 +1829,9 @@ static int kvm_get_msrs(X86CPU *cpu)
>      if (has_msr_feature_control) {
>          msrs[n++].index = MSR_IA32_FEATURE_CONTROL;
>      }
> +    if (has_msr_ext_mcg_ctl) {
> +    	msrs[n++].index = MSR_MCG_EXT_CTL;
> +    }
>      if (has_msr_bndcfgs) {
>          msrs[n++].index = MSR_IA32_BNDCFGS;
>      }
> @@ -1981,6 +2002,9 @@ static int kvm_get_msrs(X86CPU *cpu)
>          case MSR_IA32_FEATURE_CONTROL:
>              env->msr_ia32_feature_control = msrs[i].data;
>              break;
> +	case MSR_MCG_EXT_CTL:
> +	    env->mcg_ext_ctl = msrs[i].data;
> +	    break;
>          case MSR_IA32_BNDCFGS:
>              env->msr_bndcfgs = msrs[i].data;
>              break;
> -- 
> 2.4.3
> 
> 

-- 
Eduardo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux