Re: [PATCH] Add MCE support to KVM

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 2009-04-09 at 23:50 +0800, Avi Kivity wrote:
> Huang Ying wrote:
> > +int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> > +{
> > +	switch (msr) {
> > +	case MSR_EFER:
> > +		set_efer(vcpu, data);
> >  		break;
> >  	case MSR_IA32_DEBUGCTLMSR:
> >  		if (!data) {
> > @@ -807,6 +828,8 @@ int kvm_set_msr_common(struct kvm_vcpu *
> >  		break;
> >  	}
> >  	default:
> > +		if (!set_msr_mce(vcpu, msr, data))
> > +		    break;
> >  		pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data);
> >  		return 1;
> >  	}
> >   
> 
> Is there any reason you split kvm_set_msr_common() into two functions?

I want to group MCE related MSR together. And most MCE MSR read/write
need to access vcpu->arch.mcg_xxx or vcpu->arch_mce_banks, So I think
use a MCE specific function would be cleaner.

But It seems that something as follow would be better.

kvm_set_msr_comm()
{
	switch (msr) {
	case MSR_IA32_P5_MC_ADDR:
	case MSR_IA32_P5_MC_TYPE:
	case MSR_IA32_MCG_CAP:
	case MSR_IA32_MCG_CTL:
	case MSR_IA32_MCG_STATUS:
	case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_MISC + 4 * KVM_MCE_MAX_BANK:
		set_msr_mce();
		break;
	...
	}
	...
}

> > +
> > +int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
> > +{
> > +	u64 data;
> > +
> > +	switch (msr) {
> > +	case 0xc0010010: /* SYSCFG */
> > +	case 0xc0010015: /* HWCR */
> >   
> 
> Please use MSR_ constants (add them if they don't exist yet).

In fact, this is not added by me. But I can change this by the way.

> >  
> > +static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
> > +					u64 mcg_cap)
> > +{
> > +	int r;
> > +	unsigned bank_num = mcg_cap & 0xff, bank;
> > +	u64 *banks;
> > +
> > +	r = -EINVAL;
> > +	if (!bank_num)
> > +		goto out;
> > +	r = -ENOMEM;
> > +	banks = kzalloc(bank_num * sizeof(u64) * 4, GFP_KERNEL);
> >   
> 
> If banks is already allocated, you'll leak it.

Yes. I will fix this.

> Why not always allocate it on vcpu setup?

Because the MCE bank number is not fixed, it is based on mcg_cap from
user space.

> > +static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
> > +				      struct kvm_x86_mce *mce)
> > +{
> > +	u64 mcg_cap = vcpu->arch.mcg_cap;
> > +	unsigned bank_num = mcg_cap & 0xff;
> > +	u64 *banks = vcpu->arch.mce_banks;
> > +
> > +	if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
> > +		return -EINVAL;
> > +	/*
> > +	 * if IA32_MCG_CTL is not all 1s, the uncorrected error
> > +	 * reporting is disabled
> > +	 */
> > +	if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
> > +	    vcpu->arch.mcg_ctl != ~(u64)0)
> > +		return 0;
> > +	banks += 4 * mce->bank;
> > +	/*
> > +	 * if IA32_MCi_CTL is not all 1s, the uncorrected error
> > +	 * reporting is disabled for the bank
> > +	 */
> > +	if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
> > +		return 0;
> > +	if (mce->status & MCI_STATUS_UC) {
> > +		u64 status = mce->status;
> > +		if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
> > +		    !(vcpu->arch.cr4 & X86_CR4_MCE)) {
> > +			printk(KERN_DEBUG "kvm: set_mce: "
> > +			       "injects mce exception while "
> > +			       "previous one is in progress!\n");
> > +			set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
> > +			return 0;
> > +		}
> > +		if (banks[1] & MCI_STATUS_VAL)
> > +			status |= MCI_STATUS_OVER;
> > +		banks[1] = mce->status;
> > +		banks[2] = mce->addr;
> > +		banks[3] = mce->misc;
> > +		vcpu->arch.mcg_status = mce->mcg_status;
> > +		kvm_queue_exception(vcpu, MC_VECTOR);
> > +	} else if (!(banks[1] & MCI_STATUS_VAL) ||
> > +		   (!(banks[1] & MCI_STATUS_UC) &&
> > +		    !((mcg_cap & MCG_TES_P) && ((banks[1]>>53) & 0x3) < 2))) {
> > +		u64 status = mce->status;
> > +		if (banks[1] & MCI_STATUS_VAL)
> > +			status |= MCI_STATUS_OVER;
> > +		banks[1] = mce->status;
> > +		banks[2] = mce->addr;
> > +		banks[3] = mce->misc;
> > +	} else
> > +		banks[1] |= MCI_STATUS_OVER;
> > +	return 0;
> > +}
> >   
> 
> Can userspace just use KVM_SET_MSR for this?

In addition to assigning MSR, we have some other logic for MCE, such as
BANK reporting disabling, overwriting rules, triple fault for UC MCE
during MCIP. So I think we need some dedicate interface.

> > +	case KVM_X86_SETUP_MCE: {
> > +		u64 mcg_cap;
> > +
> > +		r = -EFAULT;
> > +		if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
> > +			goto out;
> > +		/*
> > +		 * extended machine-check state registers and CMCI are
> > +		 * not supported.
> > +		 */
> > +		mcg_cap &= ~(MCG_EXT_P|MCG_CMCI_P);
> >   
> 
> Instead of silently dropping, should return an error.
> 
> > +		if (copy_to_user(argp, &mcg_cap, sizeof mcg_cap))
> > +			goto out;
> >   
> 
> And not copy.

This is designed as some kind of feature negotiating. Use space request
MCE features via mcg_cap, kernel space remove un-supported features and
return the resulting mcg_cap.

> 
> >  #define KVM_TRC_SHIFT           16
> >  /*
> >   * kvm trace categories
> > @@ -528,6 +540,9 @@ struct kvm_irq_routing {
> >  #define KVM_NMI                   _IO(KVMIO,  0x9a)
> >  /* Available with KVM_CAP_SET_GUEST_DEBUG */
> >  #define KVM_SET_GUEST_DEBUG       _IOW(KVMIO,  0x9b, struct kvm_guest_debug)
> > +/* MCE for x86 */
> > +#define KVM_X86_SETUP_MCE           _IOWR(KVMIO,  0x9a, __u64)
> > +#define KVM_X86_SET_MCE           _IOW(KVMIO,  0x9b, struct kvm_x86_mce)
> >   
> 
> Please add a KVM_CAP_ to advertise this capability.

Yes. I will do this.

Best Regards,
Huang Ying

Attachment: signature.asc
Description: This is a digitally signed message part


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux