Re: [PATCH] KVM: VMX: enable LBR virtualization

Jian Zhou <jianjay.zhou@xxxxxxxxxx> · Mon, 12 Oct 2015 20:10:11 +0800

ping...

> Use the VMX MSR store/load mechanism and the MSR intercept bitmap
> to implement LBR (Last Branch Record) virtualization.
> 
> Signed-off-by: Jian Zhou  <jianjay.zhou@xxxxxxxxxx>
> Signed-off-by: Stephen He <herongguang.he@xxxxxxxxxx>
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 2beee03..244f68c 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -887,6 +887,12 @@ struct kvm_x86_ops {
>   					   gfn_t offset, unsigned long mask);
>   	/* pmu operations of sub-arch */
>   	const struct kvm_pmu_ops *pmu_ops;
> +
> +	void (*vmcs_write64)(unsigned long field, u64 value);
> +	u64 (*vmcs_read64)(unsigned long field);
> +
> +	int (*add_atomic_switch_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 guest_val, u64 host_val);
> +	void (*disable_intercept_guest_msr)(struct kvm_vcpu *vcpu, u32 msr);
>   };
> 
>   struct kvm_arch_async_pf {
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 06ef490..2305308 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -159,7 +159,7 @@ module_param(ple_window_max, int, S_IRUGO);
> 
>   extern const ulong vmx_return;
> 
> -#define NR_AUTOLOAD_MSRS 8
> +#define NR_AUTOLOAD_MSRS 256
>   #define VMCS02_POOL_SIZE 1
> 
>   struct vmcs {
> @@ -1630,6 +1630,7 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
>   	--m->nr;
>   	m->guest[i] = m->guest[m->nr];
>   	m->host[i] = m->host[m->nr];
> +	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, m->nr);
>   	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
>   	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
>   }
> @@ -1645,7 +1646,7 @@ static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
>   	vm_exit_controls_setbit(vmx, exit);
>   }
> 
> -static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
> +static int add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
>   				  u64 guest_val, u64 host_val)
>   {
>   	unsigned i;
> @@ -1660,7 +1661,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
>   					GUEST_IA32_EFER,
>   					HOST_IA32_EFER,
>   					guest_val, host_val);
> -			return;
> +			return 0;
>   		}
>   		break;
>   	case MSR_CORE_PERF_GLOBAL_CTRL:
> @@ -1671,7 +1672,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
>   					GUEST_IA32_PERF_GLOBAL_CTRL,
>   					HOST_IA32_PERF_GLOBAL_CTRL,
>   					guest_val, host_val);
> -			return;
> +			return 0;
>   		}
>   		break;
>   	}
> @@ -1683,9 +1684,10 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
>   	if (i == NR_AUTOLOAD_MSRS) {
>   		printk_once(KERN_WARNING "Not enough msr switch entries. "
>   				"Can't add msr %x\n", msr);
> -		return;
> +		return -ENOSPC;
>   	} else if (i == m->nr) {
>   		++m->nr;
> +		vmcs_write32(VM_EXIT_MSR_STORE_COUNT, m->nr);
>   		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
>   		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
>   	}
> @@ -1694,6 +1696,15 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
>   	m->guest[i].value = guest_val;
>   	m->host[i].index = msr;
>   	m->host[i].value = host_val;
> +
> +	return 0;
> +}
> +
> +static int vmx_add_atomic_switch_msr(struct kvm_vcpu *vcpu, u32 msr, u64 guest_val, u64 host_val)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> +	return add_atomic_switch_msr(vmx, msr, guest_val, host_val);
>   }
> 
>   static void reload_tss(void)
> @@ -4332,6 +4343,20 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
>   			msr, MSR_TYPE_W);
>   }
> 
> +static void vmx_disable_intercept_guest_msr(struct kvm_vcpu *vcpu, u32 msr)
> +{
> +	if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) {
> +		vmx_disable_intercept_msr_read_x2apic(msr);
> +		vmx_disable_intercept_msr_write_x2apic(msr);
> +	}
> +	else {
> +		if (is_long_mode(vcpu))
> +			vmx_disable_intercept_for_msr(msr, true);
> +		else
> +			vmx_disable_intercept_for_msr(msr, false);
> +	}
> +}
> +
>   static int vmx_vm_has_apicv(struct kvm *kvm)
>   {
>   	return enable_apicv && irqchip_in_kernel(kvm);
> @@ -4654,6 +4679,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>   #endif
> 
>   	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
> +	vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autoload.guest));
>   	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
>   	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
>   	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
> @@ -10409,6 +10435,12 @@ static struct kvm_x86_ops vmx_x86_ops = {
>   	.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
> 
>   	.pmu_ops = &intel_pmu_ops,
> +
> +	.vmcs_write64 = vmcs_write64,
> +	.vmcs_read64 = vmcs_read64,
> +
> +	.add_atomic_switch_msr = vmx_add_atomic_switch_msr,
> +	.disable_intercept_guest_msr = vmx_disable_intercept_guest_msr,
>   };
> 
>   static int __init vmx_init(void)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 92511d4..f1fcd7c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -176,6 +176,113 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
> 
>   u64 __read_mostly host_xcr0;
> 
> +/* Netburst (P4) last-branch recording */
> +#define MSR_P4_LER_FROM_LIP 		0x000001d7
> +#define MSR_P4_LER_TO_LIP 		0x000001d8
> +#define MSR_P4_LASTBRANCH_TOS		0x000001da
> +#define MSR_P4_LASTBRANCH_0		0x000001db
> +#define NUM_MSR_P4_LASTBRANCH		4
> +#define MSR_P4_LASTBRANCH_0_FROM_LIP	0x00000680
> +#define MSR_P4_LASTBRANCH_0_TO_LIP	0x000006c0
> +#define NUM_MSR_P4_LASTBRANCH_FROM_TO	16
> +
> +/* Pentium M (and Core) last-branch recording */
> +#define MSR_PM_LASTBRANCH_TOS		0x000001c9
> +#define MSR_PM_LASTBRANCH_0		0x00000040
> +#define NUM_MSR_PM_LASTBRANCH		8
> +
> +/* Core 2 and Atom last-branch recording */
> +#define MSR_C2_LASTBRANCH_TOS		0x000001c9
> +#define MSR_C2_LASTBRANCH_0_FROM_IP	0x00000040
> +#define MSR_C2_LASTBRANCH_0_TO_IP	0x00000060
> +#define NUM_MSR_C2_LASTBRANCH_FROM_TO	4
> +#define NUM_MSR_ATOM_LASTBRANCH_FROM_TO	8
> +
> +struct lbr_info {
> +	u32 base, count;
> +} p4_lbr[] = {
> +	{ MSR_LBR_SELECT,               1 },
> +	{ MSR_P4_LER_FROM_LIP,          1 },
> +	{ MSR_P4_LER_TO_LIP,            1 },
> +	{ MSR_P4_LASTBRANCH_TOS,        1 },
> +	{ MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
> +	{ MSR_P4_LASTBRANCH_0_TO_LIP,   NUM_MSR_P4_LASTBRANCH_FROM_TO },
> +	{ 0, 0 }
> +}, c2_lbr[] = {
> +	{ MSR_LBR_SELECT,               1 },
> +	{ MSR_IA32_LASTINTFROMIP,       1 },
> +	{ MSR_IA32_LASTINTTOIP,         1 },
> +	{ MSR_C2_LASTBRANCH_TOS,        1 },
> +	{ MSR_C2_LASTBRANCH_0_FROM_IP,  NUM_MSR_C2_LASTBRANCH_FROM_TO },
> +	{ MSR_C2_LASTBRANCH_0_TO_IP,    NUM_MSR_C2_LASTBRANCH_FROM_TO },
> +	{ 0, 0 }
> +}, nh_lbr[] = {
> +	{ MSR_LBR_SELECT,               1 },
> +	{ MSR_IA32_LASTINTFROMIP,       1 },
> +	{ MSR_IA32_LASTINTTOIP,         1 },
> +	{ MSR_C2_LASTBRANCH_TOS,        1 },
> +	{ MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
> +	{ MSR_P4_LASTBRANCH_0_TO_LIP,   NUM_MSR_P4_LASTBRANCH_FROM_TO },
> +	{ 0, 0 }
> +}, at_lbr[] = {
> +	{ MSR_LBR_SELECT,               1 },
> +	{ MSR_IA32_LASTINTFROMIP,       1 },
> +	{ MSR_IA32_LASTINTTOIP,         1 },
> +	{ MSR_C2_LASTBRANCH_TOS,        1 },
> +	{ MSR_C2_LASTBRANCH_0_FROM_IP,  NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
> +	{ MSR_C2_LASTBRANCH_0_TO_IP,    NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
> +	{ 0, 0 }
> +};
> +
> +static const struct lbr_info *last_branch_msr_get(void)
> +{
> +	switch ( boot_cpu_data.x86 )
> +	{
> +		case 6:
> +			switch ( boot_cpu_data.x86_model )
> +			{
> +				/* Core2 Duo */
> +				case 15:
> +				/* Enhanced Core */
> +				case 23:
> +					return c2_lbr;
> +					break;
> +				/* Nehalem */
> +				case 26: case 30: case 31: case 46:
> +				/* Westmere */
> +				case 37: case 44: case 47:
> +				/* Sandy Bridge */
> +				case 42: case 45:
> +				/* Ivy Bridge */
> +				case 58: case 62:
> +				/* Haswell */
> +				case 60: case 63: case 69: case 70:
> +				/* Broadwell, Skylake */
> +				case 61: case 78:
> +					return nh_lbr;
> +					break;
> +				/* Atom */
> +				case 28: case 38: case 39: case 53: case 54:
> +				/* Silvermont */
> +				case 55: case 74: case 77: case 90: case 93:
> +					return at_lbr;
> +					break;
> +			}
> +			break;
> +		case 15:
> +			switch ( boot_cpu_data.x86_model )
> +			{
> +				/* Pentium4/Xeon with em64t */
> +				case 3: case 4: case 6:
> +					return p4_lbr;
> +					break;
> +			}
> +			break;
> +	}
> +
> +	return NULL;
> +}
> +
>   static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
> 
>   static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
> @@ -1917,6 +2024,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   	bool pr = false;
>   	u32 msr = msr_info->index;
>   	u64 data = msr_info->data;
> +	u64 supported = 0;
> +	static const struct lbr_info *lbr = NULL;
> +	int i = 0;
> +	int value = 0;
> 
>   	switch (msr) {
>   	case MSR_AMD64_NB_CFG:
> @@ -1948,16 +2059,34 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   		}
>   		break;
>   	case MSR_IA32_DEBUGCTLMSR:
> -		if (!data) {
> -			/* We support the non-activated case already */
> -			break;
> -		} else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
> -			/* Values other than LBR and BTF are vendor-specific,
> -			   thus reserved and should throw a #GP */
> +		supported = DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
> +
> +		if (data & ~supported) {
> +			/* Bits other than LBR, BTF and FREEZE_LBRS_ON_PMI are not supported;
> +			 * treat them as reserved and fail the write so a #GP is raised */
> +			vcpu_unimpl(vcpu, "unsupported MSR_IA32_DEBUGCTLMSR wrmsr: 0x%llx\n", data);
>   			return 1;
>   		}
> -		vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
> -			    __func__, data);
> +
> +		if (data & DEBUGCTLMSR_LBR) {
> +			lbr = last_branch_msr_get();
> +			if (lbr == NULL)
> +				break;
> +
> +			for (; (value == 0) && lbr->count; lbr++)
> +				for (i = 0; (value == 0) && (i < lbr->count); i++)
> +					if ((value = kvm_x86_ops->add_atomic_switch_msr(vcpu, lbr->base + i, 0, 0)) == 0)
> +						kvm_x86_ops->disable_intercept_guest_msr(vcpu, lbr->base + i);
> +		}
> +
> +		if (value == 0) {
> +			kvm_x86_ops->vmcs_write64(GUEST_IA32_DEBUGCTL, data);
> +		}
> +		else {
> +			/* an LBR MSR could not be added to the switch list: throw a #GP */
> +			return 1;
> +		}
> +
>   		break;
>   	case 0x200 ... 0x2ff:
>   		return kvm_mtrr_set_msr(vcpu, msr, data);
> @@ -2178,9 +2307,11 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>   int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   {
>   	switch (msr_info->index) {
> +	case MSR_IA32_DEBUGCTLMSR:
> +		msr_info->data = kvm_x86_ops->vmcs_read64(GUEST_IA32_DEBUGCTL);
> +		break;
>   	case MSR_IA32_PLATFORM_ID:
>   	case MSR_IA32_EBL_CR_POWERON:
> -	case MSR_IA32_DEBUGCTLMSR:
>   	case MSR_IA32_LASTBRANCHFROMIP:
>   	case MSR_IA32_LASTBRANCHTOIP:
>   	case MSR_IA32_LASTINTFROMIP:
> --
> 1.7.12.4
> 
> 
> 
> .
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html