Re: [PATCH v9 7/7] KVM: X86: Add user-space access interface for CET MSRs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Subject should be something like "Enable CET virtualization", or maybe
move CPUID changes to a separate final patch?

On Fri, Dec 27, 2019 at 10:11:33AM +0800, Yang Weijiang wrote:
> There're two different places storing Guest CET states, states
> managed with XSAVES/XRSTORS, as restored/saved
> in previous patch, can be read/write directly from/to the MSRs.
> For those stored in VMCS fields, they're access via vmcs_read/
> vmcs_write.
> 
> Signed-off-by: Yang Weijiang <weijiang.yang@xxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h |   3 +-
>  arch/x86/kvm/cpuid.c            |   5 +-
>  arch/x86/kvm/vmx/vmx.c          | 138 ++++++++++++++++++++++++++++++++
>  arch/x86/kvm/x86.c              |  11 +++
>  4 files changed, 154 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 64bf379381e4..34140462084f 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -90,7 +90,8 @@
>  			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
>  			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
>  			  | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
> -			  | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
> +			  | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
> +			  | X86_CR4_CET))
>  
>  #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
>  
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 126a31b99823..4414bd110f3c 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -385,13 +385,14 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
>  		F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
>  		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
>  		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
> -		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
> +		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | F(SHSTK) |
> +		0 /*WAITPKG*/;
>  
>  	/* cpuid 7.0.edx*/
>  	const u32 kvm_cpuid_7_0_edx_x86_features =
>  		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
>  		F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
> -		F(MD_CLEAR);
> +		F(MD_CLEAR) | F(IBT);
>  
>  	/* cpuid 7.1.eax */
>  	const u32 kvm_cpuid_7_1_eax_x86_features =
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 0a75b65d03f0..52ac67604026 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1763,6 +1763,96 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
>  	return 0;
>  }
>  
> +#define CET_MSR_RSVD_BITS_1    0x3
> +#define CET_MSR_RSVD_BITS_2   (0xF << 6)

Would it make sense to use GENMASK?

> +static bool cet_ssp_write_allowed(struct kvm_vcpu *vcpu, struct msr_data *msr)
> +{
> +	u64 data = msr->data;
> +	u32 high_word = data >> 32;
> +
> +	if (is_64_bit_mode(vcpu)) {
> +		if (data & CET_MSR_RSVD_BITS_1)

This looks odd.  I assume it should look more like cet_ctl_write_allowed()?
E.g.

	if (data & CET_MSR_RSVD_BITS_1)
		return false;

	if (!is_64_bit_mode(vcpu) && high_word)
		return false;

> +			return false;
> +	} else if (high_word) {
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
> +static bool cet_ctl_write_allowed(struct kvm_vcpu *vcpu, struct msr_data *msr)
> +{
> +	u64 data = msr->data;
> +	u32 high_word = data >> 32;
> +
> +	if (data & CET_MSR_RSVD_BITS_2)
> +		return false;
> +
> +	if (!is_64_bit_mode(vcpu) && high_word)
> +		return false;
> +
> +	return true;
> +}
> +
> +static bool cet_ssp_access_allowed(struct kvm_vcpu *vcpu, struct msr_data *msr)
> +{
> +	u64 kvm_xss;
> +	u32 index = msr->index;
> +
> +	if (is_guest_mode(vcpu))

Hmm, this seems wrong, e.g. shouldn't WRMSR be allowed if L1 passes the MSR
to L2, which is the only way to reach this, if I'm not mistaken.

> +		return false;
> +
> +	if (!boot_cpu_has(X86_FEATURE_SHSTK))
> +		return false;
> +
> +	if (!msr->host_initiated &&
> +	    !guest_cpuid_has(vcpu, X86_FEATURE_SHSTK))
> +		return false;
> +
> +	if (index == MSR_IA32_INT_SSP_TAB)
> +		return true;
> +
> +	kvm_xss = kvm_supported_xss();
> +
> +	if (index == MSR_IA32_PL3_SSP) {
> +		if (!(kvm_xss & XFEATURE_MASK_CET_USER))
> +			return false;
> +	} else if (!(kvm_xss & XFEATURE_MASK_CET_KERNEL)) {
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
> +static bool cet_ctl_access_allowed(struct kvm_vcpu *vcpu, struct msr_data *msr)
> +{
> +	u64 kvm_xss;
> +	u32 index = msr->index;
> +
> +	if (is_guest_mode(vcpu))
> +		return false;
> +
> +	kvm_xss = kvm_supported_xss();
> +
> +	if (!boot_cpu_has(X86_FEATURE_SHSTK) &&
> +	    !boot_cpu_has(X86_FEATURE_IBT))
> +		return false;
> +
> +	if (!msr->host_initiated &&
> +	    !guest_cpuid_has(vcpu, X86_FEATURE_SHSTK) &&
> +	    !guest_cpuid_has(vcpu, X86_FEATURE_IBT))
> +		return false;
> +
> +	if (index == MSR_IA32_U_CET) {
> +		if (!(kvm_xss & XFEATURE_MASK_CET_USER))
> +			return false;
> +	} else if (!(kvm_xss & XFEATURE_MASK_CET_KERNEL)) {
> +		return false;
> +	}
> +
> +	return true;
> +}
>  /*
>   * Reads an msr value (of 'msr_index') into 'pdata'.
>   * Returns 0 on success, non-0 otherwise.
> @@ -1886,6 +1976,26 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		else
>  			msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
>  		break;
> +	case MSR_IA32_S_CET:
> +		if (!cet_ctl_access_allowed(vcpu, msr_info))
> +			return 1;
> +		msr_info->data = vmcs_readl(GUEST_S_CET);
> +		break;
> +	case MSR_IA32_INT_SSP_TAB:
> +		if (!cet_ssp_access_allowed(vcpu, msr_info))
> +			return 1;
> +		msr_info->data = vmcs_readl(GUEST_INTR_SSP_TABLE);
> +		break;
> +	case MSR_IA32_U_CET:
> +		if (!cet_ctl_access_allowed(vcpu, msr_info))
> +			return 1;
> +		rdmsrl(MSR_IA32_U_CET, msr_info->data);
> +		break;
> +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> +		if (!cet_ssp_access_allowed(vcpu, msr_info))
> +			return 1;
> +		rdmsrl(msr_info->index, msr_info->data);

Ugh, thought of another problem.  If a SoftIRQ runs after an IRQ it can
load the kernel FPU state.  So for all the XSAVES MSRs we'll need a helper
similar to vmx_write_guest_kernel_gs_base(), except XSAVES has to be even
more restrictive and disable IRQs entirely.  E.g.

static void vmx_get_xsave_msr(struct msr_data *msr_info)
{
	local_irq_disable();
	if (test_thread_flag(TIF_NEED_FPU_LOAD))
		switch_fpu_return();
	rdmsrl(msr_info->index, msr_info->data);
	local_irq_enable();
}

> +		break;
>  	case MSR_TSC_AUX:
>  		if (!msr_info->host_initiated &&
>  		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> @@ -2147,6 +2257,34 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		else
>  			vmx->pt_desc.guest.addr_a[index / 2] = data;
>  		break;
> +	case MSR_IA32_S_CET:
> +		if (!cet_ctl_access_allowed(vcpu, msr_info))
> +			return 1;
> +		if (!cet_ctl_write_allowed(vcpu, msr_info))
> +			return 1;
> +		vmcs_writel(GUEST_S_CET, data);
> +		break;
> +	case MSR_IA32_INT_SSP_TAB:
> +		if (!cet_ctl_access_allowed(vcpu, msr_info))
> +			return 1;
> +		if (!is_64_bit_mode(vcpu))
> +			return 1;
> +		vmcs_writel(GUEST_INTR_SSP_TABLE, data);
> +		break;
> +	case MSR_IA32_U_CET:
> +		if (!cet_ctl_access_allowed(vcpu, msr_info))
> +			return 1;
> +		if (!cet_ctl_write_allowed(vcpu, msr_info))
> +			return 1;
> +		wrmsrl(MSR_IA32_U_CET, data);
> +		break;
> +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> +		if (!cet_ssp_access_allowed(vcpu, msr_info))
> +			return 1;
> +		if (!cet_ssp_write_allowed(vcpu, msr_info))
> +			return 1;
> +		wrmsrl(msr_info->index, data);
> +		break;
>  	case MSR_TSC_AUX:
>  		if (!msr_info->host_initiated &&
>  		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 6dbe77365b22..7de6faa6aa51 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1186,6 +1186,10 @@ static const u32 msrs_to_save_all[] = {
>  	MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
>  	MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
>  	MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
> +
> +	MSR_IA32_XSS, MSR_IA32_U_CET, MSR_IA32_S_CET,
> +	MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP, MSR_IA32_PL2_SSP,
> +	MSR_IA32_PL3_SSP, MSR_IA32_INT_SSP_TAB,
>  };
>  
>  static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
> @@ -1468,6 +1472,13 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
>  		 * invokes 64-bit SYSENTER.
>  		 */
>  		data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
> +		break;
> +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> +	case MSR_IA32_U_CET:
> +	case MSR_IA32_S_CET:
> +	case MSR_IA32_INT_SSP_TAB:
> +		if (is_noncanonical_address(data, vcpu))
> +			return 1;
>  	}
>  
>  	msr.data = data;
> -- 
> 2.17.2
> 



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux