2016-08-09 2:16 GMT+08:00 Radim Krčmář <rkrcmar@xxxxxxxxxx>: > msr bitmap can be used to avoid a VM exit (interception) on guest MSR > accesses. In some configurations of VMX controls, the guest can even > directly access host's x2APIC MSRs. See SDM 29.5 VIRTUALIZING MSR-BASED > APIC ACCESSES. > > L2 could read all L0's x2APIC MSRs and write TPR, EOI, and SELF_IPI. > To do so, L1 would first trick KVM to disable all possible interceptions > by enabling APICv features and then would turn those features off; > nested_vmx_merge_msr_bitmap() only disabled interceptions, so VMX would > not intercept previously enabled MSRs even though they were not safe > with the new configuration. > > Correctly re-enabling interceptions is not enough as a second bug would > still allow L1+L2 to access host's MSRs: msr bitmap was shared for all > VMCSs, so L1 could trigger a race to get the desired combination of msr > bitmap and VMX controls. > > This fix allocates a msr bitmap for every L1 VCPU, allows only safe > x2APIC MSRs from L1's msr bitmap, and disables msr bitmaps if they would > have to intercept everything anyway. > > Fixes: 3af18d9c5fe9 ("KVM: nVMX: Prepare for using hardware MSR bitmap") > Reported-by: Jim Mattson <jmattson@xxxxxxxxxx> > Suggested-by: Wincy Van <fanwenyi0529@xxxxxxxxx> > Signed-off-by: Radim Krčmář <rkrcmar@xxxxxxxxxx> Reviewed-by: Wanpeng Li <wanpeng.li@xxxxxxxxxxx> > --- > arch/x86/kvm/vmx.c | 107 ++++++++++++++++++++++------------------------------- > 1 file changed, 44 insertions(+), 63 deletions(-) > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index a45d8580f91e..c66ac2c70d22 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -435,6 +435,8 @@ struct nested_vmx { > bool pi_pending; > u16 posted_intr_nv; > > + unsigned long *msr_bitmap; > + > struct hrtimer preemption_timer; > bool preemption_timer_expired; > > @@ -924,7 +926,6 @@ static unsigned long *vmx_msr_bitmap_legacy; > static unsigned long *vmx_msr_bitmap_longmode; > static unsigned long *vmx_msr_bitmap_legacy_x2apic; > static unsigned long *vmx_msr_bitmap_longmode_x2apic; > -static unsigned long *vmx_msr_bitmap_nested; > static unsigned long *vmx_vmread_bitmap; > static unsigned long *vmx_vmwrite_bitmap; > > @@ -2508,7 +2509,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) > unsigned long *msr_bitmap; > > if (is_guest_mode(vcpu)) > - msr_bitmap = vmx_msr_bitmap_nested; > + msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; > else if (cpu_has_secondary_exec_ctrls() && > (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & > SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { > @@ -6363,13 +6364,6 @@ static __init int hardware_setup(void) > if (!vmx_msr_bitmap_longmode_x2apic) > goto out4; > > - if (nested) { > - vmx_msr_bitmap_nested = > - (unsigned long *)__get_free_page(GFP_KERNEL); > - if (!vmx_msr_bitmap_nested) > - goto out5; > - } > - > vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); > if (!vmx_vmread_bitmap) > goto out6; > @@ -6392,8 +6386,6 @@ static __init int hardware_setup(void) > > memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); > memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); > - if (nested) > - memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE); > > if (setup_vmcs_config(&vmcs_config) < 0) { > r = -EIO; > @@ -6529,9 +6521,6 @@ out8: > out7: > free_page((unsigned long)vmx_vmread_bitmap); > out6: > - if (nested) > - free_page((unsigned long)vmx_msr_bitmap_nested); > -out5: > free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); > out4: > free_page((unsigned long)vmx_msr_bitmap_longmode); > @@ -6557,8 +6546,6 @@ static __exit void hardware_unsetup(void) > free_page((unsigned long)vmx_io_bitmap_a); > free_page((unsigned long)vmx_vmwrite_bitmap); > free_page((unsigned long)vmx_vmread_bitmap); > - if (nested) > - free_page((unsigned long)vmx_msr_bitmap_nested); > > free_kvm_area(); > } > @@ -6995,16 +6982,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu) > return 1; > } > > + if (cpu_has_vmx_msr_bitmap()) { > + vmx->nested.msr_bitmap = > + (unsigned long *)__get_free_page(GFP_KERNEL); > + if (!vmx->nested.msr_bitmap) > + goto out_msr_bitmap; > + } > + > vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); > if (!vmx->nested.cached_vmcs12) > - return -ENOMEM; > + goto out_cached_vmcs12; > > if (enable_shadow_vmcs) { > shadow_vmcs = alloc_vmcs(); > - if (!shadow_vmcs) { > - kfree(vmx->nested.cached_vmcs12); > - return -ENOMEM; > - } > + if (!shadow_vmcs) > + goto out_shadow_vmcs; > /* mark vmcs as shadow */ > shadow_vmcs->revision_id |= (1u << 31); > /* init shadow vmcs */ > @@ -7024,6 +7016,15 @@ static int handle_vmon(struct kvm_vcpu *vcpu) > skip_emulated_instruction(vcpu); > nested_vmx_succeed(vcpu); > return 1; > + > +out_shadow_vmcs: > + kfree(vmx->nested.cached_vmcs12); > + > +out_cached_vmcs12: > + free_page((unsigned long)vmx->nested.msr_bitmap); > + > +out_msr_bitmap: > + return -ENOMEM; > } > > /* > @@ -7098,6 +7099,10 @@ static void free_nested(struct vcpu_vmx *vmx) > vmx->nested.vmxon = false; > free_vpid(vmx->nested.vpid02); > nested_release_vmcs12(vmx); > + if (vmx->nested.msr_bitmap) { > + free_page((unsigned long)vmx->nested.msr_bitmap); > + vmx->nested.msr_bitmap = NULL; > + } > if (enable_shadow_vmcs) > free_vmcs(vmx->nested.current_shadow_vmcs); > kfree(vmx->nested.cached_vmcs12); > @@ -9472,8 +9477,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, > { > int msr; > struct page *page; > - unsigned long *msr_bitmap; > + unsigned long *msr_bitmap_l1; > + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; > > + /* This shortcut is ok because we support only x2APIC MSRs so far. */ > if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) > return false; > > @@ -9482,63 +9489,37 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, > WARN_ON(1); > return false; > } > - msr_bitmap = (unsigned long *)kmap(page); > - if (!msr_bitmap) { > + msr_bitmap_l1 = (unsigned long *)kmap(page); > + if (!msr_bitmap_l1) { > nested_release_page_clean(page); > WARN_ON(1); > return false; > } > > + memset(msr_bitmap_l0, 0xff, PAGE_SIZE); > + > if (nested_cpu_has_virt_x2apic_mode(vmcs12)) { > if (nested_cpu_has_apic_reg_virt(vmcs12)) > for (msr = 0x800; msr <= 0x8ff; msr++) > nested_vmx_disable_intercept_for_msr( > - msr_bitmap, > - vmx_msr_bitmap_nested, > + msr_bitmap_l1, msr_bitmap_l0, > msr, MSR_TYPE_R); > - /* TPR is allowed */ > - nested_vmx_disable_intercept_for_msr(msr_bitmap, > - vmx_msr_bitmap_nested, > + > + nested_vmx_disable_intercept_for_msr( > + msr_bitmap_l1, msr_bitmap_l0, > APIC_BASE_MSR + (APIC_TASKPRI >> 4), > MSR_TYPE_R | MSR_TYPE_W); > + > if (nested_cpu_has_vid(vmcs12)) { > - /* EOI and self-IPI are allowed */ > nested_vmx_disable_intercept_for_msr( > - msr_bitmap, > - vmx_msr_bitmap_nested, > + msr_bitmap_l1, msr_bitmap_l0, > APIC_BASE_MSR + (APIC_EOI >> 4), > MSR_TYPE_W); > nested_vmx_disable_intercept_for_msr( > - msr_bitmap, > - vmx_msr_bitmap_nested, > + msr_bitmap_l1, msr_bitmap_l0, > APIC_BASE_MSR + (APIC_SELF_IPI >> 4), > MSR_TYPE_W); > } > - } else { > - /* > - * Enable reading intercept of all the x2apic > - * MSRs. We should not rely on vmcs12 to do any > - * optimizations here, it may have been modified > - * by L1. > - */ > - for (msr = 0x800; msr <= 0x8ff; msr++) > - __vmx_enable_intercept_for_msr( > - vmx_msr_bitmap_nested, > - msr, > - MSR_TYPE_R); > - > - __vmx_enable_intercept_for_msr( > - vmx_msr_bitmap_nested, > - APIC_BASE_MSR + (APIC_TASKPRI >> 4), > - MSR_TYPE_W); > - __vmx_enable_intercept_for_msr( > - vmx_msr_bitmap_nested, > - APIC_BASE_MSR + (APIC_EOI >> 4), > - MSR_TYPE_W); > - __vmx_enable_intercept_for_msr( > - vmx_msr_bitmap_nested, > - APIC_BASE_MSR + (APIC_SELF_IPI >> 4), > - MSR_TYPE_W); > } > kunmap(page); > nested_release_page_clean(page); > @@ -9957,10 +9938,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) > } > > if (cpu_has_vmx_msr_bitmap() && > - exec_control & CPU_BASED_USE_MSR_BITMAPS) { > - nested_vmx_merge_msr_bitmap(vcpu, vmcs12); > - /* MSR_BITMAP will be set by following vmx_set_efer. */ > - } else > + exec_control & CPU_BASED_USE_MSR_BITMAPS && > + nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) > + ; /* MSR_BITMAP will be set by following vmx_set_efer. */ > + else > exec_control &= ~CPU_BASED_USE_MSR_BITMAPS; > > /* > -- > 2.9.2 > -- Regards, Wanpeng Li -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html