On 10/24/2014 11:18 AM, Tiejun Chen wrote:
> Instead of vmx_init(), it actually makes more sense to do anything
> specific to the VMX hardware setup in vmx_x86_ops->hardware_setup().
>
> Signed-off-by: Tiejun Chen <tiejun.chen@xxxxxxxxx>

Please split this patch into multiple parts.  It is quite hard to
review this way.

Paolo

> ---
>  arch/x86/kvm/vmx.c | 720 +++++++++++++++++++++++++++--------------------------
>  1 file changed, 361 insertions(+), 359 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 04fa1b8..9270076 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3106,10 +3106,302 @@ static __init int alloc_kvm_area(void)
>          return 0;
>  }
>
> +#define MSR_TYPE_R 1
> +#define MSR_TYPE_W 2
> +static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
> +                                            u32 msr, int type)
> +{
> +        int f = sizeof(unsigned long);
> +
> +        if (!cpu_has_vmx_msr_bitmap())
> +                return;
> +
> +        /*
> +         * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> +         * have the write-low and read-high bitmap offsets the wrong way round.
> +         * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> +         */
> +        if (msr <= 0x1fff) {
> +                if (type & MSR_TYPE_R)
> +                        /* read-low */
> +                        __clear_bit(msr, msr_bitmap + 0x000 / f);
> +
> +                if (type & MSR_TYPE_W)
> +                        /* write-low */
> +                        __clear_bit(msr, msr_bitmap + 0x800 / f);
> +
> +        } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> +                msr &= 0x1fff;
> +                if (type & MSR_TYPE_R)
> +                        /* read-high */
> +                        __clear_bit(msr, msr_bitmap + 0x400 / f);
> +
> +                if (type & MSR_TYPE_W)
> +                        /* write-high */
> +                        __clear_bit(msr, msr_bitmap + 0xc00 / f);
> +
> +        }
> +}
> +
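[Aside, not part of the quoted patch: the bitmap layout described in the comment above is easy to sanity-check outside the kernel. In the sketch below, msr_bitmap_byte_offset() is an invented name; the constants come straight from the comment (read-low at 0x000, read-high at 0x400, write-low at 0x800, write-high at 0xc00).]

#include <stdio.h>

/* Byte offset into the 4K MSR bitmap of the bit that controls `msr`,
 * mirroring the range checks in __vmx_disable_intercept_for_msr().
 * Returns -1 for MSRs outside both ranges (always intercepted). */
static long msr_bitmap_byte_offset(unsigned int msr, int write)
{
        if (msr <= 0x1fff)
                return (write ? 0x800 : 0x000) + msr / 8;
        if (msr >= 0xc0000000 && msr <= 0xc0001fff)
                return (write ? 0xc00 : 0x400) + (msr & 0x1fff) / 8;
        return -1;
}

int main(void)
{
        /* MSR_FS_BASE is 0xc0000100; its read bit lands at byte 0x420. */
        printf("%#lx\n", msr_bitmap_byte_offset(0xc0000100, 0));
        return 0;
}

A set bit means "intercept", which is why hardware_setup() below fills the bitmaps with 0xff first and the disable helpers clear bits.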
> +static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
> +                                           u32 msr, int type)
> +{
> +        int f = sizeof(unsigned long);
> +
> +        if (!cpu_has_vmx_msr_bitmap())
> +                return;
> +
> +        /*
> +         * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> +         * have the write-low and read-high bitmap offsets the wrong way round.
> +         * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> +         */
> +        if (msr <= 0x1fff) {
> +                if (type & MSR_TYPE_R)
> +                        /* read-low */
> +                        __set_bit(msr, msr_bitmap + 0x000 / f);
> +
> +                if (type & MSR_TYPE_W)
> +                        /* write-low */
> +                        __set_bit(msr, msr_bitmap + 0x800 / f);
> +
> +        } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> +                msr &= 0x1fff;
> +                if (type & MSR_TYPE_R)
> +                        /* read-high */
> +                        __set_bit(msr, msr_bitmap + 0x400 / f);
> +
> +                if (type & MSR_TYPE_W)
> +                        /* write-high */
> +                        __set_bit(msr, msr_bitmap + 0xc00 / f);
> +
> +        }
> +}
> +
> +static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
> +{
> +        if (!longmode_only)
> +                __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
> +                                                msr, MSR_TYPE_R | MSR_TYPE_W);
> +        __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
> +                                        msr, MSR_TYPE_R | MSR_TYPE_W);
> +}
> +
> +static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
> +{
> +        __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> +                        msr, MSR_TYPE_R);
> +        __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> +                        msr, MSR_TYPE_R);
> +}
> +
> +static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
> +{
> +        __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> +                        msr, MSR_TYPE_R);
> +        __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> +                        msr, MSR_TYPE_R);
> +}
> +
> +static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
> +{
> +        __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> +                        msr, MSR_TYPE_W);
> +        __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> +                        msr, MSR_TYPE_W);
> +}
> +
> +static int vmx_vm_has_apicv(struct kvm *kvm)
> +{
> +        return enable_apicv && irqchip_in_kernel(kvm);
> +}
> +
> +static void ept_set_mmio_spte_mask(void)
> +{
> +        /*
> +         * EPT Misconfigurations can be generated if the value of bits 2:0
> +         * of an EPT paging-structure entry is 110b (write/execute).
> +         * Also, magic bits (0x3ull << 62) is set to quickly identify mmio
> +         * spte.
> +         */
> +        kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
> +}
> +
> +static int __grow_ple_window(int val)
> +{
> +        if (ple_window_grow < 1)
> +                return ple_window;
> +
> +        val = min(val, ple_window_actual_max);
> +
> +        if (ple_window_grow < ple_window)
> +                val *= ple_window_grow;
> +        else
> +                val += ple_window_grow;
> +
> +        return val;
> +}
> +
> +static int __shrink_ple_window(int val, int modifier, int minimum)
> +{
> +        if (modifier < 1)
> +                return ple_window;
> +
> +        if (modifier < ple_window)
> +                val /= modifier;
> +        else
> +                val -= modifier;
> +
> +        return max(val, minimum);
> +}
> +
> +static void grow_ple_window(struct kvm_vcpu *vcpu)
> +{
> +        struct vcpu_vmx *vmx = to_vmx(vcpu);
> +        int old = vmx->ple_window;
> +
> +        vmx->ple_window = __grow_ple_window(old);
> +
> +        if (vmx->ple_window != old)
> +                vmx->ple_window_dirty = true;
> +
> +        trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
> +}
> +
> +static void shrink_ple_window(struct kvm_vcpu *vcpu)
> +{
> +        struct vcpu_vmx *vmx = to_vmx(vcpu);
> +        int old = vmx->ple_window;
> +
> +        vmx->ple_window = __shrink_ple_window(old,
> +                                              ple_window_shrink, ple_window);
> +
> +        if (vmx->ple_window != old)
> +                vmx->ple_window_dirty = true;
> +
> +        trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
> +}
> +
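[Aside, not part of the quoted patch: the grow/shrink helpers above switch between multiplicative and additive updates depending on how the modifier compares to ple_window. A standalone sketch follows; ple_window = 4096 and ple_window_grow = 2 are assumed defaults for illustration, not values taken from this patch.]

#include <stdio.h>

static const int ple_window = 4096;               /* assumed default */
static const int ple_window_grow = 2;             /* assumed default */
static const int ple_window_actual_max = 1 << 30; /* placeholder clamp */

/* Mirrors __grow_ple_window(): a modifier smaller than ple_window
 * multiplies the window, a larger one is added to it instead. */
static int grow(int val)
{
        if (ple_window_grow < 1)
                return ple_window;
        if (val > ple_window_actual_max)
                val = ple_window_actual_max;
        return ple_window_grow < ple_window ? val * ple_window_grow
                                            : val + ple_window_grow;
}

int main(void)
{
        /* With the assumed defaults: 4096 -> 8192 -> 16384. */
        printf("%d -> %d -> %d\n", 4096, grow(4096), grow(grow(4096)));
        return 0;
}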
> +/*
> + * ple_window_actual_max is computed to be one grow_ple_window() below
> + * ple_window_max. (See __grow_ple_window for the reason.)
> + * This prevents overflows, because ple_window_max is int.
> + * ple_window_max effectively rounded down to a multiple of ple_window_grow in
> + * this process.
> + * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
> + */
> +static void update_ple_window_actual_max(void)
> +{
> +        ple_window_actual_max =
> +                        __shrink_ple_window(max(ple_window_max, ple_window),
> +                                            ple_window_grow, INT_MIN);
> +}
> +
> +
>  static __init int hardware_setup(void)
>  {
> -        if (setup_vmcs_config(&vmcs_config) < 0)
> -                return -EIO;
> +        int r = -ENOMEM, i, msr;
> +
> +        rdmsrl_safe(MSR_EFER, &host_efer);
> +
> +        for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
> +                kvm_define_shared_msr(i, vmx_msr_index[i]);
> +
> +        vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
> +        if (!vmx_io_bitmap_a)
> +                return r;
> +
> +        vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
> +        if (!vmx_io_bitmap_b)
> +                goto out;
> +
> +        vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
> +        if (!vmx_msr_bitmap_legacy)
> +                goto out1;
> +
> +        vmx_msr_bitmap_legacy_x2apic =
> +                        (unsigned long *)__get_free_page(GFP_KERNEL);
> +        if (!vmx_msr_bitmap_legacy_x2apic)
> +                goto out2;
> +
> +        vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
> +        if (!vmx_msr_bitmap_longmode)
> +                goto out3;
> +
> +        vmx_msr_bitmap_longmode_x2apic =
> +                        (unsigned long *)__get_free_page(GFP_KERNEL);
> +        if (!vmx_msr_bitmap_longmode_x2apic)
> +                goto out4;
> +        vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> +        if (!vmx_vmread_bitmap)
> +                goto out5;
> +
> +        vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> +        if (!vmx_vmwrite_bitmap)
> +                goto out6;
> +
> +        memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
> +        memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
> +
> +        /*
> +         * Allow direct access to the PC debug port (it is often used for I/O
> +         * delays, but the vmexits simply slow things down).
> +         */
> +        memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
> +        clear_bit(0x80, vmx_io_bitmap_a);
> +
> +        memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
> +
> +        memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
> +        memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
> +
> +        vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
> +        vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
> +        vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
> +        vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
> +        vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
> +        vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
> +        vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
> +
> +        memcpy(vmx_msr_bitmap_legacy_x2apic,
> +                        vmx_msr_bitmap_legacy, PAGE_SIZE);
> +        memcpy(vmx_msr_bitmap_longmode_x2apic,
> +                        vmx_msr_bitmap_longmode, PAGE_SIZE);
> +
> +        if (enable_apicv) {
> +                for (msr = 0x800; msr <= 0x8ff; msr++)
> +                        vmx_disable_intercept_msr_read_x2apic(msr);
> +
> +                /* According SDM, in x2apic mode, the whole id reg is used.
> +                 * But in KVM, it only use the highest eight bits. Need to
> +                 * intercept it */
> +                vmx_enable_intercept_msr_read_x2apic(0x802);
> +                /* TMCCT */
> +                vmx_enable_intercept_msr_read_x2apic(0x839);
> +                /* TPR */
> +                vmx_disable_intercept_msr_write_x2apic(0x808);
> +                /* EOI */
> +                vmx_disable_intercept_msr_write_x2apic(0x80b);
> +                /* SELF-IPI */
> +                vmx_disable_intercept_msr_write_x2apic(0x83f);
> +        }
> +
> +        if (enable_ept) {
> +                kvm_mmu_set_mask_ptes(0ull,
> +                        (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
> +                        (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
> +                        0ull, VMX_EPT_EXECUTABLE_MASK);
> +                ept_set_mmio_spte_mask();
> +                kvm_enable_tdp();
> +        } else
> +                kvm_disable_tdp();
> +
> +        update_ple_window_actual_max();
> +
> +        if (setup_vmcs_config(&vmcs_config) < 0) {
> +                r = -EIO;
> +                goto out7;
> +        }
>  
>          if (boot_cpu_has(X86_FEATURE_NX))
>                  kvm_enable_efer_bits(EFER_NX);
> @@ -3169,10 +3461,38 @@ static __init int hardware_setup(void)
>          nested_vmx_setup_ctls_msrs();
>  
>          return alloc_kvm_area();
> +
> +out7:
> +        free_page((unsigned long)vmx_vmwrite_bitmap);
> +out6:
> +        free_page((unsigned long)vmx_vmread_bitmap);
> +out5:
> +        free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> +out4:
> +        free_page((unsigned long)vmx_msr_bitmap_longmode);
> +out3:
> +        free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> +out2:
> +        free_page((unsigned long)vmx_msr_bitmap_legacy);
> +out1:
> +        free_page((unsigned long)vmx_io_bitmap_b);
> +out:
> +        free_page((unsigned long)vmx_io_bitmap_a);
> +
> +        return r;
>  }
>  
>  static __exit void hardware_unsetup(void)
>  {
> +        free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> +        free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> +        free_page((unsigned long)vmx_msr_bitmap_legacy);
> +        free_page((unsigned long)vmx_msr_bitmap_longmode);
> +        free_page((unsigned long)vmx_io_bitmap_b);
> +        free_page((unsigned long)vmx_io_bitmap_a);
> +        free_page((unsigned long)vmx_vmwrite_bitmap);
> +        free_page((unsigned long)vmx_vmread_bitmap);
> +
>          free_kvm_area();
>  }
>
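[Aside, not part of the quoted patch: the new error path in hardware_setup() is the usual kernel goto-unwind ladder. Each label frees exactly what was allocated before the failing step, in reverse order, and hardware_unsetup() mirrors the full list. A minimal standalone sketch of the idiom, with invented names:]

#include <errno.h>
#include <stdlib.h>

static void *a, *b, *c;

/* Allocate three resources; on failure, fall through the labels to
 * release only what has already succeeded, in reverse order. */
static int setup_three(void)
{
        int r = -ENOMEM;

        a = malloc(64);
        if (!a)
                return r;       /* nothing to undo yet */
        b = malloc(64);
        if (!b)
                goto out_a;
        c = malloc(64);
        if (!c)
                goto out_b;
        return 0;

out_b:
        free(b);
out_a:
        free(a);
        return r;
}

int main(void)
{
        return setup_three() ? 1 : 0;
}

Note how, in the patch, a setup_vmcs_config() failure jumps to out7 and therefore falls through every label, releasing all eight bitmap pages.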
> @@ -4057,162 +4377,52 @@ static int alloc_apic_access_page(struct kvm *kvm)
>          kvm->arch.apic_access_page_done = true;
>  out:
>          mutex_unlock(&kvm->slots_lock);
> -        return r;
> -}
> -
> -static int alloc_identity_pagetable(struct kvm *kvm)
> -{
> -        /* Called with kvm->slots_lock held. */
> -
> -        struct kvm_userspace_memory_region kvm_userspace_mem;
> -        int r = 0;
> -
> -        BUG_ON(kvm->arch.ept_identity_pagetable_done);
> -
> -        kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
> -        kvm_userspace_mem.flags = 0;
> -        kvm_userspace_mem.guest_phys_addr =
> -                kvm->arch.ept_identity_map_addr;
> -        kvm_userspace_mem.memory_size = PAGE_SIZE;
> -        r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
> -
> -        return r;
> -}
> -
> -static void allocate_vpid(struct vcpu_vmx *vmx)
> -{
> -        int vpid;
> -
> -        vmx->vpid = 0;
> -        if (!enable_vpid)
> -                return;
> -        spin_lock(&vmx_vpid_lock);
> -        vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
> -        if (vpid < VMX_NR_VPIDS) {
> -                vmx->vpid = vpid;
> -                __set_bit(vpid, vmx_vpid_bitmap);
> -        }
> -        spin_unlock(&vmx_vpid_lock);
> -}
> -
> -static void free_vpid(struct vcpu_vmx *vmx)
> -{
> -        if (!enable_vpid)
> -                return;
> -        spin_lock(&vmx_vpid_lock);
> -        if (vmx->vpid != 0)
> -                __clear_bit(vmx->vpid, vmx_vpid_bitmap);
> -        spin_unlock(&vmx_vpid_lock);
> -}
> -
> -#define MSR_TYPE_R 1
> -#define MSR_TYPE_W 2
> -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
> -                                            u32 msr, int type)
> -{
> -        int f = sizeof(unsigned long);
> -
> -        if (!cpu_has_vmx_msr_bitmap())
> -                return;
> -
> -        /*
> -         * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> -         * have the write-low and read-high bitmap offsets the wrong way round.
> -         * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> -         */
> -        if (msr <= 0x1fff) {
> -                if (type & MSR_TYPE_R)
> -                        /* read-low */
> -                        __clear_bit(msr, msr_bitmap + 0x000 / f);
> -
> -                if (type & MSR_TYPE_W)
> -                        /* write-low */
> -                        __clear_bit(msr, msr_bitmap + 0x800 / f);
> -
> -        } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> -                msr &= 0x1fff;
> -                if (type & MSR_TYPE_R)
> -                        /* read-high */
> -                        __clear_bit(msr, msr_bitmap + 0x400 / f);
> -
> -                if (type & MSR_TYPE_W)
> -                        /* write-high */
> -                        __clear_bit(msr, msr_bitmap + 0xc00 / f);
> -
> -        }
> -}
> -
> -static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
> -                                           u32 msr, int type)
> -{
> -        int f = sizeof(unsigned long);
> -
> -        if (!cpu_has_vmx_msr_bitmap())
> -                return;
> -
> -        /*
> -         * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> -         * have the write-low and read-high bitmap offsets the wrong way round.
> -         * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> -         */
> -        if (msr <= 0x1fff) {
> -                if (type & MSR_TYPE_R)
> -                        /* read-low */
> -                        __set_bit(msr, msr_bitmap + 0x000 / f);
> -
> -                if (type & MSR_TYPE_W)
> -                        /* write-low */
> -                        __set_bit(msr, msr_bitmap + 0x800 / f);
> -
> -        } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> -                msr &= 0x1fff;
> -                if (type & MSR_TYPE_R)
> -                        /* read-high */
> -                        __set_bit(msr, msr_bitmap + 0x400 / f);
> -
> -                if (type & MSR_TYPE_W)
> -                        /* write-high */
> -                        __set_bit(msr, msr_bitmap + 0xc00 / f);
> -
> -        }
> -}
> -
> -static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
> -{
> -        if (!longmode_only)
> -                __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
> -                                                msr, MSR_TYPE_R | MSR_TYPE_W);
> -        __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
> -                                        msr, MSR_TYPE_R | MSR_TYPE_W);
> -}
> -
> -static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
> -{
> -        __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> -                        msr, MSR_TYPE_R);
> -        __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> -                        msr, MSR_TYPE_R);
> +        return r;
>  }
>  
> -static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
> +static int alloc_identity_pagetable(struct kvm *kvm)
>  {
> -        __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> -                        msr, MSR_TYPE_R);
> -        __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> -                        msr, MSR_TYPE_R);
> +        /* Called with kvm->slots_lock held. */
> +
> +        struct kvm_userspace_memory_region kvm_userspace_mem;
> +        int r = 0;
> +
> +        BUG_ON(kvm->arch.ept_identity_pagetable_done);
> +
> +        kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
> +        kvm_userspace_mem.flags = 0;
> +        kvm_userspace_mem.guest_phys_addr =
> +                kvm->arch.ept_identity_map_addr;
> +        kvm_userspace_mem.memory_size = PAGE_SIZE;
> +        r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
> +
> +        return r;
>  }
>  
> -static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
> +static void allocate_vpid(struct vcpu_vmx *vmx)
>  {
> -        __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> -                        msr, MSR_TYPE_W);
> -        __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> -                        msr, MSR_TYPE_W);
> +        int vpid;
> +
> +        vmx->vpid = 0;
> +        if (!enable_vpid)
> +                return;
> +        spin_lock(&vmx_vpid_lock);
> +        vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
> +        if (vpid < VMX_NR_VPIDS) {
> +                vmx->vpid = vpid;
> +                __set_bit(vpid, vmx_vpid_bitmap);
> +        }
> +        spin_unlock(&vmx_vpid_lock);
>  }
>  
> -static int vmx_vm_has_apicv(struct kvm *kvm)
> +static void free_vpid(struct vcpu_vmx *vmx)
>  {
> -        return enable_apicv && irqchip_in_kernel(kvm);
> +        if (!enable_vpid)
> +                return;
> +        spin_lock(&vmx_vpid_lock);
> +        if (vmx->vpid != 0)
> +                __clear_bit(vmx->vpid, vmx_vpid_bitmap);
> +        spin_unlock(&vmx_vpid_lock);
>  }
>  
>  /*
> @@ -4376,17 +4586,6 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
>          return exec_control;
>  }
>  
> -static void ept_set_mmio_spte_mask(void)
> -{
> -        /*
> -         * EPT Misconfigurations can be generated if the value of bits 2:0
> -         * of an EPT paging-structure entry is 110b (write/execute).
> -         * Also, magic bits (0x3ull << 62) is set to quickly identify mmio
> -         * spte.
> -         */
> -        kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
> -}
> -
>  /*
>   * Sets up the vmcs for emulated real mode.
>   */
> @@ -5706,76 +5905,6 @@ out:
>          return ret;
>  }
>  
> -static int __grow_ple_window(int val)
> -{
> -        if (ple_window_grow < 1)
> -                return ple_window;
> -
> -        val = min(val, ple_window_actual_max);
> -
> -        if (ple_window_grow < ple_window)
> -                val *= ple_window_grow;
> -        else
> -                val += ple_window_grow;
> -
> -        return val;
> -}
> -
> -static int __shrink_ple_window(int val, int modifier, int minimum)
> -{
> -        if (modifier < 1)
> -                return ple_window;
> -
> -        if (modifier < ple_window)
> -                val /= modifier;
> -        else
> -                val -= modifier;
> -
> -        return max(val, minimum);
> -}
> -
> -static void grow_ple_window(struct kvm_vcpu *vcpu)
> -{
> -        struct vcpu_vmx *vmx = to_vmx(vcpu);
> -        int old = vmx->ple_window;
> -
> -        vmx->ple_window = __grow_ple_window(old);
> -
> -        if (vmx->ple_window != old)
> -                vmx->ple_window_dirty = true;
> -
> -        trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
> -}
> -
> -static void shrink_ple_window(struct kvm_vcpu *vcpu)
> -{
> -        struct vcpu_vmx *vmx = to_vmx(vcpu);
> -        int old = vmx->ple_window;
> -
> -        vmx->ple_window = __shrink_ple_window(old,
> -                                              ple_window_shrink, ple_window);
> -
> -        if (vmx->ple_window != old)
> -                vmx->ple_window_dirty = true;
> -
> -        trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
> -}
> -
> -/*
> - * ple_window_actual_max is computed to be one grow_ple_window() below
> - * ple_window_max. (See __grow_ple_window for the reason.)
> - * This prevents overflows, because ple_window_max is int.
> - * ple_window_max effectively rounded down to a multiple of ple_window_grow in
> - * this process.
> - * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
> - */
> -static void update_ple_window_actual_max(void)
> -{
> -        ple_window_actual_max =
> -                        __shrink_ple_window(max(ple_window_max, ple_window),
> -                                            ple_window_grow, INT_MIN);
> -}
> -
>  /*
>   * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
>   * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
> @@ -9158,150 +9287,23 @@ static struct kvm_x86_ops vmx_x86_ops = {
>  
>  static int __init vmx_init(void)
>  {
> -        int r, i, msr;
> -
> -        rdmsrl_safe(MSR_EFER, &host_efer);
> -
> -        for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
> -                kvm_define_shared_msr(i, vmx_msr_index[i]);
> -
> -        vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
> -        if (!vmx_io_bitmap_a)
> -                return -ENOMEM;
> -
> -        r = -ENOMEM;
> -
> -        vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
> -        if (!vmx_io_bitmap_b)
> -                goto out;
> -
> -        vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
> -        if (!vmx_msr_bitmap_legacy)
> -                goto out1;
> -
> -        vmx_msr_bitmap_legacy_x2apic =
> -                        (unsigned long *)__get_free_page(GFP_KERNEL);
> -        if (!vmx_msr_bitmap_legacy_x2apic)
> -                goto out2;
> -
> -        vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
> -        if (!vmx_msr_bitmap_longmode)
> -                goto out3;
> -
> -        vmx_msr_bitmap_longmode_x2apic =
> -                        (unsigned long *)__get_free_page(GFP_KERNEL);
> -        if (!vmx_msr_bitmap_longmode_x2apic)
> -                goto out4;
> -        vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> -        if (!vmx_vmread_bitmap)
> -                goto out5;
> -
> -        vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> -        if (!vmx_vmwrite_bitmap)
> -                goto out6;
> -
> -        memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
> -        memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
> -
> -        /*
> -         * Allow direct access to the PC debug port (it is often used for I/O
> -         * delays, but the vmexits simply slow things down).
> -         */
> -        memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
> -        clear_bit(0x80, vmx_io_bitmap_a);
> -
> -        memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
> -
> -        memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
> -        memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
> -
> -        set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
> +        int r = -ENOMEM;
>  
>          r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
>                       __alignof__(struct vcpu_vmx), THIS_MODULE);
>          if (r)
> -                goto out7;
> +                return r;
>  
>  #ifdef CONFIG_KEXEC
>          rcu_assign_pointer(crash_vmclear_loaded_vmcss,
>                             crash_vmclear_local_loaded_vmcss);
>  #endif
>  
> -        vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
> -        vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
> -        vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
> -        vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
> -        vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
> -        vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
> -        vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
> -
> -        memcpy(vmx_msr_bitmap_legacy_x2apic,
> -                        vmx_msr_bitmap_legacy, PAGE_SIZE);
> -        memcpy(vmx_msr_bitmap_longmode_x2apic,
> -                        vmx_msr_bitmap_longmode, PAGE_SIZE);
> -
> -        if (enable_apicv) {
> -                for (msr = 0x800; msr <= 0x8ff; msr++)
> -                        vmx_disable_intercept_msr_read_x2apic(msr);
> -
> -                /* According SDM, in x2apic mode, the whole id reg is used.
> -                 * But in KVM, it only use the highest eight bits. Need to
> -                 * intercept it */
> -                vmx_enable_intercept_msr_read_x2apic(0x802);
> -                /* TMCCT */
> -                vmx_enable_intercept_msr_read_x2apic(0x839);
> -                /* TPR */
> -                vmx_disable_intercept_msr_write_x2apic(0x808);
> -                /* EOI */
> -                vmx_disable_intercept_msr_write_x2apic(0x80b);
> -                /* SELF-IPI */
> -                vmx_disable_intercept_msr_write_x2apic(0x83f);
> -        }
> -
> -        if (enable_ept) {
> -                kvm_mmu_set_mask_ptes(0ull,
> -                        (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
> -                        (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
> -                        0ull, VMX_EPT_EXECUTABLE_MASK);
> -                ept_set_mmio_spte_mask();
> -                kvm_enable_tdp();
> -        } else
> -                kvm_disable_tdp();
> -
> -        update_ple_window_actual_max();
> -
>          return 0;
> -
> -out7:
> -        free_page((unsigned long)vmx_vmwrite_bitmap);
> -out6:
> -        free_page((unsigned long)vmx_vmread_bitmap);
> -out5:
> -        free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> -out4:
> -        free_page((unsigned long)vmx_msr_bitmap_longmode);
> -out3:
> -        free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> -out2:
> -        free_page((unsigned long)vmx_msr_bitmap_legacy);
> -out1:
> -        free_page((unsigned long)vmx_io_bitmap_b);
> -out:
> -        free_page((unsigned long)vmx_io_bitmap_a);
> -        return r;
>  }
>  
>  static void __exit vmx_exit(void)
>  {
> -        free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> -        free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> -        free_page((unsigned long)vmx_msr_bitmap_legacy);
> -        free_page((unsigned long)vmx_msr_bitmap_longmode);
> -        free_page((unsigned long)vmx_io_bitmap_b);
> -        free_page((unsigned long)vmx_io_bitmap_a);
> -        free_page((unsigned long)vmx_vmwrite_bitmap);
> -        free_page((unsigned long)vmx_vmread_bitmap);
> -
>  #ifdef CONFIG_KEXEC
>          RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
>          synchronize_rcu();
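[Aside, not part of the quoted patch: a decoding aid for the magic x2APIC numbers used in both hunks above. In x2APIC mode the APIC register at xAPIC MMIO offset O is addressed as MSR 0x800 + (O >> 4), so the intercepted registers work out as below; the short program is illustrative only.]

#include <stdio.h>

int main(void)
{
        static const struct { const char *name; unsigned int off; } regs[] = {
                { "APIC ID",  0x020 },  /* -> MSR 0x802 */
                { "TPR",      0x080 },  /* -> MSR 0x808 */
                { "EOI",      0x0b0 },  /* -> MSR 0x80b */
                { "TMCCT",    0x390 },  /* -> MSR 0x839 */
                { "SELF-IPI", 0x3f0 },  /* -> MSR 0x83f */
        };
        unsigned int i;

        for (i = 0; i < sizeof(regs) / sizeof(regs[0]); i++)
                printf("%-8s -> MSR 0x%x\n", regs[i].name,
                       0x800 + (regs[i].off >> 4));
        return 0;
}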