Loading CR3 as part of emulating vmentry is different from regular CR3 loads, as implemented in kvm_set_cr3, in several ways. * different rules are followed to check CR3 and it is desirable for the caller to distinguish between the possible failures * PDPTRs are not loaded if PAE paging and nested EPT are both enabled * many MMU operations are not necessary This patch introduces nested_vmx_load_cr3 suitable for CR3 loads as part of nested vmentry and vmexit, and makes use of it on the nested vmentry path. Signed-off-by: Ladi Prosek <lprosek@xxxxxxxxxx> --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx.c | 59 +++++++++++++++++++++++++++++------------ arch/x86/kvm/x86.c | 3 ++- 3 files changed, 45 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index bdde807..9ec06a3 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1062,6 +1062,7 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3); +bool pdptrs_changed(struct kvm_vcpu *vcpu); int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, const void *val, int bytes); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 70598a1..58dc7f1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9796,6 +9796,44 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) } /* + * Load guest's/host's cr3 at nested entry/exit. nested_ept is true if we are + * emulating VM entry into a guest with EPT enabled. + * Returns 0 on success, 1 on failure. Invalid state exit qualification code + * is assigned to entry_failure_code on failure. + */ +static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept, + unsigned long *entry_failure_code) +{ + unsigned long invalid_mask; + + if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) { + invalid_mask = (~0ULL) << cpuid_maxphyaddr(vcpu); + if (cr3 & invalid_mask) { + *entry_failure_code = ENTRY_FAIL_DEFAULT; + return 1; + } + + /* + * If PAE paging and EPT are both on, CR3 is not used by the CPU and + * must not be dereferenced. + */ + if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu) && + !nested_ept) { + if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) { + *entry_failure_code = ENTRY_FAIL_PDPTE; + return 1; + } + } + + vcpu->arch.cr3 = cr3; + __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); + } + + kvm_mmu_reset_context(vcpu); + return 0; +} + +/* * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2 @@ -10118,23 +10156,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmx_set_cr4(vcpu, vmcs12->guest_cr4); vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12)); - /* - * Shadow page tables on either EPT or shadow page tables. - * If PAE and EPT are both on, CR3 is not used by the CPU and must not - * be dereferenced. - */ - if (is_pae(vcpu) && is_paging(vcpu) && !is_long_mode(vcpu) && - nested_ept_enabled) { - vcpu->arch.cr3 = vmcs12->guest_cr3; - __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); - } else { - if (kvm_set_cr3(vcpu, vmcs12->guest_cr3)) { - *entry_failure_code = ENTRY_FAIL_DEFAULT; - return 1; - } - } - - kvm_mmu_reset_context(vcpu); + /* Shadow page tables on either EPT or shadow page tables. */ + if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_ept_enabled, + entry_failure_code)) + return 1; if (!enable_ept) vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 78f6061..e4e06f9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -573,7 +573,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3) } EXPORT_SYMBOL_GPL(load_pdptrs); -static bool pdptrs_changed(struct kvm_vcpu *vcpu) +bool pdptrs_changed(struct kvm_vcpu *vcpu) { u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)]; bool changed = true; @@ -599,6 +599,7 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu) return changed; } +EXPORT_SYMBOL_GPL(pdptrs_changed); int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html