This patch exposes the 5-level page table feature to the VM. At the same
time, canonical virtual address checking is extended to cover both 48-bit
and 57-bit address widths, which is a prerequisite for supporting guests
that use 5-level paging.

Signed-off-by: Liang Li <liang.z.li@xxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: Xiao Guangrong <guangrong.xiao@xxxxxxxxxxxxxxx>
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: "Radim Krčmář" <rkrcmar@xxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h | 12 ++++++------
 arch/x86/kvm/cpuid.c            | 15 +++++++++------
 arch/x86/kvm/emulate.c          | 15 ++++++++++-----
 arch/x86/kvm/kvm_cache_regs.h   |  7 ++++++-
 arch/x86/kvm/vmx.c              |  4 ++--
 arch/x86/kvm/x86.c              |  8 ++++++--
 6 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e505dac..57850b3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -86,8 +86,8 @@
 			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
 			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
 			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
-			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP \
-			  | X86_CR4_PKE))
+			  | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
+			  | X86_CR4_SMAP | X86_CR4_PKE))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 
@@ -1269,15 +1269,15 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
 	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
 }
 
-static inline u64 get_canonical(u64 la)
+static inline u64 get_canonical(u64 la, u8 vaddr_bits)
 {
-	return ((int64_t)la << 16) >> 16;
+	return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits);
 }
 
-static inline bool is_noncanonical_address(u64 la)
+static inline bool is_noncanonical_address(u64 la, u8 vaddr_bits)
 {
 #ifdef CONFIG_X86_64
-	return get_canonical(la) != la;
+	return get_canonical(la, vaddr_bits) != la;
 #else
 	return false;
 #endif
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index e85f6bd..69e8c1a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -126,13 +126,16 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 		kvm_x86_ops->fpu_activate(vcpu);
 
 	/*
-	 * The existing code assumes virtual address is 48-bit in the canonical
-	 * address checks; exit if it is ever changed.
+	 * The existing code assumes virtual address is 48-bit or 57-bit in the
+	 * canonical address checks; exit if it is ever changed.
 	 */
 	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
-	if (best && ((best->eax & 0xff00) >> 8) != 48 &&
-		((best->eax & 0xff00) >> 8) != 0)
-		return -EINVAL;
+	if (best) {
+		int vaddr_bits = (best->eax & 0xff00) >> 8;
+
+		if (vaddr_bits != 48 && vaddr_bits != 57 && vaddr_bits != 0)
+			return -EINVAL;
+	}
 
 	/* Update physical-address width */
 	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
@@ -383,7 +386,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 
 	/* cpuid 7.0.ecx*/
 	const u32 kvm_cpuid_7_0_ecx_x86_features =
-		F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/;
+		F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/;
 
 	/* cpuid 7.0.edx*/
 	const u32 kvm_cpuid_7_0_edx_x86_features =
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 56628a4..da01dd7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -676,6 +676,11 @@ static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
 	}
 }
 
+static __always_inline u8 virt_addr_bits(struct x86_emulate_ctxt *ctxt)
+{
+	return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
+}
+
 static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
 				       struct segmented_address addr,
 				       unsigned *max_size, unsigned size,
@@ -693,7 +698,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
 	switch (mode) {
 	case X86EMUL_MODE_PROT64:
 		*linear = la;
-		if (is_noncanonical_address(la))
+		if (is_noncanonical_address(la, virt_addr_bits(ctxt)))
 			goto bad;
 
 		*max_size = min_t(u64, ~0u, (1ull << 48) - la);
@@ -1721,7 +1726,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 		if (ret != X86EMUL_CONTINUE)
 			return ret;
 		if (is_noncanonical_address(get_desc_base(&seg_desc) |
-				((u64)base3 << 32)))
+				((u64)base3 << 32), virt_addr_bits(ctxt)))
 			return emulate_gp(ctxt, 0);
 	}
 load:
@@ -2796,8 +2801,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 		ss_sel = cs_sel + 8;
 		cs.d = 0;
 		cs.l = 1;
-		if (is_noncanonical_address(rcx) ||
-		    is_noncanonical_address(rdx))
+		if (is_noncanonical_address(rcx, virt_addr_bits(ctxt)) ||
+		    is_noncanonical_address(rdx, virt_addr_bits(ctxt)))
 			return emulate_gp(ctxt, 0);
 		break;
 	}
@@ -3712,7 +3717,7 @@ static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	if (ctxt->mode == X86EMUL_MODE_PROT64 &&
-	    is_noncanonical_address(desc_ptr.address))
+	    is_noncanonical_address(desc_ptr.address, virt_addr_bits(ctxt)))
 		return emulate_gp(ctxt, 0);
 	if (lgdt)
 		ctxt->ops->set_gdt(ctxt, &desc_ptr);
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 762cdf2..5daf75f 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -4,7 +4,7 @@
 #define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
 #define KVM_POSSIBLE_CR4_GUEST_BITS				  \
 	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
-	| X86_CR4_OSXMMEXCPT | X86_CR4_PGE)
+	| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE)
 
 static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
 					      enum kvm_reg reg)
@@ -78,6 +78,11 @@ static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu)
 	return kvm_read_cr4_bits(vcpu, ~0UL);
 }
 
+static inline u8 get_virt_addr_bits(struct kvm_vcpu *vcpu)
+{
+	return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48;
+}
+
 static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
 {
 	return (kvm_register_read(vcpu, VCPU_REGS_RAX) & -1u)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bfc9f0a..183a53e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -123,7 +123,7 @@
 	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 #define KVM_CR4_GUEST_OWNED_BITS				      \
 	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR      \
-	 | X86_CR4_OSXMMEXCPT | X86_CR4_TSD)
+	 | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD)
 
 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
 #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
@@ -7017,7 +7017,7 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
 		 * non-canonical form. This is the only check on the memory
 		 * destination for long mode!
 		 */
-		exn = is_noncanonical_address(*ret);
+		exn = is_noncanonical_address(*ret, get_virt_addr_bits(vcpu));
 	} else if (is_protmode(vcpu)) {
 		/* Protected mode: apply checks for segment validity in the
 		 * following order:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 51ccfe0..b935658 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -762,6 +762,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	if (!guest_cpuid_has_pku(vcpu) && (cr4 & X86_CR4_PKE))
 		return 1;
 
+	if (!guest_cpuid_has_la57(vcpu) && (cr4 & X86_CR4_LA57))
+		return 1;
+
 	if (is_long_mode(vcpu)) {
 		if (!(cr4 & X86_CR4_PAE))
 			return 1;
@@ -1074,7 +1077,8 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	case MSR_KERNEL_GS_BASE:
 	case MSR_CSTAR:
 	case MSR_LSTAR:
-		if (is_noncanonical_address(msr->data))
+		if (is_noncanonical_address(msr->data,
+			get_virt_addr_bits(vcpu)))
 			return 1;
 		break;
 	case MSR_IA32_SYSENTER_EIP:
@@ -1091,7 +1095,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		 * value, and that something deterministic happens if the guest
 		 * invokes 64-bit SYSENTER.
 		 */
-		msr->data = get_canonical(msr->data);
+		msr->data = get_canonical(msr->data, get_virt_addr_bits(vcpu));
 	}
 	return kvm_x86_ops->set_msr(vcpu, msr);
 }
-- 
1.9.1
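
P.S. For reviewers who want to see the widened canonicality check in
isolation, here is a minimal, self-contained userspace sketch. It is not
part of the patch; the helpers simply mirror the arithmetic of the patched
get_canonical()/is_noncanonical_address(), and the sample address is
illustrative only:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same sign-extension trick as the kernel helper: replicate bit
 * (vaddr_bits - 1) through bits 63:vaddr_bits. vaddr_bits is 48 for
 * 4-level paging and 57 when CR4.LA57 enables 5-level paging. */
static uint64_t get_canonical(uint64_t la, uint8_t vaddr_bits)
{
	return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits);
}

static bool is_noncanonical_address(uint64_t la, uint8_t vaddr_bits)
{
	return get_canonical(la, vaddr_bits) != la;
}

int main(void)
{
	/* Bit 47 set, bits 63:48 clear: non-canonical with 48-bit
	 * addressing, canonical once LA57 widens the range to 57 bits. */
	uint64_t la = 1ULL << 47;

	printf("48-bit: %scanonical\n", is_noncanonical_address(la, 48) ? "non-" : "");
	printf("57-bit: %scanonical\n", is_noncanonical_address(la, 57) ? "non-" : "");
	return 0;
}

With a 48-bit width, bits 63:47 must all be copies of bit 47; with
CR4.LA57 set, only bits 63:56 must match bit 56, which is why addresses
such as 1ULL << 47 become legal for a guest using 5-level paging.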