On 6/12/20 1:02 PM, Jim Mattson wrote: > On Thu, Jun 11, 2020 at 2:48 PM Babu Moger <babu.moger@xxxxxxx> wrote: >> >> INVPCID instruction handling is mostly same across both VMX and >> SVM. So, move the code to common x86.c. >> >> Signed-off-by: Babu Moger <babu.moger@xxxxxxx> >> --- >> arch/x86/kvm/vmx/vmx.c | 78 +----------------------------------------- >> arch/x86/kvm/x86.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++ >> arch/x86/kvm/x86.h | 2 + >> 3 files changed, 92 insertions(+), 77 deletions(-) >> >> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c >> index 170cc76a581f..d9c35f337da6 100644 >> --- a/arch/x86/kvm/vmx/vmx.c >> +++ b/arch/x86/kvm/vmx/vmx.c >> @@ -5477,29 +5477,15 @@ static int handle_invpcid(struct kvm_vcpu *vcpu) >> { >> u32 vmx_instruction_info; >> unsigned long type; >> - bool pcid_enabled; >> gva_t gva; >> - struct x86_exception e; >> - unsigned i; >> - unsigned long roots_to_free = 0; >> struct { >> u64 pcid; >> u64 gla; >> } operand; >> >> - if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) { >> - kvm_queue_exception(vcpu, UD_VECTOR); >> - return 1; >> - } >> - >> vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); >> type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); >> >> - if (type > 3) { >> - kvm_inject_gp(vcpu, 0); >> - return 1; >> - } >> - > > You've introduced some fault priority inversions by sinking the above > tests for #UD and #GP below the call to get_vmx_mem_address(), which > may raise #UD, #GP, or #SS. Oh, OK. I will restore the original order of the checks. Thanks for spotting it. 
> >> /* According to the Intel instruction reference, the memory operand >> * is read even if it isn't needed (e.g., for type==all) >> */ >> @@ -5508,69 +5494,7 @@ static int handle_invpcid(struct kvm_vcpu *vcpu) >> sizeof(operand), &gva)) >> return 1; >> >> - if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { >> - kvm_inject_emulated_page_fault(vcpu, &e); >> - return 1; >> - } >> - >> - if (operand.pcid >> 12 != 0) { >> - kvm_inject_gp(vcpu, 0); >> - return 1; >> - } >> - >> - pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE); >> - >> - switch (type) { >> - case INVPCID_TYPE_INDIV_ADDR: >> - if ((!pcid_enabled && (operand.pcid != 0)) || >> - is_noncanonical_address(operand.gla, vcpu)) { >> - kvm_inject_gp(vcpu, 0); >> - return 1; >> - } >> - kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid); >> - return kvm_skip_emulated_instruction(vcpu); >> - >> - case INVPCID_TYPE_SINGLE_CTXT: >> - if (!pcid_enabled && (operand.pcid != 0)) { >> - kvm_inject_gp(vcpu, 0); >> - return 1; >> - } >> - >> - if (kvm_get_active_pcid(vcpu) == operand.pcid) { >> - kvm_mmu_sync_roots(vcpu); >> - kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); >> - } >> - >> - for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) >> - if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].pgd) >> - == operand.pcid) >> - roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); >> - >> - kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free); >> - /* >> - * If neither the current cr3 nor any of the prev_roots use the >> - * given PCID, then nothing needs to be done here because a >> - * resync will happen anyway before switching to any other CR3. >> - */ >> - >> - return kvm_skip_emulated_instruction(vcpu); >> - >> - case INVPCID_TYPE_ALL_NON_GLOBAL: >> - /* >> - * Currently, KVM doesn't mark global entries in the shadow >> - * page tables, so a non-global flush just degenerates to a >> - * global flush. 
If needed, we could optimize this later by >> - * keeping track of global entries in shadow page tables. >> - */ >> - >> - /* fall-through */ >> - case INVPCID_TYPE_ALL_INCL_GLOBAL: >> - kvm_mmu_unload(vcpu); >> - return kvm_skip_emulated_instruction(vcpu); >> - >> - default: >> - BUG(); /* We have already checked above that type <= 3 */ >> - } >> + return kvm_handle_invpcid_types(vcpu, gva, type); >> } >> >> static int handle_pml_full(struct kvm_vcpu *vcpu) >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >> index 9e41b5135340..13373359608c 100644 >> --- a/arch/x86/kvm/x86.c >> +++ b/arch/x86/kvm/x86.c >> @@ -72,6 +72,7 @@ >> #include <asm/hypervisor.h> >> #include <asm/intel_pt.h> >> #include <asm/emulate_prefix.h> >> +#include <asm/tlbflush.h> >> #include <clocksource/hyperv_timer.h> >> >> #define CREATE_TRACE_POINTS >> @@ -10714,6 +10715,94 @@ u64 kvm_spec_ctrl_valid_bits(struct kvm_vcpu *vcpu) >> } >> EXPORT_SYMBOL_GPL(kvm_spec_ctrl_valid_bits); >> >> +int kvm_handle_invpcid_types(struct kvm_vcpu *vcpu, gva_t gva, >> + unsigned long type) >> +{ >> + unsigned long roots_to_free = 0; >> + struct x86_exception e; >> + bool pcid_enabled; >> + unsigned i; >> + struct { >> + u64 pcid; >> + u64 gla; >> + } operand; >> + >> + if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) { >> + kvm_queue_exception(vcpu, UD_VECTOR); >> + return 1; >> + } >> + >> + if (type > 3) { >> + kvm_inject_gp(vcpu, 0); >> + return 1; >> + } >> + >> + if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { >> + kvm_inject_emulated_page_fault(vcpu, &e); >> + return 1; >> + } >> + >> + if (operand.pcid >> 12 != 0) { >> + kvm_inject_gp(vcpu, 0); >> + return 1; >> + } >> + >> + pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE); >> + >> + switch (type) { >> + case INVPCID_TYPE_INDIV_ADDR: >> + if ((!pcid_enabled && (operand.pcid != 0)) || >> + is_noncanonical_address(operand.gla, vcpu)) { >> + kvm_inject_gp(vcpu, 0); >> + return 1; >> + } >> + kvm_mmu_invpcid_gva(vcpu, 
operand.gla, operand.pcid); >> + return kvm_skip_emulated_instruction(vcpu); >> + >> + case INVPCID_TYPE_SINGLE_CTXT: >> + if (!pcid_enabled && (operand.pcid != 0)) { >> + kvm_inject_gp(vcpu, 0); >> + return 1; >> + } >> + >> + if (kvm_get_active_pcid(vcpu) == operand.pcid) { >> + kvm_mmu_sync_roots(vcpu); >> + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); >> + } >> + >> + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) >> + if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].pgd) >> + == operand.pcid) >> + roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); >> + >> + kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free); >> + /* >> + * If neither the current cr3 nor any of the prev_roots use the >> + * given PCID, then nothing needs to be done here because a >> + * resync will happen anyway before switching to any other CR3. >> + */ >> + >> + return kvm_skip_emulated_instruction(vcpu); >> + >> + case INVPCID_TYPE_ALL_NON_GLOBAL: >> + /* >> + * Currently, KVM doesn't mark global entries in the shadow >> + * page tables, so a non-global flush just degenerates to a >> + * global flush. If needed, we could optimize this later by >> + * keeping track of global entries in shadow page tables. 
>> + */ >> + >> + /* fall-through */ >> + case INVPCID_TYPE_ALL_INCL_GLOBAL: >> + kvm_mmu_unload(vcpu); >> + return kvm_skip_emulated_instruction(vcpu); >> + >> + default: >> + BUG(); /* We have already checked above that type <= 3 */ >> + } >> +} >> +EXPORT_SYMBOL_GPL(kvm_handle_invpcid_types); >> + >> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); >> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); >> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); >> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h >> index 6eb62e97e59f..8e23f2705344 100644 >> --- a/arch/x86/kvm/x86.h >> +++ b/arch/x86/kvm/x86.h >> @@ -365,5 +365,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu); >> void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu); >> u64 kvm_spec_ctrl_valid_bits(struct kvm_vcpu *vcpu); >> bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu); >> +int kvm_handle_invpcid_types(struct kvm_vcpu *vcpu, gva_t gva, >> + unsigned long type); >> >> #endif >>