Re: [PATCH 1/3] KVM: X86: Move handling of INVPCID types to x86

Jim Mattson <jmattson@xxxxxxxxxx> · Fri, 12 Jun 2020 11:02:28 -0700

On Thu, Jun 11, 2020 at 2:48 PM Babu Moger <babu.moger@xxxxxxx> wrote:
>
> INVPCID instruction handling is mostly the same across both VMX and
> SVM, so move the code to the common x86.c.
>
> Signed-off-by: Babu Moger <babu.moger@xxxxxxx>
> ---
>  arch/x86/kvm/vmx/vmx.c |   78 +-----------------------------------------
>  arch/x86/kvm/x86.c     |   89 ++++++++++++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/x86.h     |    2 +
>  3 files changed, 92 insertions(+), 77 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 170cc76a581f..d9c35f337da6 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -5477,29 +5477,15 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
>  {
>         u32 vmx_instruction_info;
>         unsigned long type;
> -       bool pcid_enabled;
>         gva_t gva;
> -       struct x86_exception e;
> -       unsigned i;
> -       unsigned long roots_to_free = 0;
>         struct {
>                 u64 pcid;
>                 u64 gla;
>         } operand;
>
> -       if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
> -               kvm_queue_exception(vcpu, UD_VECTOR);
> -               return 1;
> -       }
> -
>         vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
>         type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
>
> -       if (type > 3) {
> -               kvm_inject_gp(vcpu, 0);
> -               return 1;
> -       }
> -

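You've introduced some fault priority inversions by sinking the above
tests for #UD and #GP below the call to get_vmx_mem_address(), which
may itself raise #UD, #GP, or #SS. With this change, a fault from the
memory operand address calculation is delivered ahead of the #UD for a
guest without INVPCID and ahead of the #GP for an invalid type, both of
which previously took priority.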

>         /* According to the Intel instruction reference, the memory operand
>          * is read even if it isn't needed (e.g., for type==all)
>          */
> @@ -5508,69 +5494,7 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
>                                 sizeof(operand), &gva))
>                 return 1;
>
> -       if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
> -               kvm_inject_emulated_page_fault(vcpu, &e);
> -               return 1;
> -       }
> -
> -       if (operand.pcid >> 12 != 0) {
> -               kvm_inject_gp(vcpu, 0);
> -               return 1;
> -       }
> -
> -       pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
> -
> -       switch (type) {
> -       case INVPCID_TYPE_INDIV_ADDR:
> -               if ((!pcid_enabled && (operand.pcid != 0)) ||
> -                   is_noncanonical_address(operand.gla, vcpu)) {
> -                       kvm_inject_gp(vcpu, 0);
> -                       return 1;
> -               }
> -               kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
> -               return kvm_skip_emulated_instruction(vcpu);
> -
> -       case INVPCID_TYPE_SINGLE_CTXT:
> -               if (!pcid_enabled && (operand.pcid != 0)) {
> -                       kvm_inject_gp(vcpu, 0);
> -                       return 1;
> -               }
> -
> -               if (kvm_get_active_pcid(vcpu) == operand.pcid) {
> -                       kvm_mmu_sync_roots(vcpu);
> -                       kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
> -               }
> -
> -               for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
> -                       if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].pgd)
> -                           == operand.pcid)
> -                               roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
> -
> -               kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
> -               /*
> -                * If neither the current cr3 nor any of the prev_roots use the
> -                * given PCID, then nothing needs to be done here because a
> -                * resync will happen anyway before switching to any other CR3.
> -                */
> -
> -               return kvm_skip_emulated_instruction(vcpu);
> -
> -       case INVPCID_TYPE_ALL_NON_GLOBAL:
> -               /*
> -                * Currently, KVM doesn't mark global entries in the shadow
> -                * page tables, so a non-global flush just degenerates to a
> -                * global flush. If needed, we could optimize this later by
> -                * keeping track of global entries in shadow page tables.
> -                */
> -
> -               /* fall-through */
> -       case INVPCID_TYPE_ALL_INCL_GLOBAL:
> -               kvm_mmu_unload(vcpu);
> -               return kvm_skip_emulated_instruction(vcpu);
> -
> -       default:
> -               BUG(); /* We have already checked above that type <= 3 */
> -       }
> +       return kvm_handle_invpcid_types(vcpu,  gva, type);
>  }
>
>  static int handle_pml_full(struct kvm_vcpu *vcpu)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 9e41b5135340..13373359608c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -72,6 +72,7 @@
>  #include <asm/hypervisor.h>
>  #include <asm/intel_pt.h>
>  #include <asm/emulate_prefix.h>
> +#include <asm/tlbflush.h>
>  #include <clocksource/hyperv_timer.h>
>
>  #define CREATE_TRACE_POINTS
> @@ -10714,6 +10715,94 @@ u64 kvm_spec_ctrl_valid_bits(struct kvm_vcpu *vcpu)
>  }
>  EXPORT_SYMBOL_GPL(kvm_spec_ctrl_valid_bits);
>
> +int kvm_handle_invpcid_types(struct kvm_vcpu *vcpu, gva_t gva,
> +                            unsigned long type)
> +{
> +       unsigned long roots_to_free = 0;
> +       struct x86_exception e;
> +       bool pcid_enabled;
> +       unsigned i;
> +       struct {
> +               u64 pcid;
> +               u64 gla;
> +       } operand;
> +
> +       if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
> +               kvm_queue_exception(vcpu, UD_VECTOR);
> +               return 1;
> +       }
> +
> +       if (type > 3) {
> +               kvm_inject_gp(vcpu, 0);
> +               return 1;
> +       }
> +
> +       if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
> +               kvm_inject_emulated_page_fault(vcpu, &e);
> +               return 1;
> +       }
> +
> +       if (operand.pcid >> 12 != 0) {
> +               kvm_inject_gp(vcpu, 0);
> +               return 1;
> +       }
> +
> +       pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
> +
> +       switch (type) {
> +       case INVPCID_TYPE_INDIV_ADDR:
> +               if ((!pcid_enabled && (operand.pcid != 0)) ||
> +                   is_noncanonical_address(operand.gla, vcpu)) {
> +                       kvm_inject_gp(vcpu, 0);
> +                       return 1;
> +               }
> +               kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
> +               return kvm_skip_emulated_instruction(vcpu);
> +
> +       case INVPCID_TYPE_SINGLE_CTXT:
> +               if (!pcid_enabled && (operand.pcid != 0)) {
> +                       kvm_inject_gp(vcpu, 0);
> +                       return 1;
> +               }
> +
> +               if (kvm_get_active_pcid(vcpu) == operand.pcid) {
> +                       kvm_mmu_sync_roots(vcpu);
> +                       kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
> +               }
> +
> +               for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
> +                       if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].pgd)
> +                           == operand.pcid)
> +                               roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
> +
> +               kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
> +               /*
> +                * If neither the current cr3 nor any of the prev_roots use the
> +                * given PCID, then nothing needs to be done here because a
> +                * resync will happen anyway before switching to any other CR3.
> +                */
> +
> +               return kvm_skip_emulated_instruction(vcpu);
> +
> +       case INVPCID_TYPE_ALL_NON_GLOBAL:
> +               /*
> +                * Currently, KVM doesn't mark global entries in the shadow
> +                * page tables, so a non-global flush just degenerates to a
> +                * global flush. If needed, we could optimize this later by
> +                * keeping track of global entries in shadow page tables.
> +                */
> +
> +               /* fall-through */
> +       case INVPCID_TYPE_ALL_INCL_GLOBAL:
> +               kvm_mmu_unload(vcpu);
> +               return kvm_skip_emulated_instruction(vcpu);
> +
> +       default:
> +               BUG(); /* We have already checked above that type <= 3 */
> +       }
> +}
> +EXPORT_SYMBOL_GPL(kvm_handle_invpcid_types);
> +
>  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
>  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
>  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 6eb62e97e59f..8e23f2705344 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -365,5 +365,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu);
>  void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu);
>  u64 kvm_spec_ctrl_valid_bits(struct kvm_vcpu *vcpu);
>  bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu);
> +int kvm_handle_invpcid_types(struct kvm_vcpu *vcpu, gva_t gva,
> +                            unsigned long type);
>
>  #endif
>