Due to the way ARMv8.4-NV suppresses traps when accessing EL2 system
registers, we can't track when the guest changes its HCR_EL2.TGE
setting. This means we always trap EL1 TLBIs, even if they don't
affect any guest.

This obviously has a huge impact on performance, as we handle TLBI
traps as a normal exit, and a normal VHE host issues thousands of
TLBIs when booting (and quite a few when running userspace).

A cheap way to reduce the overhead is to handle the limited case of
{E2H,TGE}=={1,1} as a guest fixup, as we already have the right mmu
configuration in place. Just execute the decoded instruction right
away and return to the guest.

Signed-off-by: Marc Zyngier <maz@xxxxxxxxxx>
---
 arch/arm64/kvm/hyp/vhe/switch.c | 43 ++++++++++++++++++++++++++++++++-
 arch/arm64/kvm/hyp/vhe/tlb.c    |  6 +++--
 arch/arm64/kvm/sys_regs.c       | 12 ---------
 3 files changed, 46 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 238c6613cf47..a3555b90d9e1 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -168,6 +168,47 @@ void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu)
 	__deactivate_traps_common(vcpu);
 }
 
+static bool kvm_hyp_handle_tlbi_el1(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+	u32 instr;
+	u64 val;
+
+	/*
+	 * Ideally, we would never trap on EL1 TLB invalidations when the
+	 * guest's HCR_EL2.{E2H,TGE} == {1,1}. But "thanks" to ARMv8.4, we
+	 * don't trap writes to HCR_EL2, meaning that we can't track
+	 * changes to the virtual TGE bit. So we leave HCR_EL2.TTLB set on
+	 * the host. Oopsie...
+	 *
+	 * In order to speed up EL1 TLBIs from the vEL2 guest when TGE is
+	 * set, try and handle these invalidations as quickly as possible,
+	 * without fully exiting (unless this needs forwarding).
+	 */
+	if (!vcpu_has_nv2(vcpu) ||
+	    !vcpu_is_el2(vcpu) ||
+	    (__vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE))
+		return false;
+
+	instr = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
+	if (sys_reg_Op0(instr) != TLBI_Op0 ||
+	    sys_reg_Op1(instr) != TLBI_Op1_EL1)
+		return false;
+
+	val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
+	__kvm_tlb_el1_instr(NULL, val, instr);
+	__kvm_skip_instr(vcpu);
+
+	return true;
+}
+
+static bool kvm_hyp_handle_sysreg_vhe(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+	if (kvm_hyp_handle_tlbi_el1(vcpu, exit_code))
+		return true;
+
+	return kvm_hyp_handle_sysreg(vcpu, exit_code);
+}
+
 static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
 	struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
@@ -216,7 +257,7 @@ static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
 
 static const exit_handler_fn hyp_exit_handlers[] = {
 	[0 ... ESR_ELx_EC_MAX]		= NULL,
 	[ESR_ELx_EC_CP15_32]		= kvm_hyp_handle_cp15_32,
-	[ESR_ELx_EC_SYS64]		= kvm_hyp_handle_sysreg,
+	[ESR_ELx_EC_SYS64]		= kvm_hyp_handle_sysreg_vhe,
 	[ESR_ELx_EC_SVE]		= kvm_hyp_handle_fpsimd,
 	[ESR_ELx_EC_FP_ASIMD]		= kvm_hyp_handle_fpsimd,
 	[ESR_ELx_EC_IABT_LOW]		= kvm_hyp_handle_iabt_low,
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index c4389db4cc22..beb162468c0b 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -201,7 +201,8 @@ void __kvm_tlb_el1_instr(struct kvm_s2_mmu *mmu, u64 val, u64 sys_encoding)
 	dsb(ishst);
 
 	/* Switch to requested VMID */
-	__tlb_switch_to_guest(mmu, &cxt);
+	if (mmu)
+		__tlb_switch_to_guest(mmu, &cxt);
 
 	/*
 	 * Execute the same instruction as the guest hypervisor did,
@@ -240,5 +241,6 @@ void __kvm_tlb_el1_instr(struct kvm_s2_mmu *mmu, u64 val, u64 sys_encoding)
 	dsb(ish);
 	isb();
 
-	__tlb_switch_to_host(&cxt);
+	if (mmu)
+		__tlb_switch_to_host(&cxt);
 }
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 4a1c4a97f702..a671629fb366 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -3113,18 +3113,6 @@ static bool handle_tlbi_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 
 	WARN_ON(!vcpu_is_el2(vcpu));
 
-	if ((__vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
-		mutex_lock(&vcpu->kvm->lock);
-		/*
-		 * ARMv8.4-NV allows the guest to change TGE behind
-		 * our back, so we always trap EL1 TLBIs from vEL2...
-		 */
-		__kvm_tlb_el1_instr(&vcpu->kvm->arch.mmu, p->regval, sys_encoding);
-		mutex_unlock(&vcpu->kvm->lock);
-
-		return true;
-	}
-
 	kvm_s2_mmu_iterate_by_vmid(vcpu->kvm, get_vmid(vttbr),
 				   &(union tlbi_info) {
 					   .va = {
--
2.34.1
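
A note for readers on why the Op0/Op1 test in kvm_hyp_handle_tlbi_el1()
is all the decoding the fast path needs: the Op0/Op1/CRn/CRm/Op2
encoding of a trapped system instruction is recoverable from the ESR
(that is what esr_sys64_to_sysreg() does), and every EL1 TLB
maintenance instruction shares Op0 == 0b01 (the SYS space) and
Op1 == 0b000. The standalone sketch below illustrates that match,
assuming the kernel's sys_reg() bit packing (Op0 at bits [20:19],
Op1 at bits [16:14]); the constant and helper names here are
illustrative stand-ins, not the kernel's own.

	#include <stdbool.h>
	#include <stdint.h>

	#define EXAMPLE_TLBI_OP0	1	/* SYS instruction space: Op0 == 0b01 */
	#define EXAMPLE_TLBI_OP1_EL1	0	/* EL1 TLB maintenance: Op1 == 0b000 */

	/* Pull Op0/Op1 out of a packed encoding, per the sys_reg() layout. */
	static inline uint32_t example_op0(uint32_t instr) { return (instr >> 19) & 0x3; }
	static inline uint32_t example_op1(uint32_t instr) { return (instr >> 14) & 0x7; }

	/* True if a trapped encoding is an EL1 TLBI eligible for the fast path. */
	static bool example_is_el1_tlbi(uint32_t instr)
	{
		return example_op0(instr) == EXAMPLE_TLBI_OP0 &&
		       example_op1(instr) == EXAMPLE_TLBI_OP1_EL1;
	}

Anything failing this test (or the preceding HCR_EL2.{E2H,TGE} check)
still goes through kvm_hyp_handle_sysreg() and the full exit path as
before; only the {1,1} case is short-circuited, with the NULL mmu
telling __kvm_tlb_el1_instr() to skip the VMID switch because, as the
commit message notes, the right mmu configuration is already in place.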