Now that we can handle faults triggered through VNCR_EL2, we need to map the corresponding page at EL2. But where, you'll ask? Since each CPU in the system can run a vcpu, we need a per-CPU mapping. For that, we carve a NR_CPUS range in the fixmap, giving us a per-CPU va at which to map the guest's VNCR's page. The mapping occurs both on vcpu load and on the back of a fault, both generating a request that will take care of the mapping. That mapping will also get dropped on vcpu put. Yes, this is a bit heavy handed, but it is simple. Eventually, we may want to have a per-VM, per-CPU mapping, which would avoid all the TLBI overhead. Signed-off-by: Marc Zyngier <maz@xxxxxxxxxx> --- arch/arm64/include/asm/fixmap.h | 6 ++ arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/include/asm/kvm_nested.h | 7 +++ arch/arm64/kvm/nested.c | 98 ++++++++++++++++++++++++++--- 4 files changed, 103 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 87e307804b99c..635a43c4ec85b 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -48,6 +48,12 @@ enum fixed_addresses { FIX_EARLYCON_MEM_BASE, FIX_TEXT_POKE0, +#ifdef CONFIG_KVM + /* One slot per CPU, mapping the guest's VNCR page at EL2. */ + FIX_VNCR_END, + FIX_VNCR = FIX_VNCR_END + NR_CPUS, +#endif + #ifdef CONFIG_ACPI_APEI_GHES /* Used for GHES mapping from assorted contexts */ FIX_APEI_GHES_IRQ, diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 3cccf6fca4dfa..0e95f4adb72ba 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -623,6 +623,7 @@ struct kvm_host_data { #define KVM_HOST_DATA_FLAG_TRBE_ENABLED 4 #define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED 5 #define KVM_HOST_DATA_FLAG_VCPU_IN_HYP_CONTEXT 6 +#define KVM_HOST_DATA_FLAG_L1_VNCR_MAPPED 7 unsigned long flags; struct kvm_cpu_context host_ctxt; diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 53ff314b9ecd1..284bedee742ec 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -336,4 +336,11 @@ int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu); int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu); +#define vncr_fixmap(c) \ + ({ \ + u32 __c = (c); \ + BUG_ON(__c >= NR_CPUS); \ + (FIX_VNCR - __c); \ + }) + #endif /* __ARM64_KVM_NESTED_H */ diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 2c4991a84e003..51e1a3eba996a 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -8,6 +8,7 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> +#include <asm/fixmap.h> #include <asm/kvm_arm.h> #include <asm/kvm_emulate.h> #include <asm/kvm_mmu.h> @@ -702,23 +703,35 @@ void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu) void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu) { /* - * The vCPU kept its reference on the MMU after the last put, keep - * rolling with it. + * If the vCPU kept its reference on the MMU after the last put, + * keep rolling with it. */ - if (vcpu->arch.hw_mmu) - return; - if (is_hyp_ctxt(vcpu)) { - vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu; + if (!vcpu->arch.hw_mmu) + vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu; } else { - write_lock(&vcpu->kvm->mmu_lock); - vcpu->arch.hw_mmu = get_s2_mmu_nested(vcpu); - write_unlock(&vcpu->kvm->mmu_lock); + if (!vcpu->arch.hw_mmu) { + scoped_guard(write_lock, &vcpu->kvm->mmu_lock) + vcpu->arch.hw_mmu = get_s2_mmu_nested(vcpu); + } + + if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_NV) + kvm_make_request(KVM_REQ_MAP_L1_VNCR_EL2, vcpu); } } void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu) { + /* Unconditionally drop the VNCR mapping if we have one */ + if (host_data_test_flag(L1_VNCR_MAPPED)) { + BUG_ON(vcpu->arch.vncr_tlb->cpu != smp_processor_id()); + BUG_ON(is_hyp_ctxt(vcpu)); + + clear_fixmap(vncr_fixmap(vcpu->arch.vncr_tlb->cpu)); + vcpu->arch.vncr_tlb->cpu = -1; + host_data_clear_flag(L1_VNCR_MAPPED); + } + /* * Keep a reference on the associated stage-2 MMU if the vCPU is * scheduling out and not in WFI emulation, suggesting it is likely to @@ -1039,6 +1052,70 @@ int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu) return 1; } +static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu) +{ + struct vncr_tlb *vt = vcpu->arch.vncr_tlb; + pgprot_t prot; + + guard(preempt)(); + guard(read_lock)(&vcpu->kvm->mmu_lock); + + /* + * The request to map VNCR may have raced against some other + * event, such as an interrupt, and may not be valid anymore. + */ + if (is_hyp_ctxt(vcpu)) + return; + + /* + * Check that the pseudo-TLB is valid and that VNCR_EL2 still + * contains the expected value. If it doesn't, we simply bail out + * without a mapping -- a transformed MSR/MRS will generate the + * fault and allows us to populate the pseudo-TLB. + */ + if (!vt->valid) + return; + + if (read_vncr_el2(vcpu) != vt->gva) + return; + + if (vt->wr.nG) { + u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2); + u64 ttbr = ((tcr & TCR_A1) ? + vcpu_read_sys_reg(vcpu, TTBR1_EL2) : + vcpu_read_sys_reg(vcpu, TTBR0_EL2)); + u16 asid; + + asid = FIELD_GET(TTBR_ASID_MASK, ttbr); + if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) || + !(tcr & TCR_ASID16)) + asid &= GENMASK(7, 0); + + if (asid != vt->wr.asid) + return; + } + + vt->cpu = smp_processor_id(); + + if (vt->wr.pw && vt->wr.pr) + prot = PAGE_KERNEL; + else if (vt->wr.pr) + prot = PAGE_KERNEL_RO; + else + prot = PAGE_NONE; + + /* + * We can't map write-only (or no permission at all) in the kernel, + * but the guest can do it if using POE, so we'll have to turn a + * translation fault into a permission fault at runtime. + * FIXME: WO doesn't work at all, need POE support in the kernel. + */ + if (pgprot_val(prot) != pgprot_val(PAGE_NONE)) { + __set_fixmap(vncr_fixmap(vt->cpu), vt->hpa, prot); + host_data_set_flag(L1_VNCR_MAPPED); + } +} + /* * Our emulated CPU doesn't support all the possible features. For the * sake of simplicity (and probably mental sanity), wipe out a number @@ -1551,4 +1628,7 @@ void check_nested_vcpu_requests(struct kvm_vcpu *vcpu) } write_unlock(&vcpu->kvm->mmu_lock); } + + if (kvm_check_request(KVM_REQ_MAP_L1_VNCR_EL2, vcpu)) + kvm_map_l1_vncr(vcpu); } -- 2.39.2