When userspace requests that a VCPU is not allowed to profile anymore via the KVM_ARM_VCPU_SPE_STOP attribute, keep all the register state in memory and trap all registers, not just the buffer registers, and don't copy any of this shadow state on the hardware. Signed-off-by: Alexandru Elisei <alexandru.elisei@xxxxxxx> --- arch/arm64/include/asm/kvm_hyp.h | 2 + arch/arm64/include/asm/kvm_spe.h | 14 +++++++ arch/arm64/include/uapi/asm/kvm.h | 3 ++ arch/arm64/kvm/arm.c | 9 ++++ arch/arm64/kvm/debug.c | 13 ++++-- arch/arm64/kvm/hyp/nvhe/debug-sr.c | 4 +- arch/arm64/kvm/hyp/nvhe/spe-sr.c | 24 +++++++++++ arch/arm64/kvm/hyp/vhe/spe-sr.c | 56 +++++++++++++++++++++++++ arch/arm64/kvm/spe.c | 67 ++++++++++++++++++++++++++++++ arch/arm64/kvm/sys_regs.c | 2 +- 10 files changed, 188 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 03bc51049996..ce365427b483 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -86,8 +86,10 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu); #ifdef __KVM_NVHE_HYPERVISOR__ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); +void __debug_save_spe(u64 *pmscr_el1); void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); +void __debug_restore_spe(u64 pmscr_el1); #ifdef CONFIG_KVM_ARM_SPE void __spe_save_host_state_nvhe(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); diff --git a/arch/arm64/include/asm/kvm_spe.h b/arch/arm64/include/asm/kvm_spe.h index d7d7b9e243de..f51561e3b43f 100644 --- a/arch/arm64/include/asm/kvm_spe.h +++ b/arch/arm64/include/asm/kvm_spe.h @@ -16,13 +16,23 @@ static __always_inline bool kvm_supports_spe(void) return static_branch_likely(&kvm_spe_available); } +/* Guest profiling disabled by the user. */ +#define KVM_VCPU_SPE_STOP_USER (1 << 0) +/* Stop profiling and exit to userspace when guest starts profiling. */ +#define KVM_VCPU_SPE_STOP_USER_EXIT (1 << 1) + struct kvm_vcpu_spe { bool initialized; /* SPE initialized for the VCPU */ int irq_num; /* Buffer management interrut number */ bool irq_level; /* 'true' if the interrupt is asserted at the VGIC */ bool hwirq_level; /* 'true' if the SPE hardware is asserting the interrupt */ + u64 flags; }; +#define kvm_spe_profiling_stopped(vcpu) \ + (((vcpu)->arch.spe.flags & KVM_VCPU_SPE_STOP_USER) || \ + ((vcpu)->arch.spe.flags & KVM_VCPU_SPE_STOP_USER_EXIT)) \ + struct kvm_spe { bool perfmon_capable; /* Is the VM perfmon_capable()? */ }; @@ -31,6 +41,7 @@ void kvm_spe_init_supported_cpus(void); void kvm_spe_vm_init(struct kvm *kvm); int kvm_spe_vcpu_first_run_init(struct kvm_vcpu *vcpu); void kvm_spe_sync_hwstate(struct kvm_vcpu *vcpu); +bool kvm_spe_exit_to_user(struct kvm_vcpu *vcpu); void kvm_spe_write_sysreg(struct kvm_vcpu *vcpu, int reg, u64 val); u64 kvm_spe_read_sysreg(struct kvm_vcpu *vcpu, int reg); @@ -48,6 +59,8 @@ int kvm_spe_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); struct kvm_vcpu_spe { }; +#define kvm_spe_profiling_stopped(vcpu) (false) + struct kvm_spe { }; @@ -55,6 +68,7 @@ static inline void kvm_spe_init_supported_cpus(void) {} static inline void kvm_spe_vm_init(struct kvm *kvm) {} static inline int kvm_spe_vcpu_first_run_init(struct kvm_vcpu *vcpu) { return -ENOEXEC; } static inline void kvm_spe_sync_hwstate(struct kvm_vcpu *vcpu) {} +static inline bool kvm_spe_exit_to_user(struct kvm_vcpu *vcpu) { return false; } static inline void kvm_spe_write_sysreg(struct kvm_vcpu *vcpu, int reg, u64 val) {} static inline u64 kvm_spe_read_sysreg(struct kvm_vcpu *vcpu, int reg) { return 0; } diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 75a5113f610e..63599ee39a7b 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -376,6 +376,9 @@ struct kvm_arm_copy_mte_tags { #define KVM_ARM_VCPU_SPE_STOP_EXIT (1 << 1) #define KVM_ARM_VCPU_SPE_RESUME (1 << 2) +/* run->fail_entry.hardware_entry_failure_reason codes. */ +#define KVM_EXIT_FAIL_ENTRY_SPE (1 << 0) + /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_VCPU2_SHIFT 28 #define KVM_ARM_IRQ_VCPU2_MASK 0xf diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index ec449bc5f811..b7aae25bb9da 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -873,6 +873,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) continue; } + if (unlikely(kvm_spe_exit_to_user(vcpu))) { + run->exit_reason = KVM_EXIT_FAIL_ENTRY; + run->fail_entry.hardware_entry_failure_reason + = KVM_EXIT_FAIL_ENTRY_SPE; + ret = -EAGAIN; + preempt_enable(); + continue; + } + kvm_pmu_flush_hwstate(vcpu); local_irq_disable(); diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 6e5fc1887215..6a4277a23bbb 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -96,11 +96,18 @@ static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu) if (kvm_supports_spe() && kvm_vcpu_has_spe(vcpu)) { /* * Use EL1&0 for the profiling buffer translation regime and - * trap accesses to the buffer control registers; leave - * MDCR_EL2.TPMS unset and do not trap accesses to the profiling - * control registers. + * trap accesses to the buffer control registers; if profiling + * is stopped, also set MSCR_EL2.TMPS to trap accesses to the + * rest of the registers, otherwise leave it clear. + * + * Leaving MDCR_EL2.E2P unset, like we do when the VCPU does not + * have SPE, means that the PMBIDR_EL1.P (which KVM does not + * trap) will be set and the guest will detect SPE as being + * unavailable. */ vcpu->arch.mdcr_el2 |= MDCR_EL2_E2PB_EL1_TRAP << MDCR_EL2_E2PB_SHIFT; + if (kvm_spe_profiling_stopped(vcpu)) + vcpu->arch.mdcr_el2 |= MDCR_EL2_TPMS; } else { /* * Trap accesses to the profiling control registers; leave diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index 1622615954b2..944972de0944 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -14,7 +14,7 @@ #include <asm/kvm_hyp.h> #include <asm/kvm_mmu.h> -static void __debug_save_spe(u64 *pmscr_el1) +void __debug_save_spe(u64 *pmscr_el1) { u64 reg; @@ -40,7 +40,7 @@ static void __debug_save_spe(u64 *pmscr_el1) dsb(nsh); } -static void __debug_restore_spe(u64 pmscr_el1) +void __debug_restore_spe(u64 pmscr_el1) { if (!pmscr_el1) return; diff --git a/arch/arm64/kvm/hyp/nvhe/spe-sr.c b/arch/arm64/kvm/hyp/nvhe/spe-sr.c index b74131486a75..8ed03aa4f965 100644 --- a/arch/arm64/kvm/hyp/nvhe/spe-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/spe-sr.c @@ -23,6 +23,11 @@ void __spe_save_host_state_nvhe(struct kvm_vcpu *vcpu, { u64 pmblimitr; + if (kvm_spe_profiling_stopped(vcpu)) { + __debug_save_spe(__ctxt_sys_reg(host_ctxt, PMSCR_EL1)); + return; + } + pmblimitr = read_sysreg_s(SYS_PMBLIMITR_EL1); if (pmblimitr & BIT(SYS_PMBLIMITR_EL1_E_SHIFT)) { psb_csync(); @@ -49,6 +54,13 @@ void __spe_save_guest_state_nvhe(struct kvm_vcpu *vcpu, { u64 pmbsr; + /* + * Profiling is stopped and all register accesses are trapped, nothing + * to save here. + */ + if (kvm_spe_profiling_stopped(vcpu)) + return; + if (read_sysreg_s(SYS_PMBLIMITR_EL1) & BIT(SYS_PMBLIMITR_EL1_E_SHIFT)) { psb_csync(); dsb(nsh); @@ -82,6 +94,11 @@ void __spe_save_guest_state_nvhe(struct kvm_vcpu *vcpu, void __spe_restore_host_state_nvhe(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt) { + if (kvm_spe_profiling_stopped(vcpu)) { + __debug_restore_spe(ctxt_sys_reg(host_ctxt, PMSCR_EL1)); + return; + } + __spe_restore_common_state(host_ctxt); write_sysreg_s(ctxt_sys_reg(host_ctxt, PMBPTR_EL1), SYS_PMBPTR_EL1); @@ -94,6 +111,13 @@ void __spe_restore_host_state_nvhe(struct kvm_vcpu *vcpu, void __spe_restore_guest_state_nvhe(struct kvm_vcpu *vcpu, struct kvm_cpu_context *guest_ctxt) { + /* + * Profiling is stopped and all register accesses are trapped, nothing + * to restore here. + */ + if (kvm_spe_profiling_stopped(vcpu)) + return; + __spe_restore_common_state(guest_ctxt); write_sysreg_s(ctxt_sys_reg(guest_ctxt, PMBPTR_EL1), SYS_PMBPTR_EL1); diff --git a/arch/arm64/kvm/hyp/vhe/spe-sr.c b/arch/arm64/kvm/hyp/vhe/spe-sr.c index ea4b3b69bb32..024a4c0618cc 100644 --- a/arch/arm64/kvm/hyp/vhe/spe-sr.c +++ b/arch/arm64/kvm/hyp/vhe/spe-sr.c @@ -10,6 +10,34 @@ #include <hyp/spe-sr.h> +static void __spe_save_host_buffer(u64 *pmscr_el2) +{ + u64 pmblimitr; + + /* Disable guest profiling. */ + write_sysreg_el1(0, SYS_PMSCR); + + pmblimitr = read_sysreg_s(SYS_PMBLIMITR_EL1); + if (!(pmblimitr & BIT(SYS_PMBLIMITR_EL1_E_SHIFT))) { + *pmscr_el2 = 0; + return; + } + + *pmscr_el2 = read_sysreg_el2(SYS_PMSCR); + + /* Disable profiling at EL2 so we can drain the buffer. */ + write_sysreg_el2(0, SYS_PMSCR); + isb(); + + /* + * We're going to change the buffer owning exception level when we + * activate traps, drain the buffer now. + */ + psb_csync(); + dsb(nsh); +} +NOKPROBE_SYMBOL(__spe_save_host_buffer); + /* * Disable host profiling, drain the buffer and save the host SPE context. * Extra care must be taken because profiling might be in progress. @@ -19,6 +47,11 @@ void __spe_save_host_state_vhe(struct kvm_vcpu *vcpu, { u64 pmblimitr, pmscr_el2; + if (kvm_spe_profiling_stopped(vcpu)) { + __spe_save_host_buffer(__ctxt_sys_reg(host_ctxt, PMSCR_EL2)); + return; + } + /* Disable profiling while the SPE context is being switched. */ pmscr_el2 = read_sysreg_el2(SYS_PMSCR); write_sysreg_el2(__vcpu_sys_reg(vcpu, PMSCR_EL2), SYS_PMSCR); @@ -50,6 +83,9 @@ void __spe_save_guest_state_vhe(struct kvm_vcpu *vcpu, { u64 pmblimitr, pmbsr; + if (kvm_spe_profiling_stopped(vcpu)) + return; + /* * We're at EL2 and the buffer owning regime is EL1, which means that * profiling is disabled. After we disable traps and restore the host's @@ -78,6 +114,18 @@ void __spe_save_guest_state_vhe(struct kvm_vcpu *vcpu, } NOKPROBE_SYMBOL(__spe_save_guest_state_vhe); +static void __spe_restore_host_buffer(u64 pmscr_el2) +{ + if (!pmscr_el2) + return; + + /* Synchronize MDCR_EL2 write. */ + isb(); + + write_sysreg_el2(pmscr_el2, SYS_PMSCR); +} +NOKPROBE_SYMBOL(__spe_restore_host_buffer); + /* * Restore the host SPE context. Special care must be taken because we're * potentially resuming a profiling session which was stopped when we saved the @@ -86,6 +134,11 @@ NOKPROBE_SYMBOL(__spe_save_guest_state_vhe); void __spe_restore_host_state_vhe(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt) { + if (kvm_spe_profiling_stopped(vcpu)) { + __spe_restore_host_buffer(ctxt_sys_reg(host_ctxt, PMSCR_EL2)); + return; + } + __spe_restore_common_state(host_ctxt); write_sysreg_s(ctxt_sys_reg(host_ctxt, PMBPTR_EL1), SYS_PMBPTR_EL1); @@ -115,6 +168,9 @@ NOKPROBE_SYMBOL(__spe_restore_host_state_vhe); void __spe_restore_guest_state_vhe(struct kvm_vcpu *vcpu, struct kvm_cpu_context *guest_ctxt) { + if (kvm_spe_profiling_stopped(vcpu)) + return; + __spe_restore_common_state(guest_ctxt); /* diff --git a/arch/arm64/kvm/spe.c b/arch/arm64/kvm/spe.c index 2630e777fe1d..69ca731ba9d3 100644 --- a/arch/arm64/kvm/spe.c +++ b/arch/arm64/kvm/spe.c @@ -140,6 +140,28 @@ void kvm_spe_sync_hwstate(struct kvm_vcpu *vcpu) kvm_spe_update_irq(vcpu, true); } +static bool kvm_spe_buffer_enabled(struct kvm_vcpu *vcpu) +{ + return !vcpu->arch.spe.irq_level && + (__vcpu_sys_reg(vcpu, PMBLIMITR_EL1) & BIT(SYS_PMBLIMITR_EL1_E_SHIFT)); +} + +bool kvm_spe_exit_to_user(struct kvm_vcpu *vcpu) +{ + u64 pmscr_enabled_mask = BIT(SYS_PMSCR_EL1_E0SPE_SHIFT) | + BIT(SYS_PMSCR_EL1_E1SPE_SHIFT); + + if (!(vcpu->arch.spe.flags & KVM_VCPU_SPE_STOP_USER_EXIT)) + return false; + + /* + * We don't trap the guest dropping to EL0, so exit even if profiling is + * disabled at EL1, but enabled at EL0. + */ + return kvm_spe_buffer_enabled(vcpu) && + (__vcpu_sys_reg(vcpu, PMSCR_EL1) & pmscr_enabled_mask); +} + void kvm_spe_write_sysreg(struct kvm_vcpu *vcpu, int reg, u64 val) { __vcpu_sys_reg(vcpu, reg) = val; @@ -215,6 +237,31 @@ static bool kvm_spe_irq_is_valid(struct kvm *kvm, int irq) return true; } +static int kvm_spe_stop_user(struct kvm_vcpu *vcpu, int flags) +{ + struct kvm_vcpu_spe *spe = &vcpu->arch.spe; + + if (flags & KVM_ARM_VCPU_SPE_STOP_TRAP) { + if (flags & ~KVM_ARM_VCPU_SPE_STOP_TRAP) + return -EINVAL; + spe->flags = KVM_VCPU_SPE_STOP_USER; + } + + if (flags & KVM_ARM_VCPU_SPE_STOP_EXIT) { + if (flags & ~KVM_ARM_VCPU_SPE_STOP_EXIT) + return -EINVAL; + spe->flags = KVM_VCPU_SPE_STOP_USER_EXIT; + } + + if (flags & KVM_ARM_VCPU_SPE_RESUME) { + if (flags & ~KVM_ARM_VCPU_SPE_RESUME) + return -EINVAL; + spe->flags = 0; + } + + return 0; +} + int kvm_spe_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { if (!kvm_vcpu_supports_spe(vcpu)) @@ -268,6 +315,8 @@ int kvm_spe_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) if (!flags) return -EINVAL; + + return kvm_spe_stop_user(vcpu, flags); } } @@ -293,6 +342,24 @@ int kvm_spe_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return 0; } + case KVM_ARM_VCPU_SPE_STOP: { + int __user *uaddr = (int __user *)(long)attr->addr; + struct kvm_vcpu_spe *spe = &vcpu->arch.spe; + int flag = 0; + + if (!vcpu->arch.spe.initialized) + return -EAGAIN; + + if (spe->flags & KVM_VCPU_SPE_STOP_USER) + flag = KVM_ARM_VCPU_SPE_STOP_TRAP; + else if (spe->flags & KVM_VCPU_SPE_STOP_USER_EXIT) + flag = KVM_ARM_VCPU_SPE_STOP_EXIT; + + if (put_user(flag, uaddr)) + return -EFAULT; + + return 0; + } } return -ENXIO; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 064742cee425..cc711b081f31 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -608,7 +608,7 @@ static bool access_spe_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { int reg = r->reg; u64 val = p->regval; - if (reg < PMBLIMITR_EL1) { + if (reg < PMBLIMITR_EL1 && !kvm_spe_profiling_stopped(vcpu)) { print_sys_reg_msg(p, "Unsupported guest SPE register access at: %lx [%08lx]\n", *vcpu_pc(vcpu), *vcpu_cpsr(vcpu)); } -- 2.33.0 _______________________________________________ kvmarm mailing list kvmarm@xxxxxxxxxxxxxxxxxxxxx https://lists.cs.columbia.edu/mailman/listinfo/kvmarm