From: Fuad Tabba <tabba@xxxxxxxxxx>

Introduce separate EL2 entry/exit handlers for protected and
non-protected guests under pKVM and hook up the protected handlers to
expose the minimum amount of data to the host required for EL1
handling.

Signed-off-by: Fuad Tabba <tabba@xxxxxxxxxx>
---
 arch/arm64/kvm/hyp/nvhe/hyp-main.c | 230 ++++++++++++++++++++++++++++-
 1 file changed, 228 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index e987f34641dd..692576497ed9 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -20,6 +20,8 @@
 
 #include <linux/irqchip/arm-gic-v3.h>
 
+#include "../../sys_regs.h"
+
 /*
  * Host FPSIMD state. Written to when the guest accesses its own FPSIMD state,
  * and read when the guest state is live and we need to switch back to the host.
@@ -34,6 +36,207 @@ void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
 
 typedef void (*shadow_entry_exit_handler_fn)(struct kvm_vcpu *, struct kvm_vcpu *);
 
+static void handle_pvm_entry_wfx(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
+{
+	shadow_vcpu->arch.flags |= READ_ONCE(host_vcpu->arch.flags) &
+				   KVM_ARM64_INCREMENT_PC;
+}
+
+static void handle_pvm_entry_sys64(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
+{
+	unsigned long host_flags;
+
+	host_flags = READ_ONCE(host_vcpu->arch.flags);
+
+	/* Exceptions have priority over anything else */
+	if (host_flags & KVM_ARM64_PENDING_EXCEPTION) {
+		/* Exceptions caused by this should be undef exceptions. */
+		u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
+
+		__vcpu_sys_reg(shadow_vcpu, ESR_EL1) = esr;
+		shadow_vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
+					     KVM_ARM64_EXCEPT_MASK);
+		shadow_vcpu->arch.flags |= (KVM_ARM64_PENDING_EXCEPTION |
+					    KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
+					    KVM_ARM64_EXCEPT_AA64_EL1);
+
+		return;
+	}
+
+	if (host_flags & KVM_ARM64_INCREMENT_PC) {
+		shadow_vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
+					     KVM_ARM64_EXCEPT_MASK);
+		shadow_vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC;
+	}
+
+	if (!esr_sys64_to_params(shadow_vcpu->arch.fault.esr_el2).is_write) {
+		/* r0 as transfer register between the guest and the host. */
+		u64 rt_val = READ_ONCE(host_vcpu->arch.ctxt.regs.regs[0]);
+		int rt = kvm_vcpu_sys_get_rt(shadow_vcpu);
+
+		vcpu_set_reg(shadow_vcpu, rt, rt_val);
+	}
+}
+
+static void handle_pvm_entry_iabt(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
+{
+	unsigned long cpsr = *vcpu_cpsr(shadow_vcpu);
+	unsigned long host_flags;
+	u32 esr = ESR_ELx_IL;
+
+	host_flags = READ_ONCE(host_vcpu->arch.flags);
+
+	if (!(host_flags & KVM_ARM64_PENDING_EXCEPTION))
+		return;
+
+	/*
+	 * If the host wants to inject an exception, get syndrome and
+	 * fault address.
+	 */
+	if ((cpsr & PSR_MODE_MASK) == PSR_MODE_EL0t)
+		esr |= (ESR_ELx_EC_IABT_LOW << ESR_ELx_EC_SHIFT);
+	else
+		esr |= (ESR_ELx_EC_IABT_CUR << ESR_ELx_EC_SHIFT);
+
+	esr |= ESR_ELx_FSC_EXTABT;
+
+	__vcpu_sys_reg(shadow_vcpu, ESR_EL1) = esr;
+	__vcpu_sys_reg(shadow_vcpu, FAR_EL1) = kvm_vcpu_get_hfar(shadow_vcpu);
+
+	/* Tell the run loop that we want to inject something */
+	shadow_vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
+				     KVM_ARM64_EXCEPT_MASK);
+	shadow_vcpu->arch.flags |= (KVM_ARM64_PENDING_EXCEPTION |
+				    KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
+				    KVM_ARM64_EXCEPT_AA64_EL1);
+}
+
+static void handle_pvm_entry_dabt(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
+{
+	unsigned long host_flags;
+	bool rd_update;
+
+	host_flags = READ_ONCE(host_vcpu->arch.flags);
+
+	/* Exceptions have priority over anything else */
+	if (host_flags & KVM_ARM64_PENDING_EXCEPTION) {
+		unsigned long cpsr = *vcpu_cpsr(shadow_vcpu);
+		u32 esr = ESR_ELx_IL;
+
+		if ((cpsr & PSR_MODE_MASK) == PSR_MODE_EL0t)
+			esr |= (ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT);
+		else
+			esr |= (ESR_ELx_EC_DABT_CUR << ESR_ELx_EC_SHIFT);
+
+		esr |= ESR_ELx_FSC_EXTABT;
+
+		__vcpu_sys_reg(shadow_vcpu, ESR_EL1) = esr;
+		__vcpu_sys_reg(shadow_vcpu, FAR_EL1) = kvm_vcpu_get_hfar(shadow_vcpu);
+		/* Tell the run loop that we want to inject something */
+		shadow_vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
+					     KVM_ARM64_EXCEPT_MASK);
+		shadow_vcpu->arch.flags |= (KVM_ARM64_PENDING_EXCEPTION |
+					    KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
+					    KVM_ARM64_EXCEPT_AA64_EL1);
+
+		/* Cancel potential in-flight MMIO */
+		shadow_vcpu->mmio_needed = false;
+		return;
+	}
+
+	/* Handle PC increment on MMIO */
+	if ((host_flags & KVM_ARM64_INCREMENT_PC) && shadow_vcpu->mmio_needed) {
+		shadow_vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
+					     KVM_ARM64_EXCEPT_MASK);
+		shadow_vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC;
+	}
+
+	/* If we were doing an MMIO read access, update the register */
+	rd_update = (shadow_vcpu->mmio_needed &&
+		     (host_flags & KVM_ARM64_INCREMENT_PC));
+	rd_update &= !kvm_vcpu_dabt_iswrite(shadow_vcpu);
+
+	if (rd_update) {
+		/* r0 as transfer register between the guest and the host. */
+		u64 rd_val = READ_ONCE(host_vcpu->arch.ctxt.regs.regs[0]);
+		int rd = kvm_vcpu_dabt_get_rd(shadow_vcpu);
+
+		vcpu_set_reg(shadow_vcpu, rd, rd_val);
+	}
+
+	shadow_vcpu->mmio_needed = false;
+}
+
+static void handle_pvm_exit_wfx(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
+{
+	WRITE_ONCE(host_vcpu->arch.ctxt.regs.pstate,
+		   shadow_vcpu->arch.ctxt.regs.pstate & PSR_MODE_MASK);
+	WRITE_ONCE(host_vcpu->arch.fault.esr_el2,
+		   shadow_vcpu->arch.fault.esr_el2);
+}
+
+static void handle_pvm_exit_sys64(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
+{
+	u32 esr_el2 = shadow_vcpu->arch.fault.esr_el2;
+
+	/* r0 as transfer register between the guest and the host. */
+	WRITE_ONCE(host_vcpu->arch.fault.esr_el2,
+		   esr_el2 & ~ESR_ELx_SYS64_ISS_RT_MASK);
+
+	/* The mode is required for the host to emulate some sysregs */
+	WRITE_ONCE(host_vcpu->arch.ctxt.regs.pstate,
+		   shadow_vcpu->arch.ctxt.regs.pstate & PSR_MODE_MASK);
+
+	if (esr_sys64_to_params(esr_el2).is_write) {
+		int rt = kvm_vcpu_sys_get_rt(shadow_vcpu);
+		u64 rt_val = vcpu_get_reg(shadow_vcpu, rt);
+
+		WRITE_ONCE(host_vcpu->arch.ctxt.regs.regs[0], rt_val);
+	}
+}
+
+static void handle_pvm_exit_iabt(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
+{
+	WRITE_ONCE(host_vcpu->arch.fault.esr_el2,
+		   shadow_vcpu->arch.fault.esr_el2);
+	WRITE_ONCE(host_vcpu->arch.fault.hpfar_el2,
+		   shadow_vcpu->arch.fault.hpfar_el2);
+}
+
+static void handle_pvm_exit_dabt(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
+{
+	/*
+	 * For now, we treat all data aborts as MMIO since we have no knowledge
+	 * of the memslot configuration at EL2.
+	 */
+	shadow_vcpu->mmio_needed = true;
+
+	if (shadow_vcpu->mmio_needed) {
+		/* r0 as transfer register between the guest and the host. */
+		WRITE_ONCE(host_vcpu->arch.fault.esr_el2,
+			   shadow_vcpu->arch.fault.esr_el2 & ~ESR_ELx_SRT_MASK);
+
+		if (kvm_vcpu_dabt_iswrite(shadow_vcpu)) {
+			int rt = kvm_vcpu_dabt_get_rd(shadow_vcpu);
+			u64 rt_val = vcpu_get_reg(shadow_vcpu, rt);
+
+			WRITE_ONCE(host_vcpu->arch.ctxt.regs.regs[0], rt_val);
+		}
+	} else {
+		WRITE_ONCE(host_vcpu->arch.fault.esr_el2,
+			   shadow_vcpu->arch.fault.esr_el2 & ~ESR_ELx_ISV);
+	}
+
+	WRITE_ONCE(host_vcpu->arch.ctxt.regs.pstate,
+		   shadow_vcpu->arch.ctxt.regs.pstate & PSR_MODE_MASK);
+	WRITE_ONCE(host_vcpu->arch.fault.far_el2,
+		   shadow_vcpu->arch.fault.far_el2 & GENMASK(11, 0));
+	WRITE_ONCE(host_vcpu->arch.fault.hpfar_el2,
+		   shadow_vcpu->arch.fault.hpfar_el2);
+	WRITE_ONCE(__vcpu_sys_reg(host_vcpu, SCTLR_EL1),
+		   __vcpu_sys_reg(shadow_vcpu, SCTLR_EL1) & (SCTLR_ELx_EE | SCTLR_EL1_E0E));
+}
+
 static void handle_vm_entry_generic(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
 {
 	unsigned long host_flags = READ_ONCE(host_vcpu->arch.flags);
@@ -67,6 +270,22 @@ static void handle_vm_exit_abt(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shad
 		   shadow_vcpu->arch.fault.disr_el1);
 }
 
+static const shadow_entry_exit_handler_fn entry_pvm_shadow_handlers[] = {
+	[0 ... ESR_ELx_EC_MAX]		= NULL,
+	[ESR_ELx_EC_WFx]		= handle_pvm_entry_wfx,
+	[ESR_ELx_EC_SYS64]		= handle_pvm_entry_sys64,
+	[ESR_ELx_EC_IABT_LOW]		= handle_pvm_entry_iabt,
+	[ESR_ELx_EC_DABT_LOW]		= handle_pvm_entry_dabt,
+};
+
+static const shadow_entry_exit_handler_fn exit_pvm_shadow_handlers[] = {
+	[0 ... ESR_ELx_EC_MAX]		= NULL,
+	[ESR_ELx_EC_WFx]		= handle_pvm_exit_wfx,
+	[ESR_ELx_EC_SYS64]		= handle_pvm_exit_sys64,
+	[ESR_ELx_EC_IABT_LOW]		= handle_pvm_exit_iabt,
+	[ESR_ELx_EC_DABT_LOW]		= handle_pvm_exit_dabt,
+};
+
 static const shadow_entry_exit_handler_fn entry_vm_shadow_handlers[] = {
 	[0 ... ESR_ELx_EC_MAX]		= handle_vm_entry_generic,
 };
@@ -219,9 +438,13 @@ static void flush_shadow_state(struct kvm_shadow_vcpu_state *shadow_state)
 		break;
 	case ARM_EXCEPTION_TRAP:
 		esr_ec = ESR_ELx_EC(kvm_vcpu_get_esr(shadow_vcpu));
-		ec_handler = entry_vm_shadow_handlers[esr_ec];
+		if (shadow_state_is_protected(shadow_state))
+			ec_handler = entry_pvm_shadow_handlers[esr_ec];
+		else
+			ec_handler = entry_vm_shadow_handlers[esr_ec];
 		if (ec_handler)
 			ec_handler(host_vcpu, shadow_vcpu);
+
 		break;
 	default:
 		BUG();
@@ -251,7 +474,10 @@ static void sync_shadow_state(struct kvm_shadow_vcpu_state *shadow_state,
 		break;
 	case ARM_EXCEPTION_TRAP:
 		esr_ec = ESR_ELx_EC(kvm_vcpu_get_esr(shadow_vcpu));
-		ec_handler = exit_vm_shadow_handlers[esr_ec];
+		if (shadow_state_is_protected(shadow_state))
+			ec_handler = exit_pvm_shadow_handlers[esr_ec];
+		else
+			ec_handler = exit_vm_shadow_handlers[esr_ec];
 		if (ec_handler)
 			ec_handler(host_vcpu, shadow_vcpu);
 		break;
-- 
2.36.1.124.g0e6072fb45-goog
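
For reference, below is a minimal, untested user-space sketch of the exit-side dispatch this patch introduces: an ESR-EC-indexed table of handlers where only exception classes with a registered handler copy anything back to the host, and x0 acts as the transfer register. Every name (mock_*), constant, and handler body here is an illustrative stand-in, not the kernel's kvm_vcpu structures or ESR_ELx_* definitions.

/*
 * Untested, standalone sketch (not part of the patch): illustrates the
 * EC-indexed handler-table dispatch used by sync_shadow_state() above.
 * All types, constants and handler bodies are mock stand-ins.
 */
#include <stdio.h>

#define MOCK_EC_SHIFT		26
#define MOCK_EC_MAX		0x3f
#define MOCK_EC_WFX		0x01
#define MOCK_EC_DABT_LOW	0x24
#define MOCK_SRT_MASK		(0x1fUL << 16)	/* stand-in for ESR_ELx_SRT_MASK */

struct mock_vcpu {
	unsigned long esr_el2;		/* syndrome recorded at guest exit */
	unsigned long regs[31];		/* x0 doubles as the transfer register */
};

typedef void (*mock_exit_handler_fn)(struct mock_vcpu *host,
				     struct mock_vcpu *shadow);

/* Data abort: expose a sanitised syndrome and carry the data in x0. */
static void mock_exit_dabt(struct mock_vcpu *host, struct mock_vcpu *shadow)
{
	host->esr_el2 = shadow->esr_el2 & ~MOCK_SRT_MASK;
	host->regs[0] = shadow->regs[0];
}

/* WFx: the host only needs the syndrome to decide how to block the vcpu. */
static void mock_exit_wfx(struct mock_vcpu *host, struct mock_vcpu *shadow)
{
	host->esr_el2 = shadow->esr_el2;
}

/* Exception classes without an entry expose nothing to the host. */
static const mock_exit_handler_fn mock_exit_handlers[MOCK_EC_MAX + 1] = {
	[MOCK_EC_WFX]		= mock_exit_wfx,
	[MOCK_EC_DABT_LOW]	= mock_exit_dabt,
};

static void mock_sync_exit(struct mock_vcpu *host, struct mock_vcpu *shadow)
{
	unsigned long ec = (shadow->esr_el2 >> MOCK_EC_SHIFT) & MOCK_EC_MAX;
	mock_exit_handler_fn handler = mock_exit_handlers[ec];

	if (handler)
		handler(host, shadow);
}

int main(void)
{
	struct mock_vcpu host = { 0 }, shadow = { 0 };

	/* Pretend the guest took a lower-EL data abort with x0 = 0xf00. */
	shadow.esr_el2 = (unsigned long)MOCK_EC_DABT_LOW << MOCK_EC_SHIFT;
	shadow.regs[0] = 0xf00;

	mock_sync_exit(&host, &shadow);
	printf("host sees esr=%#lx x0=%#lx\n", host.esr_el2, host.regs[0]);
	return 0;
}

Keying the tables on the exception class keeps the protected path explicit about which syndrome bits each exit class exposes to the host; anything without a handler simply exposes nothing.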