On 25/05/14 16:34, Christoffer Dall wrote: > On Tue, May 20, 2014 at 05:55:39PM +0100, Marc Zyngier wrote: >> Add handlers for all the AArch64 debug registers that are accessible >> from EL0 or EL1. The trapping code keeps track of the state of the >> debug registers, allowing for the switch code to implement a lazy >> switching strategy. >> >> Reviewed-by: Anup Patel <anup.patel@xxxxxxxxxx> >> Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx> >> --- >> arch/arm64/include/asm/kvm_asm.h | 28 ++++++-- >> arch/arm64/include/asm/kvm_host.h | 3 + >> arch/arm64/kvm/sys_regs.c | 130 +++++++++++++++++++++++++++++++++++++- >> 3 files changed, 151 insertions(+), 10 deletions(-) >> >> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h >> index 9fcd54b..e6b159a 100644 >> --- a/arch/arm64/include/asm/kvm_asm.h >> +++ b/arch/arm64/include/asm/kvm_asm.h >> @@ -43,14 +43,25 @@ >> #define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */ >> #define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ >> #define PAR_EL1 21 /* Physical Address Register */ >> +#define MDSCR_EL1 22 /* Monitor Debug System Control Register */ >> +#define DBGBCR0_EL1 23 /* Debug Breakpoint Control Registers (0-15) */ >> +#define DBGBCR15_EL1 38 >> +#define DBGBVR0_EL1 39 /* Debug Breakpoint Value Registers (0-15) */ >> +#define DBGBVR15_EL1 54 >> +#define DBGWCR0_EL1 55 /* Debug Watchpoint Control Registers (0-15) */ >> +#define DBGWCR15_EL1 70 >> +#define DBGWVR0_EL1 71 /* Debug Watchpoint Value Registers (0-15) */ >> +#define DBGWVR15_EL1 86 >> +#define MDCCINT_EL1 87 /* Monitor Debug Comms Channel Interrupt Enable Reg */ >> + >> /* 32bit specific registers. Keep them at the end of the range */ >> -#define DACR32_EL2 22 /* Domain Access Control Register */ >> -#define IFSR32_EL2 23 /* Instruction Fault Status Register */ >> -#define FPEXC32_EL2 24 /* Floating-Point Exception Control Register */ >> -#define DBGVCR32_EL2 25 /* Debug Vector Catch Register */ >> -#define TEECR32_EL1 26 /* ThumbEE Configuration Register */ >> -#define TEEHBR32_EL1 27 /* ThumbEE Handler Base Register */ >> -#define NR_SYS_REGS 28 >> +#define DACR32_EL2 88 /* Domain Access Control Register */ >> +#define IFSR32_EL2 89 /* Instruction Fault Status Register */ >> +#define FPEXC32_EL2 90 /* Floating-Point Exception Control Register */ >> +#define DBGVCR32_EL2 91 /* Debug Vector Catch Register */ >> +#define TEECR32_EL1 92 /* ThumbEE Configuration Register */ >> +#define TEEHBR32_EL1 93 /* ThumbEE Handler Base Register */ >> +#define NR_SYS_REGS 94 >> >> /* 32bit mapping */ >> #define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ >> @@ -87,6 +98,9 @@ >> #define ARM_EXCEPTION_IRQ 0 >> #define ARM_EXCEPTION_TRAP 1 >> >> +#define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 >> +#define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) >> + >> #ifndef __ASSEMBLY__ >> struct kvm; >> struct kvm_vcpu; >> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h >> index 0a1d697..4737961 100644 >> --- a/arch/arm64/include/asm/kvm_host.h >> +++ b/arch/arm64/include/asm/kvm_host.h >> @@ -101,6 +101,9 @@ struct kvm_vcpu_arch { >> /* Exception Information */ >> struct kvm_vcpu_fault_info fault; >> >> + /* Debug state */ >> + u64 debug_flags; >> + >> /* Pointer to host CPU context */ >> kvm_cpu_context_t *host_cpu_context; >> >> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c >> index c3d28f1..d46a965 100644 >> --- a/arch/arm64/kvm/sys_regs.c >> +++ b/arch/arm64/kvm/sys_regs.c >> @@ -30,6 +30,7 @@ >> #include <asm/kvm_mmu.h> >> #include <asm/cacheflush.h> >> #include <asm/cputype.h> >> +#include <asm/debug-monitors.h> >> #include <trace/events/kvm.h> >> >> #include "sys_regs.h" >> @@ -173,6 +174,58 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu, >> return read_zero(vcpu, p); >> } >> >> +static bool trap_oslsr_el1(struct kvm_vcpu *vcpu, >> + const struct sys_reg_params *p, >> + const struct sys_reg_desc *r) >> +{ >> + if (p->is_write) { >> + return ignore_write(vcpu, p); >> + } else { >> + *vcpu_reg(vcpu, p->Rt) = (1 << 3); >> + return true; >> + } >> +} >> + >> +static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu, >> + const struct sys_reg_params *p, >> + const struct sys_reg_desc *r) >> +{ >> + if (p->is_write) { >> + return ignore_write(vcpu, p); >> + } else { >> + *vcpu_reg(vcpu, p->Rt) = 0x2222; /* Implemented and disabled */ > > is this always safe? What happens when you stop trapping accesses to > this register and the hardware tells you something different? > > Are we assuming that this is always the case since otherwise none of > this works, or? No, that's probably a leftover from a previous implementation that didn't disable traps. I can make it return the actual value on the host, but we may see it change with migration anyway. >> + return true; >> + } >> +} >> + >> +/* >> + * Trap handler for DBG[BW][CV]Rn_EL1 and MDSCR_EL1. We track the >> + * "dirtiness" of the registers. >> + */ >> +static bool trap_debug_regs(struct kvm_vcpu *vcpu, >> + const struct sys_reg_params *p, >> + const struct sys_reg_desc *r) >> +{ >> + /* >> + * The best thing to do would be to trap MDSCR_EL1 >> + * independently, test if DBG_MDSCR_KDE or DBG_MDSCR_MDE is >> + * getting set, and only set the DIRTY bit in that case. > > this comment is really hard to understand in this patch without any > explanation of what the dirty flag does. Readers new to this code may > be in the same situation. Perhaps add a comment on the dirty bit (what > does this imply?) or explain the rationale here; iow. We want to avoid > world-switching all the DBG registers all the time, blah blah blah... Sure. >> + * >> + * Unfortunately, "old" Linux kernels tend to hit MDSCR_EL1 >> + * like a woodpecker on a tree, and it is better to disable >> + * trapping as soon as possible in this case. Some day, make >> + * this a tuneable... >> + */ >> + if (p->is_write) { >> + vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt); >> + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; >> + } else { >> + *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg); >> + } >> + >> + return true; >> +} >> + >> static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) >> { >> u64 amair; >> @@ -189,6 +242,21 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) >> vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff); >> } >> >> +/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go*/ >> +#define DBG_BCR_BVR_WCR_WVR_EL1(n) \ >> + /* DBGBVRn_EL1 */ \ >> + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100), \ >> + trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 }, \ > > Shouldn't the reg field here be DBGBVR0_EL1? > > >> + /* DBGBCRn_EL1 */ \ >> + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101), \ >> + trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 }, \ > > Shouldn't the reg field here be DBGBCR0_EL1? > >> + /* DBGWVRn_EL1 */ \ >> + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110), \ >> + trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 }, \ > > and DBGWVR0_EL1 here? > >> + /* DBGWCRn_EL1 */ \ >> + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \ >> + trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 } > > and DBGWCR0_EL1 here? Blah. Nice catch! >> + >> /* >> * Architected system registers. >> * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 >> @@ -200,9 +268,6 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) >> * Therefore we tell the guest we have 0 counters. Unfortunately, we >> * must always support PMCCNTR (the cycle counter): we just RAZ/WI for >> * all PM registers, which doesn't crash the guest kernel at least. >> - * >> - * Same goes for the whole debug infrastructure, which probably breaks >> - * some guest functionnality. This should be fixed. >> */ >> static const struct sys_reg_desc sys_reg_descs[] = { >> /* DC ISW */ >> @@ -215,12 +280,71 @@ static const struct sys_reg_desc sys_reg_descs[] = { >> { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010), >> access_dcsw }, >> >> + DBG_BCR_BVR_WCR_WVR_EL1(0), >> + DBG_BCR_BVR_WCR_WVR_EL1(1), >> + /* MDCCINT_EL1 */ >> + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000), >> + trap_debug_regs, reset_val, MDCCINT_EL1, 0 }, >> + /* MDSCR_EL1 */ >> + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010), >> + trap_debug_regs, reset_val, MDSCR_EL1, 0 }, >> + DBG_BCR_BVR_WCR_WVR_EL1(2), >> + DBG_BCR_BVR_WCR_WVR_EL1(3), >> + DBG_BCR_BVR_WCR_WVR_EL1(4), >> + DBG_BCR_BVR_WCR_WVR_EL1(5), >> + DBG_BCR_BVR_WCR_WVR_EL1(6), >> + DBG_BCR_BVR_WCR_WVR_EL1(7), >> + DBG_BCR_BVR_WCR_WVR_EL1(8), >> + DBG_BCR_BVR_WCR_WVR_EL1(9), >> + DBG_BCR_BVR_WCR_WVR_EL1(10), >> + DBG_BCR_BVR_WCR_WVR_EL1(11), >> + DBG_BCR_BVR_WCR_WVR_EL1(12), >> + DBG_BCR_BVR_WCR_WVR_EL1(13), >> + DBG_BCR_BVR_WCR_WVR_EL1(14), >> + DBG_BCR_BVR_WCR_WVR_EL1(15), >> + >> + /* MDRAR_EL1 */ >> + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), >> + trap_raz_wi }, >> + /* OSLAR_EL1 */ >> + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b100), >> + trap_raz_wi }, > > so as long as you're trapping, if the guest writes to OSLK[1] and sets > the OS lock then it won't actually lock it, because when you read it > back from OSLSR_EL1 it will read as unlocked? Is that in line with the > architecture? The OSlock can always be cleared by external debug at any time, so I imagine that would be a valid implementation. >> + /* OSLSR_EL1 */ >> + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0001), Op2(0b100), >> + trap_oslsr_el1 }, >> + /* OSDLR_EL1 */ >> + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0011), Op2(0b100), >> + trap_raz_wi }, >> + /* DBGPRCR_EL1 */ >> + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0100), Op2(0b100), >> + trap_raz_wi }, >> + /* DBGCLAIMSET_EL1 */ >> + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1000), Op2(0b110), >> + trap_raz_wi }, >> + /* DBGCLAIMCLR_EL1 */ >> + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1001), Op2(0b110), >> + trap_raz_wi }, >> + /* DBGAUTHSTATUS_EL1 */ >> + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110), >> + trap_dbgauthstatus_el1 }, >> + >> /* TEECR32_EL1 */ >> { Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000), >> NULL, reset_val, TEECR32_EL1, 0 }, >> /* TEEHBR32_EL1 */ >> { Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000), >> NULL, reset_val, TEEHBR32_EL1, 0 }, >> + >> + /* MDCCSR_EL1 */ >> + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000), >> + trap_raz_wi }, >> + /* DBGDTR_EL0 */ >> + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0100), Op2(0b000), >> + trap_raz_wi }, >> + /* DBGDTR[TR]X_EL0 */ >> + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0101), Op2(0b000), >> + trap_raz_wi }, >> + >> /* DBGVCR32_EL2 */ >> { Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000), >> NULL, reset_val, DBGVCR32_EL2, 0 }, >> -- >> 1.8.3.4 >> > Thanks, M. -- Jazz is not dead. It just smells funny... -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html