On Tue, May 20, 2014 at 05:55:39PM +0100, Marc Zyngier wrote: > Add handlers for all the AArch64 debug registers that are accessible > from EL0 or EL1. The trapping code keeps track of the state of the > debug registers, allowing for the switch code to implement a lazy > switching strategy. > > Reviewed-by: Anup Patel <anup.patel@xxxxxxxxxx> > Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx> > --- > arch/arm64/include/asm/kvm_asm.h | 28 ++++++-- > arch/arm64/include/asm/kvm_host.h | 3 + > arch/arm64/kvm/sys_regs.c | 130 +++++++++++++++++++++++++++++++++++++- > 3 files changed, 151 insertions(+), 10 deletions(-) > > diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h > index 9fcd54b..e6b159a 100644 > --- a/arch/arm64/include/asm/kvm_asm.h > +++ b/arch/arm64/include/asm/kvm_asm.h > @@ -43,14 +43,25 @@ > #define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */ > #define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ > #define PAR_EL1 21 /* Physical Address Register */ > +#define MDSCR_EL1 22 /* Monitor Debug System Control Register */ > +#define DBGBCR0_EL1 23 /* Debug Breakpoint Control Registers (0-15) */ > +#define DBGBCR15_EL1 38 > +#define DBGBVR0_EL1 39 /* Debug Breakpoint Value Registers (0-15) */ > +#define DBGBVR15_EL1 54 > +#define DBGWCR0_EL1 55 /* Debug Watchpoint Control Registers (0-15) */ > +#define DBGWCR15_EL1 70 > +#define DBGWVR0_EL1 71 /* Debug Watchpoint Value Registers (0-15) */ > +#define DBGWVR15_EL1 86 > +#define MDCCINT_EL1 87 /* Monitor Debug Comms Channel Interrupt Enable Reg */ > + > /* 32bit specific registers. 
Keep them at the end of the range */ > -#define DACR32_EL2 22 /* Domain Access Control Register */ > -#define IFSR32_EL2 23 /* Instruction Fault Status Register */ > -#define FPEXC32_EL2 24 /* Floating-Point Exception Control Register */ > -#define DBGVCR32_EL2 25 /* Debug Vector Catch Register */ > -#define TEECR32_EL1 26 /* ThumbEE Configuration Register */ > -#define TEEHBR32_EL1 27 /* ThumbEE Handler Base Register */ > -#define NR_SYS_REGS 28 > +#define DACR32_EL2 88 /* Domain Access Control Register */ > +#define IFSR32_EL2 89 /* Instruction Fault Status Register */ > +#define FPEXC32_EL2 90 /* Floating-Point Exception Control Register */ > +#define DBGVCR32_EL2 91 /* Debug Vector Catch Register */ > +#define TEECR32_EL1 92 /* ThumbEE Configuration Register */ > +#define TEEHBR32_EL1 93 /* ThumbEE Handler Base Register */ > +#define NR_SYS_REGS 94 > > /* 32bit mapping */ > #define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ > @@ -87,6 +98,9 @@ > #define ARM_EXCEPTION_IRQ 0 > #define ARM_EXCEPTION_TRAP 1 > > +#define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 > +#define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) > + > #ifndef __ASSEMBLY__ > struct kvm; > struct kvm_vcpu; > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > index 0a1d697..4737961 100644 > --- a/arch/arm64/include/asm/kvm_host.h > +++ b/arch/arm64/include/asm/kvm_host.h > @@ -101,6 +101,9 @@ struct kvm_vcpu_arch { > /* Exception Information */ > struct kvm_vcpu_fault_info fault; > > + /* Debug state */ > + u64 debug_flags; > + > /* Pointer to host CPU context */ > kvm_cpu_context_t *host_cpu_context; > > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c > index c3d28f1..d46a965 100644 > --- a/arch/arm64/kvm/sys_regs.c > +++ b/arch/arm64/kvm/sys_regs.c > @@ -30,6 +30,7 @@ > #include <asm/kvm_mmu.h> > #include <asm/cacheflush.h> > #include <asm/cputype.h> > +#include <asm/debug-monitors.h> > #include <trace/events/kvm.h> > > #include 
"sys_regs.h" > @@ -173,6 +174,58 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu, > return read_zero(vcpu, p); > } > > +static bool trap_oslsr_el1(struct kvm_vcpu *vcpu, > + const struct sys_reg_params *p, > + const struct sys_reg_desc *r) > +{ > + if (p->is_write) { > + return ignore_write(vcpu, p); > + } else { > + *vcpu_reg(vcpu, p->Rt) = (1 << 3); > + return true; > + } > +} > + > +static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu, > + const struct sys_reg_params *p, > + const struct sys_reg_desc *r) > +{ > + if (p->is_write) { > + return ignore_write(vcpu, p); > + } else { > + *vcpu_reg(vcpu, p->Rt) = 0x2222; /* Implemented and disabled */ Is this always safe? What happens when you stop trapping accesses to this register and the hardware tells you something different? Or are we assuming that this is always the case, since otherwise none of this works? > + return true; > + } > +} > + > +/* > + * Trap handler for DBG[BW][CV]Rn_EL1 and MDSCR_EL1. We track the > + * "dirtiness" of the registers. > + */ > +static bool trap_debug_regs(struct kvm_vcpu *vcpu, > + const struct sys_reg_params *p, > + const struct sys_reg_desc *r) > +{ > + /* > + * The best thing to do would be to trap MDSCR_EL1 > + * independently, test if DBG_MDSCR_KDE or DBG_MDSCR_MDE is > + * getting set, and only set the DIRTY bit in that case. This comment is really hard to understand in this patch without any explanation of what the dirty flag does. Readers new to this code may be in the same situation. Perhaps add a comment on the dirty bit (what does setting it imply?) or explain the rationale here; in other words: we want to avoid world-switching all the DBG registers all the time, and so on. > + * > + * Unfortunately, "old" Linux kernels tend to hit MDSCR_EL1 > + * like a woodpecker on a tree, and it is better to disable > + * trapping as soon as possible in this case. Some day, make > + * this a tuneable... 
> + */ > + if (p->is_write) { > + vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt); > + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; > + } else { > + *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg); > + } > + > + return true; > +} > + > static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) > { > u64 amair; > @@ -189,6 +242,21 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) > vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff); > } > > +/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go*/ > +#define DBG_BCR_BVR_WCR_WVR_EL1(n) \ > + /* DBGBVRn_EL1 */ \ > + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100), \ > + trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 }, \ Shouldn't the reg field here be DBGBVR0_EL1? > + /* DBGBCRn_EL1 */ \ > + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101), \ > + trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 }, \ Shouldn't the reg field here be DBGBCR0_EL1? > + /* DBGWVRn_EL1 */ \ > + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110), \ > + trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 }, \ and DBGWVR0_EL1 here? > + /* DBGWCRn_EL1 */ \ > + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \ > + trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 } and DBGWCR0_EL1 here? > + > /* > * Architected system registers. > * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 > @@ -200,9 +268,6 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) > * Therefore we tell the guest we have 0 counters. Unfortunately, we > * must always support PMCCNTR (the cycle counter): we just RAZ/WI for > * all PM registers, which doesn't crash the guest kernel at least. > - * > - * Same goes for the whole debug infrastructure, which probably breaks > - * some guest functionnality. This should be fixed. 
> */ > static const struct sys_reg_desc sys_reg_descs[] = { > /* DC ISW */ > @@ -215,12 +280,71 @@ static const struct sys_reg_desc sys_reg_descs[] = { > { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010), > access_dcsw }, > > + DBG_BCR_BVR_WCR_WVR_EL1(0), > + DBG_BCR_BVR_WCR_WVR_EL1(1), > + /* MDCCINT_EL1 */ > + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000), > + trap_debug_regs, reset_val, MDCCINT_EL1, 0 }, > + /* MDSCR_EL1 */ > + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010), > + trap_debug_regs, reset_val, MDSCR_EL1, 0 }, > + DBG_BCR_BVR_WCR_WVR_EL1(2), > + DBG_BCR_BVR_WCR_WVR_EL1(3), > + DBG_BCR_BVR_WCR_WVR_EL1(4), > + DBG_BCR_BVR_WCR_WVR_EL1(5), > + DBG_BCR_BVR_WCR_WVR_EL1(6), > + DBG_BCR_BVR_WCR_WVR_EL1(7), > + DBG_BCR_BVR_WCR_WVR_EL1(8), > + DBG_BCR_BVR_WCR_WVR_EL1(9), > + DBG_BCR_BVR_WCR_WVR_EL1(10), > + DBG_BCR_BVR_WCR_WVR_EL1(11), > + DBG_BCR_BVR_WCR_WVR_EL1(12), > + DBG_BCR_BVR_WCR_WVR_EL1(13), > + DBG_BCR_BVR_WCR_WVR_EL1(14), > + DBG_BCR_BVR_WCR_WVR_EL1(15), > + > + /* MDRAR_EL1 */ > + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), > + trap_raz_wi }, > + /* OSLAR_EL1 */ > + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b100), > + trap_raz_wi }, So, as long as you're trapping, if the guest writes to OSLK[1] and sets the OS lock, the lock won't actually be taken, because reading it back from OSLSR_EL1 will still show it as unlocked? Is that in line with the architecture? 
> + /* OSLSR_EL1 */ > + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0001), Op2(0b100), > + trap_oslsr_el1 }, > + /* OSDLR_EL1 */ > + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0011), Op2(0b100), > + trap_raz_wi }, > + /* DBGPRCR_EL1 */ > + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0100), Op2(0b100), > + trap_raz_wi }, > + /* DBGCLAIMSET_EL1 */ > + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1000), Op2(0b110), > + trap_raz_wi }, > + /* DBGCLAIMCLR_EL1 */ > + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1001), Op2(0b110), > + trap_raz_wi }, > + /* DBGAUTHSTATUS_EL1 */ > + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110), > + trap_dbgauthstatus_el1 }, > + > /* TEECR32_EL1 */ > { Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000), > NULL, reset_val, TEECR32_EL1, 0 }, > /* TEEHBR32_EL1 */ > { Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000), > NULL, reset_val, TEEHBR32_EL1, 0 }, > + > + /* MDCCSR_EL1 */ > + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000), > + trap_raz_wi }, > + /* DBGDTR_EL0 */ > + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0100), Op2(0b000), > + trap_raz_wi }, > + /* DBGDTR[TR]X_EL0 */ > + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0101), Op2(0b000), > + trap_raz_wi }, > + > /* DBGVCR32_EL2 */ > { Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000), > NULL, reset_val, DBGVCR32_EL2, 0 }, > -- > 1.8.3.4 > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html