On Thu, May 07, 2015 at 10:07:12AM +0100, Alex Bennée wrote: > This adds support for userspace to control the HW debug registers for > guest debug. In the debug ioctl we copy the IMPDEF defined number of > registers into a new register set called host_debug_state. There is now > a new vcpu parameter called debug_ptr which selects which register set > is to copied into the real registers when world switch occurs. > > I've moved some helper functions into the hw_breakpoint.h header for > re-use. > > As with single step we need to tweak the guest registers to enable the > exceptions so we need to save and restore those bits. > > Two new capabilities have been added to the KVM_EXTENSION ioctl to allow > userspace to query the number of hardware break and watch points > available on the host hardware. > > Signed-off-by: Alex Bennée <alex.bennee@xxxxxxxxxx> > > --- > v2 > - switched to C setup > - replace host debug registers directly into context > - minor tweak to api docs > - setup right register for debug > - add FAR_EL2 to debug exit structure > - add support for trapping debug register access > v3 > - remove stray trace statement > - fix spacing around operators (various) > - clean-up usage of trap_debug > - introduce debug_ptr, replace excessive memcpy stuff > - don't use memcpy in ioctl, just assign > - update cap ioctl documentation > - reword a number comments > - rename host_debug_state->external_debug_state > > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt > index 5ef937c..419f7a8 100644 > --- a/Documentation/virtual/kvm/api.txt > +++ b/Documentation/virtual/kvm/api.txt > @@ -2668,7 +2668,7 @@ The top 16 bits of the control field are architecture specific control > flags which can include the following: > > - KVM_GUESTDBG_USE_SW_BP: using software breakpoints [x86, arm64] > - - KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390] > + - KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390, arm64] > - 
KVM_GUESTDBG_INJECT_DB: inject DB type exception [x86] > - KVM_GUESTDBG_INJECT_BP: inject BP type exception [x86] > - KVM_GUESTDBG_EXIT_PENDING: trigger an immediate guest exit [s390] > @@ -2683,6 +2683,11 @@ updated to the correct (supplied) values. > The second part of the structure is architecture specific and > typically contains a set of debug registers. > > +For arm64 the number of debug registers is implementation defined and > +can be determined by querying the KVM_CAP_GUEST_DEBUG_HW_BPS and > +KVM_CAP_GUEST_DEBUG_HW_WPS capabilities which returns a +ve number s/returns/return/ s/+ve/positive/ > +indicating the number of supported registers. > + > When debug events exit the main run loop with the reason > KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run > structure containing architecture specific debug information. > diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c > index 9b3ed6d..2920185 100644 > --- a/arch/arm/kvm/arm.c > +++ b/arch/arm/kvm/arm.c > @@ -279,6 +279,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) > /* Set up the timer */ > kvm_timer_vcpu_init(vcpu); > > + /* Set the debug registers to be the guests */ > + vcpu->arch.debug_ptr = (struct kvm_guest_debug_arch *) > + &vcpu_sys_reg(vcpu, DBGBCR0_EL1); > + Yikes, I don't like this cast. How bad would it be to get rid of the debug registers in the sys_regs array? Also, I'm pretty sure this is part of the breakage for the 32-bit build... 
> return 0; > } > > @@ -304,6 +308,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) > > #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ > KVM_GUESTDBG_USE_SW_BP | \ > + KVM_GUESTDBG_USE_HW_BP | \ > KVM_GUESTDBG_SINGLESTEP) > > /** > @@ -324,6 +329,12 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, > > if (dbg->control & KVM_GUESTDBG_ENABLE) { > vcpu->guest_debug = dbg->control; > + > + /* Hardware assisted Break and Watch points */ > + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { Is this only breakpoints, or breakpoints and watchpoints? > + vcpu->arch.external_debug_state = dbg->arch; > + } > + > } else { > /* If not enabled clear all flags */ > vcpu->guest_debug = 0; > diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h > index 52b484b..c450552 100644 > --- a/arch/arm64/include/asm/hw_breakpoint.h > +++ b/arch/arm64/include/asm/hw_breakpoint.h > @@ -130,6 +130,18 @@ static inline void ptrace_hw_copy_thread(struct task_struct *task) > } > #endif > > +/* Determine number of BRP registers available. */ > +static inline int get_num_brps(void) > +{ > + return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1; > +} > + > +/* Determine number of WRP registers available. */ > +static inline int get_num_wrps(void) > +{ > + return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1; > +} > + I will need an ack from Catalin/Will to merge this. It may be better to move these functions into a separate patch. 
> extern struct pmu perf_ops_bp; > > #endif /* __KERNEL__ */ > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > index b60fa7a..a44fb32 100644 > --- a/arch/arm64/include/asm/kvm_host.h > +++ b/arch/arm64/include/asm/kvm_host.h > @@ -108,9 +108,18 @@ struct kvm_vcpu_arch { > /* Exception Information */ > struct kvm_vcpu_fault_info fault; > > - /* Debug state */ > + /* Guest debug state */ > u64 debug_flags; > > + /* > + * For debugging the guest we need to keep a set of debug > + * registers which can override the guests own debug state s/guests/guest's/ > + * while being used. These are set via the KVM_SET_GUEST_DEBUG > + * ioctl. > + */ > + struct kvm_guest_debug_arch *debug_ptr; > + struct kvm_guest_debug_arch external_debug_state; > + > /* Pointer to host CPU context */ > kvm_cpu_context_t *host_cpu_context; > > diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h > index 04957d7..98e82ef 100644 > --- a/arch/arm64/include/uapi/asm/kvm.h > +++ b/arch/arm64/include/uapi/asm/kvm.h > @@ -121,7 +121,7 @@ struct kvm_guest_debug_arch { > > struct kvm_debug_exit_arch { > __u32 hsr; > - __u64 far; > + __u64 far; /* used for watchpoints */ seems strange to amend this now? 
> }; > > struct kvm_sync_regs { > diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c > index ce7b7dd..671ab13 100644 > --- a/arch/arm64/kernel/asm-offsets.c > +++ b/arch/arm64/kernel/asm-offsets.c > @@ -116,6 +116,7 @@ int main(void) > DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); > DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); > DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags)); > + DEFINE(VCPU_DEBUG_PTR, offsetof(struct kvm_vcpu, arch.debug_ptr)); > DEFINE(DEBUG_BCR, offsetof(struct kvm_guest_debug_arch, dbg_bcr)); > DEFINE(DEBUG_BVR, offsetof(struct kvm_guest_debug_arch, dbg_bvr)); > DEFINE(DEBUG_WCR, offsetof(struct kvm_guest_debug_arch, dbg_wcr)); > diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c > index e7d934d..3a41bbf 100644 > --- a/arch/arm64/kernel/hw_breakpoint.c > +++ b/arch/arm64/kernel/hw_breakpoint.c > @@ -49,18 +49,6 @@ static DEFINE_PER_CPU(int, stepping_kernel_bp); > static int core_num_brps; > static int core_num_wrps; > > -/* Determine number of BRP registers available. */ > -static int get_num_brps(void) > -{ > - return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1; > -} > - > -/* Determine number of WRP registers available. */ > -static int get_num_wrps(void) > -{ > - return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1; > -} > - > int hw_breakpoint_slots(int type) > { > /* > diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c > index 19346e8..1ab63dd 100644 > --- a/arch/arm64/kvm/debug.c > +++ b/arch/arm64/kvm/debug.c > @@ -99,12 +99,6 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) > MDCR_EL2_TDRA | > MDCR_EL2_TDOSA); > > - /* Trap on access to debug registers? */ > - if (trap_debug) > - vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; > - else > - vcpu->arch.mdcr_el2 &= ~MDCR_EL2_TDA; > - > /* Is Guest debugging in effect? 
*/ > if (vcpu->guest_debug) { > vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE; > @@ -128,14 +122,54 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) > vcpu_sys_reg(vcpu, MDSCR_EL1) &= ~DBG_MDSCR_SS; > } > > + /* > + * HW Break/Watch points > + * > + * We simply switch the debug_ptr to point to our new > + * external_debug_state which has been populated by the > + * debug ioctl. The existing KVM_ARM64_DEBUG_DIRTY > + * mechanism ensures the registers are updated on the > + * world switch. > + */ > + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { > + > + vcpu_sys_reg(vcpu, MDSCR_EL1) |= > + (DBG_MDSCR_KDE | DBG_MDSCR_MDE); Why do we need to set these two bits? Is it obvious or does it deserve a comment? > + > + vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state; > + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; > + trap_debug = true; > + } > + > } else { > /* Debug operations can go straight to the guest */ > vcpu->arch.mdcr_el2 &= ~MDCR_EL2_TDE; > } > + > + /* > + * If the guest debug register state is dirty (the guest is > + * actively accessing them), then we context-switch the > + * registers in EL2. Otherwise, we trap-and-emulate all guest > + * accesses to them. > + */ I think this comment now feels strange, because it was explaining why we would set the trap_debug variable when the dirty flag was set, but the code just sets TDA when trap_debug is set. So you should either move this comment to the top of the function and have it above a separate line that sets trap_debug based on KVM_ARM64_DEBUG_DIRTY (instead of initializing at declaration), or you should explain which conditions set trap_debug (guest is using the regs or we are debugging the guest), or just get rid of the comment. > + if (trap_debug) > + vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; > + else > + vcpu->arch.mdcr_el2 &= ~MDCR_EL2_TDA; still don't need the else. 
> } > > void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) > { > - if (vcpu->guest_debug) > + if (vcpu->guest_debug) { > restore_guest_debug_regs(vcpu); > + > + /* > + * If we were using HW debug we need to restore the > + * debug_ptr to the guest debug state. > + */ > + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { > + vcpu->arch.debug_ptr = (struct kvm_guest_debug_arch *) > + &vcpu_sys_reg(vcpu, DBGBCR0_EL1); > + } I would find it easier to follow the code if you only configured the debug_ptr in kvm_arm_setup_debug(), because it feels like you're setting up state here which will not be used until much later (after handle_exit, exit to userspace etc.). > + } > } > diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c > index e9de13e..68a0759 100644 > --- a/arch/arm64/kvm/handle_exit.c > +++ b/arch/arm64/kvm/handle_exit.c > @@ -103,7 +103,11 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) > run->debug.arch.hsr = hsr; > > switch (hsr >> ESR_ELx_EC_SHIFT) { > + case ESR_ELx_EC_WATCHPT_LOW: > + run->debug.arch.far = vcpu->arch.fault.far_el2; > + /* fall through */ > case ESR_ELx_EC_SOFTSTP_LOW: > + case ESR_ELx_EC_BREAKPT_LOW: > case ESR_ELx_EC_BKPT32: > case ESR_ELx_EC_BRK64: > break; > @@ -132,6 +136,8 @@ static exit_handle_fn arm_exit_handlers[] = { > [ESR_ELx_EC_IABT_LOW] = kvm_handle_guest_abort, > [ESR_ELx_EC_DABT_LOW] = kvm_handle_guest_abort, > [ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug, > + [ESR_ELx_EC_WATCHPT_LOW]= kvm_handle_guest_debug, > + [ESR_ELx_EC_BREAKPT_LOW]= kvm_handle_guest_debug, > [ESR_ELx_EC_BKPT32] = kvm_handle_guest_debug, > [ESR_ELx_EC_BRK64] = kvm_handle_guest_debug, > }; > diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S > index dd51fb1..921d248 100644 > --- a/arch/arm64/kvm/hyp.S > +++ b/arch/arm64/kvm/hyp.S > @@ -706,7 +706,8 @@ ENTRY(__kvm_vcpu_run) > bl __restore_fpsimd > > skip_debug_state x3, 1f > - add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1) > + ldr x3, [x0, 
#VCPU_DEBUG_PTR] > + kern_hyp_va x3 > bl __restore_debug > 1: > restore_guest_32bit_state > @@ -727,7 +728,8 @@ __kvm_vcpu_return: > bl __save_sysregs > > skip_debug_state x3, 1f > - add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1) > + ldr x3, [x0, #VCPU_DEBUG_PTR] > + kern_hyp_va x3 > bl __save_debug > 1: > save_guest_32bit_state > diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c > index 0b43265..21d5a62 100644 > --- a/arch/arm64/kvm/reset.c > +++ b/arch/arm64/kvm/reset.c > @@ -56,6 +56,12 @@ static bool cpu_has_32bit_el1(void) > return !!(pfr0 & 0x20); > } > > +/** > + * kvm_arch_dev_ioctl_check_extension > + * > + * We currently assume that the number of HW registers is uniform > + * across all CPUs (see cpuinfo_sanity_check). > + */ > int kvm_arch_dev_ioctl_check_extension(long ext) > { > int r; > @@ -64,6 +70,12 @@ int kvm_arch_dev_ioctl_check_extension(long ext) > case KVM_CAP_ARM_EL1_32BIT: > r = cpu_has_32bit_el1(); > break; > + case KVM_CAP_GUEST_DEBUG_HW_BPS: > + r = get_num_brps(); > + break; > + case KVM_CAP_GUEST_DEBUG_HW_WPS: > + r = get_num_wrps(); > + break; > default: > r = 0; > } > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 3b6252e..923c2aa 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -825,6 +825,8 @@ struct kvm_ppc_smmu_info { > #define KVM_CAP_S390_INJECT_IRQ 113 > #define KVM_CAP_S390_IRQ_STATE 114 > #define KVM_CAP_PPC_HWRNG 115 > +#define KVM_CAP_GUEST_DEBUG_HW_BPS 116 > +#define KVM_CAP_GUEST_DEBUG_HW_WPS 117 > > #ifdef KVM_CAP_IRQ_ROUTING > > -- > 2.3.5 > Thanks, -Christoffer -- To unsubscribe from this list: send the line "unsubscribe linux-doc" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html