On 02.05.2013, at 11:46, Bhushan Bharat-R65777 wrote: > > >> -----Original Message----- >> From: Alexander Graf [mailto:agraf@xxxxxxx] >> Sent: Friday, April 26, 2013 4:46 PM >> To: Bhushan Bharat-R65777 >> Cc: kvm-ppc@xxxxxxxxxxxxxxx; kvm@xxxxxxxxxxxxxxx; Wood Scott-B07421; Bhushan >> Bharat-R65777 >> Subject: Re: [PATCH 7/7 v3] KVM: PPC: Add userspace debug stub support >> >> >> On 08.04.2013, at 12:32, Bharat Bhushan wrote: >> >>> From: Bharat Bhushan <bharat.bhushan@xxxxxxxxxxxxx> >>> >>> This patch adds the debug stub support on booke/bookehv. >>> Now QEMU debug stub can use hw breakpoint, watchpoint and software >>> breakpoint to debug guest. >>> >>> Debug registers are saved/restored on vcpu_put()/vcpu_get(). >>> Also the debug registers are saved restored only if guest >>> is using debug resources. >>> >>> Currently we do not support debug resource emulation to guest, >>> so always exit to user space irrespective of user space is expecting >>> the debug exception or not. This is unexpected event and let us >>> leave the action on user space. This is similar to what it was before, >>> only thing is that now we have proper exit state available to user space. 
>>> >>> Signed-off-by: Bharat Bhushan <bharat.bhushan@xxxxxxxxxxxxx> >>> --- >>> arch/powerpc/include/asm/kvm_host.h | 8 + >>> arch/powerpc/include/uapi/asm/kvm.h | 22 +++- >>> arch/powerpc/kvm/booke.c | 242 ++++++++++++++++++++++++++++++++--- >>> arch/powerpc/kvm/booke.h | 5 + >>> 4 files changed, 255 insertions(+), 22 deletions(-) >>> >>> diff --git a/arch/powerpc/include/asm/kvm_host.h >> b/arch/powerpc/include/asm/kvm_host.h >>> index e34f8fe..b9ad20f 100644 >>> --- a/arch/powerpc/include/asm/kvm_host.h >>> +++ b/arch/powerpc/include/asm/kvm_host.h >>> @@ -505,7 +505,15 @@ struct kvm_vcpu_arch { >>> u32 mmucfg; >>> u32 epr; >>> u32 crit_save; >>> + >>> + /* Flag indicating that debug registers are used by guest */ >>> + bool debug_active; >>> + /* for save/restore thread->dbcr0 on vcpu run/heavyweight_exit */ >>> + u32 saved_dbcr0; >>> + /* guest debug registers*/ >>> struct kvmppc_booke_debug_reg dbg_reg; >>> + /* shadow debug registers */ >>> + struct kvmppc_booke_debug_reg shadow_dbg_reg; >>> #endif >>> gpa_t paddr_accessed; >>> gva_t vaddr_accessed; >>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h >> b/arch/powerpc/include/uapi/asm/kvm.h >>> index c0c38ed..d7ce449 100644 >>> --- a/arch/powerpc/include/uapi/asm/kvm.h >>> +++ b/arch/powerpc/include/uapi/asm/kvm.h >>> @@ -25,6 +25,7 @@ >>> /* Select powerpc specific features in <linux/kvm.h> */ >>> #define __KVM_HAVE_SPAPR_TCE >>> #define __KVM_HAVE_PPC_SMT >>> +#define __KVM_HAVE_GUEST_DEBUG >>> >>> struct kvm_regs { >>> __u64 pc; >>> @@ -267,7 +268,24 @@ struct kvm_fpu { >>> __u64 fpr[32]; >>> }; >>> >>> +/* >>> + * Defines for h/w breakpoint, watchpoint (read, write or both) and >>> + * software breakpoint. >>> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status" >>> + * for KVM_DEBUG_EXIT. 
>>> + */ >>> +#define KVMPPC_DEBUG_NONE 0x0 >>> +#define KVMPPC_DEBUG_BREAKPOINT (1UL << 1) >>> +#define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2) >>> +#define KVMPPC_DEBUG_WATCH_READ (1UL << 3) >>> struct kvm_debug_exit_arch { >>> + __u64 address; >>> + /* >>> + * exiting to userspace because of h/w breakpoint, watchpoint >>> + * (read, write or both) and software breakpoint. >>> + */ >>> + __u32 status; >>> + __u32 reserved; >>> }; >>> >>> /* for KVM_SET_GUEST_DEBUG */ >>> @@ -279,10 +297,6 @@ struct kvm_guest_debug_arch { >>> * Type denotes h/w breakpoint, read watchpoint, write >>> * watchpoint or watchpoint (both read and write). >>> */ >>> -#define KVMPPC_DEBUG_NONE 0x0 >>> -#define KVMPPC_DEBUG_BREAKPOINT (1UL << 1) >>> -#define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2) >>> -#define KVMPPC_DEBUG_WATCH_READ (1UL << 3) >>> __u32 type; >>> __u32 reserved; >>> } bp[16]; >>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c >>> index 97ae158..0e93416 100644 >>> --- a/arch/powerpc/kvm/booke.c >>> +++ b/arch/powerpc/kvm/booke.c >>> @@ -133,6 +133,29 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu) >>> #endif >>> } >>> >>> +static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) >>> +{ >>> + /* Synchronize guest's desire to get debug interrupts into shadow MSR */ >>> +#ifndef CONFIG_KVM_BOOKE_HV >>> + vcpu->arch.shadow_msr &= ~MSR_DE; >>> + vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE; >>> +#endif >>> + >>> + /* Force enable debug interrupts when user space wants to debug */ >>> + if (vcpu->guest_debug) { >>> +#ifdef CONFIG_KVM_BOOKE_HV >>> + /* >>> + * Since there is no shadow MSR, sync MSR_DE into the guest >>> + * visible MSR. >>> + */ >>> + vcpu->arch.shared->msr |= MSR_DE; >>> +#else >>> + vcpu->arch.shadow_msr |= MSR_DE; >>> + vcpu->arch.shared->msr &= ~MSR_DE; >>> +#endif >>> + } >>> +} >>> + >>> /* >>> * Helper function for "full" MSR writes. No need to call this if only >>> * EE/CE/ME/DE/RI are changing. 
>>> @@ -150,6 +173,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) >>> kvmppc_mmu_msr_notify(vcpu, old_msr); >>> kvmppc_vcpu_sync_spe(vcpu); >>> kvmppc_vcpu_sync_fpu(vcpu); >>> + kvmppc_vcpu_sync_debug(vcpu); >>> } >>> >>> static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, >>> @@ -646,6 +670,46 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) >>> return r; >>> } >>> >>> +static void kvmppc_load_usespace_gebug(void) >> >> ... > > Please tell What does "..." mean. Does that mean next comment is about the above function? It means "this one is so obvious I don't even have to write anything here". Usespace? Gebug? Seriously? :) > >> >>> +{ >>> + switch_booke_debug_regs(&current->thread); >>> +} >>> + >>> +static void kvmppc_booke_vcpu_load_debug_regs(struct kvm_vcpu *vcpu) >>> +{ >>> + if (!vcpu->arch.debug_active) >>> + return; >>> + >>> + /* Disable all debug events and clead pending debug events */ >>> + mtspr(SPRN_DBCR0, 0x0); >>> + kvmppc_clear_dbsr(); >>> + >>> + /* >>> + * Check whether guest still need debug resource, if not then there >>> + * is no need to restore guest context. >>> + */ >>> + if (!vcpu->arch.shadow_dbg_reg.dbcr0) >>> + return; >>> + >>> + /* Load Guest Context */ >>> + mtspr(SPRN_DBCR1, vcpu->arch.shadow_dbg_reg.dbcr1); >>> + mtspr(SPRN_DBCR2, vcpu->arch.shadow_dbg_reg.dbcr2); >>> +#ifdef CONFIG_KVM_E500MC >>> + mtspr(SPRN_DBCR4, vcpu->arch.shadow_dbg_reg.dbcr4); >> >> You need to make sure DBCR4 is 0 when you leave things back to normal user >> space. Otherwise guest debug can interfere with host debug. 
> > > ok > >> >>> +#endif >>> + mtspr(SPRN_IAC1, vcpu->arch.shadow_dbg_reg.iac[0]); >>> + mtspr(SPRN_IAC2, vcpu->arch.shadow_dbg_reg.iac[1]); >>> +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 >>> + mtspr(SPRN_IAC3, vcpu->arch.shadow_dbg_reg.iac[2]); >>> + mtspr(SPRN_IAC4, vcpu->arch.shadow_dbg_reg.iac[3]); >>> +#endif >>> + mtspr(SPRN_DAC1, vcpu->arch.shadow_dbg_reg.dac[0]); >>> + mtspr(SPRN_DAC2, vcpu->arch.shadow_dbg_reg.dac[1]); >>> + >>> + /* Enable debug events after other debug registers restored */ >>> + mtspr(SPRN_DBCR0, vcpu->arch.shadow_dbg_reg.dbcr0); >>> +} >> >> All of the code above looks suspiciously similar to prime_debug_regs();. Can't >> we somehow reuse that? > > I think we can if > - Save thread->debug_regs in local data structure Yes, it can even be on the stack. > - Load vcpu->arch->debug_regs in thread->debug_regs > - Call prime_debug_regs(); > - Restore thread->debug_regs from local save values in first step On heavyweight exit, based on the values on stack, yes. > >> >>> + >>> int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) >>> { >>> int ret, s; >>> @@ -693,11 +757,25 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct >> kvm_vcpu *vcpu) >>> kvmppc_load_guest_fp(vcpu); >>> #endif >>> >>> + /* >>> + * Clear current->thread.dbcr0 so that kernel does not >>> + * restore h/w registers on context switch in vcpu running state. >>> + */ >>> + vcpu->arch.debug_active = 1; >> >> = true; > > Ok > >> >>> + vcpu->arch.saved_dbcr0 = current->thread.dbcr0; >>> + current->thread.dbcr0 = 0; >>> + kvmppc_booke_vcpu_load_debug_regs(vcpu); >> >> static void switch_booke_debug_regs(struct thread_struct *new_thread) >> { >> if ((current->thread.dbcr0 & DBCR0_IDM) >> || (new_thread->dbcr0 & DBCR0_IDM)) >> prime_debug_regs(new_thread); >> } >> >> The kernel will also restore debug state if the process we come from has >> debugging enabled. Please adjust the comment accordingly. 
> > kvmppc_booke_vcpu_load_debug_regs(vcpu); cleares DBSR and DBCR0, even if previous process have used debug registers. Is not that sufficient? I do not think we should load all other registers. It's sufficient, but the comment is wrong. > >> >>> + >>> ret = __kvmppc_vcpu_run(kvm_run, vcpu); >>> >>> /* No need for kvm_guest_exit. It's done in handle_exit. >>> We also get here with interrupts enabled. */ >>> >>> + /* Restore thread->dbcr0 */ >>> + vcpu->arch.debug_active = 0; >>> + current->thread.dbcr0 = vcpu->arch.saved_dbcr0; >>> + kvmppc_load_usespace_gebug(); >>> + >>> #ifdef CONFIG_PPC_FPU >>> kvmppc_save_guest_fp(vcpu); >>> >>> @@ -753,6 +831,36 @@ static int emulation_exit(struct kvm_run *run, struct >> kvm_vcpu *vcpu) >>> } >>> } >>> >>> +/* >>> + * Currently we do not support debug resource emulation to guest, >>> + * so always exit to user space irrespective of user space is >>> + * expecting the debug exception or not. This is unexpected event >>> + * and let us leave the action on user space. 
>>> + */ >>> +static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu) >>> +{ >>> + u32 dbsr = mfspr(SPRN_DBSR); >>> + >>> + kvmppc_clear_dbsr(); >>> + run->debug.arch.status = 0; >>> + run->debug.arch.address = vcpu->arch.pc; >>> + >>> + if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) { >>> + run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT; >>> + } else { >>> + if (dbsr & (DBSR_DAC1W | DBSR_DAC2W)) >>> + run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE; >>> + else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R)) >>> + run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ; >>> + if (dbsr & (DBSR_DAC1R | DBSR_DAC1W)) >>> + run->debug.arch.address = vcpu->arch.shadow_dbg_reg.dac[0]; >>> + else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W)) >>> + run->debug.arch.address = vcpu->arch.shadow_dbg_reg.dac[1]; >>> + } >>> + >>> + return RESUME_HOST; >>> +} >>> + >>> static void kvmppc_fill_pt_regs(struct pt_regs *regs) >>> { >>> ulong r1, ip, msr, lr; >>> @@ -1112,18 +1220,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct >> kvm_vcpu *vcpu, >>> } >>> >>> case BOOKE_INTERRUPT_DEBUG: { >>> - u32 dbsr; >>> - >>> - vcpu->arch.pc = mfspr(SPRN_CSRR0); >>> - >>> - /* clear IAC events in DBSR register */ >>> - dbsr = mfspr(SPRN_DBSR); >>> - dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4; >>> - mtspr(SPRN_DBSR, dbsr); >>> - >>> - run->exit_reason = KVM_EXIT_DEBUG; >>> + r = kvmppc_handle_debug(run, vcpu); >>> + if (r == RESUME_HOST) >>> + run->exit_reason = KVM_EXIT_DEBUG; >>> kvmppc_account_exit(vcpu, DEBUG_EXITS); >>> - r = RESUME_HOST; >>> break; >>> } >>> >>> @@ -1174,7 +1274,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) >>> kvmppc_set_msr(vcpu, 0); >>> >>> #ifndef CONFIG_KVM_BOOKE_HV >>> - vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS; >>> + vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS; >>> vcpu->arch.shadow_pid = 1; >>> vcpu->arch.shared->msr = 0; >>> #endif >>> @@ -1529,12 +1629,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct 
kvm_vcpu *vcpu, >> struct kvm_one_reg *reg) >>> return r; >>> } >>> >>> -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, >>> - struct kvm_guest_debug *dbg) >>> -{ >>> - return -EINVAL; >>> -} >>> - >>> int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) >>> { >>> return -ENOTSUPP; >>> @@ -1640,16 +1734,128 @@ void kvmppc_decrementer_func(unsigned long data) >>> kvmppc_set_tsr_bits(vcpu, TSR_DIS); >>> } >>> >>> +static void kvmppc_booke_vcpu_put_debug_regs(struct kvm_vcpu *vcpu) >>> +{ >>> + /* Disable all debug events First */ >> >> first >> >>> + mtspr(SPRN_DBCR0, 0x0); >>> + /* Disable pending debug event by clearing DBSR */ >>> + kvmppc_clear_dbsr(); >> >> kvmppc_handle_debug() happens with preemption enabled, no? > > Want to clarify, preemption will be enabled on calling local_irq_enable(); in kvmppc_handle_exit()? Yes. Implicitly :). > >> So we can have a >> debug event that gets cleared on preempt by this. > > Should we read the DBSR in before local_irq_enable() in kvmppc_handle_exit()? We have to, yes. Otherwise we could get preempted in between and get bogus data I presume. > >> >>> +} >>> + >>> +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, >>> + struct kvm_guest_debug *dbg) >>> +{ >>> + struct kvmppc_booke_debug_reg *dbg_reg; >>> + int n, b = 0, w = 0; >>> + const u32 bp_code[] = { >>> + DBCR0_IAC1 | DBCR0_IDM, >>> + DBCR0_IAC2 | DBCR0_IDM, >>> + DBCR0_IAC3 | DBCR0_IDM, >>> + DBCR0_IAC4 | DBCR0_IDM >>> + }; >>> + const u32 wp_code[] = { >>> + DBCR0_DAC1W | DBCR0_IDM, >>> + DBCR0_DAC2W | DBCR0_IDM, >>> + DBCR0_DAC1R | DBCR0_IDM, >>> + DBCR0_DAC2R | DBCR0_IDM >>> + }; >>> + >>> + if (!(dbg->control & KVM_GUESTDBG_ENABLE)) { >>> + /* Clear All debug events */ >>> + vcpu->arch.shadow_dbg_reg.dbcr0 = 0; >>> + vcpu->guest_debug = 0; >>> +#ifdef CONFIG_KVM_BOOKE_HV >>> + /* >>> + * When user space is not using the debug resources >>> + * then allow guest to change the MSR.DE. 
>>> + */ >>> + vcpu->arch.shadow_msrp &= ~MSRP_DEP; >>> +#endif >>> + return 0; >>> + } >>> + >>> +#ifdef CONFIG_KVM_BOOKE_HV >>> + /* >>> + * When user space is using the debug resource then >>> + * do not allow guest to change the MSR.DE. >>> + */ >>> + vcpu->arch.shadow_msrp &= ~MSRP_DEP; >> >> This is supposed to be |= right? > > Yes :-) > >> >>> +#endif >>> + vcpu->guest_debug = dbg->control; >>> + vcpu->arch.shadow_dbg_reg.dbcr0 = 0; >>> + /* Set DBCR0_EDM in guest visible DBCR0 register. */ >>> + vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM; >>> + >>> + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) >>> + vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC; >>> + >>> + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) >>> + /* Code below handles only HW breakpoints */ >> >> Please move the comment out of the one-lined branch. Just put it below the >> return. > > Ok > >> >>> + return 0; >>> + >>> + dbg_reg = &(vcpu->arch.shadow_dbg_reg); >>> + >>> + /* >>> + * On BOOKE (e500v2); Set DBCR1 and DBCR2 to allow debug events >>> + * to occur when MSR.PR is set. >>> + * On BOOKE-HV (e500mc+); MSR.PR = 0 when guest is running. So we >>> + * should clear DBCR1 and DBCR2. And EPCR.DUVD is used to control >>> + * that debug events will not come in hypervisor (GS = 0). >>> + */ >>> +#ifdef CONFIG_KVM_BOOKE_HV >>> + dbg_reg->dbcr1 = 0; >>> + dbg_reg->dbcr2 = 0; >>> +#else >>> + dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US | >>> + DBCR1_IAC4US; >>> + dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; >>> +#endif >>> + >>> + for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) { >>> + u32 type = dbg->arch.bp[n].type; >>> + >>> + if (!type) >>> + continue; >> >> Scott's comment on why a zero-type is different from any other invalid type is >> still outstanding I think. > > Userspce does following > - dbg->arch.bp[] is first zero initialized. 
So dbg->arch.bp[n].type is 0 (KVMPPC_DEBUG_NONE) > - Then set following for valid breakpoints/watchpoints: > -- dbg->arch.bp[n].type (KVMPPC_DEBUG_BREAKPOINT or KVMPPC_DEBUG_WATCH_WRITE or KVMPPC_DEBUG_WATCH_READ) > -- dbg->arch.bp[n].addr > > I tried to avoid loop when type is 0 (probably saying type == KVMPPC_DEBUG_NONE should be more clear). That works, yes. > if type is invalid then should we return -EINVAL. Yes. Alex -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html