On 21.03.2013, at 07:25, Bharat Bhushan wrote: > From: Bharat Bhushan <bharat.bhushan@xxxxxxxxxxxxx> > > This patch adds the debug stub support on booke/bookehv. > Now QEMU debug stub can use hw breakpoint, watchpoint and > software breakpoint to debug guest. > > Debug registers are saved/restored on vcpu_put()/vcpu_get(). > Also the debug registers are saved restored only if guest is using > debug resources. > > Signed-off-by: Bharat Bhushan <bharat.bhushan@xxxxxxxxxxxxx> > --- > v2: > - save/restore in vcpu_get()/vcpu_put() > - some more minor cleanup based on review comments. > > arch/powerpc/include/asm/kvm_host.h | 10 ++ > arch/powerpc/include/uapi/asm/kvm.h | 22 +++- > arch/powerpc/kvm/booke.c | 252 ++++++++++++++++++++++++++++++++--- > arch/powerpc/kvm/e500_emulate.c | 10 ++ > 4 files changed, 272 insertions(+), 22 deletions(-) > > diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h > index f4ba881..8571952 100644 > --- a/arch/powerpc/include/asm/kvm_host.h > +++ b/arch/powerpc/include/asm/kvm_host.h > @@ -504,7 +504,17 @@ struct kvm_vcpu_arch { > u32 mmucfg; > u32 epr; > u32 crit_save; > + /* guest debug registers*/ > struct kvmppc_booke_debug_reg dbg_reg; > + /* shadow debug registers */ > + struct kvmppc_booke_debug_reg shadow_dbg_reg; > + /* host debug registers*/ > + struct kvmppc_booke_debug_reg host_dbg_reg; > + /* > + * Flag indicating that debug registers are used by guest > + * and requires save restore. > + */ > + bool debug_save_restore; > #endif > gpa_t paddr_accessed; > gva_t vaddr_accessed; > diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h > index 15f9a00..d7ce449 100644 > --- a/arch/powerpc/include/uapi/asm/kvm.h > +++ b/arch/powerpc/include/uapi/asm/kvm.h > @@ -25,6 +25,7 @@ > /* Select powerpc specific features in <linux/kvm.h> */ > #define __KVM_HAVE_SPAPR_TCE > #define __KVM_HAVE_PPC_SMT > +#define __KVM_HAVE_GUEST_DEBUG > > struct kvm_regs { > __u64 pc; > @@ -267,7 +268,24 @@ struct kvm_fpu { > __u64 fpr[32]; > }; > > +/* > + * Defines for h/w breakpoint, watchpoint (read, write or both) and > + * software breakpoint. > + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status" > + * for KVM_DEBUG_EXIT. > + */ > +#define KVMPPC_DEBUG_NONE 0x0 > +#define KVMPPC_DEBUG_BREAKPOINT (1UL << 1) > +#define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2) > +#define KVMPPC_DEBUG_WATCH_READ (1UL << 3) > struct kvm_debug_exit_arch { > + __u64 address; > + /* > + * exiting to userspace because of h/w breakpoint, watchpoint > + * (read, write or both) and software breakpoint. > + */ > + __u32 status; > + __u32 reserved; > }; > > /* for KVM_SET_GUEST_DEBUG */ > @@ -279,10 +297,6 @@ struct kvm_guest_debug_arch { > * Type denotes h/w breakpoint, read watchpoint, write > * watchpoint or watchpoint (both read and write). > */ > -#define KVMPPC_DEBUG_NOTYPE 0x0 > -#define KVMPPC_DEBUG_BREAKPOINT (1UL << 1) > -#define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2) > -#define KVMPPC_DEBUG_WATCH_READ (1UL << 3) > __u32 type; > __u32 reserved; > } bp[16]; > diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c > index 1de93a8..bf20056 100644 > --- a/arch/powerpc/kvm/booke.c > +++ b/arch/powerpc/kvm/booke.c > @@ -133,6 +133,30 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu) > #endif > } > > +static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) > +{ > + /* Synchronize guest's desire to get debug interrupts into shadow MSR */ > +#ifndef CONFIG_KVM_BOOKE_HV > + vcpu->arch.shadow_msr &= ~MSR_DE; > + vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE; > +#endif > + > + /* Force enable debug interrupts when user space wants to debug */ > + if (vcpu->guest_debug) { > +#ifdef CONFIG_KVM_BOOKE_HV > + /* > + * Since there is no shadow MSR, sync MSR_DE into the guest > + * visible MSR. Do not allow guest to change MSR[DE]. > + */ > + vcpu->arch.shared->msr |= MSR_DE; > + mtspr(SPRN_MSRP, mfspr(SPRN_MSRP) | MSRP_DEP); This mtspr should really just be a bit or in shadow_mspr when guest_debug gets enabled. It should automatically get synchronized as soon as the next vpcu_load() happens. Also, what happens when user space disables guest_debug? > +#else > + vcpu->arch.shadow_msr |= MSR_DE; > + vcpu->arch.shared->msr &= ~MSR_DE; > +#endif > + } > +} > + > /* > * Helper function for "full" MSR writes. No need to call this if only > * EE/CE/ME/DE/RI are changing. > @@ -150,6 +174,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) > kvmppc_mmu_msr_notify(vcpu, old_msr); > kvmppc_vcpu_sync_spe(vcpu); > kvmppc_vcpu_sync_fpu(vcpu); > + kvmppc_vcpu_sync_debug(vcpu); > } > > static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, > @@ -736,6 +761,9 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) > run->exit_reason = KVM_EXIT_DCR; > return RESUME_HOST; > > + case EMULATE_EXIT_USER: > + return RESUME_HOST; This should get folded into the previous patch or be a separate one. > + > case EMULATE_FAIL: > printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", > __func__, vcpu->arch.pc, vcpu->arch.last_inst); > @@ -751,6 +779,30 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) > } > } > > +static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu) > +{ > + u32 dbsr = mfspr(SPRN_DBSR); > + mtspr(SPRN_DBSR, dbsr); This definitely deserves a comment :). Also, are we non-preemptible here? > + > + run->debug.arch.status = 0; > + run->debug.arch.address = vcpu->arch.pc; > + > + if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) { > + run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT; > + } else { > + if (dbsr & (DBSR_DAC1W | DBSR_DAC2W)) > + run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE; > + else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R)) > + run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ; > + if (dbsr & (DBSR_DAC1R | DBSR_DAC1W)) > + run->debug.arch.address = vcpu->arch.shadow_dbg_reg.dac[0]; > + else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W)) > + run->debug.arch.address = vcpu->arch.shadow_dbg_reg.dac[1]; > + } > + > + return RESUME_HOST; Shouldn't this check for guest_debug and only go to user space if it asked for debugging? Can't you just leave it at the old code for !guest_debug? > +} > + > static void kvmppc_fill_pt_regs(struct pt_regs *regs) > { > ulong r1, ip, msr, lr; > @@ -1110,18 +1162,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, > } > > case BOOKE_INTERRUPT_DEBUG: { > - u32 dbsr; > - > - vcpu->arch.pc = mfspr(SPRN_CSRR0); > - > - /* clear IAC events in DBSR register */ > - dbsr = mfspr(SPRN_DBSR); > - dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4; > - mtspr(SPRN_DBSR, dbsr); > - > - run->exit_reason = KVM_EXIT_DEBUG; > + r = kvmppc_handle_debug(run, vcpu); > + if (r == RESUME_HOST) > + run->exit_reason = KVM_EXIT_DEBUG; > kvmppc_account_exit(vcpu, DEBUG_EXITS); > - r = RESUME_HOST; > break; > } > > @@ -1172,7 +1216,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) > kvmppc_set_msr(vcpu, 0); > > #ifndef CONFIG_KVM_BOOKE_HV > - vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS; > + vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS; > vcpu->arch.shadow_pid = 1; > vcpu->arch.shared->msr = 0; > #endif > @@ -1527,12 +1571,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) > return r; > } > > -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, > - struct kvm_guest_debug *dbg) > -{ > - return -EINVAL; > -} > - > int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) > { > return -ENOTSUPP; > @@ -1638,16 +1676,194 @@ void kvmppc_decrementer_func(unsigned long data) > kvmppc_set_tsr_bits(vcpu, TSR_DIS); > } > > +static void kvmppc_booke_vcpu_put_debug_regs(struct kvm_vcpu *vcpu) > +{ > + if (!vcpu->arch.debug_save_restore) > + return; > + > + /* Restore Host Context. Disable all debug events First */ > + mtspr(SPRN_DBCR0, 0x0); > + /* Disable pending debug event by Clearing DBSR */ > + mtspr(SPRN_DBSR, mfspr(SPRN_DBSR)); > + > + mtspr(SPRN_DBCR1, vcpu->arch.host_dbg_reg.dbcr1); > + mtspr(SPRN_DBCR2, vcpu->arch.host_dbg_reg.dbcr2); > +#ifdef CONFIG_KVM_E500MC > + mtspr(SPRN_DBCR4, vcpu->arch.host_dbg_reg.dbcr4); > +#endif > + mtspr(SPRN_IAC1, vcpu->arch.host_dbg_reg.iac[0]); > + mtspr(SPRN_IAC2, vcpu->arch.host_dbg_reg.iac[1]); > +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 > + mtspr(SPRN_IAC3, vcpu->arch.host_dbg_reg.iac[2]); > + mtspr(SPRN_IAC4, vcpu->arch.host_dbg_reg.iac[3]); > +#endif > + mtspr(SPRN_DAC1, vcpu->arch.host_dbg_reg.dac[0]); > + mtspr(SPRN_DAC2, vcpu->arch.host_dbg_reg.dac[1]); > + > + /* Enable debug events after all other debug registers restored */ > + mtspr(SPRN_DBCR0, vcpu->arch.host_dbg_reg.dbcr0); How does the normal debug register switching code work in Linux? Can't we just reuse that? Or rely on it to restore working state when another process gets scheduled in? > + > + /* Host debug register are restored */ > + vcpu->arch.debug_save_restore = false; > +} > + > +static void kvmppc_booke_vcpu_load_debug_regs(struct kvm_vcpu *vcpu) > +{ > + /* > + * Check whether guest still need debug resource, if not then there > + * is no need to resotre guest context. restore > + */ > + if (!vcpu->arch.shadow_dbg_reg.dbcr0) > + return; Are we guaranteed that debugging is disabled here? We don't want to get debug exceptions that were meant for some other process. > + > + /* > + * Save Host debug register only after restore. If host debug > + * registers are saved and not restored then do not save again. > + */ > + if (!vcpu->arch.debug_save_restore) { > + /* Save Host context */ > + vcpu->arch.host_dbg_reg.dbcr0 = mfspr(SPRN_DBCR0); > + vcpu->arch.host_dbg_reg.dbcr1 = mfspr(SPRN_DBCR1); > + vcpu->arch.host_dbg_reg.dbcr2 = mfspr(SPRN_DBCR2); > +#ifdef CONFIG_KVM_E500MC > + vcpu->arch.host_dbg_reg.dbcr4 = mfspr(SPRN_DBCR4); > +#endif > + vcpu->arch.host_dbg_reg.iac[0] = mfspr(SPRN_IAC1); > + vcpu->arch.host_dbg_reg.iac[1] = mfspr(SPRN_IAC2); > +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 > + vcpu->arch.host_dbg_reg.iac[2] = mfspr(SPRN_IAC3); > + vcpu->arch.host_dbg_reg.iac[3] = mfspr(SPRN_IAC4); > +#endif > + vcpu->arch.host_dbg_reg.dac[0] = mfspr(SPRN_DAC1); > + vcpu->arch.host_dbg_reg.dac[1] = mfspr(SPRN_DAC2); > + vcpu->arch.debug_save_restore = true; > + } > + > + /* Restore Guest Context. Disable all debug events First */ > + mtspr(SPRN_DBCR0, 0x0); > + /* Clear h/w DBSR */ > + mtspr(SPRN_DBSR, mfspr(SPRN_DBSR)); > + > + mtspr(SPRN_DBCR1, vcpu->arch.shadow_dbg_reg.dbcr1); > + mtspr(SPRN_DBCR2, vcpu->arch.shadow_dbg_reg.dbcr2); > +#ifdef CONFIG_KVM_E500MC > + mtspr(SPRN_DBCR4, vcpu->arch.shadow_dbg_reg.dbcr4); > +#endif > + mtspr(SPRN_IAC1, vcpu->arch.shadow_dbg_reg.iac[0]); > + mtspr(SPRN_IAC2, vcpu->arch.shadow_dbg_reg.iac[1]); > +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 > + mtspr(SPRN_IAC3, vcpu->arch.shadow_dbg_reg.iac[2]); > + mtspr(SPRN_IAC4, vcpu->arch.shadow_dbg_reg.iac[3]); > +#endif > + mtspr(SPRN_DAC1, vcpu->arch.shadow_dbg_reg.dac[0]); > + mtspr(SPRN_DAC2, vcpu->arch.shadow_dbg_reg.dac[1]); > + > + /* Enable debug events after other debug registers restored */ > + mtspr(SPRN_DBCR0, vcpu->arch.shadow_dbg_reg.dbcr0); > +} > + > +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, > + struct kvm_guest_debug *dbg) > +{ > + struct kvmppc_booke_debug_reg *dbg_reg; > + int n, b = 0, w = 0; > + const u32 bp_code[] = { > + DBCR0_IAC1 | DBCR0_IDM, > + DBCR0_IAC2 | DBCR0_IDM, > + DBCR0_IAC3 | DBCR0_IDM, > + DBCR0_IAC4 | DBCR0_IDM > + }; > + const u32 wp_code[] = { > + DBCR0_DAC1W | DBCR0_IDM, > + DBCR0_DAC2W | DBCR0_IDM, > + DBCR0_DAC1R | DBCR0_IDM, > + DBCR0_DAC2R | DBCR0_IDM > + }; > + > + if (!(dbg->control & KVM_GUESTDBG_ENABLE)) { > + /* Clear All debug events */ > + vcpu->arch.shadow_dbg_reg.dbcr0 = 0; > + vcpu->guest_debug = 0; Ah, this is where we disable guest_debug. This needs to enable guest_debug for the guest again, so you need to remove the DE bit from shadow_msrp here. > + return 0; > + } > + > + vcpu->guest_debug = dbg->control; > + vcpu->arch.shadow_dbg_reg.dbcr0 = 0; > + /* Set DBCR0_EDM in guest visible DBCR0 register. */ > + vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM; > + > + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) > + vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC; > + > + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { > + /* Code below handles only HW breakpoints */ > + kvmppc_booke_vcpu_load_debug_regs(vcpu); > + return 0; > + } > + > + dbg_reg = &(vcpu->arch.shadow_dbg_reg); > + > + /* > + * On BOOKE (e500v2); Set DBCR1 and DBCR2 to allow debug events > + * to occur when MSR.PR is set. > + * On BOOKE-HV (e500mc+); MSR.PR = 0 when guest is running. So we > + * should clear DBCR1 and DBCR2. > + */ > +#ifdef CONFIG_KVM_BOOKE_HV > + dbg_reg->dbcr1 = 0; > + dbg_reg->dbcr2 = 0; Does that mean we can't debug guest user space? > +#else > + dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US | > + DBCR1_IAC4US; > + dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; > +#endif > + > + for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) { > + u32 type = dbg->arch.bp[n].type; > + > + if (!type) > + break; Not continue? > + > + if (type & (KVMPPC_DEBUG_WATCH_READ | > + KVMPPC_DEBUG_WATCH_WRITE)) { > + if (w >= KVMPPC_BOOKE_DAC_NUM) > + continue; This should result in an error, no? > + > + if (type & KVMPPC_DEBUG_WATCH_READ) > + dbg_reg->dbcr0 |= wp_code[w + 2]; > + if (type & KVMPPC_DEBUG_WATCH_WRITE) > + dbg_reg->dbcr0 |= wp_code[w]; > + > + dbg_reg->dac[w] = dbg->arch.bp[n].addr; > + w++; > + } else if (type & KVMPPC_DEBUG_BREAKPOINT) { > + if (b >= KVMPPC_BOOKE_IAC_NUM) > + continue; Same here. > + > + dbg_reg->dbcr0 |= bp_code[b]; > + dbg_reg->iac[b] = dbg->arch.bp[n].addr; > + b++; > + } > + } > + > + kvmppc_booke_vcpu_load_debug_regs(vcpu); > + return 0; > +} > + > void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) > { > vcpu->cpu = smp_processor_id(); > current->thread.kvm_vcpu = vcpu; > + > + kvmppc_booke_vcpu_load_debug_regs(vcpu); > } > > void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu) > { > current->thread.kvm_vcpu = NULL; > vcpu->cpu = -1; > + > + kvmppc_booke_vcpu_put_debug_regs(vcpu); > } > > int __init kvmppc_booke_init(void) > diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c > index e78f353..cefdd38 100644 > --- a/arch/powerpc/kvm/e500_emulate.c > +++ b/arch/powerpc/kvm/e500_emulate.c > @@ -26,6 +26,7 @@ > #define XOP_TLBRE 946 > #define XOP_TLBWE 978 > #define XOP_TLBILX 18 > +#define XOP_EHPRIV 270 > > #ifdef CONFIG_KVM_E500MC > static int dbell2prio(ulong param) > @@ -130,6 +131,15 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, > emulated = kvmppc_e500_emul_tlbivax(vcpu, ea); > break; > > + case XOP_EHPRIV: > + run->exit_reason = KVM_EXIT_DEBUG; > + run->debug.arch.address = vcpu->arch.pc; > + run->debug.arch.status = 0; > + kvmppc_account_exit(vcpu, DEBUG_EXITS); > + emulated = EMULATE_EXIT_USER; > + *advance = 0; > + break; > + Looks good. I would appreciate if you could split this patch up a bit more. Alex -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html