> >>>>> -static int emulation_exit(struct kvm_run *run, struct kvm_vcpu > >>>>> *vcpu) > >>>>> +static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, > >>>>> + int exit_nr) > >>>>> { > >>>>> enum emulation_result er; > >>>>> > >>>>> + if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) && > >>>>> + vcpu->arch.last_inst == KVMPPC_INST_GUEST_GDB) { > >>>> > >>>> This belongs into the normal emulation code path, behind the same > >>>> switch() that everything else goes through. > >>> > >>> I am not sure I understood correctly. Below is the reason why I > >>> placed this > >> code here. > >>> Instruction where software breakpoint is to be set is replaced by "ehpriv" > >> instruction. On e500v2, this is not a valid instruction can causes > >> program interrupt. On e500mc, "ehpriv" is a valid instruction. Both > >> the exit path calls emulation_exit(), so we have placed the code in this > function. > >>> Do you want this code to be moved in program interrupt exit path for > >>> e500v2 > >> and BOOKE_INTERRUPT_HV_PRIV for e500mc? > >> > >> Ok, in this patch you do (basically): > >> > >> int emulation_exit() > >> { > >> if (inst == DEBUG_INST) { > >> debug_stuff(); > >> return; > >> } > >> > >> switch (inst) { > >> case INST_A: > >> foo(); > >> .... > >> } > >> } > > > > Are not we doing something like this: > > int emulation_exit() > > { > > if (inst == DEBUG_INST) { > > debug_stuff(); > > return; > > } > > > > status = kvmppc_emulate_instruction() > > switch (status) { > > case FAIL: > > foo(); > > case DONE: > > foo1(); > > .... > > } > > } > > > > Do you want something like this: > > > > int emulation_exit() > > { > > > > status = kvmppc_emulate_instruction() > > switch (status) { > > case FAIL: > > if (inst == DEBUG_INST) { > > debug_stuff(); > > return; > > } > > foo(); > > > > case DONE: > > foo1(); > > .... > > } > > } > > No, I want the DEBUG_INST be handled the same as any other instruction we > emulate. 
I would like to understand what you are thinking: What I derived is , add the instruction in kvmppc_emulate_instruction() (or its child function) which, 1) fill the relevant information in run-> , kvmppc_account_exit(vcpu, DEBUG_EXITS); and returns EMULATION_DONE And in emulation_exit() status = kvmppc_emulate_instruction() switch (status) { case EMULATION_DONE: if (inst == DEBUG) return RESUME_HOST; } Or 2) kvmppc_account_exit(vcpu, DEBUG_EXITS); returns EMULATION_DONE; And in emulation_exit() status = kvmppc_emulate_instruction() switch (status) { case EMULATION_DONE: if (inst == DEBUG) { fill run-> return RESUME_HOST; } } Or 3) kvmppc_account_exit(vcpu, DEBUG_EXITS); returns a new status type (EMULATION_DEBUG_INST) And in emulation_exit() status = kvmppc_emulate_instruction() switch (status) { case EMULATION_DEBUG_INST: fill run-> return RESUME_HOST; } > > >> > >> what I want is: > >> > >> int emulation_exit() > >> { > >> switch (inst) { > >> case INST_A: > >> foo(); break; > >> case DEBUG_INST: > >> debug_stuff(); break; > >> .... > >> } > >> } > >> > >>> > >>> > >>>> > >>>>> + run->exit_reason = KVM_EXIT_DEBUG; > >>>>> + run->debug.arch.pc = vcpu->arch.pc; > >>>>> + run->debug.arch.exception = exit_nr; > >>>>> + run->debug.arch.status = 0; > >>>>> + kvmppc_account_exit(vcpu, DEBUG_EXITS); > >>>>> + return RESUME_HOST; > >>>>> + } > >>>>> + > >>>>> er = kvmppc_emulate_instruction(run, vcpu); > >>>>> switch (er) { > >>>>> case EMULATE_DONE: > >>>>> @@ -697,6 +711,44 @@ static int emulation_exit(struct kvm_run > >>>>> *run, struct > >>>> kvm_vcpu *vcpu) > >>>>> default: > >>>>> BUG(); > >>>>> } > >>>>> + > >>>>> + if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_ENABLE) && > >>>>> + (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)) { > >>>> > >>>> I don't understand how this is supposed to work. When we enable > >>>> singlestep, why would we end up in emulation_exit()? 
> >>> > >>> When singlestep is enabled then we set DBCR0[ICMP] and the debug > >>> handler > >> should be able to handle this. I think you are right. > >>> > >>>> > >>>>> + run->exit_reason = KVM_EXIT_DEBUG; > >>>>> + return RESUME_HOST; > >>>>> + } > >>>>> +} > >>>>> + > >>>>> +static int kvmppc_handle_debug(struct kvm_run *run, struct > >>>>> +kvm_vcpu > >>>>> +*vcpu) { > >>>>> + u32 dbsr; > >>>>> + > >>>>> +#ifndef CONFIG_KVM_BOOKE_HV > >>>>> + if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) > >>>>> + vcpu->arch.pc = mfspr(SPRN_DSRR0); > >>>>> + else > >>>>> + vcpu->arch.pc = mfspr(SPRN_CSRR0); #endif > >>>> > >>>> Why doesn't this get handled in the asm code that recovers from the > >>>> respective exceptions? > >>> > >>> Yes. I will remove this. > >>> > >>>> > >>>>> + dbsr = vcpu->arch.dbsr; > >>>>> + > >>>>> + run->debug.arch.pc = vcpu->arch.pc; > >>>>> + run->debug.arch.status = 0; > >>>>> + vcpu->arch.dbsr = 0; > >>>>> + > >>>>> + if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) { > >>>>> + run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT; > >>>>> + } else { > >>>>> + if (dbsr & (DBSR_DAC1W | DBSR_DAC2W)) > >>>>> + run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE; > >>>>> + else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R)) > >>>>> + run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ; > >>>>> + if (dbsr & (DBSR_DAC1R | DBSR_DAC1W)) > >>>>> + run->debug.arch.pc = vcpu->arch.shadow_dbg_reg.dac[0]; > >>>>> + else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W)) > >>>>> + run->debug.arch.pc = vcpu->arch.shadow_dbg_reg.dac[1]; > >>>>> + } > >>>> > >>>> Mind to explain the guest register sharing mechanism you are > >>>> envisioning? Which registers are used by the guest? Which are used > >>>> by the host? Who decides what gets used by whom? > >>> > >>> struct kvm_vcpu_arch { > >>> . > >>> . 
> >>> > >>> /* guest debug registers*/ > >>> struct kvmppc_booke_debug_reg dbg_reg; > >>> /* shadow debug registers */ > >>> struct kvmppc_booke_debug_reg shadow_dbg_reg; > >>> /* host debug registers*/ > >>> struct kvmppc_booke_debug_reg host_dbg_reg; > >>> > >>> . > >>> . > >>> } > >>> > >>> dbg_reg: contains the values written by guest. > >>> shadow_dbg_reg: This contains the actual values to be written on > >>> behalf of > >> guest/qemu. > >>> host_dbg_reg: the debug register value of host (needed for > >>> store/restore > >> purpose). > >>> > >>> According to current design both, the qemu debug stub and guest, > >>> cannot share > >> the debug resources. QEMU debug stub is given priority. > >>> But host and guest can share all debug registers and host cannot > >>> debug guest > >> if it is using the debug resource (which probably does not look like > >> a good case). > >> > >> Ok. Assume that in guest context, we only ever want guest debugging > >> enabled. If QEMU wants to inject a QEMU breakpoint, it will fiddle > >> with the guest debug registers itself to inject its breakpoint into them. > > > > I would like to understand the context you are describing. > > QEMU would like to inject debug interrupt to guest only if a debug interrupt > happened in guest context and QEMU is not able to handle interrupt (because qemu > debug has not been set). Right? > > Yes > > > In that case the only thing that needed to be updated is guest->DBSR? > > I think you lost me here :) Really :) . Maybe I wrote something wrong. > > > > >> > >> With that scheme, would we still need shadow debug registers? Or > >> could we remove that one level of indirection? 
> >> > >>> > >>>> > >>>>> + > >>>>> + return RESUME_HOST; > >>>>> } > >>>>> > >>>>> static void kvmppc_fill_pt_regs(struct pt_regs *regs) @@ -843,7 > >>>>> +895,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct > >>>> kvm_vcpu *vcpu, > >>>>> break; > >>>>> > >>>>> case BOOKE_INTERRUPT_HV_PRIV: > >>>>> - r = emulation_exit(run, vcpu); > >>>>> + r = emulation_exit(run, vcpu, exit_nr); > >>>>> break; > >>>>> > >>>>> case BOOKE_INTERRUPT_PROGRAM: > >>>>> @@ -862,7 +914,7 @@ int kvmppc_handle_exit(struct kvm_run *run, > >>>>> struct > >>>> kvm_vcpu *vcpu, > >>>>> break; > >>>>> } > >>>>> > >>>>> - r = emulation_exit(run, vcpu); > >>>>> + r = emulation_exit(run, vcpu, exit_nr); > >>>>> break; > >>>>> > >>>>> case BOOKE_INTERRUPT_FP_UNAVAIL: > >>>>> @@ -1052,18 +1104,12 @@ int kvmppc_handle_exit(struct kvm_run > >>>>> *run, struct > >>>> kvm_vcpu *vcpu, > >>>>> } > >>>>> > >>>>> case BOOKE_INTERRUPT_DEBUG: { > >>>>> - u32 dbsr; > >>>>> - > >>>>> - vcpu->arch.pc = mfspr(SPRN_CSRR0); > >>>>> - > >>>>> - /* clear IAC events in DBSR register */ > >>>>> - dbsr = mfspr(SPRN_DBSR); > >>>>> - dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4; > >>>>> - mtspr(SPRN_DBSR, dbsr); > >>>>> - > >>>>> - run->exit_reason = KVM_EXIT_DEBUG; > >>>>> + r = kvmppc_handle_debug(run, vcpu); > >>>>> + if (r == RESUME_HOST) { > >>>>> + run->debug.arch.exception = exit_nr; > >>>>> + run->exit_reason = KVM_EXIT_DEBUG; > >>>>> + } > >>>>> kvmppc_account_exit(vcpu, DEBUG_EXITS); > >>>>> - r = RESUME_HOST; > >>>>> break; > >>>>> } > >>>>> > >>>>> @@ -1403,10 +1449,78 @@ int kvm_vcpu_ioctl_set_one_reg(struct > >>>>> kvm_vcpu *vcpu, > >>>> struct kvm_one_reg *reg) > >>>>> return r; > >>>>> } > >>>>> > >>>>> +#define BP_NUM KVMPPC_BOOKE_IAC_NUM > >>>>> +#define WP_NUM KVMPPC_BOOKE_DAC_NUM > >>>>> + > >>>>> int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, > >>>>> struct kvm_guest_debug *dbg) { > >>>>> - return -EINVAL; > >>>>> + > >>>>> + if (!(dbg->control & KVM_GUESTDBG_ENABLE)) { 
> >>>>> + /* Clear All debug events */ > >>>>> + vcpu->arch.shadow_dbg_reg.dbcr0 = 0; > >>>>> + vcpu->guest_debug = 0; > >>>>> + return 0; > >>>>> + } > >>>>> + > >>>>> + vcpu->guest_debug = dbg->control; > >>>>> + vcpu->arch.shadow_dbg_reg.dbcr0 = 0; > >>>>> + > >>>>> + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) > >>>>> + vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC; > >>>>> + > >>>>> + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { > >>>>> + struct kvmppc_booke_debug_reg *gdbgr = > >>>>> + &(vcpu->arch.shadow_dbg_reg); > >>>>> + int n, b = 0, w = 0; > >>>>> + const u32 bp_code[] = { > >>>>> + DBCR0_IAC1 | DBCR0_IDM, > >>>>> + DBCR0_IAC2 | DBCR0_IDM, > >>>>> + DBCR0_IAC3 | DBCR0_IDM, > >>>>> + DBCR0_IAC4 | DBCR0_IDM > >>>>> + }; > >>>>> + const u32 wp_code[] = { > >>>>> + DBCR0_DAC1W | DBCR0_IDM, > >>>>> + DBCR0_DAC2W | DBCR0_IDM, > >>>>> + DBCR0_DAC1R | DBCR0_IDM, > >>>>> + DBCR0_DAC2R | DBCR0_IDM > >>>>> + }; > >>>>> + > >>>>> +#ifndef CONFIG_KVM_BOOKE_HV > >>>>> + gdbgr->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | > >>>>> + DBCR1_IAC3US | DBCR1_IAC4US; > >>>>> + gdbgr->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; #else > >>>>> + gdbgr->dbcr1 = 0; > >>>>> + gdbgr->dbcr2 = 0; > >>>>> +#endif > >>>>> + > >>>>> + for (n = 0; n < (BP_NUM + WP_NUM); n++) { > >>>>> + u32 type = dbg->arch.bp[n].type; > >>>>> + > >>>>> + if (!type) > >>>>> + break; > >>>>> + > >>>>> + if (type & (KVMPPC_DEBUG_WATCH_READ | > >>>>> + KVMPPC_DEBUG_WATCH_WRITE)) { > >>>>> + if (w < WP_NUM) { > >>>>> + if (type & KVMPPC_DEBUG_WATCH_READ) > >>>>> + gdbgr->dbcr0 |= wp_code[w + 2]; > >>>>> + if (type & KVMPPC_DEBUG_WATCH_WRITE) > >>>>> + gdbgr->dbcr0 |= wp_code[w]; > >>>>> + gdbgr->dac[w] = dbg->arch.bp[n].addr; > >>>>> + w++; > >>>>> + } > >>>>> + } else if (type & KVMPPC_DEBUG_BREAKPOINT) { > >>>>> + if (b < BP_NUM) { > >>>>> + gdbgr->dbcr0 |= bp_code[b]; > >>>>> + gdbgr->iac[b] = dbg->arch.bp[n].addr; > >>>>> + b++; > >>>>> + } > >>>>> + } > >>>>> + } > >>>>> + } > >>>>> + return 0; > 
>>>>> } > >>>>> > >>>>> int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct > >>>>> kvm_fpu *fpu) diff --git a/arch/powerpc/kvm/booke_interrupts.S > >>>> b/arch/powerpc/kvm/booke_interrupts.S > >>>>> index dd9c5d4..2202936 100644 > >>>>> --- a/arch/powerpc/kvm/booke_interrupts.S > >>>>> +++ b/arch/powerpc/kvm/booke_interrupts.S > >>>>> @@ -39,6 +39,8 @@ > >>>>> #define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + 4) #define > >>>>> HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */ > >>>>> #define HOST_STACK_LR (HOST_STACK_SIZE + 4) /* In caller stack frame. */ > >>>>> +#define DBCR0_AC_BITS (DBCR0_IAC1 | DBCR0_IAC2 | DBCR0_IAC3 | > DBCR0_IAC4 | \ > >>>>> + DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | > DBCR0_DAC2W) > >>>>> > >>>>> #define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \ > >>>>> (1<<BOOKE_INTERRUPT_DTLB_MISS) | \ @@ -52,6 > >>>>> +54,8 @@ > >>>>> (1<<BOOKE_INTERRUPT_PROGRAM) | \ > >>>>> (1<<BOOKE_INTERRUPT_DTLB_MISS)) > >>>>> > >>>>> +#define NEED_DEBUG_SAVE (1<<BOOKE_INTERRUPT_DEBUG) > >>>>> + > >>>>> .macro __KVM_HANDLER ivor_nr scratch srr0 > >>>>> stw r3, VCPU_GPR(R3)(r4) > >>>>> stw r5, VCPU_GPR(R5)(r4) > >>>>> @@ -212,6 +216,61 @@ _GLOBAL(kvmppc_resume_host) > >>>>> stw r9, VCPU_FAULT_ESR(r4) > >>>>> ..skip_esr: > >>>>> > >>>>> + addi r7, r4, VCPU_HOST_DBG > >>>>> + mfspr r9, SPRN_DBCR0 > >>>>> + lwz r8, KVMPPC_DBG_DBCR0(r7) > >>>>> + andis. 
r9, r9, DBCR0_AC_BITS@h > >>>>> + beq skip_load_host_debug > >>>>> + li r9, 0 > >>>>> + mtspr SPRN_DBCR0, r9 /* disable all debug event */ > >>>>> + lwz r9, KVMPPC_DBG_DBCR1(r7) > >>>>> + mtspr SPRN_DBCR1, r9 > >>>>> + lwz r9, KVMPPC_DBG_DBCR2(r7) > >>>>> + mtspr SPRN_DBCR2, r9 > >>>>> + lwz r9, KVMPPC_DBG_IAC1+4(r7) > >>>>> + mtspr SPRN_IAC1, r9 > >>>>> + lwz r9, KVMPPC_DBG_IAC2+4(r7) > >>>>> + mtspr SPRN_IAC2, r9 > >>>>> +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 > >>>>> + lwz r9, KVMPPC_DBG_IAC3+4(r7) > >>>>> + mtspr SPRN_IAC3, r9 > >>>>> + lwz r9, KVMPPC_DBG_IAC4+4(r7) > >>>>> + mtspr SPRN_IAC4, r9 > >>>>> +#endif > >>>>> + lwz r9, KVMPPC_DBG_DAC1+4(r7) > >>>>> + mtspr SPRN_DAC1, r9 > >>>>> + lwz r9, KVMPPC_DBG_DAC2+4(r7) > >>>> > >>>> Yikes. Please move this into a struct, similar to how the MSR > >>>> save/restore happens on x86. > >>>> > >>>> Also, these are 64-bit loads and stores, no? > >>> > >>> Ok, you mean PPC_LD/STD? > >> > >> Yeah :). > >> > >>> > >>>> > >>>>> + mtspr SPRN_DAC2, r9 > >>>>> +skip_load_host_debug: > >>>>> + /* Clear h/w DBSR and save current(guest) DBSR */ > >>>>> + mfspr r9, SPRN_DBSR > >>>>> + mtspr SPRN_DBSR, r9 > >>>>> + isync > >>>>> + andi. r7, r6, NEED_DEBUG_SAVE > >>>>> + beq skip_dbsr_save > >>>>> + /* > >>>>> + * If vcpu->guest_debug flag is set then do not check for > >>>>> + * shared->msr.DE as this debugging (say by QEMU) does not > >>>>> + * depends on shared->msr.de. In these scanerios MSR.DE is > >>>>> + * always set using shared_msr and should be handled always. > >>>>> + */ > >>>>> + lwz r7, VCPU_GUEST_DEBUG(r4) > >>>>> + cmpwi r7, 0 > >>>>> + bne skip_save_trap_event > >>>>> + PPC_LL r3, VCPU_SHARED(r4) > >>>>> +#ifndef CONFIG_64BIT > >>>>> + lwz r3, (VCPU_SHARED_MSR + 4)(r3) > >>>>> +#else > >>>>> + ld r3, (VCPU_SHARED_MSR)(r3) > >>>>> +#endif > >>>> > >>>> Didn't we have 64-bit access helpers for these? > >>> > >>> I will use PPC_LD. > >>> > >>>> > >>>> Also this shouldn't happen in the entry/exit code. 
We have > >>>> shadow_msr for these purposes. > >>> > >>> Based on shared_msr.MSR_DE, we set DBSR (set DBSR_TIE). > >> > >> Hrm. Yeah, must've misread something here. > >> > >> It would be awesome if we could make the debug register save/restore > >> a bit more relaxed though. For example during preemption. > > > > If we delay this save/restore then host cannot do debugging of KVM code. > Right? > > You mean using kdb? Does anyone actually do that? > > But let's leave this for later as an optimization. Do you mean that debug registers should be saved/restored in preemption? Any specific reason? I think the current save/restore does not add much overhead, and we can improve it now, can't we? Thanks -Bharat > > > Alex > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html