> -----Original Message----- > From: Alexander Graf [mailto:agraf@xxxxxxx] > Sent: Tuesday, August 07, 2012 3:38 PM > To: Bhushan Bharat-R65777 > Cc: kvm-ppc@xxxxxxxxxxxxxxx; kvm@xxxxxxxxxxxxxxx; Bhushan Bharat-R65777 > Subject: Re: [PATCH 1/2] KVM: PPC: booke: Add watchdog emulation > > > On 03.08.2012, at 07:30, Bharat Bhushan wrote: > > > This patch adds the watchdog emulation in KVM. The watchdog > > emulation is enabled by KVM_ENABLE_CAP(KVM_CAP_PPC_BOOKE_WATCHDOG) ioctl. > > The kernel timer are used for watchdog emulation and emulates > > h/w watchdog state machine. On watchdog timer expiry, it exit to QEMU > > if TCR.WRC is non ZERO. QEMU can reset/shutdown etc depending upon how > > it is configured. > > > > Signed-off-by: Liu Yu <yu.liu@xxxxxxxxxxxxx> > > Signed-off-by: Scott Wood <scottwood@xxxxxxxxxxxxx> > > [bharat.bhushan@xxxxxxxxxxxxx: reworked patch] > > Signed-off-by: Bharat Bhushan <bharat.bhushan@xxxxxxxxxxxxx> > > --- > > v7: > > - kvmppc_core_dequeue_watchdog() and kvmppc_core_enque_watchdog() > > are made staic > > - Clear the KVM_REQ_WATCHDOG request when TSR_ENW | ENW_WIS cleared > > rather than checking these bits on handling the request. > > Below changes (pasted for quick understanding) > > diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c > > index 7eedaeb..a0f5922 100644 > > --- a/arch/powerpc/kvm/booke.c > > +++ b/arch/powerpc/kvm/booke.c > > @@ -629,8 +629,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct > kvm_vcpu *vcpu) > > goto out; > > } > > > > - if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu) && > > - (vcpu->arch.tsr & (TSR_ENW | TSR_WIS))) { > > + if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) { > > kvm_run->exit_reason = KVM_EXIT_WATCHDOG; > > ret = 0; > > goto out; > > @@ -1098,8 +1097,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct > kvm_vcpu *vcpu, > > } > > } > > > > - if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu) && > > - (vcpu->arch.tsr & (TSR_ENW | TSR_WIS))) { > > + if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) { > > run->exit_reason = KVM_EXIT_WATCHDOG; > > r = RESUME_HOST | (r & RESUME_FLAG_NV); > > } > > @@ -1251,8 +1249,10 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, > > > > vcpu->arch.tsr = sregs->u.e.tsr; > > > > - if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) > > + if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) { > > + clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests); > > arm_next_watchdog(vcpu); > > + } > > > > update_timer_ints(vcpu); > > } > > @@ -1434,8 +1434,10 @@ void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 > tsr_bits) > > * We may have stopped the watchdog due to > > * being stuck on final expiration. > > */ > > - if (tsr_bits & (TSR_ENW | TSR_WIS)) > > + if (tsr_bits & (TSR_ENW | TSR_WIS)) { > > + clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests); > > arm_next_watchdog(vcpu); > > + } > > ---------- > > > > v6: > > - Added kvmppc_subarch_vcpu_unit/uninit() for subarch specific initialization > > - Using spin_lock_irqsave() > > - Using del_timer_sync(). > > > > v5: > > - Checking that TSR_ENW/TSR_WIS are still set on KVM_EXIT_WATCHDOG. > > - Moved watchdog_next_timeout() in lock. > > > > arch/powerpc/include/asm/kvm_host.h | 3 + > > arch/powerpc/include/asm/kvm_ppc.h | 2 + > > arch/powerpc/include/asm/reg_booke.h | 7 ++ > > arch/powerpc/kvm/book3s.c | 9 ++ > > arch/powerpc/kvm/booke.c | 158 ++++++++++++++++++++++++++++++++++ > > arch/powerpc/kvm/booke_emulate.c | 8 ++ > > arch/powerpc/kvm/powerpc.c | 14 +++- > > include/linux/kvm.h | 2 + > > include/linux/kvm_host.h | 1 + > > 9 files changed, 202 insertions(+), 2 deletions(-) > > > > diff --git a/arch/powerpc/include/asm/kvm_host.h > b/arch/powerpc/include/asm/kvm_host.h > > index 572ad01..873ec11 100644 > > --- a/arch/powerpc/include/asm/kvm_host.h > > +++ b/arch/powerpc/include/asm/kvm_host.h > > @@ -469,6 +469,8 @@ struct kvm_vcpu_arch { > > ulong fault_esr; > > ulong queued_dear; > > ulong queued_esr; > > + spinlock_t wdt_lock; > > + struct timer_list wdt_timer; > > u32 tlbcfg[4]; > > u32 mmucfg; > > u32 epr; > > @@ -484,6 +486,7 @@ struct kvm_vcpu_arch { > > u8 osi_needed; > > u8 osi_enabled; > > u8 papr_enabled; > > + u8 watchdog_enabled; > > u8 sane; > > u8 cpu_type; > > u8 hcall_needed; > > diff --git a/arch/powerpc/include/asm/kvm_ppc.h > b/arch/powerpc/include/asm/kvm_ppc.h > > index 0124937..1438a5e 100644 > > --- a/arch/powerpc/include/asm/kvm_ppc.h > > +++ b/arch/powerpc/include/asm/kvm_ppc.h > > @@ -68,6 +68,8 @@ extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); > > extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); > > extern void kvmppc_decrementer_func(unsigned long data); > > extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu); > > +extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu); > > +extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu); > > > > /* Core-specific hooks */ > > > > diff --git a/arch/powerpc/include/asm/reg_booke.h > b/arch/powerpc/include/asm/reg_booke.h > > index 2d916c4..e07e6af 100644 > > --- a/arch/powerpc/include/asm/reg_booke.h > > +++ b/arch/powerpc/include/asm/reg_booke.h > > @@ -539,6 +539,13 @@ > > #define TCR_FIE 0x00800000 /* FIT Interrupt Enable */ > > #define TCR_ARE 0x00400000 /* Auto Reload Enable */ > > > > +#ifdef CONFIG_E500 > > +#define TCR_GET_WP(tcr) ((((tcr) & 0xC0000000) >> 30) | \ > > + (((tcr) & 0x1E0000) >> 15)) > > +#else > > +#define TCR_GET_WP(tcr) (((tcr) & 0xC0000000) >> 30) > > +#endif > > + > > /* Bit definitions for the TSR. */ > > #define TSR_ENW 0x80000000 /* Enable Next Watchdog */ > > #define TSR_WIS 0x40000000 /* WDT Interrupt Status */ > > diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c > > index 3f2a836..e946665 100644 > > --- a/arch/powerpc/kvm/book3s.c > > +++ b/arch/powerpc/kvm/book3s.c > > @@ -411,6 +411,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) > > return 0; > > } > > > > +int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) > > +{ > > + return 0; > > +} > > + > > +void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu) > > +{ > > +} > > + > > int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) > > { > > int i; > > diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c > > index d25a097..a0f5922 100644 > > --- a/arch/powerpc/kvm/booke.c > > +++ b/arch/powerpc/kvm/booke.c > > @@ -206,6 +206,16 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, > > clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); > > } > > > > +static void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu) > > +{ > > + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG); > > +} > > + > > +static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu) > > +{ > > + clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions); > > +} > > + > > static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) > > { > > #ifdef CONFIG_KVM_BOOKE_HV > > @@ -325,6 +335,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu > *vcpu, > > msr_mask = MSR_CE | MSR_ME | MSR_DE; > > int_class = INT_CLASS_NONCRIT; > > break; > > + case BOOKE_IRQPRIO_WATCHDOG: > > case BOOKE_IRQPRIO_CRITICAL: > > case BOOKE_IRQPRIO_DBELL_CRIT: > > allowed = vcpu->arch.shared->msr & MSR_CE; > > @@ -404,12 +415,114 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu > *vcpu, > > return allowed; > > } > > > > +/* > > + * Return the number of jiffies until the next timeout. If the timeout is > > + * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA > > + * because the larger value can break the timer APIs. > > + */ > > +static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu) > > +{ > > + u64 tb, wdt_tb, wdt_ticks = 0; > > + u64 nr_jiffies = 0; > > + u32 period = TCR_GET_WP(vcpu->arch.tcr); > > + > > + wdt_tb = 1ULL << (63 - period); > > + tb = get_tb(); > > + /* > > + * The watchdog timeout will hapeen when TB bit corresponding > > + * to watchdog will toggle from 0 to 1. > > + */ > > + if (tb & wdt_tb) > > + wdt_ticks = wdt_tb; > > + > > + wdt_ticks += wdt_tb - (tb & (wdt_tb - 1)); > > + > > + /* Convert timebase ticks to jiffies */ > > + nr_jiffies = wdt_ticks; > > + > > + if (do_div(nr_jiffies, tb_ticks_per_jiffy)) > > + nr_jiffies++; > > + > > + return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA); > > +} > > + > > +static void arm_next_watchdog(struct kvm_vcpu *vcpu) > > +{ > > + unsigned long nr_jiffies; > > + unsigned long flags; > > + > > + spin_lock_irqsave(&vcpu->arch.wdt_lock, flags); > > + nr_jiffies = watchdog_next_timeout(vcpu); > > + /* > > + * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA > > + * then do not run the watchdog timer as this can break timer APIs. > > + */ > > + if (nr_jiffies < NEXT_TIMER_MAX_DELTA) > > + mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies); > > + else > > + del_timer(&vcpu->arch.wdt_timer); > > + spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags); > > +} > > + > > +void kvmppc_watchdog_func(unsigned long data) > > +{ > > + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; > > + u32 tsr, new_tsr; > > + int final; > > + > > + do { > > + new_tsr = tsr = vcpu->arch.tsr; > > + final = 0; > > + > > + /* Time out event */ > > + if (tsr & TSR_ENW) { > > + if (tsr & TSR_WIS) > > + final = 1; > > + else > > + new_tsr = tsr | TSR_WIS; > > + } else { > > + new_tsr = tsr | TSR_ENW; > > + } > > + } while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr); > > + > > + if (new_tsr & TSR_WIS) { > > + smp_wmb(); > > + kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); > > + kvm_vcpu_kick(vcpu); > > + } > > + > > + /* > > + * If this is final watchdog expiry and some action is required > > + * then exit to userspace. > > + */ > > + if (final && (vcpu->arch.tcr & TCR_WRC_MASK) && > > + vcpu->arch.watchdog_enabled) { > > + smp_wmb(); > > + kvm_make_request(KVM_REQ_WATCHDOG, vcpu); > > + kvm_vcpu_kick(vcpu); > > + } > > + > > + /* > > + * Stop running the watchdog timer after final expiration to > > + * prevent the host from being flooded with timers if the > > + * guest sets a short period. > > + * Timers will resume when TSR/TCR is updated next time. > > + */ > > + if (!final) > > + arm_next_watchdog(vcpu); > > +} > > + > > static void update_timer_ints(struct kvm_vcpu *vcpu) > > { > > if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS)) > > kvmppc_core_queue_dec(vcpu); > > else > > kvmppc_core_dequeue_dec(vcpu); > > + > > + if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS)) > > + kvmppc_core_queue_watchdog(vcpu); > > + else > > + kvmppc_core_dequeue_watchdog(vcpu); > > } > > > > static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu) > > @@ -516,6 +629,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct > kvm_vcpu *vcpu) > > goto out; > > } > > > > + if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) { > > + kvm_run->exit_reason = KVM_EXIT_WATCHDOG; > > + ret = 0; > > + goto out; > > + } > > + > > kvm_guest_enter(); > > > > #ifdef CONFIG_PPC_FPU > > @@ -978,6 +1097,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct > kvm_vcpu *vcpu, > > } > > } > > > > + if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) { > > + run->exit_reason = KVM_EXIT_WATCHDOG; > > + r = RESUME_HOST | (r & RESUME_FLAG_NV); > > + } > > + > > return r; > > } > > > > @@ -1011,6 +1135,21 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) > > return r; > > } > > > > +int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) > > +{ > > + /* setup watchdog timer once */ > > + spin_lock_init(&vcpu->arch.wdt_lock); > > + setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func, > > + (unsigned long)vcpu); > > + > > + return 0; > > +} > > + > > +void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu) > > +{ > > + del_timer_sync(&vcpu->arch.wdt_timer); > > +} > > + > > int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) > > { > > int i; > > @@ -1106,7 +1245,15 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, > > } > > > > if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { > > + u32 old_tsr = vcpu->arch.tsr; > > + > > vcpu->arch.tsr = sregs->u.e.tsr; > > + > > + if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) { > > + clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests); > > + arm_next_watchdog(vcpu); > > + } > > + > > update_timer_ints(vcpu); > > } > > > > @@ -1267,6 +1414,7 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm, > > void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr) > > { > > vcpu->arch.tcr = new_tcr; > > + arm_next_watchdog(vcpu); > > update_timer_ints(vcpu); > > } > > > > @@ -1281,6 +1429,16 @@ void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 > tsr_bits) > > void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) > > { > > clear_bits(tsr_bits, &vcpu->arch.tsr); > > + > > + /* > > + * We may have stopped the watchdog due to > > + * being stuck on final expiration. > > + */ > > + if (tsr_bits & (TSR_ENW | TSR_WIS)) { > > + clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests); > > + arm_next_watchdog(vcpu); > > Just a thought: Would it make sense to fold the clear_bit into > arm_next_watchdog()? We want to clear_bit(REQ_WATCHDOG) only when ENW or WIS are cleared. Arm_next_watchdog() is called from some more places where these bit are not cleared. We can do in arm_next_watchdog() in something like this: if (vcpu->arch.tsr & (TSR_ENW | TSR_WIS) != TSR_ENW | TSR_WIS) clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests); Thanks -Bharat > > Otherwise looks good to me :) > > > Alex > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html