> -----Original Message----- > From: kvm-ppc-owner@xxxxxxxxxxxxxxx > [mailto:kvm-ppc-owner@xxxxxxxxxxxxxxx] On Behalf Of Alexander Graf > Sent: Wednesday, September 07, 2011 3:06 AM > To: Liu Yu-B13201 > Cc: Wood Scott-B07421; kvm-ppc@xxxxxxxxxxxxxxx > Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer > register emulation > > > > Am 06.09.2011 um 10:04 schrieb Alexander Graf <agraf@xxxxxxx>: > > > > > On 06.09.2011, at 05:17, Liu Yu-B13201 wrote: > > > >> > >> > >>> -----Original Message----- > >>> From: kvm-ppc-owner@xxxxxxxxxxxxxxx > >>> [mailto:kvm-ppc-owner@xxxxxxxxxxxxxxx] On Behalf Of Alexander Graf > >>> Sent: Tuesday, September 06, 2011 6:45 AM > >>> To: Wood Scott-B07421 > >>> Cc: kvm-ppc@xxxxxxxxxxxxxxx > >>> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer > >>> register emulation > >>> > >>> > >>> On 27.08.2011, at 01:31, Scott Wood wrote: > >>> > >>>> From: Liu Yu <yu.liu@xxxxxxxxxxxxx> > >>>> > >>>> Decrementers are now properly driven by TCR/TSR, and the guest > >>>> has full read/write access to these registers. > >>>> > >>>> The decrementer keeps ticking (and setting the TSR bit) > >>> regardless of > >>>> whether the interrupts are enabled with TCR. > >>>> > >>>> The decrementer stops at zero, rather than going negative. > >>>> > >>>> Signed-off-by: Liu Yu <yu.liu@xxxxxxxxxxxxx> > >>>> [scott: added dequeue in kvmppc_booke_irqprio_deliver, and > >>> dec stop-at-zero] > >>>> Signed-off-by: Scott Wood <scottwood@xxxxxxxxxxxxx> > >>>> --- > >>>> arch/powerpc/include/asm/kvm_host.h | 2 +- > >>>> arch/powerpc/include/asm/kvm_ppc.h | 11 +++++ > >>>> arch/powerpc/kvm/book3s.c | 8 ++++ > >>>> arch/powerpc/kvm/booke.c | 80 > >>> +++++++++++++++++++++++++++-------- > >>>> arch/powerpc/kvm/booke.h | 4 ++ > >>>> arch/powerpc/kvm/booke_emulate.c | 11 ++++- > >>>> arch/powerpc/kvm/emulate.c | 45 ++++++++----------- > >>>> arch/powerpc/kvm/powerpc.c | 20 +-------- > >>>> 8 files changed, 114 insertions(+), 67 deletions(-) > >>>> > >>>> diff --git a/arch/powerpc/include/asm/kvm_host.h > >>> b/arch/powerpc/include/asm/kvm_host.h > >>>> index 3305af4..ea08c79 100644 > >>>> --- a/arch/powerpc/include/asm/kvm_host.h > >>>> +++ b/arch/powerpc/include/asm/kvm_host.h > >>>> @@ -334,7 +334,7 @@ struct kvm_vcpu_arch { > >>>> u32 tbl; > >>>> u32 tbu; > >>>> u32 tcr; > >>>> - u32 tsr; > >>>> + ulong tsr; /* we need to perform set/clr_bits() which > >>> requires ulong */ > >>>> u32 ivor[64]; > >>>> ulong ivpr; > >>>> u32 pvr; > >>>> diff --git a/arch/powerpc/include/asm/kvm_ppc.h > >>> b/arch/powerpc/include/asm/kvm_ppc.h > >>>> index bdaa6c8..ddaa615 100644 > >>>> --- a/arch/powerpc/include/asm/kvm_ppc.h > >>>> +++ b/arch/powerpc/include/asm/kvm_ppc.h > >>>> @@ -66,6 +66,7 @@ extern int > >>> kvmppc_emulate_instruction(struct kvm_run *run, > >>>> extern int kvmppc_emulate_mmio(struct kvm_run *run, struct > >>> kvm_vcpu *vcpu); > >>>> extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); > >>>> extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); > >>>> +extern void kvmppc_decrementer_func(unsigned long data); > >>>> > >>>> /* Core-specific hooks */ > >>>> > >>>> @@ -197,4 +198,14 @@ int kvm_vcpu_ioctl_config_tlb(struct > >>> kvm_vcpu *vcpu, > >>>> int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, > >>>> struct kvm_dirty_tlb *cfg); > >>>> > >>>> +static inline void kvmppc_wakeup_vcpu(struct kvm_vcpu *vcpu) > >>>> +{ > >>>> + if (waitqueue_active(&vcpu->wq)) { > >>>> + wake_up_interruptible(&vcpu->wq); > >>>> + vcpu->stat.halt_wakeup++; > >>>> + } else if (vcpu->cpu != -1) { > >>>> + smp_send_reschedule(vcpu->cpu); > >>>> + } > >>>> +} > >>>> + > >>>> #endif /* __POWERPC_KVM_PPC_H__ */ > >>>> diff --git a/arch/powerpc/kvm/book3s.c > b/arch/powerpc/kvm/book3s.c > >>>> index f68a34d..b057856 100644 > >>>> --- a/arch/powerpc/kvm/book3s.c > >>>> +++ b/arch/powerpc/kvm/book3s.c > >>>> @@ -514,3 +514,11 @@ out: > >>>> mutex_unlock(&kvm->slots_lock); > >>>> return r; > >>>> } > >>>> + > >>>> +void kvmppc_decrementer_func(unsigned long data) > >>>> +{ > >>>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; > >>>> + > >>>> + kvmppc_core_queue_dec(vcpu); > >>>> + kvmppc_wakeup_vcpu(vcpu); > >>>> +} > >>>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c > >>>> index 0ed62c1..502f9ff 100644 > >>>> --- a/arch/powerpc/kvm/booke.c > >>>> +++ b/arch/powerpc/kvm/booke.c > >>>> @@ -205,7 +205,8 @@ void > >>> kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, > >>>> static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, > >>>> unsigned int priority) > >>>> { > >>>> - int allowed = 0; > >>>> + int allowed = 1; > >>>> + int dequeue = 0; > >>>> ulong uninitialized_var(msr_mask); > >>>> bool update_esr = false, update_dear = false; > >>>> ulong crit_raw = vcpu->arch.shared->critical; > >>>> @@ -258,10 +259,15 @@ static int > >>> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, > >>>> allowed = vcpu->arch.shared->msr & MSR_ME; > >>>> msr_mask = 0; > >>>> break; > >>>> - case BOOKE_IRQPRIO_EXTERNAL: > >>>> case BOOKE_IRQPRIO_DECREMENTER: > >>>> + if (!(vcpu->arch.tcr & TCR_DIE)) { > >>>> + allowed = 0; > >>>> + dequeue = 1; > >>>> + } > >>>> + /* fall through */ > >>>> + case BOOKE_IRQPRIO_EXTERNAL: > >>>> case BOOKE_IRQPRIO_FIT: > >>>> - allowed = vcpu->arch.shared->msr & MSR_EE; > >>>> + allowed = allowed && vcpu->arch.shared->msr & MSR_EE; > >>>> allowed = allowed && !crit; > >>>> msr_mask = MSR_CE|MSR_ME|MSR_DE; > >>>> break; > >>>> @@ -269,6 +275,8 @@ static int > >>> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, > >>>> allowed = vcpu->arch.shared->msr & MSR_DE; > >>>> msr_mask = MSR_ME; > >>>> break; > >>>> + default: > >>>> + allowed = 0; > >>>> } > >>>> > >>>> if (allowed) { > >>>> @@ -283,6 +291,9 @@ static int > >>> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, > >>>> > >>>> if (!keep_irq) > >>>> clear_bit(priority, > >>> &vcpu->arch.pending_exceptions); > >>>> + } else if (dequeue) { > >>>> + /* Should only happen due to races with masking */ > >>>> + clear_bit(priority, &vcpu->arch.pending_exceptions); > >>>> } > >>>> > >>>> return allowed; > >>>> @@ -721,25 +732,16 @@ static int set_sregs_base(struct > >>> kvm_vcpu *vcpu, > >>>> vcpu->arch.shared->esr = sregs->u.e.esr; > >>>> vcpu->arch.shared->dar = sregs->u.e.dear; > >>>> vcpu->arch.vrsave = sregs->u.e.vrsave; > >>>> - vcpu->arch.tcr = sregs->u.e.tcr; > >>>> + kvmppc_set_tcr(vcpu, sregs->u.e.tcr); > >>>> > >>>> - if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) > >>>> + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) { > >>>> vcpu->arch.dec = sregs->u.e.dec; > >>>> - > >>>> - kvmppc_emulate_dec(vcpu); > >>>> + kvmppc_emulate_dec(vcpu); > >>>> + } > >>>> > >>>> if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { > >>>> - /* > >>>> - * FIXME: existing KVM timer handling is incomplete. > >>>> - * TSR cannot be read by the guest, and its value in > >>>> - * vcpu->arch is always zero. For now, just handle > >>>> - * the case where the caller is trying to inject a > >>>> - * decrementer interrupt. > >>>> - */ > >>>> - > >>>> - if ((sregs->u.e.tsr & TSR_DIS) && > >>>> - (vcpu->arch.tcr & TCR_DIE)) > >>>> - kvmppc_core_queue_dec(vcpu); > >>>> + kvmppc_clr_tsr_bits(vcpu, ~0); > >>>> + kvmppc_set_tsr_bits(vcpu, sregs->u.e.tsr); > >>>> } > >>>> > >>>> return 0; > >>>> @@ -895,6 +897,48 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) > >>>> { > >>>> } > >>>> > >>>> +void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr) > >>>> +{ > >>>> + vcpu->arch.tcr = new_tcr; > >>>> + smp_wmb(); > >>>> + > >>>> + /* > >>>> + * Since TCR changed, we need to check > >>>> + * if blocked interrupts are deliverable. > >>>> + */ > >>>> + if ((new_tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS)) { > >>>> + kvmppc_core_queue_dec(vcpu); > >>>> + kvmppc_wakeup_vcpu(vcpu); > >>>> + } > >>>> +} > >>>> + > >>>> +void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) > >>>> +{ > >>>> + set_bits(tsr_bits, &vcpu->arch.tsr); > >>> > >>> I'm still missing the point why you need set_bits/clear_bits. > >>> Can't you just use |= and &= ~ like everyone else? :) > >>> > >> > >> Because set_bits/clear_bits is atomic. > >> set/clr_tsr_bits() may be called in different threads so > that on different cores. > >> We need to make sure modification to be atomic to prevent > missing changes. > > > > Ah, so you can set the TSR bit in the decrementer timer > callback? Hrm. Please add a comment there saying so. All the > other cases should be under vcpu_load() scope. If they're > not, we have bugs in our code :). So they shouldn't need > atomic operations (except for now they do because there's one > user that doesn't do vcpu_load). > > Thinking about this for a bit more, shouldn't we be able to > share the TSR with the guest so that it can clear the DEC > interrupt without exiting the guest? Then we can't do ulong > because that would make the pv abi 32/64 specific. > > Overall, I'm also not very sure it's too useful to set TSR > from non-vcpu context. After all, in most cases the guest > should have interrupts enabled at that point and probably > wants to receive the interrupt in that moment, not some > seconds later when the next intercept occurs. So we still > want to kick the vcpu thread on dec interrupts. And then we > don't need anything to be atomic. > We don't only set TSR for dec interrupt case. In future, we would send out watchdog stuff which also set/clear TSR in non-vcpu context. The watchdog bit of TSR is not only related to watchdog interrupt but maintains a simple statemachine. Not set those states may confuse guest OS. Thanks, Yu -- To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html