On 06.09.2011, at 05:17, Liu Yu-B13201 wrote: > > >> -----Original Message----- >> From: kvm-ppc-owner@xxxxxxxxxxxxxxx >> [mailto:kvm-ppc-owner@xxxxxxxxxxxxxxx] On Behalf Of Alexander Graf >> Sent: Tuesday, September 06, 2011 6:45 AM >> To: Wood Scott-B07421 >> Cc: kvm-ppc@xxxxxxxxxxxxxxx >> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer >> register emulation >> >> >> On 27.08.2011, at 01:31, Scott Wood wrote: >> >>> From: Liu Yu <yu.liu@xxxxxxxxxxxxx> >>> >>> Decrementers are now properly driven by TCR/TSR, and the guest >>> has full read/write access to these registers. >>> >>> The decrementer keeps ticking (and setting the TSR bit) >> regardless of >>> whether the interrupts are enabled with TCR. >>> >>> The decrementer stops at zero, rather than going negative. >>> >>> Signed-off-by: Liu Yu <yu.liu@xxxxxxxxxxxxx> >>> [scott: added dequeue in kvmppc_booke_irqprio_deliver, and >> dec stop-at-zero] >>> Signed-off-by: Scott Wood <scottwood@xxxxxxxxxxxxx> >>> --- >>> arch/powerpc/include/asm/kvm_host.h | 2 +- >>> arch/powerpc/include/asm/kvm_ppc.h | 11 +++++ >>> arch/powerpc/kvm/book3s.c | 8 ++++ >>> arch/powerpc/kvm/booke.c | 80 >> +++++++++++++++++++++++++++-------- >>> arch/powerpc/kvm/booke.h | 4 ++ >>> arch/powerpc/kvm/booke_emulate.c | 11 ++++- >>> arch/powerpc/kvm/emulate.c | 45 ++++++++----------- >>> arch/powerpc/kvm/powerpc.c | 20 +-------- >>> 8 files changed, 114 insertions(+), 67 deletions(-) >>> >>> diff --git a/arch/powerpc/include/asm/kvm_host.h >> b/arch/powerpc/include/asm/kvm_host.h >>> index 3305af4..ea08c79 100644 >>> --- a/arch/powerpc/include/asm/kvm_host.h >>> +++ b/arch/powerpc/include/asm/kvm_host.h >>> @@ -334,7 +334,7 @@ struct kvm_vcpu_arch { >>> u32 tbl; >>> u32 tbu; >>> u32 tcr; >>> - u32 tsr; >>> + ulong tsr; /* we need to perform set/clr_bits() which >> requires ulong */ >>> u32 ivor[64]; >>> ulong ivpr; >>> u32 pvr; >>> diff --git a/arch/powerpc/include/asm/kvm_ppc.h >> b/arch/powerpc/include/asm/kvm_ppc.h >>> index bdaa6c8..ddaa615 100644 >>> --- a/arch/powerpc/include/asm/kvm_ppc.h >>> +++ b/arch/powerpc/include/asm/kvm_ppc.h >>> @@ -66,6 +66,7 @@ extern int >> kvmppc_emulate_instruction(struct kvm_run *run, >>> extern int kvmppc_emulate_mmio(struct kvm_run *run, struct >> kvm_vcpu *vcpu); >>> extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); >>> extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); >>> +extern void kvmppc_decrementer_func(unsigned long data); >>> >>> /* Core-specific hooks */ >>> >>> @@ -197,4 +198,14 @@ int kvm_vcpu_ioctl_config_tlb(struct >> kvm_vcpu *vcpu, >>> int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, >>> struct kvm_dirty_tlb *cfg); >>> >>> +static inline void kvmppc_wakeup_vcpu(struct kvm_vcpu *vcpu) >>> +{ >>> + if (waitqueue_active(&vcpu->wq)) { >>> + wake_up_interruptible(&vcpu->wq); >>> + vcpu->stat.halt_wakeup++; >>> + } else if (vcpu->cpu != -1) { >>> + smp_send_reschedule(vcpu->cpu); >>> + } >>> +} >>> + >>> #endif /* __POWERPC_KVM_PPC_H__ */ >>> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c >>> index f68a34d..b057856 100644 >>> --- a/arch/powerpc/kvm/book3s.c >>> +++ b/arch/powerpc/kvm/book3s.c >>> @@ -514,3 +514,11 @@ out: >>> mutex_unlock(&kvm->slots_lock); >>> return r; >>> } >>> + >>> +void kvmppc_decrementer_func(unsigned long data) >>> +{ >>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; >>> + >>> + kvmppc_core_queue_dec(vcpu); >>> + kvmppc_wakeup_vcpu(vcpu); >>> +} >>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c >>> index 0ed62c1..502f9ff 100644 >>> --- a/arch/powerpc/kvm/booke.c >>> +++ b/arch/powerpc/kvm/booke.c >>> @@ -205,7 +205,8 @@ void >> kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, >>> static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, >>> unsigned int priority) >>> { >>> - int allowed = 0; >>> + int allowed = 1; >>> + int dequeue = 0; >>> ulong uninitialized_var(msr_mask); >>> bool update_esr = false, update_dear = false; >>> ulong crit_raw = vcpu->arch.shared->critical; >>> @@ -258,10 +259,15 @@ static int >> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, >>> allowed = vcpu->arch.shared->msr & MSR_ME; >>> msr_mask = 0; >>> break; >>> - case BOOKE_IRQPRIO_EXTERNAL: >>> case BOOKE_IRQPRIO_DECREMENTER: >>> + if (!(vcpu->arch.tcr & TCR_DIE)) { >>> + allowed = 0; >>> + dequeue = 1; >>> + } >>> + /* fall through */ >>> + case BOOKE_IRQPRIO_EXTERNAL: >>> case BOOKE_IRQPRIO_FIT: >>> - allowed = vcpu->arch.shared->msr & MSR_EE; >>> + allowed = allowed && vcpu->arch.shared->msr & MSR_EE; >>> allowed = allowed && !crit; >>> msr_mask = MSR_CE|MSR_ME|MSR_DE; >>> break; >>> @@ -269,6 +275,8 @@ static int >> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, >>> allowed = vcpu->arch.shared->msr & MSR_DE; >>> msr_mask = MSR_ME; >>> break; >>> + default: >>> + allowed = 0; >>> } >>> >>> if (allowed) { >>> @@ -283,6 +291,9 @@ static int >> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, >>> >>> if (!keep_irq) >>> clear_bit(priority, >> &vcpu->arch.pending_exceptions); >>> + } else if (dequeue) { >>> + /* Should only happen due to races with masking */ >>> + clear_bit(priority, &vcpu->arch.pending_exceptions); >>> } >>> >>> return allowed; >>> @@ -721,25 +732,16 @@ static int set_sregs_base(struct >> kvm_vcpu *vcpu, >>> vcpu->arch.shared->esr = sregs->u.e.esr; >>> vcpu->arch.shared->dar = sregs->u.e.dear; >>> vcpu->arch.vrsave = sregs->u.e.vrsave; >>> - vcpu->arch.tcr = sregs->u.e.tcr; >>> + kvmppc_set_tcr(vcpu, sregs->u.e.tcr); >>> >>> - if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) >>> + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) { >>> vcpu->arch.dec = sregs->u.e.dec; >>> - >>> - kvmppc_emulate_dec(vcpu); >>> + kvmppc_emulate_dec(vcpu); >>> + } >>> >>> if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { >>> - /* >>> - * FIXME: existing KVM timer handling is incomplete. >>> - * TSR cannot be read by the guest, and its value in >>> - * vcpu->arch is always zero. For now, just handle >>> - * the case where the caller is trying to inject a >>> - * decrementer interrupt. >>> - */ >>> - >>> - if ((sregs->u.e.tsr & TSR_DIS) && >>> - (vcpu->arch.tcr & TCR_DIE)) >>> - kvmppc_core_queue_dec(vcpu); >>> + kvmppc_clr_tsr_bits(vcpu, ~0); >>> + kvmppc_set_tsr_bits(vcpu, sregs->u.e.tsr); >>> } >>> >>> return 0; >>> @@ -895,6 +897,48 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) >>> { >>> } >>> >>> +void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr) >>> +{ >>> + vcpu->arch.tcr = new_tcr; >>> + smp_wmb(); >>> + >>> + /* >>> + * Since TCR changed, we need to check >>> + * if blocked interrupts are deliverable. >>> + */ >>> + if ((new_tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS)) { >>> + kvmppc_core_queue_dec(vcpu); >>> + kvmppc_wakeup_vcpu(vcpu); >>> + } >>> +} >>> + >>> +void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) >>> +{ >>> + set_bits(tsr_bits, &vcpu->arch.tsr); >> >> I'm still missing the point why you need set_bits/clear_bits. >> Can't you just use |= and &= ~ like everyone else? :) >> > > Because set_bits/clear_bits is atomic. > set/clr_tsr_bits() may be called in different threads so that on different cores. > We need to make sure modification to be atomic to prevent missing changes. Ah, so you can set the TSR bit in the decrementer timer callback? Hrm. Please add a comment there saying so. All the other cases should be under vcpu_load() scope. If they're not, we have bugs in our code :). So they shouldn't need atomic operations (except for now they do because there's one user that doesn't do vcpu_load). Alex -- To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html