On Fri, Oct 28, 2016 at 11:27:50AM +0100, Marc Zyngier wrote:
> Architecturally, TLBs are private to the (physical) CPU they're
> associated with. But when multiple vcpus from the same VM are
> being multiplexed on the same CPU, the TLBs are not private
> to the vcpus (and are actually shared across the VMID).
>
> Let's consider the following scenario:
>
> - vcpu-0 maps PA to VA
> - vcpu-1 maps PA' to VA
>
> If run on the same physical CPU, vcpu-1 can hit TLB entries generated
> by vcpu-0 accesses, and access the wrong physical page.
>
> The solution to this is to keep a per-VM map of which vcpu ran last
> on each given physical CPU, and invalidate local TLBs when switching
> to a different vcpu from the same VM.
>
> Reviewed-by: Mark Rutland <mark.rutland@xxxxxxx>
> Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx>
> ---
> Fixed comments, added Mark's RB.
>
>  arch/arm/include/asm/kvm_host.h   | 11 ++++++++++-
>  arch/arm/include/asm/kvm_hyp.h    |  1 +
>  arch/arm/kvm/arm.c                | 35 ++++++++++++++++++++++++++++++++++-
>  arch/arm/kvm/hyp/switch.c         |  9 +++++++++
>  arch/arm64/include/asm/kvm_host.h | 11 ++++++++++-
>  arch/arm64/kvm/hyp/switch.c       |  8 ++++++++
>  6 files changed, 72 insertions(+), 3 deletions(-)
>
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index 2d19e02..7290de6 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -57,6 +57,9 @@ struct kvm_arch {
>  	/* VTTBR value associated with below pgd and vmid */
>  	u64    vttbr;
>
> +	/* The last vcpu id that ran on each physical CPU */
> +	int __percpu *last_vcpu_ran;
> +
>  	/* Timer */
>  	struct arch_timer_kvm	timer;
>
> @@ -174,6 +177,13 @@ struct kvm_vcpu_arch {
>  	/* vcpu power-off state */
>  	bool power_off;
>
> +	/*
> +	 * Local TLBs potentially contain conflicting entries from
> +	 * another vCPU within this VMID. All entries for this VMID must
> +	 * be invalidated from (local) TLBs before we run this vCPU.
> +	 */
> +	bool tlb_vmid_stale;
> +
>  	/* Don't run the guest (internal implementation need) */
>  	bool pause;
>
> @@ -292,7 +302,6 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
>  static inline void kvm_arch_hardware_unsetup(void) {}
>  static inline void kvm_arch_sync_events(struct kvm *kvm) {}
>  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
> -static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>
>  static inline void kvm_arm_init_debug(void) {}
> diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
> index 343135e..5850890 100644
> --- a/arch/arm/include/asm/kvm_hyp.h
> +++ b/arch/arm/include/asm/kvm_hyp.h
> @@ -71,6 +71,7 @@
>  #define ICIALLUIS	__ACCESS_CP15(c7, 0, c1, 0)
>  #define ATS1CPR		__ACCESS_CP15(c7, 0, c8, 0)
>  #define TLBIALLIS	__ACCESS_CP15(c8, 0, c3, 0)
> +#define TLBIALL		__ACCESS_CP15(c8, 0, c7, 0)
>  #define TLBIALLNSNHIS	__ACCESS_CP15(c8, 4, c3, 4)
>  #define PRRR		__ACCESS_CP15(c10, 0, c2, 0)
>  #define NMRR		__ACCESS_CP15(c10, 0, c2, 1)
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index 08bb84f..e0d93cd 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -114,11 +114,18 @@ void kvm_arch_check_processor_compat(void *rtn)
>   */
>  int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>  {
> -	int ret = 0;
> +	int ret, cpu;
>
>  	if (type)
>  		return -EINVAL;
>
> +	kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran));
> +	if (!kvm->arch.last_vcpu_ran)
> +		return -ENOMEM;
> +
> +	for_each_possible_cpu(cpu)
> +		*per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1;
> +
>  	ret = kvm_alloc_stage2_pgd(kvm);
>  	if (ret)
>  		goto out_fail_alloc;
> @@ -141,6 +148,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>  out_free_stage2_pgd:
>  	kvm_free_stage2_pgd(kvm);
>  out_fail_alloc:
> +	free_percpu(kvm->arch.last_vcpu_ran);
> +	kvm->arch.last_vcpu_ran = NULL;
>  	return ret;
>  }
>
> @@ -168,6 +177,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>  {
>  	int i;
>
> +	free_percpu(kvm->arch.last_vcpu_ran);
> +	kvm->arch.last_vcpu_ran = NULL;
> +
>  	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
>  		if (kvm->vcpus[i]) {
>  			kvm_arch_vcpu_free(kvm->vcpus[i]);
> @@ -310,6 +322,27 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
>  	return 0;
>  }
>
> +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
> +{

why is calling this from here sufficient? You only get a notification
from preempt notifiers if you were preempted while running (or rather
while the vcpu was loaded).

I think this needs to go in kvm_arch_vcpu_load, but be aware that the
vcpu_load gets called for other vcpu ioctls and doesn't necessarily
imply that the vcpu will actually run, which is also the case for the
sched_in notification, btw. The worst that will happen in that case is
a bit of extra TLB invalidation, so sticking with kvm_arch_vcpu_load is
probably fine.
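To make it concrete, roughly something like this (completely untested
sketch, just folding your check into kvm_arch_vcpu_load instead of the
sched_in hook, everything else kept as in your patch):

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	int *last_ran;

	last_ran = per_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran, cpu);

	/*
	 * vcpu_load can happen for ioctls that never run the vCPU, but
	 * over-invalidating is harmless: the pending TLBI just gets
	 * consumed the next time we make it to __activate_vm.
	 */
	if (*last_ran != vcpu->vcpu_id) {
		if (*last_ran != -1)
			vcpu->arch.tlb_vmid_stale = true;

		*last_ran = vcpu->vcpu_id;
	}

	vcpu->cpu = cpu;
	/* ... rest of kvm_arch_vcpu_load() unchanged ... */
}

That would also mean you can keep the empty kvm_arch_sched_in() stubs
instead of removing them.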
> +	int *last_ran;
> +
> +	last_ran = per_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran, cpu);
> +
> +	/*
> +	 * We might get preempted before the vCPU actually runs, but
> +	 * this is fine. Our TLBI stays pending until we actually make
> +	 * it to __activate_vm, so we won't miss a TLBI. If another
> +	 * vCPU gets scheduled, it will see our vcpu_id in last_ran,
> +	 * and pend a TLBI for itself.
> +	 */
> +	if (*last_ran != vcpu->vcpu_id) {
> +		if (*last_ran != -1)
> +			vcpu->arch.tlb_vmid_stale = true;
> +
> +		*last_ran = vcpu->vcpu_id;
> +	}
> +}
> +
>  void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  {
>  	vcpu->cpu = cpu;
> diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
> index 92678b7..a411762 100644
> --- a/arch/arm/kvm/hyp/switch.c
> +++ b/arch/arm/kvm/hyp/switch.c
> @@ -75,6 +75,15 @@ static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
>  	write_sysreg(kvm->arch.vttbr, VTTBR);
> +	if (vcpu->arch.tlb_vmid_stale) {
> +		/* Force vttbr to be written */
> +		isb();
> +		/* Local invalidate only for this VMID */
> +		write_sysreg(0, TLBIALL);
> +		dsb(nsh);
> +		vcpu->arch.tlb_vmid_stale = false;
> +	}
> +

why not call this directly when you notice it via kvm_call_hyp as
opposed to adding another conditional in the critical path?
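Something along these lines is what I have in mind (untested sketch;
__kvm_tlb_flush_local_vmid is just a name I made up for the new hyp
helper, and you'd presumably want the usual declaration in kvm_asm.h;
arm64 would do the same thing with "tlbi vmalle1"):

/* new hyp helper, e.g. in arch/arm/kvm/hyp/tlb.c */
void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);

	/* Switch to the requested VMID */
	write_sysreg(kvm->arch.vttbr, VTTBR);
	isb();

	/* Local (non-broadcast) invalidate only for this VMID */
	write_sysreg(0, TLBIALL);
	dsb(nsh);
	isb();

	write_sysreg(0, VTTBR);
}

and then on the vcpu_load/sched_in side, instead of setting
tlb_vmid_stale:

	if (*last_ran != vcpu->vcpu_id) {
		if (*last_ran != -1)
			kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu);

		*last_ran = vcpu->vcpu_id;
	}

That keeps __activate_vm branch-free and lets you drop the
tlb_vmid_stale flag entirely.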
>  	write_sysreg(vcpu->arch.midr, VPIDR);
>  }
>
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index bd94e67..0f62829 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -62,6 +62,9 @@ struct kvm_arch {
>  	/* VTTBR value associated with above pgd and vmid */
>  	u64    vttbr;
>
> +	/* The last vcpu id that ran on each physical CPU */
> +	int __percpu *last_vcpu_ran;
> +
>  	/* The maximum number of vCPUs depends on the used GIC model */
>  	int max_vcpus;
>
> @@ -252,6 +255,13 @@ struct kvm_vcpu_arch {
>  	/* vcpu power-off state */
>  	bool power_off;
>
> +	/*
> +	 * Local TLBs potentially contain conflicting entries from
> +	 * another vCPU within this VMID. All entries for this VMID must
> +	 * be invalidated from (local) TLBs before we run this vCPU.
> +	 */
> +	bool tlb_vmid_stale;
> +
>  	/* Don't run the guest (internal implementation need) */
>  	bool pause;
>
> @@ -368,7 +378,6 @@ static inline void __cpu_reset_hyp_mode(unsigned long vector_ptr,
>  static inline void kvm_arch_hardware_unsetup(void) {}
>  static inline void kvm_arch_sync_events(struct kvm *kvm) {}
>  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
> -static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>
>  void kvm_arm_init_debug(void);
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index 83037cd..99d0f33 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -131,6 +131,14 @@ static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
>  	write_sysreg(kvm->arch.vttbr, vttbr_el2);
> +	if (vcpu->arch.tlb_vmid_stale) {
> +		/* Force vttbr_el2 to be written */
> +		isb();
> +		/* Local invalidate only for this VMID */
> +		asm volatile("tlbi vmalle1" : : );
> +		dsb(nsh);
> +		vcpu->arch.tlb_vmid_stale = false;
> +	}
>  }
>
>  static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
> --
> 2.1.4
>

Thanks,
-Christoffer