On Wed, Jan 21, 2015 at 06:39:46PM +0000, Marc Zyngier wrote: > Trying to emulate the behaviour of set/way cache ops is fairly > pointless, as there are too many ways we can end-up missing stuff. > Also, there is some system caches out there that simply ignore > set/way operations. > > So instead of trying to implement them, let's convert it to VA ops, > and use them as a way to re-enable the trapping of VM ops. That way, > we can detect the point when the MMU/caches are turned off, and do > a full VM flush (which is what the guest was trying to do anyway). > > This allows a 32bit zImage to boot on the APM thingy, and will > probably help bootloaders in general. > > Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx> This had some conflicts with dirty page logging. I fixed it up here, and also removed some trailing white space and mixed spaces/tabs that patch complained about: http://git.linaro.org/people/christoffer.dall/linux-kvm-arm.git mm-fixes > --- > arch/arm/include/asm/kvm_emulate.h | 10 +++++ > arch/arm/include/asm/kvm_host.h | 3 -- > arch/arm/include/asm/kvm_mmu.h | 3 +- > arch/arm/kvm/arm.c | 10 ----- > arch/arm/kvm/coproc.c | 64 ++++++------------------------ > arch/arm/kvm/coproc_a15.c | 2 +- > arch/arm/kvm/coproc_a7.c | 2 +- > arch/arm/kvm/mmu.c | 70 ++++++++++++++++++++++++++++++++- > arch/arm/kvm/trace.h | 39 +++++++++++++++++++ > arch/arm64/include/asm/kvm_emulate.h | 10 +++++ > arch/arm64/include/asm/kvm_host.h | 3 -- > arch/arm64/include/asm/kvm_mmu.h | 3 +- > arch/arm64/kvm/sys_regs.c | 75 +++++------------------------------- > 13 files changed, 155 insertions(+), 139 deletions(-) > > diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h > index 66ce176..7b01523 100644 > --- a/arch/arm/include/asm/kvm_emulate.h > +++ b/arch/arm/include/asm/kvm_emulate.h > @@ -38,6 +38,16 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) > vcpu->arch.hcr = HCR_GUEST_MASK; > } > > +static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu) > +{ > + return vcpu->arch.hcr; > +} > + > +static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr) > +{ > + vcpu->arch.hcr = hcr; > +} > + > static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu) > { > return 1; > diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h > index 254e065..04b4ea0 100644 > --- a/arch/arm/include/asm/kvm_host.h > +++ b/arch/arm/include/asm/kvm_host.h > @@ -125,9 +125,6 @@ struct kvm_vcpu_arch { > * Anything that is not used directly from assembly code goes > * here. > */ > - /* dcache set/way operation pending */ > - int last_pcpu; > - cpumask_t require_dcache_flush; > > /* Don't run the guest on this vcpu */ > bool pause; > diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h > index 63e0ecc..286644c 100644 > --- a/arch/arm/include/asm/kvm_mmu.h > +++ b/arch/arm/include/asm/kvm_mmu.h > @@ -190,7 +190,8 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, > > #define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x)) > > -void stage2_flush_vm(struct kvm *kvm); > +void kvm_set_way_flush(struct kvm_vcpu *vcpu); > +void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); > > #endif /* !__ASSEMBLY__ */ > > diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c > index 2d6d910..0b0d58a 100644 > --- a/arch/arm/kvm/arm.c > +++ b/arch/arm/kvm/arm.c > @@ -281,15 +281,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) > vcpu->cpu = cpu; > vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state); > > - /* > - * Check whether this vcpu requires the cache to be flushed on > - * this physical CPU. This is a consequence of doing dcache > - * operations by set/way on this vcpu. We do it here to be in > - * a non-preemptible section. > - */ > - if (cpumask_test_and_clear_cpu(cpu, &vcpu->arch.require_dcache_flush)) > - flush_cache_all(); /* We'd really want v7_flush_dcache_all() */ > - > kvm_arm_set_running_vcpu(vcpu); > } > > @@ -541,7 +532,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) > ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); > > vcpu->mode = OUTSIDE_GUEST_MODE; > - vcpu->arch.last_pcpu = smp_processor_id(); > kvm_guest_exit(); > trace_kvm_exit(*vcpu_pc(vcpu)); > /* > diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c > index 7928dbd..0afcc00 100644 > --- a/arch/arm/kvm/coproc.c > +++ b/arch/arm/kvm/coproc.c > @@ -189,82 +189,40 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu, > return true; > } > > -/* See note at ARM ARM B1.14.4 */ > +/* > + * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). > + */ > static bool access_dcsw(struct kvm_vcpu *vcpu, > const struct coproc_params *p, > const struct coproc_reg *r) > { > - unsigned long val; > - int cpu; > - > if (!p->is_write) > return read_from_write_only(vcpu, p); > > - cpu = get_cpu(); > - > - cpumask_setall(&vcpu->arch.require_dcache_flush); > - cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush); > - > - /* If we were already preempted, take the long way around */ > - if (cpu != vcpu->arch.last_pcpu) { > - flush_cache_all(); > - goto done; > - } > - > - val = *vcpu_reg(vcpu, p->Rt1); > - > - switch (p->CRm) { > - case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */ > - case 14: /* DCCISW */ > - asm volatile("mcr p15, 0, %0, c7, c14, 2" : : "r" (val)); > - break; > - > - case 10: /* DCCSW */ > - asm volatile("mcr p15, 0, %0, c7, c10, 2" : : "r" (val)); > - break; > - } > - > -done: > - put_cpu(); > - > + kvm_set_way_flush(vcpu); > return true; > } > > /* > * Generic accessor for VM registers. Only called as long as HCR_TVM > - * is set. > + * is set. If the guest enables the MMU, we stop trapping the VM > + * sys_regs and leave it in complete control of the caches. > + * > + * Used by the cpu-specific code. > */ > static bool access_vm_reg(struct kvm_vcpu *vcpu, > const struct coproc_params *p, > const struct coproc_reg *r) > { > + bool was_enabled = vcpu_has_cache_enabled(vcpu); > + > BUG_ON(!p->is_write); > > vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1); > if (p->is_64bit) > vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2); > > - return true; > -} > - > -/* > - * SCTLR accessor. Only called as long as HCR_TVM is set. If the > - * guest enables the MMU, we stop trapping the VM sys_regs and leave > - * it in complete control of the caches. > - * > - * Used by the cpu-specific code. > - */ > -bool access_sctlr(struct kvm_vcpu *vcpu, > - const struct coproc_params *p, > - const struct coproc_reg *r) > -{ I think you left in a prototype in arch/arm/kvm/coproc.h > - access_vm_reg(vcpu, p, r); > - > - if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */ > - vcpu->arch.hcr &= ~HCR_TVM; > - stage2_flush_vm(vcpu->kvm); > - } > - > + kvm_toggle_cache(vcpu, was_enabled); > return true; > } > > diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c > index e6f4ae4..a713675 100644 > --- a/arch/arm/kvm/coproc_a15.c > +++ b/arch/arm/kvm/coproc_a15.c > @@ -34,7 +34,7 @@ > static const struct coproc_reg a15_regs[] = { > /* SCTLR: swapped by interrupt.S. */ > { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, > - access_sctlr, reset_val, c1_SCTLR, 0x00C50078 }, > + access_vm_reg, reset_val, c1_SCTLR, 0x00C50078 }, > }; > > static struct kvm_coproc_target_table a15_target_table = { > diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c > index 17fc7cd..b19e46d 100644 > --- a/arch/arm/kvm/coproc_a7.c > +++ b/arch/arm/kvm/coproc_a7.c > @@ -37,7 +37,7 @@ > static const struct coproc_reg a7_regs[] = { > /* SCTLR: swapped by interrupt.S. */ > { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, > - access_sctlr, reset_val, c1_SCTLR, 0x00C50878 }, > + access_vm_reg, reset_val, c1_SCTLR, 0x00C50878 }, I'm confused how you tested this, this doesn't seem to compile for me (access_vm_reg is a static function). > }; > > static struct kvm_coproc_target_table a7_target_table = { > diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c > index 1dc9778..106737e 100644 > --- a/arch/arm/kvm/mmu.c > +++ b/arch/arm/kvm/mmu.c > @@ -278,7 +278,7 @@ static void stage2_flush_memslot(struct kvm *kvm, > * Go through the stage 2 page tables and invalidate any cache lines > * backing memory already mapped to the VM. > */ > -void stage2_flush_vm(struct kvm *kvm) > +static void stage2_flush_vm(struct kvm *kvm) > { > struct kvm_memslots *slots; > struct kvm_memory_slot *memslot; > @@ -1411,3 +1411,71 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, > unmap_stage2_range(kvm, gpa, size); > spin_unlock(&kvm->mmu_lock); > } > + > +/* > + * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). > + * > + * Main problems: > + * - S/W ops are local to a CPU (not broadcast) > + * - We have line migration behind our back (speculation) > + * - System caches don't support S/W at all (damn!) > + * > + * In the face of the above, the best we can do is to try and convert > + * S/W ops to VA ops. Because the guest is not allowed to infer the > + * S/W to PA mapping, it can only use S/W to nuke the whole cache, > + * which is a rather good thing for us. > + * > + * Also, it is only used when turning caches on/off ("The expected > + * usage of the cache maintenance instructions that operate by set/way > + * is associated with the cache maintenance instructions associated > + * with the powerdown and powerup of caches, if this is required by > + * the implementation."). > + * > + * We use the following policy: > + * > + * - If we trap a S/W operation, we enable VM trapping to detect > + * caches being turned on/off, and do a full clean. > + * > + * - We flush the caches on both caches being turned on and off. > + * > + * - Once the caches are enabled, we stop trapping VM ops. > + */ > +void kvm_set_way_flush(struct kvm_vcpu *vcpu) > +{ > + unsigned long hcr = vcpu_get_hcr(vcpu); > + > + /* > + * If this is the first time we do a S/W operation > + * (i.e. HCR_TVM not set) flush the whole memory, and set the > + * VM trapping. what about when the guest boots, wouldn't it be using S/W operations to flush the whole cache before turning it on? But we ignore this case now because we're already trapping VM regs so this is deferred until caches are turned on. However, when we are turning off caches, we both flush the cache when doing the actual S/W operation and when the caches are then turned off. Why? > + * > + * Otherwise, rely on the VM trapping to wait for the MMU + > + * Caches to be turned off. At that point, we'll be able to > + * clean the caches again. > + */ > + if (!(hcr & HCR_TVM)) { > + trace_kvm_set_way_flush(*vcpu_pc(vcpu), > + vcpu_has_cache_enabled(vcpu)); > + stage2_flush_vm(vcpu->kvm); > + vcpu_set_hcr(vcpu, hcr | HCR_TVM); > + } > +} > + > +void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled) > +{ > + bool now_enabled = vcpu_has_cache_enabled(vcpu); > + > + /* > + * If switching the MMU+caches on, need to invalidate the caches. > + * If switching it off, need to clean the caches. > + * Clean + invalidate does the trick always. > + */ > + if (now_enabled != was_enabled) > + stage2_flush_vm(vcpu->kvm); > + > + /* Caches are now on, stop trapping VM ops (until a S/W op) */ > + if (now_enabled) > + vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM); > + > + trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled); > +} > diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h > index b1d640f..b6a6e71 100644 > --- a/arch/arm/kvm/trace.h > +++ b/arch/arm/kvm/trace.h > @@ -223,6 +223,45 @@ TRACE_EVENT(kvm_hvc, > __entry->vcpu_pc, __entry->r0, __entry->imm) > ); > > +TRACE_EVENT(kvm_set_way_flush, > + TP_PROTO(unsigned long vcpu_pc, bool cache), > + TP_ARGS(vcpu_pc, cache), > + > + TP_STRUCT__entry( > + __field( unsigned long, vcpu_pc ) > + __field( bool, cache ) > + ), > + > + TP_fast_assign( > + __entry->vcpu_pc = vcpu_pc; > + __entry->cache = cache; > + ), > + > + TP_printk("S/W flush at 0x%016lx (cache %s)", > + __entry->vcpu_pc, __entry->cache ? "on" : "off") > +); > + > +TRACE_EVENT(kvm_toggle_cache, > + TP_PROTO(unsigned long vcpu_pc, bool was, bool now), > + TP_ARGS(vcpu_pc, was, now), > + > + TP_STRUCT__entry( > + __field( unsigned long, vcpu_pc ) > + __field( bool, was ) > + __field( bool, now ) > + ), > + > + TP_fast_assign( > + __entry->vcpu_pc = vcpu_pc; > + __entry->was = was; > + __entry->now = now; > + ), > + > + TP_printk("VM op at 0x%016lx (cache was %s, now %s)", > + __entry->vcpu_pc, __entry->was ? "on" : "off", > + __entry->now ? "on" : "off") > +); > + > #endif /* _TRACE_KVM_H */ > > #undef TRACE_INCLUDE_PATH > diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h > index 865a7e2..3cb4c85 100644 > --- a/arch/arm64/include/asm/kvm_emulate.h > +++ b/arch/arm64/include/asm/kvm_emulate.h > @@ -45,6 +45,16 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) > vcpu->arch.hcr_el2 &= ~HCR_RW; > } > > +static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu) > +{ > + return vcpu->arch.hcr_el2; > +} > + > +static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr) > +{ > + vcpu->arch.hcr_el2 = hcr; > +} > + > static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) > { > return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > index 0b7dfdb..acd101a 100644 > --- a/arch/arm64/include/asm/kvm_host.h > +++ b/arch/arm64/include/asm/kvm_host.h > @@ -116,9 +116,6 @@ struct kvm_vcpu_arch { > * Anything that is not used directly from assembly code goes > * here. > */ > - /* dcache set/way operation pending */ > - int last_pcpu; > - cpumask_t require_dcache_flush; > > /* Don't run the guest */ > bool pause; > diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h > index 14a74f1..92d22e9 100644 > --- a/arch/arm64/include/asm/kvm_mmu.h > +++ b/arch/arm64/include/asm/kvm_mmu.h > @@ -260,7 +260,8 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, > > #define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x)) > > -void stage2_flush_vm(struct kvm *kvm); > +void kvm_set_way_flush(struct kvm_vcpu *vcpu); > +void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); > > #endif /* __ASSEMBLY__ */ > #endif /* __ARM64_KVM_MMU_H__ */ > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c > index 3d7c2df..95daa73 100644 > --- a/arch/arm64/kvm/sys_regs.c > +++ b/arch/arm64/kvm/sys_regs.c > @@ -69,68 +69,31 @@ static u32 get_ccsidr(u32 csselr) > return ccsidr; > } > > -static void do_dc_cisw(u32 val) > -{ > - asm volatile("dc cisw, %x0" : : "r" (val)); > - dsb(ish); > -} > - > -static void do_dc_csw(u32 val) > -{ > - asm volatile("dc csw, %x0" : : "r" (val)); > - dsb(ish); > -} > - > -/* See note at ARM ARM B1.14.4 */ > +/* > + * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). > + */ > static bool access_dcsw(struct kvm_vcpu *vcpu, > const struct sys_reg_params *p, > const struct sys_reg_desc *r) > { > - unsigned long val; > - int cpu; > - > if (!p->is_write) > return read_from_write_only(vcpu, p); > > - cpu = get_cpu(); > - > - cpumask_setall(&vcpu->arch.require_dcache_flush); > - cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush); > - > - /* If we were already preempted, take the long way around */ > - if (cpu != vcpu->arch.last_pcpu) { > - flush_cache_all(); > - goto done; > - } > - > - val = *vcpu_reg(vcpu, p->Rt); > - > - switch (p->CRm) { > - case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */ > - case 14: /* DCCISW */ > - do_dc_cisw(val); > - break; > - > - case 10: /* DCCSW */ > - do_dc_csw(val); > - break; > - } > - > -done: > - put_cpu(); > - > + kvm_set_way_flush(vcpu); > return true; > } > > /* > * Generic accessor for VM registers. Only called as long as HCR_TVM > - * is set. > + * is set. If the guest enables the MMU, we stop trapping the VM > + * sys_regs and leave it in complete control of the caches. > */ > static bool access_vm_reg(struct kvm_vcpu *vcpu, > const struct sys_reg_params *p, > const struct sys_reg_desc *r) > { > unsigned long val; > + bool was_enabled = vcpu_has_cache_enabled(vcpu); > > BUG_ON(!p->is_write); > > @@ -143,25 +106,7 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, > vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL; > } > > - return true; > -} > - > -/* > - * SCTLR_EL1 accessor. Only called as long as HCR_TVM is set. If the > - * guest enables the MMU, we stop trapping the VM sys_regs and leave > - * it in complete control of the caches. > - */ > -static bool access_sctlr(struct kvm_vcpu *vcpu, > - const struct sys_reg_params *p, > - const struct sys_reg_desc *r) > -{ > - access_vm_reg(vcpu, p, r); > - > - if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */ > - vcpu->arch.hcr_el2 &= ~HCR_TVM; > - stage2_flush_vm(vcpu->kvm); > - } > - > + kvm_toggle_cache(vcpu, was_enabled); > return true; > } > > @@ -377,7 +322,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { > NULL, reset_mpidr, MPIDR_EL1 }, > /* SCTLR_EL1 */ > { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), > - access_sctlr, reset_val, SCTLR_EL1, 0x00C50078 }, > + access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 }, > /* CPACR_EL1 */ > { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010), > NULL, reset_val, CPACR_EL1, 0 }, > @@ -657,7 +602,7 @@ static const struct sys_reg_desc cp14_64_regs[] = { > * register). > */ > static const struct sys_reg_desc cp15_regs[] = { > - { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR }, > + { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, > { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, > { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, > { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR }, > -- > 2.1.4 > > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html