[Android-virt] [PATCH v2] ARM: KVM: Trap and propagate cache maintenance by set/way

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The ARM ARM says in bold (B1.14.4):
"Virtualizing a uniprocessor system within an MP system, permitting a
 virtual machine to move between different physical processors, makes
 cache maintenance by set/way difficult. This is because a set/way
 operation might be interrupted part way through its operation, and
 therefore the hypervisor must reproduce the effect of the maintenance
 on both physical processors."

The direct consequence of this is that we have to trap all set/way
operations and make sure the other CPUs are informed of the pending
maintenance. In order to avoid performance degradation, we maintain a
per-vcpu cpumask that tracks the physical CPUs on which the cache
operation must still be performed. The remote operation is only
executed when migrating the vcpu.

On the receiving end, we simply clean+invalidate the whole data cache
to avoid queueing up individual set/way operations.

Reported-by: Peter Maydell <peter.maydell at linaro.org>
Cc: Will Deacon <will.deacon at arm.com>
Cc: Rusty Russell <rusty.russell at linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier at arm.com>
---
Take #2:
- Moved the cache nuking to kvm_arch_vcpu_load(), which is in a
  preempt-safe section (suggested by Will)
- Simplified the whole cpumask thing (Will again)
- Fix the current CPU cache ops (brown paper bag for me again...)
- Fixed typos in commit log (Peter)
- Actually tested this time!

 arch/arm/include/asm/kvm_arm.h  |    3 ++-
 arch/arm/include/asm/kvm_host.h |    3 +++
 arch/arm/kvm/arm.c              |   11 +++++++++++
 arch/arm/kvm/emulate.c          |   34 ++++++++++++++++++++++++++++++++++
 4 files changed, 50 insertions(+), 1 deletions(-)

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index a28b5f0..5bdbe61 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -53,6 +53,7 @@
  * The bits we set in HCR:
  * TAC:		Trap ACTLR
  * TSC:		Trap SMC
+ * TSW:		Trap cache operations by set/way
  * TWI:		Trap WFI
  * BSU_IS:	Upgrade barriers to the inner shareable domain
  * FB:		Force broadcast of all maintainance operations
@@ -61,7 +62,7 @@
  * FMO:		Override CPSR.F and enable signaling with VF
  * SWIO:	Turn set/way invalidates into set/way clean+invalidate
  */
-#define HCR_GUEST_MASK (HCR_TSC | HCR_TWI | HCR_VM | HCR_BSU_IS | HCR_FB | \
+#define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | HCR_FB | \
 			HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | HCR_SWIO)
 #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
 
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 734a107..69ee513 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -113,6 +113,9 @@ struct kvm_vcpu_arch {
 	u32 hpfar;		/* Hyp IPA Fault Address Register */
 	u64 pc_ipa;		/* IPA for the current PC (VA to PA result) */
 
+	/* dcache set/way operation pending */
+	cpumask_t require_dcache_flush;
+
 	/* IO related fields */
 	bool mmio_sign_extend;	/* for byte/halfword loads */
 	u32 mmio_rd;
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index bf3e5f5..13681a1 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -33,6 +33,7 @@
 #include <asm/ptrace.h>
 #include <asm/mman.h>
 #include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
 #include <asm/cputype.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
@@ -277,6 +278,16 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	vcpu->cpu = cpu;
+
+	/*
+	 * Check whether this vcpu requires the cache to be flushed on
+	 * this physical CPU. This is a consequence of doing dcache
+	 * operations by set/way on this vcpu. We do it here in order
+	 * to be in a non-preemptible section.
+	 */
+	if (cpumask_test_and_clear_cpu(cpu,
+				       &vcpu->arch.require_dcache_flush))
+		flush_cache_all(); /* We'd really want v7_flush_dcache_all()... */
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index 3ceab47..a86a0fa 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -255,6 +255,31 @@ static bool read_actlr(struct kvm_vcpu *vcpu,
 	return true;
 }
 
+static bool write_dcsw(struct kvm_vcpu *vcpu,
+		       const struct coproc_params *p,
+		       unsigned long cp15_reg)
+{
+	u32 val;
+
+	val = *vcpu_reg(vcpu, p->Rt1);
+	
+	switch(p->CRm) {
+	case 6:			/* Upgrade DCISW to DCCISW, as per HCR.SWIO */
+	case 14:		/* DCCISW */
+		asm volatile("mcr p15, 0, %0, c7, c14, 2" : : "r" (val));
+		break;
+
+	case 10:		/* DCCSW */
+		asm volatile("mcr p15, 0, %0, c7, c10, 2" : : "r" (val));
+		break;
+	}
+
+	cpumask_setall(&vcpu->arch.require_dcache_flush);
+	cpumask_clear_cpu(vcpu->cpu, &vcpu->arch.require_dcache_flush);
+
+	return true;
+}
+
 static bool access_cp15_reg(struct kvm_vcpu *vcpu,
 			    const struct coproc_params *p,
 			    unsigned long cp15_reg)
@@ -302,6 +327,15 @@ static const struct coproc_emulate coproc_emulate[] = {
 	{ CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, WRITE, ignore_write},
 	{ CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, READ,  read_actlr},
 	/*
+	 * DC{C,I,CI}SW operations:
+	 */
+	{ CRn( 7), CRm( 6), Op1( 0), Op2( 2), is32,  WRITE, write_dcsw},	
+	{ CRn( 7), CRm( 6), Op1( 0), Op2( 2), is32,  READ,  read_zero},	
+	{ CRn( 7), CRm(10), Op1( 0), Op2( 2), is32,  WRITE, write_dcsw},	
+	{ CRn( 7), CRm(10), Op1( 0), Op2( 2), is32,  READ,  read_zero},	
+	{ CRn( 7), CRm(14), Op1( 0), Op2( 2), is32,  WRITE, write_dcsw},	
+	{ CRn( 7), CRm(14), Op1( 0), Op2( 2), is32,  READ,  read_zero},	
+	/*
 	 * L2CTLR access:
 	 *
 	 * Ignore writes completely.
-- 
1.7.3.4



[Index of Archives]     [Linux KVM]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux