Re: [RFC PATCH 2/4] KVM: PPC: Book3E: Handle LRAT error exception

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 03.07.14 16:45, Mihai Caraman wrote:
Handle LRAT error exception with support for lrat mapping and invalidation.

Signed-off-by: Mihai Caraman <mihai.caraman@xxxxxxxxxxxxx>
---
  arch/powerpc/include/asm/kvm_host.h   |   1 +
  arch/powerpc/include/asm/kvm_ppc.h    |   2 +
  arch/powerpc/include/asm/mmu-book3e.h |   3 +
  arch/powerpc/include/asm/reg_booke.h  |  13 ++++
  arch/powerpc/kernel/asm-offsets.c     |   1 +
  arch/powerpc/kvm/booke.c              |  40 +++++++++++
  arch/powerpc/kvm/bookehv_interrupts.S |   9 ++-
  arch/powerpc/kvm/e500_mmu_host.c      | 125 ++++++++++++++++++++++++++++++++++
  arch/powerpc/kvm/e500mc.c             |   2 +
  9 files changed, 195 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index bb66d8b..7b6b2ec 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -433,6 +433,7 @@ struct kvm_vcpu_arch {
  	u32 eplc;
  	u32 epsc;
  	u32 oldpir;
+	u64 fault_lper;
  #endif
#if defined(CONFIG_BOOKE)
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 9c89cdd..2730a29 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -86,6 +86,8 @@ extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
                                gva_t eaddr);
  extern void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu);
  extern void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu);
+extern void kvmppc_lrat_map(struct kvm_vcpu *vcpu, gfn_t gfn);
+extern void kvmppc_lrat_invalidate(struct kvm_vcpu *vcpu);
extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
                                                  unsigned int id);
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index 088fd9f..ac6acf7 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -40,6 +40,8 @@
  /* MAS registers bit definitions */
+#define MAS0_ATSEL		0x80000000
+#define MAS0_ATSEL_SHIFT	31
  #define MAS0_TLBSEL_MASK        0x30000000
  #define MAS0_TLBSEL_SHIFT       28
  #define MAS0_TLBSEL(x)          (((x) << MAS0_TLBSEL_SHIFT) & MAS0_TLBSEL_MASK)
@@ -53,6 +55,7 @@
  #define MAS0_WQ_CLR_RSRV       	0x00002000
#define MAS1_VALID 0x80000000
+#define MAS1_VALID_SHIFT	31
  #define MAS1_IPROT		0x40000000
  #define MAS1_TID(x)		(((x) << 16) & 0x3FFF0000)
  #define MAS1_IND		0x00002000
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 75bda23..783d617 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -43,6 +43,8 @@
/* Special Purpose Registers (SPRNs)*/
  #define SPRN_DECAR	0x036	/* Decrementer Auto Reload Register */
+#define SPRN_LPER	0x038	/* Logical Page Exception Register */
+#define SPRN_LPERU	0x039	/* Logical Page Exception Register Upper */
  #define SPRN_IVPR	0x03F	/* Interrupt Vector Prefix Register */
  #define SPRN_USPRG0	0x100	/* User Special Purpose Register General 0 */
  #define SPRN_SPRG3R	0x103	/* Special Purpose Register General 3 Read */
@@ -358,6 +360,9 @@
  #define ESR_ILK		0x00100000	/* Instr. Cache Locking */
  #define ESR_PUO		0x00040000	/* Unimplemented Operation exception */
  #define ESR_BO		0x00020000	/* Byte Ordering */
+#define ESR_DATA	0x00000400	/* Page Table Data Access */
+#define ESR_TLBI	0x00000200	/* Page Table TLB Ineligible */
+#define ESR_PT		0x00000100	/* Page Table Translation */
  #define ESR_SPV		0x00000080	/* Signal Processing operation */
/* Bit definitions related to the DBCR0. */
@@ -649,6 +654,14 @@
  #define EPC_EPID	0x00003fff
  #define EPC_EPID_SHIFT	0
+/* Bit definitions for LPER */
+#define LPER_ALPN		0x000FFFFFFFFFF000ULL
+#define LPER_ALPN_SHIFT		12
+#define LPER_WIMGE		0x00000F80
+#define LPER_WIMGE_SHIFT	7
+#define LPER_LPS		0x0000000F
+#define LPER_LPS_SHIFT		0
+
  /*
   * The IBM-403 is an even more odd special case, as it is much
   * older than the IBM-405 series.  We put these down here incase someone
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index f5995a9..be6e329 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -713,6 +713,7 @@ int main(void)
  	DEFINE(VCPU_HOST_MAS4, offsetof(struct kvm_vcpu, arch.host_mas4));
  	DEFINE(VCPU_HOST_MAS6, offsetof(struct kvm_vcpu, arch.host_mas6));
  	DEFINE(VCPU_EPLC, offsetof(struct kvm_vcpu, arch.eplc));
+	DEFINE(VCPU_FAULT_LPER, offsetof(struct kvm_vcpu, arch.fault_lper));
  #endif
#ifdef CONFIG_KVM_EXIT_TIMING
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index a192975..ab1077f 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1286,6 +1286,46 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
  		break;
  	}
+#ifdef CONFIG_KVM_BOOKE_HV
+	case BOOKE_INTERRUPT_LRAT_ERROR:
+	{
+		gfn_t gfn;
+
+		/*
+		 * Guest TLB management instructions (EPCR.DGTMI == 0) are not
+		 * supported for now
+		 */
+		if (!(vcpu->arch.fault_esr & ESR_PT)) {
+			WARN(1, "%s: Guest TLB management instructions not supported!\n", __func__);

Wouldn't this allow a guest to flood the host's kernel log?

+			break;
+		}
+
+		gfn = (vcpu->arch.fault_lper & LPER_ALPN) >> LPER_ALPN_SHIFT;

Maybe add an #if and #error check to make sure that LPER_ALPN_SHIFT == PAGE_SHIFT?

+
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+			kvmppc_lrat_map(vcpu, gfn);
+			r = RESUME_GUEST;
+		} else if (vcpu->arch.fault_esr & ESR_DATA) {
+			vcpu->arch.paddr_accessed = (gfn << PAGE_SHIFT)
+				| (vcpu->arch.fault_dear & (PAGE_SIZE - 1));
+			vcpu->arch.vaddr_accessed =
+				vcpu->arch.fault_dear;
+
+			r = kvmppc_emulate_mmio(run, vcpu);
+			kvmppc_account_exit(vcpu, MMIO_EXITS);

It's a shame we have to duplicate that logic from the normal TLB miss path, but I can't see any good way to combine them either.

+		} else {
+			kvmppc_booke_queue_irqprio(vcpu,
+						BOOKE_IRQPRIO_MACHINE_CHECK);
+			r = RESUME_GUEST;
+		}
+
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
+		break;
+	}
+#endif
+
  	case BOOKE_INTERRUPT_DEBUG: {
  		r = kvmppc_handle_debug(run, vcpu);
  		if (r == RESUME_HOST)
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index b3ecdd6..341c3a8 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -64,6 +64,7 @@
  #define NEED_EMU		0x00000001 /* emulation -- save nv regs */
  #define NEED_DEAR		0x00000002 /* save faulting DEAR */
  #define NEED_ESR		0x00000004 /* save faulting ESR */
+#define NEED_LPER		0x00000008 /* save faulting LPER */
/*
   * On entry:
@@ -203,6 +204,12 @@
  	PPC_STL	r9, VCPU_FAULT_DEAR(r4)
  	.endif
+ /* Only supported on 64-bit cores for now */
+	.if	\flags & NEED_LPER
+	mfspr	r7, SPRN_LPER
+	std	r7, VCPU_FAULT_LPER(r4)
+	.endif
+
  	b	kvmppc_resume_host
  .endm
@@ -325,7 +332,7 @@ kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \
  kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \
  	SPRN_CSRR0, SPRN_CSRR1, 0
  kvm_handler BOOKE_INTERRUPT_LRAT_ERROR, EX_PARAMS(GEN), \
-	SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
+	SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR | NEED_LPER)
  #else
  /*
   * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 79677d7..be1454b 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -95,6 +95,131 @@ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
  	                              stlbe->mas2, stlbe->mas7_3);
  }
+#ifdef CONFIG_KVM_BOOKE_HV
+#ifdef CONFIG_64BIT
+static inline int lrat_next(void)

No inline in .c files please. Just make them "static".

+{
+	int this, next;
+
+	this = local_paca->tcd.lrat_next;
+	next = (this + 1) % local_paca->tcd.lrat_max;

Can we assume that lrat_max is always a power of 2? IIRC modulo operations with a variable divisor can be quite expensive. So if we can instead do

  next = (this + 1) & local_paca->tcd.lrat_mask;

we should be faster and not rely on division helpers.

+	local_paca->tcd.lrat_next = next;
+
+	return this;
+}
+
+static inline int lrat_size(void)
+{
+	return local_paca->tcd.lrat_max;
+}
+#else
+/* LRAT is only supported in 64-bit kernel for now */
+static inline int lrat_next(void)
+{
+	BUG();
+}
+
+static inline int lrat_size(void)
+{
+	return 0;
+}
+#endif
+
+void write_host_lrate(int tsize, gfn_t gfn, unsigned long pfn, uint32_t lpid,
+		      int valid, int lrat_entry)
+{
+	struct kvm_book3e_206_tlb_entry stlbe;
+	int esel = lrat_entry;
+	unsigned long flags;
+
+	stlbe.mas1 = (valid ? MAS1_VALID : 0) | MAS1_TSIZE(tsize);
+	stlbe.mas2 = ((u64)gfn << PAGE_SHIFT);
+	stlbe.mas7_3 = ((u64)pfn << PAGE_SHIFT);
+	stlbe.mas8 = MAS8_TGS | lpid;
+
+	local_irq_save(flags);
+	/* book3e_tlb_lock(); */

Hm? Why is the book3e_tlb_lock() call commented out here?

+
+	if (esel == -1)
+		esel = lrat_next();
+	__write_host_tlbe(&stlbe, MAS0_ATSEL | MAS0_ESEL(esel));
+
+	/* book3e_tlb_unlock(); */
+	local_irq_restore(flags);
+}
+
+void kvmppc_lrat_map(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	struct kvm_memory_slot *slot;
+	unsigned long pfn;
+	unsigned long hva;
+	struct vm_area_struct *vma;
+	unsigned long psize;
+	int tsize;
+	unsigned long tsize_pages;
+
+	slot = gfn_to_memslot(vcpu->kvm, gfn);
+	if (!slot) {
+		pr_err_ratelimited("%s: couldn't find memslot for gfn %lx!\n",
+				   __func__, (long)gfn);
+		return;
+	}
+
+	hva = slot->userspace_addr;
+
+	down_read(&current->mm->mmap_sem);
+	vma = find_vma(current->mm, hva);
+	if (vma && (hva >= vma->vm_start)) {
+		psize = vma_kernel_pagesize(vma);
+	} else {
+		pr_err_ratelimited("%s: couldn't find virtual memory address for gfn %lx!\n", __func__, (long)gfn);
+		return;
+	}
+	up_read(&current->mm->mmap_sem);
+
+	pfn = gfn_to_pfn_memslot(slot, gfn);
+	if (is_error_noslot_pfn(pfn)) {
+		pr_err_ratelimited("%s: couldn't get real page for gfn %lx!\n",
+				   __func__, (long)gfn);
+		return;
+	}
+
+	tsize = __ilog2(psize) - 10;
+	tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
+	gfn &= ~(tsize_pages - 1);
+	pfn &= ~(tsize_pages - 1);
+
+	write_host_lrate(tsize, gfn, pfn, vcpu->kvm->arch.lpid, 1, -1);
+	kvm_release_pfn_clean(pfn);

Don't we have to keep the page locked so it doesn't get swapped away?


Alex

+}
+
+void kvmppc_lrat_invalidate(struct kvm_vcpu *vcpu)
+{
+	uint32_t mas0, mas1 = 0;
+	int esel;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	/* book3e_tlb_lock(); */
+
+	/* LRAT does not have a dedicated instruction for invalidation */
+	for (esel = 0; esel < lrat_size(); esel++) {
+		mas0 = MAS0_ATSEL | MAS0_ESEL(esel);
+		mtspr(SPRN_MAS0, mas0);
+		asm volatile("isync; tlbre" : : : "memory");
+		mas1 = mfspr(SPRN_MAS1) & ~MAS1_VALID;
+		mtspr(SPRN_MAS1, mas1);
+		asm volatile("isync; tlbwe" : : : "memory");
+	}
+	/* Must clear mas8 for other host tlbwe's */
+	mtspr(SPRN_MAS8, 0);
+	isync();
+
+	/* book3e_tlb_unlock(); */
+	local_irq_restore(flags);
+}
+#endif
+
  /*
   * Acquire a mas0 with victim hint, as if we just took a TLB miss.
   *
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index b1d9939..5622d9a 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -99,6 +99,8 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
  	asm volatile("tlbilxlpid");
  	mtspr(SPRN_MAS5, 0);
  	local_irq_restore(flags);
+
+	kvmppc_lrat_invalidate(&vcpu_e500->vcpu);
  }
void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux