[PATCH 04/14] KVM: PPC: e500: MMU API

Alexander Graf <agraf@xxxxxxx> · Mon, 31 Oct 2011 08:53:06 +0100

From: Scott Wood <scottwood@xxxxxxxxxxxxx>

This implements a shared-memory API for giving host userspace access to
the guest's TLB.

Signed-off-by: Scott Wood <scottwood@xxxxxxxxxxxxx>
Signed-off-by: Alexander Graf <agraf@xxxxxxx>
---
 Documentation/virtual/kvm/api.txt   |   74 +++++++
 arch/powerpc/include/asm/kvm.h      |   35 +++
 arch/powerpc/include/asm/kvm_e500.h |   24 +--
 arch/powerpc/include/asm/kvm_ppc.h  |    5 +
 arch/powerpc/kvm/e500.c             |    5 +-
 arch/powerpc/kvm/e500_emulate.c     |   12 +-
 arch/powerpc/kvm/e500_tlb.c         |  393 ++++++++++++++++++++++++-----------
 arch/powerpc/kvm/e500_tlb.h         |   38 ++--
 arch/powerpc/kvm/powerpc.c          |   28 +++
 include/linux/kvm.h                 |   18 ++
 10 files changed, 469 insertions(+), 163 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 7945b0b..ab1136f 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1383,6 +1383,38 @@ The following flags are defined:
 If datamatch flag is set, the event will be signaled only if the written value
 to the registered address is equal to datamatch in struct kvm_ioeventfd.
 
+4.59 KVM_DIRTY_TLB
+
+Capability: KVM_CAP_SW_TLB
+Architectures: ppc
+Type: vcpu ioctl
+Parameters: struct kvm_dirty_tlb (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_dirty_tlb {
+	__u64 bitmap;
+	__u32 num_dirty;
+};
+
+This must be called whenever userspace has changed an entry in the shared
+TLB, prior to calling KVM_RUN on the associated vcpu.
+
+The "bitmap" field is the userspace address of an array.  This array
+consists of a number of bits, equal to the total number of TLB entries as
+determined by the last successful call to KVM_CONFIG_TLB, rounded up to the
+nearest multiple of 64.
+
+Each bit corresponds to one TLB entry, ordered the same as in the shared TLB
+array.
+
+The array is little-endian: the bit 0 is the least significant bit of the
+first byte, bit 8 is the least significant bit of the second byte, etc.
+This avoids any complications with differing word sizes.
+
+The "num_dirty" field is a performance hint for KVM to determine whether it
+should skip processing the bitmap and just invalidate everything.  It must
+be set to the number of set bits in the bitmap.
+
 4.62 KVM_CREATE_SPAPR_TCE
 
 Capability: KVM_CAP_SPAPR_TCE
@@ -1700,3 +1732,45 @@ HTAB address part of SDR1 contains an HVA instead of a GPA, as PAPR keeps the
 HTAB invisible to the guest.
 
 When this capability is enabled, KVM_EXIT_PAPR_HCALL can occur.
+
+6.3 KVM_CAP_SW_TLB
+
+Architectures: ppc
+Parameters: args[0] is the address of a struct kvm_config_tlb
+Returns: 0 on success; -1 on error
+
+struct kvm_config_tlb {
+	__u64 params;
+	__u64 array;
+	__u32 mmu_type;
+	__u32 array_len;
+};
+
+Configures the virtual CPU's TLB array, establishing a shared memory area
+between userspace and KVM.  The "params" and "array" fields are userspace
+addresses of mmu-type-specific data structures.  The "array_len" field is an
+safety mechanism, and should be set to the size in bytes of the memory that
+userspace has reserved for the array.  It must be at least the size dictated
+by "mmu_type" and "params".
+
+While KVM_RUN is active, the shared region is under control of KVM.  Its
+contents are undefined, and any modification by userspace results in
+boundedly undefined behavior.
+
+On return from KVM_RUN, the shared region will reflect the current state of
+the guest's TLB.  If userspace makes any changes, it must call KVM_DIRTY_TLB
+to tell KVM which entries have been changed, prior to calling KVM_RUN again
+on this vcpu.
+
+For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
+ - The "params" field is of type "struct kvm_book3e_206_tlb_params".
+ - The "array" field points to an array of type "struct
+   kvm_book3e_206_tlb_entry".
+ - The array consists of all entries in the first TLB, followed by all
+   entries in the second TLB.
+ - Within a TLB, entries are ordered first by increasing set number.  Within a
+   set, entries are ordered by way (increasing ESEL).
+ - The hash for determining set number in TLB0 is: (MAS2 >> 12) & (num_sets - 1)
+   where "num_sets" is the tlb_sizes[] value divided by the tlb_ways[] value.
+ - The tsize field of mas1 shall be set to 4K on TLB0, even though the
+   hardware ignores this value for TLB0.
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index 08fe69e..71684b9 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -300,4 +300,39 @@ struct kvm_allocate_rma {
 	__u64 rma_size;
 };
 
+struct kvm_book3e_206_tlb_entry {
+	__u32 mas8;
+	__u32 mas1;
+	__u64 mas2;
+	__u64 mas7_3;
+};
+
+struct kvm_book3e_206_tlb_params {
+	/*
+	 * For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
+	 *
+	 * - The number of ways of TLB0 must be a power of two between 2 and
+	 *   16.
+	 * - TLB1 must be fully associative.
+	 * - The size of TLB0 must be a multiple of the number of ways, and
+	 *   the number of sets must be a power of two.
+	 * - The size of TLB1 may not exceed 64 entries.
+	 * - TLB0 supports 4 KiB pages.
+	 * - The page sizes supported by TLB1 are as indicated by
+	 *   TLB1CFG (if MMUCFG[MAVN] = 0) or TLB1PS (if MMUCFG[MAVN] = 1)
+	 *   as returned by KVM_GET_SREGS.
+	 * - TLB2 and TLB3 are reserved, and their entries in tlb_sizes[]
+	 *   and tlb_ways[] must be zero.
+	 *
+	 * tlb_ways[n] = tlb_sizes[n] means the array is fully associative.
+	 *
+	 * KVM will adjust TLBnCFG based on the sizes configured here,
+	 * though arrays greater than 2048 entries will have TLBnCFG[NENTRY]
+	 * set to zero.
+	 */
+	__u32 tlb_sizes[4];
+	__u32 tlb_ways[4];
+	__u32 reserved[8];
+};
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
index a5197d8..bc17441 100644
--- a/arch/powerpc/include/asm/kvm_e500.h
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -22,13 +22,6 @@
 #define E500_PID_NUM   3
 #define E500_TLB_NUM   2
 
-struct tlbe{
-	u32 mas1;
-	u32 mas2;
-	u32 mas3;
-	u32 mas7;
-};
-
 #define E500_TLB_VALID 1
 #define E500_TLB_DIRTY 2
 
@@ -48,13 +41,17 @@ struct kvmppc_e500_tlb_params {
 };
 
 struct kvmppc_vcpu_e500 {
-	/* Unmodified copy of the guest's TLB. */
-	struct tlbe *gtlb_arch[E500_TLB_NUM];
+	/* Unmodified copy of the guest's TLB -- shared with host userspace. */
+	struct kvm_book3e_206_tlb_entry *gtlb_arch;
+
+	/* Starting entry number in gtlb_arch[] */
+	int gtlb_offset[E500_TLB_NUM];
 
 	/* KVM internal information associated with each guest TLB entry */
 	struct tlbe_priv *gtlb_priv[E500_TLB_NUM];
 
-	unsigned int gtlb_size[E500_TLB_NUM];
+	struct kvmppc_e500_tlb_params gtlb_params[E500_TLB_NUM];
+
 	unsigned int gtlb_nv[E500_TLB_NUM];
 
 	/*
@@ -68,7 +65,6 @@ struct kvmppc_vcpu_e500 {
 	 * and back, and our host TLB entries got evicted).
 	 */
 	struct tlbe_ref *tlb_refs[E500_TLB_NUM];
-
 	unsigned int host_tlb1_nv;
 
 	u32 host_pid[E500_PID_NUM];
@@ -78,11 +74,10 @@ struct kvmppc_vcpu_e500 {
 	u32 mas0;
 	u32 mas1;
 	u32 mas2;
-	u32 mas3;
+	u64 mas7_3;
 	u32 mas4;
 	u32 mas5;
 	u32 mas6;
-	u32 mas7;
 
 	/* vcpu id table */
 	struct vcpu_id_table *idt;
@@ -95,6 +90,9 @@ struct kvmppc_vcpu_e500 {
 	u32 tlb1cfg;
 	u64 mcar;
 
+	struct page **shared_tlb_pages;
+	int num_shared_tlb_pages;
+
 	struct kvm_vcpu vcpu;
 };
 
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 46efd1a..a284f20 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -193,4 +193,9 @@ static inline void kvm_rma_init(void)
 {}
 #endif
 
+int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
+			      struct kvm_config_tlb *cfg);
+int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
+			     struct kvm_dirty_tlb *cfg);
+
 #endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 26d2090..14d6e6e 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -120,7 +120,7 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	sregs->u.e.mas0 = vcpu_e500->mas0;
 	sregs->u.e.mas1 = vcpu_e500->mas1;
 	sregs->u.e.mas2 = vcpu_e500->mas2;
-	sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3;
+	sregs->u.e.mas7_3 = vcpu_e500->mas7_3;
 	sregs->u.e.mas4 = vcpu_e500->mas4;
 	sregs->u.e.mas6 = vcpu_e500->mas6;
 
@@ -153,8 +153,7 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 		vcpu_e500->mas0 = sregs->u.e.mas0;
 		vcpu_e500->mas1 = sregs->u.e.mas1;
 		vcpu_e500->mas2 = sregs->u.e.mas2;
-		vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32;
-		vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3;
+		vcpu_e500->mas7_3 = sregs->u.e.mas7_3;
 		vcpu_e500->mas4 = sregs->u.e.mas4;
 		vcpu_e500->mas6 = sregs->u.e.mas6;
 	}
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index d48ae39..e0d3609 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -95,13 +95,17 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 	case SPRN_MAS2:
 		vcpu_e500->mas2 = spr_val; break;
 	case SPRN_MAS3:
-		vcpu_e500->mas3 = spr_val; break;
+		vcpu_e500->mas7_3 &= ~(u64)0xffffffff;
+		vcpu_e500->mas7_3 |= spr_val;
+		break;
 	case SPRN_MAS4:
 		vcpu_e500->mas4 = spr_val; break;
 	case SPRN_MAS6:
 		vcpu_e500->mas6 = spr_val; break;
 	case SPRN_MAS7:
-		vcpu_e500->mas7 = spr_val; break;
+		vcpu_e500->mas7_3 &= (u64)0xffffffff;
+		vcpu_e500->mas7_3 |= (u64)spr_val << 32;
+		break;
 	case SPRN_L1CSR0:
 		vcpu_e500->l1csr0 = spr_val;
 		vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC);
@@ -158,13 +162,13 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 	case SPRN_MAS2:
 		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas2); break;
 	case SPRN_MAS3:
-		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas3); break;
+		kvmppc_set_gpr(vcpu, rt, (u32)vcpu_e500->mas7_3); break;
 	case SPRN_MAS4:
 		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break;
 	case SPRN_MAS6:
 		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break;
 	case SPRN_MAS7:
-		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7); break;
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7_3 >> 32); break;
 
 	case SPRN_TLB0CFG:
 		kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break;
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 59221bb..f19ae2f 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -19,6 +19,11 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/highmem.h>
+#include <linux/log2.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+#include <linux/rwsem.h>
+#include <linux/vmalloc.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_e500.h>
 
@@ -66,6 +71,13 @@ static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid);
 
 static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM];
 
+static struct kvm_book3e_206_tlb_entry *get_entry(
+	struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry)
+{
+	int offset = vcpu_e500->gtlb_offset[tlbsel];
+	return &vcpu_e500->gtlb_arch[offset + entry];
+}
+
 /*
  * Allocate a free shadow id and setup a valid sid mapping in given entry.
  * A mapping is only valid when vcpu_id_table and pcpu_id_table are match.
@@ -217,34 +229,13 @@ void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500)
 	preempt_enable();
 }
 
-void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
-{
-	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-	struct tlbe *tlbe;
-	int i, tlbsel;
-
-	printk("| %8s | %8s | %8s | %8s | %8s |\n",
-			"nr", "mas1", "mas2", "mas3", "mas7");
-
-	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
-		printk("Guest TLB%d:\n", tlbsel);
-		for (i = 0; i < vcpu_e500->gtlb_size[tlbsel]; i++) {
-			tlbe = &vcpu_e500->gtlb_arch[tlbsel][i];
-			if (tlbe->mas1 & MAS1_VALID)
-				printk(" G[%d][%3d] |  %08X | %08X | %08X | %08X |\n",
-					tlbsel, i, tlbe->mas1, tlbe->mas2,
-					tlbe->mas3, tlbe->mas7);
-		}
-	}
-}
-
 static inline unsigned int gtlb0_get_next_victim(
 		struct kvmppc_vcpu_e500 *vcpu_e500)
 {
 	unsigned int victim;
 
 	victim = vcpu_e500->gtlb_nv[0]++;
-	if (unlikely(vcpu_e500->gtlb_nv[0] >= KVM_E500_TLB0_WAY_NUM))
+	if (unlikely(vcpu_e500->gtlb_nv[0] >= vcpu_e500->gtlb_params[0].ways))
 		vcpu_e500->gtlb_nv[0] = 0;
 
 	return victim;
@@ -256,9 +247,9 @@ static inline unsigned int tlb1_max_shadow_size(void)
 	return host_tlb_params[1].entries - tlbcam_index - 1;
 }
 
-static inline int tlbe_is_writable(struct tlbe *tlbe)
+static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe)
 {
-	return tlbe->mas3 & (MAS3_SW|MAS3_UW);
+	return tlbe->mas7_3 & (MAS3_SW|MAS3_UW);
 }
 
 static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
@@ -289,39 +280,41 @@ static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
 /*
  * writing shadow tlb entry to host TLB
  */
-static inline void __write_host_tlbe(struct tlbe *stlbe, uint32_t mas0)
+static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
+				     uint32_t mas0)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
 	mtspr(SPRN_MAS0, mas0);
 	mtspr(SPRN_MAS1, stlbe->mas1);
-	mtspr(SPRN_MAS2, stlbe->mas2);
-	mtspr(SPRN_MAS3, stlbe->mas3);
-	mtspr(SPRN_MAS7, stlbe->mas7);
+	mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2);
+	mtspr(SPRN_MAS3, (u32)stlbe->mas7_3);
+	mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32));
 	asm volatile("isync; tlbwe" : : : "memory");
 	local_irq_restore(flags);
 }
 
 /* esel is index into set, not whole array */
 static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
-		int tlbsel, int esel, struct tlbe *stlbe)
+		int tlbsel, int esel, struct kvm_book3e_206_tlb_entry *stlbe)
 {
 	if (tlbsel == 0) {
-		__write_host_tlbe(stlbe, MAS0_TLBSEL(0) | MAS0_ESEL(esel));
+		int way = esel & (vcpu_e500->gtlb_params[0].ways - 1);
+		__write_host_tlbe(stlbe, MAS0_TLBSEL(0) | MAS0_ESEL(way));
 	} else {
 		__write_host_tlbe(stlbe,
 				  MAS0_TLBSEL(1) |
 				  MAS0_ESEL(to_htlb1_esel(esel)));
 	}
 	trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
-			     stlbe->mas3, stlbe->mas7);
+			     (u32)stlbe->mas7_3, (u32)(stlbe->mas7_3 >> 32));
 }
 
 void kvmppc_map_magic(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-	struct tlbe magic;
+	struct kvm_book3e_206_tlb_entry magic;
 	ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK;
 	unsigned int stid;
 	pfn_t pfn;
@@ -335,9 +328,8 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu)
 	magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) |
 		     MAS1_TSIZE(BOOK3E_PAGESZ_4K);
 	magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M;
-	magic.mas3 = (pfn << PAGE_SHIFT) |
-		     MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
-	magic.mas7 = pfn >> (32 - PAGE_SHIFT);
+	magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) |
+		       MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
 
 	__write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index));
 	preempt_enable();
@@ -358,7 +350,8 @@ void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
 static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500,
 				int tlbsel, int esel)
 {
-	struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+	struct kvm_book3e_206_tlb_entry *gtlbe =
+		get_entry(vcpu_e500, tlbsel, esel);
 	struct vcpu_id_table *idt = vcpu_e500->idt;
 	unsigned int pr, tid, ts, pid;
 	u32 val, eaddr;
@@ -424,9 +417,8 @@ static int tlb0_set_base(gva_t addr, int sets, int ways)
 
 static int gtlb0_set_base(struct kvmppc_vcpu_e500 *vcpu_e500, gva_t addr)
 {
-	int sets = KVM_E500_TLB0_SIZE / KVM_E500_TLB0_WAY_NUM;
-
-	return tlb0_set_base(addr, sets, KVM_E500_TLB0_WAY_NUM);
+	return tlb0_set_base(addr, vcpu_e500->gtlb_params[0].sets,
+			     vcpu_e500->gtlb_params[0].ways);
 }
 
 static int htlb0_set_base(gva_t addr)
@@ -440,10 +432,10 @@ static unsigned int get_tlb_esel(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel)
 	unsigned int esel = get_tlb_esel_bit(vcpu_e500);
 
 	if (tlbsel == 0) {
-		esel &= KVM_E500_TLB0_WAY_NUM_MASK;
+		esel &= vcpu_e500->gtlb_params[0].ways - 1;
 		esel += gtlb0_set_base(vcpu_e500, vcpu_e500->mas2);
 	} else {
-		esel &= vcpu_e500->gtlb_size[tlbsel] - 1;
+		esel &= vcpu_e500->gtlb_params[tlbsel].entries - 1;
 	}
 
 	return esel;
@@ -453,19 +445,22 @@ static unsigned int get_tlb_esel(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel)
 static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
 		gva_t eaddr, int tlbsel, unsigned int pid, int as)
 {
-	int size = vcpu_e500->gtlb_size[tlbsel];
-	unsigned int set_base;
+	int size = vcpu_e500->gtlb_params[tlbsel].entries;
+	unsigned int set_base, offset;
 	int i;
 
 	if (tlbsel == 0) {
 		set_base = gtlb0_set_base(vcpu_e500, eaddr);
-		size = KVM_E500_TLB0_WAY_NUM;
+		size = vcpu_e500->gtlb_params[0].ways;
 	} else {
 		set_base = 0;
 	}
 
+	offset = vcpu_e500->gtlb_offset[tlbsel];
+
 	for (i = 0; i < size; i++) {
-		struct tlbe *tlbe = &vcpu_e500->gtlb_arch[tlbsel][set_base + i];
+		struct kvm_book3e_206_tlb_entry *tlbe =
+			&vcpu_e500->gtlb_arch[offset + set_base + i];
 		unsigned int tid;
 
 		if (eaddr < get_tlb_eaddr(tlbe))
@@ -491,7 +486,7 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
 }
 
 static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
-					 struct tlbe *gtlbe,
+					 struct kvm_book3e_206_tlb_entry *gtlbe,
 					 pfn_t pfn)
 {
 	ref->pfn = pfn;
@@ -518,7 +513,7 @@ static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500)
 	int tlbsel = 0;
 	int i;
 
-	for (i = 0; i < vcpu_e500->gtlb_size[tlbsel]; i++) {
+	for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) {
 		struct tlbe_ref *ref =
 			&vcpu_e500->gtlb_priv[tlbsel][i].ref;
 		kvmppc_e500_ref_release(ref);
@@ -530,6 +525,8 @@ static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500)
 	int stlbsel = 1;
 	int i;
 
+	kvmppc_e500_id_table_reset_all(vcpu_e500);
+
 	for (i = 0; i < host_tlb_params[stlbsel].entries; i++) {
 		struct tlbe_ref *ref =
 			&vcpu_e500->tlb_refs[stlbsel][i];
@@ -559,18 +556,18 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
 		| MAS1_TSIZE(tsized);
 	vcpu_e500->mas2 = (eaddr & MAS2_EPN)
 		| (vcpu_e500->mas4 & MAS2_ATTRIB_MASK);
-	vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
+	vcpu_e500->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
 	vcpu_e500->mas6 = (vcpu_e500->mas6 & MAS6_SPID1)
 		| (get_cur_pid(vcpu) << 16)
 		| (as ? MAS6_SAS : 0);
-	vcpu_e500->mas7 = 0;
 }
 
 /* TID must be supplied by the caller */
-static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
-					   struct tlbe *gtlbe, int tsize,
-					   struct tlbe_ref *ref,
-					   u64 gvaddr, struct tlbe *stlbe)
+static inline void kvmppc_e500_setup_stlbe(
+	struct kvmppc_vcpu_e500 *vcpu_e500,
+	struct kvm_book3e_206_tlb_entry *gtlbe,
+	int tsize, struct tlbe_ref *ref, u64 gvaddr,
+	struct kvm_book3e_206_tlb_entry *stlbe)
 {
 	pfn_t pfn = ref->pfn;
 
@@ -581,16 +578,16 @@ static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
 	stlbe->mas2 = (gvaddr & MAS2_EPN)
 		| e500_shadow_mas2_attrib(gtlbe->mas2,
 				vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
-	stlbe->mas3 = ((pfn << PAGE_SHIFT) & MAS3_RPN)
-		| e500_shadow_mas3_attrib(gtlbe->mas3,
+	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT)
+		| e500_shadow_mas3_attrib(gtlbe->mas7_3,
 				vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
-	stlbe->mas7 = (pfn >> (32 - PAGE_SHIFT)) & MAS7_RPN;
 }
 
 /* sesel is an index into the entire array, not just the set */
 static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
-	u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int sesel,
-	struct tlbe *stlbe, struct tlbe_ref *ref)
+	u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
+	int tlbsel, int sesel, struct kvm_book3e_206_tlb_entry *stlbe,
+	struct tlbe_ref *ref)
 {
 	struct kvm_memory_slot *slot;
 	unsigned long pfn, hva;
@@ -700,15 +697,16 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 
 /* XXX only map the one-one case, for now use TLB0 */
 static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
-				int esel, struct tlbe *stlbe)
+				int esel,
+				struct kvm_book3e_206_tlb_entry *stlbe)
 {
-	struct tlbe *gtlbe;
+	struct kvm_book3e_206_tlb_entry *gtlbe;
 	struct tlbe_ref *ref;
 	int sesel = esel & (host_tlb_params[0].ways - 1);
 	int sesel_base;
 	gva_t ea;
 
-	gtlbe = &vcpu_e500->gtlb_arch[0][esel];
+	gtlbe = get_entry(vcpu_e500, 0, esel);
 	ref = &vcpu_e500->gtlb_priv[0][esel].ref;
 
 	ea = get_tlb_eaddr(gtlbe);
@@ -725,7 +723,8 @@ static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
  * the shadow TLB. */
 /* XXX for both one-one and one-to-many , for now use TLB1 */
 static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
-		u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, struct tlbe *stlbe)
+		u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
+		struct kvm_book3e_206_tlb_entry *stlbe)
 {
 	struct tlbe_ref *ref;
 	unsigned int victim;
@@ -754,7 +753,8 @@ static inline int kvmppc_e500_gtlbe_invalidate(
 				struct kvmppc_vcpu_e500 *vcpu_e500,
 				int tlbsel, int esel)
 {
-	struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+	struct kvm_book3e_206_tlb_entry *gtlbe =
+		get_entry(vcpu_e500, tlbsel, esel);
 
 	if (unlikely(get_tlb_iprot(gtlbe)))
 		return -1;
@@ -769,10 +769,10 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
 	int esel;
 
 	if (value & MMUCSR0_TLB0FI)
-		for (esel = 0; esel < vcpu_e500->gtlb_size[0]; esel++)
+		for (esel = 0; esel < vcpu_e500->gtlb_params[0].entries; esel++)
 			kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel);
 	if (value & MMUCSR0_TLB1FI)
-		for (esel = 0; esel < vcpu_e500->gtlb_size[1]; esel++)
+		for (esel = 0; esel < vcpu_e500->gtlb_params[1].entries; esel++)
 			kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel);
 
 	/* Invalidate all vcpu id mappings */
@@ -797,7 +797,8 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
 
 	if (ia) {
 		/* invalidate all entries */
-		for (esel = 0; esel < vcpu_e500->gtlb_size[tlbsel]; esel++)
+		for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries;
+		     esel++)
 			kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
 	} else {
 		ea &= 0xfffff000;
@@ -817,18 +818,17 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 	int tlbsel, esel;
-	struct tlbe *gtlbe;
+	struct kvm_book3e_206_tlb_entry *gtlbe;
 
 	tlbsel = get_tlb_tlbsel(vcpu_e500);
 	esel = get_tlb_esel(vcpu_e500, tlbsel);
 
-	gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+	gtlbe = get_entry(vcpu_e500, tlbsel, esel);
 	vcpu_e500->mas0 &= ~MAS0_NV(~0);
 	vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
 	vcpu_e500->mas1 = gtlbe->mas1;
 	vcpu_e500->mas2 = gtlbe->mas2;
-	vcpu_e500->mas3 = gtlbe->mas3;
-	vcpu_e500->mas7 = gtlbe->mas7;
+	vcpu_e500->mas7_3 = gtlbe->mas7_3;
 
 	return EMULATE_DONE;
 }
@@ -839,7 +839,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
 	int as = !!get_cur_sas(vcpu_e500);
 	unsigned int pid = get_cur_spid(vcpu_e500);
 	int esel, tlbsel;
-	struct tlbe *gtlbe = NULL;
+	struct kvm_book3e_206_tlb_entry *gtlbe = NULL;
 	gva_t ea;
 
 	ea = kvmppc_get_gpr(vcpu, rb);
@@ -847,7 +847,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
 	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
 		esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
 		if (esel >= 0) {
-			gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+			gtlbe = get_entry(vcpu_e500, tlbsel, esel);
 			break;
 		}
 	}
@@ -857,8 +857,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
 			| MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
 		vcpu_e500->mas1 = gtlbe->mas1;
 		vcpu_e500->mas2 = gtlbe->mas2;
-		vcpu_e500->mas3 = gtlbe->mas3;
-		vcpu_e500->mas7 = gtlbe->mas7;
+		vcpu_e500->mas7_3 = gtlbe->mas7_3;
 	} else {
 		int victim;
 
@@ -873,8 +872,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
 			| (vcpu_e500->mas4 & MAS4_TSIZED(~0));
 		vcpu_e500->mas2 &= MAS2_EPN;
 		vcpu_e500->mas2 |= vcpu_e500->mas4 & MAS2_ATTRIB_MASK;
-		vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
-		vcpu_e500->mas7 = 0;
+		vcpu_e500->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
 	}
 
 	kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
@@ -883,8 +881,8 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
 
 /* sesel is index into the set, not the whole array */
 static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
-			struct tlbe *gtlbe,
-			struct tlbe *stlbe,
+			struct kvm_book3e_206_tlb_entry *gtlbe,
+			struct kvm_book3e_206_tlb_entry *stlbe,
 			int stlbsel, int sesel)
 {
 	int stid;
@@ -902,28 +900,27 @@ static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
 int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-	struct tlbe *gtlbe;
+	struct kvm_book3e_206_tlb_entry *gtlbe;
 	int tlbsel, esel;
 
 	tlbsel = get_tlb_tlbsel(vcpu_e500);
 	esel = get_tlb_esel(vcpu_e500, tlbsel);
 
-	gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+	gtlbe = get_entry(vcpu_e500, tlbsel, esel);
 
 	if (get_tlb_v(gtlbe))
 		inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
 
 	gtlbe->mas1 = vcpu_e500->mas1;
 	gtlbe->mas2 = vcpu_e500->mas2;
-	gtlbe->mas3 = vcpu_e500->mas3;
-	gtlbe->mas7 = vcpu_e500->mas7;
+	gtlbe->mas7_3 = vcpu_e500->mas7_3;
 
 	trace_kvm_gtlb_write(vcpu_e500->mas0, gtlbe->mas1, gtlbe->mas2,
-			     gtlbe->mas3, gtlbe->mas7);
+			     (u32)gtlbe->mas7_3, (u32)(gtlbe->mas7_3 >> 32));
 
 	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
 	if (tlbe_is_host_safe(vcpu, gtlbe)) {
-		struct tlbe stlbe;
+		struct kvm_book3e_206_tlb_entry stlbe;
 		int stlbsel, sesel;
 		u64 eaddr;
 		u64 raddr;
@@ -996,9 +993,11 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
 			gva_t eaddr)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-	struct tlbe *gtlbe =
-		&vcpu_e500->gtlb_arch[tlbsel_of(index)][esel_of(index)];
-	u64 pgmask = get_tlb_bytes(gtlbe) - 1;
+	struct kvm_book3e_206_tlb_entry *gtlbe;
+	u64 pgmask;
+
+	gtlbe = get_entry(vcpu_e500, tlbsel_of(index), esel_of(index));
+	pgmask = get_tlb_bytes(gtlbe) - 1;
 
 	return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
 }
@@ -1012,12 +1011,12 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 	struct tlbe_priv *priv;
-	struct tlbe *gtlbe, stlbe;
+	struct kvm_book3e_206_tlb_entry *gtlbe, stlbe;
 	int tlbsel = tlbsel_of(index);
 	int esel = esel_of(index);
 	int stlbsel, sesel;
 
-	gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+	gtlbe = get_entry(vcpu_e500, tlbsel, esel);
 
 	switch (tlbsel) {
 	case 0:
@@ -1073,25 +1072,174 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
 
 void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
-	struct tlbe *tlbe;
+	struct kvm_book3e_206_tlb_entry *tlbe;
 
 	/* Insert large initial mapping for guest. */
-	tlbe = &vcpu_e500->gtlb_arch[1][0];
+	tlbe = get_entry(vcpu_e500, 1, 0);
 	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
 	tlbe->mas2 = 0;
-	tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
-	tlbe->mas7 = 0;
+	tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK;
 
 	/* 4K map for serial output. Used by kernel wrapper. */
-	tlbe = &vcpu_e500->gtlb_arch[1][1];
+	tlbe = get_entry(vcpu_e500, 1, 1);
 	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
 	tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
-	tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
-	tlbe->mas7 = 0;
+	tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
+}
+
+static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	int i;
+
+	clear_tlb_refs(vcpu_e500);
+	kfree(vcpu_e500->gtlb_priv[0]);
+	kfree(vcpu_e500->gtlb_priv[1]);
+
+	if (vcpu_e500->shared_tlb_pages) {
+		vfree((void *)(round_down((uintptr_t)vcpu_e500->gtlb_arch,
+					  PAGE_SIZE)));
+
+		for (i = 0; i < vcpu_e500->num_shared_tlb_pages; i++) {
+			set_page_dirty_lock(vcpu_e500->shared_tlb_pages[i]);
+			put_page(vcpu_e500->shared_tlb_pages[i]);
+		}
+
+		vcpu_e500->num_shared_tlb_pages = 0;
+		vcpu_e500->shared_tlb_pages = NULL;
+	} else {
+		kfree(vcpu_e500->gtlb_arch);
+	}
+
+	vcpu_e500->gtlb_arch = NULL;
+}
+
+int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
+			      struct kvm_config_tlb *cfg)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	struct kvm_book3e_206_tlb_params params;
+	char *virt;
+	struct page **pages;
+	struct tlbe_priv *privs[2] = {};
+	size_t array_len;
+	u32 sets;
+	int num_pages, ret, i;
+
+	if (cfg->mmu_type != KVM_MMU_FSL_BOOKE_NOHV)
+		return -EINVAL;
+
+	if (copy_from_user(&params, (void __user *)(uintptr_t)cfg->params,
+			   sizeof(params)))
+		return -EFAULT;
+
+	if (params.tlb_sizes[1] > 64)
+		return -EINVAL;
+	if (params.tlb_ways[1] != params.tlb_sizes[1])
+		return -EINVAL;
+	if (params.tlb_sizes[2] != 0 || params.tlb_sizes[3] != 0)
+		return -EINVAL;
+	if (params.tlb_ways[2] != 0 || params.tlb_ways[3] != 0)
+		return -EINVAL;
+
+	if (!is_power_of_2(params.tlb_ways[0]))
+		return -EINVAL;
+
+	sets = params.tlb_sizes[0] >> ilog2(params.tlb_ways[0]);
+	if (!is_power_of_2(sets))
+		return -EINVAL;
+
+	array_len = params.tlb_sizes[0] + params.tlb_sizes[1];
+	array_len *= sizeof(struct kvm_book3e_206_tlb_entry);
+
+	if (cfg->array_len < array_len)
+		return -EINVAL;
+
+	num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) -
+		    cfg->array / PAGE_SIZE;
+	pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	ret = get_user_pages_fast(cfg->array, num_pages, 1, pages);
+	if (ret < 0)
+		goto err_pages;
+
+	if (ret != num_pages) {
+		num_pages = ret;
+		ret = -EFAULT;
+		goto err_put_page;
+	}
+
+	virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);
+	if (!virt)
+		goto err_put_page;
+
+	privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0],
+			   GFP_KERNEL);
+	privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1],
+			   GFP_KERNEL);
+
+	if (!privs[0] || !privs[1])
+		goto err_put_page;
+
+	free_gtlb(vcpu_e500);
+
+	vcpu_e500->gtlb_priv[0] = privs[0];
+	vcpu_e500->gtlb_priv[1] = privs[1];
+
+	vcpu_e500->gtlb_arch = (struct kvm_book3e_206_tlb_entry *)
+		(virt + (cfg->array & (PAGE_SIZE - 1)));
+
+	vcpu_e500->gtlb_params[0].entries = params.tlb_sizes[0];
+	vcpu_e500->gtlb_params[1].entries = params.tlb_sizes[1];
+
+	vcpu_e500->gtlb_offset[0] = 0;
+	vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];
+
+	vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
+	if (params.tlb_sizes[0] <= 2048)
+		vcpu_e500->tlb0cfg |= params.tlb_sizes[0];
+
+	vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL;
+	vcpu_e500->tlb1cfg |= params.tlb_sizes[1];
+
+	vcpu_e500->shared_tlb_pages = pages;
+	vcpu_e500->num_shared_tlb_pages = num_pages;
+
+	vcpu_e500->gtlb_params[0].ways = params.tlb_ways[0];
+	vcpu_e500->gtlb_params[0].sets = sets;
+
+	vcpu_e500->gtlb_params[1].ways = params.tlb_sizes[1];
+	vcpu_e500->gtlb_params[1].sets = 1;
+
+	return 0;
+
+err_put_page:
+	kfree(privs[0]);
+	kfree(privs[1]);
+
+	for (i = 0; i < num_pages; i++)
+		put_page(pages[i]);
+
+err_pages:
+	kfree(pages);
+	return ret;
+}
+
+int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
+			     struct kvm_dirty_tlb *dirty)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+	clear_tlb_refs(vcpu_e500);
+	return 0;
 }
 
 int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
+	int entry_size = sizeof(struct kvm_book3e_206_tlb_entry);
+	int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE;
+
 	host_tlb_params[0].entries = mfspr(SPRN_TLB0CFG) & TLBnCFG_N_ENTRY;
 	host_tlb_params[1].entries = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
 
@@ -1124,17 +1272,22 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 		host_tlb_params[0].entries / host_tlb_params[0].ways;
 	host_tlb_params[1].sets = 1;
 
-	vcpu_e500->gtlb_size[0] = KVM_E500_TLB0_SIZE;
-	vcpu_e500->gtlb_arch[0] =
-		kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
-	if (vcpu_e500->gtlb_arch[0] == NULL)
-		goto err;
+	vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE;
+	vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE;
 
-	vcpu_e500->gtlb_size[1] = KVM_E500_TLB1_SIZE;
-	vcpu_e500->gtlb_arch[1] =
-		kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
-	if (vcpu_e500->gtlb_arch[1] == NULL)
-		goto err;
+	vcpu_e500->gtlb_params[0].ways = KVM_E500_TLB0_WAY_NUM;
+	vcpu_e500->gtlb_params[0].sets =
+		KVM_E500_TLB0_SIZE / KVM_E500_TLB0_WAY_NUM;
+
+	vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE;
+	vcpu_e500->gtlb_params[1].sets = 1;
+
+	vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL);
+	if (!vcpu_e500->gtlb_arch)
+		return -ENOMEM;
+
+	vcpu_e500->gtlb_offset[0] = 0;
+	vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE;
 
 	vcpu_e500->tlb_refs[0] =
 		kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[0].entries,
@@ -1148,15 +1301,15 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 	if (!vcpu_e500->tlb_refs[1])
 		goto err;
 
-	vcpu_e500->gtlb_priv[0] =
-		kzalloc(sizeof(struct tlbe_ref) * vcpu_e500->gtlb_size[0],
-			GFP_KERNEL);
+	vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) *
+					  vcpu_e500->gtlb_params[0].entries,
+					  GFP_KERNEL);
 	if (!vcpu_e500->gtlb_priv[0])
 		goto err;
 
-	vcpu_e500->gtlb_priv[1] =
-		kzalloc(sizeof(struct tlbe_ref) * vcpu_e500->gtlb_size[1],
-			GFP_KERNEL);
+	vcpu_e500->gtlb_priv[1] = kzalloc(sizeof(struct tlbe_ref) *
+					  vcpu_e500->gtlb_params[1].entries,
+					  GFP_KERNEL);
 	if (!vcpu_e500->gtlb_priv[1])
 		goto err;
 
@@ -1165,32 +1318,24 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 
 	/* Init TLB configuration register */
 	vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
-	vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_size[0];
+	vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[0].entries;
 	vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL;
-	vcpu_e500->tlb1cfg |= vcpu_e500->gtlb_size[1];
+	vcpu_e500->tlb1cfg |= vcpu_e500->gtlb_params[1].entries;
 
 	return 0;
 
 err:
+	free_gtlb(vcpu_e500);
 	kfree(vcpu_e500->tlb_refs[0]);
 	kfree(vcpu_e500->tlb_refs[1]);
-	kfree(vcpu_e500->gtlb_priv[0]);
-	kfree(vcpu_e500->gtlb_priv[1]);
-	kfree(vcpu_e500->gtlb_arch[0]);
-	kfree(vcpu_e500->gtlb_arch[1]);
 	return -1;
 }
 
 void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
-	clear_tlb_refs(vcpu_e500);
-
+	free_gtlb(vcpu_e500);
 	kvmppc_e500_id_table_free(vcpu_e500);
 
 	kfree(vcpu_e500->tlb_refs[0]);
 	kfree(vcpu_e500->tlb_refs[1]);
-	kfree(vcpu_e500->gtlb_priv[0]);
-	kfree(vcpu_e500->gtlb_priv[1]);
-	kfree(vcpu_e500->gtlb_arch[1]);
-	kfree(vcpu_e500->gtlb_arch[0]);
 }
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
index b587f69..2c29640 100644
--- a/arch/powerpc/kvm/e500_tlb.h
+++ b/arch/powerpc/kvm/e500_tlb.h
@@ -20,13 +20,9 @@
 #include <asm/tlb.h>
 #include <asm/kvm_e500.h>
 
-#define KVM_E500_TLB0_WAY_SIZE_BIT	7	/* Fixed */
-#define KVM_E500_TLB0_WAY_SIZE		(1UL << KVM_E500_TLB0_WAY_SIZE_BIT)
-#define KVM_E500_TLB0_WAY_SIZE_MASK	(KVM_E500_TLB0_WAY_SIZE - 1)
-
-#define KVM_E500_TLB0_WAY_NUM_BIT	1	/* No greater than 7 */
-#define KVM_E500_TLB0_WAY_NUM		(1UL << KVM_E500_TLB0_WAY_NUM_BIT)
-#define KVM_E500_TLB0_WAY_NUM_MASK	(KVM_E500_TLB0_WAY_NUM - 1)
+/* This geometry is the legacy default -- can be overridden by userspace */
+#define KVM_E500_TLB0_WAY_SIZE		128
+#define KVM_E500_TLB0_WAY_NUM		2
 
 #define KVM_E500_TLB0_SIZE  (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
 #define KVM_E500_TLB1_SIZE  16
@@ -58,50 +54,54 @@ extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
 extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *);
 
 /* TLB helper functions */
-static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
+static inline unsigned int
+get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe)
 {
 	return (tlbe->mas1 >> 7) & 0x1f;
 }
 
-static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
+static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe)
 {
 	return tlbe->mas2 & 0xfffff000;
 }
 
-static inline u64 get_tlb_bytes(const struct tlbe *tlbe)
+static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe)
 {
 	unsigned int pgsize = get_tlb_size(tlbe);
 	return 1ULL << 10 << pgsize;
 }
 
-static inline gva_t get_tlb_end(const struct tlbe *tlbe)
+static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe)
 {
 	u64 bytes = get_tlb_bytes(tlbe);
 	return get_tlb_eaddr(tlbe) + bytes - 1;
 }
 
-static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
+static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe)
 {
-	u64 rpn = tlbe->mas7;
-	return (rpn << 32) | (tlbe->mas3 & 0xfffff000);
+	return tlbe->mas7_3 & ~0xfffULL;
 }
 
-static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
+static inline unsigned int
+get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe)
 {
 	return (tlbe->mas1 >> 16) & 0xff;
 }
 
-static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
+static inline unsigned int
+get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe)
 {
 	return (tlbe->mas1 >> 12) & 0x1;
 }
 
-static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
+static inline unsigned int
+get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe)
 {
 	return (tlbe->mas1 >> 31) & 0x1;
 }
 
-static inline unsigned int get_tlb_iprot(const struct tlbe *tlbe)
+static inline unsigned int
+get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe)
 {
 	return (tlbe->mas1 >> 30) & 0x1;
 }
@@ -156,7 +156,7 @@ static inline unsigned int get_tlb_esel_bit(
 }
 
 static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
-			const struct tlbe *tlbe)
+			const struct kvm_book3e_206_tlb_entry *tlbe)
 {
 	gpa_t gpa;
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 0d843c6..55b4233 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -221,6 +221,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PPC_PAIRED_SINGLES:
 	case KVM_CAP_PPC_OSI:
 	case KVM_CAP_PPC_GET_PVINFO:
+#ifdef CONFIG_KVM_E500
+	case KVM_CAP_SW_TLB:
+#endif
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -601,6 +604,19 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		r = 0;
 		vcpu->arch.papr_enabled = true;
 		break;
+#ifdef CONFIG_KVM_E500
+	case KVM_CAP_SW_TLB: {
+		struct kvm_config_tlb cfg;
+		void __user *user_ptr = (void __user *)(uintptr_t)cap->args[0];
+
+		r = -EFAULT;
+		if (copy_from_user(&cfg, user_ptr, sizeof(cfg)))
+			break;
+
+		r = kvm_vcpu_ioctl_config_tlb(vcpu, &cfg);
+		break;
+	}
+#endif
 	default:
 		r = -EINVAL;
 		break;
@@ -650,6 +666,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
 		break;
 	}
+
+#ifdef CONFIG_KVM_E500
+	case KVM_DIRTY_TLB: {
+		struct kvm_dirty_tlb dirty;
+		r = -EFAULT;
+		if (copy_from_user(&dirty, argp, sizeof(dirty)))
+			goto out;
+		r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty);
+		break;
+	}
+#endif
+
 	default:
 		r = -EINVAL;
 	}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index f47fcd3..76ef719 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -557,6 +557,7 @@ struct kvm_ppc_pvinfo {
 #define KVM_CAP_MAX_VCPUS 66       /* returns max vcpus per vm */
 #define KVM_CAP_PPC_HIOR 67
 #define KVM_CAP_PPC_PAPR 68
+#define KVM_CAP_SW_TLB 69
 #define KVM_CAP_S390_GMAP 71
 
 #ifdef KVM_CAP_IRQ_ROUTING
@@ -637,6 +638,21 @@ struct kvm_clock_data {
 	__u32 pad[9];
 };
 
+#define KVM_MMU_FSL_BOOKE_NOHV		0
+#define KVM_MMU_FSL_BOOKE_HV		1
+
+struct kvm_config_tlb {
+	__u64 params;
+	__u64 array;
+	__u32 mmu_type;
+	__u32 array_len;
+};
+
+struct kvm_dirty_tlb {
+	__u64 bitmap;
+	__u32 num_dirty;
+};
+
 /*
  * ioctls for VM fds
  */
@@ -763,6 +779,8 @@ struct kvm_clock_data {
 #define KVM_CREATE_SPAPR_TCE	  _IOW(KVMIO,  0xa8, struct kvm_create_spapr_tce)
 /* Available with KVM_CAP_RMA */
 #define KVM_ALLOCATE_RMA	  _IOR(KVMIO,  0xa9, struct kvm_allocate_rma)
+/* Available with KVM_CAP_SW_TLB */
+#define KVM_DIRTY_TLB		  _IOW(KVMIO,  0xaa, struct kvm_dirty_tlb)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 
-- 
1.6.0.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html