Re: [PATCH 13/13] KVM: PPC: e500: MMU API

On 05/18/2011 01:42 AM, Scott Wood wrote:
This implements a shared-memory API for giving Qemu access to the guest's
TLB.

Signed-off-by: Scott Wood <scottwood@xxxxxxxxxxxxx>
---
  Documentation/kvm/api.txt           |   79 +++++++-
  arch/powerpc/include/asm/kvm.h      |   35 +++
  arch/powerpc/include/asm/kvm_e500.h |   23 +-
  arch/powerpc/include/asm/kvm_ppc.h  |    7 +
  arch/powerpc/kvm/e500.c             |    5 +-
  arch/powerpc/kvm/e500_emulate.c     |   12 +-
  arch/powerpc/kvm/e500_tlb.c         |  418 ++++++++++++++++++++++++++---------
  arch/powerpc/kvm/e500_tlb.h         |   55 ++---
  arch/powerpc/kvm/powerpc.c          |   28 +++
  include/linux/kvm.h                 |   19 ++
  10 files changed, 515 insertions(+), 166 deletions(-)

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index 42542eb..8f2de15 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -1268,7 +1268,7 @@ struct kvm_assigned_msix_entry {
  	__u16 padding[3];
  };

-4.54 KVM_SET_TSC_KHZ
+4.55 KVM_SET_TSC_KHZ

  Capability: KVM_CAP_TSC_CONTROL
  Architectures: x86
@@ -1279,7 +1279,7 @@ Returns: 0 on success, -1 on error
  Specifies the tsc frequency for the virtual machine. The unit of the
  frequency is KHz.

-4.55 KVM_GET_TSC_KHZ
+4.56 KVM_GET_TSC_KHZ

  Capability: KVM_CAP_GET_TSC_KHZ
  Architectures: x86
@@ -1291,6 +1291,81 @@ Returns the tsc frequency of the guest. The unit of the return value is
  KHz. If the host has unstable tsc this ioctl returns -EIO instead as an
  error.

+4.57 KVM_CONFIG_TLB
+
+Capability: KVM_CAP_SW_TLB
+Architectures: ppc
+Type: vcpu ioctl
+Parameters: struct kvm_config_tlb (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_config_tlb {
+	__u64 params;
+	__u64 array;
+	__u32 mmu_type;
+	__u32 array_len;
+};
+
+Configures the virtual CPU's TLB array, establishing a shared memory area
+between userspace and KVM.  The "params" and "array" fields are userspace
+addresses of mmu-type-specific data structures.  The "array_len" field is a
+safety mechanism, and should be set to the size in bytes of the memory that
+userspace has reserved for the array.  It must be at least the size dictated
+by "mmu_type" and "params".
+
+While KVM_RUN is active, the shared region is under control of KVM.  Its
+contents are undefined, and any modification by userspace results in
+boundedly undefined behavior.
+
+On return from KVM_RUN, the shared region will reflect the current state of
+the guest's TLB.  If userspace makes any changes, it must call KVM_DIRTY_TLB
+to tell KVM which entries have been changed, prior to calling KVM_RUN again
+on this vcpu.
+
+For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:

Can we rename those to KVM_MMU_BOOKE_206_NOHV and KVM_MMU_BOOKE_206_HV? Same for the function names. It's just easier to read and more accurate. Also, I don't like how it says KVM_MMU_FSL :). It's really a BookE interface.

+ - The "params" field is of type "struct kvm_fsl_booke_tlb_params".

I suppose params is a pointer?

+ - The "array" field points to an array of type "struct
+   kvm_fsl_booke_tlb_entry".
+ - The array consists of all entries in the first TLB, followed by all
+   entries in the second TLB.
+ - Within TLB0, entries are ordered first by increasing set number.  Within a
+   set, entries are ordered by way (increasing ESEL).
+   The hash for determining set number is: (MAS2 >> 12) & (num_sets - 1)
+   where "num_sets" is the tlb_sizes[] value divided by the tlb_ways[] value.
+ - Within TLB1, entries are ordered by increasing ESEL.

Don't special-case it. TLB1 basically consists of only a single set. The MMU interface here shouldn't be too e500-specific :).
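For reference, here's a quick sketch of how userspace would locate an entry in the shared array under the documented ordering. The helper names are mine, not from the patch; this is only an illustration of the set/way indexing and the "TLB1 follows TLB0" layout:

```c
#include <stdint.h>

/* Index into the shared array for a TLB0 entry, using the documented
 * hash: set = (MAS2 >> 12) & (num_sets - 1), entries ordered by set
 * first, then by way (ESEL) within a set. */
static inline uint32_t tlb0_index(uint64_t mas2, uint32_t way,
				  uint32_t tlb0_size, uint32_t tlb0_ways)
{
	uint32_t num_sets = tlb0_size / tlb0_ways;
	uint32_t set = (uint32_t)(mas2 >> 12) & (num_sets - 1);

	return set * tlb0_ways + way;
}

/* TLB1 entries follow all TLB0 entries and are ordered by ESEL alone
 * (i.e. one fully associative set). */
static inline uint32_t tlb1_index(uint32_t esel, uint32_t tlb0_size)
{
	return tlb0_size + esel;
}
```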

+
+4.58 KVM_DIRTY_TLB
+
+Capability: KVM_CAP_SW_TLB
+Architectures: ppc
+Type: vcpu ioctl
+Parameters: struct kvm_dirty_tlb (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_dirty_tlb {
+	__u64 bitmap;
+	__u32 num_dirty;
+};
+
+This must be called whenever userspace has changed an entry in the shared
+TLB, prior to calling KVM_RUN on the associated vcpu.
+
+The "bitmap" field is the userspace address of an array.  This array
+consists of a number of bits, equal to the total number of TLB entries as
+determined by the last successful call to KVM_CONFIG_TLB, rounded up to the
+nearest multiple of 64.
+
+Each bit corresponds to one TLB entry, ordered the same as in the shared TLB
+array.
+
+The array is little-endian: bit 0 is the least significant bit of the
+first byte, bit 8 is the least significant bit of the second byte, etc.
+This avoids any complications with differing word sizes.
+
+The "num_dirty" field is a performance hint for KVM to determine whether it
+should skip processing the bitmap and just invalidate everything.  It must
+be set to the number of set bits in the bitmap.
+
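As an illustration of this bitmap layout, userspace marking entries dirty before a KVM_DIRTY_TLB call might look like the sketch below. The struct and helper are invented here for illustration; only the bit numbering follows the documentation:

```c
#include <stdint.h>

/* Hypothetical userspace bookkeeping for KVM_DIRTY_TLB. */
struct dirty_state {
	uint64_t *bitmap;	/* (entries rounded up to 64) / 8 bytes */
	uint32_t num_dirty;	/* must equal the number of set bits */
};

static void mark_dirty(struct dirty_state *s, unsigned int entry)
{
	uint8_t *bytes = (uint8_t *)s->bitmap;

	/* Bit 0 is the LSB of byte 0, bit 8 the LSB of byte 1, etc.,
	 * so the byte addressed is the same on any host word size. */
	if (!(bytes[entry / 8] & (1u << (entry % 8)))) {
		bytes[entry / 8] |= 1u << (entry % 8);
		s->num_dirty++;
	}
}
```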
  5. The kvm_run structure

  Application code obtains a pointer to the kvm_run structure by
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index d2ca5ed..2419be2 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -272,4 +272,39 @@ struct kvm_guest_debug_arch {
  #define KVM_INTERRUPT_UNSET	-2U
  #define KVM_INTERRUPT_SET_LEVEL	-3U

+struct kvm_fsl_booke_tlb_entry {
+	__u32 mas8;
+	__u32 mas1;
+	__u64 mas2;
+	__u64 mas7_3;

Is there a good reason to not include the other MAS registers as defined by the architecture? That'd make the interface easily extensible for the future and should only eat a few additional bytes, no?

+};
+
+struct kvm_fsl_booke_tlb_params {
+	/*
+	 * book3e defines 4 TLBs, but current FSL Book E chips implement
+	 * only the first two.  The second two entries in tlb_sizes[]
+	 * and tlb_ways[] are reserved and must be zero.

I don't think that part is necessary. It's really an implementation detail.

+	 *
+	 * A tlb_ways value of zero means the array is fully associative.
+	 * Only TLB0 may be configured with a different associativity.  The
+	 * number of ways of TLB0 must be a power of two between 2 and 16.
+	 *
+	 * The size of TLB0 must be a multiple of the number of ways, and
+	 * the number of sets must be a power of two.
+	 *
+	 * The size of TLB1 may not exceed 64 entries.

Why not?

+	 *
+	 * KVM will adjust TLBnCFG based on the sizes configured here,
+	 * though arrays greater than 2048 entries will have TLBnCFG[NENTRY]
+	 * set to zero.
+	 *
+	 * TLB0 supports 4 KiB pages.
+	 * The page sizes supported by TLB1 are as indicated by
+	 * TLB1CFG (if MMUCFG[MAVN] = 0) or TLB1PS (if MMUCFG[MAVN] = 1).

This is not part of the interface, is it?

+	 */
+	__u32 tlb_sizes[4];
+	__u8 tlb_ways[4];
+	__u32 reserved[11];
+};
+
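To make the array_len sizing rule from 4.57 concrete: with this layout, the minimum is simply (TLB0 size + TLB1 size) entries. The structs below mirror the patch; the helper itself is just an illustrative sketch, not part of the interface:

```c
#include <stddef.h>
#include <stdint.h>

/* Mirrors the patch's definitions (userspace copy for illustration). */
struct kvm_fsl_booke_tlb_entry {
	uint32_t mas8;
	uint32_t mas1;
	uint64_t mas2;
	uint64_t mas7_3;
};

struct kvm_fsl_booke_tlb_params {
	uint32_t tlb_sizes[4];
	uint8_t tlb_ways[4];
	uint32_t reserved[11];
};

/* Minimum array_len for KVM_CONFIG_TLB: all TLB0 entries followed by
 * all TLB1 entries (tlb_sizes[2] and [3] must be zero here). */
static size_t min_array_len(const struct kvm_fsl_booke_tlb_params *p)
{
	return (size_t)(p->tlb_sizes[0] + p->tlb_sizes[1]) *
	       sizeof(struct kvm_fsl_booke_tlb_entry);
}
```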
  #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
index adbfca9..fab626d 100644
--- a/arch/powerpc/include/asm/kvm_e500.h
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -22,13 +22,6 @@
  #define E500_PID_NUM   3
  #define E500_TLB_NUM   2

-struct tlbe{
-	u32 mas1;
-	u32 mas2;
-	u32 mas3;
-	u32 mas7;
-};
-
  #define E500_TLB_VALID 1
  #define E500_TLB_DIRTY 2

@@ -40,8 +33,11 @@ struct tlbe_priv {
  struct vcpu_id_table;

  struct kvmppc_vcpu_e500 {
-	/* Unmodified copy of the guest's TLB. */
-	struct tlbe *gtlb_arch[E500_TLB_NUM];
+	/* Unmodified copy of the guest's TLB -- shared with Qemu. */

s/Qemu/user space/

+	struct kvm_fsl_booke_tlb_entry *gtlb_arch;
+
+	/* Starting entry number in gtlb_arch[] */
+	int gtlb_offset[E500_TLB_NUM];

  	/* KVM internal information associated with each guest TLB entry */
  	struct tlbe_priv *gtlb_priv[E500_TLB_NUM];
@@ -49,6 +45,9 @@ struct kvmppc_vcpu_e500 {
  	unsigned int gtlb_size[E500_TLB_NUM];
  	unsigned int gtlb_nv[E500_TLB_NUM];

+	unsigned int gtlb0_ways;
+	unsigned int gtlb0_sets;
+
  	u32 host_pid[E500_PID_NUM];
  	u32 pid[E500_PID_NUM];
  	u32 svr;
@@ -56,11 +55,10 @@ struct kvmppc_vcpu_e500 {
  	u32 mas0;
  	u32 mas1;
  	u32 mas2;
-	u32 mas3;
+	u64 mas7_3;
  	u32 mas4;
  	u32 mas5;
  	u32 mas6;
-	u32 mas7;

  	/* vcpu id table */
  	struct vcpu_id_table *idt;
@@ -73,6 +71,9 @@ struct kvmppc_vcpu_e500 {
  	u32 tlb1cfg;
  	u64 mcar;

+	struct page **shared_tlb_pages;
+	int num_shared_tlb_pages;
+
  	struct kvm_vcpu vcpu;
  };

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index c662f14..bb3d418 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -152,4 +152,11 @@ int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);

  void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);

+int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
+			      struct kvm_config_tlb *cfg);
+int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
+			     struct kvm_dirty_tlb *cfg);
+
+void kvmppc_core_heavy_exit(struct kvm_vcpu *vcpu);
+
  #endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 43923c3..628f723 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -110,7 +110,7 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
  	sregs->u.e.mas0 = vcpu_e500->mas0;
  	sregs->u.e.mas1 = vcpu_e500->mas1;
  	sregs->u.e.mas2 = vcpu_e500->mas2;
-	sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3;
+	sregs->u.e.mas7_3 = vcpu_e500->mas7_3;
  	sregs->u.e.mas4 = vcpu_e500->mas4;
  	sregs->u.e.mas6 = vcpu_e500->mas6;

@@ -143,8 +143,7 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
  		vcpu_e500->mas0 = sregs->u.e.mas0;
  		vcpu_e500->mas1 = sregs->u.e.mas1;
  		vcpu_e500->mas2 = sregs->u.e.mas2;
-		vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32;
-		vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3;
+		vcpu_e500->mas7_3 = sregs->u.e.mas7_3;
  		vcpu_e500->mas4 = sregs->u.e.mas4;
  		vcpu_e500->mas6 = sregs->u.e.mas6;
  	}
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 69cd665..4fb0ebb 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -91,13 +91,17 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
  	case SPRN_MAS2:
  		vcpu_e500->mas2 = spr_val; break;
  	case SPRN_MAS3:
-		vcpu_e500->mas3 = spr_val; break;
+		vcpu_e500->mas7_3 &= ~(u64)0xffffffff;
+		vcpu_e500->mas7_3 |= spr_val;
+		break;
  	case SPRN_MAS4:
  		vcpu_e500->mas4 = spr_val; break;
  	case SPRN_MAS6:
  		vcpu_e500->mas6 = spr_val; break;
  	case SPRN_MAS7:
-		vcpu_e500->mas7 = spr_val; break;
+		vcpu_e500->mas7_3 &= (u64)0xffffffff;
+		vcpu_e500->mas7_3 |= (u64)spr_val << 32;
+		break;
  	case SPRN_L1CSR0:
  		vcpu_e500->l1csr0 = spr_val;
  		vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC);
@@ -154,13 +158,13 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
  	case SPRN_MAS2:
  		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas2); break;
  	case SPRN_MAS3:
-		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas3); break;
+		kvmppc_set_gpr(vcpu, rt, (u32)vcpu_e500->mas7_3); break;
  	case SPRN_MAS4:
  		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break;
  	case SPRN_MAS6:
  		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break;
  	case SPRN_MAS7:
-		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7); break;
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7_3 >> 32); break;

  	case SPRN_TLB0CFG:
  		kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break;
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index d099d93..008f770 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -18,6 +18,11 @@
  #include <linux/kvm.h>
  #include <linux/kvm_host.h>
  #include <linux/highmem.h>
+#include <linux/log2.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+#include <linux/rwsem.h>
+#include <linux/vmalloc.h>
  #include <asm/kvm_ppc.h>
  #include <asm/kvm_e500.h>

@@ -63,6 +68,13 @@ static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid);

  static unsigned int tlb1_entry_num;

+static struct kvm_fsl_booke_tlb_entry *
+get_entry(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry)
+{
+	int offset = vcpu_e500->gtlb_offset[tlbsel];
+	return &vcpu_e500->gtlb_arch[offset + entry];
+}
+
  /*
   * Allocate a free shadow id and setup a valid sid mapping in given entry.
   * A mapping is only valid when vcpu_id_table and pcpu_id_table are match.
@@ -192,20 +204,23 @@ void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500)
  void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
  {
  	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-	struct tlbe *tlbe;
+	struct kvm_fsl_booke_tlb_entry *tlbe;
  	int i, tlbsel;

-	printk("| %8s | %8s | %8s | %8s | %8s |\n",
-			"nr", "mas1", "mas2", "mas3", "mas7");
+	printk("| %8s | %8s | %16s | %16s |\n",
+	       "nr", "mas1", "mas2", "mas7_3");

  	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+		int offset = vcpu_e500->gtlb_offset[tlbsel];
+
  		printk("Guest TLB%d:\n", tlbsel);
  		for (i = 0; i < vcpu_e500->gtlb_size[tlbsel]; i++) {
-			tlbe = &vcpu_e500->gtlb_arch[tlbsel][i];
+			tlbe = &vcpu_e500->gtlb_arch[offset + i];
  			if (tlbe->mas1 & MAS1_VALID)
-				printk(" G[%d][%3d] |  %08X | %08X | %08X | %08X |\n",
-					tlbsel, i, tlbe->mas1, tlbe->mas2,
-					tlbe->mas3, tlbe->mas7);
+				printk(" G[%d][%3d] |  %08X | %016llX | %016llX |\n",
+				       tlbsel, i, tlbe->mas1,
+				       (unsigned long long)tlbe->mas2,
+				       (unsigned long long)tlbe->mas7_3);
  		}
  	}
  }
@@ -216,7 +231,7 @@ static inline unsigned int tlb0_get_next_victim(
  	unsigned int victim;

  	victim = vcpu_e500->gtlb_nv[0]++;
-	if (unlikely(vcpu_e500->gtlb_nv[0] >= KVM_E500_TLB0_WAY_NUM))
+	if (unlikely(vcpu_e500->gtlb_nv[0] >= vcpu_e500->gtlb0_ways))
  		vcpu_e500->gtlb_nv[0] = 0;

  	return victim;
@@ -228,9 +243,9 @@ static inline unsigned int tlb1_max_shadow_size(void)
  	return tlb1_entry_num - tlbcam_index - 1;
  }

-static inline int tlbe_is_writable(struct tlbe *tlbe)
+static inline int tlbe_is_writable(struct kvm_fsl_booke_tlb_entry *tlbe)
  {
-	return tlbe->mas3 & (MAS3_SW|MAS3_UW);
+	return tlbe->mas7_3 & (MAS3_SW|MAS3_UW);
  }

  static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
@@ -261,40 +276,40 @@ static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
  /*
   * writing shadow tlb entry to host TLB
   */
-static inline void __write_host_tlbe(struct tlbe *stlbe, uint32_t mas0)
+static inline void __write_host_tlbe(struct kvm_fsl_booke_tlb_entry *stlbe,
+				     uint32_t mas0)
  {
  	unsigned long flags;

  	local_irq_save(flags);
  	mtspr(SPRN_MAS0, mas0);
  	mtspr(SPRN_MAS1, stlbe->mas1);
-	mtspr(SPRN_MAS2, stlbe->mas2);
-	mtspr(SPRN_MAS3, stlbe->mas3);
-	mtspr(SPRN_MAS7, stlbe->mas7);
+	mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2);
+	mtspr(SPRN_MAS3, (u32)stlbe->mas7_3);
+	mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32));
  	asm volatile("isync; tlbwe" : : : "memory");
  	local_irq_restore(flags);
  }

  static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
-		int tlbsel, int esel, struct tlbe *stlbe)
+		int tlbsel, int esel, struct kvm_fsl_booke_tlb_entry *stlbe)
  {
  	if (tlbsel == 0) {
-		__write_host_tlbe(stlbe,
-				  MAS0_TLBSEL(0) |
-				  MAS0_ESEL(esel & (KVM_E500_TLB0_WAY_NUM - 1)));
+		int way = esel & (vcpu_e500->gtlb0_ways - 1);
+		__write_host_tlbe(stlbe, MAS0_TLBSEL(0) | MAS0_ESEL(way));
  	} else {
  		__write_host_tlbe(stlbe,
  				  MAS0_TLBSEL(1) |
  				  MAS0_ESEL(to_htlb1_esel(esel)));
  	}
  	trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
-			     stlbe->mas3, stlbe->mas7);
+			     (u32)stlbe->mas7_3, (u32)(stlbe->mas7_3 >> 32));
  }

  void kvmppc_map_magic(struct kvm_vcpu *vcpu)
  {
  	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-	struct tlbe magic;
+	struct kvm_fsl_booke_tlb_entry magic;
  	ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK;
  	unsigned int stid;
  	pfn_t pfn;
@@ -308,9 +323,8 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu)
  	magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) |
  		     MAS1_TSIZE(BOOK3E_PAGESZ_4K);
  	magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M;
-	magic.mas3 = (pfn << PAGE_SHIFT) |
-		     MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
-	magic.mas7 = pfn >> (32 - PAGE_SHIFT);
+	magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) |
+		       MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;

  	__write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index));
  	preempt_enable();
@@ -331,7 +345,8 @@ void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
  static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
  					 int tlbsel, int esel)
  {
-	struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+	struct kvm_fsl_booke_tlb_entry *gtlbe =
+		get_entry(vcpu_e500, tlbsel, esel);
  	struct vcpu_id_table *idt = vcpu_e500->idt;
  	unsigned int pr, tid, ts, pid;
  	u32 val, eaddr;
@@ -377,25 +392,50 @@ static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
  	}
  }

+static int tlb0_set_base(struct kvmppc_vcpu_e500 *vcpu_e500, gva_t addr)
+{
+	int set_base;
+
+	set_base = (addr >> PAGE_SHIFT) & (vcpu_e500->gtlb0_sets - 1);
+	set_base *= vcpu_e500->gtlb0_ways;
+
+	return set_base;
+}
+
+static int get_tlb_esel(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel)
+{
+	int esel = get_tlb_esel_bit(vcpu_e500);
+
+	if (tlbsel == 0) {
+		esel &= vcpu_e500->gtlb0_ways - 1;
+		esel += tlb0_set_base(vcpu_e500, vcpu_e500->mas2);
+	} else {
+		esel &= vcpu_e500->gtlb_size[1] - 1;
+	}
+
+	return esel;
+}
+
  /* Search the guest TLB for a matching entry. */
  static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
  		gva_t eaddr, int tlbsel, unsigned int pid, int as)
  {
  	int size = vcpu_e500->gtlb_size[tlbsel];
-	int set_base;
+	int set_base, offset;
  	int i;

  	if (tlbsel == 0) {
-		int mask = size / KVM_E500_TLB0_WAY_NUM - 1;
-		set_base = (eaddr >> PAGE_SHIFT) & mask;
-		set_base *= KVM_E500_TLB0_WAY_NUM;
-		size = KVM_E500_TLB0_WAY_NUM;
+		set_base = tlb0_set_base(vcpu_e500, eaddr);
+		size = vcpu_e500->gtlb0_ways;
  	} else {
  		set_base = 0;
  	}

+	offset = vcpu_e500->gtlb_offset[tlbsel];
+
  	for (i = 0; i < size; i++) {
-		struct tlbe *tlbe = &vcpu_e500->gtlb_arch[tlbsel][set_base + i];
+		struct kvm_fsl_booke_tlb_entry *tlbe =
+			&vcpu_e500->gtlb_arch[offset + set_base + i];
  		unsigned int tid;

  		if (eaddr < get_tlb_eaddr(tlbe))
@@ -421,7 +461,7 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
  }

  static inline void kvmppc_e500_priv_setup(struct tlbe_priv *priv,
-					  struct tlbe *gtlbe,
+					  struct kvm_fsl_booke_tlb_entry *gtlbe,
  					  pfn_t pfn)
  {
  	priv->pfn = pfn;
@@ -463,17 +503,17 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
  		| MAS1_TSIZE(tsized);
  	vcpu_e500->mas2 = (eaddr & MAS2_EPN)
  		| (vcpu_e500->mas4 & MAS2_ATTRIB_MASK);
-	vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
+	vcpu_e500->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
  	vcpu_e500->mas6 = (vcpu_e500->mas6 & MAS6_SPID1)
  		| (get_cur_pid(vcpu) << 16)
  		| (as ? MAS6_SAS : 0);
-	vcpu_e500->mas7 = 0;
  }

  static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
-					   struct tlbe *gtlbe, int tsize,
-					   struct tlbe_priv *priv,
-					   u64 gvaddr, struct tlbe *stlbe)
+					   struct kvm_fsl_booke_tlb_entry *gtlbe,
+					   int tsize, struct tlbe_priv *priv,
+					   u64 gvaddr,
+					   struct kvm_fsl_booke_tlb_entry *stlbe)
  {
  	pfn_t pfn = priv->pfn;
  	unsigned int stid;
@@ -488,16 +528,14 @@ static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
  	stlbe->mas2 = (gvaddr & MAS2_EPN)
  		| e500_shadow_mas2_attrib(gtlbe->mas2,
  				vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
-	stlbe->mas3 = ((pfn << PAGE_SHIFT) & MAS3_RPN)
-		| e500_shadow_mas3_attrib(gtlbe->mas3,
+	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT)
+		| e500_shadow_mas3_attrib(gtlbe->mas7_3,
  				vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
-	stlbe->mas7 = (pfn >> (32 - PAGE_SHIFT)) & MAS7_RPN;
  }

-
  static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
-	u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel,
-	struct tlbe *stlbe)
+	u64 gvaddr, gfn_t gfn, struct kvm_fsl_booke_tlb_entry *gtlbe,
+	int tlbsel, int esel, struct kvm_fsl_booke_tlb_entry *stlbe)
  {
  	struct kvm_memory_slot *slot;
  	unsigned long pfn, hva;
@@ -609,11 +647,11 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,

  /* XXX only map the one-one case, for now use TLB0 */
  static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
-				int esel, struct tlbe *stlbe)
+				int esel, struct kvm_fsl_booke_tlb_entry *stlbe)
  {
-	struct tlbe *gtlbe;
+	struct kvm_fsl_booke_tlb_entry *gtlbe;

-	gtlbe = &vcpu_e500->gtlb_arch[0][esel];
+	gtlbe = get_entry(vcpu_e500, 0, esel);

  	kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
  			get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
@@ -626,7 +664,8 @@ static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
   * the shadow TLB. */
  /* XXX for both one-one and one-to-many , for now use TLB1 */
  static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
-		u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, struct tlbe *stlbe)
+		u64 gvaddr, gfn_t gfn, struct kvm_fsl_booke_tlb_entry *gtlbe,
+		struct kvm_fsl_booke_tlb_entry *stlbe)
  {
  	unsigned int victim;

@@ -652,7 +691,8 @@ static inline int kvmppc_e500_gtlbe_invalidate(
  				struct kvmppc_vcpu_e500 *vcpu_e500,
  				int tlbsel, int esel)
  {
-	struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+	struct kvm_fsl_booke_tlb_entry *gtlbe =
+		get_entry(vcpu_e500, tlbsel, esel);

  	if (unlikely(get_tlb_iprot(gtlbe)))
  		return -1;
@@ -715,18 +755,17 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
  {
  	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
  	int tlbsel, esel;
-	struct tlbe *gtlbe;
+	struct kvm_fsl_booke_tlb_entry *gtlbe;

  	tlbsel = get_tlb_tlbsel(vcpu_e500);
  	esel = get_tlb_esel(vcpu_e500, tlbsel);

-	gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+	gtlbe = get_entry(vcpu_e500, tlbsel, esel);
  	vcpu_e500->mas0 &= ~MAS0_NV(~0);
  	vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
  	vcpu_e500->mas1 = gtlbe->mas1;
  	vcpu_e500->mas2 = gtlbe->mas2;
-	vcpu_e500->mas3 = gtlbe->mas3;
-	vcpu_e500->mas7 = gtlbe->mas7;
+	vcpu_e500->mas7_3 = gtlbe->mas7_3;

  	return EMULATE_DONE;
  }
@@ -737,7 +776,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
  	int as = !!get_cur_sas(vcpu_e500);
  	unsigned int pid = get_cur_spid(vcpu_e500);
  	int esel, tlbsel;
-	struct tlbe *gtlbe = NULL;
+	struct kvm_fsl_booke_tlb_entry *gtlbe = NULL;
  	gva_t ea;

  	ea = kvmppc_get_gpr(vcpu, rb);
@@ -745,7 +784,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
  	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
  		esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
  		if (esel >= 0) {
-			gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+			gtlbe = get_entry(vcpu_e500, tlbsel, esel);
  			break;
  		}
  	}
@@ -755,8 +794,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
  			| MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
  		vcpu_e500->mas1 = gtlbe->mas1;
  		vcpu_e500->mas2 = gtlbe->mas2;
-		vcpu_e500->mas3 = gtlbe->mas3;
-		vcpu_e500->mas7 = gtlbe->mas7;
+		vcpu_e500->mas7_3 = gtlbe->mas7_3;
  	} else {
  		int victim;

@@ -771,8 +809,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
  			| (vcpu_e500->mas4 & MAS4_TSIZED(~0));
  		vcpu_e500->mas2 &= MAS2_EPN;
  		vcpu_e500->mas2 |= vcpu_e500->mas4 & MAS2_ATTRIB_MASK;
-		vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
-		vcpu_e500->mas7 = 0;
+		vcpu_e500->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
  	}

  	kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
@@ -782,28 +819,27 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
  int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
  {
  	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-	struct tlbe *gtlbe;
+	struct kvm_fsl_booke_tlb_entry *gtlbe;
  	int tlbsel, esel;

  	tlbsel = get_tlb_tlbsel(vcpu_e500);
  	esel = get_tlb_esel(vcpu_e500, tlbsel);

-	gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+	gtlbe = get_entry(vcpu_e500, tlbsel, esel);

  	if (get_tlb_v(gtlbe))
  		kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel);

  	gtlbe->mas1 = vcpu_e500->mas1;
  	gtlbe->mas2 = vcpu_e500->mas2;
-	gtlbe->mas3 = vcpu_e500->mas3;
-	gtlbe->mas7 = vcpu_e500->mas7;
+	gtlbe->mas7_3 = vcpu_e500->mas7_3;

  	trace_kvm_gtlb_write(vcpu_e500->mas0, gtlbe->mas1, gtlbe->mas2,
-			     gtlbe->mas3, gtlbe->mas7);
+			     (u32)gtlbe->mas7_3, (u32)(gtlbe->mas7_3 >> 32));

  	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
  	if (tlbe_is_host_safe(vcpu, gtlbe)) {
-		struct tlbe stlbe;
+		struct kvm_fsl_booke_tlb_entry stlbe;
  		int stlbsel, sesel;
  		u64 eaddr;
  		u64 raddr;
@@ -877,9 +913,11 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
  			gva_t eaddr)
  {
  	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-	struct tlbe *gtlbe =
-		&vcpu_e500->gtlb_arch[tlbsel_of(index)][esel_of(index)];
-	u64 pgmask = get_tlb_bytes(gtlbe) - 1;
+	struct kvm_fsl_booke_tlb_entry *gtlbe;
+	u64 pgmask;
+
+	gtlbe = get_entry(vcpu_e500, tlbsel_of(index), esel_of(index));
+	pgmask = get_tlb_bytes(gtlbe) - 1;

  	return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
  }
@@ -893,12 +931,12 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
  {
  	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
  	struct tlbe_priv *priv;
-	struct tlbe *gtlbe, stlbe;
+	struct kvm_fsl_booke_tlb_entry *gtlbe, stlbe;
  	int tlbsel = tlbsel_of(index);
  	int esel = esel_of(index);
  	int stlbsel, sesel;

-	gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+	gtlbe = get_entry(vcpu_e500, tlbsel, esel);

  	preempt_disable();
  	switch (tlbsel) {
@@ -956,51 +994,229 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)

  void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
  {
-	struct tlbe *tlbe;
+	struct kvm_fsl_booke_tlb_entry *tlbe;

  	/* Insert large initial mapping for guest. */
-	tlbe = &vcpu_e500->gtlb_arch[1][0];
+	tlbe = get_entry(vcpu_e500, 1, 0);
  	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
  	tlbe->mas2 = 0;
-	tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
-	tlbe->mas7 = 0;
+	tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK;

  	/* 4K map for serial output. Used by kernel wrapper. */

Which kernel wrapper? At the end of the day, these initial TLB values should be pushed down from Qemu btw. They really don't belong here. A different guest (u-boot for example) might need completely different mappings.

-	tlbe = &vcpu_e500->gtlb_arch[1][1];
+	tlbe = get_entry(vcpu_e500, 1, 1);
  	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
  	tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
-	tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
-	tlbe->mas7 = 0;
+	tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
+}
+
+static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	int stlbsel, i;
+
+	for (stlbsel = 0; stlbsel < 2; stlbsel++) {
+		for (i = 0; i < vcpu_e500->gtlb_size[stlbsel]; i++) {
+			struct tlbe_priv *priv =
+				&vcpu_e500->gtlb_priv[stlbsel][i];
+			kvmppc_e500_priv_release(priv);
+		}
+	}
+}
+
+static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	int i;
+
+	clear_tlb_privs(vcpu_e500);
+
+	kfree(vcpu_e500->gtlb_priv[0]);
+	kfree(vcpu_e500->gtlb_priv[1]);
+
+	if (vcpu_e500->shared_tlb_pages) {
+		vfree((void *)(round_down((uintptr_t)vcpu_e500->gtlb_arch,
+					  PAGE_SIZE)));
+
+		for (i = 0; i < vcpu_e500->num_shared_tlb_pages; i++)
+			put_page(vcpu_e500->shared_tlb_pages[i]);
+
+		vcpu_e500->num_shared_tlb_pages = 0;
+		vcpu_e500->shared_tlb_pages = NULL;
+	} else {
+		kfree(vcpu_e500->gtlb_arch);
+	}
+
+	vcpu_e500->gtlb_arch = NULL;
+}
+
+int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
+			      struct kvm_config_tlb *cfg)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	struct kvm_fsl_booke_tlb_params params;
+	char *virt;
+	struct page **pages;
+	struct tlbe_priv *privs[2] = {};
+	size_t array_len;
+	u32 sets;
+	int num_pages, ret, i;
+
+	if (cfg->mmu_type != KVM_MMU_FSL_BOOKE_NOHV)
+		return -EINVAL;
+
+	if (copy_from_user(&params, (void __user *)(uintptr_t)cfg->params,
+			   sizeof(params)))
+		return -EFAULT;
+
+	if (params.tlb_sizes[1] > 64)
+		return -EINVAL;
+	if (params.tlb_sizes[2] != 0 || params.tlb_sizes[3] != 0)
+		return -EINVAL;
+	if (params.tlb_ways[1] != 0 || params.tlb_ways[2] != 0 ||

Hrm - I always thought that basically the TLB ID would be:

  (some bits of the EA) || (ESEL)

So in the TLB1 case, "some bits" would mean "no bits" and ESEL is all of the information we get. And ESEL is the way, no?


Alex
