On 03.07.14 16:45, Mihai Caraman wrote:
Handle LRAT error exception with support for lrat mapping and invalidation.
Signed-off-by: Mihai Caraman <mihai.caraman@xxxxxxxxxxxxx>
---
arch/powerpc/include/asm/kvm_host.h | 1 +
arch/powerpc/include/asm/kvm_ppc.h | 2 +
arch/powerpc/include/asm/mmu-book3e.h | 3 +
arch/powerpc/include/asm/reg_booke.h | 13 ++++
arch/powerpc/kernel/asm-offsets.c | 1 +
arch/powerpc/kvm/booke.c | 40 +++++++++++
arch/powerpc/kvm/bookehv_interrupts.S | 9 ++-
arch/powerpc/kvm/e500_mmu_host.c | 125 ++++++++++++++++++++++++++++++++++
arch/powerpc/kvm/e500mc.c | 2 +
9 files changed, 195 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index bb66d8b..7b6b2ec 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -433,6 +433,7 @@ struct kvm_vcpu_arch {
u32 eplc;
u32 epsc;
u32 oldpir;
+ u64 fault_lper;
#endif
#if defined(CONFIG_BOOKE)
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 9c89cdd..2730a29 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -86,6 +86,8 @@ extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
gva_t eaddr);
extern void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu);
extern void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu);
+extern void kvmppc_lrat_map(struct kvm_vcpu *vcpu, gfn_t gfn);
+extern void kvmppc_lrat_invalidate(struct kvm_vcpu *vcpu);
extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
unsigned int id);
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index 088fd9f..ac6acf7 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -40,6 +40,8 @@
/* MAS registers bit definitions */
+#define MAS0_ATSEL 0x80000000
+#define MAS0_ATSEL_SHIFT 31
#define MAS0_TLBSEL_MASK 0x30000000
#define MAS0_TLBSEL_SHIFT 28
#define MAS0_TLBSEL(x) (((x) << MAS0_TLBSEL_SHIFT) & MAS0_TLBSEL_MASK)
@@ -53,6 +55,7 @@
#define MAS0_WQ_CLR_RSRV 0x00002000
#define MAS1_VALID 0x80000000
+#define MAS1_VALID_SHIFT 31
#define MAS1_IPROT 0x40000000
#define MAS1_TID(x) (((x) << 16) & 0x3FFF0000)
#define MAS1_IND 0x00002000
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 75bda23..783d617 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -43,6 +43,8 @@
/* Special Purpose Registers (SPRNs)*/
#define SPRN_DECAR 0x036 /* Decrementer Auto Reload Register */
+#define SPRN_LPER 0x038 /* Logical Page Exception Register */
+#define SPRN_LPERU 0x039 /* Logical Page Exception Register Upper */
#define SPRN_IVPR 0x03F /* Interrupt Vector Prefix Register */
#define SPRN_USPRG0 0x100 /* User Special Purpose Register General 0 */
#define SPRN_SPRG3R 0x103 /* Special Purpose Register General 3 Read */
@@ -358,6 +360,9 @@
#define ESR_ILK 0x00100000 /* Instr. Cache Locking */
#define ESR_PUO 0x00040000 /* Unimplemented Operation exception */
#define ESR_BO 0x00020000 /* Byte Ordering */
+#define ESR_DATA 0x00000400 /* Page Table Data Access */
+#define ESR_TLBI 0x00000200 /* Page Table TLB Ineligible */
+#define ESR_PT 0x00000100 /* Page Table Translation */
#define ESR_SPV 0x00000080 /* Signal Processing operation */
/* Bit definitions related to the DBCR0. */
@@ -649,6 +654,14 @@
#define EPC_EPID 0x00003fff
#define EPC_EPID_SHIFT 0
+/* Bit definitions for LPER */
+#define LPER_ALPN 0x000FFFFFFFFFF000ULL
+#define LPER_ALPN_SHIFT 12
+#define LPER_WIMGE 0x00000F80
+#define LPER_WIMGE_SHIFT 7
+#define LPER_LPS 0x0000000F
+#define LPER_LPS_SHIFT 0
+
/*
* The IBM-403 is an even more odd special case, as it is much
* older than the IBM-405 series. We put these down here incase someone
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index f5995a9..be6e329 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -713,6 +713,7 @@ int main(void)
DEFINE(VCPU_HOST_MAS4, offsetof(struct kvm_vcpu, arch.host_mas4));
DEFINE(VCPU_HOST_MAS6, offsetof(struct kvm_vcpu, arch.host_mas6));
DEFINE(VCPU_EPLC, offsetof(struct kvm_vcpu, arch.eplc));
+ DEFINE(VCPU_FAULT_LPER, offsetof(struct kvm_vcpu, arch.fault_lper));
#endif
#ifdef CONFIG_KVM_EXIT_TIMING
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index a192975..ab1077f 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1286,6 +1286,46 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
}
+#ifdef CONFIG_KVM_BOOKE_HV
+ case BOOKE_INTERRUPT_LRAT_ERROR:
+ {
+ gfn_t gfn;
+
+ /*
+ * Guest TLB management instructions (EPCR.DGTMI == 0) is not
+ * supported for now
+ */
+ if (!(vcpu->arch.fault_esr & ESR_PT)) {
+ WARN(1, "%s: Guest TLB management instructions not supported!\n", __func__);
Wouldn't this allow a guest to flood the host's kernel log?
+ break;
+ }
+
+ gfn = (vcpu->arch.fault_lper & LPER_ALPN) >> LPER_ALPN_SHIFT;
Maybe add an #ifdef and #error check to make sure that LPER_ALPN_SHIFT
== PAGE_SHIFT?
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+ kvmppc_lrat_map(vcpu, gfn);
+ r = RESUME_GUEST;
+ } else if (vcpu->arch.fault_esr & ESR_DATA) {
+ vcpu->arch.paddr_accessed = (gfn << PAGE_SHIFT)
+ | (vcpu->arch.fault_dear & (PAGE_SIZE - 1));
+ vcpu->arch.vaddr_accessed =
+ vcpu->arch.fault_dear;
+
+ r = kvmppc_emulate_mmio(run, vcpu);
+ kvmppc_account_exit(vcpu, MMIO_EXITS);
It's a shame we have to duplicate that logic from the normal TLB miss
path, but I can't see any good way to combine them either.
+ } else {
+ kvmppc_booke_queue_irqprio(vcpu,
+ BOOKE_IRQPRIO_MACHINE_CHECK);
+ r = RESUME_GUEST;
+ }
+
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ break;
+ }
+#endif
+
case BOOKE_INTERRUPT_DEBUG: {
r = kvmppc_handle_debug(run, vcpu);
if (r == RESUME_HOST)
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index b3ecdd6..341c3a8 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -64,6 +64,7 @@
#define NEED_EMU 0x00000001 /* emulation -- save nv regs */
#define NEED_DEAR 0x00000002 /* save faulting DEAR */
#define NEED_ESR 0x00000004 /* save faulting ESR */
+#define NEED_LPER 0x00000008 /* save faulting LPER */
/*
* On entry:
@@ -203,6 +204,12 @@
PPC_STL r9, VCPU_FAULT_DEAR(r4)
.endif
+ /* Only suppported on 64-bit cores for now */
+ .if \flags & NEED_LPER
+ mfspr r7, SPRN_LPER
+ std r7, VCPU_FAULT_LPER(r4)
+ .endif
+
b kvmppc_resume_host
.endm
@@ -325,7 +332,7 @@ kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \
kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \
SPRN_CSRR0, SPRN_CSRR1, 0
kvm_handler BOOKE_INTERRUPT_LRAT_ERROR, EX_PARAMS(GEN), \
- SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
+ SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR | NEED_LPER)
#else
/*
* For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 79677d7..be1454b 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -95,6 +95,131 @@ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
stlbe->mas2, stlbe->mas7_3);
}
+#ifdef CONFIG_KVM_BOOKE_HV
+#ifdef CONFIG_64BIT
+static inline int lrat_next(void)
No inline in .c files please. Just only make them "static".
+{
+ int this, next;
+
+ this = local_paca->tcd.lrat_next;
+ next = (this + 1) % local_paca->tcd.lrat_max;
Can we assume that lrat_max is always a power of 2? IIRC modulo
functions with variables can be quite expensive. So if we can instead do
next = (this + 1) & local_paca->tcd.lrat_mask;
we should be faster and not rely on division helpers.
+ local_paca->tcd.lrat_next = next;
+
+ return this;
+}
+
+static inline int lrat_size(void)
+{
+ return local_paca->tcd.lrat_max;
+}
+#else
+/* LRAT is only supported in 64-bit kernel for now */
+static inline int lrat_next(void)
+{
+ BUG();
+}
+
+static inline int lrat_size(void)
+{
+ return 0;
+}
+#endif
+
+void write_host_lrate(int tsize, gfn_t gfn, unsigned long pfn, uint32_t lpid,
+ int valid, int lrat_entry)
+{
+ struct kvm_book3e_206_tlb_entry stlbe;
+ int esel = lrat_entry;
+ unsigned long flags;
+
+ stlbe.mas1 = (valid ? MAS1_VALID : 0) | MAS1_TSIZE(tsize);
+ stlbe.mas2 = ((u64)gfn << PAGE_SHIFT);
+ stlbe.mas7_3 = ((u64)pfn << PAGE_SHIFT);
+ stlbe.mas8 = MAS8_TGS | lpid;
+
+ local_irq_save(flags);
+ /* book3e_tlb_lock(); */
Hm?
+
+ if (esel == -1)
+ esel = lrat_next();
+ __write_host_tlbe(&stlbe, MAS0_ATSEL | MAS0_ESEL(esel));
+
+ /* book3e_tlb_unlock(); */
+ local_irq_restore(flags);
+}
+
+void kvmppc_lrat_map(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ struct kvm_memory_slot *slot;
+ unsigned long pfn;
+ unsigned long hva;
+ struct vm_area_struct *vma;
+ unsigned long psize;
+ int tsize;
+ unsigned long tsize_pages;
+
+ slot = gfn_to_memslot(vcpu->kvm, gfn);
+ if (!slot) {
+ pr_err_ratelimited("%s: couldn't find memslot for gfn %lx!\n",
+ __func__, (long)gfn);
+ return;
+ }
+
+ hva = slot->userspace_addr;
+
+ down_read(¤t->mm->mmap_sem);
+ vma = find_vma(current->mm, hva);
+ if (vma && (hva >= vma->vm_start)) {
+ psize = vma_kernel_pagesize(vma);
+ } else {
+ pr_err_ratelimited("%s: couldn't find virtual memory address for gfn %lx!\n", __func__, (long)gfn);
+ return;
+ }
+ up_read(¤t->mm->mmap_sem);
+
+ pfn = gfn_to_pfn_memslot(slot, gfn);
+ if (is_error_noslot_pfn(pfn)) {
+ pr_err_ratelimited("%s: couldn't get real page for gfn %lx!\n",
+ __func__, (long)gfn);
+ return;
+ }
+
+ tsize = __ilog2(psize) - 10;
+ tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
+ gfn &= ~(tsize_pages - 1);
+ pfn &= ~(tsize_pages - 1);
+
+ write_host_lrate(tsize, gfn, pfn, vcpu->kvm->arch.lpid, 1, -1);
+ kvm_release_pfn_clean(pfn);
Don't we have to keep the page locked so it doesn't get swapped away?
Alex
+}
+
+void kvmppc_lrat_invalidate(struct kvm_vcpu *vcpu)
+{
+ uint32_t mas0, mas1 = 0;
+ int esel;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ /* book3e_tlb_lock(); */
+
+ /* LRAT does not have a dedicated instruction for invalidation */
+ for (esel = 0; esel < lrat_size(); esel++) {
+ mas0 = MAS0_ATSEL | MAS0_ESEL(esel);
+ mtspr(SPRN_MAS0, mas0);
+ asm volatile("isync; tlbre" : : : "memory");
+ mas1 = mfspr(SPRN_MAS1) & ~MAS1_VALID;
+ mtspr(SPRN_MAS1, mas1);
+ asm volatile("isync; tlbwe" : : : "memory");
+ }
+ /* Must clear mas8 for other host tlbwe's */
+ mtspr(SPRN_MAS8, 0);
+ isync();
+
+ /* book3e_tlb_unlock(); */
+ local_irq_restore(flags);
+}
+#endif
+
/*
* Acquire a mas0 with victim hint, as if we just took a TLB miss.
*
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index b1d9939..5622d9a 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -99,6 +99,8 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
asm volatile("tlbilxlpid");
mtspr(SPRN_MAS5, 0);
local_irq_restore(flags);
+
+ kvmppc_lrat_invalidate(&vcpu_e500->vcpu);
}
void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html