RE: [PATCH 07/11] E500 TLB emulation

"Liu Yu" <Yu.Liu@xxxxxxxxxxxxx> · Tue, 16 Dec 2008 18:08:26 +0800



> -----Original Message-----
> From: kvm-ppc-owner@xxxxxxxxxxxxxxx 
> [mailto:kvm-ppc-owner@xxxxxxxxxxxxxxx] On Behalf Of Hollis Blanchard
> Sent: Tuesday, December 16, 2008 9:34 AM
> To: Liu Yu-B13201
> Cc: kvm-ppc@xxxxxxxxxxxxxxx
> Subject: Re: [PATCH 07/11] E500 TLB emulation
> 
> On Thu, 2008-12-11 at 17:11 +0800, Liu Yu wrote:
> > Signed-off-by: Liu Yu <yu.liu@xxxxxxxxxxxxx>
> > ---
> >  arch/powerpc/kvm/e500_tlb.c |  748 
> +++++++++++++++++++++++++++++++++++++++++++
> >  arch/powerpc/kvm/e500_tlb.h |  177 ++++++++++
> >  2 files changed, 925 insertions(+), 0 deletions(-)
> >  create mode 100644 arch/powerpc/kvm/e500_tlb.c
> >  create mode 100644 arch/powerpc/kvm/e500_tlb.h
> > 
> > diff --git a/arch/powerpc/kvm/e500_tlb.c 
> b/arch/powerpc/kvm/e500_tlb.c
> > new file mode 100644
> > index 0000000..a5858e6
> > --- /dev/null
> > +++ b/arch/powerpc/kvm/e500_tlb.c
> > @@ -0,0 +1,748 @@
> > +/*
> > + * Copyright (C) 2008 Freescale Semiconductor, Inc. All 
> rights reserved.
> > + *
> > + * Author: Yu Liu, yu.liu@xxxxxxxxxxxxx, Aug, 2008
> > + *
> > + * Description:
> > + * This file is based on arch/powerpc/kvm/44x_tlb.c,
> > + * by Hollis Blanchard <hollisb@xxxxxxxxxx>.
> > + *
> > + * This program is free software; you can redistribute it 
> and/or modify
> > + * it under the terms of the GNU General Public License, 
> version 2, as
> > + * published by the Free Software Foundation.
> > + */
> > +
> > +#include <linux/types.h>
> > +#include <linux/string.h>
> > +#include <linux/kvm.h>
> > +#include <linux/kvm_host.h>
> > +#include <linux/highmem.h>
> > +#include <asm/kvm_ppc.h>
> > +#include <asm/kvm_e500.h>
> > +
> > +#include "e500_tlb.h"
> > +
> > +#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
> > +
> > +static unsigned int tlb1_entry_num;
> > +
> > +void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
> > +{
> > +	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
> > +	struct tlbe *tlbe;
> > +	int i, tlbsel;
> > +
> > +	printk("| %8s | %8s | %8s | %8s | %8s |\n",
> > +			"nr", "mas1", "mas2", "mas3", "mas7");
> > +
> > +	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
> > +		printk("Guest TLB%d:\n", tlbsel);
> > +		for (i = 0; i < 
> vcpu_e500->guest_tlb_size[tlbsel]; i++) {
> > +			tlbe = &vcpu_e500->guest_tlb[tlbsel][i];
> > +			if (tlbe->mas1 & MAS1_VALID)
> > +				printk(" G[%d][%3d] |  %08X | 
> %08X | %08X | %08X |\n",
> > +					tlbsel, i, tlbe->mas1, 
> tlbe->mas2,
> > +					tlbe->mas3, tlbe->mas7);
> > +		}
> > +	}
> > +
> > +	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
> > +		printk("Shadow TLB%d:\n", tlbsel);
> > +		for (i = 0; i < 
> vcpu_e500->shadow_tlb_size[tlbsel]; i++) {
> > +			tlbe = &vcpu_e500->shadow_tlb[tlbsel][i];
> > +			if (tlbe->mas1 & MAS1_VALID)
> > +				printk(" S[%d][%3d] |  %08X | 
> %08X | %08X | %08X |\n",
> > +					tlbsel, i, tlbe->mas1, 
> tlbe->mas2,
> > +					tlbe->mas3, tlbe->mas7);
> > +		}
> > +	}
> > +}
> > +
> > +static inline unsigned int tlb0_get_next_victim(
> > +		struct kvmppc_vcpu_e500 *vcpu_e500)
> > +{
> > +	unsigned int victim;
> > +
> > +	victim = vcpu_e500->guest_tlb_nv[0]++;
> > +	if (unlikely(vcpu_e500->guest_tlb_nv[0] >= 
> KVM_E500_TLB0_WAY_NUM))
> > +		vcpu_e500->guest_tlb_nv[0] = 0;
> > +
> > +	return victim;
> > +}
> > +
> > +static inline unsigned int tlb1_max_shadow_size(void)
> > +{
> > +	return tlb1_entry_num - tlbcam_index;
> > +}
> > +
> > +static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
> > +{
> > +	/* Mask off reserved bits. */
> > +	mas3 &= MAS3_ATTRIB_MASK;
> > +
> > +	if (!usermode) {
> > +		/* Guest is in supervisor mode,
> > +		 * so we need to translate guest
> > +		 * supervisor permissions into user permissions. */
> > +		mas3 &= ~E500_TLB_USER_PERM_MASK;
> > +		mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1;
> > +	}
> > +
> > +	return mas3 | E500_TLB_SUPER_PERM_MASK;
> > +}
> > +
> > +static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
> > +{
> > +	return mas2 & MAS2_ATTRIB_MASK;
> > +}
> > +
> > +/*
> > + * writing shadow tlb entry to host TLB
> > + */
> > +static inline void __write_host_tlbe(struct tlbe *stlbe)
> > +{
> > +	mtspr(SPRN_MAS1, stlbe->mas1);
> > +	mtspr(SPRN_MAS2, stlbe->mas2);
> > +	mtspr(SPRN_MAS3, stlbe->mas3);
> > +	mtspr(SPRN_MAS7, stlbe->mas7);
> > +	__asm__ __volatile__ ("tlbwe\n" : : );
> > +}
> > +
> > +static inline void write_host_tlbe(struct kvmppc_vcpu_e500 
> *vcpu_e500,
> > +		int tlbsel, int esel)
> > +{
> > +	struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
> > +
> > +	local_irq_disable();
> > +	if (tlbsel == 0) {
> > +		__write_host_tlbe(stlbe);
> > +	} else {
> > +		unsigned register mas0;
> > +
> > +		mas0 = mfspr(SPRN_MAS0);
> > +
> > +		mtspr(SPRN_MAS0, MAS0_TLBSEL(1) | 
> MAS0_ESEL(to_htlb1_esel(esel)));
> > +		__write_host_tlbe(stlbe);
> > +
> > +		mtspr(SPRN_MAS0, mas0);
> > +	}
> > +	local_irq_enable();
> > +}
> > +
> > +void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu)
> > +{
> > +	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
> > +	int i;
> > +	unsigned register mas0;
> > +	/* Mark every guest entry in the shadow TLB entry 
> modified, so that they
> > +	* will all be reloaded on the next vcpu run (instead of being
> > +	* demand-faulted). */
> 
> This comment does not apply to this code.
Fixed

> 
> > +	local_irq_disable();
> > +	mas0 = mfspr(SPRN_MAS0);
> > +	for (i = 0; i < vcpu_e500->shadow_tlb_size[1]; i++) {
> > +		struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i];
> > +
> > +		if (get_tlb_v(stlbe)) {
> > +			mtspr(SPRN_MAS0, MAS0_TLBSEL(1)
> > +					| MAS0_ESEL(to_htlb1_esel(i)));
> > +			__write_host_tlbe(stlbe);
> > +		}
> > +	}
> > +	mtspr(SPRN_MAS0, mas0);
> > +	local_irq_enable();
> > +}
> 
> ...
> 
> > +
> > +/* Must be called with mmap_sem locked for writing. */
> 
> This comment is not correct.
Fixed.

> 
> > +static void e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500,
> > +		int tlbsel, int esel)
> > +{
> > +	struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
> > +	struct page *page = vcpu_e500->shadow_pages[tlbsel][esel];
> > +
> > +	if (page) {
> > +		vcpu_e500->shadow_pages[tlbsel][esel] = NULL;
> > +
> > +		if (get_tlb_v(stlbe)) {
> > +			if (tlbe_is_writable(stlbe))
> > +				kvm_release_page_dirty(page);
> > +			else
> > +				kvm_release_page_clean(page);
> > +		}
> > +	}
> > +}
> > +
> > +/* Must be called with mmap_sem locked for writing. */
> 
> This comment is not correct.
Fixed.

> 
> > +static void e500_tlbe_invalidate(struct kvmppc_vcpu_e500 
> *vcpu_e500,
> > +		int tlbsel, int esel)
> > +{
> > +	struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
> > +
> > +	e500_shadow_release(vcpu_e500, tlbsel, esel);
> > +	stlbe->mas1 = 0;
> > +	KVMTRACE_5D(STLB_INVAL, &vcpu_e500->vcpu, 
> index_of(tlbsel, esel),
> > +			stlbe->mas1, stlbe->mas2, stlbe->mas3, 
> stlbe->mas7,
> > +			handler);
> > +}
> 
> > +
> > +/* Must be called with mmap_sem locked for writing. */
> 
> Comment is not correct.
Fixed.

> 
> > +static int e500_guest_tlbe_invalidate(struct 
> kvmppc_vcpu_e500 *vcpu_e500,
> > +		int tlbsel, int esel)
> > +{
> > +	struct tlbe *gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
> > +
> > +	if (unlikely(get_tlb_iprot(gtlbe)))
> > +		return -1;
> > +
> > +	if (tlbsel == 1) {
> > +		e500_mmu_invalidate(vcpu_e500, get_tlb_eaddr(gtlbe),
> > +				get_tlb_end(gtlbe),
> > +				get_tlb_tid(gtlbe));
> > +	} else {
> > +		e500_tlbe_invalidate(vcpu_e500, tlbsel, esel);
> > +	}
> > +
> > +	gtlbe->mas1 = 0;
> > +
> > +	return 0;
> > +}
> > +int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
> > +{
> 
> ^^^ Please add a newline before this function name.
Fixed.

> 
> > +	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
> > +	unsigned int ia;
> > +	int esel, tlbsel;
> > +	gva_t ea;
> > +
> > +	ea = ((ra) ? vcpu->arch.gpr[ra] : 0) + vcpu->arch.gpr[rb];
> > +
> > +	ia = (ea >> 2) & 0x1;
> > +	tlbsel = (ea >> 3) & 0x3;
> 
> > +	if (ia) {
> > +		/* invalidate all entries */
> > +		for (esel = 0; esel < 
> vcpu_e500->guest_tlb_size[tlbsel]; esel++)
> > +			e500_guest_tlbe_invalidate(vcpu_e500, 
> tlbsel, esel);
> 
> This is actually a major issue, and it affects many places 
> where you use
> tlbsel.
> 
> To get tlbsel you mask various things with 0x3, but you've only
> allocated 2 guest TLBs! So if the guest sets MAS0[TLBSEL] = 2 or 3, or
> tlbivax with the right bits set, you will dereference beyond 
> the bounds
> of the guest_tlb* vcpu arrays.
> 
Hmm, good catch. Thanks.

> To fix this, instead of doing error-checking in all these 
> sites, I would
> just mask with 1 instead of 3.
> 

If the guest instruction appoint MAS0[TLBSEL] = 2 or 3,
I think the best way is to ignore it.
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html