It is unnecessary to keep the shadow TLB. First, the shadow TLB keeps fixed values in its entries, which makes things inflexible. Second, removing the shadow TLB saves a lot of memory. This patch removes the shadow TLB and calculates the shadow TLB entry value just before we write it to hardware. We also use a new struct tlbe_ref to track the relation between a guest TLB entry and its page. Signed-off-by: Liu Yu <yu.liu@xxxxxxxxxxxxx> --- arch/powerpc/include/asm/kvm_e500.h | 7 +- arch/powerpc/kvm/e500_tlb.c | 287 +++++++++++++---------------------- 2 files changed, 108 insertions(+), 186 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h index 7fea26f..cb785f9 100644 --- a/arch/powerpc/include/asm/kvm_e500.h +++ b/arch/powerpc/include/asm/kvm_e500.h @@ -29,13 +29,18 @@ struct tlbe{ u32 mas7; }; +struct tlbe_ref { + struct page *page; + struct tlbe *gtlbe; +}; + struct kvmppc_vcpu_e500 { /* Unmodified copy of the guest's TLB. */ struct tlbe *guest_tlb[E500_TLB_NUM]; /* TLB that's actually used when the guest is running. */ struct tlbe *shadow_tlb[E500_TLB_NUM]; /* Pages which are referenced in the shadow TLB. */ - struct page **shadow_pages[E500_TLB_NUM]; + struct tlbe_ref *shadow_refs[E500_TLB_NUM]; unsigned int guest_tlb_size[E500_TLB_NUM]; unsigned int shadow_tlb_size[E500_TLB_NUM]; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index f11ca0f..0b657af 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * Copyright (C) 2008, 2010 Freescale Semiconductor, Inc. All rights reserved. 
* * Author: Yu Liu, yu.liu@xxxxxxxxxxxxx * @@ -48,17 +48,6 @@ void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) tlbe->mas3, tlbe->mas7); } } - - for (tlbsel = 0; tlbsel < 2; tlbsel++) { - printk("Shadow TLB%d:\n", tlbsel); - for (i = 0; i < vcpu_e500->shadow_tlb_size[tlbsel]; i++) { - tlbe = &vcpu_e500->shadow_tlb[tlbsel][i]; - if (tlbe->mas1 & MAS1_VALID) - printk(" S[%d][%3d] | %08X | %08X | %08X | %08X |\n", - tlbsel, i, tlbe->mas1, tlbe->mas2, - tlbe->mas3, tlbe->mas7); - } - } } static inline unsigned int tlb0_get_next_victim( @@ -121,10 +110,8 @@ static inline void __write_host_tlbe(struct tlbe *stlbe) } static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, - int tlbsel, int esel) + int tlbsel, int esel, struct tlbe *stlbe) { - struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; - local_irq_disable(); if (tlbsel == 0) { __write_host_tlbe(stlbe); @@ -139,28 +126,12 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, mtspr(SPRN_MAS0, mas0); } local_irq_enable(); + trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2, + stlbe->mas3, stlbe->mas7); } void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu) { - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - int i; - unsigned register mas0; - - /* Load all valid TLB1 entries to reduce guest tlb miss fault */ - local_irq_disable(); - mas0 = mfspr(SPRN_MAS0); - for (i = 0; i < tlb1_max_shadow_size(); i++) { - struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i]; - - if (get_tlb_v(stlbe)) { - mtspr(SPRN_MAS0, MAS0_TLBSEL(1) - | MAS0_ESEL(to_htlb1_esel(i))); - __write_host_tlbe(stlbe); - } - } - mtspr(SPRN_MAS0, mas0); - local_irq_enable(); } void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu) @@ -202,16 +173,19 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, } static void kvmppc_e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500, - int tlbsel, int esel) + int stlbsel, int sesel) { - struct tlbe *stlbe = 
&vcpu_e500->shadow_tlb[tlbsel][esel]; - struct page *page = vcpu_e500->shadow_pages[tlbsel][esel]; + struct tlbe_ref *ref; + struct page *page; + + ref = &vcpu_e500->shadow_refs[stlbsel][sesel]; + page = ref->page; if (page) { - vcpu_e500->shadow_pages[tlbsel][esel] = NULL; + ref->page = NULL; - if (get_tlb_v(stlbe)) { - if (tlbe_is_writable(stlbe)) + if (get_tlb_v(ref->gtlbe)) { + if (tlbe_is_writable(ref->gtlbe)) kvm_release_page_dirty(page); else kvm_release_page_clean(page); @@ -219,46 +193,19 @@ static void kvmppc_e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500, } } -static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, - int tlbsel, int esel) -{ - struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; - - kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel); - stlbe->mas1 = 0; - /* XXX doesn't compile */ -#if 0 - trace_kvm_stlb_inval(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2, - stlbe->mas3, stlbe->mas7); -#endif -} - static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, - gva_t eaddr, gva_t eend, u32 tid) + int esel) { - unsigned int pid = tid & 0xff; + struct tlbe stlbe; unsigned int i; - /* XXX Replace loop with fancy data structures. 
*/ - for (i = 0; i < vcpu_e500->guest_tlb_size[1]; i++) { - struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i]; - unsigned int tid; - - if (!get_tlb_v(stlbe)) - continue; - - if (eend < get_tlb_eaddr(stlbe)) - continue; - - if (eaddr > get_tlb_end(stlbe)) - continue; - - tid = get_tlb_tid(stlbe); - if (tid && (tid != pid)) - continue; + stlbe.mas1 = 0; + for (i = 0; i < KVM_E500_TLB1_SIZE; i++) { + struct tlbe_ref *ref = + &vcpu_e500->shadow_refs[1][i]; - kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i); - write_host_tlbe(vcpu_e500, 1, i); + if (ref->gtlbe == &vcpu_e500->guest_tlb[1][esel]) + write_host_tlbe(vcpu_e500, 1, i, &stlbe); } } @@ -289,14 +236,29 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, vcpu_e500->mas7 = 0; } +static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, + struct tlbe *gtlbe, struct tlbe_ref *ref, + u64 gvaddr, struct tlbe *stlbe) +{ + hpa_t hpaddr = page_to_phys(ref->page); + + /* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */ + stlbe->mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K) + | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID; + stlbe->mas2 = (gvaddr & MAS2_EPN) + | e500_shadow_mas2_attrib(gtlbe->mas2, + vcpu_e500->vcpu.arch.msr & MSR_PR); + stlbe->mas3 = (hpaddr & MAS3_RPN) + | e500_shadow_mas3_attrib(gtlbe->mas3, + vcpu_e500->vcpu.arch.msr & MSR_PR); + stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN; +} + static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, - u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel) + gfn_t gfn, struct tlbe *gtlbe, int stlbsel, int sesel) { struct page *new_page; - struct tlbe *stlbe; - hpa_t hpaddr; - - stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; + struct tlbe_ref *ref; /* Get reference to new page. 
*/ new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn); @@ -305,39 +267,24 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, kvm_release_page_clean(new_page); return; } - hpaddr = page_to_phys(new_page); /* Drop reference to old page. */ - kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel); - - vcpu_e500->shadow_pages[tlbsel][esel] = new_page; + kvmppc_e500_shadow_release(vcpu_e500, stlbsel, sesel); - /* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */ - stlbe->mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K) - | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID; - stlbe->mas2 = (gvaddr & MAS2_EPN) - | e500_shadow_mas2_attrib(gtlbe->mas2, - vcpu_e500->vcpu.arch.msr & MSR_PR); - stlbe->mas3 = (hpaddr & MAS3_RPN) - | e500_shadow_mas3_attrib(gtlbe->mas3, - vcpu_e500->vcpu.arch.msr & MSR_PR); - stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN; - - trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2, - stlbe->mas3, stlbe->mas7); + ref = &vcpu_e500->shadow_refs[stlbsel][sesel]; + ref->page = new_page; + ref->gtlbe = gtlbe; } /* XXX only map the one-one case, for now use TLB0 */ -static int kvmppc_e500_stlbe_map(struct kvmppc_vcpu_e500 *vcpu_e500, - int tlbsel, int esel) +static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, int esel) { struct tlbe *gtlbe; - gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; + gtlbe = &vcpu_e500->guest_tlb[0][esel]; - kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), - get_tlb_raddr(gtlbe) >> PAGE_SHIFT, - gtlbe, tlbsel, esel); + kvmppc_e500_shadow_map(vcpu_e500, get_tlb_raddr(gtlbe) >> PAGE_SHIFT, + gtlbe, 0, esel); return esel; } @@ -355,7 +302,7 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, if (unlikely(vcpu_e500->guest_tlb_nv[1] >= tlb1_max_shadow_size())) vcpu_e500->guest_tlb_nv[1] = 0; - kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, victim); + kvmppc_e500_shadow_map(vcpu_e500, gfn, gtlbe, 1, victim); return victim; } @@ -366,33 +313,19 @@ static int 
kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) { if (usermode) { - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - int i; - - /* XXX Replace loop with fancy data structures. */ - for (i = 0; i < tlb1_max_shadow_size(); i++) - kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i); - _tlbil_all(); } } -static int kvmppc_e500_gtlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, - int tlbsel, int esel) +static inline int kvmppc_e500_gtlbe_invalidate( + struct kvmppc_vcpu_e500 *vcpu_e500, + int tlbsel, int esel) { struct tlbe *gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; if (unlikely(get_tlb_iprot(gtlbe))) return -1; - if (tlbsel == 1) { - kvmppc_e500_tlb1_invalidate(vcpu_e500, get_tlb_eaddr(gtlbe), - get_tlb_end(gtlbe), - get_tlb_tid(gtlbe)); - } else { - kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel); - } - gtlbe->mas1 = 0; return 0; @@ -515,23 +448,16 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - u64 eaddr; - u64 raddr; - u32 tid; struct tlbe *gtlbe; - int tlbsel, esel, stlbsel, sesel; + int tlbsel, esel; tlbsel = get_tlb_tlbsel(vcpu_e500); esel = get_tlb_esel(vcpu_e500, tlbsel); gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; - if (get_tlb_v(gtlbe) && tlbsel == 1) { - eaddr = get_tlb_eaddr(gtlbe); - tid = get_tlb_tid(gtlbe); - kvmppc_e500_tlb1_invalidate(vcpu_e500, eaddr, - get_tlb_end(gtlbe), tid); - } + if (get_tlb_v(gtlbe) && tlbsel == 1) + kvmppc_e500_tlb1_invalidate(vcpu_e500, esel); gtlbe->mas1 = vcpu_e500->mas1; gtlbe->mas2 = vcpu_e500->mas2; @@ -543,35 +469,37 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. 
*/ if (tlbe_is_host_safe(vcpu, gtlbe)) { + struct tlbe stlbe; + int stlbsel, sesel; + struct tlbe_ref *ref; + u64 eaddr; + switch (tlbsel) { case 0: /* TLB0 */ gtlbe->mas1 &= ~MAS1_TSIZE(~0); gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K); + eaddr = get_tlb_eaddr(gtlbe); stlbsel = 0; - sesel = kvmppc_e500_stlbe_map(vcpu_e500, 0, esel); + sesel = kvmppc_e500_tlb0_map(vcpu_e500, esel); + + ref = &vcpu_e500->shadow_refs[stlbsel][sesel]; + + kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, + ref, eaddr, &stlbe); + write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe); break; case 1: - /* TLB1 */ - eaddr = get_tlb_eaddr(gtlbe); - raddr = get_tlb_raddr(gtlbe); - - /* Create a 4KB mapping on the host. - * If the guest wanted a large page, - * only the first 4KB is mapped here and the rest - * are mapped on the fly. */ - stlbsel = 1; - sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, - raddr >> PAGE_SHIFT, gtlbe); + /* Large page breaks into 4KB pages. + * And all these 4KB pages will be mapped on the fly. */ break; default: BUG(); } - write_host_tlbe(vcpu_e500, stlbsel, sesel); } return EMULATE_DONE; @@ -618,25 +546,20 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index, void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) { - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - int tlbsel, i; - - for (tlbsel = 0; tlbsel < 2; tlbsel++) - for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) - kvmppc_e500_shadow_release(vcpu_e500, tlbsel, i); - - /* discard all guest mapping */ - _tlbil_all(); } void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, unsigned int index) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + struct tlbe_ref *ref; + struct tlbe *gtlbe, stlbe; int tlbsel = tlbsel_of(index); int esel = esel_of(index); int stlbsel, sesel; + gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; + switch (tlbsel) { case 0: stlbsel = 0; @@ -645,8 +568,6 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, case 1: { gfn_t gfn = gpaddr >> 
PAGE_SHIFT; - struct tlbe *gtlbe - = &vcpu_e500->guest_tlb[tlbsel][esel]; stlbsel = 1; sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe); @@ -657,7 +578,11 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, BUG(); break; } - write_host_tlbe(vcpu_e500, stlbsel, sesel); + + ref = &vcpu_e500->shadow_refs[stlbsel][sesel]; + + kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, ref, eaddr, &stlbe); + write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe); } int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, @@ -704,33 +629,21 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) if (vcpu_e500->guest_tlb[0] == NULL) goto err_out; - vcpu_e500->shadow_tlb_size[0] = KVM_E500_TLB0_SIZE; - vcpu_e500->shadow_tlb[0] = - kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL); - if (vcpu_e500->shadow_tlb[0] == NULL) - goto err_out_guest0; - vcpu_e500->guest_tlb_size[1] = KVM_E500_TLB1_SIZE; vcpu_e500->guest_tlb[1] = kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL); if (vcpu_e500->guest_tlb[1] == NULL) - goto err_out_shadow0; + goto err_out_guest0; - vcpu_e500->shadow_tlb_size[1] = tlb1_entry_num; - vcpu_e500->shadow_tlb[1] = - kzalloc(sizeof(struct tlbe) * tlb1_entry_num, GFP_KERNEL); - if (vcpu_e500->shadow_tlb[1] == NULL) + vcpu_e500->shadow_refs[0] = (struct tlbe_ref *) + kzalloc(sizeof(struct tlbe_ref) * KVM_E500_TLB0_SIZE, GFP_KERNEL); + if (vcpu_e500->shadow_refs[0] == NULL) goto err_out_guest1; - vcpu_e500->shadow_pages[0] = (struct page **) - kzalloc(sizeof(struct page *) * KVM_E500_TLB0_SIZE, GFP_KERNEL); - if (vcpu_e500->shadow_pages[0] == NULL) - goto err_out_shadow1; - - vcpu_e500->shadow_pages[1] = (struct page **) - kzalloc(sizeof(struct page *) * tlb1_entry_num, GFP_KERNEL); - if (vcpu_e500->shadow_pages[1] == NULL) - goto err_out_page0; + vcpu_e500->shadow_refs[1] = (struct tlbe_ref *) + kzalloc(sizeof(struct tlbe_ref) * KVM_E500_TLB1_SIZE, GFP_KERNEL); + if (vcpu_e500->shadow_refs[1] == NULL) + goto err_out_ref0; /* 
Init TLB configuration register */ vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL; @@ -740,14 +653,10 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) return 0; -err_out_page0: - kfree(vcpu_e500->shadow_pages[0]); -err_out_shadow1: - kfree(vcpu_e500->shadow_tlb[1]); +err_out_ref0: + kfree(vcpu_e500->shadow_refs[0]); err_out_guest1: kfree(vcpu_e500->guest_tlb[1]); -err_out_shadow0: - kfree(vcpu_e500->shadow_tlb[0]); err_out_guest0: kfree(vcpu_e500->guest_tlb[0]); err_out: @@ -756,10 +665,18 @@ err_out: void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) { - kfree(vcpu_e500->shadow_pages[1]); - kfree(vcpu_e500->shadow_pages[0]); - kfree(vcpu_e500->shadow_tlb[1]); + int stlbsel, i; + + /* release all pages */ + for (stlbsel = 0; stlbsel < 2; stlbsel++) + for (i = 0; i < vcpu_e500->guest_tlb_size[stlbsel]; i++) + kvmppc_e500_shadow_release(vcpu_e500, stlbsel, i); + + /* discard all guest mapping */ + _tlbil_all(); + + kfree(vcpu_e500->shadow_refs[1]); + kfree(vcpu_e500->shadow_refs[0]); kfree(vcpu_e500->guest_tlb[1]); - kfree(vcpu_e500->shadow_tlb[0]); kfree(vcpu_e500->guest_tlb[0]); } -- 1.6.4 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html