On most systems we need to emulate dcbz when running 32-bit guests. So far we've been rather slack, not giving correct DSISR values to the guest. This patch makes the emulation more accurate by distinguishing between "page not mapped" and "write protection fault". While at it, it also speeds up dcbz emulation by an order of magnitude by patching the guest page in place through kmap_atomic() instead of copying it through a vmalloc'ed bounce buffer. Signed-off-by: Alexander Graf <agraf@xxxxxxx> --- arch/powerpc/kvm/book3s.c | 56 +++++++++++++-------------------- arch/powerpc/kvm/book3s_64_emulate.c | 19 +++++++++-- 2 files changed, 37 insertions(+), 38 deletions(-) diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 7912d72..1a12ef2 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -28,6 +28,7 @@ #include <asm/mmu_context.h> #include <linux/sched.h> #include <linux/vmalloc.h> +#include <linux/highmem.h> #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU @@ -368,34 +369,29 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) */ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) { - bool touched = false; - hva_t hpage; + struct page *hpage; + u64 hpage_offset; u32 *page; int i; - hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); - if (kvm_is_error_hva(hpage)) + hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT); + if (is_error_page(hpage)) return; - hpage |= pte->raddr & ~PAGE_MASK; - hpage &= ~0xFFFULL; - - page = vmalloc(HW_PAGE_SIZE); - - if (copy_from_user(page, (void __user *)hpage, HW_PAGE_SIZE)) - goto out; + hpage_offset = pte->raddr & ~PAGE_MASK; + hpage_offset &= ~0xFFFULL; + hpage_offset /= 4; - for (i=0; i < HW_PAGE_SIZE / 4; i++) - if ((page[i] & 0xff0007ff) == INS_DCBZ) { - page[i] &= 0xfffffff7; // reserved instruction, so we trap - touched = true; - } + get_page(hpage); + page = kmap_atomic(hpage, KM_USER0); - if (touched) - copy_to_user((void __user *)hpage, page, HW_PAGE_SIZE); + /* patch dcbz into reserved instruction, so we trap */ + for 
(i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++) + if ((page[i] & 0xff0007ff) == INS_DCBZ) + page[i] &= 0xfffffff7; -out: - vfree(page); + kunmap_atomic(page, KM_USER0); + put_page(hpage); } static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, @@ -448,30 +444,21 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data) { struct kvmppc_pte pte; - hva_t hva = *eaddr; vcpu->stat.st++; if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) - goto nopte; + return -ENOENT; *eaddr = pte.raddr; - hva = kvmppc_pte_to_hva(vcpu, &pte, false); - if (kvm_is_error_hva(hva)) - goto mmio; + if (!pte.may_write) + return -EPERM; - if (copy_to_user((void __user *)hva, ptr, size)) { - printk(KERN_INFO "kvmppc_st at 0x%lx failed\n", hva); - goto mmio; - } + if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) + return EMULATE_DO_MMIO; return EMULATE_DONE; - -nopte: - return -ENOENT; -mmio: - return EMULATE_DO_MMIO; } int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, @@ -786,6 +773,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * that no guest that needs the dcbz hack does NX. 
*/ kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); + r = RESUME_GUEST; } else { vcpu->arch.msr |= vcpu->arch.shadow_srr1 & 0x58000000; kvmppc_book3s_queue_irqprio(vcpu, exit_nr); diff --git a/arch/powerpc/kvm/book3s_64_emulate.c b/arch/powerpc/kvm/book3s_64_emulate.c index 1e5cf8d..bbd1590 100644 --- a/arch/powerpc/kvm/book3s_64_emulate.c +++ b/arch/powerpc/kvm/book3s_64_emulate.c @@ -189,6 +189,8 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, ulong ra = 0; ulong addr, vaddr; u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + u32 dsisr; + int r; if (get_ra(inst)) ra = kvmppc_get_gpr(vcpu, get_ra(inst)); @@ -198,14 +200,23 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, addr &= 0xffffffff; vaddr = addr; - if (kvmppc_st(vcpu, &addr, 32, zeros, true)) { + r = kvmppc_st(vcpu, &addr, 32, zeros, true); + if ((r == -ENOENT) || (r == -EPERM)) { + *advance = 0; vcpu->arch.dear = vaddr; vcpu->arch.fault_dear = vaddr; - to_book3s(vcpu)->dsisr = DSISR_PROTFAULT | - DSISR_ISSTORE; + + dsisr = DSISR_ISSTORE; + if (r == -ENOENT) + dsisr |= DSISR_NOHPTE; + else if (r == -EPERM) + dsisr |= DSISR_PROTFAULT; + + to_book3s(vcpu)->dsisr = dsisr; + vcpu->arch.fault_dsisr = dsisr; + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); - kvmppc_mmu_pte_flush(vcpu, vaddr, ~0xFFFULL); } break; -- 1.6.0.2 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html