On Fri, Sep 21, 2018 at 08:01:48PM +1000, Paul Mackerras wrote: > From: Suraj Jitindar Singh <sjitindarsingh@xxxxxxxxx> > > The radix page fault handler accounts for all cases, including just > needing to insert a pte. This breaks it up into separate functions for > the two main cases; setting rc and inserting a pte. > > This allows us to make the setting of rc and inserting of a pte > generic for any pgtable, not specific to the one for this guest. > > [paulus@xxxxxxxxxx - reduced diffs from previous code] > > Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@xxxxxxxxx> > Signed-off-by: Paul Mackerras <paulus@xxxxxxxxxx> Reviewed-by: David Gibson <david@xxxxxxxxxxxxxxxxxxxxx> > --- > arch/powerpc/kvm/book3s_64_mmu_radix.c | 210 +++++++++++++++++++-------------- > 1 file changed, 123 insertions(+), 87 deletions(-) > > diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c > index f2976f4..47f2b18 100644 > --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c > +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c > @@ -400,8 +400,9 @@ static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud, > */ > #define PTE_BITS_MUST_MATCH (~(_PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED)) > > -static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, > - unsigned int level, unsigned long mmu_seq) > +static int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, > + unsigned long gpa, unsigned int level, > + unsigned long mmu_seq) > { > pgd_t *pgd; > pud_t *pud, *new_pud = NULL; > @@ -410,7 +411,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, > int ret; > > /* Traverse the guest's 2nd-level tree, allocate new levels needed */ > - pgd = kvm->arch.pgtable + pgd_index(gpa); > + pgd = pgtable + pgd_index(gpa); > pud = NULL; > if (pgd_present(*pgd)) > pud = pud_offset(pgd, gpa); > @@ -565,95 +566,49 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, > return ret; > } > > -int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, > - unsigned long ea, unsigned long dsisr) > +static bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, > + bool writing, unsigned long gpa) > +{ > + unsigned long pgflags; > + unsigned int shift; > + pte_t *ptep; > + > + /* > + * Need to set an R or C bit in the 2nd-level tables; > + * since we are just helping out the hardware here, > + * it is sufficient to do what the hardware does. > + */ > + pgflags = _PAGE_ACCESSED; > + if (writing) > + pgflags |= _PAGE_DIRTY; > + /* > + * We are walking the secondary (partition-scoped) page table here. > + * We can do this without disabling irq because the Linux MM > + * subsystem doesn't do THP splits and collapses on this tree. > + */ > + ptep = __find_linux_pte(pgtable, gpa, NULL, &shift); > + if (ptep && pte_present(*ptep) && (!writing || pte_write(*ptep))) { > + kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, gpa, shift); > + return true; > + } > + return false; > +} > + > +static int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, > + unsigned long gpa, > + struct kvm_memory_slot *memslot, > + bool writing, bool kvm_ro, > + pte_t *inserted_pte, unsigned int *levelp) > { > struct kvm *kvm = vcpu->kvm; > - unsigned long mmu_seq; > - unsigned long gpa, gfn, hva; > - struct kvm_memory_slot *memslot; > struct page *page = NULL; > - long ret; > - bool writing; > + unsigned long mmu_seq; > + unsigned long hva, gfn = gpa >> PAGE_SHIFT; > bool upgrade_write = false; > bool *upgrade_p = &upgrade_write; > pte_t pte, *ptep; > - unsigned long pgflags; > unsigned int shift, level; > - > - /* Check for unusual errors */ > - if (dsisr & DSISR_UNSUPP_MMU) { > - pr_err("KVM: Got unsupported MMU fault\n"); > - return -EFAULT; > - } > - if (dsisr & DSISR_BADACCESS) { > - /* Reflect to the guest as DSI */ > - pr_err("KVM: Got radix HV page fault with DSISR=%lx\n", dsisr); > - kvmppc_core_queue_data_storage(vcpu, ea, dsisr); > - return RESUME_GUEST; > - } > - > - /* Translate the logical address and get the page */ > - gpa = vcpu->arch.fault_gpa & ~0xfffUL; > - gpa &= ~0xF000000000000000ul; > - gfn = gpa >> PAGE_SHIFT; > - if (!(dsisr & DSISR_PRTABLE_FAULT)) > - gpa |= ea & 0xfff; > - memslot = gfn_to_memslot(kvm, gfn); > - > - /* No memslot means it's an emulated MMIO region */ > - if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) { > - if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS | > - DSISR_SET_RC)) { > - /* > - * Bad address in guest page table tree, or other > - * unusual error - reflect it to the guest as DSI. > - */ > - kvmppc_core_queue_data_storage(vcpu, ea, dsisr); > - return RESUME_GUEST; > - } > - return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, > - dsisr & DSISR_ISSTORE); > - } > - > - writing = (dsisr & DSISR_ISSTORE) != 0; > - if (memslot->flags & KVM_MEM_READONLY) { > - if (writing) { > - /* give the guest a DSI */ > - dsisr = DSISR_ISSTORE | DSISR_PROTFAULT; > - kvmppc_core_queue_data_storage(vcpu, ea, dsisr); > - return RESUME_GUEST; > - } > - upgrade_p = NULL; > - } > - > - if (dsisr & DSISR_SET_RC) { > - /* > - * Need to set an R or C bit in the 2nd-level tables; > - * since we are just helping out the hardware here, > - * it is sufficient to do what the hardware does. > - */ > - pgflags = _PAGE_ACCESSED; > - if (writing) > - pgflags |= _PAGE_DIRTY; > - /* > - * We are walking the secondary page table here. We can do this > - * without disabling irq. > - */ > - spin_lock(&kvm->mmu_lock); > - ptep = __find_linux_pte(kvm->arch.pgtable, > - gpa, NULL, &shift); > - if (ptep && pte_present(*ptep) && > - (!writing || pte_write(*ptep))) { > - kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, > - gpa, shift); > - dsisr &= ~DSISR_SET_RC; > - } > - spin_unlock(&kvm->mmu_lock); > - if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE | > - DSISR_PROTFAULT | DSISR_SET_RC))) > - return RESUME_GUEST; > - } > + int ret; > > /* used to check for invalidations in progress */ > mmu_seq = kvm->mmu_notifier_seq; > @@ -666,7 +621,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, > * is that the page is writable. > */ > hva = gfn_to_hva_memslot(memslot, gfn); > - if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) { > + if (!kvm_ro && __get_user_pages_fast(hva, 1, 1, &page) == 1) { > upgrade_write = true; > } else { > unsigned long pfn; > @@ -724,7 +679,12 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, > } > > /* Allocate space in the tree and write the PTE */ > - ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq); > + ret = kvmppc_create_pte(kvm, kvm->arch.pgtable, pte, gpa, level, > + mmu_seq); > + if (inserted_pte) > + *inserted_pte = pte; > + if (levelp) > + *levelp = level; > > if (page) { > if (!ret && (pte_val(pte) & _PAGE_WRITE)) > @@ -732,6 +692,82 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, > put_page(page); > } > > + return ret; > +} > + > +int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, > + unsigned long ea, unsigned long dsisr) > +{ > + struct kvm *kvm = vcpu->kvm; > + unsigned long gpa, gfn; > + struct kvm_memory_slot *memslot; > + long ret; > + bool writing = !!(dsisr & DSISR_ISSTORE); > + bool kvm_ro = false; > + > + /* Check for unusual errors */ > + if (dsisr & DSISR_UNSUPP_MMU) { > + pr_err("KVM: Got unsupported MMU fault\n"); > + return -EFAULT; > + } > + if (dsisr & DSISR_BADACCESS) { > + /* Reflect to the guest as DSI */ > + pr_err("KVM: Got radix HV page fault with DSISR=%lx\n", dsisr); > + kvmppc_core_queue_data_storage(vcpu, ea, dsisr); > + return RESUME_GUEST; > + } > + > + /* Translate the logical address */ > + gpa = vcpu->arch.fault_gpa & ~0xfffUL; > + gpa &= ~0xF000000000000000ul; > + gfn = gpa >> PAGE_SHIFT; > + if (!(dsisr & DSISR_PRTABLE_FAULT)) > + gpa |= ea & 0xfff; > + > + /* Get the corresponding memslot */ > + memslot = gfn_to_memslot(kvm, gfn); > + > + /* No memslot means it's an emulated MMIO region */ > + if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) { > + if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS | > + DSISR_SET_RC)) { > + /* > + * Bad address in guest page table tree, or other > + * unusual error - reflect it to the guest as DSI. > + */ > + kvmppc_core_queue_data_storage(vcpu, ea, dsisr); > + return RESUME_GUEST; > + } > + return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, writing); > + } > + > + if (memslot->flags & KVM_MEM_READONLY) { > + if (writing) { > + /* give the guest a DSI */ > + kvmppc_core_queue_data_storage(vcpu, ea, DSISR_ISSTORE | > + DSISR_PROTFAULT); > + return RESUME_GUEST; > + } > + kvm_ro = true; > + } > + > + /* Failed to set the reference/change bits */ > + if (dsisr & DSISR_SET_RC) { > + spin_lock(&kvm->mmu_lock); > + if (kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, > + writing, gpa)) > + dsisr &= ~DSISR_SET_RC; > + spin_unlock(&kvm->mmu_lock); > + > + if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE | > + DSISR_PROTFAULT | DSISR_SET_RC))) > + return RESUME_GUEST; > + } > + > + /* Try to insert a pte */ > + ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot, writing, > + kvm_ro, NULL, NULL); > + > if (ret == 0 || ret == -EAGAIN) > ret = RESUME_GUEST; > return ret; -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
Attachment:
signature.asc
Description: PGP signature