gfn_to_pfn() requires a writable host pte and fails otherwise. Change it to
fall back to read-only acquisition, informing the callers via a new
"writable" parameter. Hopefully the ptes are cache-hot so the overhead is
minimal.

Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx>

Index: kvm/arch/ia64/kvm/kvm-ia64.c
===================================================================
--- kvm.orig/arch/ia64/kvm/kvm-ia64.c
+++ kvm/arch/ia64/kvm/kvm-ia64.c
@@ -1589,7 +1589,7 @@ int kvm_arch_prepare_memory_region(struc
 		return -ENOMEM;
 
 	for (i = 0; i < npages; i++) {
-		pfn = gfn_to_pfn(kvm, base_gfn + i);
+		pfn = gfn_to_pfn(kvm, base_gfn + i, NULL);
 		if (!kvm_is_mmio_pfn(pfn)) {
 			kvm_set_pmt_entry(kvm, base_gfn + i,
 					pfn << PAGE_SHIFT,
Index: kvm/arch/x86/kvm/mmu.c
===================================================================
--- kvm.orig/arch/x86/kvm/mmu.c
+++ kvm/arch/x86/kvm/mmu.c
@@ -2273,6 +2273,7 @@ static int nonpaging_map(struct kvm_vcpu
 {
 	int r;
 	int level;
+	int writable;
 	pfn_t pfn;
 	unsigned long mmu_seq;
 
@@ -2289,10 +2290,10 @@ static int nonpaging_map(struct kvm_vcpu
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 
-	pfn = gfn_to_pfn(vcpu->kvm, gfn);
+	pfn = gfn_to_pfn(vcpu->kvm, gfn, &writable);
 
 	/* mmio */
-	if (is_error_pfn(pfn))
+	if (is_error_pfn(pfn) || !writable)
 		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
@@ -2581,6 +2582,8 @@ static int tdp_page_fault(struct kvm_vcp
 	pfn_t pfn;
 	int r;
 	int level;
+	int writable;
+	int write = error_code & PFERR_WRITE_MASK;
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	unsigned long mmu_seq;
 
@@ -2597,15 +2600,14 @@ static int tdp_page_fault(struct kvm_vcp
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 
-	pfn = gfn_to_pfn(vcpu->kvm, gfn);
-	if (is_error_pfn(pfn))
+	pfn = gfn_to_pfn(vcpu->kvm, gfn, &writable);
+	if (is_error_pfn(pfn) || !writable)
 		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
-	r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
-			 level, gfn, pfn);
+	r = __direct_map(vcpu, gpa, write, level, gfn, pfn);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	return r;
@@ -3043,6 +3045,7 @@ static void mmu_guess_page_from_pte_writ
 {
 	gfn_t gfn;
 	pfn_t pfn;
+	int writable;
 
 	if (!is_present_gpte(gpte))
 		return;
@@ -3050,9 +3053,9 @@ static void mmu_guess_page_from_pte_writ
 	vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
-	pfn = gfn_to_pfn(vcpu->kvm, gfn);
+	pfn = gfn_to_pfn(vcpu->kvm, gfn, &writable);
 
-	if (is_error_pfn(pfn)) {
+	if (is_error_pfn(pfn) || !writable) {
 		kvm_release_pfn_clean(pfn);
 		return;
 	}
Index: kvm/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm.orig/arch/x86/kvm/paging_tmpl.h
+++ kvm/arch/x86/kvm/paging_tmpl.h
@@ -536,6 +536,7 @@ static int FNAME(page_fault)(struct kvm_
 	int write_fault = error_code & PFERR_WRITE_MASK;
 	int user_fault = error_code & PFERR_USER_MASK;
 	int fetch_fault = error_code & PFERR_FETCH_MASK;
+	int writable;
 	struct guest_walker walker;
 	u64 *sptep;
 	int write_pt = 0;
@@ -573,10 +574,10 @@ static int FNAME(page_fault)(struct kvm_
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 
-	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
+	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn, &writable);
 
 	/* mmio */
-	if (is_error_pfn(pfn))
+	if (is_error_pfn(pfn) || !writable)
 		return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -302,7 +302,7 @@ void kvm_set_page_accessed(struct page *
 
 pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr);
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
-pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn, int *writable);
 pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
 			 struct kvm_memory_slot *slot, gfn_t gfn);
 int memslot_id(struct kvm *kvm, gfn_t gfn);
Index: kvm/virt/kvm/kvm_main.c
===================================================================
--- kvm.orig/virt/kvm/kvm_main.c
+++ kvm/virt/kvm/kvm_main.c
@@ -948,17 +948,28 @@ unsigned long gfn_to_hva(struct kvm *kvm
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
-static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic)
+static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
+			int *writable)
 {
 	struct page *page[1];
 	int npages;
 	pfn_t pfn;
 
+	if (writable)
+		*writable = 1;
+
 	if (atomic)
 		npages = __get_user_pages_fast(addr, 1, 1, page);
 	else {
 		might_sleep();
 		npages = get_user_pages_fast(addr, 1, 1, page);
+
+		/* attempt to map read-only */
+		if (unlikely(npages != 1) && writable) {
+			npages = get_user_pages_fast(addr, 1, 0, page);
+			if (npages == 1)
+				*writable = 0;
+		}
 	}
 
 	if (unlikely(npages != 1)) {
@@ -995,11 +1006,11 @@ return_fault_page:
 
 pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
 {
-	return hva_to_pfn(kvm, addr, true);
+	return hva_to_pfn(kvm, addr, true, NULL);
 }
 EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
 
-static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic)
+static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, int *writable)
 {
 	unsigned long addr;
 
@@ -1009,18 +1020,18 @@ static pfn_t __gfn_to_pfn(struct kvm *kv
 		return page_to_pfn(bad_page);
 	}
 
-	return hva_to_pfn(kvm, addr, atomic);
+	return hva_to_pfn(kvm, addr, atomic, writable);
 }
 
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
 {
-	return __gfn_to_pfn(kvm, gfn, true);
+	return __gfn_to_pfn(kvm, gfn, true, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
 
-pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn, int *writable)
 {
-	return __gfn_to_pfn(kvm, gfn, false);
+	return __gfn_to_pfn(kvm, gfn, false, writable);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn);
 
@@ -1028,7 +1039,7 @@ pfn_t gfn_to_pfn_memslot(struct kvm *kvm
 			 struct kvm_memory_slot *slot, gfn_t gfn)
 {
 	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
-	return hva_to_pfn(kvm, addr, false);
+	return hva_to_pfn(kvm, addr, false, NULL);
 }
 
 int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
@@ -1052,7 +1063,7 @@ struct page *gfn_to_page(struct kvm *kvm
 {
 	pfn_t pfn;
 
-	pfn = gfn_to_pfn(kvm, gfn);
+	pfn = gfn_to_pfn(kvm, gfn, NULL);
 	if (!kvm_is_mmio_pfn(pfn))
 		return pfn_to_page(pfn);
 
Index: kvm/arch/powerpc/kvm/book3s.c
===================================================================
--- kvm.orig/arch/powerpc/kvm/book3s.c
+++ kvm/arch/powerpc/kvm/book3s.c
@@ -468,7 +468,7 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu
 		return pfn;
 	}
 
-	return gfn_to_pfn(vcpu->kvm, gfn);
+	return gfn_to_pfn(vcpu->kvm, gfn, NULL);
 }
 
 /* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To
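
For reference, a minimal caller sketch (not part of the patch) modelled on
the converted tdp_page_fault()/nonpaging_map() hunks above. The helper name
example_map_gfn() is invented for illustration; kvm_handle_bad_page() is
local to mmu.c, so a caller of this shape would live there. It shows the
intended convention: a caller that still needs a writable mapping treats
!writable the same as an error pfn.

#include <linux/kvm_host.h>

/*
 * Illustration only: gfn_to_pfn() no longer fails outright on a read-only
 * host pte; it retries get_user_pages_fast() without write access and
 * reports the outcome through *writable.  A caller that requires a
 * writable mapping must therefore check the flag itself.
 */
static int example_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	int writable;
	pfn_t pfn;

	pfn = gfn_to_pfn(vcpu->kvm, gfn, &writable);

	/* Treat "no page" and "read-only page" alike, as the patch does. */
	if (is_error_pfn(pfn) || !writable)
		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);

	/* ... install a writable mapping for pfn here ... */

	kvm_release_pfn_clean(pfn);	/* drop the reference taken by gfn_to_pfn() */
	return 0;
}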