Instantiate a read-only spte if the host pte is read-only, and exit to
userspace if the guest attempts to write to it. With this in place,
userspace can mprotect(PROT_READ) guest memory and handle guest write
attempts.

Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx>

Index: kvm/arch/x86/kvm/mmu.c
===================================================================
--- kvm.orig/arch/x86/kvm/mmu.c
+++ kvm/arch/x86/kvm/mmu.c
@@ -2205,7 +2205,14 @@ static void direct_pte_prefetch(struct k
 	__direct_pte_prefetch(vcpu, sp, sptep);
 }
 
-static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
+static int kvm_report_unallowed_write(struct kvm_vcpu *vcpu, gpa_t gpa)
+{
+	vcpu->run->exit_reason = KVM_EXIT_UNALLOWED_WRITE;
+	vcpu->run->unallowed_write.gpa = gpa;
+	return -EPERM;
+}
+
+static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, int dirty,
 			int level, gfn_t gfn, pfn_t pfn)
 {
 	struct kvm_shadow_walk_iterator iterator;
@@ -2216,7 +2223,7 @@ static int __direct_map(struct kvm_vcpu
 	for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
 		if (iterator.level == level) {
 			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
-				     0, write, 1, &pt_write,
+				     0, write, dirty, &pt_write,
 				     level, gfn, pfn, false, true);
 			direct_pte_prefetch(vcpu, iterator.sptep);
 			++vcpu->stat.pf_fixed;
@@ -2269,13 +2276,15 @@ static int kvm_handle_bad_page(struct kv
 	return 1;
 }
 
-static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
+static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write)
 {
 	int r;
 	int level;
 	int writable;
+	int dirty = 1;
 	pfn_t pfn;
 	unsigned long mmu_seq;
+	gfn_t gfn = v >> PAGE_SHIFT;
 
 	level = mapping_level(vcpu, gfn);
 
@@ -2293,14 +2302,22 @@ static int nonpaging_map(struct kvm_vcpu
 	pfn = gfn_to_pfn(vcpu->kvm, gfn, &writable);
 
 	/* mmio */
-	if (is_error_pfn(pfn) || !writable)
+	if (is_error_pfn(pfn))
 		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
+	if (!writable) {
+		if (write) {
+			kvm_release_pfn_clean(pfn);
+			return kvm_report_unallowed_write(vcpu, v);
+		}
+		/* instantiate read-only spte */
+		dirty = 0;
+	}
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
-	r = __direct_map(vcpu, v, write, level, gfn, pfn);
+	r = __direct_map(vcpu, v, write, dirty, level, gfn, pfn);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
@@ -2559,7 +2576,6 @@ static gpa_t nonpaging_gva_to_gpa_nested
 static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 				u32 error_code)
 {
-	gfn_t gfn;
 	int r;
 
 	pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
@@ -2570,10 +2586,7 @@ static int nonpaging_page_fault(struct k
 	ASSERT(vcpu);
 	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
-	gfn = gva >> PAGE_SHIFT;
-
-	return nonpaging_map(vcpu, gva & PAGE_MASK,
-			     error_code & PFERR_WRITE_MASK, gfn);
+	return nonpaging_map(vcpu, gva, error_code & PFERR_WRITE_MASK);
 }
 
 static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
@@ -2584,6 +2597,7 @@ static int tdp_page_fault(struct kvm_vcp
 	int level;
 	int writable;
 	int write = error_code & PFERR_WRITE_MASK;
+	int dirty = 1;
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	unsigned long mmu_seq;
 
@@ -2601,13 +2615,22 @@ static int tdp_page_fault(struct kvm_vcp
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 	pfn = gfn_to_pfn(vcpu->kvm, gfn, &writable);
-	if (is_error_pfn(pfn) || !writable)
+	if (is_error_pfn(pfn))
 		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
+	if (!writable) {
+		if (write) {
+			kvm_release_pfn_clean(pfn);
+			return kvm_report_unallowed_write(vcpu, gpa);
+		}
+		/* instantiate read-only spte */
+		dirty = 0;
+	}
+
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
-	r = __direct_map(vcpu, gpa, write, level, gfn, pfn);
+	r = __direct_map(vcpu, gpa, write, dirty, level, gfn, pfn);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	return r;
@@ -3261,8 +3284,11 @@ int kvm_mmu_page_fault(struct kvm_vcpu *
 	enum emulation_result er;
 
 	r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code);
-	if (r < 0)
+	if (r < 0) {
+		if (r == -EPERM) /* unallowed write */
+			r = 0;
 		goto out;
+	}
 
 	if (!r) {
 		r = 1;
Index: kvm/include/linux/kvm.h
===================================================================
--- kvm.orig/include/linux/kvm.h
+++ kvm/include/linux/kvm.h
@@ -161,6 +161,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_NMI              16
 #define KVM_EXIT_INTERNAL_ERROR   17
 #define KVM_EXIT_OSI              18
+#define KVM_EXIT_UNALLOWED_WRITE  19
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 #define KVM_INTERNAL_ERROR_EMULATION 1
@@ -264,6 +265,10 @@ struct kvm_run {
 		struct {
 			__u64 gprs[32];
 		} osi;
+		/* KVM_EXIT_UNALLOWED_WRITE */
+		struct {
+			__u64 gpa;
+		} unallowed_write;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
Index: kvm/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm.orig/arch/x86/kvm/paging_tmpl.h
+++ kvm/arch/x86/kvm/paging_tmpl.h
@@ -542,6 +542,7 @@ static int FNAME(page_fault)(struct kvm_
 	int write_pt = 0;
 	int r;
 	pfn_t pfn;
+	gpa_t gpa;
 	int level = PT_PAGE_TABLE_LEVEL;
 	unsigned long mmu_seq;
@@ -567,6 +568,9 @@ static int FNAME(page_fault)(struct kvm_
 		return 0;
 	}
 
+	gpa = gfn_to_gpa(walker.gfn);
+	gpa |= addr & ~PAGE_MASK;
+
 	if (walker.level >= PT_DIRECTORY_LEVEL) {
 		level = min(walker.level, mapping_level(vcpu, walker.gfn));
 		walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
@@ -577,8 +581,15 @@ static int FNAME(page_fault)(struct kvm_
 	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn, &writable);
 
 	/* mmio */
-	if (is_error_pfn(pfn) || !writable)
+	if (is_error_pfn(pfn))
 		return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn);
+	if (!writable) {
+		if (write_fault) {
+			kvm_release_pfn_clean(pfn);
+			return kvm_report_unallowed_write(vcpu, gpa);
+		}
+		walker.pte_access &= ~ACC_WRITE_MASK;
+	}
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu, mmu_seq))
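For reference, a minimal userspace sketch (not part of the patch) of how a VMM
could consume the new exit: write-protect a guest memory range with
mprotect(PROT_READ) and pick up KVM_EXIT_UNALLOWED_WRITE from its KVM_RUN
loop. It assumes the patched <linux/kvm.h>; gpa_to_hva() and
handle_protected_write() are hypothetical stand-ins for the VMM's own memslot
bookkeeping and write-handling policy.

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical VMM-provided helpers (assumptions, not KVM API). */
extern void *gpa_to_hva(uint64_t gpa);            /* memslot lookup */
extern void handle_protected_write(uint64_t gpa); /* e.g. log + unprotect */

/* Write-protect 'len' bytes of guest memory backed at host address 'hva'. */
static int protect_guest_range(void *hva, size_t len)
{
	return mprotect(hva, len, PROT_READ);
}

/* vcpu run loop; 'run' is the mmap'ed struct kvm_run of 'vcpu_fd'. */
static void run_vcpu(int vcpu_fd, struct kvm_run *run)
{
	for (;;) {
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0) {
			perror("KVM_RUN");
			exit(1);
		}

		switch (run->exit_reason) {
		case KVM_EXIT_UNALLOWED_WRITE:
			/* Guest stored to a page covered by PROT_READ. */
			handle_protected_write(run->unallowed_write.gpa);
			break;
		/* ... other exit reasons elided ... */
		default:
			fprintf(stderr, "unhandled exit %u\n",
				run->exit_reason);
			exit(1);
		}
	}
}

Since the exit is reported before any spte is instantiated, the guest retries
the faulting store on the next KVM_RUN; handle_protected_write() would
therefore normally restore PROT_READ|PROT_WRITE on the page (via
gpa_to_hva()) or emulate the access before re-entering the vcpu, otherwise
the same exit repeats.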