[RFC PATCH 2/2] kvm, x86: use ro page and don't copy shared page


When handling a guest page fault we currently always call
get_user_pages() with write=1.

We do not need to do this when the fault is not a write fault:
get_user_pages(write=1) breaks COW on a KSM-shared page and copies it.
If the page is never written afterwards, the copy and the work to
create it are wasted, and KSM may later rescan and re-merge the pages,
causing thrashing.
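
For illustration only, here is a minimal sketch of the idea behind
kvm_get_pfn_for_gfn() (the helper introduced in patch 1/2, which is not
shown here).  The function name, signature and error handling below are
assumptions rather than the actual implementation; the point is simply
that the fault's write intent is passed down to get_user_pages_fast()
instead of hard-coding write=1:

#include <linux/kvm_host.h>	/* gfn_to_hva(), kvm_is_error_hva(), gfn_t, pfn_t */
#include <linux/mm.h>		/* get_user_pages_fast() */

/*
 * Illustrative sketch only: pass the fault's write intent down to
 * get_user_pages_fast() so a read fault maps a KSM-shared page
 * read-only instead of breaking COW on it.
 */
static int sketch_gfn_to_pfn(struct kvm *kvm, gfn_t gfn, int write, pfn_t *pfn)
{
	unsigned long addr = gfn_to_hva(kvm, gfn);
	struct page *page;

	if (kvm_is_error_hva(addr))
		return -EFAULT;

	/* write == 1 forces a COW copy and unshares a KSM page;
	 * write == 0 leaves the shared, read-only page in place. */
	if (get_user_pages_fast(addr, 1, write, &page) != 1)
		return -EFAULT;	/* mmio/error-page handling elided */

	*pfn = page_to_pfn(page);
	return 0;
}

The real helper presumably reports failure through a bad/error pfn (as
tested by is_error_pfn() in the hunks below) rather than an errno.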

This patch is not intended for inclusion yet: I know little about mmio
handling, and the patch contains a "workaround" that keeps mmio pfns
always writable in tdp_page_fault().  The guest cannot even boot
without this workaround.

Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
---
 mmu.c         |   21 ++++++++++++++-------
 paging_tmpl.h |   15 +++++++--------
 2 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9b9b1c3..1e0826d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1902,7 +1902,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		    unsigned pte_access, int user_fault,
 		    int write_fault, int dirty, int level,
 		    gfn_t gfn, pfn_t pfn, bool speculative,
-		    bool can_unsync, bool reset_host_protection)
+		    bool can_unsync, bool host_writable)
 {
 	u64 spte;
 	int ret = 0;
@@ -1929,8 +1929,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
 			kvm_is_mmio_pfn(pfn));
 
-	if (reset_host_protection)
+	if (host_writable)
 		spte |= SPTE_HOST_WRITEABLE;
+	else
+		pte_access &= ~ACC_WRITE_MASK;
 
 	spte |= (u64)pfn << PAGE_SHIFT;
 
@@ -1983,7 +1985,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 			 int user_fault, int write_fault, int dirty,
 			 int *ptwrite, int level, gfn_t gfn,
 			 pfn_t pfn, bool speculative,
-			 bool reset_host_protection)
+			 bool host_writable)
 {
 	int was_rmapped = 0;
 	int was_writable = is_writable_pte(*sptep);
@@ -2018,7 +2020,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
 	if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
 		      dirty, level, gfn, pfn, speculative, true,
-		      reset_host_protection)) {
+		      host_writable)) {
 		if (write_fault)
 			*ptwrite = 1;
 		kvm_x86_ops->tlb_flush(vcpu);
@@ -2066,7 +2068,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 		if (iterator.level == level) {
 			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
 				     0, write, 1, &pt_write,
-				     level, gfn, pfn, false, true);
+				     level, gfn, pfn, false, write);
 			++vcpu->stat.pf_fixed;
 			break;
 		}
@@ -2135,7 +2137,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
-	pfn = kvm_get_pfn_for_gfn(vcpu->kvm, gfn, 1);
+	pfn = kvm_get_pfn_for_gfn(vcpu->kvm, gfn, write);
 
 	/* mmio */
 	if (is_error_pfn(pfn))
@@ -2357,7 +2359,12 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
-	pfn = kvm_get_pfn_for_gfn(vcpu->kvm, gfn, 1);
+	pfn = kvm_get_pfn_for_gfn(vcpu->kvm, gfn, error_code & PFERR_WRITE_MASK);
+	if (!(error_code & PFERR_WRITE_MASK) && kvm_is_mmio_pfn(pfn)) {
+		kvm_release_pfn_clean(pfn);
+		/* I don't know why we have to ensure mmio pfns are always writable. */
+		pfn = kvm_get_pfn_for_gfn(vcpu->kvm, gfn, 1);
+	}
 	if (is_error_pfn(pfn))
 		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
 	spin_lock(&vcpu->kvm->mmu_lock);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index fddb726..e0cb33a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -283,7 +283,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 		return;
 	kvm_get_pfn(pfn);
 	/*
-	 * we call mmu_set_spte() with reset_host_protection = true beacuse that
+	 * we call mmu_set_spte() with host_writable = true because
 	 * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
 	 */
 	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
@@ -321,7 +321,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 				     user_fault, write_fault,
 				     gw->ptes[gw->level-1] & PT_DIRTY_MASK,
 				     ptwrite, level,
-				     gw->gfn, pfn, false, true);
+				     gw->gfn, pfn, false, write_fault);
 			break;
 		}
 
@@ -430,7 +430,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
-	pfn = kvm_get_pfn_for_gfn(vcpu->kvm, walker.gfn, 1);
+	pfn = kvm_get_pfn_for_gfn(vcpu->kvm, walker.gfn, write_fault);
 
 	/* mmio */
 	if (is_error_pfn(pfn))
@@ -579,7 +579,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
 static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
 	int i, offset, nr_present;
-	bool reset_host_protection;
+	bool host_writable;
 	gpa_t first_pte_gpa;
 
 	offset = nr_present = 0;
@@ -623,15 +623,14 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 		nr_present++;
 		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
 		if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) {
-			pte_access &= ~ACC_WRITE_MASK;
-			reset_host_protection = 0;
+			host_writable = false;
 		} else {
-			reset_host_protection = 1;
+			host_writable = true;
 		}
 		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
 			 is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
 			 spte_to_pfn(sp->spt[i]), true, false,
-			 reset_host_protection);
+			 host_writable);
 	}
 
 	return !nr_present;
--