kvm_mmu_pte_write is too long, so we split it into smaller helper functions
to make it more readable.

Signed-off-by: Xiao Guangrong <xiaoguangrong@xxxxxxxxxxxxxx>
---
 arch/x86/kvm/mmu.c |  180 ++++++++++++++++++++++++++++++++--------------------
 1 files changed, 111 insertions(+), 69 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7629802..931c23a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3524,48 +3524,29 @@ static int get_free_pte_list_desc_nr(struct kvm_vcpu *vcpu)
 	return mmu_memory_cache_free_object(cache);
 }
 
-void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-		       const u8 *new, int bytes, bool repeat_write)
+static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
+				    const u8 *new, int *bytes)
 {
-	gfn_t gfn = gpa >> PAGE_SHIFT;
-	union kvm_mmu_page_role mask = { .word = 0 };
-	struct kvm_mmu_page *sp;
-	struct hlist_node *node;
-	LIST_HEAD(invalid_list);
-	u64 entry, gentry, *spte;
-	unsigned pte_size, page_offset, misaligned, quadrant, offset;
-	int level, npte, r, flooded = 0;
-	bool remote_flush, local_flush, zap_page;
-
-	/*
-	 * If we don't have indirect shadow pages, it means no page is
-	 * write-protected, so we can exit simply.
-	 */
-	if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
-		return;
-
-	zap_page = remote_flush = local_flush = false;
-	offset = offset_in_page(gpa);
-
-	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
+	u64 gentry;
+	int r;
 
 	/*
 	 * Assume that the pte write on a page table of the same type
 	 * as the current vcpu paging mode since we update the sptes only
 	 * when they have the same mode.
 	 */
-	if (is_pae(vcpu) && bytes == 4) {
+	if (is_pae(vcpu) && *bytes == 4) {
 		/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
-		gpa &= ~(gpa_t)7;
-		bytes = 8;
+		*gpa &= ~(gpa_t)7;
+		*bytes = 8;
 
-		r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8));
+		r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8));
 		if (r)
 			gentry = 0;
 		new = (const u8 *)&gentry;
 	}
 
-	switch (bytes) {
+	switch (*bytes) {
 	case 4:
 		gentry = *(const u32 *)new;
 		break;
@@ -3577,66 +3558,127 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		break;
 	}
 
-	mmu_topup_memory_caches(vcpu);
-	spin_lock(&vcpu->kvm->mmu_lock);
-	++vcpu->kvm->stat.mmu_pte_write;
-	trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
+	return gentry;
+}
+
+/*
+ * If we're seeing too many writes to a page, it may no longer be a page table,
+ * or we may be forking, in which case it is better to unmap the page.
+ */
+static bool detect_write_flooding(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	bool flooded = false;
+
 	if (gfn == vcpu->arch.last_pt_write_gfn
 	    && !last_updated_pte_accessed(vcpu)) {
 		++vcpu->arch.last_pt_write_count;
 		if (vcpu->arch.last_pt_write_count >= 3)
-			flooded = 1;
+			flooded = true;
 	} else {
 		vcpu->arch.last_pt_write_gfn = gfn;
 		vcpu->arch.last_pt_write_count = 1;
 		vcpu->arch.last_pte_updated = NULL;
 	}
+
+	return flooded;
+}
+
+/*
+ * Misaligned accesses are too much trouble to fix up; also, they usually
+ * indicate a page is not used as a page table.
+ */
+static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa,
+				    int bytes)
+{
+	unsigned offset, pte_size, misaligned;
+
+	pgprintk("misaligned: gpa %llx bytes %d role %x\n",
+		 gpa, bytes, sp->role.word);
+
+	offset = offset_in_page(gpa);
+	pte_size = sp->role.cr4_pae ? 8 : 4;
+	misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
+	misaligned |= bytes < 4;
+
+	return misaligned;
+}
+
+static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
+{
+	unsigned page_offset, quadrant;
+	u64 *spte;
+	int level;
+
+	page_offset = offset_in_page(gpa);
+	level = sp->role.level;
+	*nspte = 1;
+	if (!sp->role.cr4_pae) {
+		page_offset <<= 1;	/* 32->64 */
+		/*
+		 * A 32-bit pde maps 4MB while the shadow pdes map
+		 * only 2MB.  So we need to double the offset again
+		 * and zap two pdes instead of one.
+		 */
+		if (level == PT32_ROOT_LEVEL) {
+			page_offset &= ~7; /* kill rounding error */
+			page_offset <<= 1;
+			*nspte = 2;
+		}
+		quadrant = page_offset >> PAGE_SHIFT;
+		page_offset &= ~PAGE_MASK;
+		if (quadrant != sp->role.quadrant)
+			return NULL;
+	}
+
+	spte = &sp->spt[page_offset / sizeof(*spte)];
+	return spte;
+}
+
+void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+		       const u8 *new, int bytes, bool repeat_write)
+{
+	gfn_t gfn = gpa >> PAGE_SHIFT;
+	union kvm_mmu_page_role mask = { .word = 0 };
+	struct kvm_mmu_page *sp;
+	struct hlist_node *node;
+	LIST_HEAD(invalid_list);
+	u64 entry, gentry, *spte;
+	int npte;
+	bool remote_flush, local_flush, zap_page, flooded, misaligned;
+
+	/*
+	 * If we don't have indirect shadow pages, it means no page is
+	 * write-protected, so we can exit simply.
+	 */
+	if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
+		return;
+
+	zap_page = remote_flush = local_flush = false;
+
+	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
+
+	gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes);
+	mmu_topup_memory_caches(vcpu);
+	spin_lock(&vcpu->kvm->mmu_lock);
+	++vcpu->kvm->stat.mmu_pte_write;
+	trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
+
+	flooded = detect_write_flooding(vcpu, gfn);
 	mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
 	for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
-		pte_size = sp->role.cr4_pae ? 8 : 4;
-		misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
-		misaligned |= bytes < 4;
+		misaligned = detect_write_misaligned(sp, gpa, bytes);
 		if (misaligned || flooded || repeat_write) {
-			/*
-			 * Misaligned accesses are too much trouble to fix
-			 * up; also, they usually indicate a page is not used
-			 * as a page table.
-			 *
-			 * If we're seeing too many writes to a page,
-			 * it may no longer be a page table, or we may be
-			 * forking, in which case it is better to unmap the
-			 * page.
-			 */
-			pgprintk("misaligned: gpa %llx bytes %d role %x\n",
-				 gpa, bytes, sp->role.word);
 			zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
 						     &invalid_list);
 			++vcpu->kvm->stat.mmu_flooded;
 			continue;
 		}
-		page_offset = offset;
-		level = sp->role.level;
-		npte = 1;
-		if (!sp->role.cr4_pae) {
-			page_offset <<= 1;	/* 32->64 */
-			/*
-			 * A 32-bit pde maps 4MB while the shadow pdes map
-			 * only 2MB.  So we need to double the offset again
-			 * and zap two pdes instead of one.
-			 */
-			if (level == PT32_ROOT_LEVEL) {
-				page_offset &= ~7; /* kill rounding error */
-				page_offset <<= 1;
-				npte = 2;
-			}
-			quadrant = page_offset >> PAGE_SHIFT;
-			page_offset &= ~PAGE_MASK;
-			if (quadrant != sp->role.quadrant)
-				continue;
-		}
+
+		spte = get_written_sptes(sp, gpa, &npte);
+		if (!spte)
+			continue;
+
 		local_flush = true;
-		spte = &sp->spt[page_offset / sizeof(*spte)];
 		while (npte--) {
 			entry = *spte;
 			mmu_page_zap_pte(vcpu->kvm, sp, spte);
-- 
1.7.5.4
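
A note for readers new to this code path: mmu_pte_write_fetch_gpte() takes gpa
and bytes by pointer because a 32-bit PAE guest may write only one half of a
64-bit gpte; the helper widens such a write to the whole aligned entry and
hands the adjusted gpa/bytes back to kvm_mmu_pte_write(), which still needs
them for the misalignment check and the spte lookup. Below is a minimal,
self-contained userspace sketch of that in/out-parameter idiom only. It is not
kernel code; it assumes a little-endian host (as on x86), and the fake_*
helpers and sample values are invented for the example.

/*
 * Stand-alone illustration (not kernel code) of the idiom used by
 * mmu_pte_write_fetch_gpte(): a 4-byte write is widened to the aligned
 * 8-byte entry that contains it, and the adjusted gpa/bytes are returned
 * to the caller through pointers.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint8_t fake_guest_memory[16];	/* pretend guest page-table bytes */

/* Stand-in for kvm_read_guest(): copy 'len' bytes at 'gpa' into 'data'. */
static int fake_read_guest(uint64_t gpa, void *data, int len)
{
	if (gpa + len > sizeof(fake_guest_memory))
		return -1;
	memcpy(data, fake_guest_memory + gpa, len);
	return 0;
}

/* Widen a 4-byte write to the containing 8-byte entry, adjusting *gpa/*bytes. */
static uint64_t fake_fetch_gpte(uint64_t *gpa, const uint8_t *new, int *bytes)
{
	uint64_t gentry = 0;

	if (*bytes == 4) {
		*gpa &= ~(uint64_t)7;	/* round down to the 8-byte entry */
		*bytes = 8;
		if (fake_read_guest(*gpa, &gentry, 8))
			gentry = 0;
		new = (const uint8_t *)&gentry;
	}

	switch (*bytes) {
	case 4: {
		uint32_t v;
		memcpy(&v, new, 4);
		gentry = v;
		break;
	}
	case 8: {
		uint64_t v;
		memcpy(&v, new, 8);
		gentry = v;
		break;
	}
	default:
		gentry = 0;
		break;
	}

	return gentry;
}

int main(void)
{
	uint64_t old_entry = 0x1122334455667788ULL;
	uint32_t new_half  = 0xaabbccdd;
	uint64_t gpa       = 12;	/* guest wrote the high half of entry 1 */
	int bytes          = 4;
	uint64_t gentry;

	memcpy(fake_guest_memory + 8, &old_entry, 8);	/* existing 64-bit entry */
	memcpy(fake_guest_memory + 12, &new_half, 4);	/* the partial write */

	gentry = fake_fetch_gpte(&gpa, (const uint8_t *)&new_half, &bytes);
	printf("gpa=%llu bytes=%d gentry=%016llx\n",
	       (unsigned long long)gpa, bytes, (unsigned long long)gentry);
	return 0;
}

Built with any C99 compiler, this prints gpa=8 bytes=8 and the merged 64-bit
entry, showing why the helper must report the adjusted gpa/bytes back to its
caller rather than keep them local.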