Marcelo Tosatti wrote:
> On Fri, Apr 23, 2010 at 01:58:22PM +0800, Gui Jianfeng wrote:
>> Currently, in kvm_mmu_change_mmu_pages(kvm, page), "used_pages--" is
>> performed after calling kvm_mmu_zap_page() in spite of that whether "page"
>> is actually reclaimed. Because root sp won't be reclaimed by
>> kvm_mmu_zap_page(). So making kvm_mmu_zap_page() return total number of
>> reclaimed sp makes more sense. A new flag is put into kvm_mmu_zap_page()
>> to indicate whether the top page is reclaimed.
>>
>> Signed-off-by: Gui Jianfeng <guijianfeng@xxxxxxxxxxxxxx>
>> ---
>>  arch/x86/kvm/mmu.c |   53 +++++++++++++++++++++++++++++++++++----------------
>>  1 files changed, 36 insertions(+), 17 deletions(-)
>
> Gui,
>
> There will be only a few pinned roots, and there is no need for
> kvm_mmu_change_mmu_pages to be precise at that level (pages will be
> reclaimed through kvm_unmap_hva eventually).

Hi Marcelo,

Actually, this doesn't only affect kvm_mmu_change_mmu_pages(); it also
affects kvm_mmu_remove_some_alloc_mmu_pages(), which is called by the mmu
shrink routine. The unconditional "+ 1" there makes the upper layer see a
wrong number of reclaimed pages, so I think this should be fixed. Here is
an updated version; a small standalone sketch of the new return convention
follows the patch.

---
From: Gui Jianfeng <guijianfeng@xxxxxxxxxxxxxx>

Currently, in kvm_mmu_change_mmu_pages(kvm, page), "used_pages--" is
performed after calling kvm_mmu_zap_page() regardless of whether "page" was
actually reclaimed, but a root sp won't be reclaimed by kvm_mmu_zap_page().
So it makes more sense for kvm_mmu_zap_page() to return the total number of
reclaimed sps. A new flag is added to kvm_mmu_zap_page() to indicate whether
the top page itself was reclaimed. kvm_mmu_remove_some_alloc_mmu_pages()
also relies on kvm_mmu_zap_page() returning the total reclaimed number.

Signed-off-by: Gui Jianfeng <guijianfeng@xxxxxxxxxxxxxx>
---
 arch/x86/kvm/mmu.c |   53 +++++++++++++++++++++++++++++++++++----------------
 1 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 51eb6d6..e545da8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1194,12 +1194,13 @@ static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 	--kvm->stat.mmu_unsync;
 }
 
-static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+			    int *self_deleted);
 
 static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
 	if (sp->role.cr4_pae != !!is_pae(vcpu)) {
-		kvm_mmu_zap_page(vcpu->kvm, sp);
+		kvm_mmu_zap_page(vcpu->kvm, sp, NULL);
 		return 1;
 	}
 
@@ -1207,7 +1208,7 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	kvm_flush_remote_tlbs(vcpu->kvm);
 	kvm_unlink_unsync_page(vcpu->kvm, sp);
 	if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
-		kvm_mmu_zap_page(vcpu->kvm, sp);
+		kvm_mmu_zap_page(vcpu->kvm, sp, NULL);
 		return 1;
 	}
 
@@ -1478,7 +1479,7 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
 		struct kvm_mmu_page *sp;
 
 		for_each_sp(pages, sp, parents, i) {
-			kvm_mmu_zap_page(kvm, sp);
+			kvm_mmu_zap_page(kvm, sp, NULL);
 			mmu_pages_clear_parents(&parents);
 			zapped++;
 		}
@@ -1488,7 +1489,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
 	return zapped;
 }
 
-static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+			    int *self_deleted)
 {
 	int ret;
 
@@ -1505,11 +1507,16 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 	if (!sp->root_count) {
 		hlist_del(&sp->hash_link);
 		kvm_mmu_free_page(kvm, sp);
+		/* Count self */
+		ret++;
+		if (self_deleted)
+			*self_deleted = 1;
 	} else {
 		sp->role.invalid = 1;
 		list_move(&sp->link, &kvm->arch.active_mmu_pages);
 		kvm_reload_remote_mmus(kvm);
 	}
+
 	kvm_mmu_reset_last_pte_updated(kvm);
 	return ret;
 }
 
@@ -1538,8 +1545,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
 
 			page = container_of(kvm->arch.active_mmu_pages.prev,
 					    struct kvm_mmu_page, link);
-			used_pages -= kvm_mmu_zap_page(kvm, page);
-			used_pages--;
+			used_pages -= kvm_mmu_zap_page(kvm, page, NULL);
 		}
 		kvm_nr_mmu_pages = used_pages;
 		kvm->arch.n_free_mmu_pages = 0;
@@ -1558,6 +1564,8 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 	struct kvm_mmu_page *sp;
 	struct hlist_node *node, *n;
 	int r;
+	int self_deleted = 0;
+	int ret;
 
 	pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
 	r = 0;
@@ -1569,7 +1577,8 @@ restart:
 			pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
 				 sp->role.word);
 			r = 1;
-			if (kvm_mmu_zap_page(kvm, sp))
+			ret = kvm_mmu_zap_page(kvm, sp, &self_deleted);
+			if (ret > 1 || (ret == 1 && self_deleted == 0))
 				goto restart;
 		}
 	return r;
@@ -1581,6 +1590,8 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 	struct hlist_head *bucket;
 	struct kvm_mmu_page *sp;
 	struct hlist_node *node, *nn;
+	int ret;
+	int self_deleted = 0;
 
 	index = kvm_page_table_hashfn(gfn);
 	bucket = &kvm->arch.mmu_page_hash[index];
@@ -1590,7 +1601,8 @@ restart:
 		    && !sp->role.invalid) {
 			pgprintk("%s: zap %lx %x\n",
 				 __func__, gfn, sp->role.word);
-			if (kvm_mmu_zap_page(kvm, sp))
+			ret = kvm_mmu_zap_page(kvm, sp, &self_deleted);
+			if (ret > 1 || (ret == 1 && self_deleted == 0))
 				goto restart;
 		}
 	}
@@ -2012,7 +2024,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 		sp = page_header(root);
 		--sp->root_count;
 		if (!sp->root_count && sp->role.invalid)
-			kvm_mmu_zap_page(vcpu->kvm, sp);
+			kvm_mmu_zap_page(vcpu->kvm, sp, NULL);
 		vcpu->arch.mmu.root_hpa = INVALID_PAGE;
 		spin_unlock(&vcpu->kvm->mmu_lock);
 		return;
@@ -2025,7 +2037,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 			sp = page_header(root);
 			--sp->root_count;
 			if (!sp->root_count && sp->role.invalid)
-				kvm_mmu_zap_page(vcpu->kvm, sp);
+				kvm_mmu_zap_page(vcpu->kvm, sp, NULL);
 		}
 		vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
 	}
@@ -2604,6 +2616,8 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	int npte;
 	int r;
 	int invlpg_counter;
+	int ret;
+	int self_deleted = 0;
 
 	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
 
@@ -2682,7 +2696,9 @@ restart:
 			 */
 			pgprintk("misaligned: gpa %llx bytes %d role %x\n",
				 gpa, bytes, sp->role.word);
-			if (kvm_mmu_zap_page(vcpu->kvm, sp))
+
+			ret = kvm_mmu_zap_page(vcpu->kvm, sp, &self_deleted);
+			if (ret > 1 || (ret == 1 && self_deleted == 0))
 				goto restart;
 			++vcpu->kvm->stat.mmu_flooded;
 			continue;
@@ -2750,7 +2766,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 
 		sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev,
 				  struct kvm_mmu_page, link);
-		kvm_mmu_zap_page(vcpu->kvm, sp);
+		kvm_mmu_zap_page(vcpu->kvm, sp, NULL);
 		++vcpu->kvm->stat.mmu_recycled;
 	}
 }
@@ -2890,13 +2906,16 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 void kvm_mmu_zap_all(struct kvm *kvm)
 {
 	struct kvm_mmu_page *sp, *node;
+	int ret;
+	int self_deleted = 0;
 
 	spin_lock(&kvm->mmu_lock);
 restart:
-	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link)
-		if (kvm_mmu_zap_page(kvm, sp))
+	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
+		ret = kvm_mmu_zap_page(kvm, sp, &self_deleted);
+		if (ret > 1 || (ret == 1 && self_deleted == 0))
 			goto restart;
-
+	}
 	spin_unlock(&kvm->mmu_lock);
 	kvm_flush_remote_tlbs(kvm);
@@ -2908,7 +2927,7 @@ static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm)
 
 	page = container_of(kvm->arch.active_mmu_pages.prev,
 			    struct kvm_mmu_page, link);
-	return kvm_mmu_zap_page(kvm, page) + 1;
+	return kvm_mmu_zap_page(kvm, page, NULL);
 }
 
 static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
--
1.6.5.2
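
To make the intended contract explicit for review: with this patch,
kvm_mmu_zap_page() returns the total number of shadow pages it reclaimed
(unsync children plus the page itself when it could be freed), and
*self_deleted reports whether the page passed in was among them. Below is
a minimal userspace sketch of that contract; zap_page_stub() and the main()
harness are illustrative stand-ins of my own, not code from the patch:

	#include <stdio.h>

	/*
	 * Stub modeling the new kvm_mmu_zap_page() contract: returns
	 * the total number of pages reclaimed; sets *self_deleted when
	 * the page itself was freed (i.e. it was not a pinned root).
	 */
	static int zap_page_stub(int zapped_children, int is_pinned_root,
				 int *self_deleted)
	{
		int ret = zapped_children;

		if (!is_pinned_root) {
			ret++;			/* count self */
			if (self_deleted)
				*self_deleted = 1;
		}
		return ret;
	}

	int main(void)
	{
		int self_deleted = 0;
		/* a pinned root whose two unsync children were zapped */
		int ret = zap_page_stub(2, 1, &self_deleted);

		/*
		 * Callers walking the hash list must restart iff some
		 * *other* page was zapped: either more than one page
		 * went away, or exactly one did and it was not the page
		 * being held. This is the "ret > 1 || (ret == 1 &&
		 * self_deleted == 0)" test used at the call sites.
		 */
		if (ret > 1 || (ret == 1 && self_deleted == 0))
			printf("restart walk, %d pages reclaimed\n", ret);

		/*
		 * Accounting callers such as kvm_mmu_change_mmu_pages()
		 * and the shrinker can now simply use ret; no extra
		 * "used_pages--" or "+ 1" is needed.
		 */
		return 0;
	}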