The patch titled
     KVM: MMU: oom handling
has been added to the -mm tree.  Its filename is
     kvm-mmu-oom-handling.patch

See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find
out what to do about this

------------------------------------------------------
Subject: KVM: MMU: oom handling
From: Avi Kivity <avi@xxxxxxxxxxxx>

When beginning to process a page fault, make sure we have enough shadow
pages available to service the fault.  If not, free some pages.

Signed-off-by: Avi Kivity <avi@xxxxxxxxxxxx>
Acked-by: Ingo Molnar <mingo@xxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxx>
---

 drivers/kvm/kvm.h         |   12 ++++++++
 drivers/kvm/mmu.c         |   49 +++++++++++++++---------------------
 drivers/kvm/paging_tmpl.h |   15 +----------
 drivers/kvm/svm.c         |    2 -
 drivers/kvm/vmx.c         |    2 -
 5 files changed, 37 insertions(+), 43 deletions(-)

diff -puN drivers/kvm/kvm.h~kvm-mmu-oom-handling drivers/kvm/kvm.h
--- a/drivers/kvm/kvm.h~kvm-mmu-oom-handling
+++ a/drivers/kvm/kvm.h
@@ -52,6 +52,8 @@
 #define KVM_MAX_VCPUS 1
 #define KVM_MEMORY_SLOTS 4
 #define KVM_NUM_MMU_PAGES 256
+#define KVM_MIN_FREE_MMU_PAGES 5
+#define KVM_REFILL_PAGES 25
 
 #define FX_IMAGE_SIZE 512
 #define FX_IMAGE_ALIGN 16
@@ -278,6 +280,7 @@ struct kvm {
 	 * Hash table of struct kvm_mmu_page.
 	 */
 	struct list_head active_mmu_pages;
+	int n_free_mmu_pages;
 	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 	struct kvm_vcpu vcpus[KVM_MAX_VCPUS];
 	int memory_config_version;
@@ -451,6 +454,15 @@ unsigned long segment_base(u16 selector)
 void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes);
 void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes);
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
+void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
+
+static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
+				     u32 error_code)
+{
+	if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
+		kvm_mmu_free_some_pages(vcpu);
+	return vcpu->mmu.page_fault(vcpu, gva, error_code);
+}
 
 static inline struct page *_gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
diff -puN drivers/kvm/mmu.c~kvm-mmu-oom-handling drivers/kvm/mmu.c
--- a/drivers/kvm/mmu.c~kvm-mmu-oom-handling
+++ a/drivers/kvm/mmu.c
@@ -310,6 +310,7 @@ static void kvm_mmu_free_page(struct kvm
 	list_del(&page_head->link);
 	page_head->page_hpa = page_hpa;
 	list_add(&page_head->link, &vcpu->free_pages);
+	++vcpu->kvm->n_free_mmu_pages;
 }
 
 static int is_empty_shadow_page(hpa_t page_hpa)
@@ -344,6 +345,7 @@ static struct kvm_mmu_page *kvm_mmu_allo
 	page->global = 1;
 	page->multimapped = 0;
 	page->parent_pte = parent_pte;
+	--vcpu->kvm->n_free_mmu_pages;
 	return page;
 }
 
@@ -544,8 +546,7 @@ static void kvm_mmu_zap_page(struct kvm_
 	}
 	kvm_mmu_page_unlink_children(vcpu, page);
 	hlist_del(&page->hash_link);
-	list_del(&page->link);
-	list_add(&page->link, &vcpu->free_pages);
+	kvm_mmu_free_page(vcpu, page->page_hpa);
 }
 
 static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
@@ -743,18 +744,6 @@ static void mmu_alloc_roots(struct kvm_v
 	vcpu->mmu.root_hpa = __pa(vcpu->mmu.pae_root);
 }
 
-static void nonpaging_flush(struct kvm_vcpu *vcpu)
-{
-	hpa_t root = vcpu->mmu.root_hpa;
-
-	++kvm_stat.tlb_flush;
-	pgprintk("nonpaging_flush\n");
-	mmu_free_roots(vcpu);
-	mmu_alloc_roots(vcpu);
-	kvm_arch_ops->set_cr3(vcpu, root);
-	kvm_arch_ops->tlb_flush(vcpu);
-}
-
 static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
 {
 	return vaddr;
@@ -763,28 +752,19 @@ static gpa_t nonpaging_gva_to_gpa(struct
 static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
				u32 error_code)
 {
-	int ret;
 	gpa_t addr = gva;
+	hpa_t paddr;
 
 	ASSERT(vcpu);
 	ASSERT(VALID_PAGE(vcpu->mmu.root_hpa));
 
-	for (;;) {
-		hpa_t paddr;
-		paddr = gpa_to_hpa(vcpu , addr & PT64_BASE_ADDR_MASK);
+	paddr = gpa_to_hpa(vcpu , addr & PT64_BASE_ADDR_MASK);
 
-		if (is_error_hpa(paddr))
-			return 1;
+	if (is_error_hpa(paddr))
+		return 1;
 
-		ret = nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
-		if (ret) {
-			nonpaging_flush(vcpu);
-			continue;
-		}
-		break;
-	}
-	return ret;
+	return nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
 }
 
 static void nonpaging_inval_page(struct kvm_vcpu *vcpu, gva_t addr)
@@ -1093,6 +1073,18 @@ int kvm_mmu_unprotect_page_virt(struct k
 	return kvm_mmu_unprotect_page(vcpu, gpa >> PAGE_SHIFT);
 }
 
+void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
+{
+	while (vcpu->kvm->n_free_mmu_pages < KVM_REFILL_PAGES) {
+		struct kvm_mmu_page *page;
+
+		page = container_of(vcpu->kvm->active_mmu_pages.prev,
+				    struct kvm_mmu_page, link);
+		kvm_mmu_zap_page(vcpu, page);
+	}
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_free_some_pages);
+
 static void free_mmu_pages(struct kvm_vcpu *vcpu)
 {
 	while (!list_empty(&vcpu->free_pages)) {
@@ -1124,6 +1116,7 @@ static int alloc_mmu_pages(struct kvm_vc
 		page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT;
 		memset(__va(page_header->page_hpa), 0, PAGE_SIZE);
 		list_add(&page_header->link, &vcpu->free_pages);
+		++vcpu->kvm->n_free_mmu_pages;
 	}
 
 	/*
diff -puN drivers/kvm/paging_tmpl.h~kvm-mmu-oom-handling drivers/kvm/paging_tmpl.h
--- a/drivers/kvm/paging_tmpl.h~kvm-mmu-oom-handling
+++ a/drivers/kvm/paging_tmpl.h
@@ -246,8 +246,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu
 		}
 		shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
					       metaphysical, shadow_ent);
-		if (!shadow_page)
-			return ERR_PTR(-ENOMEM);
 		shadow_addr = shadow_page->page_hpa;
 		shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
			| PT_WRITABLE_MASK | PT_USER_MASK;
@@ -347,17 +345,8 @@ static int FNAME(page_fault)(struct kvm_
 	/*
 	 * Look up the shadow pte for the faulting address.
 	 */
-	for (;;) {
-		FNAME(walk_addr)(&walker, vcpu, addr);
-		shadow_pte = FNAME(fetch)(vcpu, addr, &walker);
-		if (IS_ERR(shadow_pte)) { /* must be -ENOMEM */
-			printk("%s: oom\n", __FUNCTION__);
-			nonpaging_flush(vcpu);
-			FNAME(release_walker)(&walker);
-			continue;
-		}
-		break;
-	}
+	FNAME(walk_addr)(&walker, vcpu, addr);
+	shadow_pte = FNAME(fetch)(vcpu, addr, &walker);
 
 	/*
	 * The page is not mapped by the guest. Let the guest handle it.
diff -puN drivers/kvm/svm.c~kvm-mmu-oom-handling drivers/kvm/svm.c
--- a/drivers/kvm/svm.c~kvm-mmu-oom-handling
+++ a/drivers/kvm/svm.c
@@ -861,7 +861,7 @@ static int pf_interception(struct kvm_vc
 	fault_address = vcpu->svm->vmcb->control.exit_info_2;
 	error_code = vcpu->svm->vmcb->control.exit_info_1;
 
-	if (!vcpu->mmu.page_fault(vcpu, fault_address, error_code)) {
+	if (!kvm_mmu_page_fault(vcpu, fault_address, error_code)) {
 		spin_unlock(&vcpu->kvm->lock);
 		return 1;
 	}
diff -puN drivers/kvm/vmx.c~kvm-mmu-oom-handling drivers/kvm/vmx.c
--- a/drivers/kvm/vmx.c~kvm-mmu-oom-handling
+++ a/drivers/kvm/vmx.c
@@ -1318,7 +1318,7 @@ static int handle_exception(struct kvm_v
 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
 		spin_lock(&vcpu->kvm->lock);
-		if (!vcpu->mmu.page_fault(vcpu, cr2, error_code)) {
+		if (!kvm_mmu_page_fault(vcpu, cr2, error_code)) {
 			spin_unlock(&vcpu->kvm->lock);
 			return 1;
 		}
_

Patches currently in -mm which might be from avi@xxxxxxxxxxxx are

kvm-fix-gfp_kernel-alloc-in-atomic-section-bug.patch
kvm-use-raw_smp_processor_id-instead-of-smp_processor_id-where-applicable.patch
kvm-recover-after-an-arch-module-load-failure.patch
kvm-improve-interrupt-response.patch
kvm-prevent-stale-bits-in-cr0-and-cr4.patch
kvm-mmu-implement-simple-reverse-mapping.patch
kvm-mmu-teach-the-page-table-walker-to-track-guest-page-table-gfns.patch
kvm-mmu-load-the-pae-pdptrs-on-cr3-change-like-the-processor-does.patch
kvm-mmu-fold-fetch_guest-into-init_walker.patch
kvm-mu-special-treatment-for-shadow-pae-root-pages.patch
kvm-mmu-use-the-guest-pdptrs-instead-of-mapping-cr3-in-pae-mode.patch
kvm-mmu-make-the-shadow-page-tables-also-special-case-pae.patch
kvm-mmu-make-kvm_mmu_alloc_page-return-a-kvm_mmu_page-pointer.patch
kvm-mmu-shadow-page-table-caching.patch
kvm-mmu-write-protect-guest-pages-when-a-shadow-is-created-for-them.patch
kvm-mmu-let-the-walker-extract-the-target-page-gfn-from-the-pte.patch
kvm-mmu-support-emulated-writes-into-ram.patch
kvm-mmu-zap-shadow-page-table-entries-on-writes-to-guest-page-tables.patch
kvm-mmu-if-emulating-an-instruction-fails-try-unprotecting-the-page.patch
kvm-mmu-implement-child-shadow-unlinking.patch
kvm-mmu-kvm_mmu_put_page-only-removes-one-link-to-the-page.patch
kvm-mmu-oom-handling.patch
kvm-mmu-remove-invlpg-interception.patch
kvm-mmu-remove-release_pt_page_64.patch
kvm-mmu-handle-misaligned-accesses-to-write-protected-guest-page-tables.patch
kvm-mmu-ove-is_empty_shadow_page-above-kvm_mmu_free_page.patch
kvm-mmu-ensure-freed-shadow-pages-are-clean.patch
kvm-mmu-if-an-empty-shadow-page-is-not-empty-report-more-info.patch
kvm-mmu-page-table-write-flood-protection.patch
kvm-mmu-never-free-a-shadow-page-actively-serving-as-a-root.patch
kvm-mmu-fix-cmpxchg8b-emulation.patch
kvm-mmu-treat-user-mode-faults-as-a-hint-that-a-page-is-no-longer-a-page-table.patch
kvm-mmu-free-pages-on-kvm-destruction.patch
kvm-mmu-replace-atomic-allocations-by-preallocated-objects.patch
kvm-mmu-detect-oom-conditions-and-propagate-error-to-userspace.patch
kvm-mmu-flush-guest-tlb-when-reducing-permissions-on-a-pte.patch
kvm-mmu-destroy-mmu-while-we-still-have-a-vcpu-left.patch
kvm-mmu-add-audit-code-to-check-mappings-etc-are-correct.patch

-
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html
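A note on the watermark pair the patch above introduces: KVM_MIN_FREE_MMU_PAGES
is the low mark checked on every fault, and KVM_REFILL_PAGES is the level that
kvm_mmu_free_some_pages() restores by zapping the oldest entries on
active_mmu_pages. The gap between the two marks gives the fault path
hysteresis, so a burst of faults pays for one batch of zapping rather than one
zap per fault. The following user-space toy model illustrates that batching; it
is a sketch only, the two watermark values and KVM_NUM_MMU_PAGES come from the
patch, while the counter and stub functions are stand-ins for the kvm
structures, not kernel code:

#include <stdio.h>

/* Watermarks from the patch; everything else below is a stand-in. */
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES       25
#define KVM_NUM_MMU_PAGES      256

static int n_free_mmu_pages = KVM_NUM_MMU_PAGES;

/* Stand-in for kvm_mmu_zap_page(): recycling the oldest shadow page
 * returns it to the free pool. */
static void zap_oldest_page(void)
{
	++n_free_mmu_pages;
}

/* Stand-in for kvm_mmu_free_some_pages(): zap until the high mark. */
static void free_some_pages(void)
{
	while (n_free_mmu_pages < KVM_REFILL_PAGES)
		zap_oldest_page();
}

/* Stand-in for kvm_mmu_page_fault(): refill first when below the low
 * mark, then consume one page for the new shadow mapping. */
static void page_fault(void)
{
	if (n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES)
		free_some_pages();
	--n_free_mmu_pages;
}

int main(void)
{
	/* Drive enough faults to drain the pool and cross the low mark. */
	for (int i = 0; i < 300; i++) {
		page_fault();
		if (n_free_mmu_pages == KVM_REFILL_PAGES - 1)
			printf("fault %d: pool refilled to %d pages\n",
			       i, KVM_REFILL_PAGES);
	}
	return 0;
}

Compiled with cc -std=c99, this reports a refill when the pool first drains
(around fault 252) and then only once per roughly twenty faults, which is the
batching behaviour the watermark gap buys.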