KVM's paravirt mmu pte batching has issues with, at least, kernel
pagetable updates from DEBUG_PAGEALLOC.  This has been seen with slab
allocation from irq context from within lazy mmu sections:

https://bugzilla.redhat.com/show_bug.cgi?id=480822

DEBUG_PAGEALLOC maps/unmaps the kernel pagetables to catch bad
accesses, with code such as:

__change_page_attr():

	/*
	 * Do we really change anything ?
	 */
	if (pte_val(old_pte) != pte_val(new_pte)) {
		set_pte_atomic(kpte, new_pte);
		cpa->flags |= CPA_FLUSHTLB;
	}

A present->nonpresent update can be queued but not yet committed to
memory.  The comparison above then reads the stale, still-present pte,
so set_pte_atomic() is skipped, and the stale queued update is flushed
afterwards, clobbering the pte.  Note the name: set_pte_ATOMIC.

Only allow batching from set_pte_at, which is the interesting case.

Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx>

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 478bca9..ba2086a 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -86,7 +86,7 @@ static void kvm_deferred_mmu_op(void *buffer, int len)
 	state->mmu_queue_len += len;
 }
 
-static void kvm_mmu_write(void *dest, u64 val)
+static void kvm_mmu_write(void *dest, u64 val, bool batch)
 {
 	__u64 pte_phys;
 	struct kvm_mmu_op_write_pte wpte;
@@ -107,6 +107,8 @@ static void kvm_mmu_write(void *dest, u64 val)
 	wpte.pte_phys = pte_phys;
 
 	kvm_deferred_mmu_op(&wpte, sizeof wpte);
+	if (!batch)
+		mmu_queue_flush(kvm_para_state());
 }
 
 /*
@@ -117,54 +119,54 @@ static void kvm_mmu_write(void *dest, u64 val)
  */
 static void kvm_set_pte(pte_t *ptep, pte_t pte)
 {
-	kvm_mmu_write(ptep, pte_val(pte));
+	kvm_mmu_write(ptep, pte_val(pte), false);
 }
 
 static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr,
 			   pte_t *ptep, pte_t pte)
 {
-	kvm_mmu_write(ptep, pte_val(pte));
+	kvm_mmu_write(ptep, pte_val(pte), true);
 }
 
 static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
-	kvm_mmu_write(pmdp, pmd_val(pmd));
+	kvm_mmu_write(pmdp, pmd_val(pmd), false);
 }
 
 #if PAGETABLE_LEVELS >= 3
 #ifdef CONFIG_X86_PAE
 static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
 {
-	kvm_mmu_write(ptep, pte_val(pte));
+	kvm_mmu_write(ptep, pte_val(pte), false);
 }
 
 static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr,
 				pte_t *ptep, pte_t pte)
 {
-	kvm_mmu_write(ptep, pte_val(pte));
+	kvm_mmu_write(ptep, pte_val(pte), false);
 }
 
 static void kvm_pte_clear(struct mm_struct *mm,
 			  unsigned long addr, pte_t *ptep)
 {
-	kvm_mmu_write(ptep, 0);
+	kvm_mmu_write(ptep, 0, false);
 }
 
 static void kvm_pmd_clear(pmd_t *pmdp)
 {
-	kvm_mmu_write(pmdp, 0);
+	kvm_mmu_write(pmdp, 0, false);
 }
 #endif
 
 static void kvm_set_pud(pud_t *pudp, pud_t pud)
 {
-	kvm_mmu_write(pudp, pud_val(pud));
+	kvm_mmu_write(pudp, pud_val(pud), false);
 }
 
 #if PAGETABLE_LEVELS == 4
 static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
-	kvm_mmu_write(pgdp, pgd_val(pgd));
+	kvm_mmu_write(pgdp, pgd_val(pgd), false);
 }
 #endif
 #endif /* PAGETABLE_LEVELS >= 3 */
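
For illustration, here is a minimal userspace sketch (plain C, not
kernel code; the queue, flush and pte names here are made up for the
example) of the race described above.  A one-slot queue stands in for
the lazy mmu buffer, and change_page_attr() mimics the compare-and-skip
logic in __change_page_attr():

#include <stdio.h>
#include <stdint.h>

#define PTE_PRESENT 0x1ULL

static uint64_t pte_in_memory;	/* the pte as seen in the page tables */
static uint64_t queued_val;	/* one-slot deferred-write "queue" */
static int queue_full;

/* Queue a pte write without committing it (lazy mmu batching). */
static void queue_pte_write(uint64_t val)
{
	queued_val = val;
	queue_full = 1;
}

/* Commit the queued write (the hypercall flush in the real code). */
static void flush_queue(void)
{
	if (queue_full) {
		pte_in_memory = queued_val;
		queue_full = 0;
	}
}

/*
 * Mimics __change_page_attr(): skip the write if memory already
 * appears to hold the desired value.
 */
static void change_page_attr(uint64_t new_pte)
{
	uint64_t old_pte = pte_in_memory;	/* stale: queue not flushed */

	if (old_pte != new_pte)
		pte_in_memory = new_pte;	/* set_pte_atomic() */
}

int main(void)
{
	uint64_t mapped = 0x1000 | PTE_PRESENT;

	pte_in_memory = mapped;

	/* DEBUG_PAGEALLOC unmap inside a lazy mmu section: queued only. */
	queue_pte_write(0);

	/*
	 * irq context: slab allocation re-maps the page.  Memory still
	 * shows the present pte, so the write is skipped.
	 */
	change_page_attr(mapped);

	/* Lazy section ends: the stale unmap is committed. */
	flush_queue();

	printf("final pte: %#llx, expected: %#llx\n",
	       (unsigned long long)pte_in_memory,
	       (unsigned long long)mapped);
	return 0;
}

Run, this prints a final pte of 0 where the present mapping was
expected: exactly the window the patch closes by flushing the queue on
every write except set_pte_at.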