There is no real urgency to free a stage-2 subtree that was pruned.
Nonetheless, KVM does the teardown in the stage-2 fault path while
holding the MMU lock.

Free removed stage-2 subtrees after an RCU grace period. To guarantee
all stage-2 table pages are freed before killing a VM, add an
rcu_barrier() to the flush path.

Signed-off-by: Oliver Upton <oliver.upton@xxxxxxxxx>
---
 arch/arm64/include/asm/kvm_pgtable.h |  2 +-
 arch/arm64/kvm/mmu.c                 | 14 +++++++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index d1859e8550df..f35e7d4d73f1 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -65,7 +65,7 @@ typedef kvm_pte_t __rcu *kvm_pteref_t;
 
 static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared)
 {
-	return rcu_dereference_check(pteref, shared);
+	return rcu_dereference_check(pteref, !shared);
 }
 
 static inline void kvm_pgtable_walk_begin(void)
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 175a0e9a04b6..a1a623c5b069 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -130,9 +130,21 @@ static void kvm_s2_free_pages_exact(void *virt, size_t size)
 
 static struct kvm_pgtable_mm_ops kvm_s2_mm_ops;
 
+static void stage2_free_removed_table_rcu_cb(struct rcu_head *head)
+{
+	struct page *page = container_of(head, struct page, rcu_head);
+	void *pgtable = page_to_virt(page);
+	u32 level = page_private(page);
+
+	kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, pgtable, level);
+}
+
 static void stage2_free_removed_table(void *addr, u32 level)
 {
-	kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, addr, level);
+	struct page *page = virt_to_page(addr);
+
+	set_page_private(page, (unsigned long)level);
+	call_rcu(&page->rcu_head, stage2_free_removed_table_rcu_cb);
 }
 
 static void kvm_host_get_page(void *addr)
-- 
2.38.1.273.g43a17bfeac-goog
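
[Editor's note] The mmu.c hunk leans on a common kernel idiom: stash
per-object data in the object itself (here, the table level goes in
page_private()), queue the page's embedded rcu_head with call_rcu(), and
recover the enclosing struct page in the callback via container_of().
Below is a minimal userspace sketch of that pattern; cb_head and
fake_page are made-up stand-ins for rcu_head and struct page, and the
callback is invoked directly rather than after a grace period:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Stand-in for the kernel's struct rcu_head: just a function pointer. */
struct cb_head {
	void (*func)(struct cb_head *head);
};

/* Stand-in for struct page: embeds the callback head plus a private
 * field used to smuggle the table level to the callback, mirroring
 * set_page_private()/page_private() in the patch. */
struct fake_page {
	struct cb_head rcu_head;
	unsigned long private;
};

static void free_table_cb(struct cb_head *head)
{
	/* Recover the enclosing page from the embedded head, exactly as
	 * stage2_free_removed_table_rcu_cb() does with container_of(). */
	struct fake_page *page = container_of(head, struct fake_page, rcu_head);

	printf("freeing table stashed at level %lu\n", page->private);
	free(page);
}

int main(void)
{
	struct fake_page *page = malloc(sizeof(*page));

	if (!page)
		return 1;

	page->private = 2;	/* plays the role of set_page_private(page, level) */
	page->rcu_head.func = free_table_cb;

	/* The kernel defers this via call_rcu() until a grace period has
	 * elapsed; this sketch simply runs the callback immediately. */
	page->rcu_head.func(&page->rcu_head);
	return 0;
}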
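
[Editor's note] The rcu_barrier() mentioned in the commit message does
not appear in the quoted hunks. As a rough, hypothetical sketch of the
flush-path usage it describes (the function name below is invented, not
taken from this patch):

/* Hypothetical sketch: where rcu_barrier() sits in a teardown path. */
static void stage2_flush_and_wait(void)
{
	/* Unmapping the stage-2 tables queues frees via call_rcu(),
	 * i.e. stage2_free_removed_table() above. */

	/*
	 * rcu_barrier() waits for all previously queued RCU callbacks
	 * to complete; this is stronger than synchronize_rcu(), which
	 * only waits for a grace period to elapse. Once it returns,
	 * every removed table page has actually been freed.
	 */
	rcu_barrier();
}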