Use RCU to safely walk the stage-2 page tables in parallel. Acquire and release the RCU read lock when traversing the page tables. Defer the freeing of table memory to an RCU callback. Indirect the calls into RCU and provide stubs for hypervisor code, as RCU is not available in such a context. The RCU protection doesn't amount to much at the moment, as readers are already protected by the read-write lock (all walkers that free table memory take the write lock). Nonetheless, a subsequent change will futher relax the locking requirements around the stage-2 MMU, thereby depending on RCU. Signed-off-by: Oliver Upton <oliver.upton@xxxxxxxxx> --- arch/arm64/include/asm/kvm_pgtable.h | 49 ++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/pgtable.c | 10 +++++- arch/arm64/kvm/mmu.c | 14 +++++++- 3 files changed, 71 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index e70cf57b719e..7634b6964779 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -37,6 +37,13 @@ static inline u64 kvm_get_parange(u64 mmfr0) typedef u64 kvm_pte_t; +/* + * RCU cannot be used in a non-kernel context such as the hyp. As such, page + * table walkers used in hyp do not call into RCU and instead use other + * synchronization mechanisms (such as a spinlock). + */ +#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__) + typedef kvm_pte_t *kvm_pteref_t; static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared) @@ -44,6 +51,40 @@ static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared return pteref; } +static inline void kvm_pgtable_walk_begin(void) {} +static inline void kvm_pgtable_walk_end(void) {} + +static inline bool kvm_pgtable_walk_lock_held(void) +{ + return true; +} + +#else + +typedef kvm_pte_t __rcu *kvm_pteref_t; + +static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared) +{ + return rcu_dereference_check(pteref, !shared); +} + +static inline void kvm_pgtable_walk_begin(void) +{ + rcu_read_lock(); +} + +static inline void kvm_pgtable_walk_end(void) +{ + rcu_read_unlock(); +} + +static inline bool kvm_pgtable_walk_lock_held(void) +{ + return rcu_read_lock_held(); +} + +#endif + #define KVM_PTE_VALID BIT(0) #define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) @@ -202,11 +243,14 @@ struct kvm_pgtable { * children. * @KVM_PGTABLE_WALK_TABLE_POST: Visit table entries after their * children. + * @KVM_PGTABLE_WALK_SHARED: Indicates the page-tables may be shared + * with other software walkers. */ enum kvm_pgtable_walk_flags { KVM_PGTABLE_WALK_LEAF = BIT(0), KVM_PGTABLE_WALK_TABLE_PRE = BIT(1), KVM_PGTABLE_WALK_TABLE_POST = BIT(2), + KVM_PGTABLE_WALK_SHARED = BIT(3), }; struct kvm_pgtable_visit_ctx { @@ -223,6 +267,11 @@ struct kvm_pgtable_visit_ctx { typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit); +static inline bool kvm_pgtable_walk_shared(const struct kvm_pgtable_visit_ctx *ctx) +{ + return ctx->flags & KVM_PGTABLE_WALK_SHARED; +} + /** * struct kvm_pgtable_walker - Hook into a page-table walk. * @cb: Callback function to invoke during the walk. diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index de8a2e1c7435..156754d87b2a 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -171,6 +171,9 @@ static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, enum kvm_pgtable_walk_flags visit) { struct kvm_pgtable_walker *walker = data->walker; + + /* Ensure the appropriate lock is held (e.g. RCU lock for stage-2 MMU) */ + WARN_ON_ONCE(kvm_pgtable_walk_shared(ctx) && !kvm_pgtable_walk_lock_held()); return walker->cb(ctx, visit); } @@ -281,8 +284,13 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, .end = PAGE_ALIGN(walk_data.addr + size), .walker = walker, }; + int r; + + kvm_pgtable_walk_begin(); + r = _kvm_pgtable_walk(pgt, &walk_data); + kvm_pgtable_walk_end(); - return _kvm_pgtable_walk(pgt, &walk_data); + return r; } struct leaf_walk_data { diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 73ae908eb5d9..52e042399ba5 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -130,9 +130,21 @@ static void kvm_s2_free_pages_exact(void *virt, size_t size) static struct kvm_pgtable_mm_ops kvm_s2_mm_ops; +static void stage2_free_removed_table_rcu_cb(struct rcu_head *head) +{ + struct page *page = container_of(head, struct page, rcu_head); + void *pgtable = page_to_virt(page); + u32 level = page_private(page); + + kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, pgtable, level); +} + static void stage2_free_removed_table(void *addr, u32 level) { - kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, addr, level); + struct page *page = virt_to_page(addr); + + set_page_private(page, (unsigned long)level); + call_rcu(&page->rcu_head, stage2_free_removed_table_rcu_cb); } static void kvm_host_get_page(void *addr) -- 2.38.1.431.g37b22c650d-goog