On Mon, Nov 7, 2022 at 1:57 PM Oliver Upton <oliver.upton@xxxxxxxxx> wrote: > > Use RCU to safely walk the stage-2 page tables in parallel. Acquire and > release the RCU read lock when traversing the page tables. Defer the > freeing of table memory to an RCU callback. Indirect the calls into RCU > and provide stubs for hypervisor code, as RCU is not available in such a > context. > > The RCU protection doesn't amount to much at the moment, as readers are > already protected by the read-write lock (all walkers that free table > memory take the write lock). Nonetheless, a subsequent change will > further relax the locking requirements around the stage-2 MMU, thereby > depending on RCU. > > Signed-off-by: Oliver Upton <oliver.upton@xxxxxxxxx> > --- > arch/arm64/include/asm/kvm_pgtable.h | 49 ++++++++++++++++++++++++++++ > arch/arm64/kvm/hyp/pgtable.c | 10 +++++- > arch/arm64/kvm/mmu.c | 14 +++++++- > 3 files changed, 71 insertions(+), 2 deletions(-) > > diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h > index e70cf57b719e..7634b6964779 100644 > --- a/arch/arm64/include/asm/kvm_pgtable.h > +++ b/arch/arm64/include/asm/kvm_pgtable.h > @@ -37,6 +37,13 @@ static inline u64 kvm_get_parange(u64 mmfr0) > > typedef u64 kvm_pte_t; > > +/* > + * RCU cannot be used in a non-kernel context such as the hyp. As such, page > + * table walkers used in hyp do not call into RCU and instead use other > + * synchronization mechanisms (such as a spinlock). 
> + */ > +#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__) > + > typedef kvm_pte_t *kvm_pteref_t; > > static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared) > @@ -44,6 +51,40 @@ static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared > return pteref; > } > > +static inline void kvm_pgtable_walk_begin(void) {} > +static inline void kvm_pgtable_walk_end(void) {} > + > +static inline bool kvm_pgtable_walk_lock_held(void) > +{ > + return true; Forgive my ignorance, but does hyp not use an MMU lock at all? Seems like this would be a good place to add a lockdep check. > +} > + > +#else > + > +typedef kvm_pte_t __rcu *kvm_pteref_t; > + > +static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared) > +{ > + return rcu_dereference_check(pteref, !shared); Same here, could add a lockdep check depending on shared. > +} > + > +static inline void kvm_pgtable_walk_begin(void) > +{ > + rcu_read_lock(); > +} > + > +static inline void kvm_pgtable_walk_end(void) > +{ > + rcu_read_unlock(); > +} > + > +static inline bool kvm_pgtable_walk_lock_held(void) > +{ > + return rcu_read_lock_held(); Likewise could do some lockdep here. > +} > + > +#endif > + > #define KVM_PTE_VALID BIT(0) > > #define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) > @@ -202,11 +243,14 @@ struct kvm_pgtable { > * children. > * @KVM_PGTABLE_WALK_TABLE_POST: Visit table entries after their > * children. > + * @KVM_PGTABLE_WALK_SHARED: Indicates the page-tables may be shared > + * with other software walkers. > */ > enum kvm_pgtable_walk_flags { > KVM_PGTABLE_WALK_LEAF = BIT(0), > KVM_PGTABLE_WALK_TABLE_PRE = BIT(1), > KVM_PGTABLE_WALK_TABLE_POST = BIT(2), > + KVM_PGTABLE_WALK_SHARED = BIT(3), Not sure if necessary, but it might pay to have 3 shared options: exclusive, shared mmu lock, no mmu lock if we ever want lockless fast page faults. 
> }; > > struct kvm_pgtable_visit_ctx { > @@ -223,6 +267,11 @@ struct kvm_pgtable_visit_ctx { > typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx, > enum kvm_pgtable_walk_flags visit); > > +static inline bool kvm_pgtable_walk_shared(const struct kvm_pgtable_visit_ctx *ctx) > +{ > + return ctx->flags & KVM_PGTABLE_WALK_SHARED; > +} > + > /** > * struct kvm_pgtable_walker - Hook into a page-table walk. > * @cb: Callback function to invoke during the walk. > diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c > index 7c9782347570..d8d963521d4e 100644 > --- a/arch/arm64/kvm/hyp/pgtable.c > +++ b/arch/arm64/kvm/hyp/pgtable.c > @@ -171,6 +171,9 @@ static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, > enum kvm_pgtable_walk_flags visit) > { > struct kvm_pgtable_walker *walker = data->walker; > + > + /* Ensure the appropriate lock is held (e.g. RCU lock for stage-2 MMU) */ > + WARN_ON_ONCE(kvm_pgtable_walk_shared(ctx) && !kvm_pgtable_walk_lock_held()); > return walker->cb(ctx, visit); > } > > @@ -281,8 +284,13 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, > .end = PAGE_ALIGN(walk_data.addr + size), > .walker = walker, > }; > + int r; > + > + kvm_pgtable_walk_begin(); > + r = _kvm_pgtable_walk(pgt, &walk_data); > + kvm_pgtable_walk_end(); > > - return _kvm_pgtable_walk(pgt, &walk_data); > + return r; > } > > struct leaf_walk_data { > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c > index 73ae908eb5d9..52e042399ba5 100644 > --- a/arch/arm64/kvm/mmu.c > +++ b/arch/arm64/kvm/mmu.c > @@ -130,9 +130,21 @@ static void kvm_s2_free_pages_exact(void *virt, size_t size) > > static struct kvm_pgtable_mm_ops kvm_s2_mm_ops; > > +static void stage2_free_removed_table_rcu_cb(struct rcu_head *head) > +{ > + struct page *page = container_of(head, struct page, rcu_head); > + void *pgtable = page_to_virt(page); > + u32 level = page_private(page); > + > + 
kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, pgtable, level); > +} > + > static void stage2_free_removed_table(void *addr, u32 level) > { > - kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, addr, level); > + struct page *page = virt_to_page(addr); > + > + set_page_private(page, (unsigned long)level); > + call_rcu(&page->rcu_head, stage2_free_removed_table_rcu_cb); > } > > static void kvm_host_get_page(void *addr) > -- > 2.38.1.431.g37b22c650d-goog >