On Mon, Nov 7, 2022 at 1:58 PM Oliver Upton <oliver.upton@xxxxxxxxx> wrote: > > The stage2 attr walker is already used for parallel walks. Since commit > f783ef1c0e82 ("KVM: arm64: Add fast path to handle permission relaxation > during dirty logging"), KVM acquires the read lock when > write-unprotecting a PTE. However, the walker only uses a simple store > to update the PTE. This is safe as the only possible race is with > hardware updates to the access flag, which is benign. > > However, a subsequent change to KVM will allow more changes to the stage > 2 page tables to be done in parallel. Prepare the stage 2 attribute > walker by performing atomic updates to the PTE when walking in parallel. > > Signed-off-by: Oliver Upton <oliver.upton@xxxxxxxxx> > --- > arch/arm64/kvm/hyp/pgtable.c | 31 ++++++++++++++++++++++--------- > 1 file changed, 22 insertions(+), 9 deletions(-) > > diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c > index d8d963521d4e..a34e2050f931 100644 > --- a/arch/arm64/kvm/hyp/pgtable.c > +++ b/arch/arm64/kvm/hyp/pgtable.c > @@ -185,7 +185,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, > kvm_pteref_t pteref, u32 level) > { > enum kvm_pgtable_walk_flags flags = data->walker->flags; > - kvm_pte_t *ptep = kvm_dereference_pteref(pteref, false); > + kvm_pte_t *ptep = kvm_dereference_pteref(pteref, flags & KVM_PGTABLE_WALK_SHARED); > struct kvm_pgtable_visit_ctx ctx = { > .ptep = ptep, > .old = READ_ONCE(*ptep), > @@ -675,6 +675,16 @@ static bool stage2_pte_is_counted(kvm_pte_t pte) > return !!pte; > } > > +static bool stage2_try_set_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new) > +{ > + if (!kvm_pgtable_walk_shared(ctx)) { > + WRITE_ONCE(*ctx->ptep, new); > + return true; > + } > + > + return cmpxchg(ctx->ptep, ctx->old, new) == ctx->old; > +} > + > static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu, > struct kvm_pgtable_mm_ops *mm_ops) > { > @@ -986,7 +996,9 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, > stage2_pte_executable(pte) && !stage2_pte_executable(ctx->old)) > mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops), > kvm_granule_size(ctx->level)); > - WRITE_ONCE(*ctx->ptep, pte); > + > + if (!stage2_try_set_pte(ctx, pte)) > + return -EAGAIN; > } > > return 0; > @@ -995,7 +1007,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, > static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr, > u64 size, kvm_pte_t attr_set, > kvm_pte_t attr_clr, kvm_pte_t *orig_pte, > - u32 *level) > + u32 *level, enum kvm_pgtable_walk_flags flags) > { > int ret; > kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI; > @@ -1006,7 +1018,7 @@ static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr, > struct kvm_pgtable_walker walker = { > .cb = stage2_attr_walker, > .arg = &data, > - .flags = KVM_PGTABLE_WALK_LEAF, > + .flags = flags | KVM_PGTABLE_WALK_LEAF, > }; > > ret = kvm_pgtable_walk(pgt, addr, size, &walker); > @@ -1025,14 +1037,14 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size) > { > return stage2_update_leaf_attrs(pgt, addr, size, 0, > KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, > - NULL, NULL); > + NULL, NULL, 0); > } > > kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr) > { > kvm_pte_t pte = 0; > stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0, > - &pte, NULL); > + &pte, NULL, 0); > dsb(ishst); > return pte; > } > @@ -1041,7 +1053,7 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr) > { > kvm_pte_t pte = 0; > stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF, > - &pte, NULL); > + &pte, NULL, 0); > /* > * "But where's the TLBI?!", you scream. > * "Over in the core code", I sigh. > @@ -1054,7 +1066,7 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr) > bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr) > { > kvm_pte_t pte = 0; > - stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL); > + stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL, 0); Would be nice to have an enum for KVM_PGTABLE_WALK_EXCLUSIVE so this doesn't just have to pass 0. > return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF; > } > > @@ -1077,7 +1089,8 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, > if (prot & KVM_PGTABLE_PROT_X) > clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; > > - ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level); > + ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, > + KVM_PGTABLE_WALK_SHARED); > if (!ret) > kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level); > return ret; > -- > 2.38.1.431.g37b22c650d-goog >