To date, the permission relaxation path of the stage-2 fault handler hasn't had to worry about the paging structures changing under its nose, as map operations acquire the write lock. That's about to change, which means a permission relaxation walker could traverse in parallel with a map operation. If at any point during traversal the permission relaxation walker finds a locked pte, bail immediately with -EAGAIN. Either the instruction will succeed or the vCPU will fault once more and (hopefully) walk the tables successfully. Signed-off-by: Oliver Upton <oupton@xxxxxxxxxx> --- arch/arm64/kvm/hyp/pgtable.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 059ebb921125..ff6f14755d0c 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1168,6 +1168,11 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct stage2_attr_data *data = arg; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + if (stage2_pte_is_locked(pte)) { + WARN_ON(!shared); + return -EAGAIN; + } + if (!kvm_pte_valid(pte)) return 0; @@ -1190,7 +1195,9 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, stage2_pte_executable(pte) && !stage2_pte_executable(*ptep)) mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level)); - WRITE_ONCE(*ptep, pte); + + if (!kvm_try_set_pte(ptep, data->pte, pte, shared)) + return -EAGAIN; } return 0; @@ -1199,7 +1206,7 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr, u64 size, kvm_pte_t attr_set, kvm_pte_t attr_clr, kvm_pte_t *orig_pte, - u32 *level) + u32 *level, bool shared) { int ret; kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI; @@ -1214,7 +1221,7 @@ static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr, .flags = KVM_PGTABLE_WALK_LEAF, }; - ret = 
kvm_pgtable_walk(pgt, addr, size, &walker, false); + ret = kvm_pgtable_walk(pgt, addr, size, &walker, shared); if (ret) return ret; @@ -1230,14 +1237,14 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size) { return stage2_update_leaf_attrs(pgt, addr, size, 0, KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, - NULL, NULL); + NULL, NULL, false); } kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr) { kvm_pte_t pte = 0; stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0, - &pte, NULL); + &pte, NULL, false); dsb(ishst); return pte; } @@ -1246,7 +1253,7 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr) { kvm_pte_t pte = 0; stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF, - &pte, NULL); + &pte, NULL, false); /* * "But where's the TLBI?!", you scream. * "Over in the core code", I sigh. @@ -1259,7 +1266,7 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr) bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr) { kvm_pte_t pte = 0; - stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL); + stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL, false); return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF; } @@ -1282,7 +1289,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, if (prot & KVM_PGTABLE_PROT_X) clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; - ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level); + ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, true); if (!ret) kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level); return ret; -- 2.36.0.rc0.470.gd361397f0d-goog