Breaking a table pte is insufficient to guarantee ownership of an unlinked subtree. Parallel software walkers could be traversing substructures and changing their mappings. Recurse through the unlinked subtree and lock all descendant ptes to take ownership of the subtree. Since the ptes are actually being evicted, return table ptes back to the table walker to ensure child tables are also traversed. Note that this is done in both the pre-order and leaf visitors as the underlying pte remains volatile until it is unlinked. Signed-off-by: Oliver Upton <oupton@xxxxxxxxxx> --- arch/arm64/kvm/hyp/pgtable.c | 56 +++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index ffdfd5ee9642..146fc44acf31 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -838,6 +838,54 @@ static void stage2_make_pte(kvm_pte_t *ptep, kvm_pte_t new, struct kvm_pgtable_m } } +static kvm_pte_t stage2_unlink_pte_shared(kvm_pte_t *ptep) +{ + kvm_pte_t old; + + while (true) { + old = xchg(ptep, KVM_INVALID_PTE_LOCKED); + if (old != KVM_INVALID_PTE_LOCKED) + return old; + + cpu_relax(); + } +} + + +/** + * stage2_unlink_pte() - Tears down an unreachable pte, returning the next pte + * to visit (if any). + * + * @ptep: pointer to the pte to unlink + * @level: page table level of the pte + * @shared: true if the tables are shared by multiple software walkers + * @mm_ops: pointer to the mm ops table + * + * Return: a table pte if another level of recursion is necessary, 0 otherwise. 
+ */ +static kvm_pte_t stage2_unlink_pte(kvm_pte_t *ptep, u32 level, bool shared, + struct kvm_pgtable_mm_ops *mm_ops) +{ + kvm_pte_t old; + + if (shared) { + old = stage2_unlink_pte_shared(ptep); + } else { + old = *ptep; + WRITE_ONCE(*ptep, KVM_INVALID_PTE_LOCKED); + } + + WARN_ON(stage2_pte_is_locked(old)); + + if (kvm_pte_table(old, level)) + return old; + + if (stage2_pte_is_counted(old)) + mm_ops->put_page(ptep); + + return 0; +} + static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr, u32 level, struct kvm_pgtable_mm_ops *mm_ops) { @@ -922,8 +970,10 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level, struct stage2_map_data *data, bool shared) { - if (data->anchor) + if (data->anchor) { + *old = stage2_unlink_pte(ptep, level, shared, data->mm_ops); return 0; + } if (!stage2_leaf_mapping_allowed(addr, end, level, data)) return 0; @@ -944,9 +994,7 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, int ret; if (data->anchor) { - if (stage2_pte_is_counted(*old)) - mm_ops->put_page(ptep); - + *old = stage2_unlink_pte(ptep, level, shared, data->mm_ops); return 0; } -- 2.36.0.rc0.470.gd361397f0d-goog