On Fri, Apr 15, 2022 at 2:59 PM Oliver Upton <oupton@xxxxxxxxxx> wrote:
>
> For parallel walks that collapse a table into a block KVM ensures a
> locked invalid pte is visible to all observers in pre-order traversal.
> As such, there is no need to try breaking the pte again.

When you're doing the pre- and post-order traversals, are they
implemented as separate traversals from the root, or is it a single
walk where non-leaf nodes are visited both on the way down and on the
way up? (See the toy sketch at the bottom of this mail for what I mean
by the latter.) I assume either could be made to work, but the
re-traversal from the root probably minimizes TLB flushes, whereas the
combined pre-and-post-order walk would be the more efficient traversal?

>
> Directly set the pte if it has already been broken.
>
> Signed-off-by: Oliver Upton <oupton@xxxxxxxxxx>
> ---
>  arch/arm64/kvm/hyp/pgtable.c | 22 ++++++++++++++++------
>  1 file changed, 16 insertions(+), 6 deletions(-)
>
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index 146fc44acf31..121818d4c33e 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -924,7 +924,7 @@ static bool stage2_leaf_mapping_allowed(u64 addr, u64 end, u32 level,
>  static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
>                                        kvm_pte_t *ptep, kvm_pte_t old,
>                                        struct stage2_map_data *data,
> -                                      bool shared)
> +                                      bool shared, bool locked)
>  {
>         kvm_pte_t new;
>         u64 granule = kvm_granule_size(level), phys = data->phys;
> @@ -948,7 +948,7 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
>         if (!stage2_pte_needs_update(old, new))
>                 return -EAGAIN;
>
> -       if (!stage2_try_break_pte(ptep, old, addr, level, shared, data))
> +       if (!locked && !stage2_try_break_pte(ptep, old, addr, level, shared, data))
>                 return -EAGAIN;
>
>         /* Perform CMOs before installation of the guest stage-2 PTE */
> @@ -987,7 +987,8 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
>  }
>
>  static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
> -                               kvm_pte_t *old, struct stage2_map_data *data, bool shared)
> +                               kvm_pte_t *old, struct stage2_map_data *data, bool shared,
> +                               bool locked)
>  {
>         struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
>         kvm_pte_t *childp, pte;
> @@ -998,10 +999,13 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
>                 return 0;
>         }
>
> -       ret = stage2_map_walker_try_leaf(addr, end, level, ptep, *old, data, shared);
> +       ret = stage2_map_walker_try_leaf(addr, end, level, ptep, *old, data, shared, locked);
>         if (ret != -E2BIG)
>                 return ret;
>
> +       /* We should never attempt installing a table in post-order */
> +       WARN_ON(locked);
> +
>         if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
>                 return -EINVAL;
>
> @@ -1048,7 +1052,13 @@ static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level,
>                 childp = data->childp;
>                 data->anchor = NULL;
>                 data->childp = NULL;
> -               ret = stage2_map_walk_leaf(addr, end, level, ptep, old, data, shared);
> +
> +               /*
> +                * We are guaranteed exclusive access to the pte in post-order
> +                * traversal since the locked value was made visible to all
> +                * observers in stage2_map_walk_table_pre.
> +                */
> +               ret = stage2_map_walk_leaf(addr, end, level, ptep, old, data, shared, true);
>         } else {
>                 childp = kvm_pte_follow(*old, mm_ops);
>         }
> @@ -1087,7 +1097,7 @@ static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, kvm_
>         case KVM_PGTABLE_WALK_TABLE_PRE:
>                 return stage2_map_walk_table_pre(addr, end, level, ptep, old, data, shared);
>         case KVM_PGTABLE_WALK_LEAF:
> -               return stage2_map_walk_leaf(addr, end, level, ptep, old, data, shared);
> +               return stage2_map_walk_leaf(addr, end, level, ptep, old, data, shared, false);
>         case KVM_PGTABLE_WALK_TABLE_POST:
>                 return stage2_map_walk_table_post(addr, end, level, ptep, old, data, shared);
>         }
> --
> 2.36.0.rc0.470.gd361397f0d-goog
>
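
For reference, here is a rough, purely illustrative sketch of the
"single walk" shape I was asking about above (the second option). The
node/visitor types and names are made up for the example and are not
the actual pgtable walker code:

/*
 * Toy sketch only (hypothetical types/names): a single traversal in
 * which table entries get a pre-order callback on the way down and a
 * post-order callback on the way back up, with leaves visited once in
 * between.
 */
#include <stdbool.h>
#include <stddef.h>

enum visit_type { VISIT_TABLE_PRE, VISIT_LEAF, VISIT_TABLE_POST };

struct node {
        bool is_table;
        struct node *children[512];
};

typedef int (*visitor_fn)(struct node *node, int level, enum visit_type v);

static int walk(struct node *node, int level, visitor_fn fn)
{
        int ret;
        size_t i;

        if (!node->is_table)
                return fn(node, level, VISIT_LEAF);

        /* Non-leaf visited on the way down... */
        ret = fn(node, level, VISIT_TABLE_PRE);
        if (ret)
                return ret;

        for (i = 0; i < 512; i++) {
                if (!node->children[i])
                        continue;

                ret = walk(node->children[i], level + 1, fn);
                if (ret)
                        return ret;
        }

        /* ...and again on the way back up, all in a single pass. */
        return fn(node, level, VISIT_TABLE_POST);
}

The alternative I was contrasting it with would be to finish one walk
from the root entirely and then start a second walk from the root for
the post-order work.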