On Fri, Apr 15, 2022 at 2:59 PM Oliver Upton <oupton@xxxxxxxxxx> wrote:
>
> For parallel walks that collapse a table into a block KVM ensures a
> locked invalid pte is visible to all observers in pre-order traversal.
> As such, there is no need to try breaking the pte again.

When you're doing the pre- and post-order traversals, are they
implemented as separate traversals from the root, or is it a single
walk where non-leaf nodes are visited both on the way down and on the
way up? (See the toy sketch at the bottom of this mail for what I mean
by the latter.) I assume either could be made to work, but the
re-traversal from the root probably minimizes TLB flushes, whereas the
combined pre-and-post-order walk would be the more efficient traversal?

>
> Directly set the pte if it has already been broken.
>
> Signed-off-by: Oliver Upton <oupton@xxxxxxxxxx>
> ---
>  arch/arm64/kvm/hyp/pgtable.c | 22 ++++++++++++++++------
>  1 file changed, 16 insertions(+), 6 deletions(-)
>
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index 146fc44acf31..121818d4c33e 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -924,7 +924,7 @@ static bool stage2_leaf_mapping_allowed(u64 addr, u64 end, u32 level,
>  static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
>                                        kvm_pte_t *ptep, kvm_pte_t old,
>                                        struct stage2_map_data *data,
> -                                      bool shared)
> +                                      bool shared, bool locked)
>  {
>         kvm_pte_t new;
>         u64 granule = kvm_granule_size(level), phys = data->phys;
> @@ -948,7 +948,7 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
>         if (!stage2_pte_needs_update(old, new))
>                 return -EAGAIN;
>
> -       if (!stage2_try_break_pte(ptep, old, addr, level, shared, data))
> +       if (!locked && !stage2_try_break_pte(ptep, old, addr, level, shared, data))
>                 return -EAGAIN;
>
>         /* Perform CMOs before installation of the guest stage-2 PTE */
> @@ -987,7 +987,8 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
>  }
>
>  static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
> -                               kvm_pte_t *old, struct stage2_map_data *data, bool shared)
> +                               kvm_pte_t *old, struct stage2_map_data *data, bool shared,
> +                               bool locked)
>  {
>         struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
>         kvm_pte_t *childp, pte;
> @@ -998,10 +999,13 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
>                 return 0;
>         }
>
> -       ret = stage2_map_walker_try_leaf(addr, end, level, ptep, *old, data, shared);
> +       ret = stage2_map_walker_try_leaf(addr, end, level, ptep, *old, data, shared, locked);
>         if (ret != -E2BIG)
>                 return ret;
>
> +       /* We should never attempt installing a table in post-order */
> +       WARN_ON(locked);
> +
>         if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
>                 return -EINVAL;
>
> @@ -1048,7 +1052,13 @@ static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level,
>                 childp = data->childp;
>                 data->anchor = NULL;
>                 data->childp = NULL;
> -               ret = stage2_map_walk_leaf(addr, end, level, ptep, old, data, shared);
> +
> +               /*
> +                * We are guaranteed exclusive access to the pte in post-order
> +                * traversal since the locked value was made visible to all
> +                * observers in stage2_map_walk_table_pre.
> +                */
> +               ret = stage2_map_walk_leaf(addr, end, level, ptep, old, data, shared, true);
>         } else {
>                 childp = kvm_pte_follow(*old, mm_ops);
>         }
> @@ -1087,7 +1097,7 @@ static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, kvm_
>         case KVM_PGTABLE_WALK_TABLE_PRE:
>                 return stage2_map_walk_table_pre(addr, end, level, ptep, old, data, shared);
>         case KVM_PGTABLE_WALK_LEAF:
> -               return stage2_map_walk_leaf(addr, end, level, ptep, old, data, shared);
> +               return stage2_map_walk_leaf(addr, end, level, ptep, old, data, shared, false);
>         case KVM_PGTABLE_WALK_TABLE_POST:
>                 return stage2_map_walk_table_post(addr, end, level, ptep, old, data, shared);
>         }
> --
> 2.36.0.rc0.470.gd361397f0d-goog
>
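
For reference, here is a rough, purely illustrative sketch of the
"single walk" shape I was asking about above (the second option). The
node/visitor types and names are made up for the example and are not
the actual pgtable walker code:

/*
 * Toy sketch only (hypothetical types/names): a single traversal in
 * which table entries get a pre-order callback on the way down and a
 * post-order callback on the way back up, with leaves visited once in
 * between.
 */
#include <stdbool.h>
#include <stddef.h>

enum visit_type { VISIT_TABLE_PRE, VISIT_LEAF, VISIT_TABLE_POST };

struct node {
        bool is_table;
        struct node *children[512];
};

typedef int (*visitor_fn)(struct node *node, int level, enum visit_type v);

static int walk(struct node *node, int level, visitor_fn fn)
{
        int ret;
        size_t i;

        if (!node->is_table)
                return fn(node, level, VISIT_LEAF);

        /* Non-leaf visited on the way down... */
        ret = fn(node, level, VISIT_TABLE_PRE);
        if (ret)
                return ret;

        for (i = 0; i < 512; i++) {
                if (!node->children[i])
                        continue;

                ret = walk(node->children[i], level + 1, fn);
                if (ret)
                        return ret;
        }

        /* ...and again on the way back up, all in a single pass. */
        return fn(node, level, VISIT_TABLE_POST);
}

The alternative I was contrasting it with would be to finish one walk
from the root entirely and then start a second walk from the root for
the post-order work.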