On Fri, Jun 25, 2021, Peter Xu wrote:
> Using rmap_get_first() and rmap_remove() for zapping a huge rmap list could be
> slow. The easy way is to traverse the rmap list, collecting the a/d bits and
> freeing the slots along the way.
>
> Provide a pte_list_destroy() and do exactly that.
>
> Signed-off-by: Peter Xu <peterx@xxxxxxxxxx>
> ---
>  arch/x86/kvm/mmu/mmu.c | 45 +++++++++++++++++++++++++++++++-----------
>  1 file changed, 33 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index ba0258bdebc4..45aac78dcabc 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -1014,6 +1014,38 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
>          return count;
>  }
>
> +/* Return true if rmap existed and callback called, false otherwise */
> +static bool pte_list_destroy(struct kvm_rmap_head *rmap_head,
> +                             int (*callback)(u64 *sptep))
> +{
> +        struct pte_list_desc *desc, *next;
> +        int i;
> +
> +        if (!rmap_head->val)
> +                return false;
> +
> +        if (!(rmap_head->val & 1)) {
> +                if (callback)
> +                        callback((u64 *)rmap_head->val);
> +                goto out;
> +        }
> +
> +        desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> +
> +        while (desc) {
> +                if (callback)
> +                        for (i = 0; i < desc->spte_count; i++)
> +                                callback(desc->sptes[i]);
> +                next = desc->more;
> +                mmu_free_pte_list_desc(desc);
> +                desc = next;

Alternatively,

        desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
        for ( ; desc; desc = next) {
                for (i = 0; i < desc->spte_count; i++)
                        mmu_spte_clear_track_bits(desc->sptes[i]);
                next = desc->more;
                mmu_free_pte_list_desc(desc);
        }

> +        }
> +out:
> +        /* rmap_head is meaningless now, remember to reset it */
> +        rmap_head->val = 0;
> +        return true;

Why implement this as a generic method with a callback?  gcc is surprisingly
astute at optimizing callback(), but I don't see the point of adding a complex
helper that has a single caller, and is extremely unlikely to gain new
callers.  Or is there another "zap everything" case I'm missing?

E.g. why not this?

static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
                          const struct kvm_memory_slot *slot)
{
        struct pte_list_desc *desc, *next;
        int i;

        if (!rmap_head->val)
                return false;

        if (!(rmap_head->val & 1)) {
                mmu_spte_clear_track_bits((u64 *)rmap_head->val);
                goto out;
        }

        desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
        for ( ; desc; desc = next) {
                for (i = 0; i < desc->spte_count; i++)
                        mmu_spte_clear_track_bits(desc->sptes[i]);
                next = desc->more;
                mmu_free_pte_list_desc(desc);
        }
out:
        /* rmap_head is meaningless now, remember to reset it */
        rmap_head->val = 0;
        return true;
}

> +}
> +
>  static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level,
>                                             struct kvm_memory_slot *slot)
>  {
> @@ -1403,18 +1435,7 @@ static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
>  static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
>                            struct kvm_memory_slot *slot)
>  {
> -        u64 *sptep;
> -        struct rmap_iterator iter;
> -        bool flush = false;
> -
> -        while ((sptep = rmap_get_first(rmap_head, &iter))) {
> -                rmap_printk("spte %p %llx.\n", sptep, *sptep);
> -
> -                pte_list_remove(rmap_head, sptep);
> -                flush = true;
> -        }
> -
> -        return flush;
> +        return pte_list_destroy(rmap_head, mmu_spte_clear_track_bits);
>  }
>
>  static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
> --
> 2.31.1
>
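For anyone following along without mmu.c handy, below is a rough, userspace-only
sketch of the tagged-pointer rmap encoding and the one-pass destroy being
discussed.  PTE_LIST_EXT and the struct/field names mirror the kernel's, but
clear_spte() is a hypothetical stand-in for mmu_spte_clear_track_bits(), and
plain malloc/free replace the kernel's caches, so treat it as an illustration
under those assumptions rather than kernel code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PTE_LIST_EXT 3  /* the kernel's value differs; any >= 1 works here */

struct pte_list_desc {
        uint64_t *sptes[PTE_LIST_EXT];
        int spte_count;
        struct pte_list_desc *more;
};

/*
 * val == 0: empty; bit 0 clear: val is a single spte pointer; bit 0 set:
 * val points at a pte_list_desc chain.  malloc'd pointers are at least
 * 8-byte aligned, so bit 0 is free to use as the tag.
 */
struct kvm_rmap_head {
        unsigned long val;
};

/* Hypothetical stand-in for mmu_spte_clear_track_bits(): just zap the spte. */
static void clear_spte(uint64_t *sptep)
{
        *sptep = 0;
}

/* One pass: handle every spte, free every descriptor, reset the head. */
static bool rmap_destroy(struct kvm_rmap_head *rmap_head)
{
        struct pte_list_desc *desc, *next;
        int i;

        if (!rmap_head->val)
                return false;

        if (!(rmap_head->val & 1)) {
                clear_spte((uint64_t *)rmap_head->val);
                goto out;
        }

        desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
        for ( ; desc; desc = next) {
                for (i = 0; i < desc->spte_count; i++)
                        clear_spte(desc->sptes[i]);
                next = desc->more;
                free(desc);
        }
out:
        rmap_head->val = 0;     /* the head would dangle otherwise */
        return true;
}

int main(void)
{
        static uint64_t sptes[4] = { 1, 2, 3, 4 };
        struct pte_list_desc *d0 = calloc(1, sizeof(*d0));
        struct pte_list_desc *d1 = calloc(1, sizeof(*d1));
        struct kvm_rmap_head head;
        int i;

        /* Build a two-descriptor list holding all four sptes. */
        for (i = 0; i < 3; i++)
                d0->sptes[i] = &sptes[i];
        d0->spte_count = 3;
        d0->more = d1;
        d1->sptes[0] = &sptes[3];
        d1->spte_count = 1;
        head.val = (unsigned long)d0 | 1;

        printf("destroyed: %d\n", rmap_destroy(&head));
        for (i = 0; i < 4; i++)
                printf("spte[%d] = %llu\n", i, (unsigned long long)sptes[i]);
        return 0;
}

The point of the one-pass walk, per the commit message: rmap_get_first() plus
rmap_remove() re-reads the list head and compacts a descriptor for every spte
removed, so emptying an n-entry list does work that grows roughly
quadratically, while the destroy walk touches each spte and each descriptor
exactly once.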