On 5/17/21 10:07 PM, Claudio Imbrenda wrote: > Refactor s390_reset_acc so that its pieces can be reused in upcoming > patches. The users parameter for s390_destroy_range will be needed in > upcoming patches. > > We don't want to hold all the locks used in a walk_page_range for too > long, and the destroy page UVC does take some time to complete. > Therefore we quickly gather the pages to destroy, and then destroy them > without holding all the locks. > Acked-by: Janosch Frank <frankja@xxxxxxxxxxxxx> > Signed-off-by: Claudio Imbrenda <imbrenda@xxxxxxxxxxxxx> > --- > arch/s390/include/asm/gmap.h | 5 +- > arch/s390/kvm/pv.c | 12 ++++- > arch/s390/mm/gmap.c | 88 ++++++++++++++++++++++++------------ > 3 files changed, 73 insertions(+), 32 deletions(-) > > diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h > index 40264f60b0da..618ddc455867 100644 > --- a/arch/s390/include/asm/gmap.h > +++ b/arch/s390/include/asm/gmap.h > @@ -147,5 +147,8 @@ int gmap_mprotect_notify(struct gmap *, unsigned long start, > void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4], > unsigned long gaddr, unsigned long vmaddr); > int gmap_mark_unmergeable(void); > -void s390_reset_acc(struct mm_struct *mm); > +void s390_uv_destroy_range(struct mm_struct *mm, unsigned int users, > + unsigned long start, unsigned long end); > +void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns); > + > #endif /* _ASM_S390_GMAP_H */ > diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c > index e0532ab725bf..c3f9f30d2ed4 100644 > --- a/arch/s390/kvm/pv.c > +++ b/arch/s390/kvm/pv.c > @@ -12,6 +12,8 @@ > #include <asm/gmap.h> > #include <asm/uv.h> > #include <asm/mman.h> > +#include <linux/pagewalk.h> > +#include <linux/sched/mm.h> > #include "kvm-s390.h" > > int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) > @@ -204,8 +206,14 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc) > { > int cc; > > - /* make all pages 
accessible before destroying the guest */ > - s390_reset_acc(kvm->mm); > + /* > + * if the mm still has a mapping, make all its pages accessible > + * before destroying the guest > + */ > + if (mmget_not_zero(kvm->mm)) { > + s390_uv_destroy_range(kvm->mm, 0, 0, TASK_SIZE); > + mmput(kvm->mm); > + } > > cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), > UVC_CMD_DESTROY_SEC_CONF, rc, rrc); > diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c > index de679facc720..ad210a6e2c41 100644 > --- a/arch/s390/mm/gmap.c > +++ b/arch/s390/mm/gmap.c > @@ -2670,41 +2670,71 @@ void s390_reset_cmma(struct mm_struct *mm) > } > EXPORT_SYMBOL_GPL(s390_reset_cmma); > > -/* > - * make inaccessible pages accessible again > - */ > -static int __s390_reset_acc(pte_t *ptep, unsigned long addr, > - unsigned long next, struct mm_walk *walk) > +#define DESTROY_LOOP_THRESHOLD 32 > + > +struct reset_walk_state { > + unsigned long next; > + unsigned long count; > + unsigned long pfns[DESTROY_LOOP_THRESHOLD]; Is DESTROY_LOOP_THRESHOLD a candidate for a module parameter, backed by extensive performance testing? > +}; > + > +static int s390_gather_pages(pte_t *ptep, unsigned long addr, > + unsigned long next, struct mm_walk *walk) A "pv" somewhere in that function name would be helpful to me. Also, I think the "__" prefix applies here: the function is static and we never call it directly, and I assume that's what the "__" prefix is trying to tell us.
:) > { > + struct reset_walk_state *p = walk->private; > pte_t pte = READ_ONCE(*ptep); > > - /* There is a reference through the mapping */ > - if (pte_present(pte)) > - WARN_ON_ONCE(uv_destroy_owned_page(pte_val(pte) & PAGE_MASK)); > - > - return 0; > + if (pte_present(pte)) { > + /* we have a reference from the mapping, take an extra one */ > + get_page(phys_to_page(pte_val(pte))); > + p->pfns[p->count] = phys_to_pfn(pte_val(pte)); > + p->next = next; > + p->count++; > + } > + return p->count >= DESTROY_LOOP_THRESHOLD; > } > > -static const struct mm_walk_ops reset_acc_walk_ops = { > - .pte_entry = __s390_reset_acc, > +static const struct mm_walk_ops gather_pages_ops = { > + .pte_entry = s390_gather_pages, > }; > > -#include <linux/sched/mm.h> > -void s390_reset_acc(struct mm_struct *mm) > +/* > + * Call the Destroy secure page UVC on each page in the given array of PFNs. > + * Each page needs to have an extra reference, which will be released here. > + */ > +void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns) > { > - if (!mm_is_protected(mm)) > - return; > - /* > - * we might be called during > - * reset: we walk the pages and clear > - * close of all kvm file descriptors: we walk the pages and clear > - * exit of process on fd closure: vma already gone, do nothing > - */ > - if (!mmget_not_zero(mm)) > - return; > - mmap_read_lock(mm); > - walk_page_range(mm, 0, TASK_SIZE, &reset_acc_walk_ops, NULL); > - mmap_read_unlock(mm); > - mmput(mm); > + unsigned long i; > + > + for (i = 0; i < count; i++) { > + /* we always have an extra reference */ > + uv_destroy_owned_page(pfn_to_phys(pfns[i])); > + /* get rid of the extra reference */ > + put_page(pfn_to_page(pfns[i])); > + cond_resched(); > + } > +} > +EXPORT_SYMBOL_GPL(s390_uv_destroy_pfns); > + > +/* > + * Walk the given range of the given address space, and call the destroy > + * secure page UVC on each page. 
> + * Exit early if the number of users of the mm drops to (or below) the given > + * value. > + */ > +void s390_uv_destroy_range(struct mm_struct *mm, unsigned int users, > + unsigned long start, unsigned long end) > +{ > + struct reset_walk_state state = { .next = start }; > + int r = 1; > + > + while ((r > 0) && (atomic_read(&mm->mm_users) > users)) { > + state.count = 0; > + mmap_read_lock(mm); > + r = walk_page_range(mm, state.next, end, &gather_pages_ops, &state); > + mmap_read_unlock(mm); > + cond_resched(); > + s390_uv_destroy_pfns(state.count, state.pfns); > + } > } > -EXPORT_SYMBOL_GPL(s390_reset_acc); > +EXPORT_SYMBOL_GPL(s390_uv_destroy_range); >