On Mon, 7 Feb 2022 11:02:28 +0100 Janosch Frank <frankja@xxxxxxxxxxxxx> wrote: > On 2/4/22 16:53, Claudio Imbrenda wrote: > > Refactor s390_reset_acc so that it can be reused in upcoming patches. > > > > We don't want to hold all the locks used in a walk_page_range for too > > long, and the destroy page UVC does take some time to complete. > > Therefore we quickly gather the pages to destroy, and then destroy them > > without holding all the locks. > > > > The new refactored function optionally allows to return early without > > completing if a fatal signal is pending (and return an appropriate > > error code). Two wrappers are provided to call the new function. > > > > Signed-off-by: Claudio Imbrenda <imbrenda@xxxxxxxxxxxxx> > > (dropping Janosch's Ack because of major changes to the patch) > > --- > > arch/s390/include/asm/gmap.h | 36 +++++++++++++- > > arch/s390/kvm/pv.c | 12 ++++- > > arch/s390/mm/gmap.c | 95 +++++++++++++++++++++++++----------- > > 3 files changed, 111 insertions(+), 32 deletions(-) > > > > diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h > > index 746e18bf8984..2a913014f63c 100644 > > --- a/arch/s390/include/asm/gmap.h > > +++ b/arch/s390/include/asm/gmap.h > > @@ -147,7 +147,41 @@ int gmap_mprotect_notify(struct gmap *, unsigned long start, > > void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4], > > unsigned long gaddr, unsigned long vmaddr); > > int gmap_mark_unmergeable(void); > > -void s390_reset_acc(struct mm_struct *mm); > > void s390_remove_old_asce(struct gmap *gmap); > > int s390_replace_asce(struct gmap *gmap); > > +void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns); > > +int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start, > > + unsigned long end, int interruptible); > > s/int/bool/ ? I don't know, I like the fact that the function could return different error codes in the future if needed. 
> > > + > > +/** > > + * s390_uv_destroy_range - Destroy a range of pages in the given mm. > > + * @mm the mm on which to operate on > > + * @start the start of the range > > + * @end the end of the range > > + * > > + * This call will call cond_sched, so it should not generate stalls, but it > > This function will call ? will fix > > > + * will otherwise only return when it completed. > > + */ > > +static inline void s390_uv_destroy_range(struct mm_struct *mm, unsigned long start, > > + unsigned long end) > > +{ > > + (void)__s390_uv_destroy_range(mm, start, end, 0); > > +} > > + > > +/** > > + * s390_uv_destroy_range_interruptibe - Destroy a range of pages in the > > interruptible will fix > > > + * given mm, but stop when a fatal signal is received. > > + * @mm the mm on which to operate on > > + * @start the start of the range > > + * @end the end of the range > > + * > > + * This call will call cond_sched, so it should not generate stalls. It > > + * will return -EINTR if a fatal signal is received, or 0 if the whole range > > + * has been destroyed. 
> > + */ > > +static inline int s390_uv_destroy_range_interruptible(struct mm_struct *mm, unsigned long start, > > + unsigned long end) > > +{ > > + return __s390_uv_destroy_range(mm, start, end, 1); > > +} > > #endif /* _ASM_S390_GMAP_H */ > > diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c > > index 3c59ef763dde..2ab22500e092 100644 > > --- a/arch/s390/kvm/pv.c > > +++ b/arch/s390/kvm/pv.c > > @@ -12,6 +12,8 @@ > > #include <asm/gmap.h> > > #include <asm/uv.h> > > #include <asm/mman.h> > > +#include <linux/pagewalk.h> > > +#include <linux/sched/mm.h> > > #include "kvm-s390.h" > > > > int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) > > @@ -157,8 +159,14 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc) > > { > > int cc; > > > > - /* make all pages accessible before destroying the guest */ > > - s390_reset_acc(kvm->mm); > > + /* > > + * if the mm still has a mapping, make all its pages accessible > > + * before destroying the guest > > + */ > > + if (mmget_not_zero(kvm->mm)) { > > + s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE); > > + mmput(kvm->mm); > > + } > > > > cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), > > UVC_CMD_DESTROY_SEC_CONF, rc, rrc); > > diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c > > index ce6cac4463f2..6eb5acb4be3d 100644 > > --- a/arch/s390/mm/gmap.c > > +++ b/arch/s390/mm/gmap.c > > @@ -2676,44 +2676,81 @@ void s390_reset_cmma(struct mm_struct *mm) > > } > > EXPORT_SYMBOL_GPL(s390_reset_cmma); > > > > -/* > > - * make inaccessible pages accessible again > > - */ > > -static int __s390_reset_acc(pte_t *ptep, unsigned long addr, > > - unsigned long next, struct mm_walk *walk) > > +#define DESTROY_LOOP_THRESHOLD 32 > > + > > +struct reset_walk_state { > > + unsigned long next; > > + unsigned long count; > > + unsigned long pfns[DESTROY_LOOP_THRESHOLD]; > > +}; > > + > > +static int s390_gather_pages(pte_t *ptep, unsigned long addr, > > + unsigned long next, struct mm_walk *walk) > > { > > + 
struct reset_walk_state *p = walk->private; > > pte_t pte = READ_ONCE(*ptep); > > > > - /* There is a reference through the mapping */ > > - if (pte_present(pte)) > > - WARN_ON_ONCE(uv_destroy_owned_page(pte_val(pte) & PAGE_MASK)); > > - > > - return 0; > > + if (pte_present(pte)) { > > + /* we have a reference from the mapping, take an extra one */ > > + get_page(phys_to_page(pte_val(pte))); > > + p->pfns[p->count] = phys_to_pfn(pte_val(pte)); > > + p->next = next; > > + p->count++; > > + } > > + return p->count >= DESTROY_LOOP_THRESHOLD; > > } > > > > -static const struct mm_walk_ops reset_acc_walk_ops = { > > - .pte_entry = __s390_reset_acc, > > +static const struct mm_walk_ops gather_pages_ops = { > > + .pte_entry = s390_gather_pages, > > }; > > > > -#include <linux/sched/mm.h> > > -void s390_reset_acc(struct mm_struct *mm) > > +/* > > + * Call the Destroy secure page UVC on each page in the given array of PFNs. > > + * Each page needs to have an extra reference, which will be released here. 
> > + */ > > +void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns) > > { > > - if (!mm_is_protected(mm)) > > - return; > > - /* > > - * we might be called during > > - * reset: we walk the pages and clear > > - * close of all kvm file descriptors: we walk the pages and clear > > - * exit of process on fd closure: vma already gone, do nothing > > - */ > > - if (!mmget_not_zero(mm)) > > - return; > > - mmap_read_lock(mm); > > - walk_page_range(mm, 0, TASK_SIZE, &reset_acc_walk_ops, NULL); > > - mmap_read_unlock(mm); > > - mmput(mm); > > + unsigned long i; > > + > > + for (i = 0; i < count; i++) { > > + /* we always have an extra reference */ > > + uv_destroy_owned_page(pfn_to_phys(pfns[i])); > > + /* get rid of the extra reference */ > > + put_page(pfn_to_page(pfns[i])); > > + cond_resched(); > > + } > > +} > > +EXPORT_SYMBOL_GPL(s390_uv_destroy_pfns); > > + > > +/** > > + * __s390_uv_destroy_range - Walk the given range of the given address > > + * space, and call the destroy secure page UVC on each page. > > + * Optionally exit early if a fatal signal is pending. 
> > + * @mm the mm to operate on > > + * @start the start of the range > > + * @end the end of the range > > + * @interruptible if not 0, stop when a fatal signal is received > > + * Return: 0 on success, -EINTR if the function stopped before completing > > + */ > > +int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start, > > + unsigned long end, int interruptible) > > +{ > > + struct reset_walk_state state = { .next = start }; > > + int r = 1; > > + > > + while (r > 0) { > > + state.count = 0; > > + mmap_read_lock(mm); > > + r = walk_page_range(mm, state.next, end, &gather_pages_ops, &state); > > + mmap_read_unlock(mm); > > + cond_resched(); > > + s390_uv_destroy_pfns(state.count, state.pfns); > > + if (interruptible && fatal_signal_pending(current)) > > + return -EINTR; > > + } > > + return 0; > > } > > -EXPORT_SYMBOL_GPL(s390_reset_acc); > > +EXPORT_SYMBOL_GPL(__s390_uv_destroy_range); > > > > /** > > * s390_remove_old_asce - Remove the topmost level of page tables from the > > >