* Yu Zhao <yuzhao@xxxxxxxxxx> [220706 18:26]:
> On Wed, Jul 06, 2022 at 04:00:17PM -0600, Yu Zhao wrote:
>
> ...
>
> > +/*
> > + * Some userspace memory allocators map many single-page VMAs. Instead of
> > + * returning back to the PGD table for each of such VMAs, finish an entire PMD
> > + * table to reduce zigzags and improve cache performance.
> > + */
> > +static bool get_next_vma(unsigned long mask, unsigned long size, struct mm_walk *args,
> > +			 unsigned long *vm_start, unsigned long *vm_end)
> > +{
> > +	unsigned long start = round_up(*vm_end, size);
> > +	unsigned long end = (start | ~mask) + 1;
> > +
> > +	VM_WARN_ON_ONCE(mask & size);
> > +	VM_WARN_ON_ONCE((start & mask) != (*vm_start & mask));
> > +
> > +	while (args->vma) {
> > +		if (start >= args->vma->vm_end) {
> > +			args->vma = args->vma->vm_next;
> > +			continue;
> > +		}
> > +
> > +		if (end && end <= args->vma->vm_start)
> > +			return false;
> > +
> > +		if (should_skip_vma(args->vma->vm_start, args->vma->vm_end, args)) {
> > +			args->vma = args->vma->vm_next;
> > +			continue;
> > +		}
> > +
> > +		*vm_start = max(start, args->vma->vm_start);
> > +		*vm_end = min(end - 1, args->vma->vm_end - 1) + 1;
> > +
> > +		return true;
> > +	}
> > +
> > +	return false;
> > +}
>
> To make the above work on top of the Maple Tree:
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 7096ff7836db..c0c1195da803 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -3689,23 +3689,14 @@ static bool get_next_vma(unsigned long mask, unsigned long size, struct mm_walk
>  {
>  	unsigned long start = round_up(*vm_end, size);
>  	unsigned long end = (start | ~mask) + 1;
> +	VMA_ITERATOR(vmi, args->mm, start);
>
>  	VM_WARN_ON_ONCE(mask & size);
>  	VM_WARN_ON_ONCE((start & mask) != (*vm_start & mask));
>
> -	while (args->vma) {
> -		if (start >= args->vma->vm_end) {
> -			args->vma = args->vma->vm_next;
> +	for_each_vma_range(vmi, args->vma, end) {
> +		if (should_skip_vma(args->vma->vm_start, args->vma->vm_end, args))
>  			continue;
> -		}
> -
> -		if (end && end <= args->vma->vm_start)
> -			return false;
> -
> -		if (should_skip_vma(args->vma->vm_start, args->vma->vm_end, args)) {
> -			args->vma = args->vma->vm_next;
> -			continue;
> -		}
>
>  		*vm_start = max(start, args->vma->vm_start);
>  		*vm_end = min(end - 1, args->vma->vm_end - 1) + 1;

This looks correct to me. There are a few subtle things that happen
with this change but it all works out rather nicely.

Thanks,
Liam