Hugh Dickins <hughd@xxxxxxxxxx> writes: > 4.10-rc loadtest (even on x86, even without THPCache) fails with > "fork: Cannot allocate memory" or some such; and /proc/meminfo > shows PageTables growing. > > rc1 removed the freeing of an unused preallocated pagetable after > do_fault_around() has called map_pages(): which is usually a good > optimization, so that the followup doesn't have to reallocate one; > but it's not sufficient to shift the freeing into alloc_set_pte(), > since there are failure cases (most commonly VM_FAULT_RETRY) which > never reach finish_fault(). > > Check and free it at the outer level in do_fault(), then we don't > need to worry in alloc_set_pte(), and can restore that to how it was > (I cannot find any reason to pte_free() under lock as it was doing). > > And fix a separate pagetable leak, or crash, introduced by the same > change, that could only show up on some ppc64: why does do_set_pmd()'s > failure case attempt to withdraw a pagetable when it never deposited > one, at the same time overwriting (so leaking) the vmf->prealloc_pte? > Residue of an earlier implementation, perhaps? Delete it. That change is part of -mm tree. https://lkml.kernel.org/r/20161212163428.6780-1-aneesh.kumar@xxxxxxxxxxxxxxxxxx > > Fixes: 953c66c2b22a ("mm: THP page cache support for ppc64") > Signed-off-by: Hugh Dickins <hughd@xxxxxxxxxx> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxxxxxxx> > --- > > mm/memory.c | 47 ++++++++++++++++++++--------------------------- > 1 file changed, 20 insertions(+), 27 deletions(-) > > --- 4.10-rc2/mm/memory.c 2016-12-25 18:40:50.830453384 -0800 > +++ linux/mm/memory.c 2017-01-07 13:34:29.373381551 -0800 > @@ -3008,13 +3008,6 @@ static int do_set_pmd(struct vm_fault *v > ret = 0; > count_vm_event(THP_FILE_MAPPED); > out: > - /* > - * If we are going to fallback to pte mapping, do a > - * withdraw with pmd lock held. > - */ > - if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK) > - vmf->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm, > - vmf->pmd); > spin_unlock(vmf->ptl); > return ret; > } > @@ -3055,20 +3048,18 @@ int alloc_set_pte(struct vm_fault *vmf, > > ret = do_set_pmd(vmf, page); > if (ret != VM_FAULT_FALLBACK) > - goto fault_handled; > + return ret; > } > > if (!vmf->pte) { > ret = pte_alloc_one_map(vmf); > if (ret) > - goto fault_handled; > + return ret; > } > > /* Re-check under ptl */ > - if (unlikely(!pte_none(*vmf->pte))) { > - ret = VM_FAULT_NOPAGE; > - goto fault_handled; > - } > + if (unlikely(!pte_none(*vmf->pte))) > + return VM_FAULT_NOPAGE; > > flush_icache_page(vma, page); > entry = mk_pte(page, vma->vm_page_prot); > @@ -3088,15 +3079,8 @@ int alloc_set_pte(struct vm_fault *vmf, > > /* no need to invalidate: a not-present page won't be cached */ > update_mmu_cache(vma, vmf->address, vmf->pte); > - ret = 0; > > -fault_handled: > - /* preallocated pagetable is unused: free it */ > - if (vmf->prealloc_pte) { > - pte_free(vmf->vma->vm_mm, vmf->prealloc_pte); > - vmf->prealloc_pte = 0; > - } > - return ret; > + return 0; > } > > > @@ -3360,15 +3344,24 @@ static int do_shared_fault(struct vm_fau > static int do_fault(struct vm_fault *vmf) > { > struct vm_area_struct *vma = vmf->vma; > + int ret; > > /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ > if (!vma->vm_ops->fault) > - return VM_FAULT_SIGBUS; > - if (!(vmf->flags & FAULT_FLAG_WRITE)) > - return do_read_fault(vmf); > - if (!(vma->vm_flags & VM_SHARED)) > - return do_cow_fault(vmf); > - return do_shared_fault(vmf); > + ret = VM_FAULT_SIGBUS; > + else if (!(vmf->flags & FAULT_FLAG_WRITE)) > + ret = do_read_fault(vmf); > + else if (!(vma->vm_flags & VM_SHARED)) > + ret = do_cow_fault(vmf); > + else > + ret = do_shared_fault(vmf); > + > + /* preallocated pagetable is unused: free it */ > + if (vmf->prealloc_pte) { > + pte_free(vma->vm_mm, vmf->prealloc_pte); > + vmf->prealloc_pte = 0; > + } > + return ret; > } > > static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>