The existing logic is confusing and fails to abstract a number of bitwise
tricks.

Use ALIGN_DOWN() to perform alignment, pte_index() to obtain a PTE index,
and represent the address range using PTE offsets. This naturally makes it
clear that the operation is intended to occur within only a single page
table, and prevents the range from spanning more than one.

We rely on the fact that fault_around_bytes will always be page-aligned, at
least one page in size, a power of two, and that it will not exceed
PAGE_SIZE * PTRS_PER_PTE in size (i.e. the address space mapped by a single
page table). These invariants are all guaranteed by fault_around_bytes_set().

Signed-off-by: Lorenzo Stoakes <lstoakes@xxxxxxxxx>
---
 mm/memory.c | 38 ++++++++++++++++----------------------
 1 file changed, 16 insertions(+), 22 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index c5f1bf906d0c..3d85aa7106b0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4437,8 +4437,8 @@ late_initcall(fault_around_debugfs);
  * It uses vm_ops->map_pages() to map the pages, which skips the page if it's
  * not ready to be mapped: not up-to-date, locked, etc.
  *
- * This function doesn't cross the VMA boundaries, in order to call map_pages()
- * only once.
+ * This function doesn't cross VMA or page table boundaries, in order to call
+ * map_pages() and acquire a PTE lock only once.
  *
  * fault_around_bytes defines how many bytes we'll try to map.
  * do_fault_around() expects it to be set to a power of two less than or equal
@@ -4451,27 +4451,19 @@ late_initcall(fault_around_debugfs);
  */
 static vm_fault_t do_fault_around(struct vm_fault *vmf)
 {
-	unsigned long address = vmf->address, nr_pages, mask;
-	pgoff_t start_pgoff = vmf->pgoff;
-	pgoff_t end_pgoff;
-	int off;
+	pgoff_t nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT;
+	pgoff_t pte_off = pte_index(vmf->address);
+	/* The page offset of vmf->address within the VMA. */
+	pgoff_t vma_off = vmf->pgoff - vmf->vma->vm_pgoff;
+	pgoff_t from_pte, to_pte;

-	nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT;
-	mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
+	/* The PTE offset of the start address, clamped to the VMA. */
+	from_pte = max(ALIGN_DOWN(pte_off, nr_pages),
+		       pte_off - min(pte_off, vma_off));

-	address = max(address & mask, vmf->vma->vm_start);
-	off = ((vmf->address - address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
-	start_pgoff -= off;
-
-	/*
-	 * end_pgoff is either the end of the page table, the end of
-	 * the vma or nr_pages from start_pgoff, depending what is nearest.
-	 */
-	end_pgoff = start_pgoff -
-		((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
-		PTRS_PER_PTE - 1;
-	end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1,
-			start_pgoff + nr_pages - 1);
+	/* The PTE offset of the end address, clamped to the VMA and PTE. */
+	to_pte = min3(from_pte + nr_pages, (pgoff_t)PTRS_PER_PTE,
+		      pte_off + vma_pages(vmf->vma) - vma_off) - 1;

 	if (pmd_none(*vmf->pmd)) {
 		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
@@ -4479,7 +4471,9 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
 			return VM_FAULT_OOM;
 	}

-	return vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff);
+	return vmf->vma->vm_ops->map_pages(vmf,
+				vmf->pgoff + from_pte - pte_off,
+				vmf->pgoff + to_pte - pte_off);
 }

 /* Return true if we should do read fault-around, false otherwise */
--
2.39.2
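
P.S. As a reviewer aid, below is a minimal userspace sketch of the new
clamping arithmetic. This is not kernel code: PTRS_PER_PTE is hard-coded to
an assumed x86-64 value, ALIGN_DOWN/MIN/MAX/MIN3 are local power-of-two
stand-ins for the kernel helpers, and the fault geometries in main() are
made up purely for illustration.

/*
 * Userspace sketch of the patched do_fault_around() range computation.
 * The same arithmetic on plain integers, with hypothetical inputs:
 *   pte_off  - PTE index of the faulting address (0..PTRS_PER_PTE - 1)
 *   vma_off  - page offset of the faulting address within the VMA
 *   vma_len  - VMA length in pages
 *   nr_pages - fault_around_bytes >> PAGE_SHIFT; a power of two, >= 1 and
 *              <= PTRS_PER_PTE, as fault_around_bytes_set() guarantees.
 */
#include <stdio.h>

#define PTRS_PER_PTE	512	/* assumed x86-64 value */

typedef unsigned long pgoff_t;

/* Power-of-two-only ALIGN_DOWN, valid here since nr_pages is a power of two. */
#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))
#define MIN(a, b)		((a) < (b) ? (a) : (b))
#define MAX(a, b)		((a) > (b) ? (a) : (b))
#define MIN3(a, b, c)		MIN(MIN(a, b), c)

static void fault_around(pgoff_t pte_off, pgoff_t vma_off,
			 pgoff_t vma_len, pgoff_t nr_pages)
{
	pgoff_t from_pte, to_pte;

	/* The PTE offset of the start address, clamped to the VMA. */
	from_pte = MAX(ALIGN_DOWN(pte_off, nr_pages),
		       pte_off - MIN(pte_off, vma_off));

	/* The PTE offset of the end address, clamped to the VMA and PTE. */
	to_pte = MIN3(from_pte + nr_pages, (pgoff_t)PTRS_PER_PTE,
		      pte_off + vma_len - vma_off) - 1;

	printf("pte_off=%lu vma_off=%lu -> PTEs [%lu, %lu]\n",
	       pte_off, vma_off, from_pte, to_pte);
}

int main(void)
{
	/* Fault in the middle of a large VMA: full 16-page window. */
	fault_around(100, 1000, 4096, 16);	/* -> [96, 111] */
	/* Fault 3 pages into the VMA: window slides past the VMA start. */
	fault_around(100, 3, 4096, 16);		/* -> [97, 112] */
	/* Fault 2 pages before the VMA end: window cut at the VMA end. */
	fault_around(100, 1000, 1002, 16);	/* -> [96, 101] */
	/* VMA begins mid-table; window cut at PTE 511, never crossing
	 * into the next page table. */
	fault_around(505, 3, 4096, 16);		/* -> [502, 511] */
	return 0;
}

The second case shows that clamping from_pte at the VMA start slides the
window forward (still nr_pages in size) rather than shrinking it; the last
two cases show the end being cut off at the VMA and page table boundaries
respectively.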