The adj_start calculation has been a constant source of confusion in the VMA merge code. There are two cases to consider, one where we adjust the start of the vmg->middle VMA (i.e. the __VMG_FLAG_ADJUST_MIDDLE_START merge flag is set), in which case adj_start is calculated as: (1) adj_start = vmg->end - vmg->middle->vm_start And the case where we adjust the start of the vmg->next VMA (i.e.t he __VMG_FLAG_ADJUST_NEXT_START merge flag is set), in which case adj_start is calculated as: (2) adj_start = -(vmg->middle->vm_end - vmg->end) We apply (1) thusly: vmg->middle->vm_start = vmg->middle->vm_start + vmg->end - vmg->middle->vm_start Which simplifies to: vmg->middle->vm_start = vmg->end Similarly, we apply (2) as: vmg->next->vm_start = vmg->next->vm_start + -(vmg->middle->vm_end - vmg->end) Noting that for these VMAs to be mergeable vmg->middle->vm_end == vmg->next->vm_start and so this simplifies to: vmg->next->vm_start = vmg->next->vm_start + -(vmg->next->vm_start - vmg->end) Which simplifies to: vmg->next->vm_start = vmg->end Therefore in each case, we simply need to adjust the start of the VMA to vmg->end (!) and can do away with this adj_start calculation. The only caveat is that we must ensure we update the vm_pgoff field correctly. We therefore abstract this entire calculation to a new function vmg_adjust_set_range() which performs this calculation and sets the adjusted VMA's new range using the general vma_set_range() function. We also must update vma_adjust_trans_huge() which expects the now-abstracted adj_start parameter. It turns out this is wholly unnecessary. In vma_adjust_trans_huge() the relevant code is: if (adjust_next > 0) { struct vm_area_struct *next = find_vma(vma->vm_mm, vma->vm_end); unsigned long nstart = next->vm_start; nstart += adjust_next; split_huge_pmd_if_needed(next, nstart); } The only case where this is relevant is when __VMG_FLAG_ADJUST_MIDDLE_START is specified (in which case adj_next would have been positive), i.e. the one in which the vma specified is vmg->prev and this the sought 'next' VMA would be vmg->middle. We can therefore eliminate the find_vma() invocation altogether and simply provide the vmg->middle VMA in this instance, or NULL otherwise. Again we have an adj_next offset calculation: next->vm_start + vmg->end - vmg->middle->vm_start Where next == vmg->middle this simplifies to vmg->end as previously demonstrated. Therefore nstart is equal to vmg->end, which is already passed to vma_adjust_trans_huge() via the 'end' parameter and so this code (rather delightfully) simplifies to: if (next) split_huge_pmd_if_needed(next, end); With these changes in place, it becomes silly for commit_merge() to return vmg->target, as it is always the same and threaded through vmg, so we finally change commit_merge() to return an error value once again. This patch has no change in functional behaviour. Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx> --- include/linux/huge_mm.h | 2 +- mm/huge_memory.c | 19 ++---- mm/vma.c | 102 +++++++++++++++---------------- tools/testing/vma/vma_internal.h | 4 +- 4 files changed, 58 insertions(+), 69 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 93e509b6c00e..43f76b54b522 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -404,7 +404,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end); void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, - unsigned long end, long adjust_next); + unsigned long end, struct vm_area_struct *next); spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma); spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 3d3ebdc002d5..c44599f9ad09 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3017,9 +3017,9 @@ static inline void split_huge_pmd_if_needed(struct vm_area_struct *vma, unsigned } void vma_adjust_trans_huge(struct vm_area_struct *vma, - unsigned long start, - unsigned long end, - long adjust_next) + unsigned long start, + unsigned long end, + struct vm_area_struct *next) { /* Check if we need to split start first. */ split_huge_pmd_if_needed(vma, start); @@ -3027,16 +3027,9 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma, /* Check if we need to split end next. */ split_huge_pmd_if_needed(vma, end); - /* - * If we're also updating the next vma vm_start, - * check if we need to split it. - */ - if (adjust_next > 0) { - struct vm_area_struct *next = find_vma(vma->vm_mm, vma->vm_end); - unsigned long nstart = next->vm_start; - nstart += adjust_next; - split_huge_pmd_if_needed(next, nstart); - } + /* If we're incrementing next->vm_start, we might need to split it. */ + if (next) + split_huge_pmd_if_needed(next, end); } static void unmap_folio(struct folio *folio) diff --git a/mm/vma.c b/mm/vma.c index cfcadca7b116..48a7acc83a82 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -515,7 +515,7 @@ __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, init_vma_prep(&vp, vma); vp.insert = new; vma_prepare(&vp); - vma_adjust_trans_huge(vma, vma->vm_start, addr, 0); + vma_adjust_trans_huge(vma, vma->vm_start, addr, NULL); if (new_below) { vma->vm_start = addr; @@ -646,47 +646,46 @@ void validate_mm(struct mm_struct *mm) #endif /* CONFIG_DEBUG_VM_MAPLE_TREE */ /* - * Actually perform the VMA merge operation. - * - * On success, returns the merged VMA. Otherwise returns NULL. + * Based on the vmg flag indicating whether we need to adjust the vm_start field + * for the middle or next VMA, we calculate what the range of the newly adjusted + * VMA ought to be, and set the VMA's range accordingly. */ -static struct vm_area_struct *commit_merge(struct vma_merge_struct *vmg) +static void vmg_adjust_set_range(struct vma_merge_struct *vmg) { - struct vm_area_struct *vma; - struct vma_prepare vp; - struct vm_area_struct *adjust; - long adj_start; unsigned long flags = vmg->merge_flags; + struct vm_area_struct *adjust; + pgoff_t pgoff; - /* - * If modifying an existing VMA and we don't remove vmg->middle, then we - * shrink the adjacent VMA. - */ if (flags & __VMG_FLAG_ADJUST_MIDDLE_START) { - vma = vmg->target; adjust = vmg->middle; - /* The POSITIVE value by which we offset vmg->middle->vm_start. */ - adj_start = vmg->end - vmg->middle->vm_start; - - /* Note: vma iterator must be pointing to 'start'. */ - vma_iter_config(vmg->vmi, vmg->start, vmg->end); + pgoff = adjust->vm_pgoff + PHYS_PFN(vmg->end - adjust->vm_start); } else if (flags & __VMG_FLAG_ADJUST_NEXT_START) { - /* - * In this case alone, the VMA we manipulate is vmg->middle, but - * we ultimately return vmg->next. - */ - vma = vmg->middle; adjust = vmg->next; - /* The NEGATIVE value by which we offset vmg->next->vm_start. */ - adj_start = -(vmg->middle->vm_end - vmg->end); + pgoff = adjust->vm_pgoff - PHYS_PFN(adjust->vm_start - vmg->end); + } else { + return; + } + + vma_set_range(adjust, vmg->end, adjust->vm_end, pgoff); +} + +/* + * Actually perform the VMA merge operation. + * + * On success, returns the merged VMA. Otherwise returns NULL. + */ +static int commit_merge(struct vma_merge_struct *vmg) +{ + struct vm_area_struct *vma; + struct vma_prepare vp; + bool adj_middle = vmg->merge_flags & __VMG_FLAG_ADJUST_MIDDLE_START; - vma_iter_config(vmg->vmi, vmg->next->vm_start + adj_start, - vmg->next->vm_end); + if (vmg->merge_flags & __VMG_FLAG_ADJUST_NEXT_START) { + /* In this case we manipulate middle and return next. */ + vma = vmg->middle; + vma_iter_config(vmg->vmi, vmg->end, vmg->next->vm_end); } else { vma = vmg->target; - adjust = NULL; - adj_start = 0; - /* Note: vma iterator must be pointing to 'start'. */ vma_iter_config(vmg->vmi, vmg->start, vmg->end); } @@ -694,22 +693,22 @@ static struct vm_area_struct *commit_merge(struct vma_merge_struct *vmg) init_multi_vma_prep(&vp, vma, vmg); if (vma_iter_prealloc(vmg->vmi, vma)) - return NULL; + return -ENOMEM; vma_prepare(&vp); - vma_adjust_trans_huge(vma, vmg->start, vmg->end, adj_start); + /* + * THP pages may need to do additional splits if we increase + * middle->vm_start. + */ + vma_adjust_trans_huge(vma, vmg->start, vmg->end, + adj_middle ? vmg->middle : NULL); vma_set_range(vma, vmg->start, vmg->end, vmg->pgoff); - - if (adj_start) { - adjust->vm_start += adj_start; - adjust->vm_pgoff += PHYS_PFN(adj_start); - } - + vmg_adjust_set_range(vmg); vma_iter_store(vmg->vmi, vmg->target); vma_complete(&vp, vmg->vmi, vma->vm_mm); - return vmg->target; + return 0; } /* We can only remove VMAs when merging if they do not have a close hook. */ @@ -752,7 +751,7 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( { struct vm_area_struct *middle = vmg->middle; struct vm_area_struct *prev = vmg->prev; - struct vm_area_struct *next, *res; + struct vm_area_struct *next; struct vm_area_struct *anon_dup = NULL; unsigned long start = vmg->start; unsigned long end = vmg->end; @@ -904,12 +903,7 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( vmg->end = next->vm_end; vmg->pgoff = next->vm_pgoff - pglen; } else { - /* - * We shrink middle and expand next. - * - * IMPORTANT: This is the ONLY case where the final - * merged VMA is NOT vmg->target, but rather vmg->next. - */ + /* We shrink middle and expand next. */ vmg->merge_flags |= __VMG_FLAG_ADJUST_NEXT_START; vmg->start = middle->vm_start; vmg->end = start; @@ -927,8 +921,10 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( if (merge_will_delete_next) vmg->merge_flags |= __VMG_FLAG_REMOVE_NEXT; - res = commit_merge(vmg); - if (!res) { + err = commit_merge(vmg); + if (err) { + VM_WARN_ON(err != -ENOMEM); + if (anon_dup) unlink_anon_vmas(anon_dup); @@ -936,9 +932,9 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( return NULL; } - khugepaged_enter_vma(res, vmg->flags); + khugepaged_enter_vma(vmg->target, vmg->flags); vmg->state = VMA_MERGE_SUCCESS; - return res; + return vmg->target; abort: vma_iter_set(vmg->vmi, start); @@ -1102,7 +1098,7 @@ int vma_expand(struct vma_merge_struct *vmg) if (remove_next) vmg->merge_flags |= __VMG_FLAG_REMOVE_NEXT; - if (!commit_merge(vmg)) + if (commit_merge(vmg)) goto nomem; return 0; @@ -1142,7 +1138,7 @@ int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, init_vma_prep(&vp, vma); vma_prepare(&vp); - vma_adjust_trans_huge(vma, start, end, 0); + vma_adjust_trans_huge(vma, start, end, NULL); vma_iter_clear(vmi); vma_set_range(vma, start, end, pgoff); diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 1eae23039854..bb273927af0f 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -796,12 +796,12 @@ static inline void vma_start_write(struct vm_area_struct *vma) static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, - long adjust_next) + struct vm_area_struct *next) { (void)vma; (void)start; (void)end; - (void)adjust_next; + (void)next; } static inline void vma_iter_free(struct vma_iterator *vmi) -- 2.48.0