On 8 Aug 2024, at 10:14, David Hildenbrand wrote: > On 08.08.24 16:13, Zi Yan wrote: >> On 8 Aug 2024, at 4:22, David Hildenbrand wrote: >> >>> On 08.08.24 05:19, Baolin Wang wrote: >>>> >>>> >>>> On 2024/8/8 02:47, Zi Yan wrote: >>>>> When handling a numa page fault, task_numa_fault() should be called by a >>>>> process that restores the page table of the faulted folio to avoid >>>>> duplicated stats counting. Commit b99a342d4f11 ("NUMA balancing: reduce >>>>> TLB flush via delaying mapping on hint page fault") restructured >>>>> do_numa_page() and do_huge_pmd_numa_page() and did not avoid >>>>> task_numa_fault() call in the second page table check after a numa >>>>> migration failure. Fix it by making all !pte_same()/!pmd_same() return >>>>> immediately. >>>>> >>>>> This issue can cause task_numa_fault() being called more than necessary >>>>> and lead to unexpected numa balancing results (It is hard to tell whether >>>>> the issue will cause positive or negative performance impact due to >>>>> duplicated numa fault counting). >>>>> >>>>> Reported-by: "Huang, Ying" <ying.huang@xxxxxxxxx> >>>>> Closes: https://lore.kernel.org/linux-mm/87zfqfw0yw.fsf@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx/ >>>>> Fixes: b99a342d4f11 ("NUMA balancing: reduce TLB flush via delaying mapping on hint page fault") >>>>> Cc: <stable@xxxxxxxxxxxxxxx> >>>>> Signed-off-by: Zi Yan <ziy@xxxxxxxxxx> >>>> >>>> The fix looks reasonable to me. Feel free to add: >>>> Reviewed-by: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx> >>>> >>>> (Nit: These goto labels are a bit confusing and might need some cleanup >>>> in the future.) >>> >>> Agreed, maybe we should simply handle that right away and replace the "goto out;" users by "return 0;". >>> >>> Then, just copy the 3 LOC. 
>>> >>> For mm/memory.c that would be: >>> >>> diff --git a/mm/memory.c b/mm/memory.c >>> index 67496dc5064f..410ba50ca746 100644 >>> --- a/mm/memory.c >>> +++ b/mm/memory.c >>> @@ -5461,7 +5461,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) >>> if (unlikely(!pte_same(old_pte, vmf->orig_pte))) { >>> pte_unmap_unlock(vmf->pte, vmf->ptl); >>> - goto out; >>> + return 0; >>> } >>> pte = pte_modify(old_pte, vma->vm_page_prot); >>> @@ -5528,15 +5528,14 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) >>> vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, >>> vmf->address, &vmf->ptl); >>> if (unlikely(!vmf->pte)) >>> - goto out; >>> + return 0; >>> if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { >>> pte_unmap_unlock(vmf->pte, vmf->ptl); >>> - goto out; >>> + return 0; >>> } >>> goto out_map; >>> } >>> -out: >>> if (nid != NUMA_NO_NODE) >>> task_numa_fault(last_cpupid, nid, nr_pages, flags); >>> return 0; >>> @@ -5552,7 +5551,9 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) >>> numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte, >>> writable); >>> pte_unmap_unlock(vmf->pte, vmf->ptl); >>> - goto out; >>> + if (nid != NUMA_NO_NODE) >>> + task_numa_fault(last_cpupid, nid, nr_pages, flags); >>> + return 0; >>> } >> >> Looks good to me. Thanks. >> >> Hi Andrew, >> >> Should I resend this for easy backporting? Or do you want to fold David’s >> changes in directly? > > Note that I didn't touch huge_memory.c. So maybe just send a fixup on top? Got it. The fixup is attached. Best Regards, Yan, Zi
From c0494d569e77291f7f51abb16c2ceff0976371f4 Mon Sep 17 00:00:00 2001 From: Zi Yan <ziy@xxxxxxxxxx> Date: Thu, 8 Aug 2024 10:18:42 -0400 Subject: [PATCH] fixup! mm/numa: no task_numa_fault() call if page table is changed --- mm/huge_memory.c | 11 +++++------ mm/memory.c | 12 ++++++------ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index a3c018f2b554..4e8746769a97 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1681,7 +1681,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { spin_unlock(vmf->ptl); - goto out; + return 0; } pmd = pmd_modify(oldpmd, vma->vm_page_prot); @@ -1729,16 +1729,13 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { spin_unlock(vmf->ptl); - goto out; + return 0; } goto out_map; } -count_fault: if (nid != NUMA_NO_NODE) task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags); - -out: return 0; out_map: @@ -1750,7 +1747,9 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd); update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); spin_unlock(vmf->ptl); - goto count_fault; + if (nid != NUMA_NO_NODE) + task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags); + return 0; } /* diff --git a/mm/memory.c b/mm/memory.c index 503d493263df..410ba50ca746 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5461,7 +5461,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) if (unlikely(!pte_same(old_pte, vmf->orig_pte))) { pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; + return 0; } pte = pte_modify(old_pte, vma->vm_page_prot); @@ -5528,18 +5528,16 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (unlikely(!vmf->pte)) - goto out; + return 0; if 
(unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; + return 0; } goto out_map; } -count_fault: if (nid != NUMA_NO_NODE) task_numa_fault(last_cpupid, nid, nr_pages, flags); -out: return 0; out_map: /* @@ -5553,7 +5551,9 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte, writable); pte_unmap_unlock(vmf->pte, vmf->ptl); - goto count_fault; + if (nid != NUMA_NO_NODE) + task_numa_fault(last_cpupid, nid, nr_pages, flags); + return 0; } static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) -- 2.43.0
Attachment:
signature.asc
Description: OpenPGP digital signature