Re: [PATCH 1/2] mm/numa: no task_numa_fault() call if page table is changed

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 8 Aug 2024, at 10:14, David Hildenbrand wrote:

> On 08.08.24 16:13, Zi Yan wrote:
>> On 8 Aug 2024, at 4:22, David Hildenbrand wrote:
>>
>>> On 08.08.24 05:19, Baolin Wang wrote:
>>>>
>>>>
>>>> On 2024/8/8 02:47, Zi Yan wrote:
>>>>> When handling a numa page fault, task_numa_fault() should be called by a
>>>>> process that restores the page table of the faulted folio to avoid
>>>>> duplicated stats counting. Commit b99a342d4f11 ("NUMA balancing: reduce
>>>>> TLB flush via delaying mapping on hint page fault") restructured
>>>>> do_numa_page() and do_huge_pmd_numa_page() and did not avoid
>>>>> task_numa_fault() call in the second page table check after a numa
>>>>> migration failure. Fix it by making all !pte_same()/!pmd_same() return
>>>>> immediately.
>>>>>
>>>>> This issue can cause task_numa_fault() to be called more than necessary
>>>>> and lead to unexpected NUMA balancing results (it is hard to tell whether
>>>>> the issue will have a positive or negative performance impact due to
>>>>> duplicated NUMA fault counting).
>>>>>
>>>>> Reported-by: "Huang, Ying" <ying.huang@xxxxxxxxx>
>>>>> Closes: https://lore.kernel.org/linux-mm/87zfqfw0yw.fsf@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx/
>>>>> Fixes: b99a342d4f11 ("NUMA balancing: reduce TLB flush via delaying mapping on hint page fault")
>>>>> Cc: <stable@xxxxxxxxxxxxxxx>
>>>>> Signed-off-by: Zi Yan <ziy@xxxxxxxxxx>
>>>>
>>>> The fix looks reasonable to me. Feel free to add:
>>>> Reviewed-by: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx>
>>>>
>>>> (Nit: These goto labels are a bit confusing and might need some cleanup
>>>> in the future.)
>>>
>>> Agreed, maybe we should simply handle that right away and replace the "goto out;" users by "return 0;".
>>>
>>> Then, just copy the 3 LOC.
>>>
>>> For mm/memory.c that would be:
>>>
>>> diff --git a/mm/memory.c b/mm/memory.c
>>> index 67496dc5064f..410ba50ca746 100644
>>> --- a/mm/memory.c
>>> +++ b/mm/memory.c
>>> @@ -5461,7 +5461,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
>>>           if (unlikely(!pte_same(old_pte, vmf->orig_pte))) {
>>>                  pte_unmap_unlock(vmf->pte, vmf->ptl);
>>> -               goto out;
>>> +               return 0;
>>>          }
>>>           pte = pte_modify(old_pte, vma->vm_page_prot);
>>> @@ -5528,15 +5528,14 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
>>>                  vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
>>>                                                 vmf->address, &vmf->ptl);
>>>                  if (unlikely(!vmf->pte))
>>> -                       goto out;
>>> +                       return 0;
>>>                  if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
>>>                          pte_unmap_unlock(vmf->pte, vmf->ptl);
>>> -                       goto out;
>>> +                       return 0;
>>>                  }
>>>                  goto out_map;
>>>          }
>>>   -out:
>>>          if (nid != NUMA_NO_NODE)
>>>                  task_numa_fault(last_cpupid, nid, nr_pages, flags);
>>>          return 0;
>>> @@ -5552,7 +5551,9 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
>>>                  numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte,
>>>                                              writable);
>>>          pte_unmap_unlock(vmf->pte, vmf->ptl);
>>> -       goto out;
>>> +       if (nid != NUMA_NO_NODE)
>>> +               task_numa_fault(last_cpupid, nid, nr_pages, flags);
>>> +       return 0;
>>>   }
>>
>> Looks good to me. Thanks.
>>
>> Hi Andrew,
>>
>> Should I resend this for easy backporting? Or do you want to fold David’s
>> changes in directly?
>
> Note that I didn't touch huge_memory.c. So maybe just send a fixup on top?

Got it. The fixup is attached.

Best Regards,
Yan, Zi
From c0494d569e77291f7f51abb16c2ceff0976371f4 Mon Sep 17 00:00:00 2001
From: Zi Yan <ziy@xxxxxxxxxx>
Date: Thu, 8 Aug 2024 10:18:42 -0400
Subject: [PATCH] fixup! mm/numa: no task_numa_fault() call if page table is
 changed

---
 mm/huge_memory.c | 11 +++++------
 mm/memory.c      | 12 ++++++------
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a3c018f2b554..4e8746769a97 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1681,7 +1681,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
 	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
 	if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
 		spin_unlock(vmf->ptl);
-		goto out;
+		return 0;
 	}
 
 	pmd = pmd_modify(oldpmd, vma->vm_page_prot);
@@ -1729,16 +1729,13 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
 		vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
 		if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
 			spin_unlock(vmf->ptl);
-			goto out;
+			return 0;
 		}
 		goto out_map;
 	}
 
-count_fault:
 	if (nid != NUMA_NO_NODE)
 		task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags);
-
-out:
 	return 0;
 
 out_map:
@@ -1750,7 +1747,9 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
 	set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
 	update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
 	spin_unlock(vmf->ptl);
-	goto count_fault;
+	if (nid != NUMA_NO_NODE)
+		task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags);
+	return 0;
 }
 
 /*
diff --git a/mm/memory.c b/mm/memory.c
index 503d493263df..410ba50ca746 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5461,7 +5461,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 
 	if (unlikely(!pte_same(old_pte, vmf->orig_pte))) {
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
-		goto out;
+		return 0;
 	}
 
 	pte = pte_modify(old_pte, vma->vm_page_prot);
@@ -5528,18 +5528,16 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
 					       vmf->address, &vmf->ptl);
 		if (unlikely(!vmf->pte))
-			goto out;
+			return 0;
 		if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
 			pte_unmap_unlock(vmf->pte, vmf->ptl);
-			goto out;
+			return 0;
 		}
 		goto out_map;
 	}
 
-count_fault:
 	if (nid != NUMA_NO_NODE)
 		task_numa_fault(last_cpupid, nid, nr_pages, flags);
-out:
 	return 0;
 out_map:
 	/*
@@ -5553,7 +5551,9 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 		numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte,
 					    writable);
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
-	goto count_fault;
+	if (nid != NUMA_NO_NODE)
+		task_numa_fault(last_cpupid, nid, nr_pages, flags);
+	return 0;
 }
 
 static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
-- 
2.43.0

Attachment: signature.asc
Description: OpenPGP digital signature


[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux