After page A on the source node is migrated to page B on the target node, a hinting fault is recorded on the target node for B. The source node still holds another record for A, because a two-stage filter is used when migrating pages. Page A is no longer used after migration, so its record has to be erased. Signed-off-by: Hillf Danton <dhillf@xxxxxxxxx> --- --- a/include/linux/sched.h Sat Sep 14 16:57:32 2013 +++ b/include/linux/sched.h Sat Sep 14 17:22:20 2013 @@ -1440,12 +1440,13 @@ struct task_struct { #define TNF_NO_GROUP 0x02 #ifdef CONFIG_NUMA_BALANCING -extern void task_numa_fault(int last_node, int node, int pages, int flags); +extern void task_numa_fault(int last_cpupid, int src_node, + int dst_node, int pages, int flags); extern pid_t task_numa_group_id(struct task_struct *p); extern void set_numabalancing_state(bool enabled); #else -static inline void task_numa_fault(int last_node, int node, int pages, - int flags) +static inline void task_numa_fault(int last_cpupid, int src_node, + int dst_node, int pages, int flags) { } static inline pid_t task_numa_group_id(struct task_struct *p) --- a/kernel/sched/fair.c Wed Sep 11 18:33:00 2013 +++ b/kernel/sched/fair.c Sat Sep 14 17:57:16 2013 @@ -1493,7 +1493,8 @@ void task_numa_free(struct task_struct * /* * Got a PROT_NONE fault for a page on @node. 
*/ -void task_numa_fault(int last_cpupid, int node, int pages, int flags) +void task_numa_fault(int last_cpupid, int src_node, int dst_node, + int pages, int flags) { struct task_struct *p = current; bool migrated = flags & TNF_MIGRATED; @@ -1559,7 +1560,15 @@ void task_numa_fault(int last_cpupid, in if (migrated) p->numa_pages_migrated += pages; - p->numa_faults_buffer[task_faults_idx(node, priv)] += pages; + p->numa_faults_buffer[task_faults_idx(dst_node, priv)] += pages; + + if (migrated) { + /* extract numa-hint faults from src node */ + int idx = task_faults_idx(src_node, priv); + + if (p->numa_faults[idx] >= pages) + p->numa_faults[idx] -= pages; + } } static void reset_ptenuma_scan(struct task_struct *p) --- a/mm/huge_memory.c Sat Sep 14 16:54:28 2013 +++ b/mm/huge_memory.c Sat Sep 14 17:46:54 2013 @@ -1295,6 +1295,7 @@ int do_huge_pmd_numa_page(struct mm_stru unsigned long haddr = addr & HPAGE_PMD_MASK; int page_nid = -1, this_nid = numa_node_id(); int target_nid, last_cpupid = -1; + int src_nid; bool page_locked; bool migrated = false; int flags = 0; @@ -1306,6 +1307,7 @@ int do_huge_pmd_numa_page(struct mm_stru page = pmd_page(pmd); BUG_ON(is_huge_zero_page(page)); page_nid = page_to_nid(page); + src_nid = page_nid; last_cpupid = page_cpupid_last(page); count_vm_numa_event(NUMA_HINT_FAULTS); if (page_nid == this_nid) @@ -1388,7 +1390,7 @@ out: page_unlock_anon_vma_read(anon_vma); if (page_nid != -1) - task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, flags); + task_numa_fault(last_cpupid, src_nid, page_nid, HPAGE_PMD_NR, flags); return 0; } --- a/mm/memory.c Sat Sep 14 16:54:52 2013 +++ b/mm/memory.c Sat Sep 14 17:52:02 2013 @@ -3555,6 +3555,7 @@ int do_numa_page(struct mm_struct *mm, s struct page *page = NULL; spinlock_t *ptl; int page_nid = -1; + int src_nid; int last_cpupid; int target_nid; bool migrated = false; @@ -3597,6 +3598,7 @@ int do_numa_page(struct mm_struct *mm, s last_cpupid = page_cpupid_last(page); page_nid = page_to_nid(page); + 
src_nid = page_nid; target_nid = numa_migrate_prep(page, vma, addr, page_nid); pte_unmap_unlock(ptep, ptl); if (target_nid == -1) { @@ -3613,7 +3615,7 @@ int do_numa_page(struct mm_struct *mm, s out: if (page_nid != -1) - task_numa_fault(last_cpupid, page_nid, 1, flags); + task_numa_fault(last_cpupid, src_nid, page_nid, 1, flags); return 0; } @@ -3652,6 +3654,7 @@ static int do_pmd_numa_page(struct mm_st pte_t pteval = *pte; struct page *page; int page_nid = -1; + int src_nid; int target_nid; bool migrated = false; int flags = 0; @@ -3684,6 +3687,7 @@ static int do_pmd_numa_page(struct mm_st last_cpupid = page_cpupid_last(page); page_nid = page_to_nid(page); + src_nid = page_nid; target_nid = numa_migrate_prep(page, vma, addr, page_nid); pte_unmap_unlock(pte, ptl); if (target_nid != -1) { @@ -3697,7 +3701,7 @@ static int do_pmd_numa_page(struct mm_st } if (page_nid != -1) - task_numa_fault(last_cpupid, page_nid, 1, flags); + task_numa_fault(last_cpupid, src_nid, page_nid, 1, flags); pte = pte_offset_map_lock(mm, pmdp, addr, &ptl); } --