overhead calculation support Intended to be used only for experimental purposes. Not to be merged. Signed-off-by: Raghavendra K T <raghavendra.kt@xxxxxxx> --- include/linux/mm.h | 3 +++ include/linux/vm_event_item.h | 4 ++++ kernel/sched/fair.c | 13 ++++++++----- mm/huge_memory.c | 1 + mm/kmmscand.c | 9 +++++++++ mm/memory.c | 12 ++++++++---- mm/vmstat.c | 4 ++++ 7 files changed, 37 insertions(+), 9 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 306452c11d31..7380aab1fa62 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -600,6 +600,7 @@ struct vm_fault { * page table to avoid allocation from * atomic context. */ + unsigned long start_time; }; /* @@ -690,6 +691,8 @@ void count_kmmscand_migrate_failed(void); void count_kmmscand_slowtier(void); void count_kmmscand_toptier(void); void count_kmmscand_idlepage(void); +void count_kmmscand_scan_oh(long delta); +void count_kmmscand_migration_oh(long delta); #endif #ifdef CONFIG_NUMA_BALANCING diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index b2ccd4f665aa..4c7eaea01f13 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -64,6 +64,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, NUMA_HINT_FAULTS, NUMA_HINT_FAULTS_LOCAL, NUMA_PAGE_MIGRATE, + NUMA_TASK_WORK_OH, + NUMA_HF_MIGRATION_OH, #endif #ifdef CONFIG_KMMSCAND KMMSCAND_MM_SCANS, @@ -74,6 +76,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, KMMSCAND_SLOWTIER, KMMSCAND_TOPTIER, KMMSCAND_IDLEPAGE, + KMMSCAND_SCAN_OH, + KMMSCAND_MIGRATION_OH, #endif #ifdef CONFIG_MIGRATION PGMIGRATE_SUCCESS, PGMIGRATE_FAIL, diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index fbdca89c677f..d205be30ae6c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3299,6 +3299,7 @@ static void task_numa_work(struct callback_head *work) struct vma_iterator vmi; bool vma_pids_skipped; bool vma_pids_forced = false; + unsigned long old = jiffies; SCHED_WARN_ON(p != 
container_of(work, struct task_struct, numa_work)); @@ -3312,7 +3313,7 @@ static void task_numa_work(struct callback_head *work) * work. */ if (p->flags & PF_EXITING) - return; + goto out1; if (!mm->numa_next_scan) { mm->numa_next_scan = now + @@ -3324,7 +3325,7 @@ static void task_numa_work(struct callback_head *work) */ migrate = mm->numa_next_scan; if (time_before(now, migrate)) - return; + goto out1; if (p->numa_scan_period == 0) { p->numa_scan_period_max = task_scan_max(p); @@ -3333,7 +3334,7 @@ static void task_numa_work(struct callback_head *work) next_scan = now + msecs_to_jiffies(p->numa_scan_period); if (!try_cmpxchg(&mm->numa_next_scan, &migrate, next_scan)) - return; + goto out1; /* * Delay this task enough that another task of this mm will likely win @@ -3345,11 +3346,11 @@ static void task_numa_work(struct callback_head *work) pages <<= 20 - PAGE_SHIFT; /* MB in pages */ virtpages = pages * 8; /* Scan up to this much virtual space */ if (!pages) - return; + goto out1; if (!mmap_read_trylock(mm)) - return; + goto out1; /* * VMAs are skipped if the current PID has not trapped a fault within @@ -3526,6 +3527,8 @@ static void task_numa_work(struct callback_head *work) u64 diff = p->se.sum_exec_runtime - runtime; p->node_stamp += 32 * diff; } +out1: + __count_vm_events(NUMA_TASK_WORK_OH, jiffies_to_usecs(jiffies - old)); } void init_numa_balancing(unsigned long clone_flags, struct task_struct *p) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index ee335d96fc39..d948d1fbbffd 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1250,6 +1250,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf) spin_unlock(vmf->ptl); } + __count_vm_events(NUMA_HF_MIGRATION_OH, jiffies_to_usecs(jiffies - vmf->start_time)); return 0; unlock_release: spin_unlock(vmf->ptl); diff --git a/mm/kmmscand.c b/mm/kmmscand.c index 70f588a210dd..bd2c65f38da2 100644 --- a/mm/kmmscand.c +++ b/mm/kmmscand.c @@ -644,8 +644,10 @@ static void 
kmmscand_cleanup_migration_list(struct mm_struct *mm) static void kmmscand_migrate_folio(void) { int ret = 0; + unsigned long tstart, tend; struct kmmscand_migrate_info *info, *tmp; + tstart = jiffies; spin_lock(&kmmscand_migrate_lock); if (!list_empty(&kmmscand_migrate_list.migrate_head)) { @@ -691,6 +693,8 @@ static void kmmscand_migrate_folio(void) } } spin_unlock(&kmmscand_migrate_lock); + tend = jiffies; + __count_vm_events(KMMSCAND_MIGRATION_OH, jiffies_to_usecs(tend - tstart)); } /* @@ -788,6 +792,8 @@ static unsigned long kmmscand_scan_mm_slot(void) unsigned int mm_slot_scan_period; unsigned long now; + + unsigned long tstart, tend; unsigned long mm_slot_next_scan; unsigned long mm_slot_scan_size; unsigned long scanned_size = 0; @@ -800,6 +806,7 @@ static unsigned long kmmscand_scan_mm_slot(void) struct vm_area_struct *vma = NULL; struct kmmscand_mm_slot *mm_slot; + tstart = jiffies; /* Retrieve mm */ spin_lock(&kmmscand_mm_lock); @@ -917,6 +924,8 @@ static unsigned long kmmscand_scan_mm_slot(void) } spin_unlock(&kmmscand_mm_lock); + tend = jiffies; + __count_vm_events(KMMSCAND_SCAN_OH, jiffies_to_usecs(tend - tstart)); return total; } diff --git a/mm/memory.c b/mm/memory.c index 75c2dfd04f72..baea436124b0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5590,7 +5590,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) if (unlikely(!pte_same(old_pte, vmf->orig_pte))) { pte_unmap_unlock(vmf->pte, vmf->ptl); - return 0; + goto out; } pte = pte_modify(old_pte, vma->vm_page_prot); @@ -5629,17 +5629,18 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) nid = target_nid; flags |= TNF_MIGRATED; task_numa_fault(last_cpupid, nid, nr_pages, flags); - return 0; + goto out; } flags |= TNF_MIGRATE_FAIL; vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (unlikely(!vmf->pte)) - return 0; + goto out; + if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { pte_unmap_unlock(vmf->pte, vmf->ptl); - return 0; + goto out; } 
out_map: /* @@ -5656,6 +5657,8 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) if (nid != NUMA_NO_NODE) task_numa_fault(last_cpupid, nid, nr_pages, flags); +out: + __count_vm_events(NUMA_HF_MIGRATION_OH, jiffies_to_usecs(jiffies - vmf->start_time)); return 0; } @@ -5858,6 +5861,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, .flags = flags, .pgoff = linear_page_index(vma, address), .gfp_mask = __get_fault_gfp_mask(vma), + .start_time = jiffies, }; struct mm_struct *mm = vma->vm_mm; unsigned long vm_flags = vma->vm_flags; diff --git a/mm/vmstat.c b/mm/vmstat.c index d758e7155042..b7fe51342970 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1339,6 +1339,8 @@ const char * const vmstat_text[] = { "numa_hint_faults", "numa_hint_faults_local", "numa_pages_migrated", + "numa_task_work_oh", + "numa_hf_migration_oh", #endif #ifdef CONFIG_KMMSCAND "nr_kmmscand_mm_scans", @@ -1349,6 +1351,8 @@ const char * const vmstat_text[] = { "nr_kmmscand_slowtier", "nr_kmmscand_toptier", "nr_kmmscand_idlepage", + "kmmscand_scan_oh", + "kmmscand_migration_oh", #endif #ifdef CONFIG_MIGRATION "pgmigrate_success", -- 2.39.3