numa_migrate_prep() and mpol_misplaced() presume their callers are in the
fault path.  To enable migrations from the page cache, it is preferable to
reuse the same logic to handle migration prep.  Lightly refactor
numa_migrate_prep() and mpol_misplaced() so that they may be called with
(vmf = NULL) from non-faulting paths.  Count non-fault events as page-cache
NUMA hints instead of fault hints.

Signed-off-by: Gregory Price <gourry@xxxxxxxxxx>
---
 include/linux/vm_event_item.h |  1 +
 mm/memory.c                   | 19 ++++++++++---------
 mm/mempolicy.c                | 25 +++++++++++++++++--------
 mm/vmstat.c                   |  1 +
 4 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 747943bc8cc2..b917bcfffe6d 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -62,6 +62,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		NUMA_HUGE_PTE_UPDATES,
 		NUMA_HINT_FAULTS,
 		NUMA_HINT_FAULTS_LOCAL,
+		NUMA_HINT_PAGE_CACHE,
 		NUMA_PAGE_MIGRATE,
 #endif
 #ifdef CONFIG_MIGRATION
diff --git a/mm/memory.c b/mm/memory.c
index 1d97bdfd0ed6..8b664b08915c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5205,15 +5205,16 @@ static vm_fault_t do_fault(struct vm_fault *vmf)
 int numa_migrate_prep(struct folio *folio, struct vm_fault *vmf,
 		      unsigned long addr, int page_nid, int *flags)
 {
-	struct vm_area_struct *vma = vmf->vma;
-
-	/* Record the current PID acceesing VMA */
-	vma_set_access_pid_bit(vma);
-
-	count_vm_numa_event(NUMA_HINT_FAULTS);
-	if (page_nid == numa_node_id()) {
-		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
-		*flags |= TNF_FAULT_LOCAL;
+	/* If accessed in fault path, record the current PID accessing VMA */
+	if (vmf) {
+		vma_set_access_pid_bit(vmf->vma);
+		count_vm_numa_event(NUMA_HINT_FAULTS);
+		if (page_nid == numa_node_id()) {
+			count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
+			*flags |= TNF_FAULT_LOCAL;
+		}
+	} else {
+		count_vm_numa_event(NUMA_HINT_PAGE_CACHE);
 	}
 
 	return mpol_misplaced(folio, vmf, addr);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index b858e22b259d..0f654aff477a 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2726,12 +2726,16 @@ static void sp_free(struct sp_node *n)
  * mpol_misplaced - check whether current folio node is valid in policy
  *
  * @folio: folio to be checked
- * @vmf: structure describing the fault
+ * @vmf: structure describing the fault (NULL if called outside fault path)
  * @addr: virtual address in @vma for shared policy lookup and interleave policy
+ *	Ignored if vmf is NULL.
  *
  * Lookup current policy node id for vma,addr and "compare to" folio's
- * node id. Policy determination "mimics" alloc_page_vma().
- * Called from fault path where we know the vma and faulting address.
+ * node id - or task's policy node id if vmf is NULL. Policy determination
+ * "mimics" alloc_page_vma().
+ *
+ * vmf must be non-NULL if called from fault path where we know the vma and
+ * faulting address. The PTL must be held by the caller if vmf is not NULL.
  *
  * Return: NUMA_NO_NODE if the page is in a node that is valid for this
  * policy, or a suitable node ID to allocate a replacement folio from.
@@ -2743,7 +2747,6 @@ int mpol_misplaced(struct folio *folio, struct vm_fault *vmf,
 	pgoff_t ilx;
 	struct zoneref *z;
 	int curnid = folio_nid(folio);
-	struct vm_area_struct *vma = vmf->vma;
 	int thiscpu = raw_smp_processor_id();
 	int thisnid = numa_node_id();
 	int polnid = NUMA_NO_NODE;
@@ -2753,18 +2756,24 @@ int mpol_misplaced(struct folio *folio, struct vm_fault *vmf,
 	 * Make sure ptl is held so that we don't preempt and we
 	 * have a stable smp processor id
 	 */
-	lockdep_assert_held(vmf->ptl);
-	pol = get_vma_policy(vma, addr, folio_order(folio), &ilx);
+	if (vmf) {
+		lockdep_assert_held(vmf->ptl);
+		pol = get_vma_policy(vmf->vma, addr, folio_order(folio), &ilx);
+	} else
+		pol = get_task_policy(current);
+
 	if (!(pol->flags & MPOL_F_MOF))
 		goto out;
 
 	switch (pol->mode) {
 	case MPOL_INTERLEAVE:
-		polnid = interleave_nid(pol, ilx);
+		polnid = vmf ? interleave_nid(pol, ilx) :
+			       interleave_nodes(pol);
 		break;
 
 	case MPOL_WEIGHTED_INTERLEAVE:
-		polnid = weighted_interleave_nid(pol, ilx);
+		polnid = vmf ? weighted_interleave_nid(pol, ilx) :
+			       weighted_interleave_nodes(pol);
 		break;
 
 	case MPOL_PREFERRED:
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 04a1cb6cc636..5a02e6ff043d 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1316,6 +1316,7 @@ const char * const vmstat_text[] = {
 	"numa_huge_pte_updates",
 	"numa_hint_faults",
 	"numa_hint_faults_local",
+	"numa_hint_page_cache",
 	"numa_pages_migrated",
 #endif
 #ifdef CONFIG_MIGRATION
-- 
2.43.0
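
For illustration only (not part of this patch): a minimal sketch of how a
hypothetical non-fault caller, e.g. a page-cache promotion scan, might use
the refactored helper.  The function name is made up; only
numa_migrate_prep(), folio_nid(), and NUMA_NO_NODE come from the tree, and
the actual migration step is left to the usual misplaced-folio machinery.

static int pagecache_numa_hint(struct folio *folio)
{
	int flags = 0;
	int target_nid;

	/*
	 * With vmf == NULL, numa_migrate_prep() counts the event as
	 * NUMA_HINT_PAGE_CACHE and mpol_misplaced() falls back to
	 * get_task_policy(current); addr is ignored on this path, so pass 0.
	 */
	target_nid = numa_migrate_prep(folio, NULL, 0, folio_nid(folio),
				       &flags);
	if (target_nid == NUMA_NO_NODE)
		return 0;	/* current placement satisfies the policy */

	/*
	 * A real caller would now hand target_nid to the misplaced-folio
	 * migration machinery, as the NUMA hinting fault path does.
	 */
	return target_nid;
}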