On Sun, Jul 31, 2022 at 12:35:28AM +0800, Xin Hao wrote:
> In many data center servers, the shared memory architecture is
> Non-Uniform Memory Access (NUMA). Remote NUMA node data access
> often brings high latency, but it is easy to overlook that remote
> NUMA access to the page tables themselves can also lead to
> performance degradation.
>
> So add a new interface in /proc. This will help developers get
> more information about performance issues when they are caused by
> cross-NUMA page table placement.

Interesting. The implementation seems rather more complex than
necessary though.

> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index 2d04e3470d4c..a51befb47ea8 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -1999,4 +1999,133 @@ const struct file_operations proc_pid_numa_maps_operations = {
>  	.release	= proc_map_release,
>  };
>  
> +struct pgtable_numa_maps {
> +	unsigned long node[MAX_NUMNODES];
> +};
> +
> +struct pgtable_numa_private {
> +	struct proc_maps_private proc_maps;
> +	struct pgtable_numa_maps md;
> +};

struct pgtable_numa_private {
	struct proc_maps_private proc_maps;
	unsigned long node[MAX_NUMNODES];
};

> +static void gather_pgtable_stats(struct page *page, struct pgtable_numa_maps *md)
> +{
> +	md->node[page_to_nid(page)] += 1;
> +}
> +
> +static struct page *can_gather_pgtable_numa_stats(pmd_t pmd, struct vm_area_struct *vma,
> +						  unsigned long addr)
> +{
> +	struct page *page;
> +	int nid;
> +
> +	if (!pmd_present(pmd))
> +		return NULL;
> +
> +	if (pmd_huge(pmd))
> +		return NULL;
> +
> +	page = pmd_page(pmd);
> +	nid = page_to_nid(page);
> +	if (!node_isset(nid, node_states[N_MEMORY]))
> +		return NULL;
> +
> +	return page;
> +}
> +
> +static int gather_pgtable_numa_stats(pmd_t *pmd, unsigned long addr,
> +				     unsigned long end, struct mm_walk *walk)
> +{
> +	struct pgtable_numa_maps *md = walk->private;
> +	struct vm_area_struct *vma = walk->vma;
> +	struct page *page;
> +
> +	if (pmd_huge(*pmd)) {
> +		struct page *pmd_page;
> +
> +		pmd_page = virt_to_page(pmd);
> +		if (!pmd_page)
> +			return 0;
> +
> +		if (!node_isset(page_to_nid(pmd_page), node_states[N_MEMORY]))
> +			return 0;
> +
> +		gather_pgtable_stats(pmd_page, md);
> +		goto out;
> +	}
> +
> +	page = can_gather_pgtable_numa_stats(*pmd, vma, addr);
> +	if (!page)
> +		return 0;
> +
> +	gather_pgtable_stats(page, md);
> +
> +out:
> +	cond_resched();
> +	return 0;
> +}

static int gather_pgtable_numa_stats(pmd_t *pmd, unsigned long addr,
		unsigned long end, struct mm_walk *walk)
{
	struct pgtable_numa_private *priv = walk->private;
	struct vm_area_struct *vma = walk->vma;
	struct page *page;
	int nid;

	if (pmd_huge(*pmd)) {
		page = virt_to_page(pmd);
	} else {
		page = pmd_page(*pmd);
	}

	nid = page_to_nid(page);
	priv->node[nid]++;

	return 0;
}
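
For completeness, a minimal sketch of how that simplified callback
might be hooked up. This wiring is my illustration, not part of the
patch: pgtable_numa_ops and show_pgtable_numa_map() are hypothetical
names, and the real /proc plumbing is omitted.

/*
 * Sketch only (needs <linux/pagewalk.h>, <linux/seq_file.h>,
 * <linux/nodemask.h>).  show_pgtable_numa_map() stands in for
 * whatever seq_file show routine the patch ends up using; the
 * caller must hold the mmap lock around walk_page_vma().
 */
static const struct mm_walk_ops pgtable_numa_ops = {
	.pmd_entry	= gather_pgtable_numa_stats,
};

static int show_pgtable_numa_map(struct seq_file *m, void *v)
{
	struct vm_area_struct *vma = v;
	struct pgtable_numa_private *priv = m->private;
	int nid;

	/* Reset so each VMA reports its own per-node counts. */
	memset(priv->node, 0, sizeof(priv->node));
	walk_page_vma(vma, &pgtable_numa_ops, priv);

	/* One "N<nid>=<count>" entry per node with page table pages. */
	for_each_node_state(nid, N_MEMORY)
		if (priv->node[nid])
			seq_printf(m, "N%d=%lu ", nid, priv->node[nid]);
	seq_putc(m, '\n');
	return 0;
}

With only .pmd_entry set, the walker invokes the callback once per
PMD entry and never descends to the PTE level, so each page table
page is counted where it is referenced rather than per PTE.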