Like copy_pte_range()/zap_pte_range(), batch the mm counter updates in filemap_map_pages(). The 'lat_pagefault -P 1 file' test from lmbench shows a 12% improvement, and percpu_counter_add_batch() is gone from the perf flame graph. Signed-off-by: Kefeng Wang <wangkefeng.wang@xxxxxxxxxx> --- include/linux/mm.h | 14 ++++++++++++++ mm/filemap.c | 19 +++++++++++-------- mm/memory.c | 14 -------------- 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 6ad440ac3706..c7dffd358088 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2655,6 +2655,20 @@ static inline int mm_counter(struct folio *folio) return mm_counter_file(folio); } +static inline void init_rss_vec(int *rss) +{ + memset(rss, 0, sizeof(int) * NR_MM_COUNTERS); +} + +static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss) +{ + int i; + + for (i = 0; i < NR_MM_COUNTERS; i++) + if (rss[i]) + add_mm_counter(mm, i, rss[i]); +} + static inline unsigned long get_mm_rss(struct mm_struct *mm) { return get_mm_counter(mm, MM_FILEPAGES) + diff --git a/mm/filemap.c b/mm/filemap.c index 2274e590bab4..d8b23e976a43 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3506,7 +3506,7 @@ static struct folio *next_uptodate_folio(struct xa_state *xas, static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf, struct folio *folio, unsigned long start, unsigned long addr, unsigned int nr_pages, - unsigned int *mmap_miss) + int *rss, unsigned int *mmap_miss) { vm_fault_t ret = 0; struct page *page = folio_page(folio, start); @@ -3541,7 +3541,7 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf, skip: if (count) { type = set_pte_range(vmf, folio, page, count, addr); - add_mm_counter(vmf->vma->vm_mm, type, count); + rss[type] += count; folio_ref_add(folio, count); if (in_range(vmf->address, addr, count * PAGE_SIZE)) ret = VM_FAULT_NOPAGE; @@ -3556,7 +3556,7 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf, if (count) { type = 
set_pte_range(vmf, folio, page, count, addr); - add_mm_counter(vmf->vma->vm_mm, type, count); + rss[type] += count; folio_ref_add(folio, count); if (in_range(vmf->address, addr, count * PAGE_SIZE)) ret = VM_FAULT_NOPAGE; @@ -3569,7 +3569,7 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf, static vm_fault_t filemap_map_order0_folio(struct vm_fault *vmf, struct folio *folio, unsigned long addr, - unsigned int *mmap_miss) + int *rss, unsigned int *mmap_miss) { vm_fault_t ret = 0; struct page *page = &folio->page; @@ -3592,8 +3592,7 @@ static vm_fault_t filemap_map_order0_folio(struct vm_fault *vmf, if (vmf->address == addr) ret = VM_FAULT_NOPAGE; - add_mm_counter(vmf->vma->vm_mm, - set_pte_range(vmf, folio, page, 1, addr), 1); + rss[set_pte_range(vmf, folio, page, 1, addr)]++; folio_ref_inc(folio); return ret; @@ -3610,6 +3609,7 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, XA_STATE(xas, &mapping->i_pages, start_pgoff); struct folio *folio; vm_fault_t ret = 0; + int rss[NR_MM_COUNTERS]; unsigned int nr_pages = 0, mmap_miss = 0, mmap_miss_saved; rcu_read_lock(); @@ -3629,6 +3629,8 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, folio_put(folio); goto out; } + + init_rss_vec(rss); do { unsigned long end; @@ -3640,15 +3642,16 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, if (!folio_test_large(folio)) ret |= filemap_map_order0_folio(vmf, - folio, addr, &mmap_miss); + folio, addr, rss, &mmap_miss); else ret |= filemap_map_folio_range(vmf, folio, xas.xa_index - folio->index, addr, - nr_pages, &mmap_miss); + nr_pages, rss, &mmap_miss); folio_unlock(folio); folio_put(folio); } while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL); + add_mm_rss_vec(vma->vm_mm, rss); pte_unmap_unlock(vmf->pte, vmf->ptl); out: rcu_read_unlock(); diff --git a/mm/memory.c b/mm/memory.c index 485ffec9d4c7..149208da1652 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -465,20 +465,6 @@ int __pte_alloc_kernel(pmd_t *pmd) return 0; } -static inline 
void init_rss_vec(int *rss) -{ - memset(rss, 0, sizeof(int) * NR_MM_COUNTERS); -} - -static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss) -{ - int i; - - for (i = 0; i < NR_MM_COUNTERS; i++) - if (rss[i]) - add_mm_counter(mm, i, rss[i]); -} - /* * This function is called to print an error when a bad pte * is found. For example, we might have a PFN-mapped pte in -- 2.41.0