On Mon, May 06, 2024 at 09:50:10PM +0200, David Hildenbrand wrote:
> On 06.05.24 21:29, Yosry Ahmed wrote:
> > Previously, all NR_VM_EVENT_ITEMS stats were maintained per-memcg,
> > although some of those fields are not exposed anywhere. Commit
> > 14e0f6c957e39 ("memcg: reduce memory for the lruvec and memcg stats")
> > changed this such that we only maintain the stats we actually expose
> > per-memcg via a translation table.
> >
> > Additionally, commit 514462bbe927b ("memcg: warn for unexpected events
> > and stats") added a warning if a per-memcg stat update is attempted for
> > a stat that is not in the translation table. The warning started firing
> > for the NR_{FILE/SHMEM}_PMDMAPPED stat updates in the rmap code. These
> > stats are not maintained per-memcg, and hence are not in the
> > translation table.
> >
> > Do not use __lruvec_stat_mod_folio() when updating NR_FILE_PMDMAPPED
> > and NR_SHMEM_PMDMAPPED. Use __mod_node_page_state() instead, which
> > updates the global per-node stats only.
> >
> > Reported-by: syzbot+9319a4268a640e26b72b@xxxxxxxxxxxxxxxxxxxxxxxxx
> > Closes: https://lore.kernel.org/lkml/0000000000001b9d500617c8b23c@xxxxxxxxxx
> > Fixes: 514462bbe927 ("memcg: warn for unexpected events and stats")
> > Acked-by: Shakeel Butt <shakeel.butt@xxxxxxxxx>
> > Signed-off-by: Yosry Ahmed <yosryahmed@xxxxxxxxxx>
> > ---
> >   mm/rmap.c | 15 +++++++++------
> >   1 file changed, 9 insertions(+), 6 deletions(-)
> >
> > diff --git a/mm/rmap.c b/mm/rmap.c
> > index 12be4241474ab..ed7f820369864 100644
> > --- a/mm/rmap.c
> > +++ b/mm/rmap.c
> > @@ -1435,13 +1435,14 @@ static __always_inline void __folio_add_file_rmap(struct folio *folio,
> >   		struct page *page, int nr_pages, struct vm_area_struct *vma,
> >   		enum rmap_level level)
> >   {
> > +	pg_data_t *pgdat = folio_pgdat(folio);
> >   	int nr, nr_pmdmapped = 0;
> >
> >   	VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
> >
> >   	nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
> >   	if (nr_pmdmapped)
> > -		__lruvec_stat_mod_folio(folio, folio_test_swapbacked(folio) ?
> > +		__mod_node_page_state(pgdat, folio_test_swapbacked(folio) ?
> >   			NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
> >   	if (nr)
> >   		__lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr);
> > @@ -1493,6 +1494,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
> >   		enum rmap_level level)
> >   {
> >   	atomic_t *mapped = &folio->_nr_pages_mapped;
> > +	pg_data_t *pgdat = folio_pgdat(folio);
> >   	int last, nr = 0, nr_pmdmapped = 0;
> >   	bool partially_mapped = false;
> >   	enum node_stat_item idx;
> > @@ -1540,13 +1542,14 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
> >   	}
> >
> >   	if (nr_pmdmapped) {
> > +		/* NR_{FILE/SHMEM}_PMDMAPPED are not maintained per-memcg */
> >   		if (folio_test_anon(folio))
> > -			idx = NR_ANON_THPS;
> > -		else if (folio_test_swapbacked(folio))
> > -			idx = NR_SHMEM_PMDMAPPED;
> > +			__lruvec_stat_mod_folio(folio, NR_ANON_THPS, -nr_pmdmapped);
> >   		else
> > -			idx = NR_FILE_PMDMAPPED;
> > -		__lruvec_stat_mod_folio(folio, idx, -nr_pmdmapped);
> > +			__mod_node_page_state(pgdat,
>
> folio_pgdat(folio) should fit here easily. :)
>
> But I would actually suggest something like the following in mm/rmap.c

I am not a big fan of this. Not because I don't like the abstraction,
but because I think it doesn't go all the way. It abstracts a very
specific case: updating nr_pmdmapped for file folios.
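To make it concrete why the warning fires: the per-memcg side only
carries the stats listed in the translation table, and anything else
now trips the WARN added by 514462bbe927. Below is a toy model of that
mechanism; the identifiers and the warning mechanics are simplified
stand-ins, not the actual mm/memcontrol.c code:

/*
 * Toy model of the translation table from 14e0f6c957e39 and the
 * warning from 514462bbe927. Illustrative only.
 */
#include <stdio.h>

enum node_stat_item {
	NR_FILE_MAPPED,
	NR_SHMEM_PMDMAPPED,
	NR_FILE_PMDMAPPED,
	NR_VM_NODE_STAT_ITEMS,
};

/* Only the stats actually exposed per-memcg get a slot. */
static const enum node_stat_item memcg_node_stat_items[] = {
	NR_FILE_MAPPED,
};

/* Maps a node stat to its per-memcg slot, or -1 if untracked. */
static int memcg_stats_index[NR_VM_NODE_STAT_ITEMS];

static void init_memcg_stats(void)
{
	for (int i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
		memcg_stats_index[i] = -1;
	for (unsigned int i = 0; i < sizeof(memcg_node_stat_items) /
				     sizeof(memcg_node_stat_items[0]); i++)
		memcg_stats_index[memcg_node_stat_items[i]] = i;
}

static void mod_memcg_stat(enum node_stat_item idx, int val)
{
	int i = memcg_stats_index[idx];

	if (i < 0) {
		/* This is the warning syzbot hit for NR_*_PMDMAPPED. */
		fprintf(stderr, "WARN: missing stat item %d\n", idx);
		return;
	}
	/* ... update the per-memcg counter in slot i ... */
	(void)val;
}

int main(void)
{
	init_memcg_stats();
	mod_memcg_stat(NR_FILE_MAPPED, 1);	/* ok: in the table */
	mod_memcg_stat(NR_FILE_PMDMAPPED, 512);	/* warns: not per-memcg */
	return 0;
}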
I think if we opt for abstracting the stats updates in mm/rmap.c, we
should go all the way with something like the following (probably split
as two patches: refactoring then bug fix). WDYT about the below?

diff --git a/mm/rmap.c b/mm/rmap.c
index 12be4241474ab..70d6f6309da01 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1269,6 +1269,28 @@ static void __page_check_anon_rmap(struct folio *folio, struct page *page,
 		       page);
 }

+static void __folio_mod_stat(struct folio *folio, int nr, int nr_pmdmapped)
+{
+	int idx;
+
+	if (nr) {
+		idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED;
+		__lruvec_stat_mod_folio(folio, idx, nr);
+	}
+	if (nr_pmdmapped) {
+		if (folio_test_anon(folio)) {
+			idx = NR_ANON_THPS;
+			__lruvec_stat_mod_folio(folio, idx, nr_pmdmapped);
+		} else {
+			/* NR_*_PMDMAPPED are not maintained per-memcg */
+			idx = folio_test_swapbacked(folio) ?
+				NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED;
+			__mod_node_page_state(folio_pgdat(folio), idx,
+					      nr_pmdmapped);
+		}
+	}
+}
+
 static __always_inline void __folio_add_anon_rmap(struct folio *folio,
 		struct page *page, int nr_pages, struct vm_area_struct *vma,
 		unsigned long address, rmap_t flags, enum rmap_level level)
@@ -1276,10 +1298,6 @@ static __always_inline void __folio_add_anon_rmap(struct folio *folio,
 	int i, nr, nr_pmdmapped = 0;

 	nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
-	if (nr_pmdmapped)
-		__lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr_pmdmapped);
-	if (nr)
-		__lruvec_stat_mod_folio(folio, NR_ANON_MAPPED, nr);

 	if (unlikely(!folio_test_anon(folio))) {
 		VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio);
@@ -1297,6 +1315,8 @@ static __always_inline void __folio_add_anon_rmap(struct folio *folio,
 		__page_check_anon_rmap(folio, page, vma, address);
 	}

+	__folio_mod_stat(folio, nr, nr_pmdmapped);
+
 	if (flags & RMAP_EXCLUSIVE) {
 		switch (level) {
 		case RMAP_LEVEL_PTE:
@@ -1393,6 +1413,7 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
 		unsigned long address)
 {
 	int nr = folio_nr_pages(folio);
+	int nr_pmdmapped = 0;

 	VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
 	VM_BUG_ON_VMA(address < vma->vm_start ||
@@ -1425,10 +1446,10 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
 		atomic_set(&folio->_large_mapcount, 0);
 		atomic_set(&folio->_nr_pages_mapped, ENTIRELY_MAPPED);
 		SetPageAnonExclusive(&folio->page);
-		__lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr);
+		nr_pmdmapped = nr;
 	}

-	__lruvec_stat_mod_folio(folio, NR_ANON_MAPPED, nr);
+	__folio_mod_stat(folio, nr, nr_pmdmapped);
 }

 static __always_inline void __folio_add_file_rmap(struct folio *folio,
@@ -1440,11 +1461,7 @@ static __always_inline void __folio_add_file_rmap(struct folio *folio,
 	VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);

 	nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
-	if (nr_pmdmapped)
-		__lruvec_stat_mod_folio(folio, folio_test_swapbacked(folio) ?
-			NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
-	if (nr)
-		__lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr);
+	__folio_mod_stat(folio, nr, nr_pmdmapped);

 	/* See comments in folio_add_anon_rmap_*() */
 	if (!folio_test_large(folio))
@@ -1539,19 +1556,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 		break;
 	}

-	if (nr_pmdmapped) {
-		if (folio_test_anon(folio))
-			idx = NR_ANON_THPS;
-		else if (folio_test_swapbacked(folio))
-			idx = NR_SHMEM_PMDMAPPED;
-		else
-			idx = NR_FILE_PMDMAPPED;
-		__lruvec_stat_mod_folio(folio, idx, -nr_pmdmapped);
-	}
 	if (nr) {
-		idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED;
-		__lruvec_stat_mod_folio(folio, idx, -nr);
-
 		/*
 		 * Queue anon large folio for deferred split if at least one
 		 * page of the folio is unmapped and at least one page
@@ -1563,6 +1568,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 			list_empty(&folio->_deferred_list))
 			deferred_split_folio(folio);
 	}
+	__folio_mod_stat(folio, -nr, -nr_pmdmapped);

 	/*
 	 * It would be tidy to reset folio_test_anon mapping when fully
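One design note on the above: a single helper taking signed deltas
replaces four open-coded update sites. The add paths pass positive
nr/nr_pmdmapped and __folio_remove_rmap() passes the negated values,
so a map/unmap pair nets to zero. A trivial standalone check of that
convention (stand-in counter, not the kernel API):

/* Signed-delta convention: add passes +nr, remove passes -nr. */
#include <assert.h>

static long nr_anon_mapped;	/* stand-in for the real counter */

static void folio_mod_stat(int nr)
{
	nr_anon_mapped += nr;
}

int main(void)
{
	folio_mod_stat(512);	/* map a PMD-sized anon folio */
	folio_mod_stat(-512);	/* unmap it again */
	assert(nr_anon_mapped == 0);
	return 0;
}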
>
> static void __folio_mod_node_file_state(struct folio *folio, int nr_pages)
> {
> 	enum node_stat_item idx = NR_FILE_PMDMAPPED;
>
> 	if (folio_test_swapbacked(folio))
> 		idx = NR_SHMEM_PMDMAPPED;
>
> 	__mod_node_page_state(folio_pgdat(folio), idx, nr_pages);
> }
>
> And then simply calling here
>
> 	__folio_mod_node_file_state(folio, -nr_pmdmapped);
>
> And likewise in __folio_add_file_rmap()
>
> ... will be cleaner.
>
> In any case
>
> Acked-by: David Hildenbrand <david@xxxxxxxxxx>
>
> --
> Cheers,
>
> David / dhildenb
>