On Thu, Aug 15, 2024 at 12:37 AM Usama Arif <usamaarif642@xxxxxxxxx> wrote:

[snip]

> >>>>
> >>>> -void deferred_split_folio(struct folio *folio)
> >>>> +void deferred_split_folio(struct folio *folio, bool partially_mapped)
> >>>>  {
> >>>>         struct deferred_split *ds_queue = get_deferred_split_queue(folio);
> >>>>  #ifdef CONFIG_MEMCG
> >>>> @@ -3485,14 +3487,17 @@ void deferred_split_folio(struct folio *folio)
> >>>>         if (folio_test_swapcache(folio))
> >>>>                 return;
> >>>>
> >>>> -       if (!list_empty(&folio->_deferred_list))
> >>>> -               return;
> >>>> -
> >>>>         spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
> >>>> +       if (partially_mapped)
> >>>> +               folio_set_partially_mapped(folio);
> >>>> +       else
> >>>> +               folio_clear_partially_mapped(folio);
> >>>>         if (list_empty(&folio->_deferred_list)) {
> >>>> -               if (folio_test_pmd_mappable(folio))
> >>>> -                       count_vm_event(THP_DEFERRED_SPLIT_PAGE);
> >>>> -               count_mthp_stat(folio_order(folio), MTHP_STAT_SPLIT_DEFERRED);
> >>>> +               if (partially_mapped) {
> >>>> +                       if (folio_test_pmd_mappable(folio))
> >>>> +                               count_vm_event(THP_DEFERRED_SPLIT_PAGE);
> >>>> +                       count_mthp_stat(folio_order(folio), MTHP_STAT_SPLIT_DEFERRED);
> >>>
> >>> This code completely broke MTHP_STAT_SPLIT_DEFERRED for PMD_ORDER. It
> >>> added the folio to the deferred_list as entirely_mapped
> >>> (partially_mapped == false).
> >>> However, when partially_mapped becomes true, there's no opportunity to
> >>> add it again
> >>> as it has been there on the list. Are you consistently seeing the counter for
> >>> PMD_ORDER as 0?
> >>>
> >>
> >> Ah I see it, this should fix it?
> >>
> >> -void deferred_split_folio(struct folio *folio)
> >> +/* partially_mapped=false won't clear PG_partially_mapped folio flag */
> >> +void deferred_split_folio(struct folio *folio, bool partially_mapped)
> >>  {
> >>         struct deferred_split *ds_queue = get_deferred_split_queue(folio);
> >>  #ifdef CONFIG_MEMCG
> >> @@ -3485,14 +3488,14 @@ void deferred_split_folio(struct folio *folio)
> >>         if (folio_test_swapcache(folio))
> >>                 return;
> >>
> >> -       if (!list_empty(&folio->_deferred_list))
> >> -               return;
> >> -
> >>         spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
> >> -       if (list_empty(&folio->_deferred_list)) {
> >> +       if (partially_mapped) {
> >> +               folio_set_partially_mapped(folio);
> >>                 if (folio_test_pmd_mappable(folio))
> >>                         count_vm_event(THP_DEFERRED_SPLIT_PAGE);
> >>                 count_mthp_stat(folio_order(folio), MTHP_STAT_SPLIT_DEFERRED);
> >> +       }
> >> +       if (list_empty(&folio->_deferred_list)) {
> >>                 list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
> >>                 ds_queue->split_queue_len++;
> >>  #ifdef CONFIG_MEMCG
> >>
> >
> > not enough. as deferred_split_folio(true) won't be called if folio has been
> > deferred_list in __folio_remove_rmap():
> >
> >         if (partially_mapped && folio_test_anon(folio) &&
> >             list_empty(&folio->_deferred_list))
> >                 deferred_split_folio(folio, true);
> >
> > so you will still see 0.
> >
>
> ah yes, Thanks.
>
> So below diff over the current v3 series should work for all cases:
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index b4d72479330d..482e3ab60911 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -3483,6 +3483,7 @@ void __folio_undo_large_rmappable(struct folio *folio)
>                 spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
>  }
>
> +/* partially_mapped=false won't clear PG_partially_mapped folio flag */
>  void deferred_split_folio(struct folio *folio, bool partially_mapped)
>  {
>         struct deferred_split *ds_queue = get_deferred_split_queue(folio);
> @@ -3515,16 +3516,16 @@ void deferred_split_folio(struct folio *folio, bool partially_mapped)
>                 return;
>
>         spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
> -       if (partially_mapped)
> +       if (partially_mapped) {
>                 folio_set_partially_mapped(folio);
> -       else
> -               folio_clear_partially_mapped(folio);
> +               if (folio_test_pmd_mappable(folio))
> +                       count_vm_event(THP_DEFERRED_SPLIT_PAGE);
> +               count_mthp_stat(folio_order(folio), MTHP_STAT_SPLIT_DEFERRED);
> +       } else {
> +               /* partially mapped folios cannot become partially unmapped */
> +               VM_WARN_ON_FOLIO(folio_test_partially_mapped(folio), folio);
> +       }
>         if (list_empty(&folio->_deferred_list)) {
> -               if (partially_mapped) {
> -                       if (folio_test_pmd_mappable(folio))
> -                               count_vm_event(THP_DEFERRED_SPLIT_PAGE);
> -                       count_mthp_stat(folio_order(folio), MTHP_STAT_SPLIT_DEFERRED);
> -               }
>                 list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
>                 ds_queue->split_queue_len++;
>  #ifdef CONFIG_MEMCG
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 9ad558c2bad0..4c330635aa4e 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1578,7 +1578,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
>          * Check partially_mapped first to ensure it is a large folio.
>          */
>         if (partially_mapped && folio_test_anon(folio) &&
> -           list_empty(&folio->_deferred_list))
> +           !folio_test_partially_mapped(folio))
>                 deferred_split_folio(folio, true);
>
>         __folio_mod_stat(folio, -nr, -nr_pmdmapped);
>

This is an improvement, but there's still an issue. Two or more threads can
execute deferred_split_folio() simultaneously, which could lead to the
DEFERRED_SPLIT counters being incremented multiple times for the same folio.
We should double-check the partially_mapped status after acquiring the
spinlock:

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f401ceded697..3d247826fb95 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3607,10 +3607,12 @@ void deferred_split_folio(struct folio *folio, bool partially_mapped)
 
        spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
        if (partially_mapped) {
-               folio_set_partially_mapped(folio);
-               if (folio_test_pmd_mappable(folio))
-                       count_vm_event(THP_DEFERRED_SPLIT_PAGE);
-               count_mthp_stat(folio_order(folio), MTHP_STAT_SPLIT_DEFERRED);
+               if (!folio_test_partially_mapped(folio)) {
+                       folio_set_partially_mapped(folio);
+                       if (folio_test_pmd_mappable(folio))
+                               count_vm_event(THP_DEFERRED_SPLIT_PAGE);
+                       count_mthp_stat(folio_order(folio), MTHP_STAT_SPLIT_DEFERRED);
+               }
        } else {
                /* partially mapped folios cannot become partially unmapped */
                VM_WARN_ON_FOLIO(folio_test_partially_mapped(folio), folio);
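
To make the locking pattern concrete, here is a minimal standalone userspace
sketch (plain C11 with pthreads, not kernel code; deferred_split_sketch(),
partially_mapped_flag, split_queue_lock and split_deferred are illustrative
stand-ins rather than kernel APIs) of the same idea: an unlocked fast-path
test of the partially-mapped flag, plus a re-check under the queue lock so
that two racing unmappers cannot both bump the deferred-split counter.

/*
 * Illustrative only: "partially_mapped_flag" stands in for the folio's
 * PG_partially_mapped bit, "split_queue_lock" for ds_queue->split_queue_lock,
 * and "split_deferred" for the MTHP_STAT_SPLIT_DEFERRED counter.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t split_queue_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_bool partially_mapped_flag;
static unsigned long split_deferred;

static void deferred_split_sketch(void)
{
        /* Unlocked fast path, like the check in __folio_remove_rmap(). */
        if (partially_mapped_flag)
                return;

        pthread_mutex_lock(&split_queue_lock);
        /* Re-check under the lock: only the first thread does the counting. */
        if (!partially_mapped_flag) {
                partially_mapped_flag = true;
                split_deferred++;
        }
        pthread_mutex_unlock(&split_queue_lock);
}

static void *unmap_thread(void *arg)
{
        (void)arg;
        deferred_split_sketch();
        return NULL;
}

int main(void)
{
        pthread_t t[2];
        int i;

        for (i = 0; i < 2; i++)
                pthread_create(&t[i], NULL, unmap_thread, NULL);
        for (i = 0; i < 2; i++)
                pthread_join(t[i], NULL);

        /* Always prints 1, no matter how the two threads interleave. */
        printf("split_deferred = %lu\n", split_deferred);
        return 0;
}

Built with e.g. "cc -pthread", it prints split_deferred = 1 regardless of how
the two threads interleave: the unlocked check is only an optimisation, while
correctness of the counter comes from the test done while holding the lock,
which is what the hunk above adds to deferred_split_folio().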