On Sat, Jun 22, 2024 at 6:48 PM <yangge1116@xxxxxxx> wrote:
>
> From: yangge <yangge1116@xxxxxxx>
>
> If a large amount of CMA memory is configured in the system (for
> example, CMA memory accounts for 50% of the system memory), starting a
> virtual machine will call pin_user_pages_remote(..., FOLL_LONGTERM,
> ...) to pin memory. Normally, if a page is present and in the CMA
> area, pin_user_pages_remote() will migrate the page from the CMA area
> to a non-CMA area because of the FOLL_LONGTERM flag. But the current
> code can cause the migration to fail due to an unexpected page
> refcount, and eventually cause the virtual machine to fail to start.
>
> If a page is added to an LRU batch, its refcount increases by one;
> removing the page from the LRU batch decreases it by one. Page
> migration requires that the page not be referenced by anything other
> than its page mapping. Before migrating a page, we should try to drain
> it from the LRU batch in case it is in one; however, folio_test_lru()
> is not sufficient to tell whether the page is in an LRU batch or not,
> and if the page is in an LRU batch, the migration will fail.
>
> To solve the problem above, we modify the logic of adding to the LRU
> batch. Before adding a page to an LRU batch, we clear the LRU flag of
> the page, so that folio_test_lru(page) can be used to check whether
> the page is in an LRU batch. Keeping the LRU flag of the page
> invisible for a longer time should not be a problem, because a new
> page allocated from the buddy allocator and added to an LRU batch also
> has its LRU flag invisible for a long time.
>
> Cc: <stable@xxxxxxxxxxxxxxx>

You have Cc'ed stable; what is the Fixes: tag?

> Signed-off-by: yangge <yangge1116@xxxxxxx>
> ---
>  mm/swap.c | 43 +++++++++++++++++++++++++++++++------------
>  1 file changed, 31 insertions(+), 12 deletions(-)
>
> diff --git a/mm/swap.c b/mm/swap.c
> index dc205bd..9caf6b0 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -211,10 +211,6 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
>         for (i = 0; i < folio_batch_count(fbatch); i++) {
>                 struct folio *folio = fbatch->folios[i];
>
> -               /* block memcg migration while the folio moves between lru */
> -               if (move_fn != lru_add_fn && !folio_test_clear_lru(folio))
> -                       continue;
> -
>                 folio_lruvec_relock_irqsave(folio, &lruvec, &flags);
>                 move_fn(lruvec, folio);
>
> @@ -255,11 +251,16 @@ static void lru_move_tail_fn(struct lruvec *lruvec, struct folio *folio)
>  void folio_rotate_reclaimable(struct folio *folio)
>  {
>         if (!folio_test_locked(folio) && !folio_test_dirty(folio) &&
> -           !folio_test_unevictable(folio) && folio_test_lru(folio)) {
> +           !folio_test_unevictable(folio)) {
>                 struct folio_batch *fbatch;
>                 unsigned long flags;
>
>                 folio_get(folio);
> +               if (!folio_test_clear_lru(folio)) {
> +                       folio_put(folio);
> +                       return;
> +               }
> +
>                 local_lock_irqsave(&lru_rotate.lock, flags);
>                 fbatch = this_cpu_ptr(&lru_rotate.fbatch);
>                 folio_batch_add_and_move(fbatch, folio, lru_move_tail_fn);
> @@ -352,11 +353,15 @@ static void folio_activate_drain(int cpu)
>
>  void folio_activate(struct folio *folio)
>  {
> -       if (folio_test_lru(folio) && !folio_test_active(folio) &&
> -           !folio_test_unevictable(folio)) {
> +       if (!folio_test_active(folio) && !folio_test_unevictable(folio)) {
>                 struct folio_batch *fbatch;
>
>                 folio_get(folio);
> +               if (!folio_test_clear_lru(folio)) {
> +                       folio_put(folio);
> +                       return;
> +               }
> +
>                 local_lock(&cpu_fbatches.lock);
>                 fbatch = this_cpu_ptr(&cpu_fbatches.activate);
>                 folio_batch_add_and_move(fbatch, folio, folio_activate_fn);
> @@ -700,6 +705,11 @@ void deactivate_file_folio(struct folio *folio)
>                 return;
>
>         folio_get(folio);
> +       if (!folio_test_clear_lru(folio)) {
> +               folio_put(folio);
> +               return;
> +       }
> +
>         local_lock(&cpu_fbatches.lock);
>         fbatch = this_cpu_ptr(&cpu_fbatches.lru_deactivate_file);
>         folio_batch_add_and_move(fbatch, folio, lru_deactivate_file_fn);
> @@ -716,11 +726,16 @@ void deactivate_file_folio(struct folio *folio)
>   */
>  void folio_deactivate(struct folio *folio)
>  {
> -       if (folio_test_lru(folio) && !folio_test_unevictable(folio) &&
> -           (folio_test_active(folio) || lru_gen_enabled())) {
> +       if (!folio_test_unevictable(folio) && (folio_test_active(folio) ||
> +           lru_gen_enabled())) {
>                 struct folio_batch *fbatch;
>
>                 folio_get(folio);
> +               if (!folio_test_clear_lru(folio)) {
> +                       folio_put(folio);
> +                       return;
> +               }
> +
>                 local_lock(&cpu_fbatches.lock);
>                 fbatch = this_cpu_ptr(&cpu_fbatches.lru_deactivate);
>                 folio_batch_add_and_move(fbatch, folio, lru_deactivate_fn);
> @@ -737,12 +752,16 @@ void folio_deactivate(struct folio *folio)
>   */
>  void folio_mark_lazyfree(struct folio *folio)
>  {
> -       if (folio_test_lru(folio) && folio_test_anon(folio) &&
> -           folio_test_swapbacked(folio) && !folio_test_swapcache(folio) &&
> -           !folio_test_unevictable(folio)) {
> +       if (folio_test_anon(folio) && folio_test_swapbacked(folio) &&
> +           !folio_test_swapcache(folio) && !folio_test_unevictable(folio)) {
>                 struct folio_batch *fbatch;
>
>                 folio_get(folio);
> +               if (!folio_test_clear_lru(folio)) {
> +                       folio_put(folio);
> +                       return;
> +               }
> +
>                 local_lock(&cpu_fbatches.lock);
>                 fbatch = this_cpu_ptr(&cpu_fbatches.lru_lazyfree);
>                 folio_batch_add_and_move(fbatch, folio, lru_lazyfree_fn);
> --
> 2.7.4
>
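
To make the caller-side effect a bit more concrete: after this change, a
clear LRU flag reliably means "not on an LRU list, or parked in a per-CPU
batch", so a longterm-pin/migration path can use folio_test_lru() to decide
when the per-CPU batches need draining. A simplified, hypothetical sketch
follows (try_isolate_for_longterm_pin() is an illustrative name, not code
from this patch; lru_add_drain_all() and folio_isolate_lru() are the
existing helpers):

/*
 * Illustrative only: drain the per-CPU folio batches once when the LRU
 * flag is missing, then try to take the folio off the LRU for migration.
 */
static bool try_isolate_for_longterm_pin(struct folio *folio)
{
	/*
	 * With this patch, !folio_test_lru() can also mean the folio is
	 * sitting in a per-CPU batch, so flush the batches before giving
	 * up on it.
	 */
	if (!folio_test_lru(folio))
		lru_add_drain_all();

	/* Fails if the LRU flag is still clear, i.e. someone else owns it. */
	return folio_isolate_lru(folio);
}

With the old code, a folio could sit in a batch while still having the LRU
flag set, so a drain like the one above was never triggered and the
migration later failed because of the extra reference the batch still held
on the folio.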