On 21-Jan-25 5:35 AM, Vinay Banakar wrote:
> Sorry, the previous patch was unreadable due to damaged whitespace.
> Here is the same patch with fixed indentation.
>
> Signed-off-by: Vinay Banakar <vny@xxxxxxxxxx>
> ---
>  mm/vmscan.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
>  1 file changed, 74 insertions(+), 33 deletions(-)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index bd489c1af..1bd510622 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -1035,6 +1035,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
>          struct folio_batch free_folios;
>          LIST_HEAD(ret_folios);
>          LIST_HEAD(demote_folios);
> +        LIST_HEAD(pageout_list);
>          unsigned int nr_reclaimed = 0;
>          unsigned int pgactivate = 0;
>          bool do_demote_pass;
> @@ -1351,39 +1352,9 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
>                          if (!sc->may_writepage)
>                                  goto keep_locked;
>
> -                        /*
> -                         * Folio is dirty. Flush the TLB if a writable entry
> -                         * potentially exists to avoid CPU writes after I/O
> -                         * starts and then write it out here.
> -                         */
> -                        try_to_unmap_flush_dirty();
> -                        switch (pageout(folio, mapping, &plug)) {
> -                        case PAGE_KEEP:
> -                                goto keep_locked;
> -                        case PAGE_ACTIVATE:
> -                                goto activate_locked;
> -                        case PAGE_SUCCESS:
> -                                stat->nr_pageout += nr_pages;
> -
> -                                if (folio_test_writeback(folio))
> -                                        goto keep;
> -                                if (folio_test_dirty(folio))
> -                                        goto keep;
> -
> -                                /*
> -                                 * A synchronous write - probably a ramdisk. Go
> -                                 * ahead and try to reclaim the folio.
> -                                 */
> -                                if (!folio_trylock(folio))
> -                                        goto keep;
> -                                if (folio_test_dirty(folio) ||
> -                                    folio_test_writeback(folio))
> -                                        goto keep_locked;
> -                                mapping = folio_mapping(folio);
> -                                fallthrough;
> -                        case PAGE_CLEAN:
> -                                ; /* try to free the folio below */
> -                        }
> +                        /* Add to pageout list for deferred bio submissions */
> +                        list_add(&folio->lru, &pageout_list);
> +                        continue;
The dirty pages are collected in a list here...
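To make the reshaped flow easier to see, here is a condensed sketch of
the two phases. This is simplified pseudocode rather than compilable
kernel code: for_each_folio_safe() and writeback_and_try_to_free() are
hypothetical stand-ins for the real loop body and for the
pageout()/__remove_mapping() tail, not actual kernel symbols.

  /*
   * Phase 1: classify. Dirty folios are no longer written out inside
   * the main loop (which previously implied a TLB flush per dirty
   * folio); they are parked on pageout_list instead.
   */
  for_each_folio_safe(folio, folio_list) {
          if (folio_test_dirty(folio)) {
                  list_add(&folio->lru, &pageout_list);
                  continue;
          }
          /* ... existing reclaim/activate/keep decisions ... */
  }

  /*
   * Phase 2: a single deferred TLB flush covers every folio collected
   * above, after which writeback proceeds much as before.
   */
  if (!list_empty(&pageout_list)) {
          try_to_unmap_flush_dirty();
          for_each_folio_safe(folio, pageout_list)
                  writeback_and_try_to_free(folio);
  }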
>                         }
>
>                         /*
> @@ -1494,6 +1465,76 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
>          }
>          /* 'folio_list' is always empty here */
>
> +        if (!list_empty(&pageout_list)) {
> +                /*
> +                 * Batch TLB flushes by flushing once before processing all dirty pages.
> +                 * Since we operate on one PMD at a time, this batches TLB flushes at
> +                 * PMD granularity rather than per-page, reducing IPIs.
> +                 */
> +                struct address_space *mapping;
> +                try_to_unmap_flush_dirty();
and one flush request is issued for the entire list. Where is the PMD-level (512 entries per PMD) batching done? Is that implicit elsewhere in the flow?
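My guess, and it is only a guess based on the MADV_PAGEOUT use case
this patch was motivated by, is that the granularity comes from the
caller rather than from shrink_folio_list() itself, in which case each
invocation would see at most one PTE table's worth of folios:

  /*
   * Assumed call chain (not confirmed by this patch):
   *
   *   madvise_cold_or_pageout_pte_range()   walks one PTE table
   *                                         (one PMD, 512 entries on x86-64)
   *     -> reclaim_pages()                  one batch per PTE table
   *       -> shrink_folio_list()            the function patched here
   *
   * Under that assumption, the single try_to_unmap_flush_dirty() above
   * amortizes one TLB-flush IPI broadcast over up to 512 pages instead
   * of issuing one per dirty folio.
   */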
> +
> +                while (!list_empty(&pageout_list)) {
> +                        struct folio *folio = lru_to_folio(&pageout_list);
> +                        list_del(&folio->lru);
> +
> +                        /* Recheck if page got reactivated */
> +                        if (folio_test_active(folio) ||
> +                            (folio_mapped(folio) && folio_test_young(folio)))
> +                                goto skip_pageout_locked;
> +
> +                        mapping = folio_mapping(folio);
> +                        pageout_t pageout_res = pageout(folio, mapping, &plug);
> +                        switch (pageout_res) {
> +                        case PAGE_KEEP:
> +                                goto skip_pageout_locked;
> +                        case PAGE_ACTIVATE:
> +                                goto skip_pageout_locked;
> +                        case PAGE_SUCCESS:
> +                                stat->nr_pageout += folio_nr_pages(folio);
> +
> +                                if (folio_test_writeback(folio) ||
> +                                    folio_test_dirty(folio))
> +                                        goto skip_pageout;
> +
> +                                /*
> +                                 * A synchronous write - probably a ramdisk. Go
> +                                 * ahead and try to reclaim the folio.
> +                                 */
> +                                if (!folio_trylock(folio))
> +                                        goto skip_pageout;
> +                                if (folio_test_dirty(folio) ||
> +                                    folio_test_writeback(folio))
> +                                        goto skip_pageout_locked;
> +
> +                                /* Try to free the page */
> +                                if (!mapping ||
> +                                    !__remove_mapping(mapping, folio, true,
> +                                                      sc->target_mem_cgroup))
> +                                        goto skip_pageout_locked;
> +
> +                                nr_reclaimed += folio_nr_pages(folio);
> +                                folio_unlock(folio);
> +                                continue;
> +
> +                        case PAGE_CLEAN:
> +                                if (!mapping ||
> +                                    !__remove_mapping(mapping, folio, true,
> +                                                      sc->target_mem_cgroup))
> +                                        goto skip_pageout_locked;
> +
> +                                nr_reclaimed += folio_nr_pages(folio);
> +                                folio_unlock(folio);
> +                                continue;
> +                        }
> +
> +skip_pageout_locked:
> +                        folio_unlock(folio);
> +skip_pageout:
> +                        list_add(&folio->lru, &ret_folios);
> +                }
> +        }
> +
>          /* Migrate folios selected for demotion */
>          nr_reclaimed += demote_folio_list(&demote_folios, pgdat);
>          /* Folios that could not be demoted are still in @demote_folios */
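For reference, the flush primitive that the patch hoists out of the
per-folio path is tiny. Paraphrasing mm/rmap.c from memory (the exact
guards vary by kernel version, and the whole mechanism depends on
CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH), it looks roughly like:

  /* Paraphrase, not the verbatim source. The flush is needed only
   * when some batched unmap entry may have been writable, since only
   * a writable TLB entry lets a CPU redirty the page after I/O
   * starts. */
  void try_to_unmap_flush_dirty(void)
  {
          struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;

          if (tlb_ubc->writable)
                  try_to_unmap_flush();
  }

So how much the batching saves depends on how many folios sit on
pageout_list when that single flush is issued, which is really the
same question as above.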
Regards,
Bharata.