On Thu, Feb 22, 2024 at 05:09:43PM +0100, David Hildenbrand wrote: > We always get a head page, so we can just naturally interpret it as a folio > (similar to other code). memfd seems rather confused about how to iterate over the page cache. Perhaps we could sort that out and then delete total_mapcount as a second patch? I haven't tested this at all, but ... Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx> diff --git a/mm/memfd.c b/mm/memfd.c index d3a1ba4208c9..45e55b0e3cbe 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -29,28 +29,29 @@ #define MEMFD_TAG_PINNED PAGECACHE_TAG_TOWRITE #define LAST_SCAN 4 /* about 150ms max */ +static bool memfd_extra_refs(struct folio *folio) +{ + return folio_ref_count(folio) - folio_mapcount(folio) != + folio_nr_pages(folio); +} + static void memfd_tag_pins(struct xa_state *xas) { - struct page *page; + struct folio *folio; int latency = 0; - int cache_count; lru_add_drain(); xas_lock_irq(xas); - xas_for_each(xas, page, ULONG_MAX) { - cache_count = 1; - if (!xa_is_value(page) && - PageTransHuge(page) && !PageHuge(page)) - cache_count = HPAGE_PMD_NR; - - if (!xa_is_value(page) && - page_count(page) - total_mapcount(page) != cache_count) + xas_for_each(xas, folio, ULONG_MAX) { + /* Can we have shadow/swap entries in memfd? 
*/ + if (xa_is_value(folio)) + continue; + + if (memfd_extra_refs(folio)) xas_set_mark(xas, MEMFD_TAG_PINNED); - if (cache_count != 1) - xas_set(xas, page->index + cache_count); - latency += cache_count; + latency++; if (latency < XA_CHECK_SCHED) continue; latency = 0; @@ -75,7 +76,6 @@ static void memfd_tag_pins(struct xa_state *xas) static int memfd_wait_for_pins(struct address_space *mapping) { XA_STATE(xas, &mapping->i_pages, 0); - struct page *page; int error, scan; memfd_tag_pins(&xas); @@ -83,7 +83,7 @@ static int memfd_wait_for_pins(struct address_space *mapping) error = 0; for (scan = 0; scan <= LAST_SCAN; scan++) { int latency = 0; - int cache_count; + struct folio *folio; if (!xas_marked(&xas, MEMFD_TAG_PINNED)) break; @@ -95,16 +95,10 @@ static int memfd_wait_for_pins(struct address_space *mapping) xas_set(&xas, 0); xas_lock_irq(&xas); - xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) { + xas_for_each_marked(&xas, folio, ULONG_MAX, MEMFD_TAG_PINNED) { bool clear = true; - cache_count = 1; - if (!xa_is_value(page) && - PageTransHuge(page) && !PageHuge(page)) - cache_count = HPAGE_PMD_NR; - - if (!xa_is_value(page) && cache_count != - page_count(page) - total_mapcount(page)) { + if (memfd_extra_refs(folio)) { /* * On the last scan, we clean up all those tags * we inserted; but make a note that we still @@ -118,8 +112,7 @@ static int memfd_wait_for_pins(struct address_space *mapping) if (clear) xas_clear_mark(&xas, MEMFD_TAG_PINNED); - latency += cache_count; - if (latency < XA_CHECK_SCHED) + if (++latency < XA_CHECK_SCHED) continue; latency = 0;