Hi Baolin,

On Thu, Jun 06, 2024 at 07:58:55PM +0800, Baolin Wang wrote:
> In the following patches, shmem will support the swap out of large folios,
> which means the shmem mappings may contain large order swap entries, so an
> 'orders' array is added for find_get_entries() and find_lock_entries() to
> obtain the order size of shmem swap entries, which will help in the release
> of shmem large folio swap entries.
>
> Signed-off-by: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx>
> ---
>  mm/filemap.c  | 27 +++++++++++++++++++++++++--
>  mm/internal.h |  4 ++--
>  mm/shmem.c    | 17 +++++++++--------
>  mm/truncate.c |  8 ++++----
>  4 files changed, 40 insertions(+), 16 deletions(-)
>
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 37061aafd191..47fcd9ee6012 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -2036,14 +2036,24 @@ static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max,
>   * Return: The number of entries which were found.
>   */
>  unsigned find_get_entries(struct address_space *mapping, pgoff_t *start,
> -                pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices)
> +                pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices,
> +                int *orders)
>  {
>          XA_STATE(xas, &mapping->i_pages, *start);
>          struct folio *folio;
> +        int order;
>
>          rcu_read_lock();
>          while ((folio = find_get_entry(&xas, end, XA_PRESENT)) != NULL) {
>                  indices[fbatch->nr] = xas.xa_index;
> +                if (orders) {
> +                        if (!xa_is_value(folio))
> +                                order = folio_order(folio);
> +                        else
> +                                order = xa_get_order(xas.xa, xas.xa_index);
> +
> +                        orders[fbatch->nr] = order;
> +                }
>                  if (!folio_batch_add(fbatch, folio))
>                          break;
>          }
> @@ -2056,6 +2066,8 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t *start,
>                  folio = fbatch->folios[idx];
>                  if (!xa_is_value(folio))
>                          nr = folio_nr_pages(folio);
> +                else if (orders)
> +                        nr = 1 << orders[idx];
>                  *start = indices[idx] + nr;
>          }
>          return folio_batch_count(fbatch);
> @@ -2082,10 +2094,12 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t *start,
>   * Return: The number of entries which were found.
>   */
>  unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
> -                pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices)
> +                pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices,
> +                int *orders)
>  {
>          XA_STATE(xas, &mapping->i_pages, *start);
>          struct folio *folio;
> +        int order;
>
>          rcu_read_lock();
>          while ((folio = find_get_entry(&xas, end, XA_PRESENT))) {
> @@ -2099,9 +2113,16 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
>                          if (folio->mapping != mapping ||
>                              folio_test_writeback(folio))
>                                  goto unlock;
> +                        if (orders)
> +                                order = folio_order(folio);
>                          VM_BUG_ON_FOLIO(!folio_contains(folio, xas.xa_index),
>                                          folio);
> +                } else if (orders) {
> +                        order = xa_get_order(xas.xa, xas.xa_index);
>                  }
> +
> +                if (orders)
> +                        orders[fbatch->nr] = order;
>                  indices[fbatch->nr] = xas.xa_index;
>                  if (!folio_batch_add(fbatch, folio))
>                          break;
> @@ -2120,6 +2141,8 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
>                  folio = fbatch->folios[idx];
>                  if (!xa_is_value(folio))
>                          nr = folio_nr_pages(folio);
> +                else if (orders)
> +                        nr = 1 << orders[idx];
>                  *start = indices[idx] + nr;
>          }
>          return folio_batch_count(fbatch);
> diff --git a/mm/internal.h b/mm/internal.h
> index 3419c329b3bc..0b5adb6c33cc 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -339,9 +339,9 @@ static inline void force_page_cache_readahead(struct address_space *mapping,
>  }
>
>  unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
> -                pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
> +                pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices, int *orders);
>  unsigned find_get_entries(struct address_space *mapping, pgoff_t *start,
> -                pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
> +                pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices, int *orders);
>  void filemap_free_folio(struct address_space *mapping, struct folio *folio);
>  int truncate_inode_folio(struct address_space *mapping, struct folio *folio);
>  bool truncate_inode_partial_folio(struct folio *folio, loff_t start,
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 0ac71580decb..28ba603d87b8 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -840,14 +840,14 @@ static void shmem_delete_from_page_cache(struct folio *folio, void *radswap)
>   * Remove swap entry from page cache, free the swap and its page cache.
>   */
>  static int shmem_free_swap(struct address_space *mapping,
> -                           pgoff_t index, void *radswap)
> +                           pgoff_t index, void *radswap, int order)
>  {
>          void *old;

Matthew Wilcox suggested [1] returning the number of pages freed in
shmem_free_swap().

[1] https://lore.kernel.org/all/ZQRf2pGWurrE0uO+@xxxxxxxxxxxxxxxxxxxx/

Which I submitted here:
https://lore.kernel.org/all/20231028211518.3424020-5-da.gomez@xxxxxxxxxxx/

Do you agree with the suggestion? If so, could we update my patch to use
free_swap_and_cache_nr() or include that here?
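
Just to make the idea concrete, here is a rough, untested sketch of that
direction. The xa_get_order() lookup inside shmem_free_swap() is my
assumption for how the order could be derived there, so callers would not
have to pass it in:

        /*
         * Sketch only: drop the swap entry from the page cache and return
         * how many pages' worth of swap were freed, or 0 if the entry had
         * already been replaced.
         */
        static long shmem_free_swap(struct address_space *mapping,
                                    pgoff_t index, void *radswap)
        {
                int order = xa_get_order(&mapping->i_pages, index);
                void *old;

                old = xa_cmpxchg_irq(&mapping->i_pages, index, radswap, NULL, 0);
                if (old != radswap)
                        return 0;
                free_swap_and_cache_nr(radix_to_swp_entry(radswap), 1 << order);

                return 1 << order;
        }

With that, the nr_swaps_freed accounting in shmem_undo_range() could simply
accumulate the return value instead of recomputing 1 << orders[i], although
the "swap was replaced by page: retry" path would then have to key off a
zero return rather than -ENOENT.
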
>
>          old = xa_cmpxchg_irq(&mapping->i_pages, index, radswap, NULL, 0);
>          if (old != radswap)
>                  return -ENOENT;
> -        free_swap_and_cache(radix_to_swp_entry(radswap));
> +        free_swap_and_cache_nr(radix_to_swp_entry(radswap), 1 << order);
>          return 0;
>  }
>
> @@ -981,6 +981,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
>          pgoff_t end = (lend + 1) >> PAGE_SHIFT;
>          struct folio_batch fbatch;
>          pgoff_t indices[PAGEVEC_SIZE];
> +        int orders[PAGEVEC_SIZE];
>          struct folio *folio;
>          bool same_folio;
>          long nr_swaps_freed = 0;
> @@ -996,15 +997,15 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
>          folio_batch_init(&fbatch);
>          index = start;
>          while (index < end && find_lock_entries(mapping, &index, end - 1,
> -                        &fbatch, indices)) {
> +                        &fbatch, indices, orders)) {
>                  for (i = 0; i < folio_batch_count(&fbatch); i++) {
>                          folio = fbatch.folios[i];
>
>                          if (xa_is_value(folio)) {
>                                  if (unfalloc)
>                                          continue;
> -                                nr_swaps_freed += !shmem_free_swap(mapping,
> -                                                        indices[i], folio);
> +                                if (!shmem_free_swap(mapping, indices[i], folio, orders[i]))
> +                                        nr_swaps_freed += 1 << orders[i];
>                                  continue;
>                          }
>
> @@ -1058,7 +1059,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
>                  cond_resched();
>
>                  if (!find_get_entries(mapping, &index, end - 1, &fbatch,
> -                                indices)) {
> +                                indices, orders)) {
>                          /* If all gone or hole-punch or unfalloc, we're done */
>                          if (index == start || end != -1)
>                                  break;
> @@ -1072,12 +1073,12 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
>                          if (xa_is_value(folio)) {
>                                  if (unfalloc)
>                                          continue;
> -                                if (shmem_free_swap(mapping, indices[i], folio)) {
> +                                if (shmem_free_swap(mapping, indices[i], folio, orders[i])) {
>                                          /* Swap was replaced by page: retry */
>                                          index = indices[i];
>                                          break;
>                                  }
> -                                nr_swaps_freed++;
> +                                nr_swaps_freed += 1 << orders[i];
>                                  continue;
>                          }
>
> diff --git a/mm/truncate.c b/mm/truncate.c
> index 5ce62a939e55..3a4bc9dba451 100644
> --- a/mm/truncate.c
> +++ b/mm/truncate.c
> @@ -352,7 +352,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
>          folio_batch_init(&fbatch);
>          index = start;
>          while (index < end && find_lock_entries(mapping, &index, end - 1,
> -                        &fbatch, indices)) {
> +                        &fbatch, indices, NULL)) {
>                  truncate_folio_batch_exceptionals(mapping, &fbatch, indices);
>                  for (i = 0; i < folio_batch_count(&fbatch); i++)
>                          truncate_cleanup_folio(fbatch.folios[i]);
> @@ -392,7 +392,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
>          while (index < end) {
>                  cond_resched();
>                  if (!find_get_entries(mapping, &index, end - 1, &fbatch,
> -                                indices)) {
> +                                indices, NULL)) {
>                          /* If all gone from start onwards, we're done */
>                          if (index == start)
>                                  break;
> @@ -496,7 +496,7 @@ unsigned long mapping_try_invalidate(struct address_space *mapping,
>          int i;
>
>          folio_batch_init(&fbatch);
> -        while (find_lock_entries(mapping, &index, end, &fbatch, indices)) {
> +        while (find_lock_entries(mapping, &index, end, &fbatch, indices, NULL)) {
>                  for (i = 0; i < folio_batch_count(&fbatch); i++) {
>                          struct folio *folio = fbatch.folios[i];
>
> @@ -622,7 +622,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
>
>          folio_batch_init(&fbatch);
>          index = start;
> -        while (find_get_entries(mapping, &index, end, &fbatch, indices)) {
> +        while (find_get_entries(mapping, &index, end, &fbatch, indices, NULL)) {
>                  for (i = 0; i < folio_batch_count(&fbatch); i++) {
>                          struct folio *folio = fbatch.folios[i];
>
> --
> 2.39.3
>

Daniel