From: Kairui Song <kasong@xxxxxxxxxxx> After removing the cache bypass swapin, the first things that can be removed are the clear_shadow_from_swap_cache calls. Currently clear_shadow_from_swap_cache is called in many paths. It is called by swap_range_free, which has two direct callers: - swap_free_cluster, which is only called by put_swap_folio to free up the shadow of a slot cluster. - swap_entry_free, which is only called by swapcache_free_entries to free up the shadow of a slot. These two are used very commonly throughout the swap code. Notice that the shadow is only written by __delete_from_swap_cache after a successful swap out, so clearly we only want to clear the shadow after swap in (the shadow is used and no longer needed) or Unmap/MADV_FREE. Now that all swapin uses the cached swapin path, clear_shadow_from_swap_cache is not needed for swapin anymore, because we have to insert the folio first, and this already removes the shadow. So we only need to clear the shadow for Unmap/MADV_FREE. 
All direct/indirect callers of swap_free_cluster and swap_entry_free are listed below: - swap_free_cluster: -> put_swap_folio (Clean the cache flag and try to delete the shadow, after removing the cache or error handling) -> delete_from_swap_cache -> __remove_mapping -> shmem_writepage -> folio_alloc_swap -> add_to_swap -> __read_swap_cache_async - swap_entry_free -> swapcache_free_entries -> drain_slots_cache_cpu -> free_swap_slot -> put_swap_folio (Already covered above) -> __swap_entry_free / swap_free -> free_swap_and_cache (Called by Unmap/Zap/MADV_FREE) -> madvise_free_single_vma -> unmap_page_range -> shmem_undo_range -> swap_free (Called by swapin path) -> do_swap_page (Swapin path) -> alloc_swapdev_block/free_all_swap_pages () -> try_to_unmap_one (Error handling, no shadow) -> shmem_set_folio_swapin_error (Shadow just gone) -> shmem_swapin_folio (Shmem's do_swap_page) -> unuse_pte (Swapoff, which always uses swapcache) So now we only need to call clear_shadow_from_swap_cache in free_swap_and_cache, because all swapin/out will go through the swap cache now. Previously all of the above functions could invoke clear_shadow_from_swap_cache in case a cache bypass swapin left an entry with an uncleared shadow. Also make clear_shadow_from_swap_cache only clear one entry for simplicity. 
Test result of sequential swapin/out: Before (us) After (us) Swapout: 33624641 33648529 Swapin: 41614858 40667696 (+2.3%) Swapout (THP): 7795530 7658664 Swapin (THP) : 41708471 40602278 (+2.7%) Signed-off-by: Kairui Song <kasong@xxxxxxxxxxx> --- mm/swap.h | 6 ++---- mm/swap_state.c | 33 ++++++++------------------------- mm/swapfile.c | 6 ++++-- 3 files changed, 14 insertions(+), 31 deletions(-) diff --git a/mm/swap.h b/mm/swap.h index ac9573b03432..7721ddb3bdbc 100644 --- a/mm/swap.h +++ b/mm/swap.h @@ -39,8 +39,7 @@ int add_to_swap_cache(struct folio *folio, swp_entry_t entry, void __delete_from_swap_cache(struct folio *folio, swp_entry_t entry, void *shadow); void delete_from_swap_cache(struct folio *folio); -void clear_shadow_from_swap_cache(int type, unsigned long begin, - unsigned long end); +void clear_shadow_from_swap_cache(swp_entry_t entry); struct folio *swap_cache_get_folio(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr); struct folio *filemap_get_incore_folio(struct address_space *mapping, @@ -148,8 +147,7 @@ static inline void delete_from_swap_cache(struct folio *folio) { } -static inline void clear_shadow_from_swap_cache(int type, unsigned long begin, - unsigned long end) +static inline void clear_shadow_from_swap_cache(swp_entry_t entry) { } diff --git a/mm/swap_state.c b/mm/swap_state.c index 49ef6250f676..b84e7b0ea4a5 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -245,34 +245,17 @@ void delete_from_swap_cache(struct folio *folio) folio_ref_sub(folio, folio_nr_pages(folio)); } -void clear_shadow_from_swap_cache(int type, unsigned long begin, - unsigned long end) +void clear_shadow_from_swap_cache(swp_entry_t entry) { - unsigned long curr = begin; - void *old; - - for (;;) { - swp_entry_t entry = swp_entry(type, curr); - struct address_space *address_space = swap_address_space(entry); - XA_STATE(xas, &address_space->i_pages, curr); - - xas_set_update(&xas, workingset_update_node); + struct address_space *address_space = 
swap_address_space(entry); + XA_STATE(xas, &address_space->i_pages, swp_offset(entry)); - xa_lock_irq(&address_space->i_pages); - xas_for_each(&xas, old, end) { - if (!xa_is_value(old)) - continue; - xas_store(&xas, NULL); - } - xa_unlock_irq(&address_space->i_pages); + xas_set_update(&xas, workingset_update_node); - /* search the next swapcache until we meet end */ - curr >>= SWAP_ADDRESS_SPACE_SHIFT; - curr++; - curr <<= SWAP_ADDRESS_SPACE_SHIFT; - if (curr > end) - break; - } + xa_lock_irq(&address_space->i_pages); + if (xa_is_value(xas_load(&xas))) + xas_store(&xas, NULL); + xa_unlock_irq(&address_space->i_pages); } /* diff --git a/mm/swapfile.c b/mm/swapfile.c index ae8d3aa05df7..bafae23c0f26 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -724,7 +724,6 @@ static void add_to_avail_list(struct swap_info_struct *p) static void swap_range_free(struct swap_info_struct *si, unsigned long offset, unsigned int nr_entries) { - unsigned long begin = offset; unsigned long end = offset + nr_entries - 1; void (*swap_slot_free_notify)(struct block_device *, unsigned long); @@ -748,7 +747,6 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset, swap_slot_free_notify(si->bdev, offset); offset++; } - clear_shadow_from_swap_cache(si->type, begin, end); /* * Make sure that try_to_unuse() observes si->inuse_pages reaching 0 @@ -1605,6 +1603,8 @@ bool folio_free_swap(struct folio *folio) /* * Free the swap entry like above, but also try to * free the page cache entry if it is the last user. + * Useful when clearing the swap map and swap cache + * without reading swap content (eg. unmap, MADV_FREE) */ int free_swap_and_cache(swp_entry_t entry) { @@ -1626,6 +1626,8 @@ int free_swap_and_cache(swp_entry_t entry) !swap_page_trans_huge_swapped(p, entry)) __try_to_reclaim_swap(p, swp_offset(entry), TTRS_UNMAPPED | TTRS_FULL); + if (!count) + clear_shadow_from_swap_cache(entry); put_swap_device(p); } return p != NULL; -- 2.43.0