From: Kairui Song <kasong@xxxxxxxxxxx>

When an xa_value is returned by the cache lookup, keep it so it can be
used later for the workingset refault check instead of doing the lookup
again in swapin_direct.

This does have the side effect of making swapoff also trigger the
workingset check, but that should be fine since swapoff already affects
the workload in many ways.

After this commit, swapin is about 4% faster for ZRAM. Micro benchmark
result (using madvise to swap out 10G of zero-filled data to ZRAM, then
reading it back in):

Before: 11143285 us
After:  10692644 us (+4.1%)

Signed-off-by: Kairui Song <kasong@xxxxxxxxxxx>
---
 mm/shmem.c      |  2 +-
 mm/swap.h       |  3 ++-
 mm/swap_state.c | 24 +++++++++++++-----------
 3 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 928aa2304932..9da9f7a0e620 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1872,7 +1872,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 	}
 
 	/* Look it up and read it in.. */
-	folio = swap_cache_get_folio(swap, NULL, 0);
+	folio = swap_cache_get_folio(swap, NULL, 0, NULL);
 	if (!folio) {
 		/* Or update major stats only when swapin succeeds?? */
 		if (fault_type) {
diff --git a/mm/swap.h b/mm/swap.h
index 1f4cdb324bf0..9180411afcfe 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -58,7 +58,8 @@ void delete_from_swap_cache(struct folio *folio);
 void clear_shadow_from_swap_cache(int type, unsigned long begin,
 				  unsigned long end);
 struct folio *swap_cache_get_folio(swp_entry_t entry,
-		struct vm_area_struct *vma, unsigned long addr);
+		struct vm_area_struct *vma, unsigned long addr,
+		void **shadowp);
 struct folio *filemap_get_incore_folio(struct address_space *mapping,
 		pgoff_t index);
 
diff --git a/mm/swap_state.c b/mm/swap_state.c
index f6f1e6f5d782..21badd4f0fc7 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -335,12 +335,18 @@ static inline bool swap_use_vma_readahead(void)
  * Caller must lock the swap device or hold a reference to keep it valid.
  */
 struct folio *swap_cache_get_folio(swp_entry_t entry,
-		struct vm_area_struct *vma, unsigned long addr)
+		struct vm_area_struct *vma, unsigned long addr, void **shadowp)
 {
 	struct folio *folio;
 
-	folio = filemap_get_folio(swap_address_space(entry), swp_offset(entry));
-	if (!IS_ERR(folio)) {
+	folio = filemap_get_entry(swap_address_space(entry), swp_offset(entry));
+	if (xa_is_value(folio)) {
+		if (shadowp)
+			*shadowp = folio;
+		return NULL;
+	}
+
+	if (folio) {
 		bool vma_ra = swap_use_vma_readahead();
 		bool readahead;
 
@@ -370,8 +376,6 @@ struct folio *swap_cache_get_folio(swp_entry_t entry,
 			if (!vma || !vma_ra)
 				atomic_inc(&swapin_readahead_hits);
 		}
-	} else {
-		folio = NULL;
 	}
 
 	return folio;
@@ -876,11 +880,10 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
  * in.
  */
 static struct folio *swapin_direct(swp_entry_t entry, gfp_t gfp_mask,
-				   struct vm_fault *vmf)
+				   struct vm_fault *vmf, void *shadow)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct folio *folio;
-	void *shadow = NULL;
 
 	/* skip swapcache */
 	folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0,
@@ -897,7 +900,6 @@ static struct folio *swapin_direct(swp_entry_t entry, gfp_t gfp_mask,
 
 		mem_cgroup_swapin_uncharge_swap(entry);
 
-		shadow = get_shadow_from_swap_cache(entry);
 		if (shadow)
 			workingset_refault(folio, shadow);
 
@@ -931,17 +933,18 @@ struct folio *swapin_entry(swp_entry_t entry, gfp_t gfp_mask,
 {
 	enum swap_cache_result cache_result;
 	struct mempolicy *mpol;
+	void *shadow = NULL;
 	struct folio *folio;
 	pgoff_t ilx;
 
-	folio = swap_cache_get_folio(entry, vmf->vma, vmf->address);
+	folio = swap_cache_get_folio(entry, vmf->vma, vmf->address, &shadow);
 	if (folio) {
 		cache_result = SWAP_CACHE_HIT;
 		goto done;
 	}
 
 	if (swap_use_no_readahead(swp_swap_info(entry), entry)) {
-		folio = swapin_direct(entry, gfp_mask, vmf);
+		folio = swapin_direct(entry, gfp_mask, vmf, shadow);
 		cache_result = SWAP_CACHE_BYPASS;
 	} else {
 		mpol = get_vma_policy(vmf->vma, vmf->address, 0, &ilx);
@@ -952,7 +955,6 @@ struct folio *swapin_entry(swp_entry_t entry, gfp_t gfp_mask,
 		mpol_cond_put(mpol);
 		cache_result = SWAP_CACHE_MISS;
 	}
-
 done:
 	if (result)
 		*result = cache_result;
-- 
2.43.0
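
For readers unfamiliar with the XArray convention relied on above: a slot
can hold either a folio pointer or a tagged "value entry" (the shadow left
behind at eviction), and xa_is_value() tells the two apart. Below is a
minimal, self-contained userspace sketch of the single-lookup pattern this
patch adopts; cache_lookup(), make_shadow() and struct obj are hypothetical
stand-ins for filemap_get_entry(), the stored shadow and struct folio, and
the low-bit tag only mimics how XArray value entries are encoded.

/*
 * Userspace illustration only, not kernel code.  One lookup returns
 * either a real object (cache hit) or reports the tagged shadow value
 * through an out-parameter (cache miss), so the caller never needs a
 * second lookup such as get_shadow_from_swap_cache().
 */
#include <stdint.h>
#include <stdio.h>

struct obj {
	int data;
};

/* Mimic xa_is_value(): the low bit marks a shadow, not a pointer. */
static int entry_is_value(void *entry)
{
	return (uintptr_t)entry & 1;
}

/* Build a fake shadow entry from an eviction "distance" (hypothetical). */
static void *make_shadow(uintptr_t distance)
{
	return (void *)((distance << 1) | 1);
}

/* Single lookup: stand-in for the reworked swap_cache_get_folio(). */
static struct obj *cache_lookup(void *slot, void **shadowp)
{
	if (entry_is_value(slot)) {
		if (shadowp)
			*shadowp = slot;	/* hand the shadow to the caller */
		return NULL;			/* treated as a cache miss */
	}
	return slot;	/* real pointers have the low bit clear (alignment) */
}

int main(void)
{
	void *shadow = NULL;
	struct obj real = { .data = 42 };
	struct obj *o;

	/* Miss: the slot holds a shadow, reused directly, no second lookup. */
	o = cache_lookup(make_shadow(0x1234), &shadow);
	if (!o && shadow)
		printf("refault, shadow=%p\n", shadow);

	/* Hit: the slot holds a real object. */
	o = cache_lookup(&real, &shadow);
	if (o)
		printf("hit, data=%d\n", o->data);

	return 0;
}

This mirrors how swapin_entry() now captures the shadow during its one
swap_cache_get_folio() call and forwards it to swapin_direct() for
workingset_refault().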