From: Kairui Song <kasong@xxxxxxxxxxx> Simply move the routine to a standalone function, giving a cleaner split and avoiding helpers being referenced across multiple files. There is basically no functional change, but the error path is very slightly different. Previously a mem_cgroup_swapin_charge_folio() failure caused a direct OOM; now we go through the error checking path in do_swap_page(), and if the page is already there, we just return as the page fault was handled. Signed-off-by: Kairui Song <kasong@xxxxxxxxxxx> --- mm/memory.c | 42 +++------------------------------- mm/swap.h | 8 +++++++ mm/swap_state.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 39 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index f2bc6dd15eb8..e42fadc25268 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3937,7 +3937,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) swp_entry_t entry; pte_t pte; vm_fault_t ret = 0; - void *shadow = NULL; if (!pte_unmap_same(vmf)) goto out; @@ -4001,47 +4000,12 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (!folio) { if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && __swap_count(entry) == 1) { - /* - * Prevent parallel swapin from proceeding with - * the cache flag. Otherwise, another thread may - * finish swapin first, free the entry, and swapout - * reusing the same entry. It's undetectable as - * pte_same() returns true due to entry reuse. 
- */ - if (swapcache_prepare(entry)) { - /* Relax a bit to prevent rapid repeated page faults */ - schedule_timeout_uninterruptible(1); + /* skip swapcache and readahead */ + folio = swapin_direct(entry, GFP_HIGHUSER_MOVABLE, vmf); + if (PTR_ERR(folio) == -EBUSY) goto out; - } need_clear_cache = true; - - /* skip swapcache */ - folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, - vma, vmf->address, false); page = &folio->page; - if (folio) { - __folio_set_locked(folio); - __folio_set_swapbacked(folio); - - if (mem_cgroup_swapin_charge_folio(folio, - vma->vm_mm, GFP_KERNEL, - entry)) { - ret = VM_FAULT_OOM; - goto out_page; - } - mem_cgroup_swapin_uncharge_swap(entry); - - shadow = get_shadow_from_swap_cache(entry); - if (shadow) - workingset_refault(folio, shadow); - - folio_add_lru(folio); - - /* To provide entry to swap_read_folio() */ - folio->swap = entry; - swap_read_folio(folio, true, NULL); - folio->private = NULL; - } } else { page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vmf); diff --git a/mm/swap.h b/mm/swap.h index fc2f6ade7f80..40e902812cc5 100644 --- a/mm/swap.h +++ b/mm/swap.h @@ -55,6 +55,8 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_flags, bool skip_if_exists); struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag, struct mempolicy *mpol, pgoff_t ilx); +struct folio *swapin_direct(swp_entry_t entry, gfp_t flag, + struct vm_fault *vmf); struct page *swapin_readahead(swp_entry_t entry, gfp_t flag, struct vm_fault *vmf); @@ -87,6 +89,12 @@ static inline struct folio *swap_cluster_readahead(swp_entry_t entry, return NULL; } +static inline struct folio *swapin_direct(swp_entry_t entry, gfp_t flag, + struct vm_fault *vmf) +{ + return NULL; +} + static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask, struct vm_fault *vmf) { diff --git a/mm/swap_state.c b/mm/swap_state.c index bfc7e8c58a6d..0a3fa48b3893 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -879,6 +879,66 @@ static struct folio 
*swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask, return folio; } +/** + * swapin_direct - swap in folios skipping swap cache and readahead + * @entry: swap entry of this memory + * @gfp_mask: memory allocation flags + * @vmf: fault information + * + * Returns the struct folio for entry and addr after the swap entry is read + * in. + */ +struct folio *swapin_direct(swp_entry_t entry, gfp_t gfp_mask, + struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct folio *folio; + void *shadow = NULL; + + /* + * Prevent parallel swapin from proceeding with + * the cache flag. Otherwise, another thread may + * finish swapin first, free the entry, and swapout + * reusing the same entry. It's undetectable as + * pte_same() returns true due to entry reuse. + */ + if (swapcache_prepare(entry)) { + /* Relax a bit to prevent rapid repeated page faults */ + schedule_timeout_uninterruptible(1); + return ERR_PTR(-EBUSY); + } + + /* skip swapcache */ + folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, + vma, vmf->address, false); + if (folio) { + __folio_set_locked(folio); + __folio_set_swapbacked(folio); + + if (mem_cgroup_swapin_charge_folio(folio, + vma->vm_mm, GFP_KERNEL, + entry)) { + folio_unlock(folio); + folio_put(folio); + return NULL; + } + mem_cgroup_swapin_uncharge_swap(entry); + + shadow = get_shadow_from_swap_cache(entry); + if (shadow) + workingset_refault(folio, shadow); + + folio_add_lru(folio); + + /* To provide entry to swap_read_folio() */ + folio->swap = entry; + swap_read_folio(folio, true, NULL); + folio->private = NULL; + } + + return folio; +} + /** * swapin_readahead - swap in pages in hope we need them soon * @entry: swap entry of this memory -- 2.43.0