To avoid the PFRA (page frame reclaiming algorithm) reclaiming a page that resides in a COWed PTE table, break COW when rmap is used to unmap the page from all processes. Signed-off-by: Chih-En Lin <shiyn.lin@xxxxxxxxx> --- include/linux/rmap.h | 2 ++ mm/page_vma_mapped.c | 5 +++++ mm/rmap.c | 2 +- mm/swapfile.c | 1 + mm/vmscan.c | 1 + 5 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index b89b4b86951f8..5c7e3bedc068b 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -312,6 +312,8 @@ int make_device_exclusive_range(struct mm_struct *mm, unsigned long start, #define PVMW_SYNC (1 << 0) /* Look for migration entries rather than present PTEs */ #define PVMW_MIGRATION (1 << 1) +/* Break COW PTE during the walk */ +#define PVMW_COW_PTE (1 << 2) struct page_vma_mapped_walk { unsigned long pfn; diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 8e9e574d535aa..5008957bbe4a7 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -251,6 +251,11 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) step_forward(pvmw, PMD_SIZE); continue; } + + /* TODO: Is breaking COW PTE here correct? 
*/ + if (pvmw->flags & PVMW_COW_PTE) + handle_cow_pte(vma, pvmw->pmd, pvmw->address, false); + if (!map_pte(pvmw)) goto next_pte; this_pte: diff --git a/mm/rmap.c b/mm/rmap.c index 93d5a6f793d20..8f737cb44e48a 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1477,7 +1477,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, unsigned long address, void *arg) { struct mm_struct *mm = vma->vm_mm; - DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0); + DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, PVMW_COW_PTE); pte_t pteval; struct page *subpage; bool anon_exclusive, ret = true; diff --git a/mm/swapfile.c b/mm/swapfile.c index 1fdccd2f1422e..ef4d3d81a824b 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1916,6 +1916,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, do { cond_resched(); next = pmd_addr_end(addr, end); + handle_cow_pte(vma, pmd, addr, false); if (pmd_none_or_trans_huge_or_clear_bad(pmd)) continue; ret = unuse_pte_range(vma, pmd, addr, next, type); diff --git a/mm/vmscan.c b/mm/vmscan.c index b2b1431352dcd..030fad3d310d9 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1822,6 +1822,7 @@ static unsigned int shrink_page_list(struct list_head *page_list, /* * The folio is mapped into the page tables of one or more * processes. Try to unmap it here. + * Unmapping writes to the page tables, so break COW PTE here. */ if (folio_mapped(folio)) { enum ttu_flags flags = TTU_BATCH_FLUSH; -- 2.37.3