From: David Stevens <stevensd@xxxxxxxxxxxx> Collapsing memory in a vma that has an armed userfaultfd results in zero-filling any missing pages, which breaks user-space paging for those filled pages. Avoid khugepage bypassing userfaultfd by not collapsing pages in shmem reached via scanning a vma with an armed userfaultfd if doing so would zero-fill any pages. Signed-off-by: David Stevens <stevensd@xxxxxxxxxxxx> --- mm/khugepaged.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 79be13133322..48e944fb8972 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1736,8 +1736,8 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff, * + restore gaps in the page cache; * + unlock and free huge page; */ -static int collapse_file(struct mm_struct *mm, unsigned long addr, - struct file *file, pgoff_t start, +static int collapse_file(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, struct file *file, pgoff_t start, struct collapse_control *cc) { struct address_space *mapping = file->f_mapping; @@ -1784,6 +1784,9 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, * be able to map it or use it in another way until we unlock it. */ + if (is_shmem) + mmap_read_lock(mm); + xas_set(&xas, start); for (index = start; index < end; index++) { struct page *page = xas_next(&xas); @@ -1792,6 +1795,10 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, VM_BUG_ON(index != xas.xa_index); if (is_shmem) { if (!page) { + if (userfaultfd_armed(vma)) { + result = SCAN_EXCEED_NONE_PTE; + goto xa_locked; + } /* * Stop if extent has been truncated or * hole-punched, and is now completely @@ -2095,6 +2102,8 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, hpage->mapping = NULL; } + if (is_shmem) + mmap_read_unlock(mm); if (hpage) unlock_page(hpage); out: @@ -2108,8 +2117,8 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, return result; } -static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, - struct file *file, pgoff_t start, +static int hpage_collapse_scan_file(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, struct file *file, pgoff_t start, struct collapse_control *cc) { struct page *page = NULL; @@ -2118,6 +2127,9 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, int present, swap; int node = NUMA_NO_NODE; int result = SCAN_SUCCEED; + bool uffd_was_armed = userfaultfd_armed(vma); + + mmap_read_unlock(mm); present = 0; swap = 0; @@ -2193,13 +2205,16 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, } rcu_read_unlock(); + if (uffd_was_armed && present < HPAGE_PMD_NR) + result = SCAN_EXCEED_SWAP_PTE; + if (result == SCAN_SUCCEED) { if (cc->is_khugepaged && present < HPAGE_PMD_NR - khugepaged_max_ptes_none) { result = SCAN_EXCEED_NONE_PTE; count_vm_event(THP_SCAN_EXCEED_NONE_PTE); } else { - result = collapse_file(mm, addr, file, start, cc); + result = collapse_file(mm, vma, addr, file, start, cc); } } @@ -2207,8 +2222,8 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, return result; } #else -static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, - struct file *file, pgoff_t start, +static int hpage_collapse_scan_file(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, struct file *file, pgoff_t start, struct collapse_control *cc) { BUILD_BUG(); @@ -2304,8 +2319,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result, pgoff_t pgoff = linear_page_index(vma, khugepaged_scan.address); - mmap_read_unlock(mm); - *result = hpage_collapse_scan_file(mm, + *result = hpage_collapse_scan_file(mm, vma, khugepaged_scan.address, file, pgoff, cc); mmap_locked = false; @@ -2656,9 +2670,8 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, struct file *file = get_file(vma->vm_file); pgoff_t pgoff = linear_page_index(vma, addr); - mmap_read_unlock(mm); mmap_locked = false; - result = hpage_collapse_scan_file(mm, addr, file, pgoff, + result = hpage_collapse_scan_file(mm, vma, addr, file, pgoff, cc); fput(file); } else { -- 2.39.1.456.gfc5497dd1b-goog