When the CPU touches a zone device THP entry, the data needs to be migrated back to the CPU, call migrate_to_ram() on these pages via do_huge_pmd_device_private() fault handling helper. Signed-off-by: Balbir Singh <balbirs@xxxxxxxxxx> --- include/linux/huge_mm.h | 7 +++++++ mm/huge_memory.c | 35 +++++++++++++++++++++++++++++++++++ mm/memory.c | 6 ++++-- 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index e893d546a49f..ad0c0ccfcbc2 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -479,6 +479,8 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf); +vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf); + extern struct folio *huge_zero_folio; extern unsigned long huge_zero_pfn; @@ -634,6 +636,11 @@ static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) return 0; } +static inline vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf) +{ + return 0; +} + static inline bool is_huge_zero_folio(const struct folio *folio) { return false; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d8e018d1bdbd..995ac8be5709 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1375,6 +1375,41 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) return __do_huge_pmd_anonymous_page(vmf); } +vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + unsigned long haddr = vmf->address & HPAGE_PMD_MASK; + vm_fault_t ret; + spinlock_t *ptl; + swp_entry_t swp_entry; + struct page *page; + + if (!thp_vma_suitable_order(vma, haddr, PMD_ORDER)) + return VM_FAULT_FALLBACK; + + if (vmf->flags & FAULT_FLAG_VMA_LOCK) { + vma_end_read(vma); + return VM_FAULT_RETRY; + } + + ptl = pmd_lock(vma->vm_mm, vmf->pmd); + if (unlikely(!pmd_same(*vmf->pmd, vmf->orig_pmd))) { + spin_unlock(ptl); + return 0; + } + + swp_entry = pmd_to_swp_entry(vmf->orig_pmd); + page = pfn_swap_entry_to_page(swp_entry); + vmf->page = page; + vmf->pte = NULL; + get_page(page); + spin_unlock(ptl); + ret = page_pgmap(page)->ops->migrate_to_ram(vmf); + put_page(page); + + return ret; +} + static int insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, pfn_t pfn, pgprot_t prot, bool write, pgtable_t pgtable) diff --git a/mm/memory.c b/mm/memory.c index a838c8c44bfd..deaa67b88708 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -6149,8 +6149,10 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, vmf.orig_pmd = pmdp_get_lockless(vmf.pmd); if (unlikely(is_swap_pmd(vmf.orig_pmd))) { - VM_BUG_ON(thp_migration_supported() && - !is_pmd_migration_entry(vmf.orig_pmd)); + if (is_device_private_entry( + pmd_to_swp_entry(vmf.orig_pmd))) + return do_huge_pmd_device_private(&vmf); + if (is_pmd_migration_entry(vmf.orig_pmd)) pmd_migration_entry_wait(mm, vmf.pmd); return 0; -- 2.48.1