The patch titled
     Subject: huge_memory: add vmf_insert_folio_pmd()
has been added to the -mm mm-unstable branch.  Its filename is
     huge_memory-add-vmf_insert_folio_pmd.patch

This patch will shortly appear at
     https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/huge_memory-add-vmf_insert_folio_pmd.patch

This patch will later appear in the mm-unstable branch at
    git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days

------------------------------------------------------
From: Alistair Popple <apopple@xxxxxxxxxx>
Subject: huge_memory: add vmf_insert_folio_pmd()
Date: Wed, 5 Feb 2025 09:48:13 +1100

Currently DAX folio/page reference counts are managed differently to
normal pages.  To allow these to be managed the same as normal pages,
introduce vmf_insert_folio_pmd().  This will map the entire PMD-sized
folio and take references as it would for a normally mapped page.

This is distinct from the current mechanism, vmf_insert_pfn_pmd(), which
simply inserts a special devmap PMD entry into the page table without
holding a reference to the page for the mapping.

It is not currently useful to implement a more generic vmf_insert_folio()
which selects the correct behaviour based on folio_order().  This is
because PTE faults require only a subpage of the folio to be PTE mapped
rather than the entire folio.  It would be possible to add this context
somewhere, but callers already need to handle PTE faults and PMD faults
separately, so a more generic function is not useful.
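As a rough illustration only (not part of this patch), a filesystem or
device DAX PMD fault handler that already holds a suitably sized folio
would be expected to use the new helper along the following lines.  The
example_* names below are hypothetical placeholders; the actual callers
are converted by the later fs/dax and device-dax patches in this series.

/*
 * Hypothetical caller sketch.  example_dax_huge_fault() and
 * example_dax_lookup_folio() do not exist in the tree; they only stand
 * in for a huge_fault handler and its folio lookup.
 */
static vm_fault_t example_dax_huge_fault(struct vm_fault *vmf)
{
	bool write = vmf->flags & FAULT_FLAG_WRITE;
	struct folio *folio;

	/* Assume this returns a PMD_ORDER folio with a reference held. */
	folio = example_dax_lookup_folio(vmf);
	if (!folio)
		return VM_FAULT_FALLBACK;

	/*
	 * Map the whole folio with a single PMD entry, taking rmap and
	 * refcount references, unlike vmf_insert_pfn_pmd() which installs
	 * a special entry without referencing the underlying page.
	 */
	return vmf_insert_folio_pmd(vmf, folio, write);
}
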
Link: https://lkml.kernel.org/r/9f10e88441f3cb26eff6be0c9ef5997844c8c24e.1738709036.git-series.apopple@xxxxxxxxxx
Signed-off-by: Alistair Popple <apopple@xxxxxxxxxx>
Tested-by: Alison Schofield <alison.schofield@xxxxxxxxx>
Cc: Alexander Gordeev <agordeev@xxxxxxxxxxxxx>
Cc: Asahi Lina <lina@xxxxxxxxxxxxx>
Cc: Bjorn Helgaas <bhelgaas@xxxxxxxxxx>
Cc: Catalin Marinas <catalin.marinas@xxxxxxx>
Cc: Christian Borntraeger <borntraeger@xxxxxxxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Chunyan Zhang <zhang.lyra@xxxxxxxxx>
Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
Cc: "Darrick J. Wong" <djwong@xxxxxxxxxx>
Cc: Dave Chinner <david@xxxxxxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: Dave Jiang <dave.jiang@xxxxxxxxx>
Cc: David Hildenbrand <david@xxxxxxxxxx>
Cc: Gerald Schaefer <gerald.schaefer@xxxxxxxxxxxxx>
Cc: Heiko Carstens <hca@xxxxxxxxxxxxx>
Cc: Huacai Chen <chenhuacai@xxxxxxxxxx>
Cc: Ira Weiny <ira.weiny@xxxxxxxxx>
Cc: Jan Kara <jack@xxxxxxx>
Cc: Jason Gunthorpe <jgg@xxxxxxxxxx>
Cc: Jason Gunthorpe <jgg@xxxxxxxx>
Cc: John Hubbard <jhubbard@xxxxxxxxxx>
Cc: linmiaohe <linmiaohe@xxxxxxxxxx>
Cc: Logan Gunthorpe <logang@xxxxxxxxxxxx>
Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx>
Cc: Nicholas Piggin <npiggin@xxxxxxxxx>
Cc: Peter Xu <peterx@xxxxxxxxxx>
Cc: Sven Schnelle <svens@xxxxxxxxxxxxx>
Cc: Ted Ts'o <tytso@xxxxxxx>
Cc: Vasily Gorbik <gor@xxxxxxxxxxxxx>
Cc: Vishal Verma <vishal.l.verma@xxxxxxxxx>
Cc: Vivek Goyal <vgoyal@xxxxxxxxxx>
Cc: WANG Xuerui <kernel@xxxxxxxxxx>
Cc: Will Deacon <will@xxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 include/linux/huge_mm.h |    1 
 mm/huge_memory.c        |   61 +++++++++++++++++++++++++++++++-------
 2 files changed, 51 insertions(+), 11 deletions(-)

--- a/include/linux/huge_mm.h~huge_memory-add-vmf_insert_folio_pmd
+++ a/include/linux/huge_mm.h
@@ -39,6 +39,7 @@ int change_huge_pmd(struct mmu_gather *t
 
 vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write);
 vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write);
+vm_fault_t vmf_insert_folio_pmd(struct vm_fault *vmf, struct folio *folio, bool write);
 vm_fault_t vmf_insert_folio_pud(struct vm_fault *vmf, struct folio *folio, bool write);
 
 enum transparent_hugepage_flag {
--- a/mm/huge_memory.c~huge_memory-add-vmf_insert_folio_pmd
+++ a/mm/huge_memory.c
@@ -1375,20 +1375,20 @@ vm_fault_t do_huge_pmd_anonymous_page(st
 	return __do_huge_pmd_anonymous_page(vmf);
 }
 
-static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
+static int insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 		pmd_t *pmd, pfn_t pfn, pgprot_t prot, bool write,
 		pgtable_t pgtable)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pmd_t entry;
-	spinlock_t *ptl;
 
-	ptl = pmd_lock(mm, pmd);
+	lockdep_assert_held(pmd_lockptr(mm, pmd));
+
 	if (!pmd_none(*pmd)) {
 		if (write) {
 			if (pmd_pfn(*pmd) != pfn_t_to_pfn(pfn)) {
 				WARN_ON_ONCE(!is_huge_zero_pmd(*pmd));
-				goto out_unlock;
+				return -EEXIST;
 			}
 			entry = pmd_mkyoung(*pmd);
 			entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
@@ -1396,7 +1396,7 @@ static void insert_pfn_pmd(struct vm_are
 			update_mmu_cache_pmd(vma, addr, pmd);
 		}
 
-		goto out_unlock;
+		return -EEXIST;
 	}
 
 	entry = pmd_mkhuge(pfn_t_pmd(pfn, prot));
@@ -1417,11 +1417,7 @@ static void insert_pfn_pmd(struct vm_are
 
 	set_pmd_at(mm, addr, pmd, entry);
 	update_mmu_cache_pmd(vma, addr, pmd);
-
-out_unlock:
-	spin_unlock(ptl);
-	if (pgtable)
-		pte_free(mm, pgtable);
+	return 0;
 }
 
 /**
@@ -1440,6 +1436,8 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_
 	struct vm_area_struct *vma = vmf->vma;
 	pgprot_t pgprot = vma->vm_page_prot;
 	pgtable_t pgtable = NULL;
+	spinlock_t *ptl;
+	int error;
 
 	/*
 	 * If we had pmd_special, we could avoid all these restrictions,
@@ -1462,12 +1460,53 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_
 	}
 
 	track_pfn_insert(vma, &pgprot, pfn);
+	ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+	error = insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable);
+	spin_unlock(ptl);
+	if (error && pgtable)
+		pte_free(vma->vm_mm, pgtable);
 
-	insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable);
 	return VM_FAULT_NOPAGE;
 }
 EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
 
+vm_fault_t vmf_insert_folio_pmd(struct vm_fault *vmf, struct folio *folio, bool write)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	unsigned long addr = vmf->address & PMD_MASK;
+	struct mm_struct *mm = vma->vm_mm;
+	spinlock_t *ptl;
+	pgtable_t pgtable = NULL;
+	int error;
+
+	if (addr < vma->vm_start || addr >= vma->vm_end)
+		return VM_FAULT_SIGBUS;
+
+	if (WARN_ON_ONCE(folio_order(folio) != PMD_ORDER))
+		return VM_FAULT_SIGBUS;
+
+	if (arch_needs_pgtable_deposit()) {
+		pgtable = pte_alloc_one(vma->vm_mm);
+		if (!pgtable)
+			return VM_FAULT_OOM;
+	}
+
+	ptl = pmd_lock(mm, vmf->pmd);
+	if (pmd_none(*vmf->pmd)) {
+		folio_get(folio);
+		folio_add_file_rmap_pmd(folio, &folio->page, vma);
+		add_mm_counter(mm, mm_counter_file(folio), HPAGE_PMD_NR);
+	}
+	error = insert_pfn_pmd(vma, addr, vmf->pmd, pfn_to_pfn_t(folio_pfn(folio)),
+			vma->vm_page_prot, write, pgtable);
+	spin_unlock(ptl);
+	if (error && pgtable)
+		pte_free(mm, pgtable);
+
+	return VM_FAULT_NOPAGE;
+}
+EXPORT_SYMBOL_GPL(vmf_insert_folio_pmd);
+
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma)
 {
_

Patches currently in -mm which might be from apopple@xxxxxxxxxx are

fuse-fix-dax-truncate-punch_hole-fault-path.patch
fs-dax-return-unmapped-busy-pages-from-dax_layout_busy_page_range.patch
fs-dax-dont-skip-locked-entries-when-scanning-entries.patch
fs-dax-refactor-wait-for-dax-idle-page.patch
fs-dax-create-a-common-implementation-to-break-dax-layouts.patch
fs-dax-always-remove-dax-page-cache-entries-when-breaking-layouts.patch
fs-dax-ensure-all-pages-are-idle-prior-to-filesystem-unmount.patch
fs-dax-remove-page_mapping_dax_shared-mapping-flag.patch
mm-gup-remove-redundant-check-for-pci-p2pdma-page.patch
mm-mm_init-move-p2pdma-page-refcount-initialisation-to-p2pdma.patch
mm-allow-compound-zone-device-pages.patch
mm-memory-enhance-insert_page_into_pte_locked-to-create-writable-mappings.patch
mm-memory-add-vmf_insert_page_mkwrite.patch
rmap-add-support-for-pud-sized-mappings-to-rmap.patch
huge_memory-add-vmf_insert_folio_pud.patch
huge_memory-add-vmf_insert_folio_pmd.patch
mm-gup-dont-allow-foll_longterm-pinning-of-fs-dax-pages.patch
fs-dax-properly-refcount-fs-dax-pages.patch
device-dax-properly-refcount-device-dax-pages-when-mapping.patch