Currently DAX folio/page reference counts are managed differently to normal pages. To allow these to be managed the same as normal pages introduce dax_insert_pfn_pmd. This will map the entire PMD-sized folio and take references as it would for a normally mapped page. This is distinct from the current mechanism, vmf_insert_pfn_pmd, which simply inserts a special devmap PMD entry into the page table without holding a reference to the page for the mapping. Signed-off-by: Alistair Popple <apopple@xxxxxxxxxx> --- include/linux/huge_mm.h | 1 +- mm/huge_memory.c | 70 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 71 insertions(+) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index b98a3cc..9207d8e 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -39,6 +39,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write); vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write); +vm_fault_t dax_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write); vm_fault_t dax_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write); enum transparent_hugepage_flag { diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e1f053e..a9874ac 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1202,6 +1202,76 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write) } EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd); +vm_fault_t dax_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write) +{ + struct vm_area_struct *vma = vmf->vma; + unsigned long addr = vmf->address & PMD_MASK; + pmd_t *pmd = vmf->pmd; + struct mm_struct *mm = vma->vm_mm; + pmd_t entry; + spinlock_t *ptl; + pgtable_t pgtable = NULL; + struct folio *folio; + struct page *page; + + if (addr < vma->vm_start || addr >= vma->vm_end) + return VM_FAULT_SIGBUS; + + if (arch_needs_pgtable_deposit()) { + pgtable = pte_alloc_one(vma->vm_mm); + if (!pgtable) + return VM_FAULT_OOM; + } + + track_pfn_insert(vma, &vma->vm_page_prot, pfn); + + ptl = pmd_lock(mm, pmd); + if (!pmd_none(*pmd)) { + if (write) { + if (pmd_pfn(*pmd) != pfn_t_to_pfn(pfn)) { + WARN_ON_ONCE(!is_huge_zero_pmd(*pmd)); + goto out_unlock; + } + entry = pmd_mkyoung(*pmd); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + if (pmdp_set_access_flags(vma, addr, pmd, entry, 1)) + update_mmu_cache_pmd(vma, addr, pmd); + } + + goto out_unlock; + } + + entry = pmd_mkhuge(pfn_t_pmd(pfn, vma->vm_page_prot)); + if (pfn_t_devmap(pfn)) + entry = pmd_mkdevmap(entry); + if (write) { + entry = pmd_mkyoung(pmd_mkdirty(entry)); + entry = maybe_pmd_mkwrite(entry, vma); + } + + if (pgtable) { + pgtable_trans_huge_deposit(mm, pmd, pgtable); + mm_inc_nr_ptes(mm); + pgtable = NULL; + } + + page = pfn_t_to_page(pfn); + folio = page_folio(page); + folio_get(folio); + folio_add_file_rmap_pmd(folio, page, vma); + add_mm_counter(mm, mm_counter_file(folio), HPAGE_PMD_NR); + set_pmd_at(mm, addr, pmd, entry); + update_mmu_cache_pmd(vma, addr, pmd); + +out_unlock: + spin_unlock(ptl); + if (pgtable) + pte_free(mm, pgtable); + + return VM_FAULT_NOPAGE; +} +EXPORT_SYMBOL_GPL(dax_insert_pfn_pmd); + #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma) { -- git-series 0.9.1