The patch titled Subject: fsdax: replace mmap entry in case of CoW has been added to the -mm mm-unstable branch. Its filename is fsdax-replace-mmap-entry-in-case-of-cow.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/fsdax-replace-mmap-entry-in-case-of-cow.patch This patch will later appear in the mm-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Shiyang Ruan <ruansy.fnst@xxxxxxxxxxx> Subject: fsdax: replace mmap entry in case of CoW Date: Fri, 3 Jun 2022 13:37:34 +0800 Replace the existing entry to the newly allocated one in case of CoW. Also, we mark the entry as PAGECACHE_TAG_TOWRITE so writeback marks this entry as writeprotected. This helps us snapshots so new write pagefaults after snapshots trigger a CoW. Link: https://lkml.kernel.org/r/20220603053738.1218681-11-ruansy.fnst@xxxxxxxxxxx Signed-off-by: Goldwyn Rodrigues <rgoldwyn@xxxxxxxx> Signed-off-by: Shiyang Ruan <ruansy.fnst@xxxxxxxxxxx> Reviewed-by: Christoph Hellwig <hch@xxxxxx> Reviewed-by: Ritesh Harjani <riteshh@xxxxxxxxxxxxx> Reviewed-by: Darrick J. Wong <djwong@xxxxxxxxxx> Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx> Cc: Dan Williams <dan.j.wiliams@xxxxxxxxx> Cc: Dan Williams <dan.j.williams@xxxxxxxxx> Cc: Dave Chinner <david@xxxxxxxxxxxxx> Cc: Goldwyn Rodrigues <rgoldwyn@xxxxxxx> Cc: Jane Chu <jane.chu@xxxxxxxxxx> Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx> Cc: Miaohe Lin <linmiaohe@xxxxxxxxxx> Cc: Naoya Horiguchi <naoya.horiguchi@xxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- fs/dax.c | 77 ++++++++++++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 35 deletions(-) --- a/fs/dax.c~fsdax-replace-mmap-entry-in-case-of-cow +++ a/fs/dax.c @@ -830,22 +830,42 @@ static int copy_cow_page_dax(struct vm_f } /* + * MAP_SYNC on a dax mapping guarantees dirty metadata is + * flushed on write-faults (non-cow), but not read-faults. + */ +static bool dax_fault_is_synchronous(const struct iomap_iter *iter, + struct vm_area_struct *vma) +{ + return (iter->flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC) && + (iter->iomap.flags & IOMAP_F_DIRTY); +} + +static bool dax_fault_is_cow(const struct iomap_iter *iter) +{ + return (iter->flags & IOMAP_WRITE) && + (iter->iomap.flags & IOMAP_F_SHARED); +} + +/* * By this point grab_mapping_entry() has ensured that we have a locked entry * of the appropriate size so we don't have to worry about downgrading PMDs to * PTEs. If we happen to be trying to insert a PTE and there is a PMD * already in the tree, we will skip the insertion and just dirty the PMD as * appropriate. */ -static void *dax_insert_entry(struct xa_state *xas, - struct address_space *mapping, struct vm_fault *vmf, - void *entry, pfn_t pfn, unsigned long flags, bool dirty) +static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf, + const struct iomap_iter *iter, void *entry, pfn_t pfn, + unsigned long flags) { + struct address_space *mapping = vmf->vma->vm_file->f_mapping; void *new_entry = dax_make_entry(pfn, flags); + bool dirty = !dax_fault_is_synchronous(iter, vmf->vma); + bool cow = dax_fault_is_cow(iter); if (dirty) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) { + if (cow || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) { unsigned long index = xas->xa_index; /* we are replacing a zero page with block mapping */ if (dax_is_pmd_entry(entry)) @@ -857,12 +877,12 @@ static void *dax_insert_entry(struct xa_ xas_reset(xas); xas_lock_irq(xas); - if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { + if (cow || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { void *old; dax_disassociate_entry(entry, mapping, false); dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address, - false); + cow); /* * Only swap our new entry into the page cache if the current * entry is a zero page or an empty entry. If a normal PTE or @@ -882,6 +902,9 @@ static void *dax_insert_entry(struct xa_ if (dirty) xas_set_mark(xas, PAGECACHE_TAG_DIRTY); + if (cow) + xas_set_mark(xas, PAGECACHE_TAG_TOWRITE); + xas_unlock_irq(xas); return entry; } @@ -1123,17 +1146,15 @@ static int dax_iomap_cow_copy(loff_t pos * If this page is ever written to we will re-fault and change the mapping to * point to real DAX storage instead. */ -static vm_fault_t dax_load_hole(struct xa_state *xas, - struct address_space *mapping, void **entry, - struct vm_fault *vmf) +static vm_fault_t dax_load_hole(struct xa_state *xas, struct vm_fault *vmf, + const struct iomap_iter *iter, void **entry) { - struct inode *inode = mapping->host; + struct inode *inode = iter->inode; unsigned long vaddr = vmf->address; pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr)); vm_fault_t ret; - *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, - DAX_ZERO_PAGE, false); + *entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, DAX_ZERO_PAGE); ret = vmf_insert_mixed(vmf->vma, vaddr, pfn); trace_dax_load_hole(inode, vmf, ret); @@ -1142,7 +1163,7 @@ static vm_fault_t dax_load_hole(struct x #ifdef CONFIG_FS_DAX_PMD static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, - const struct iomap *iomap, void **entry) + const struct iomap_iter *iter, void **entry) { struct address_space *mapping = vmf->vma->vm_file->f_mapping; unsigned long pmd_addr = vmf->address & PMD_MASK; @@ -1160,8 +1181,8 @@ static vm_fault_t dax_pmd_load_hole(stru goto fallback; pfn = page_to_pfn_t(zero_page); - *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, - DAX_PMD | DAX_ZERO_PAGE, false); + *entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, + DAX_PMD | DAX_ZERO_PAGE); if (arch_needs_pgtable_deposit()) { pgtable = pte_alloc_one(vma->vm_mm); @@ -1194,7 +1215,7 @@ fallback: } #else static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, - const struct iomap *iomap, void **entry) + const struct iomap_iter *iter, void **entry) { return VM_FAULT_FALLBACK; } @@ -1440,17 +1461,6 @@ static vm_fault_t dax_fault_return(int e } /* - * MAP_SYNC on a dax mapping guarantees dirty metadata is - * flushed on write-faults (non-cow), but not read-faults. - */ -static bool dax_fault_is_synchronous(unsigned long flags, - struct vm_area_struct *vma, const struct iomap *iomap) -{ - return (flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC) - && (iomap->flags & IOMAP_F_DIRTY); -} - -/* * When handling a synchronous page fault and the inode need a fsync, we can * insert the PTE/PMD into page tables only after that fsync happened. Skip * insertion for now and return the pfn so that caller can insert it after the @@ -1507,13 +1517,11 @@ static vm_fault_t dax_fault_iter(struct const struct iomap_iter *iter, pfn_t *pfnp, struct xa_state *xas, void **entry, bool pmd) { - struct address_space *mapping = vmf->vma->vm_file->f_mapping; const struct iomap *iomap = &iter->iomap; const struct iomap *srcmap = &iter->srcmap; size_t size = pmd ? PMD_SIZE : PAGE_SIZE; loff_t pos = (loff_t)xas->xa_index << PAGE_SHIFT; - bool write = vmf->flags & FAULT_FLAG_WRITE; - bool sync = dax_fault_is_synchronous(iter->flags, vmf->vma, iomap); + bool write = iter->flags & IOMAP_WRITE; unsigned long entry_flags = pmd ? DAX_PMD : 0; int err = 0; pfn_t pfn; @@ -1526,8 +1534,8 @@ static vm_fault_t dax_fault_iter(struct if (!write && (iomap->type == IOMAP_UNWRITTEN || iomap->type == IOMAP_HOLE)) { if (!pmd) - return dax_load_hole(xas, mapping, entry, vmf); - return dax_pmd_load_hole(xas, vmf, iomap, entry); + return dax_load_hole(xas, vmf, iter, entry); + return dax_pmd_load_hole(xas, vmf, iter, entry); } if (iomap->type != IOMAP_MAPPED && !(iomap->flags & IOMAP_F_SHARED)) { @@ -1539,8 +1547,7 @@ static vm_fault_t dax_fault_iter(struct if (err) return pmd ? VM_FAULT_FALLBACK : dax_fault_return(err); - *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, entry_flags, - write && !sync); + *entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, entry_flags); if (write && srcmap->type != IOMAP_HOLE && srcmap->addr != iomap->addr) { @@ -1549,7 +1556,7 @@ static vm_fault_t dax_fault_iter(struct return dax_fault_return(err); } - if (sync) + if (dax_fault_is_synchronous(iter, vmf->vma)) return dax_fault_synchronous_pfnp(pfnp, pfn); /* insert PMD pfn */ _ Patches currently in -mm which might be from ruansy.fnst@xxxxxxxxxxx are dax-introduce-holder-for-dax_device.patch mm-factor-helpers-for-memory_failure_dev_pagemap.patch pagemappmem-introduce-memory_failure.patch fsdax-introduce-dax_lock_mapping_entry.patch mm-introduce-mf_dax_kill_procs-for-fsdax-case.patch xfs-implement-notify_failure-for-xfs.patch fsdax-set-a-cow-flag-when-associate-reflink-mappings.patch fsdax-output-address-in-dax_iomap_pfn-and-rename-it.patch fsdax-introduce-dax_iomap_cow_copy.patch fsdax-replace-mmap-entry-in-case-of-cow.patch fsdax-add-dax_iomap_cow_copy-for-dax-zero.patch fsdax-dedup-file-range-to-use-a-compare-function.patch xfs-support-cow-in-fsdax-mode.patch xfs-add-dax-dedupe-support.patch