From: Goldwyn Rodrigues <rgoldwyn@xxxxxxxx> Add a new vm_operations struct btrfs_dax_vm_ops specifically for dax files. Since we will be removing (nulling) readpages/writepages for dax, return ENOEXEC only for non-dax files. dax_insert_entry() looks ugly. Do you think we should break it into dax_insert_cow_entry() and dax_insert_entry()? Signed-off-by: Goldwyn Rodrigues <rgoldwyn@xxxxxxxx> --- fs/btrfs/ctree.h | 1 + fs/btrfs/dax.c | 11 +++++++++++ fs/btrfs/file.c | 18 ++++++++++++++++-- fs/dax.c | 17 ++++++++++------- 4 files changed, 38 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3bcd2a4959c1..0e5060933bde 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3802,6 +3802,7 @@ int btree_readahead_hook(struct extent_buffer *eb, int err); /* dax.c */ ssize_t btrfs_file_dax_read(struct kiocb *iocb, struct iov_iter *to); ssize_t btrfs_file_dax_write(struct kiocb *iocb, struct iov_iter *from); +vm_fault_t btrfs_dax_fault(struct vm_fault *vmf); #else static inline ssize_t btrfs_file_dax_write(struct kiocb *iocb, struct iov_iter *from) { diff --git a/fs/btrfs/dax.c b/fs/btrfs/dax.c index 49619fe3f94f..927f962d1e88 100644 --- a/fs/btrfs/dax.c +++ b/fs/btrfs/dax.c @@ -157,4 +157,15 @@ ssize_t btrfs_file_dax_write(struct kiocb *iocb, struct iov_iter *iter) } return ret; } + +vm_fault_t btrfs_dax_fault(struct vm_fault *vmf) +{ + vm_fault_t ret; + pfn_t pfn; + ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &pfn, NULL, &btrfs_iomap_ops); + if (ret & VM_FAULT_NEEDDSYNC) + ret = dax_finish_sync_fault(vmf, PE_SIZE_PTE, pfn); + + return ret; +} #endif /* CONFIG_FS_DAX */ diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3b320d0ab495..196c8f37ff9d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2214,15 +2214,29 @@ static const struct vm_operations_struct btrfs_file_vm_ops = { .page_mkwrite = btrfs_page_mkwrite, }; +#ifdef CONFIG_FS_DAX +static const struct vm_operations_struct btrfs_dax_vm_ops = { + .fault = btrfs_dax_fault, + 
.page_mkwrite = btrfs_dax_fault, + .pfn_mkwrite = btrfs_dax_fault, +}; +#else +#define btrfs_dax_vm_ops btrfs_file_vm_ops +#endif + static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) { struct address_space *mapping = filp->f_mapping; + struct inode *inode = file_inode(filp); - if (!mapping->a_ops->readpage) + if (!IS_DAX(inode) && !mapping->a_ops->readpage) return -ENOEXEC; file_accessed(filp); - vma->vm_ops = &btrfs_file_vm_ops; + if (IS_DAX(inode)) + vma->vm_ops = &btrfs_dax_vm_ops; + else + vma->vm_ops = &btrfs_file_vm_ops; return 0; } diff --git a/fs/dax.c b/fs/dax.c index 21ee3df6f02c..41061da42771 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -708,14 +708,15 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev, */ static void *dax_insert_entry(struct xa_state *xas, struct address_space *mapping, struct vm_fault *vmf, - void *entry, pfn_t pfn, unsigned long flags, bool dirty) + void *entry, pfn_t pfn, unsigned long flags, bool dirty, + bool cow) { void *new_entry = dax_make_entry(pfn, flags); if (dirty) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) { + if (cow || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) { unsigned long index = xas->xa_index; /* we are replacing a zero page with block mapping */ if (dax_is_pmd_entry(entry)) @@ -732,7 +733,7 @@ static void *dax_insert_entry(struct xa_state *xas, dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address); } - if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { + if (cow || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { /* * Only swap our new entry into the page cache if the current * entry is a zero page or an empty entry. 
If a normal PTE or @@ -1031,7 +1032,7 @@ static vm_fault_t dax_load_hole(struct xa_state *xas, vm_fault_t ret; *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, - DAX_ZERO_PAGE, false); + DAX_ZERO_PAGE, false, false); ret = vmf_insert_mixed(vmf->vma, vaddr, pfn); trace_dax_load_hole(inode, vmf, ret); @@ -1408,7 +1409,8 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, goto error_finish_iomap; entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn, - 0, write && !sync); + 0, write && !sync, + (iomap.flags & IOMAP_F_COW) != 0); /* * If we are doing synchronous page fault and inode needs fsync, @@ -1487,7 +1489,7 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, pfn = page_to_pfn_t(zero_page); *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, - DAX_PMD | DAX_ZERO_PAGE, false); + DAX_PMD | DAX_ZERO_PAGE, false, false); ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd); if (!pmd_none(*(vmf->pmd))) { @@ -1610,7 +1612,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, goto finish_iomap; entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn, - DAX_PMD, write && !sync); + DAX_PMD, write && !sync, + false); /* * If we are doing synchronous page fault and inode needs fsync, -- 2.16.4