Before xfs can be sure that it is safe to truncate, it needs to hold XFS_MMAPLOCK_EXCL and flush any FL_ALLOCATED leases. Introduce xfs_break_allocated() modeled after xfs_break_layouts() for use in the file space deletion path. We also use a new address_space_operation for the fs/dax core to coordinate reaping these leases in the case where there is no active truncate process to reap them. Cc: Jan Kara <jack@xxxxxxx> Cc: Jeff Moyer <jmoyer@xxxxxxxxxx> Cc: Christoph Hellwig <hch@xxxxxx> Cc: Dave Chinner <david@xxxxxxxxxxxxx> Cc: "Darrick J. Wong" <darrick.wong@xxxxxxxxxx> Cc: Ross Zwisler <ross.zwisler@xxxxxxxxxxxxxxx> Cc: Jeff Layton <jlayton@xxxxxxxxxxxxxxx> Cc: "J. Bruce Fields" <bfields@xxxxxxxxxxxx> Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> --- fs/xfs/xfs_aops.c | 24 ++++++++++++++++++++ fs/xfs/xfs_file.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++---- fs/xfs/xfs_inode.h | 1 + fs/xfs/xfs_ioctl.c | 7 ++---- 4 files changed, 86 insertions(+), 10 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index f18e5932aec4..00da08d0d6db 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1455,6 +1455,29 @@ xfs_vm_set_page_dirty( return newly_dirty; } +/* + * Reap any in-flight FL_ALLOCATED leases when the pages represented by + * that lease are no longer under dma. We hold XFS_MMAPLOCK_EXCL to + * synchronize with the file space deletion path that may be doing the + * same operation. 
+ */ +static void +xfs_vm_dax_flush_dma( + struct inode *inode) +{ + uint iolock = XFS_MMAPLOCK_EXCL; + + /* + * try to catch cases where the inode dax mode was changed + * without first synchronizing leases + */ + WARN_ON_ONCE(!IS_DAX(inode)); + + xfs_ilock(XFS_I(inode), iolock); + xfs_break_allocated(inode, &iolock); + xfs_iunlock(XFS_I(inode), iolock); +} + const struct address_space_operations xfs_address_space_operations = { .readpage = xfs_vm_readpage, .readpages = xfs_vm_readpages, @@ -1468,4 +1491,5 @@ const struct address_space_operations xfs_address_space_operations = { .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, + .dax_flush_dma = xfs_vm_dax_flush_dma, }; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index c6780743f8ec..5bc72f1da301 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -40,6 +40,7 @@ #include "xfs_iomap.h" #include "xfs_reflink.h" +#include <linux/dax.h> #include <linux/dcache.h> #include <linux/falloc.h> #include <linux/pagevec.h> @@ -746,6 +747,39 @@ xfs_file_write_iter( return ret; } +/* + * DAX breaks the traditional truncate model that assumes in-flight DMA + * to a file-backed page can continue until the final put of the page + * regardless of that page's relationship to the file. In the case of + * DAX the page has a 1:1 relationship with filesystem blocks. We need to + * hold off truncate while any DMA might be in-flight. This assumes that + * all DMA usage is transient, any non-transient usages of + * get_user_pages must be disallowed for DAX files. + * + * This also unlocks FL_LAYOUT leases. 
+ */ +int +xfs_break_allocated( + struct inode *inode, + uint *iolock) +{ + struct xfs_inode *ip = XFS_I(inode); + int error; + + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL + | XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)); + + while ((error = break_allocated(inode, false)) == -EWOULDBLOCK) { + xfs_iunlock(ip, *iolock); + error = break_allocated(inode, true); + *iolock &= ~(XFS_MMAPLOCK_SHARED|XFS_IOLOCK_SHARED); + *iolock |= XFS_MMAPLOCK_EXCL|XFS_IOLOCK_EXCL; + xfs_ilock(ip, *iolock); + } + + return error; +} + #define XFS_FALLOC_FL_SUPPORTED \ (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \ @@ -762,7 +796,7 @@ xfs_file_fallocate( struct xfs_inode *ip = XFS_I(inode); long error; enum xfs_prealloc_flags flags = 0; - uint iolock = XFS_IOLOCK_EXCL; + uint iolock = XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL; loff_t new_size = 0; bool do_file_insert = 0; @@ -772,13 +806,10 @@ xfs_file_fallocate( return -EOPNOTSUPP; xfs_ilock(ip, iolock); - error = xfs_break_layouts(inode, &iolock); + error = xfs_break_allocated(inode, &iolock); if (error) goto out_unlock; - xfs_ilock(ip, XFS_MMAPLOCK_EXCL); - iolock |= XFS_MMAPLOCK_EXCL; - if (mode & FALLOC_FL_PUNCH_HOLE) { error = xfs_free_file_space(ip, offset, len); if (error) @@ -1136,6 +1167,28 @@ xfs_file_mmap( return 0; } +/* + * Any manipulation of FL_ALLOCATED leases needs to be coordinated with + * XFS_MMAPLOCK_EXCL to synchronize get_user_pages() + DMA vs truncate. 
+ */ +static int +xfs_file_setlease( + struct file *filp, + long arg, + struct file_lock **flp, + void **priv) +{ + struct inode *inode = file_inode(filp); + struct xfs_inode *ip = XFS_I(inode); + uint iolock = XFS_MMAPLOCK_EXCL; + int error; + + xfs_ilock(ip, iolock); + error = generic_setlease(filp, arg, flp, priv); + xfs_iunlock(ip, iolock); + return error; +} + const struct file_operations xfs_file_operations = { .llseek = xfs_file_llseek, .read_iter = xfs_file_read_iter, @@ -1154,6 +1207,7 @@ const struct file_operations xfs_file_operations = { .fallocate = xfs_file_fallocate, .clone_file_range = xfs_file_clone_range, .dedupe_file_range = xfs_file_dedupe_range, + .setlease = xfs_file_setlease, }; const struct file_operations xfs_dir_file_operations = { diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 0ee453de239a..e0d421884fe4 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -445,6 +445,7 @@ int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset, xfs_fsize_t isize, bool *did_zeroing); int xfs_zero_range(struct xfs_inode *ip, xfs_off_t pos, xfs_off_t count, bool *did_zero); +int xfs_break_allocated(struct inode *inode, uint *iolock); /* from xfs_iops.c */ extern void xfs_setup_inode(struct xfs_inode *ip); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index aa75389be8cf..5be60c74bede 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -612,7 +612,7 @@ xfs_ioc_space( struct xfs_inode *ip = XFS_I(inode); struct iattr iattr; enum xfs_prealloc_flags flags = 0; - uint iolock = XFS_IOLOCK_EXCL; + uint iolock = XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL; int error; /* @@ -642,13 +642,10 @@ xfs_ioc_space( return error; xfs_ilock(ip, iolock); - error = xfs_break_layouts(inode, &iolock); + error = xfs_break_allocated(inode, &iolock); if (error) goto out_unlock; - xfs_ilock(ip, XFS_MMAPLOCK_EXCL); - iolock |= XFS_MMAPLOCK_EXCL; - switch (bf->l_whence) { case 0: /*SEEK_SET*/ break;