When xfs is operating as the back-end of a pNFS block server, it prevents collisions between local and remote operations by requiring a lease to be held for remotely accessed blocks. Local filesystem operations break those leases before writing or mutating the extent map of the file. A similar mechanism is needed to prevent operations on pinned dax mappings, like device-DMA, from colliding with extent unmap operations. XFS_BREAK_REMOTE and XFS_BREAK_MAPS are introduced as flags to control the layouts that need to be broken by xfs_break_layouts(). While XFS_BREAK_REMOTE is invoked in all calls to the new xfs_break_layouts(), XFS_BREAK_MAPS only needs to specified when extents may be unmapped, i.e. xfs_file_fallocate() and xfs_ioc_space(). XFS_BREAK_MAPS also imposes the additional locking constraint of breaking (awaiting) pinned dax mappings while holding XFS_MMAPLOCK_EXCL. There is a small functional change in this rework. For the cases where XFS_BREAK_MAPS is specified to xfs_break_layouts(), the XFS_MMAPLOCK_EXCL ilock is held over the break_layouts() loop in xfs_break_leased_layouts(). Cc: "Darrick J. Wong" <darrick.wong@xxxxxxxxxx> Cc: Ross Zwisler <ross.zwisler@xxxxxxxxxxxxxxx> Reported-by: Dave Chinner <david@xxxxxxxxxxxxx> Reported-by: Christoph Hellwig <hch@xxxxxx> Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> --- fs/xfs/xfs_file.c | 32 ++++++++++++++++++++++++++------ fs/xfs/xfs_inode.h | 9 +++++++++ fs/xfs/xfs_ioctl.c | 9 +++------ fs/xfs/xfs_iops.c | 12 +++++++----- fs/xfs/xfs_pnfs.c | 8 +++----- fs/xfs/xfs_pnfs.h | 4 ++-- 6 files changed, 50 insertions(+), 24 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 9ea08326f876..f914f0628dc2 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -350,7 +350,7 @@ xfs_file_aio_write_checks( if (error <= 0) return error; - error = xfs_break_layouts(inode, iolock); + error = xfs_break_layouts(inode, iolock, XFS_BREAK_REMOTE); if (error) return error; @@ -752,6 +752,28 @@ xfs_file_write_iter( return ret; } +int +xfs_break_layouts( + struct inode *inode, + uint *iolock, + unsigned long flags) +{ + struct xfs_inode *ip = XFS_I(inode); + uint iolock_assert = 0; + int ret = 0; + + if (flags & XFS_BREAK_REMOTE) + iolock_assert |= XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL; + if (flags & XFS_BREAK_MAPS) + iolock_assert |= XFS_MMAPLOCK_EXCL; + + ASSERT(xfs_isilocked(ip, iolock_assert)); + + if (flags & XFS_BREAK_REMOTE) + ret = xfs_break_leased_layouts(inode, iolock); + return ret; +} + #define XFS_FALLOC_FL_SUPPORTED \ (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \ @@ -768,7 +790,7 @@ xfs_file_fallocate( struct xfs_inode *ip = XFS_I(inode); long error; enum xfs_prealloc_flags flags = 0; - uint iolock = XFS_IOLOCK_EXCL; + uint iolock = XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL; loff_t new_size = 0; bool do_file_insert = false; @@ -778,13 +800,11 @@ xfs_file_fallocate( return -EOPNOTSUPP; xfs_ilock(ip, iolock); - error = xfs_break_layouts(inode, &iolock); + error = xfs_break_layouts(inode, &iolock, + XFS_BREAK_REMOTE | XFS_BREAK_MAPS); if (error) goto out_unlock; - xfs_ilock(ip, XFS_MMAPLOCK_EXCL); - iolock |= XFS_MMAPLOCK_EXCL; - if (mode & FALLOC_FL_PUNCH_HOLE) { error = xfs_free_file_space(ip, offset, len); if (error) diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 3e8dc990d41c..9b73ceb09cb1 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -379,6 +379,12 @@ static inline void xfs_ifunlock(struct xfs_inode *ip) >> XFS_ILOCK_SHIFT) /* + * Flags for layout breaks + */ +#define XFS_BREAK_REMOTE (1<<0) /* break remote layout leases */ +#define XFS_BREAK_MAPS (1<<1) /* break local direct (dax) mappings */ + +/* * For multiple groups support: if S_ISGID bit is set in the parent * directory, group of new file is set to that of the parent, and * new subdirectory gets S_ISGID bit from parent. @@ -447,6 +453,9 @@ int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset, xfs_fsize_t isize, bool *did_zeroing); int xfs_zero_range(struct xfs_inode *ip, xfs_off_t pos, xfs_off_t count, bool *did_zero); +int xfs_break_layouts(struct inode *inode, uint *iolock, + unsigned long flags); + /* from xfs_iops.c */ extern void xfs_setup_inode(struct xfs_inode *ip); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 89fb1eb80aae..31288d900c8a 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -39,7 +39,6 @@ #include "xfs_icache.h" #include "xfs_symlink.h" #include "xfs_trans.h" -#include "xfs_pnfs.h" #include "xfs_acl.h" #include "xfs_btree.h" #include <linux/fsmap.h> @@ -614,7 +613,7 @@ xfs_ioc_space( struct xfs_inode *ip = XFS_I(inode); struct iattr iattr; enum xfs_prealloc_flags flags = 0; - uint iolock = XFS_IOLOCK_EXCL; + uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; int error; /* @@ -644,13 +643,11 @@ xfs_ioc_space( return error; xfs_ilock(ip, iolock); - error = xfs_break_layouts(inode, &iolock); + error = xfs_break_layouts(inode, &iolock, + XFS_BREAK_REMOTE | XFS_BREAK_MAPS); if (error) goto out_unlock; - xfs_ilock(ip, XFS_MMAPLOCK_EXCL); - iolock |= XFS_MMAPLOCK_EXCL; - switch (bf->l_whence) { case 0: /*SEEK_SET*/ break; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 951e84df5576..15a1c7c874d9 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -37,7 +37,6 @@ #include "xfs_da_btree.h" #include "xfs_dir2.h" #include "xfs_trans_space.h" -#include "xfs_pnfs.h" #include "xfs_iomap.h" #include <linux/capability.h> @@ -1030,11 +1029,14 @@ xfs_vn_setattr( struct xfs_inode *ip = XFS_I(d_inode(dentry)); uint iolock = XFS_IOLOCK_EXCL; - error = xfs_break_layouts(d_inode(dentry), &iolock); - if (error) - return error; - xfs_ilock(ip, XFS_MMAPLOCK_EXCL); + error = xfs_break_layouts(d_inode(dentry), &iolock, + XFS_BREAK_REMOTE | XFS_BREAK_MAPS); + + if (error) { + xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); + return error; + } error = xfs_vn_setattr_size(dentry, iattr); xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); } else { diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index aa6c5c193f45..ed13c9baffff 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -31,15 +31,13 @@ * rules in the page fault path we don't bother. */ int -xfs_break_layouts( +xfs_break_leased_layouts( struct inode *inode, uint *iolock) { struct xfs_inode *ip = XFS_I(inode); int error; - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)); - while ((error = break_layout(inode, false) == -EWOULDBLOCK)) { xfs_iunlock(ip, *iolock); error = break_layout(inode, true); @@ -120,8 +118,8 @@ xfs_fs_map_blocks( * Lock out any other I/O before we flush and invalidate the pagecache, * and then hand out a layout to the remote system. This is very * similar to direct I/O, except that the synchronization is much more - * complicated. See the comment near xfs_break_layouts for a detailed - * explanation. + * complicated. See the comment near xfs_break_leased_layouts + * for a detailed explanation. */ xfs_ilock(ip, XFS_IOLOCK_EXCL); diff --git a/fs/xfs/xfs_pnfs.h b/fs/xfs/xfs_pnfs.h index bf45951e28fe..12f46fe6d902 100644 --- a/fs/xfs/xfs_pnfs.h +++ b/fs/xfs/xfs_pnfs.h @@ -9,10 +9,10 @@ int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length, int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps, struct iattr *iattr); -int xfs_break_layouts(struct inode *inode, uint *iolock); +int xfs_break_leased_layouts(struct inode *inode, uint *iolock); #else static inline int -xfs_break_layouts(struct inode *inode, uint *iolock) +xfs_break_leased_layouts(struct inode *inode, uint *iolock) { return 0; }