In preparation for moving DAX pages to be 0-based rather than 1-based for the idle refcount, the fsdax core wants to have all mappings in a "zapped" state before truncate. For typical pages this happens naturally via unmap_mapping_range(), for DAX pages some help is needed to record this state in the 'struct address_space' of the inode(s) where the page is mapped. That "zapped" state is recorded in DAX entries as a side effect of xfs_break_layouts(). Arrange for it to be called before all truncation events which already happens for truncate() and PUNCH_HOLE, but not truncate_inode_pages_final(). Arrange for xfs_break_layouts() before truncate_inode_pages_final(). Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx> Cc: Jan Kara <jack@xxxxxxx> Cc: "Darrick J. Wong" <djwong@xxxxxxxxxx> Cc: Jason Gunthorpe <jgg@xxxxxxxxxx> Cc: Christoph Hellwig <hch@xxxxxx> Cc: John Hubbard <jhubbard@xxxxxxxxxx> Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> --- fs/xfs/xfs_file.c | 13 +++++++++---- fs/xfs/xfs_inode.c | 3 ++- fs/xfs/xfs_inode.h | 6 ++++-- fs/xfs/xfs_super.c | 22 ++++++++++++++++++++++ 4 files changed, 37 insertions(+), 7 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 556e28d06788..d3ff692d5546 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -816,7 +816,8 @@ xfs_wait_dax_page( int xfs_break_dax_layouts( struct inode *inode, - bool *retry) + bool *retry, + int state) { struct page *page; @@ -827,8 +828,8 @@ xfs_break_dax_layouts( return 0; *retry = true; - return ___wait_var_event(page, dax_page_idle(page), TASK_INTERRUPTIBLE, - 0, 0, xfs_wait_dax_page(inode)); + return ___wait_var_event(page, dax_page_idle(page), state, 0, 0, + xfs_wait_dax_page(inode)); } int @@ -839,14 +840,18 @@ xfs_break_layouts( { bool retry; int error; + int state = TASK_INTERRUPTIBLE; ASSERT(xfs_isilocked(XFS_I(inode), XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)); do { retry = false; switch (reason) { + case BREAK_UNMAP_FINAL: + state = TASK_UNINTERRUPTIBLE; + fallthrough; case BREAK_UNMAP: - error = xfs_break_dax_layouts(inode, &retry); + error = xfs_break_dax_layouts(inode, &retry, state); if (error || retry) break; fallthrough; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 28493c8e9bb2..72ce1cb72736 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3452,6 +3452,7 @@ xfs_mmaplock_two_inodes_and_break_dax_layout( struct xfs_inode *ip1, struct xfs_inode *ip2) { + int state = TASK_INTERRUPTIBLE; int error; bool retry; struct page *page; @@ -3463,7 +3464,7 @@ xfs_mmaplock_two_inodes_and_break_dax_layout( retry = false; /* Lock the first inode */ xfs_ilock(ip1, XFS_MMAPLOCK_EXCL); - error = xfs_break_dax_layouts(VFS_I(ip1), &retry); + error = xfs_break_dax_layouts(VFS_I(ip1), &retry, state); if (error || retry) { xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL); if (error == 0 && retry) diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index fa780f08dc89..e4994eb6e521 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -454,11 +454,13 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip) * layout-holder has a consistent view of the file's extent map. While * BREAK_WRITE breaks can be satisfied by recalling FL_LAYOUT leases, * BREAK_UNMAP breaks additionally require waiting for busy dax-pages to - * go idle. + * go idle. BREAK_UNMAP_FINAL is an uninterruptible version of + * BREAK_UNMAP. */ enum layout_break_reason { BREAK_WRITE, BREAK_UNMAP, + BREAK_UNMAP_FINAL, }; /* @@ -531,7 +533,7 @@ xfs_itruncate_extents( } /* from xfs_file.c */ -int xfs_break_dax_layouts(struct inode *inode, bool *retry); +int xfs_break_dax_layouts(struct inode *inode, bool *retry, int state); int xfs_break_layouts(struct inode *inode, uint *iolock, enum layout_break_reason reason); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 9ac59814bbb6..ebb4a6eba3fc 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -725,6 +725,27 @@ xfs_fs_drop_inode( return generic_drop_inode(inode); } +STATIC void +xfs_fs_evict_inode( + struct inode *inode) +{ + struct xfs_inode *ip = XFS_I(inode); + uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; + long error; + + xfs_ilock(ip, iolock); + + error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP_FINAL); + + /* The final layout break is uninterruptible */ + ASSERT_ALWAYS(!error); + + truncate_inode_pages_final(&inode->i_data); + clear_inode(inode); + + xfs_iunlock(ip, iolock); +} + static void xfs_mount_free( struct xfs_mount *mp) @@ -1144,6 +1165,7 @@ static const struct super_operations xfs_super_operations = { .destroy_inode = xfs_fs_destroy_inode, .dirty_inode = xfs_fs_dirty_inode, .drop_inode = xfs_fs_drop_inode, + .evict_inode = xfs_fs_evict_inode, .put_super = xfs_fs_put_super, .sync_fs = xfs_fs_sync_fs, .freeze_fs = xfs_fs_freeze,