John Hubbard <jhubbard@xxxxxxxxxx> writes: > On 11/21/24 5:40 PM, Alistair Popple wrote: >> Prior to freeing a block file systems supporting FS DAX must check >> that the associated pages are both unmapped from user-space and not >> undergoing DMA or other access from eg. get_user_pages(). This is >> achieved by unmapping the file range and scanning the FS DAX >> page-cache to see if any pages within the mapping have an elevated >> refcount. >> This is done using two functions - dax_layout_busy_page_range() >> which >> returns a page to wait for the refcount to become idle on. Rather than >> open-code this introduce a common implementation to both unmap and >> wait for the page to become idle. >> Signed-off-by: Alistair Popple <apopple@xxxxxxxxxx> >> --- >> fs/dax.c | 29 +++++++++++++++++++++++++++++ >> fs/ext4/inode.c | 10 +--------- >> fs/fuse/dax.c | 29 +++++------------------------ >> fs/xfs/xfs_inode.c | 23 +++++------------------ >> fs/xfs/xfs_inode.h | 2 +- >> include/linux/dax.h | 7 +++++++ >> 6 files changed, 48 insertions(+), 52 deletions(-) >> diff --git a/fs/dax.c b/fs/dax.c >> index efc1d56..b1ad813 100644 >> --- a/fs/dax.c >> +++ b/fs/dax.c >> @@ -845,6 +845,35 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) >> return ret; >> } >> +static int wait_page_idle(struct page *page, >> + void (cb)(struct inode *), >> + struct inode *inode) >> +{ >> + return ___wait_var_event(page, page_ref_count(page) == 1, >> + TASK_INTERRUPTIBLE, 0, 0, cb(inode)); >> +} >> + >> +/* >> + * Unmaps the inode and waits for any DMA to complete prior to deleting the >> + * DAX mapping entries for the range. >> + */ >> +int dax_break_mapping(struct inode *inode, loff_t start, loff_t end, >> + void (cb)(struct inode *)) >> +{ >> + struct page *page; >> + int error; >> + >> + do { >> + page = dax_layout_busy_page_range(inode->i_mapping, start, end); >> + if (!page) >> + break; >> + >> + error = wait_page_idle(page, cb, inode); >> + } while (error == 0); >> + >> + return error; >> +} > > Hi Alistair! > > This needs to be EXPORT'ed. In fact so do two others, but I thought I'd > reply at the exact point that the first fix needs to be inserted, which > is here. And let you sprinkle the remaining two into the right patches. Argh, thanks. I guess one of the kernel build bots will yell at me soon enough... :-) - Alistair > The overall diff required for me to build the kernel with this series is: > > diff --git a/fs/dax.c b/fs/dax.c > index 0169b356b975..35e3c41eb517 100644 > --- a/fs/dax.c > +++ b/fs/dax.c > @@ -921,6 +921,7 @@ void dax_delete_mapping_range(struct address_space *mapping, > } > xas_unlock_irq(&xas); > } > +EXPORT_SYMBOL_GPL(dax_delete_mapping_range); > static int wait_page_idle(struct page *page, > void (cb)(struct inode *), > @@ -961,6 +962,7 @@ int dax_break_mapping(struct inode *inode, loff_t start, loff_t end, > return error; > } > +EXPORT_SYMBOL_GPL(dax_break_mapping); > void dax_break_mapping_uninterruptible(struct inode *inode, > void (cb)(struct inode *)) > @@ -979,6 +981,7 @@ void dax_break_mapping_uninterruptible(struct inode *inode, > if (!page) > dax_delete_mapping_range(inode->i_mapping, 0, LLONG_MAX); > } > +EXPORT_SYMBOL_GPL(dax_break_mapping_uninterruptible); > /* > * Invalidate DAX entry if it is clean. > > > thanks, > John Hubbard > > >> + >> /* >> * Invalidate DAX entry if it is clean. >> */ >> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c >> index cf87c5b..d42c011 100644 >> --- a/fs/ext4/inode.c >> +++ b/fs/ext4/inode.c >> @@ -3885,15 +3885,7 @@ int ext4_break_layouts(struct inode *inode) >> if (WARN_ON_ONCE(!rwsem_is_locked(&inode->i_mapping->invalidate_lock))) >> return -EINVAL; >> - do { >> - page = dax_layout_busy_page(inode->i_mapping); >> - if (!page) >> - return 0; >> - >> - error = dax_wait_page_idle(page, ext4_wait_dax_page, inode); >> - } while (error == 0); >> - >> - return error; >> + return dax_break_mapping_inode(inode, ext4_wait_dax_page); >> } >> /* >> diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c >> index af436b5..2493f9c 100644 >> --- a/fs/fuse/dax.c >> +++ b/fs/fuse/dax.c >> @@ -665,38 +665,19 @@ static void fuse_wait_dax_page(struct inode *inode) >> filemap_invalidate_lock(inode->i_mapping); >> } >> -/* Should be called with mapping->invalidate_lock held >> exclusively */ >> -static int __fuse_dax_break_layouts(struct inode *inode, bool *retry, >> - loff_t start, loff_t end) >> -{ >> - struct page *page; >> - >> - page = dax_layout_busy_page_range(inode->i_mapping, start, end); >> - if (!page) >> - return 0; >> - >> - *retry = true; >> - return dax_wait_page_idle(page, fuse_wait_dax_page, inode); >> -} >> - >> -/* dmap_end == 0 leads to unmapping of whole file */ >> +/* Should be called with mapping->invalidate_lock held exclusively. >> + * dmap_end == 0 leads to unmapping of whole file. >> + */ >> int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, >> u64 dmap_end) >> { >> - bool retry; >> - int ret; >> - >> - do { >> - retry = false; >> - ret = __fuse_dax_break_layouts(inode, &retry, dmap_start, >> - dmap_end); >> - } while (ret == 0 && retry); >> if (!dmap_end) { >> dmap_start = 0; >> dmap_end = LLONG_MAX; >> } >> - return ret; >> + return dax_break_mapping(inode, dmap_start, dmap_end, >> + fuse_wait_dax_page); >> } >> ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter >> *to) >> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c >> index eb12123..120597a 100644 >> --- a/fs/xfs/xfs_inode.c >> +++ b/fs/xfs/xfs_inode.c >> @@ -2704,21 +2704,17 @@ xfs_mmaplock_two_inodes_and_break_dax_layout( >> struct xfs_inode *ip2) >> { >> int error; >> - bool retry; >> struct page *page; >> if (ip1->i_ino > ip2->i_ino) >> swap(ip1, ip2); >> again: >> - retry = false; >> /* Lock the first inode */ >> xfs_ilock(ip1, XFS_MMAPLOCK_EXCL); >> - error = xfs_break_dax_layouts(VFS_I(ip1), &retry); >> - if (error || retry) { >> + error = xfs_break_dax_layouts(VFS_I(ip1)); >> + if (error) { >> xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL); >> - if (error == 0 && retry) >> - goto again; >> return error; >> } >> @@ -2977,19 +2973,11 @@ xfs_wait_dax_page( >> int >> xfs_break_dax_layouts( >> - struct inode *inode, >> - bool *retry) >> + struct inode *inode) >> { >> - struct page *page; >> - >> xfs_assert_ilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL); >> - page = dax_layout_busy_page(inode->i_mapping); >> - if (!page) >> - return 0; >> - >> - *retry = true; >> - return dax_wait_page_idle(page, xfs_wait_dax_page, inode); >> + return dax_break_mapping_inode(inode, xfs_wait_dax_page); >> } >> int >> @@ -3007,8 +2995,7 @@ xfs_break_layouts( >> retry = false; >> switch (reason) { >> case BREAK_UNMAP: >> - error = xfs_break_dax_layouts(inode, &retry); >> - if (error || retry) >> + if (xfs_break_dax_layouts(inode)) >> break; >> fallthrough; >> case BREAK_WRITE: >> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h >> index 97ed912..0db27ba 100644 >> --- a/fs/xfs/xfs_inode.h >> +++ b/fs/xfs/xfs_inode.h >> @@ -564,7 +564,7 @@ xfs_itruncate_extents( >> return xfs_itruncate_extents_flags(tpp, ip, whichfork, new_size, 0); >> } >> -int xfs_break_dax_layouts(struct inode *inode, bool >> *retry); >> +int xfs_break_dax_layouts(struct inode *inode); >> int xfs_break_layouts(struct inode *inode, uint *iolock, >> enum layout_break_reason reason); >> diff --git a/include/linux/dax.h b/include/linux/dax.h >> index 773dfc4..7419c88 100644 >> --- a/include/linux/dax.h >> +++ b/include/linux/dax.h >> @@ -257,6 +257,13 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, >> int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); >> int dax_invalidate_mapping_entry_sync(struct address_space *mapping, >> pgoff_t index); >> +int __must_check dax_break_mapping(struct inode *inode, loff_t start, >> + loff_t end, void (cb)(struct inode *)); >> +static inline int __must_check dax_break_mapping_inode(struct inode *inode, >> + void (cb)(struct inode *)) >> +{ >> + return dax_break_mapping(inode, 0, LLONG_MAX, cb); >> +} >> int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff, >> struct inode *dest, loff_t destoff, >> loff_t len, bool *is_same,