On 11/21/24 5:40 PM, Alistair Popple wrote:
Prior to freeing a block file systems supporting FS DAX must check
that the associated pages are both unmapped from user-space and not
undergoing DMA or other access from eg. get_user_pages(). This is
achieved by unmapping the file range and scanning the FS DAX
page-cache to see if any pages within the mapping have an elevated
refcount.
This is done using two functions - dax_layout_busy_page_range() which
returns a page to wait for the refcount to become idle on. Rather than
open-code this introduce a common implementation to both unmap and
wait for the page to become idle.
Signed-off-by: Alistair Popple <apopple@xxxxxxxxxx>
---
fs/dax.c | 29 +++++++++++++++++++++++++++++
fs/ext4/inode.c | 10 +---------
fs/fuse/dax.c | 29 +++++------------------------
fs/xfs/xfs_inode.c | 23 +++++------------------
fs/xfs/xfs_inode.h | 2 +-
include/linux/dax.h | 7 +++++++
6 files changed, 48 insertions(+), 52 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index efc1d56..b1ad813 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -845,6 +845,35 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
return ret;
}
+static int wait_page_idle(struct page *page,
+ void (cb)(struct inode *),
+ struct inode *inode)
+{
+ return ___wait_var_event(page, page_ref_count(page) == 1,
+ TASK_INTERRUPTIBLE, 0, 0, cb(inode));
+}
+
+/*
+ * Unmaps the inode and waits for any DMA to complete prior to deleting the
+ * DAX mapping entries for the range.
+ */
+int dax_break_mapping(struct inode *inode, loff_t start, loff_t end,
+ void (cb)(struct inode *))
+{
+ struct page *page;
+ int error;
+
+ do {
+ page = dax_layout_busy_page_range(inode->i_mapping, start, end);
+ if (!page)
+ break;
+
+ error = wait_page_idle(page, cb, inode);
+ } while (error == 0);
+
+ return error;
+}
Hi Alistair!
This needs to be EXPORT'ed. In fact so do two others, but I thought I'd
reply at the exact point that the first fix needs to be inserted, which
is here. And let you sprinkle the remaining two into the right patches.
The overall diff required for me to build the kernel with this series is:
diff --git a/fs/dax.c b/fs/dax.c
index 0169b356b975..35e3c41eb517 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -921,6 +921,7 @@ void dax_delete_mapping_range(struct address_space *mapping,
}
xas_unlock_irq(&xas);
}
+EXPORT_SYMBOL_GPL(dax_delete_mapping_range);
static int wait_page_idle(struct page *page,
void (cb)(struct inode *),
@@ -961,6 +962,7 @@ int dax_break_mapping(struct inode *inode, loff_t start, loff_t end,
return error;
}
+EXPORT_SYMBOL_GPL(dax_break_mapping);
void dax_break_mapping_uninterruptible(struct inode *inode,
void (cb)(struct inode *))
@@ -979,6 +981,7 @@ void dax_break_mapping_uninterruptible(struct inode *inode,
if (!page)
dax_delete_mapping_range(inode->i_mapping, 0, LLONG_MAX);
}
+EXPORT_SYMBOL_GPL(dax_break_mapping_uninterruptible);
/*
* Invalidate DAX entry if it is clean.
thanks,
John Hubbard
+
/*
* Invalidate DAX entry if it is clean.
*/
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cf87c5b..d42c011 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3885,15 +3885,7 @@ int ext4_break_layouts(struct inode *inode)
if (WARN_ON_ONCE(!rwsem_is_locked(&inode->i_mapping->invalidate_lock)))
return -EINVAL;
- do {
- page = dax_layout_busy_page(inode->i_mapping);
- if (!page)
- return 0;
-
- error = dax_wait_page_idle(page, ext4_wait_dax_page, inode);
- } while (error == 0);
-
- return error;
+ return dax_break_mapping_inode(inode, ext4_wait_dax_page);
}
/*
diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index af436b5..2493f9c 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -665,38 +665,19 @@ static void fuse_wait_dax_page(struct inode *inode)
filemap_invalidate_lock(inode->i_mapping);
}
-/* Should be called with mapping->invalidate_lock held exclusively */
-static int __fuse_dax_break_layouts(struct inode *inode, bool *retry,
- loff_t start, loff_t end)
-{
- struct page *page;
-
- page = dax_layout_busy_page_range(inode->i_mapping, start, end);
- if (!page)
- return 0;
-
- *retry = true;
- return dax_wait_page_idle(page, fuse_wait_dax_page, inode);
-}
-
-/* dmap_end == 0 leads to unmapping of whole file */
+/* Should be called with mapping->invalidate_lock held exclusively.
+ * dmap_end == 0 leads to unmapping of whole file.
+ */
int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start,
u64 dmap_end)
{
- bool retry;
- int ret;
-
- do {
- retry = false;
- ret = __fuse_dax_break_layouts(inode, &retry, dmap_start,
- dmap_end);
- } while (ret == 0 && retry);
if (!dmap_end) {
dmap_start = 0;
dmap_end = LLONG_MAX;
}
- return ret;
+ return dax_break_mapping(inode, dmap_start, dmap_end,
+ fuse_wait_dax_page);
}
ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index eb12123..120597a 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2704,21 +2704,17 @@ xfs_mmaplock_two_inodes_and_break_dax_layout(
struct xfs_inode *ip2)
{
int error;
- bool retry;
struct page *page;
if (ip1->i_ino > ip2->i_ino)
swap(ip1, ip2);
again:
- retry = false;
/* Lock the first inode */
xfs_ilock(ip1, XFS_MMAPLOCK_EXCL);
- error = xfs_break_dax_layouts(VFS_I(ip1), &retry);
- if (error || retry) {
+ error = xfs_break_dax_layouts(VFS_I(ip1));
+ if (error) {
xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
- if (error == 0 && retry)
- goto again;
return error;
}
@@ -2977,19 +2973,11 @@ xfs_wait_dax_page(
int
xfs_break_dax_layouts(
- struct inode *inode,
- bool *retry)
+ struct inode *inode)
{
- struct page *page;
-
xfs_assert_ilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL);
- page = dax_layout_busy_page(inode->i_mapping);
- if (!page)
- return 0;
-
- *retry = true;
- return dax_wait_page_idle(page, xfs_wait_dax_page, inode);
+ return dax_break_mapping_inode(inode, xfs_wait_dax_page);
}
int
@@ -3007,8 +2995,7 @@ xfs_break_layouts(
retry = false;
switch (reason) {
case BREAK_UNMAP:
- error = xfs_break_dax_layouts(inode, &retry);
- if (error || retry)
+ if (xfs_break_dax_layouts(inode))
break;
fallthrough;
case BREAK_WRITE:
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 97ed912..0db27ba 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -564,7 +564,7 @@ xfs_itruncate_extents(
return xfs_itruncate_extents_flags(tpp, ip, whichfork, new_size, 0);
}
-int xfs_break_dax_layouts(struct inode *inode, bool *retry);
+int xfs_break_dax_layouts(struct inode *inode);
int xfs_break_layouts(struct inode *inode, uint *iolock,
enum layout_break_reason reason);
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 773dfc4..7419c88 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -257,6 +257,13 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
pgoff_t index);
+int __must_check dax_break_mapping(struct inode *inode, loff_t start,
+ loff_t end, void (cb)(struct inode *));
+static inline int __must_check dax_break_mapping_inode(struct inode *inode,
+ void (cb)(struct inode *))
+{
+ return dax_break_mapping(inode, 0, LLONG_MAX, cb);
+}
int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
struct inode *dest, loff_t destoff,
loff_t len, bool *is_same,