[PATCH v3] exfat: move extend valid_size into ->page_mkwrite()

"Yuezhang.Mo@xxxxxxxx" <Yuezhang.Mo@xxxxxxxx> · Mon, 5 Aug 2024 10:48:33 +0000

Extending valid_size to the end of the mmap area by writing zeros
at mmap time is not a good approach, because after calling mmap
the application may write no data at all, or may write only a
small amount of data to the head of the mmap area.

This commit moves the extension of valid_size into
exfat_page_mkwrite(). exfat_page_mkwrite() extends valid_size only
up to the starting position of the newly written data, which
reduces unnecessary writing of zeros.

If a block is not mapped and is marked as new after being
mapped for writing, block_write_begin() zeroes the page cache
corresponding to that block, so there is no need to call
zero_user_segment() in exfat_file_zeroed_range(). Moreover, after
moving the extension of valid_size into exfat_page_mkwrite(), the
data written via mmap is copied to the page cache, but the page
cache may not yet be mapped to the disk; calling zero_user_segment()
would then clear the data written via mmap. So this commit removes
the zero_user_segment() call from exfat_file_zeroed_range() and
renames exfat_file_zeroed_range() to exfat_extend_valid_size().

Signed-off-by: Yuezhang Mo <Yuezhang.Mo@xxxxxxxx>
---

Changes for v2:
  - Remove an unnecessary check from exfat_file_mmap()

Changes for v3:
  - Fix the potential deadlock
  - Change to use ->valid_size to determine whether
    exfat_block_page_mkwrite() needs to be called

 fs/exfat/exfat_fs.h |  1 +
 fs/exfat/file.c     | 92 ++++++++++++++++++++++++++++++++-------------
 fs/exfat/inode.c    |  5 +++
 3 files changed, 72 insertions(+), 26 deletions(-)

diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index ecc5db952deb..1d207eee3197 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -516,6 +516,7 @@ int __exfat_write_inode(struct inode *inode, int sync);
 int exfat_write_inode(struct inode *inode, struct writeback_control *wbc);
 void exfat_evict_inode(struct inode *inode);
 int exfat_block_truncate_page(struct inode *inode, loff_t from);
+int exfat_block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 
 /* exfat/nls.c */
 unsigned short exfat_toupper(struct super_block *sb, unsigned short a);
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 64c31867bc76..781b4d4dbda1 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -526,32 +526,32 @@ int exfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 	return blkdev_issue_flush(inode->i_sb->s_bdev);
 }
 
-static int exfat_file_zeroed_range(struct file *file, loff_t start, loff_t end)
+static int exfat_extend_valid_size(struct file *file, loff_t new_valid_size)
 {
 	int err;
+	loff_t pos;
 	struct inode *inode = file_inode(file);
+	struct exfat_inode_info *ei = EXFAT_I(inode);
 	struct address_space *mapping = inode->i_mapping;
 	const struct address_space_operations *ops = mapping->a_ops;
 
-	while (start < end) {
-		u32 zerofrom, len;
+	pos = ei->valid_size;
+	while (pos < new_valid_size) {
+		u32 len;
 		struct page *page = NULL;
 
-		zerofrom = start & (PAGE_SIZE - 1);
-		len = PAGE_SIZE - zerofrom;
-		if (start + len > end)
-			len = end - start;
+		len = PAGE_SIZE - (pos & (PAGE_SIZE - 1));
+		if (pos + len > new_valid_size)
+			len = new_valid_size - pos;
 
-		err = ops->write_begin(file, mapping, start, len, &page, NULL);
+		err = ops->write_begin(file, mapping, pos, len, &page, NULL);
 		if (err)
 			goto out;
 
-		zero_user_segment(page, zerofrom, zerofrom + len);
-
-		err = ops->write_end(file, mapping, start, len, len, page, NULL);
+		err = ops->write_end(file, mapping, pos, len, len, page, NULL);
 		if (err < 0)
 			goto out;
-		start += len;
+		pos += len;
 
 		balance_dirty_pages_ratelimited(mapping);
 		cond_resched();
@@ -579,7 +579,7 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 		goto unlock;
 
 	if (pos > valid_size) {
-		ret = exfat_file_zeroed_range(file, valid_size, pos);
+		ret = exfat_extend_valid_size(file, pos);
 		if (ret < 0 && ret != -ENOSPC) {
 			exfat_err(inode->i_sb,
 				"write: fail to zero from %llu to %llu(%zd)",
@@ -613,26 +613,66 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 	return ret;
 }
 
-static int exfat_file_mmap(struct file *file, struct vm_area_struct *vma)
+static vm_fault_t exfat_page_mkwrite(struct vm_fault *vmf)
 {
-	int ret;
+	int err;
+	struct vm_area_struct *vma = vmf->vma;
+	struct file *file = vma->vm_file;
 	struct inode *inode = file_inode(file);
 	struct exfat_inode_info *ei = EXFAT_I(inode);
-	loff_t start = ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
-	loff_t end = min_t(loff_t, i_size_read(inode),
-			start + vma->vm_end - vma->vm_start);
+	struct folio *folio = page_folio(vmf->page);
+	vm_fault_t ret = VM_FAULT_LOCKED;
 
-	if ((vma->vm_flags & VM_WRITE) && ei->valid_size < end) {
-		ret = exfat_file_zeroed_range(file, ei->valid_size, end);
-		if (ret < 0) {
-			exfat_err(inode->i_sb,
-				  "mmap: fail to zero from %llu to %llu(%d)",
-				  start, end, ret);
-			return ret;
+	sb_start_pagefault(inode->i_sb);
+	file_update_time(file);
+
+	if (ei->valid_size < folio_pos(folio) + folio_size(folio)) {
+		if (!inode_trylock(inode)) {
+			ret = VM_FAULT_RETRY;
+			goto out;
 		}
+
+		if (ei->valid_size < folio_pos(folio)) {
+			err = exfat_extend_valid_size(file, folio_pos(folio));
+			if (err < 0) {
+				ret = vmf_fs_error(err);
+				inode_unlock(inode);
+				goto out;
+			}
+		}
+
+		err = exfat_block_page_mkwrite(vma, vmf);
+		inode_unlock(inode);
+		if (err)
+			ret = vmf_fs_error(err);
+	} else {
+		folio_lock(folio);
+		if (folio->mapping != file->f_mapping) {
+			folio_unlock(folio);
+			ret = VM_FAULT_NOPAGE;
+			goto out;
+		}
+
+		folio_mark_dirty(folio);
+		folio_wait_stable(folio);
 	}
 
-	return generic_file_mmap(file, vma);
+out:
+	sb_end_pagefault(inode->i_sb);
+	return ret;
+}
+
+static const struct vm_operations_struct exfat_file_vm_ops = {
+	.fault		= filemap_fault,
+	.map_pages	= filemap_map_pages,
+	.page_mkwrite	= exfat_page_mkwrite,
+};
+
+static int exfat_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	file_accessed(file);
+	vma->vm_ops = &exfat_file_vm_ops;
+	return 0;
 }
 
 const struct file_operations exfat_file_operations = {
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index dd894e558c91..804de7496a7f 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -564,6 +564,11 @@ int exfat_block_truncate_page(struct inode *inode, loff_t from)
 	return block_truncate_page(inode->i_mapping, from, exfat_get_block);
 }
 
+int exfat_block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	return block_page_mkwrite(vma, vmf, exfat_get_block);
+}
+
 static const struct address_space_operations exfat_aops = {
 	.dirty_folio	= block_dirty_folio,
 	.invalidate_folio = block_invalidate_folio,
-- 
2.34.1