[PATCH v1] exfat: move extend valid_size into ->page_mkwrite()

"Yuezhang.Mo@xxxxxxxx" <Yuezhang.Mo@xxxxxxxx> · Wed, 3 Apr 2024 07:34:08 +0000

Extending valid_size to the end of the mmap area by writing zeros
in ->mmap() is wasteful, because after calling mmap() the caller
may write no data at all, or only a small amount of data at the
head of the mmap area.

This commit moves the extension of valid_size into
exfat_page_mkwrite(). There, valid_size is only extended up to the
starting position of the newly written data, which reduces the
unnecessary writing of zeros.

If a block is not mapped and is marked as new after being mapped
for writing, block_write_begin() zeroes the page cache corresponding
to that block, so there is no need to call zero_user_segment() in
exfat_file_zeroed_range(). Moreover, once the extension of valid_size
is moved into exfat_page_mkwrite(), data written through mmap is
already in the page cache while the page cache may not yet be mapped
to the disk, so calling zero_user_segment() would clear the data
written through mmap. This commit therefore removes the
zero_user_segment() call from exfat_file_zeroed_range() and renames
exfat_file_zeroed_range() to exfat_extend_valid_size().

Signed-off-by: Yuezhang Mo <Yuezhang.Mo@xxxxxxxx>
---
 fs/exfat/exfat_fs.h |   1 +
 fs/exfat/file.c     | 101 +++++++++++++++++++++++++++++++++++---------
 fs/exfat/inode.c    |   5 +++
 3 files changed, 87 insertions(+), 20 deletions(-)

diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index ecc5db952deb..1d207eee3197 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -516,6 +516,7 @@ int __exfat_write_inode(struct inode *inode, int sync);
 int exfat_write_inode(struct inode *inode, struct writeback_control *wbc);
 void exfat_evict_inode(struct inode *inode);
 int exfat_block_truncate_page(struct inode *inode, loff_t from);
+int exfat_block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 
 /* exfat/nls.c */
 unsigned short exfat_toupper(struct super_block *sb, unsigned short a);
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index cc00f1a7a1e1..1a8af4860acf 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -523,7 +523,7 @@ int exfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 	return blkdev_issue_flush(inode->i_sb->s_bdev);
 }
 
-static int exfat_file_zeroed_range(struct file *file, loff_t start, loff_t end)
+static int exfat_extend_valid_size(struct file *file, loff_t start, loff_t end)
 {
 	int err;
 	struct inode *inode = file_inode(file);
@@ -531,11 +531,10 @@ static int exfat_file_zeroed_range(struct file *file, loff_t start, loff_t end)
 	const struct address_space_operations *ops = mapping->a_ops;
 
 	while (start < end) {
-		u32 zerofrom, len;
+		u32 len;
 		struct page *page = NULL;
 
-		zerofrom = start & (PAGE_SIZE - 1);
-		len = PAGE_SIZE - zerofrom;
+		len = PAGE_SIZE - (start & (PAGE_SIZE - 1));
 		if (start + len > end)
 			len = end - start;
 
@@ -543,8 +542,6 @@ static int exfat_file_zeroed_range(struct file *file, loff_t start, loff_t end)
 		if (err)
 			goto out;
 
-		zero_user_segment(page, zerofrom, zerofrom + len);
-
 		err = ops->write_end(file, mapping, start, len, len, page, NULL);
 		if (err < 0)
 			goto out;
@@ -576,7 +573,7 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 		goto unlock;
 
 	if (pos > valid_size) {
-		ret = exfat_file_zeroed_range(file, valid_size, pos);
+		ret = exfat_extend_valid_size(file, valid_size, pos);
 		if (ret < 0 && ret != -ENOSPC) {
 			exfat_err(inode->i_sb,
 				"write: fail to zero from %llu to %llu(%zd)",
@@ -610,26 +607,101 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 	return ret;
 }
 
-static int exfat_file_mmap(struct file *file, struct vm_area_struct *vma)
+/*
+ * ->page_mkwrite() handler. Extends ei->valid_size up to the start of the
+ * faulting folio, then dirties the folio directly if all of its buffers are
+ * already mapped, or falls back to exfat_block_page_mkwrite() otherwise.
+ *
+ * Returns VM_FAULT_LOCKED with the folio locked on success, VM_FAULT_NOPAGE
+ * if the folio no longer belongs to this file's mapping, or the result of
+ * vmf_fs_error() on failure.
+ */
+static vm_fault_t exfat_page_mkwrite(struct vm_fault *vmf)
 {
-	int ret;
+	int err;
+	struct vm_area_struct *vma = vmf->vma;
+	struct file *file = vma->vm_file;
 	struct inode *inode = file_inode(file);
 	struct exfat_inode_info *ei = EXFAT_I(inode);
-	loff_t start = ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
-	loff_t end = min_t(loff_t, i_size_read(inode),
-			start + vma->vm_end - vma->vm_start);
+	struct folio *folio = page_folio(vmf->page);
+	vm_fault_t ret = VM_FAULT_LOCKED;
+
+	sb_start_pagefault(inode->i_sb);
+	file_update_time(file);
+	folio_lock(folio);
+	if (folio->mapping != file->f_mapping) {
+		folio_unlock(folio);
+		ret = VM_FAULT_NOPAGE;
+		goto out;
+	}
 
-	if ((vma->vm_flags & VM_WRITE) && ei->valid_size < end) {
-		ret = exfat_file_zeroed_range(file, ei->valid_size, end);
-		if (ret < 0) {
-			exfat_err(inode->i_sb,
-				  "mmap: fail to zero from %llu to %llu(%d)",
-				  start, end, ret);
-			return ret;
+	if (ei->valid_size < folio_pos(folio)) {
+		inode_lock(inode);
+		err = exfat_extend_valid_size(file, ei->valid_size, folio_pos(folio));
+		inode_unlock(inode);
+		if (err < 0) {
+			/* do not return an error with the folio still locked */
+			folio_unlock(folio);
+			ret = vmf_fs_error(err);
+			goto out;
 		}
 	}
 
-	return generic_file_mmap(file, vma);
+	/*
+	 * check if the folio is mapped already (Whether ei->valid_size
+	 * has been extended to folio_pos(folio)+folio_len(folio))
+	 */
+	if (!folio_test_mappedtodisk(folio)) {
+		struct buffer_head *head = folio_buffers(folio);
+
+		if (head) {
+			int fully_mapped = 1;
+			struct buffer_head *bh = head;
+
+			do {
+				if (!buffer_mapped(bh)) {
+					fully_mapped = 0;
+					break;
+				}
+			} while (bh = bh->b_this_page, bh != head);
+
+			if (fully_mapped)
+				folio_set_mappedtodisk(folio);
+		}
+	}
+
+	if (folio_test_mappedtodisk(folio)) {
+		folio_mark_dirty(folio);
+		folio_wait_stable(folio);
+		goto out;
+	}
+
+	folio_unlock(folio);
+
+	err = exfat_block_page_mkwrite(vma, vmf);
+	if (err)
+		ret = vmf_fs_error(err);
+
+out:
+	sb_end_pagefault(inode->i_sb);
+	return ret;
+}
+
+static const struct vm_operations_struct exfat_file_vm_ops = {
+	.fault		= filemap_fault,
+	.map_pages	= filemap_map_pages,
+	.page_mkwrite	= exfat_page_mkwrite,
+};
+
+static int exfat_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct address_space *mapping = file->f_mapping;
+
+	if (!mapping->a_ops->read_folio)
+		return -ENOEXEC;
+	file_accessed(file);
+	vma->vm_ops = &exfat_file_vm_ops;
+	return 0;
 }
 
 const struct file_operations exfat_file_operations = {
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index dd894e558c91..804de7496a7f 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -564,6 +564,11 @@ int exfat_block_truncate_page(struct inode *inode, loff_t from)
 	return block_truncate_page(inode->i_mapping, from, exfat_get_block);
 }
 
+int exfat_block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	return block_page_mkwrite(vma, vmf, exfat_get_block);
+}
+
 static const struct address_space_operations exfat_aops = {
 	.dirty_folio	= block_dirty_folio,
 	.invalidate_folio = block_invalidate_folio,
-- 
2.34.1

From bd935dc0a8c08090317a8fb03ca874731ec86039 Mon Sep 17 00:00:00 2001
From: Yuezhang Mo <Yuezhang.Mo@xxxxxxxx>
Date: Fri, 8 Mar 2024 14:03:46 +0800
Subject: [PATCH v1] exfat: move extend valid_size into ->page_mkwrite()

Extending valid_size to the end of the mmap area by writing zeros
in ->mmap() is wasteful, because after calling mmap() the caller
may write no data at all, or only a small amount of data at the
head of the mmap area.

This commit moves the extension of valid_size into
exfat_page_mkwrite(). There, valid_size is only extended up to the
starting position of the newly written data, which reduces the
unnecessary writing of zeros.

If a block is not mapped and is marked as new after being mapped
for writing, block_write_begin() zeroes the page cache corresponding
to that block, so there is no need to call zero_user_segment() in
exfat_file_zeroed_range(). Moreover, once the extension of valid_size
is moved into exfat_page_mkwrite(), data written through mmap is
already in the page cache while the page cache may not yet be mapped
to the disk, so calling zero_user_segment() would clear the data
written through mmap. This commit therefore removes the
zero_user_segment() call from exfat_file_zeroed_range() and renames
exfat_file_zeroed_range() to exfat_extend_valid_size().

Signed-off-by: Yuezhang Mo <Yuezhang.Mo@xxxxxxxx>
---
 fs/exfat/exfat_fs.h |   1 +
 fs/exfat/file.c     | 101 +++++++++++++++++++++++++++++++++++---------
 fs/exfat/inode.c    |   5 +++
 3 files changed, 87 insertions(+), 20 deletions(-)

diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index ecc5db952deb..1d207eee3197 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -516,6 +516,7 @@ int __exfat_write_inode(struct inode *inode, int sync);
 int exfat_write_inode(struct inode *inode, struct writeback_control *wbc);
 void exfat_evict_inode(struct inode *inode);
 int exfat_block_truncate_page(struct inode *inode, loff_t from);
+int exfat_block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 
 /* exfat/nls.c */
 unsigned short exfat_toupper(struct super_block *sb, unsigned short a);
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index cc00f1a7a1e1..1a8af4860acf 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -523,7 +523,7 @@ int exfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 	return blkdev_issue_flush(inode->i_sb->s_bdev);
 }
 
-static int exfat_file_zeroed_range(struct file *file, loff_t start, loff_t end)
+static int exfat_extend_valid_size(struct file *file, loff_t start, loff_t end)
 {
 	int err;
 	struct inode *inode = file_inode(file);
@@ -531,11 +531,10 @@ static int exfat_file_zeroed_range(struct file *file, loff_t start, loff_t end)
 	const struct address_space_operations *ops = mapping->a_ops;
 
 	while (start < end) {
-		u32 zerofrom, len;
+		u32 len;
 		struct page *page = NULL;
 
-		zerofrom = start & (PAGE_SIZE - 1);
-		len = PAGE_SIZE - zerofrom;
+		len = PAGE_SIZE - (start & (PAGE_SIZE - 1));
 		if (start + len > end)
 			len = end - start;
 
@@ -543,8 +542,6 @@ static int exfat_file_zeroed_range(struct file *file, loff_t start, loff_t end)
 		if (err)
 			goto out;
 
-		zero_user_segment(page, zerofrom, zerofrom + len);
-
 		err = ops->write_end(file, mapping, start, len, len, page, NULL);
 		if (err < 0)
 			goto out;
@@ -576,7 +573,7 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 		goto unlock;
 
 	if (pos > valid_size) {
-		ret = exfat_file_zeroed_range(file, valid_size, pos);
+		ret = exfat_extend_valid_size(file, valid_size, pos);
 		if (ret < 0 && ret != -ENOSPC) {
 			exfat_err(inode->i_sb,
 				"write: fail to zero from %llu to %llu(%zd)",
@@ -610,26 +607,101 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 	return ret;
 }
 
-static int exfat_file_mmap(struct file *file, struct vm_area_struct *vma)
+/*
+ * ->page_mkwrite() handler. Extends ei->valid_size up to the start of the
+ * faulting folio, then dirties the folio directly if all of its buffers are
+ * already mapped, or falls back to exfat_block_page_mkwrite() otherwise.
+ *
+ * Returns VM_FAULT_LOCKED with the folio locked on success, VM_FAULT_NOPAGE
+ * if the folio no longer belongs to this file's mapping, or the result of
+ * vmf_fs_error() on failure.
+ */
+static vm_fault_t exfat_page_mkwrite(struct vm_fault *vmf)
 {
-	int ret;
+	int err;
+	struct vm_area_struct *vma = vmf->vma;
+	struct file *file = vma->vm_file;
 	struct inode *inode = file_inode(file);
 	struct exfat_inode_info *ei = EXFAT_I(inode);
-	loff_t start = ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
-	loff_t end = min_t(loff_t, i_size_read(inode),
-			start + vma->vm_end - vma->vm_start);
+	struct folio *folio = page_folio(vmf->page);
+	vm_fault_t ret = VM_FAULT_LOCKED;
+
+	sb_start_pagefault(inode->i_sb);
+	file_update_time(file);
+	folio_lock(folio);
+	if (folio->mapping != file->f_mapping) {
+		folio_unlock(folio);
+		ret = VM_FAULT_NOPAGE;
+		goto out;
+	}
 
-	if ((vma->vm_flags & VM_WRITE) && ei->valid_size < end) {
-		ret = exfat_file_zeroed_range(file, ei->valid_size, end);
-		if (ret < 0) {
-			exfat_err(inode->i_sb,
-				  "mmap: fail to zero from %llu to %llu(%d)",
-				  start, end, ret);
-			return ret;
+	if (ei->valid_size < folio_pos(folio)) {
+		inode_lock(inode);
+		err = exfat_extend_valid_size(file, ei->valid_size, folio_pos(folio));
+		inode_unlock(inode);
+		if (err < 0) {
+			/* do not return an error with the folio still locked */
+			folio_unlock(folio);
+			ret = vmf_fs_error(err);
+			goto out;
 		}
 	}
 
-	return generic_file_mmap(file, vma);
+	/*
+	 * check if the folio is mapped already (Whether ei->valid_size
+	 * has been extended to folio_pos(folio)+folio_len(folio))
+	 */
+	if (!folio_test_mappedtodisk(folio)) {
+		struct buffer_head *head = folio_buffers(folio);
+
+		if (head) {
+			int fully_mapped = 1;
+			struct buffer_head *bh = head;
+
+			do {
+				if (!buffer_mapped(bh)) {
+					fully_mapped = 0;
+					break;
+				}
+			} while (bh = bh->b_this_page, bh != head);
+
+			if (fully_mapped)
+				folio_set_mappedtodisk(folio);
+		}
+	}
+
+	if (folio_test_mappedtodisk(folio)) {
+		folio_mark_dirty(folio);
+		folio_wait_stable(folio);
+		goto out;
+	}
+
+	folio_unlock(folio);
+
+	err = exfat_block_page_mkwrite(vma, vmf);
+	if (err)
+		ret = vmf_fs_error(err);
+
+out:
+	sb_end_pagefault(inode->i_sb);
+	return ret;
+}
+
+static const struct vm_operations_struct exfat_file_vm_ops = {
+	.fault		= filemap_fault,
+	.map_pages	= filemap_map_pages,
+	.page_mkwrite	= exfat_page_mkwrite,
+};
+
+static int exfat_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct address_space *mapping = file->f_mapping;
+
+	if (!mapping->a_ops->read_folio)
+		return -ENOEXEC;
+	file_accessed(file);
+	vma->vm_ops = &exfat_file_vm_ops;
+	return 0;
 }
 
 const struct file_operations exfat_file_operations = {
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index dd894e558c91..804de7496a7f 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -564,6 +564,11 @@ int exfat_block_truncate_page(struct inode *inode, loff_t from)
 	return block_truncate_page(inode->i_mapping, from, exfat_get_block);
 }
 
+int exfat_block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	return block_page_mkwrite(vma, vmf, exfat_get_block);
+}
+
 static const struct address_space_operations exfat_aops = {
 	.dirty_folio	= block_dirty_folio,
 	.invalidate_folio = block_invalidate_folio,
-- 
2.34.1