On 5/27/24 23:36, Christoph Hellwig wrote:
From: "Matthew Wilcox (Oracle)" <willy@xxxxxxxxxxxxx>
Modelled after the loop in iomap_write_iter(), copy larger chunks from
userspace if the filesystem has created large folios.
Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>
[hch: use mapping_max_folio_size to keep supporting file systems that do
not support large folios]
Signed-off-by: Christoph Hellwig <hch@xxxxxx>
---
mm/filemap.c | 40 +++++++++++++++++++++++++---------------
1 file changed, 25 insertions(+), 15 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 382c3d06bfb10c..860728e26ccf32 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3981,21 +3981,24 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
loff_t pos = iocb->ki_pos;
struct address_space *mapping = file->f_mapping;
const struct address_space_operations *a_ops = mapping->a_ops;
+ size_t chunk = mapping_max_folio_size(mapping);
Better to default chunk to PAGE_SIZE for backward compatibility:
+ size_t chunk = PAGE_SIZE;
long status = 0;
ssize_t written = 0;
Then have the filesystem opt in to large folio support:
+ if (mapping_large_folio_support(mapping))
+ chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
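For reference, a minimal userspace model of the suggested selection logic (PAGE_SIZE, MAX_PAGECACHE_ORDER and the opt-in flag are stubbed out here with made-up values; in the kernel they come from the pagecache headers and the mapping flags):

	/* Userspace sketch only; constants and the opt-in flag are fake. */
	#include <stdio.h>
	#include <stdbool.h>

	#define PAGE_SIZE		4096UL
	#define MAX_PAGECACHE_ORDER	4	/* illustrative; arch-dependent in the kernel */

	static unsigned long pick_chunk(bool large_folio_support)
	{
		unsigned long chunk = PAGE_SIZE;	/* old single-page behavior */

		if (large_folio_support)
			chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
		return chunk;
	}

	int main(void)
	{
		printf("no opt-in: %lu\n", pick_chunk(false));	/* 4096 */
		printf("opt-in:    %lu\n", pick_chunk(true));	/* 65536 */
		return 0;
	}

This keeps filesystems that never call mapping_set_large_folios() on the exact single-page path they had before.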
do {
struct page *page;
- unsigned long offset; /* Offset into pagecache page */
- unsigned long bytes; /* Bytes to write to page */
+ struct folio *folio;
+ size_t offset; /* Offset into folio */
+ size_t bytes; /* Bytes to write to folio */
size_t copied; /* Bytes copied from user */
void *fsdata = NULL;
- offset = (pos & (PAGE_SIZE - 1));
- bytes = min_t(unsigned long, PAGE_SIZE - offset,
- iov_iter_count(i));
+ bytes = iov_iter_count(i);
+retry:
+ offset = pos & (chunk - 1);
+ bytes = min(chunk - offset, bytes);
+ balance_dirty_pages_ratelimited(mapping);
-again:
/*
* Bring in the user page that we will copy from _first_.
* Otherwise there's a nasty deadlock on copying from the
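The recomputed offset/bytes at the retry label only works because chunk stays a power of two through the halving further down; a quick userspace check of the masking (position and count here are made up):

	#include <stdio.h>

	#define MIN(a, b)	((a) < (b) ? (a) : (b))

	int main(void)
	{
		unsigned long chunk = 65536;	/* hypothetical 64K max folio */
		unsigned long pos = 0x13a00;	/* arbitrary file position */
		unsigned long count = 200000;	/* bytes left in the iov_iter */

		unsigned long offset = pos & (chunk - 1);	/* 0x3a00 into the chunk */
		unsigned long bytes = MIN(chunk - offset, count);

		printf("offset=%#lx bytes=%#lx\n", offset, bytes);	/* 0x3a00, 0xc600 */
		return 0;
	}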
@@ -4017,11 +4020,16 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
if (unlikely(status < 0))
break;
+ folio = page_folio(page);
+ offset = offset_in_folio(folio, pos);
+ if (bytes > folio_size(folio) - offset)
+ bytes = folio_size(folio) - offset;
+
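Note that ->write_begin() may hand back a folio smaller than the chunk we asked for, hence the re-clamp above; with made-up sizes (and assuming the folio is naturally aligned, as pagecache folios are, so a mask stands in for offset_in_folio()):

	#include <stdio.h>

	int main(void)
	{
		unsigned long folio_size = 16384;	/* fs gave us a 16K folio, not 64K */
		unsigned long pos = 0x13a00;
		unsigned long offset = pos & (folio_size - 1);	/* offset_in_folio() */
		unsigned long bytes = 0xc600;	/* clamp carried over from the chunk math */

		if (bytes > folio_size - offset)
			bytes = folio_size - offset;
		printf("offset=%#lx bytes=%#lx\n", offset, bytes);	/* 0x3a00, 0x600 */
		return 0;
	}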
if (mapping_writably_mapped(mapping))
- flush_dcache_page(page);
+ flush_dcache_folio(folio);
- copied = copy_page_from_iter_atomic(page, offset, bytes, i);
- flush_dcache_page(page);
+ copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
+ flush_dcache_folio(folio);
status = a_ops->write_end(file, mapping, pos, bytes, copied,
page, fsdata);
@@ -4039,14 +4047,16 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
* halfway through, might be a race with munmap,
* might be severe memory pressure.
*/
- if (copied)
+ if (chunk > PAGE_SIZE)
+ chunk /= 2;
+ if (copied) {
bytes = copied;
- goto again;
+ goto retry;
+ }
+ } else {
+ pos += status;
+ written += status;
}
- pos += status;
- written += status;
-
- balance_dirty_pages_ratelimited(mapping);
} while (iov_iter_count(i));
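The rejection path now halves the chunk before retrying instead of looping on the same size; a userspace model of that back-off (the injected copy failures are fake, everything else mirrors the patch's control flow):

	#include <stdio.h>

	#define PAGE_SIZE	4096UL
	#define MIN(a, b)	((a) < (b) ? (a) : (b))

	int main(void)
	{
		unsigned long chunk = 65536, pos = 0, bytes = 200000;
		int faults = 2;	/* pretend the first two atomic copies fault */

		for (;;) {
			unsigned long offset = pos & (chunk - 1);
			unsigned long len = MIN(chunk - offset, bytes);
			unsigned long copied = faults-- > 0 ? 0 : len;

			printf("chunk=%lu tried=%lu copied=%lu\n", chunk, len, copied);
			if (copied)
				break;
			if (chunk > PAGE_SIZE)
				chunk /= 2;	/* shrink and retry, as in the patch */
		}
		return 0;
	}

So a fault at 64K falls back through 32K to 16K rather than pinning a large folio it can never fill.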
if (!written)
Tested with Lustre using large folios on kernel 6.6 with this patch (and the suggested changes above).
Tested-by: Shaun Tancheff <shaun.tancheff@xxxxxxx>