Re: [PATCH 1/3] fs: Introduce buffered_write_operations

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Feb 01, 2024 at 05:42:46AM +0100, Christoph Hellwig wrote:
> On Thu, Feb 01, 2024 at 04:36:09AM +0000, Matthew Wilcox wrote:
> > +static struct folio *iomap_write_begin(struct iomap_iter *iter, loff_t pos,
> > +               size_t len)
> > 
> > with corresponding changes.  Again, ends up looking slightly cleaner.
> 
> iomap also really needs some tweaks to the naming in the area as a
> __foo function calling foo as the default is horrible.  I'll take a
> look at your patches and can add that on top.
> 
> > f2fs also doesn't seem terribly objectional; passing rpages between
> > begin & end.
> > 
> > ocfs2 is passing a ocfs2_write_ctxt between the two.
> 
> Well, it might be the intended purpose, but as-is it is horribly
> inefficient - they need to do a dynamic allocation for every
> page they iterate over.  So all of them are candidates for an
> iterator model that does this allocation once per write.

Oh, I see what you mean.  What I could do is pass in the fsdata,
like this.

commit 753c7d2d62e1
Author: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>
Date:   Mon Jan 29 23:20:34 2024 -0500

    fs: Introduce buffered_write_operations
    
    Start the process of moving write_begin and write_end out from the
    address_space_operations to their own struct.
    
    The new write_begin returns the folio or an ERR_PTR instead of returning
    the folio by reference.  It also accepts len as a size_t and (as
    documented) the len may be larger than PAGE_SIZE.
    
    Pass an optional buffered_write_operations pointer to various functions
    in filemap.c.  The old names are available as macros for now, except
    for generic_file_write_iter() which is used as a function pointer by
    many filesystems.  If using the new functions, the filesystem can have
    per-operation fsdata instead of per-page fsdata.
    
    Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 2df35e65557d..a79c7f15ca9f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -18,6 +18,27 @@
 
 struct folio_batch;
 
+struct buffered_write_operations {
+	struct folio *(*write_begin)(struct file *, struct address_space *,
+			loff_t pos, size_t len, void **fsdata);
+	size_t (*write_end)(struct file *, struct address_space *,
+			loff_t pos, size_t len, size_t copied,
+			struct folio *folio, void **fsdata);
+};
+
+ssize_t filemap_write_iter(struct kiocb *, struct iov_iter *,
+		const struct buffered_write_operations *, void *fsdata);
+ssize_t __filemap_write_iter(struct kiocb *, struct iov_iter *,
+		const struct buffered_write_operations *, void *fsdata);
+ssize_t filemap_perform_write(struct kiocb *, struct iov_iter *,
+		const struct buffered_write_operations *, void *fsdata);
+
+ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
+#define generic_perform_write(kiocb, iter)		\
+	filemap_perform_write(kiocb, iter, NULL, NULL)
+#define __generic_file_write_iter(kiocb, iter)		\
+	__filemap_write_iter(kiocb, iter, NULL, NULL)
+
 unsigned long invalidate_mapping_pages(struct address_space *mapping,
 					pgoff_t start, pgoff_t end);
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 750e779c23db..214266aeaca5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -95,7 +95,7 @@
  *    ->invalidate_lock		(filemap_fault)
  *      ->lock_page		(filemap_fault, access_process_vm)
  *
- *  ->i_rwsem			(generic_perform_write)
+ *  ->i_rwsem			(filemap_perform_write)
  *    ->mmap_lock		(fault_in_readable->do_page_fault)
  *
  *  bdi->wb.list_lock
@@ -3890,7 +3890,8 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
 }
 EXPORT_SYMBOL(generic_file_direct_write);
 
-ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
+ssize_t filemap_perform_write(struct kiocb *iocb, struct iov_iter *i,
+		const struct buffered_write_operations *ops, void *fsdata)
 {
 	struct file *file = iocb->ki_filp;
 	loff_t pos = iocb->ki_pos;
@@ -3900,11 +3901,10 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
 	ssize_t written = 0;
 
 	do {
-		struct page *page;
-		unsigned long offset;	/* Offset into pagecache page */
-		unsigned long bytes;	/* Bytes to write to page */
+		struct folio *folio;
+		size_t offset;		/* Offset into pagecache folio */
+		size_t bytes;		/* Bytes to write to page */
 		size_t copied;		/* Bytes copied from user */
-		void *fsdata = NULL;
 
 		offset = (pos & (PAGE_SIZE - 1));
 		bytes = min_t(unsigned long, PAGE_SIZE - offset,
@@ -3927,19 +3927,33 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
 			break;
 		}
 
-		status = a_ops->write_begin(file, mapping, pos, bytes,
+		if (ops) {
+			folio = ops->write_begin(file, mapping, pos, bytes, &fsdata);
+			if (IS_ERR(folio)) {
+				status = PTR_ERR(folio);
+				break;
+			}
+		} else {
+			struct page *page;
+			status = a_ops->write_begin(file, mapping, pos, bytes,
 						&page, &fsdata);
-		if (unlikely(status < 0))
-			break;
+			if (unlikely(status < 0))
+				break;
+			folio = page_folio(page);
+		}
 
 		if (mapping_writably_mapped(mapping))
-			flush_dcache_page(page);
+			flush_dcache_folio(folio);
 
-		copied = copy_page_from_iter_atomic(page, offset, bytes, i);
-		flush_dcache_page(page);
+		copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
+		flush_dcache_folio(folio);
 
-		status = a_ops->write_end(file, mapping, pos, bytes, copied,
-						page, fsdata);
+		if (ops)
+			status = ops->write_end(file, mapping, pos, bytes,
+					copied, folio, &fsdata);
+		else
+			status = a_ops->write_end(file, mapping, pos, bytes,
+					copied, &folio->page, fsdata);
 		if (unlikely(status != copied)) {
 			iov_iter_revert(i, copied - max(status, 0L));
 			if (unlikely(status < 0))
@@ -3969,12 +3983,13 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
 	iocb->ki_pos += written;
 	return written;
 }
-EXPORT_SYMBOL(generic_perform_write);
+EXPORT_SYMBOL(filemap_perform_write);
 
 /**
- * __generic_file_write_iter - write data to a file
- * @iocb:	IO state structure (file, offset, etc.)
- * @from:	iov_iter with data to write
+ * __filemap_write_iter - write data to a file
+ * @iocb: IO state structure (file, offset, etc.)
+ * @from: iov_iter with data to write
+ * @ops: How to inform the filesystem that a write is starting/finishing.
  *
  * This function does all the work needed for actually writing data to a
  * file. It does all basic checks, removes SUID from the file, updates
@@ -3992,7 +4007,8 @@ EXPORT_SYMBOL(generic_perform_write);
  * * number of bytes written, even for truncated writes
  * * negative error code if no data has been written at all
  */
-ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ssize_t __filemap_write_iter(struct kiocb *iocb, struct iov_iter *from,
+		const struct buffered_write_operations *ops, void *fsdata)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
@@ -4019,27 +4035,29 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		if (ret < 0 || !iov_iter_count(from) || IS_DAX(inode))
 			return ret;
 		return direct_write_fallback(iocb, from, ret,
-				generic_perform_write(iocb, from));
+				filemap_perform_write(iocb, from, ops, fsdata));
 	}
 
-	return generic_perform_write(iocb, from);
+	return filemap_perform_write(iocb, from, ops, fsdata);
 }
-EXPORT_SYMBOL(__generic_file_write_iter);
+EXPORT_SYMBOL(__filemap_write_iter);
 
 /**
- * generic_file_write_iter - write data to a file
+ * filemap_write_iter - write data to a file
  * @iocb:	IO state structure
  * @from:	iov_iter with data to write
  *
- * This is a wrapper around __generic_file_write_iter() to be used by most
+ * This is a wrapper around __filemap_write_iter() to be used by most
  * filesystems. It takes care of syncing the file in case of O_SYNC file
  * and acquires i_rwsem as needed.
+ *
  * Return:
- * * negative error code if no data has been written at all of
+ * * negative error code if no data has been written at all or if
  *   vfs_fsync_range() failed for a synchronous write
  * * number of bytes written, even for truncated writes
  */
-ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ssize_t filemap_write_iter(struct kiocb *iocb, struct iov_iter *from,
+		const struct buffered_write_operations *ops, void *fsdata)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
@@ -4048,13 +4066,18 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	inode_lock(inode);
 	ret = generic_write_checks(iocb, from);
 	if (ret > 0)
-		ret = __generic_file_write_iter(iocb, from);
+		ret = __filemap_write_iter(iocb, from, ops, fsdata);
 	inode_unlock(inode);
 
 	if (ret > 0)
 		ret = generic_write_sync(iocb, ret);
 	return ret;
 }
+
+ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	return filemap_write_iter(iocb, from, NULL, NULL);
+}
 EXPORT_SYMBOL(generic_file_write_iter);
 
 /**




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux