[PATCH 3/5] mm: make buffered writes work with RWF_UNCACHED

If RWF_UNCACHED is set for io_uring (or pwritev2(2)), we'll drop the
page cache pages that were instantiated for the buffered write once
writeback has been kicked off on them. Pages that were already in the
page cache before the write are left alone. This provides similar
semantics to reads with RWF_UNCACHED set.
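
For reference, a minimal userspace sketch of the intended use via
pwritev2(2). It assumes the RWF_UNCACHED uapi definition added earlier
in this series; the fallback value below is an assumption and should be
checked against that patch:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/uio.h>

#ifndef RWF_UNCACHED
#define RWF_UNCACHED	0x00000040	/* assumed value, check the uapi patch */
#endif

int main(void)
{
	struct iovec iov;
	char buf[4096];
	ssize_t ret;
	int fd;

	memset(buf, 0xaa, sizeof(buf));
	iov.iov_base = buf;
	iov.iov_len = sizeof(buf);

	fd = open("testfile", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/*
	 * Buffered write: page cache pages instantiated for it get
	 * writeback started and are then dropped again. Pages that
	 * were already cached before the write stay cached.
	 */
	ret = pwritev2(fd, &iov, 1, 0, RWF_UNCACHED);
	if (ret < 0)
		perror("pwritev2");

	close(fd);
	return ret < 0;
}

io_uring gets the same treatment by passing the flag in sqe->rw_flags
for a write request.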

Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
 include/linux/fs.h |  5 +++
 mm/filemap.c       | 85 +++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 85 insertions(+), 5 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index bf58db1bc032..7ea3dfdd9aa5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -285,6 +285,7 @@ enum positive_aop_returns {
 #define AOP_FLAG_NOFS			0x0002 /* used by filesystem to direct
 						* helper code (eg buffer layer)
 						* to clear GFP_FS from alloc */
+#define AOP_FLAG_UNCACHED		0x0004
 
 /*
  * oh the beauties of C type declarations.
@@ -3106,6 +3107,10 @@ extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *,
 				     struct kiocb *);
 
+struct pagevec;
+extern void write_drop_cached_pages(struct pagevec *pvec,
+				struct address_space *mapping);
+
 ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
 		rwf_t flags);
 ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
diff --git a/mm/filemap.c b/mm/filemap.c
index fe37bd2b2630..2e36129ebe38 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3287,10 +3287,12 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
 					pgoff_t index, unsigned flags)
 {
 	struct page *page;
-	int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT;
+	int fgp_flags = FGP_LOCK|FGP_WRITE;
 
 	if (flags & AOP_FLAG_NOFS)
 		fgp_flags |= FGP_NOFS;
+	if (!(flags & AOP_FLAG_UNCACHED))
+		fgp_flags |= FGP_CREAT;
 
 	page = pagecache_get_page(mapping, index, fgp_flags,
 			mapping_gfp_mask(mapping));
@@ -3301,21 +3303,65 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
 }
 EXPORT_SYMBOL(grab_cache_page_write_begin);
 
+/*
+ * Start writeback on the pages in the pagevec, then try to remove those
+ * pages from the page cache. Used with RWF_UNCACHED.
+ */
+void write_drop_cached_pages(struct pagevec *pvec,
+			     struct address_space *mapping)
+{
+	loff_t start, end;
+	int i;
+
+	end = 0;
+	start = LLONG_MAX;
+	for (i = 0; i < pagevec_count(pvec); i++) {
+		loff_t off = page_offset(pvec->pages[i]);
+		if (off < start)
+			start = off;
+		if (off > end)
+			end = off;
+	}
+
+	__filemap_fdatawrite_range(mapping, start, end, WB_SYNC_NONE);
+
+	for (i = 0; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+
+		lock_page(page);
+		if (page->mapping == mapping) {
+			wait_on_page_writeback(page);
+			if (!page_has_private(page) ||
+			    try_to_release_page(page, 0))
+				remove_mapping(mapping, page);
+		}
+		unlock_page(page);
+	}
+	pagevec_release(pvec);
+}
+EXPORT_SYMBOL_GPL(write_drop_cached_pages);
+
+#define GPW_PAGE_BATCH		16
+
 ssize_t generic_perform_write(struct file *file,
 				struct iov_iter *i, struct kiocb *iocb)
 {
 	struct address_space *mapping = file->f_mapping;
 	const struct address_space_operations *a_ops = mapping->a_ops;
 	loff_t pos = iocb->ki_pos;
+	struct pagevec pvec;
 	long status = 0;
 	ssize_t written = 0;
 	unsigned int flags = 0;
 
+	pagevec_init(&pvec);
+
 	do {
 		struct page *page;
 		unsigned long offset;	/* Offset into pagecache page */
 		unsigned long bytes;	/* Bytes to write to page */
 		size_t copied;		/* Bytes copied from user */
+		bool drop_page = false;	/* drop page after IO */
 		void *fsdata;
 
 		offset = (pos & (PAGE_SIZE - 1));
@@ -3323,6 +3369,9 @@ ssize_t generic_perform_write(struct file *file,
 						iov_iter_count(i));
 
 again:
+		if (iocb->ki_flags & IOCB_UNCACHED)
+			flags |= AOP_FLAG_UNCACHED;
+
 		/*
 		 * Bring in the user page that we will copy from _first_.
 		 * Otherwise there's a nasty deadlock on copying from the
@@ -3343,10 +3392,17 @@ ssize_t generic_perform_write(struct file *file,
 			break;
 		}
 
+retry:
 		status = a_ops->write_begin(file, mapping, pos, bytes, flags,
 						&page, &fsdata);
-		if (unlikely(status < 0))
+		if (unlikely(status < 0)) {
+			if (status == -ENOMEM && (flags & AOP_FLAG_UNCACHED)) {
+				drop_page = true;
+				flags &= ~AOP_FLAG_UNCACHED;
+				goto retry;
+			}
 			break;
+		}
 
 		if (mapping_writably_mapped(mapping))
 			flush_dcache_page(page);
@@ -3354,10 +3410,16 @@ ssize_t generic_perform_write(struct file *file,
 		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
 		flush_dcache_page(page);
 
+		if (drop_page)
+			get_page(page);
+
 		status = a_ops->write_end(file, mapping, pos, bytes, copied,
 						page, fsdata);
-		if (unlikely(status < 0))
+		if (unlikely(status < 0)) {
+			if (drop_page)
+				put_page(page);
 			break;
+		}
 		copied = status;
 
 		cond_resched();
@@ -3374,14 +3436,27 @@ ssize_t generic_perform_write(struct file *file,
 			 */
 			bytes = min_t(unsigned long, PAGE_SIZE - offset,
 						iov_iter_single_seg_count(i));
+			if (drop_page)
+				put_page(page);
 			goto again;
 		}
+		if (drop_page &&
+		    ((pos >> PAGE_SHIFT) != ((pos + copied) >> PAGE_SHIFT))) {
+			if (!pagevec_add(&pvec, page))
+				write_drop_cached_pages(&pvec, mapping);
+		} else {
+			if (drop_page)
+				put_page(page);
+			balance_dirty_pages_ratelimited(mapping);
+		}
+
 		pos += copied;
 		written += copied;
-
-		balance_dirty_pages_ratelimited(mapping);
 	} while (iov_iter_count(i));
 
+	if (pagevec_count(&pvec))
+		write_drop_cached_pages(&pvec, mapping);
+
 	return written ? written : status;
 }
 EXPORT_SYMBOL(generic_perform_write);
-- 
2.24.0



