[PATCH 4/8] mm/fs: Add a_ops->readahead

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: "Matthew Wilcox (Oracle)" <willy@xxxxxxxxxxxxx>

This will replace ->readpages with a saner interface:
 - No return type (errors are ignored for read ahead anyway)
 - Pages are already in the page cache when ->readahead is called
 - Pages are passed in a pagevec instead of a linked list

Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>
---
 Documentation/filesystems/locking.rst |  8 +++++-
 Documentation/filesystems/vfs.rst     |  9 ++++++
 include/linux/fs.h                    |  3 ++
 mm/readahead.c                        | 40 ++++++++++++++++++++++++++-
 4 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 5057e4d9dcd1..1e2f1186fd1a 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -239,6 +239,8 @@ prototypes::
 	int (*readpage)(struct file *, struct page *);
 	int (*writepages)(struct address_space *, struct writeback_control *);
 	int (*set_page_dirty)(struct page *page);
+	void (*readahead)(struct file *, struct address_space *,
+			struct pagevec *, pgoff_t index);
 	int (*readpages)(struct file *filp, struct address_space *mapping,
 			struct list_head *pages, unsigned nr_pages);
 	int (*write_begin)(struct file *, struct address_space *mapping,
@@ -271,7 +273,8 @@ writepage:		yes, unlocks (see below)
 readpage:		yes, unlocks
 writepages:
 set_page_dirty		no
-readpages:
+readpages:		no
+readahead:		yes, unlocks
 write_begin:		locks the page		 exclusive
 write_end:		yes, unlocks		 exclusive
 bmap:
@@ -298,6 +301,9 @@ completion.
 ->readpages() populates the pagecache with the passed pages and starts
 I/O against them.  They come unlocked upon I/O completion.
 
+->readahead() starts I/O against the passed pages, which are already
+locked and in the page cache.  They come unlocked upon I/O completion.
+
 ->writepage() is used for two purposes: for "memory cleansing" and for
 "sync".  These are quite different operations and the behaviour may differ
 depending upon the mode.
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index 7d4d09dd5e6d..63d0f0dbbf9c 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -706,6 +706,8 @@ cache in your filesystem.  The following members are defined:
 		int (*readpage)(struct file *, struct page *);
 		int (*writepages)(struct address_space *, struct writeback_control *);
 		int (*set_page_dirty)(struct page *page);
+		void (*readahead)(struct file *, struct address_space *,
+				 struct pagevec *, pgoff_t index);
 		int (*readpages)(struct file *filp, struct address_space *mapping,
 				 struct list_head *pages, unsigned nr_pages);
 		int (*write_begin)(struct file *, struct address_space *mapping,
@@ -781,6 +783,13 @@ cache in your filesystem.  The following members are defined:
 	If defined, it should set the PageDirty flag, and the
 	PAGECACHE_TAG_DIRTY tag in the radix tree.
 
+``readahead``
+	called by the VM to read pages associated with the address_space
+	object.  This is essentially a vector version of readpage.
+	Instead of just one page, several pages are requested.
+	Since this is readahead, attempt to start I/O on each page and
+	let the I/O completion path set errors on the page.
+
 ``readpages``
 	called by the VM to read pages associated with the address_space
 	object.  This is essentially just a vector version of readpage.
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 98e0349adb52..2769f89666fb 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -52,6 +52,7 @@ struct hd_geometry;
 struct iovec;
 struct kiocb;
 struct kobject;
+struct pagevec;
 struct pipe_inode_info;
 struct poll_table_struct;
 struct kstatfs;
@@ -375,6 +376,8 @@ struct address_space_operations {
 	 */
 	int (*readpages)(struct file *filp, struct address_space *mapping,
 			struct list_head *pages, unsigned nr_pages);
+	void (*readahead)(struct file *, struct address_space *,
+			struct pagevec *, pgoff_t offset);
 
 	int (*write_begin)(struct file *, struct address_space *mapping,
 				loff_t pos, unsigned len, unsigned flags,
diff --git a/mm/readahead.c b/mm/readahead.c
index 76a70a4406b5..2fe0974173ea 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -123,7 +123,45 @@ static unsigned read_pages(struct address_space *mapping, struct file *filp,
 	struct page *page;
 	unsigned int nr_pages = pagevec_count(pvec);
 
-	if (mapping->a_ops->readpages) {
+	if (mapping->a_ops->readahead) {
+		/*
+		 * When we remove support for ->readpages, we'll call
+		 * add_to_page_cache_lru() in the parent and all this
+		 * grot goes away.
+		 */
+		unsigned char first = pvec->first;
+		unsigned char saved_nr = pvec->nr;
+		pgoff_t base = offset;
+		pagevec_for_each(pvec, page) {
+			if (!add_to_page_cache_lru(page, mapping, offset++,
+						gfp)) {
+				unsigned char saved_first = pvec->first;
+
+				pvec->nr = pvec->first - 1;
+				pvec->first = first;
+				mapping->a_ops->readahead(filp, mapping, pvec,
+						base + first);
+				first = pvec->nr + 1;
+				pvec->nr = saved_nr;
+				pvec->first = saved_first;
+
+				put_page(page);
+			}
+		}
+		pvec->first = first;
+		offset = base + first;
+		mapping->a_ops->readahead(filp, mapping, pvec, offset);
+		/*
+		 * Ideally the implementation would at least attempt to
+		 * start I/O against all the pages, but there are times
+		 * when it makes more sense to just give up.  Take care
+		 * of any un-attempted pages here.
+		 */
+		pagevec_for_each(pvec, page) {
+			unlock_page(page);
+			put_page(page);
+		}
+	} else if (mapping->a_ops->readpages) {
 		LIST_HEAD(pages);
 
 		pagevec_for_each(pvec, page) {
-- 
2.24.1





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux