->steal semantics on anon pages

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



I've started looking into resurrecting splice F_MOVE support, and it
seems ->steal for anon pages is completly bogus at the moment:

 - the page count check is incorrect
 - it doesn't isolate the mapping from the lru
 - it sets the PIPE_BUF_FLAG_LRU flag, which doesn't get the file
   added to the file lru

Currently on fuse calls ->steal, but I'm not sure it could work on
a vmspliced buffered at all.

Below is a patch that attempts to fix / paper over ->steal, and the
second is the unfinished F_MOVE resurrection patch which shows
what additional workarouns we need for ->steal from anon pages.
>From 94958673ed6d0add7f5d95cc17fb0c9fa8f58c03 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@xxxxxx>
Date: Wed, 1 Apr 2015 14:28:50 +0200
Subject: fix ->steal for anon pages

---
 fs/splice.c          | 20 ++++++++++++++++++--
 include/linux/swap.h |  1 +
 mm/internal.h        |  1 -
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/fs/splice.c b/fs/splice.c
index 41cbb16..36aa4a9 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -145,11 +145,27 @@ const struct pipe_buf_operations page_cache_pipe_buf_ops = {
 static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
 				    struct pipe_buffer *buf)
 {
+	struct page *page = buf->page;
+
 	if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
 		return 1;
 
-	buf->flags |= PIPE_BUF_FLAG_LRU;
-	return generic_pipe_buf_steal(pipe, buf);
+	/*
+	 * We should have three references to the page: gup, lru, and
+	 * one for being mapped into page tables.
+	 */
+	if (page_count(page) != 3)
+		return 1;
+
+	lock_page(page);
+
+	if (!isolate_lru_page(page)) {
+		ClearPageActive(page);
+		ClearPageUnevictable(page);
+		page_cache_release(page);
+	}
+		
+	return 0;
 }
 
 static const struct pipe_buf_operations user_page_pipe_buf_ops = {
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7067eca..a3742f3 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -318,6 +318,7 @@ extern void lru_cache_add_active_or_unevictable(struct page *page,
 /* linux/mm/vmscan.c */
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 					gfp_t gfp_mask, nodemask_t *mask);
+extern int isolate_lru_page(struct page *page);
 extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 						  unsigned long nr_pages,
diff --git a/mm/internal.h b/mm/internal.h
index a96da5b..48c5731 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -96,7 +96,6 @@ extern unsigned long highest_memmap_pfn;
 /*
  * in mm/vmscan.c:
  */
-extern int isolate_lru_page(struct page *page);
 extern void putback_lru_page(struct page *page);
 extern bool zone_reclaimable(struct zone *zone);
 
-- 
1.9.1

>From b2b9ed7a41d6d629abefedbfafcdbbd1b1094491 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@xxxxxx>
Date: Wed, 1 Apr 2015 14:23:09 +0200
Subject: splice F_MOVE WIP

---
 fs/splice.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/fs/splice.c b/fs/splice.c
index 36aa4a9..33f611e 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -939,6 +939,36 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 	return ret;
 }
 
+static void steal_page(struct pipe_inode_info *pipe,
+		struct splice_desc *sd, struct pipe_buffer *buf)
+{
+    	struct page *page = buf->page;
+	bool was_anon = PageAnon(page);
+
+	if (buf->ops->steal(pipe, buf)) {
+		printk("failed to steal page..\n");
+		return;
+	}
+
+	if (add_to_page_cache_lru(page, sd->u.file->f_mapping,
+			sd->pos >> PAGE_CACHE_SHIFT, GFP_KERNEL)) {
+		/*
+		 * This is harmless as we fall back to simply
+		 * copying the page.  Of course that will most
+		 * likely fail to add to the page cache as well,
+		 * but at that point it's not our problem..
+		 */
+		goto out_unlock;
+	}
+
+	if (was_anon) {
+		inc_mm_counter(current->mm, MM_FILEPAGES);
+		dec_mm_counter(current->mm, MM_ANONPAGES);
+	}
+out_unlock:
+	unlock_page(page);
+}
+
 /**
  * iter_file_splice_write - splice data from a pipe to a file
  * @pipe:	pipe info
@@ -1013,6 +1043,14 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 				goto done;
 			}
 
+			/*
+			 * XXX: hch: we should allow page steals
+			 * around EOF a well.
+			 */
+			if ((sd.flags & SPLICE_F_MOVE) &&
+			    this_len == PAGE_CACHE_SIZE)
+				steal_page(pipe, &sd, buf);
+
 			array[n].bv_page = buf->page;
 			array[n].bv_len = this_len;
 			array[n].bv_offset = buf->offset;
-- 
1.9.1


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux