I've started looking into resurrecting splice F_MOVE support, and it seems ->steal for anon pages is completly bogus at the moment: - the page count check is incorrect - it doesn't isolate the mapping from the lru - it sets the PIPE_BUF_FLAG_LRU flag, which doesn't get the file added to the file lru Currently on fuse calls ->steal, but I'm not sure it could work on a vmspliced buffered at all. Below is a patch that attempts to fix / paper over ->steal, and the second is the unfinished F_MOVE resurrection patch which shows what additional workarouns we need for ->steal from anon pages.
>From 94958673ed6d0add7f5d95cc17fb0c9fa8f58c03 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig <hch@xxxxxx> Date: Wed, 1 Apr 2015 14:28:50 +0200 Subject: fix ->steal for anon pages --- fs/splice.c | 20 ++++++++++++++++++-- include/linux/swap.h | 1 + mm/internal.h | 1 - 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/fs/splice.c b/fs/splice.c index 41cbb16..36aa4a9 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -145,11 +145,27 @@ const struct pipe_buf_operations page_cache_pipe_buf_ops = { static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { + struct page *page = buf->page; + if (!(buf->flags & PIPE_BUF_FLAG_GIFT)) return 1; - buf->flags |= PIPE_BUF_FLAG_LRU; - return generic_pipe_buf_steal(pipe, buf); + /* + * We should have three references to the page: gup, lru, and + * one for being mapped into page tables. + */ + if (page_count(page) != 3) + return 1; + + lock_page(page); + + if (!isolate_lru_page(page)) { + ClearPageActive(page); + ClearPageUnevictable(page); + page_cache_release(page); + } + + return 0; } static const struct pipe_buf_operations user_page_pipe_buf_ops = { diff --git a/include/linux/swap.h b/include/linux/swap.h index 7067eca..a3742f3 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -318,6 +318,7 @@ extern void lru_cache_add_active_or_unevictable(struct page *page, /* linux/mm/vmscan.c */ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); +extern int isolate_lru_page(struct page *page); extern int __isolate_lru_page(struct page *page, isolate_mode_t mode); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, diff --git a/mm/internal.h b/mm/internal.h index a96da5b..48c5731 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -96,7 +96,6 @@ extern unsigned long highest_memmap_pfn; /* * in mm/vmscan.c: */ -extern int isolate_lru_page(struct page *page); extern void putback_lru_page(struct page *page); extern bool zone_reclaimable(struct zone *zone); -- 1.9.1
>From b2b9ed7a41d6d629abefedbfafcdbbd1b1094491 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig <hch@xxxxxx> Date: Wed, 1 Apr 2015 14:23:09 +0200 Subject: splice F_MOVE WIP --- fs/splice.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/fs/splice.c b/fs/splice.c index 36aa4a9..33f611e 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -939,6 +939,36 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, return ret; } +static void steal_page(struct pipe_inode_info *pipe, + struct splice_desc *sd, struct pipe_buffer *buf) +{ + struct page *page = buf->page; + bool was_anon = PageAnon(page); + + if (buf->ops->steal(pipe, buf)) { + printk("failed to steal page..\n"); + return; + } + + if (add_to_page_cache_lru(page, sd->u.file->f_mapping, + sd->pos >> PAGE_CACHE_SHIFT, GFP_KERNEL)) { + /* + * This is harmless as we fall back to simply + * copying the page. Of course that will most + * likely fail to add to the page cache as well, + * but at that point it's not our problem.. + */ + goto out_unlock; + } + + if (was_anon) { + inc_mm_counter(current->mm, MM_FILEPAGES); + dec_mm_counter(current->mm, MM_ANONPAGES); + } +out_unlock: + unlock_page(page); +} + /** * iter_file_splice_write - splice data from a pipe to a file * @pipe: pipe info @@ -1013,6 +1043,14 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, goto done; } + /* + * XXX: hch: we should allow page steals + * around EOF a well. + */ + if ((sd.flags & SPLICE_F_MOVE) && + this_len == PAGE_CACHE_SIZE) + steal_page(pipe, &sd, buf); + array[n].bv_page = buf->page; array[n].bv_len = this_len; array[n].bv_offset = buf->offset; -- 1.9.1