[PATCH 1/4] fs/splice: enhance direct pipe & splice for moving pages in kernel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Per-task direct pipe can transfer page between two files or one
file and other kernel components, especially by splice_direct_to_actor
and __splice_from_pipe().

This way is helpful for fuse/ublk to implement zero copy by transferring
pages from device to file or socket. However, when device's ->splice_read()
produces pages, the kernel consumer may read from or write to these pages,
and from device viewpoint, there could be unexpected read or write on
pages.

Enhance the limit by the following approach:

1) add kernel splice flags of SPLICE_F_KERN_FOR_[READ|WRITE] which is
   passed to device's ->read_splice(), then device can check if this
   READ or WRITE is expected on pages filled to pipe together with
   information from ppos & len

2) add kernel splice flag of SPLICE_F_KERN_NEED_CONFIRM which is passed
   to device's ->read_splice() for asking device to confirm if it
   really supports this kind of usage of feeding pages by ->read_splice().
   If device does support, pipe->ack_page_consuming is set. This way
   can avoid misuse.

Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
---
 fs/splice.c               | 15 +++++++++++++++
 include/linux/pipe_fs_i.h | 10 ++++++++++
 include/linux/splice.h    | 22 ++++++++++++++++++++++
 3 files changed, 47 insertions(+)

diff --git a/fs/splice.c b/fs/splice.c
index 87d9b19349de..c4770e1644cc 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -792,6 +792,14 @@ static long do_splice_to(struct file *in, loff_t *ppos,
 	return in->f_op->splice_read(in, ppos, pipe, len, flags);
 }
 
+static inline bool slice_read_acked(const struct pipe_inode_info *pipe,
+			       int flags)
+{
+	if (flags & SPLICE_F_KERN_NEED_CONFIRM)
+		return pipe->ack_page_consuming;
+	return true;
+}
+
 /**
  * splice_direct_to_actor - splices data directly between two non-pipes
  * @in:		file to splice from
@@ -861,10 +869,17 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
 		size_t read_len;
 		loff_t pos = sd->pos, prev_pos = pos;
 
+		pipe->ack_page_consuming = false;
 		ret = do_splice_to(in, &pos, pipe, len, flags);
 		if (unlikely(ret <= 0))
 			goto out_release;
 
+		if (!slice_read_acked(pipe, flags)) {
+			bytes = 0;
+			ret = -EACCES;
+			goto out_release;
+		}
+
 		read_len = ret;
 		sd->total_len = read_len;
 
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 6cb65df3e3ba..09ee1a9380ec 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -72,6 +72,7 @@ struct pipe_inode_info {
 	unsigned int r_counter;
 	unsigned int w_counter;
 	bool poll_usage;
+	bool ack_page_consuming;	/* only for direct pipe */
 	struct page *tmp_page;
 	struct fasync_struct *fasync_readers;
 	struct fasync_struct *fasync_writers;
@@ -218,6 +219,15 @@ static inline void pipe_discard_from(struct pipe_inode_info *pipe,
 		pipe_buf_release(pipe, &pipe->bufs[--pipe->head & mask]);
 }
 
+/*
+ * Called in ->splice_read() for confirming the READ/WRITE page is allowed
+ */
+static inline void pipe_ack_page_consume(struct pipe_inode_info *pipe)
+{
+	if (!WARN_ON_ONCE(current->splice_pipe != pipe))
+		pipe->ack_page_consuming = true;
+}
+
 /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
    memory allocation, whereas PIPE_BUF makes atomicity guarantees.  */
 #define PIPE_SIZE		PAGE_SIZE
diff --git a/include/linux/splice.h b/include/linux/splice.h
index a55179fd60fc..98c471fd918d 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -23,6 +23,28 @@
 
 #define SPLICE_F_ALL (SPLICE_F_MOVE|SPLICE_F_NONBLOCK|SPLICE_F_MORE|SPLICE_F_GIFT)
 
+/*
+ * Flags used for kernel internal page move from ->splice_read()
+ * by internal direct pipe, and user pipe can't touch these
+ * flags.
+ *
+ * Pages filled from ->splice_read() are usually moved/copied to
+ * ->splice_write(). Here address fuse/ublk zero copy by transferring
+ * page from device to file/socket for either READ or WRITE. So we
+ * need ->splice_read() to confirm if this READ/WRITE is allowed on
+ * pages filled in ->splice_read().
+ */
+/* The page consumer is for READ from pages moved from direct pipe */
+#define SPLICE_F_KERN_FOR_READ	(0x100)
+/* The page consumer is for WRITE to pages moved from direct pipe */
+#define SPLICE_F_KERN_FOR_WRITE	(0x200)
+/*
+ * ->splice_read() has to confirm if consumer's READ/WRITE on pages
+ * is allow. If yes, ->splice_read() has to set pipe->ack_page_consuming,
+ * otherwise pipe->ack_page_consuming has to be cleared.
+ */
+#define SPLICE_F_KERN_NEED_CONFIRM	(0x400)
+
 /*
  * Passed to the actors
  */
-- 
2.31.1




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux