[PATCH 3/3] ext4: Fix stale data exposure when read races with hole punch

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hole puching currently evicts pages from page cache and then goes on to
remove blocks from the inode. This happens under both i_mmap_sem and
i_rwsem held exclusively which provides appropriate serialization with
racing page faults. However there is currently nothing that prevents
ordinary read(2) from racing with the hole punch and instantiating page
cache page after hole punching has evicted page cache but before it has
removed blocks from the inode. This page cache page will be mapping soon
to be freed block and that can lead to returning stale data to userspace
or even filesystem corruption.

Fix the problem by protecting reads as well as readahead requests with
i_mmap_sem.

CC: stable@xxxxxxxxxxxxxxx
Reported-by: Amir Goldstein <amir73il@xxxxxxxxx>
Signed-off-by: Jan Kara <jack@xxxxxxx>
---
 fs/ext4/file.c  | 16 ++++++++++++++++
 fs/ext4/inode.c | 21 +++++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 349b27f0dda0..d66f7c08b123 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -30,6 +30,7 @@
 #include <linux/uio.h>
 #include <linux/mman.h>
 #include <linux/backing-dev.h>
+#include <linux/fadvise.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
@@ -131,6 +132,20 @@ static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	return generic_file_read_iter(iocb, to);
 }
 
+static int ext4_fadvise(struct file *filp, loff_t start, loff_t end, int advice)
+{
+	struct inode *inode = file_inode(filp);
+	int ret;
+
+	/* Readahead needs protection from hole punching and similar ops */
+	if (advice == POSIX_FADV_WILLNEED)
+		down_read(&EXT4_I(inode)->i_mmap_sem);
+	ret = generic_fadvise(filp, start, end, advice);
+	if (advice == POSIX_FADV_WILLNEED)
+		up_read(&EXT4_I(inode)->i_mmap_sem);
+	return ret;
+}
+
 /*
  * Called when an inode is released. Note that this is different
  * from ext4_file_open: open gets called at every open, but release
@@ -911,6 +926,7 @@ const struct file_operations ext4_file_operations = {
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.fallocate	= ext4_fallocate,
+	.fadvise	= ext4_fadvise,
 };
 
 const struct inode_operations ext4_file_inode_operations = {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c173c8405856..a01569f2fa49 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3646,9 +3646,28 @@ static int ext4_iomap_swap_activate(struct swap_info_struct *sis,
 				       &ext4_iomap_report_ops);
 }
 
+static int ext4_fill_pages(struct kiocb *iocb, size_t len, bool partial_page,
+			   struct page **pagep, unsigned int nr_pages)
+{
+	struct ext4_inode_info *ei = EXT4_I(iocb->ki_filp->f_mapping->host);
+	int ret;
+
+	/*
+	 * Protect adding of pages into page cache against hole punching and
+	 * other cache manipulation functions so that we don't expose
+	 * potentially stale user data.
+	 */
+	down_read(&ei->i_mmap_sem);
+	ret = generic_file_buffered_read_get_pages(iocb, len, partial_page,
+						   pagep, nr_pages);
+	up_read(&ei->i_mmap_sem);
+	return ret;
+}
+
 static const struct address_space_operations ext4_aops = {
 	.readpage		= ext4_readpage,
 	.readahead		= ext4_readahead,
+	.fill_pages		= ext4_fill_pages,
 	.writepage		= ext4_writepage,
 	.writepages		= ext4_writepages,
 	.write_begin		= ext4_write_begin,
@@ -3667,6 +3686,7 @@ static const struct address_space_operations ext4_aops = {
 static const struct address_space_operations ext4_journalled_aops = {
 	.readpage		= ext4_readpage,
 	.readahead		= ext4_readahead,
+	.fill_pages		= ext4_fill_pages,
 	.writepage		= ext4_writepage,
 	.writepages		= ext4_writepages,
 	.write_begin		= ext4_write_begin,
@@ -3684,6 +3704,7 @@ static const struct address_space_operations ext4_journalled_aops = {
 static const struct address_space_operations ext4_da_aops = {
 	.readpage		= ext4_readpage,
 	.readahead		= ext4_readahead,
+	.fill_pages		= ext4_fill_pages,
 	.writepage		= ext4_writepage,
 	.writepages		= ext4_writepages,
 	.write_begin		= ext4_da_write_begin,
-- 
2.26.2




[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux