[PATCH] ext4: Make ext4_writepages() resilient to i_size changes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Inode size can arbitrarily change while writeback is in progress. This
can have various strange effects when we use one value of i_size for one
decision during writeback and another value of i_size for a different
decision during writeback. In particular a check for lblk < blocks in
mpage_map_and_submit_buffers() causes problems when i_size is reduced
while writeback is running because we can end up not using all blocks
we've allocated. Thus these blocks are leaked and also delalloc
accounting gets wrong manifesting as a warning like:

ext4_da_release_space:1333: ext4_da_release_space: ino 12, to_free 1
with only 0 reserved data blocks

The problem can happen only when blocksize < pagesize because the check
for size is performed only after the first iteration of the mapping
loop.

Fix the problem by removing the size check from the mapping loop. We
have an extent allocated so we have to use it all before checking for
i_size. We may call add_page_bufs_to_extent() unnecessarily but that
function won't do anything if passed block number is beyond file size.

Also to avoid future surprises like this sample inode size when
starting writeback in ext4_writepages() and then use this sampled size
throughout the writeback call stack.

Reported-by: Dave Jones <davej@xxxxxxxxxx>
Reported-by: Zheng Liu <gnehzuil.liu@xxxxxxxxx>
Signed-off-by: Jan Kara <jack@xxxxxxx>
---
 fs/ext4/inode.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ba33c67..8bd0240 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1413,6 +1413,8 @@ static void ext4_da_page_release_reservation(struct page *page,
 struct mpage_da_data {
 	struct inode *inode;
 	struct writeback_control *wbc;
+	loff_t i_size;		/* Inode size can change while we do writeback.
+				 * Use one fixed value to make things simpler */
 
 	pgoff_t first_page;	/* The first page to write */
 	pgoff_t next_page;	/* Current page to examine */
@@ -1943,7 +1945,7 @@ static bool add_page_bufs_to_extent(struct mpage_da_data *mpd,
 				    ext4_lblk_t lblk)
 {
 	struct inode *inode = mpd->inode;
-	ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
+	ext4_lblk_t blocks = (mpd->i_size + (1 << inode->i_blkbits) - 1)
 							>> inode->i_blkbits;
 
 	do {
@@ -1968,12 +1970,11 @@ static bool add_page_bufs_to_extent(struct mpage_da_data *mpd,
 static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
 {
 	int len;
-	loff_t size = i_size_read(mpd->inode);
 	int err;
 
 	BUG_ON(page->index != mpd->first_page);
-	if (page->index == size >> PAGE_CACHE_SHIFT)
-		len = size & ~PAGE_CACHE_MASK;
+	if (page->index == mpd->i_size >> PAGE_CACHE_SHIFT)
+		len = mpd->i_size & ~PAGE_CACHE_MASK;
 	else
 		len = PAGE_CACHE_SIZE;
 	clear_page_dirty_for_io(page);
@@ -2006,8 +2007,6 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
 	struct inode *inode = mpd->inode;
 	struct buffer_head *head, *bh;
 	int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits;
-	ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
-							>> inode->i_blkbits;
 	pgoff_t start, end;
 	ext4_lblk_t lblk;
 	sector_t pblock;
@@ -2052,8 +2051,7 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
 					bh->b_blocknr = pblock++;
 				}
 				clear_buffer_unwritten(bh);
-			} while (++lblk < blocks &&
-				 (bh = bh->b_this_page) != head);
+			} while (lblk++, (bh = bh->b_this_page) != head);
 
 			/*
 			 * FIXME: This is going to break if dioread_nolock
@@ -2200,7 +2198,11 @@ static int mpage_map_and_submit_extent(handle_t *handle,
 			return err;
 	} while (map->m_len);
 
-	/* Update on-disk size after IO is submitted */
+	/*
+	 * Update on-disk size after IO is submitted. Here we cannot use
+	 * mpd->i_size as we must avoid increasing i_disksize when racing
+	 * truncate already set it to a small value.
+	 */
 	disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
 	if (disksize > i_size_read(inode))
 		disksize = i_size_read(inode);
@@ -2453,6 +2455,7 @@ static int ext4_writepages(struct address_space *mapping,
 
 	mpd.inode = inode;
 	mpd.wbc = wbc;
+	mpd.i_size = i_size_read(inode);
 	ext4_io_submit_init(&mpd.io_submit, wbc);
 retry:
 	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux