From: Dave Chinner <dchinner@xxxxxxxxxx> If the pages sent down by write_cache_pages to the writepage callback are discontiguous, we need to detect this and put each discontiguous page range into individual ioends. This is needed to ensure that the ioend accurately represents the range of the file that it covers so that file size updates during IO completion set the size correctly. Failure to take into account the discontiguous ranges results in files being too small when writeback patterns are non-sequential. Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> --- fs/xfs/xfs_aops.c | 82 ++++++++++++++++++++++--------------------------------- 1 file changed, 32 insertions(+), 50 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index e4184f5..93bf13c 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -45,6 +45,7 @@ struct xfs_writepage_ctx { unsigned int io_type; struct xfs_ioend *iohead; struct xfs_ioend *ioend; + sector_t last_block; }; void @@ -537,29 +538,27 @@ xfs_add_to_ioend( struct inode *inode, struct buffer_head *bh, xfs_off_t offset, - unsigned int type, - xfs_ioend_t **result, - int need_ioend) + struct xfs_writepage_ctx *wpc) { - xfs_ioend_t *ioend = *result; - - if (!ioend || need_ioend || type != ioend->io_type) { - xfs_ioend_t *previous = *result; - - ioend = xfs_alloc_ioend(inode, type); - ioend->io_offset = offset; - ioend->io_buffer_head = bh; - ioend->io_buffer_tail = bh; - if (previous) - previous->io_list = ioend; - *result = ioend; + if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type || + bh->b_blocknr != wpc->last_block + 1) { + struct xfs_ioend *new; + + new = xfs_alloc_ioend(inode, wpc->io_type); + new->io_offset = offset; + new->io_buffer_head = bh; + new->io_buffer_tail = bh; + if (wpc->ioend) + wpc->ioend->io_list = new; + wpc->ioend = new; } else { - ioend->io_buffer_tail->b_private = bh; - ioend->io_buffer_tail = bh; + wpc->ioend->io_buffer_tail->b_private = bh; + wpc->ioend->io_buffer_tail = bh; } bh->b_private = NULL; - ioend->io_size += bh->b_size; + wpc->ioend->io_size += bh->b_size; + wpc->last_block = bh->b_blocknr; } STATIC void @@ -656,17 +655,15 @@ xfs_convert_page( struct inode *inode, struct page *page, loff_t tindex, - struct xfs_bmbt_irec *imap, - xfs_ioend_t **ioendp, + struct xfs_writepage_ctx *wpc, struct writeback_control *wbc) { struct buffer_head *bh, *head; xfs_off_t end_offset; unsigned long p_offset; - unsigned int type; int len, page_dirty; int count = 0, done = 0, uptodate = 1; - xfs_off_t offset = page_offset(page); + xfs_off_t offset = page_offset(page); if (page->index != tindex) goto fail; @@ -676,7 +673,7 @@ xfs_convert_page( goto fail_unlock_page; if (page->mapping != inode->i_mapping) goto fail_unlock_page; - if (!xfs_check_page_type(page, (*ioendp)->io_type, false)) + if (!xfs_check_page_type(page, wpc->ioend->io_type, false)) goto fail_unlock_page; /* @@ -712,7 +709,7 @@ xfs_convert_page( * writeback. Hence for more optimal IO patterns, we should always * avoid partial page writeback due to multiple mappings on a page here. */ - if (!xfs_imap_valid(inode, imap, end_offset)) + if (!xfs_imap_valid(inode, &wpc->imap, end_offset)) goto fail_unlock_page; len = 1 << inode->i_blkbits; @@ -744,23 +741,22 @@ xfs_convert_page( if (buffer_unwritten(bh) || buffer_delay(bh) || buffer_mapped(bh)) { if (buffer_unwritten(bh)) - type = XFS_IO_UNWRITTEN; + wpc->io_type = XFS_IO_UNWRITTEN; else if (buffer_delay(bh)) - type = XFS_IO_DELALLOC; + wpc->io_type = XFS_IO_DELALLOC; else - type = XFS_IO_OVERWRITE; + wpc->io_type = XFS_IO_OVERWRITE; /* * imap should always be valid because of the above * partial page end_offset check on the imap. */ - ASSERT(xfs_imap_valid(inode, imap, offset)); + ASSERT(xfs_imap_valid(inode, &wpc->imap, offset)); lock_buffer(bh); - if (type != XFS_IO_OVERWRITE) - xfs_map_at_offset(inode, bh, imap, offset); - xfs_add_to_ioend(inode, bh, offset, type, - ioendp, done); + if (wpc->io_type != XFS_IO_OVERWRITE) + xfs_map_at_offset(inode, bh, &wpc->imap, offset); + xfs_add_to_ioend(inode, bh, offset, wpc); page_dirty--; count++; @@ -795,8 +791,7 @@ STATIC void xfs_cluster_write( struct inode *inode, pgoff_t tindex, - struct xfs_bmbt_irec *imap, - xfs_ioend_t **ioendp, + struct xfs_writepage_ctx *wpc, struct writeback_control *wbc, pgoff_t tlast) { @@ -812,7 +807,7 @@ xfs_cluster_write( for (i = 0; i < pagevec_count(&pvec); i++) { done = xfs_convert_page(inode, pvec.pages[i], tindex++, - imap, ioendp, wbc); + wpc, wbc); if (done) break; } @@ -1041,8 +1036,6 @@ xfs_do_writepage( offset = page_offset(page); do { - int new_ioend = 0; - if (offset >= end_offset) break; if (!buffer_uptodate(bh)) @@ -1091,15 +1084,6 @@ xfs_do_writepage( wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, offset); if (!wpc->imap_valid) { - /* - * If we didn't have a valid mapping then we need to - * put the new mapping into a separate ioend structure. - * This ensures non-contiguous extents always have - * separate ioends, which is particularly important - * for unwritten extent conversion at I/O completion - * time. - */ - new_ioend = 1; err = xfs_map_blocks(inode, offset, &wpc->imap, wpc->io_type); if (err) @@ -1111,8 +1095,7 @@ xfs_do_writepage( lock_buffer(bh); if (wpc->io_type != XFS_IO_OVERWRITE) xfs_map_at_offset(inode, bh, &wpc->imap, offset); - xfs_add_to_ioend(inode, bh, offset, wpc->io_type, - &wpc->ioend, new_ioend); + xfs_add_to_ioend(inode, bh, offset, wpc); count++; } @@ -1152,8 +1135,7 @@ xfs_do_writepage( if (end_index > last_index) end_index = last_index; - xfs_cluster_write(inode, page->index + 1, &wpc->imap, - &wpc->ioend, wbc, end_index); + xfs_cluster_write(inode, page->index + 1, wpc, wbc, end_index); } return 0; -- 2.5.0 _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs