Do not use unlogged metadata updates and the VFS dirty bit for updating the file size after writeback. In addition to causing various problems with updates getting delayed for far too long, this also drags in the unscalable VFS dirty tracking, and is one of the few remaining unlogged metadata updates. XXX: figure out how we can get a good log reservation at writepage time. Signed-off-by: Christoph Hellwig <hch@xxxxxx> --- fs/xfs/xfs_aops.c | 49 ++++++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 21 deletions(-) Index: xfs/fs/xfs/xfs_aops.c =================================================================== --- xfs.orig/fs/xfs/xfs_aops.c 2011-10-27 22:39:58.640675607 +0200 +++ xfs/fs/xfs/xfs_aops.c 2011-10-27 22:39:59.104674728 +0200 @@ -26,6 +26,7 @@ #include "xfs_bmap_btree.h" #include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_inode_item.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_rw.h" @@ -110,22 +111,39 @@ static inline bool xfs_ioend_is_append(s /* * Update on-disk file size now that data has been written to disk. 
*/ -STATIC void +STATIC int xfs_setfilesize( struct xfs_ioend *ioend) { struct xfs_inode *ip = XFS_I(ioend->io_inode); + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; xfs_fsize_t isize; + int error = 0; xfs_ilock(ip, XFS_ILOCK_EXCL); isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size); - if (isize) { - trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); - ip->i_d.di_size = isize; - xfs_mark_inode_dirty(ip); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + if (!isize) + return 0; + + trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); + + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); + error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + return error; } - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_ilock(ip, XFS_ILOCK_EXCL); + + ip->i_d.di_size = isize; + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + return xfs_trans_commit(tp, 0); } /* @@ -179,12 +197,10 @@ xfs_end_io( ioend->io_error = -error; goto done; } - } else { - /* - * We might have to update the on-disk file size after - * extending writes. - */ - xfs_setfilesize(ioend); + } else if (xfs_ioend_is_append(ioend)) { + error = xfs_setfilesize(ioend); + if (error) + ioend->io_error = error; } done: @@ -341,18 +357,9 @@ xfs_submit_ioend_bio( xfs_ioend_t *ioend, struct bio *bio) { - struct xfs_inode *ip = XFS_I(ioend->io_inode); atomic_inc(&ioend->io_remaining); bio->bi_private = ioend; bio->bi_end_io = xfs_end_bio; - - /* - * If the I/O is beyond EOF we mark the inode dirty immediately - * but don't update the inode size until I/O completion. - */ - if (xfs_new_eof(ip, ioend->io_offset + ioend->io_size)) - xfs_mark_inode_dirty(ip); - submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); } _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs