Instead of updating the inode size through the VFS dirty mechanism and ->write_inode just log it directly. This fixes a nasty bug where we'd lose file size updates if they happen from writeout during inode eviction, and prepares for getting rid of non-transaction metadata updates entirely. This may cause a few additional transactions for inode core updates, but with the delaylog code those are cheap enough to not bother. Signed-off-by: Christoph Hellwig <hch@xxxxxx> Index: xfs/fs/xfs/xfs_aops.c =================================================================== --- xfs.orig/fs/xfs/xfs_aops.c 2011-08-24 00:41:58.878003324 +0200 +++ xfs/fs/xfs/xfs_aops.c 2011-08-24 00:43:39.107460335 +0200 @@ -126,30 +126,6 @@ static inline bool xfs_ioend_is_append(s } /* - * Update on-disk file size now that data has been written to disk. The - * current in-memory file size is i_size. If a write is beyond eof i_new_size - * will be the intended file size until i_size is updated. If this write does - * not extend all the way to the valid file size then restrict this update to - * the end of the write. - */ -STATIC void -xfs_setfilesize( - xfs_ioend_t *ioend) -{ - xfs_inode_t *ip = XFS_I(ioend->io_inode); - xfs_fsize_t isize; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - isize = xfs_ioend_new_eof(ioend); - if (isize) { - trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); - ip->i_d.di_size = isize; - xfs_mark_inode_dirty(ip); - } - xfs_iunlock(ip, XFS_ILOCK_EXCL); -} - -/* * Schedule IO completion handling on the final put of an ioend. * * If there is no work to do we might as well call it a day and free the @@ -203,10 +179,29 @@ xfs_end_io( } /* - * We might have to update the on-disk file size after extending - * writes. - */ - xfs_setfilesize(ioend); + * Update on-disk file size now that data has been written to disk. + * + * The current in-memory file size is i_size. If a write is beyond + * eof i_new_size will be the intended file size until i_size is + * updated. If this write does not extend all the way to the valid + * file size then restrict this update to the end of the write. + */ + if (xfs_ioend_is_append(ioend)) { + xfs_fsize_t new_size; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + new_size = xfs_ioend_new_eof(ioend); + if (new_size) { + trace_xfs_setfilesize(ip, ioend->io_offset, + ioend->io_size); + error = xfs_setfilesize(ip, new_size); + } + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + if (error) + ioend->io_error = -error; + } + done: xfs_destroy_ioend(ioend); } @@ -364,14 +359,6 @@ xfs_submit_ioend_bio( atomic_inc(&ioend->io_remaining); bio->bi_private = ioend; bio->bi_end_io = xfs_end_bio; - - /* - * If the I/O is beyond EOF we mark the inode dirty immediately - * but don't update the inode size until I/O completion. - */ - if (xfs_ioend_new_eof(ioend)) - xfs_mark_inode_dirty(XFS_I(ioend->io_inode)); - submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); } Index: xfs/fs/xfs/xfs_file.c =================================================================== --- xfs.orig/fs/xfs/xfs_file.c 2011-08-24 00:41:55.864686316 +0200 +++ xfs/fs/xfs/xfs_file.c 2011-08-24 00:41:59.691332251 +0200 @@ -409,7 +409,7 @@ xfs_aio_write_newsize_update( xfs_rw_ilock(ip, XFS_ILOCK_EXCL); ip->i_new_size = 0; if (ip->i_d.di_size > ip->i_size) - ip->i_d.di_size = ip->i_size; + xfs_setfilesize(ip, ip->i_size); xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); } } Index: xfs/fs/xfs/xfs_inode.c =================================================================== --- xfs.orig/fs/xfs/xfs_inode.c 2011-08-24 00:41:52.724703326 +0200 +++ xfs/fs/xfs/xfs_inode.c 2011-08-24 00:42:36.824464417 +0200 @@ -1413,6 +1413,34 @@ xfs_itruncate_data( } /* + * Update the inode size during I/O completions or error handling. + */ +int +xfs_setfilesize( + struct xfs_inode *ip, + xfs_fsize_t isize) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error = 0; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + tp = _xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS, KM_NOFS); + error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + return error; + } + + ip->i_d.di_size = isize; + xfs_trans_ijoin(tp, ip); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + return xfs_trans_commit(tp, 0); +} + +/* * This is called when the inode's link count goes to 0. * We place the on-disk inode on a list in the AGI. It * will be pulled from this list when the inode is freed. Index: xfs/fs/xfs/xfs_inode.h =================================================================== --- xfs.orig/fs/xfs/xfs_inode.h 2011-08-24 00:41:55.878019576 +0200 +++ xfs/fs/xfs/xfs_inode.h 2011-08-24 00:41:59.697998883 +0200 @@ -493,6 +493,7 @@ int xfs_itruncate_extents(struct xfs_tr int, xfs_fsize_t); int xfs_itruncate_data(struct xfs_trans **, struct xfs_inode *, xfs_fsize_t); +int xfs_setfilesize(struct xfs_inode *, xfs_fsize_t); int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); void xfs_iext_realloc(xfs_inode_t *, int, int); _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs