Hey Christoph, On Sun, Dec 18, 2011 at 03:00:04PM -0500, Christoph Hellwig wrote: > This wrapper isn't overly useful, not to say rather confusing. > > Around the call to xfs_itruncate_extents it does: > > - add tracing > - add a few asserts in debug builds > - conditionally update the inode size in two places > - log the inode > > Both the tracing and the inode logging can be moved to xfs_itruncate_extents > as they are useful for the attribute fork as well - in fact the attr code > already does an equivalent xfs_trans_log_inode call just after calling > xfs_itruncate_extents. > > The conditional size updates are a mess, and there > was no reason to do them in two places anyway, as the first one was > conditional on the inode having extents - but without extents we > xfs_itruncate_extents would be a no-op and the placement wouldn't matter > anyway. > > Instead move the size assignments and the asserts that make sense > to the callers that want it. > > As a side effect of this clean up xfs_setattr_size by introducing variables > for the old and new inode size, and moving the size updates into a common > place. > > Reviewed-by: Dave Chinner <dchinner@xxxxxxxxxx> > Signed-off-by: Christoph Hellwig <hch@xxxxxx> Looks good. I had some minor questions below. Reviewed-by: Ben Myers <bpm@xxxxxxx> > --- > fs/xfs/xfs_attr.c | 4 - > fs/xfs/xfs_inode.c | 124 +++-------------------------------------------- > fs/xfs/xfs_inode.h | 2 > fs/xfs/xfs_iops.c | 47 +++++++++++------ > fs/xfs/xfs_qm_syscalls.c | 9 +++ > fs/xfs/xfs_trace.h | 4 - > fs/xfs/xfs_vnodeops.c | 17 +++++- > 7 files changed, 65 insertions(+), 142 deletions(-) > > Index: xfs/fs/xfs/xfs_attr.c > =================================================================== > --- xfs.orig/fs/xfs/xfs_attr.c 2011-11-30 12:58:07.820044461 +0100 > +++ xfs/fs/xfs/xfs_attr.c 2011-11-30 12:58:51.519807719 +0100 > @@ -827,10 +827,6 @@ xfs_attr_inactive(xfs_inode_t *dp) > if (error) > goto out; > > - /* > - * Commit the last in the sequence of transactions. > - */ > - xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE); > error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES); > xfs_iunlock(dp, XFS_ILOCK_EXCL); > > Index: xfs/fs/xfs/xfs_inode.c > =================================================================== > --- xfs.orig/fs/xfs/xfs_inode.c 2011-11-30 12:58:07.830044408 +0100 > +++ xfs/fs/xfs/xfs_inode.c 2011-11-30 12:58:51.519807719 +0100 > @@ -1166,52 +1166,6 @@ xfs_ialloc( > } > > /* > - * Check to make sure that there are no blocks allocated to the > - * file beyond the size of the file. We don't check this for > - * files with fixed size extents or real time extents, but we > - * at least do it for regular files. > - */ > -#ifdef DEBUG > -STATIC void > -xfs_isize_check( > - struct xfs_inode *ip, > - xfs_fsize_t isize) > -{ > - struct xfs_mount *mp = ip->i_mount; > - xfs_fileoff_t map_first; > - int nimaps; > - xfs_bmbt_irec_t imaps[2]; > - int error; > - > - if (!S_ISREG(ip->i_d.di_mode)) > - return; > - > - if (XFS_IS_REALTIME_INODE(ip)) > - return; > - > - if (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) > - return; > - > - nimaps = 2; > - map_first = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); > - /* > - * The filesystem could be shutting down, so bmapi may return > - * an error. > - */ > - error = xfs_bmapi_read(ip, map_first, > - (XFS_B_TO_FSB(mp, > - (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - map_first), > - imaps, &nimaps, XFS_BMAPI_ENTIRE); > - if (error) > - return; > - ASSERT(nimaps == 1); > - ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); > -} > -#else /* DEBUG */ > -#define xfs_isize_check(ip, isize) > -#endif /* DEBUG */ > - > -/* You've tossed xfs_isize_check in the round filer, but you didn't mention that in your commit message. Isn't this still useful code? > * Free up the underlying blocks past new_size. The new size must be smaller > * than the current size. This routine can be used both for the attribute and > * data fork, and does not modify the inode size, which is left to the caller. > @@ -1258,6 +1212,8 @@ xfs_itruncate_extents( > ASSERT(ip->i_itemp->ili_lock_flags == 0); > ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); > > + trace_xfs_itruncate_extents_start(ip, new_size); > + > /* > * Since it is possible for space to become allocated beyond > * the end of the file (in a crash where the space is allocated > @@ -1325,6 +1281,14 @@ xfs_itruncate_extents( > goto out; > } > > + /* > + * Always re-log the inode so that our permanent transaction can keep > + * on rolling it forward in the log. > + */ > + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); Oh.. This is why it's ok not to log the inode core in xfs_attr_inactive: it calls xfs_itruncate_extents()... > + > + trace_xfs_itruncate_extents_end(ip, new_size); > + > out: > *tpp = tp; > return error; > @@ -1338,74 +1302,6 @@ out_bmap_cancel: > goto out; > } > > -int > -xfs_itruncate_data( > - struct xfs_trans **tpp, > - struct xfs_inode *ip, > - xfs_fsize_t new_size) > -{ > - int error; > - > - trace_xfs_itruncate_data_start(ip, new_size); > - > - /* > - * The first thing we do is set the size to new_size permanently on > - * disk. This way we don't have to worry about anyone ever being able > - * to look at the data being freed even in the face of a crash. > - * What we're getting around here is the case where we free a block, it > - * is allocated to another file, it is written to, and then we crash. > - * If the new data gets written to the file but the log buffers > - * containing the free and reallocation don't, then we'd end up with > - * garbage in the blocks being freed. As long as we make the new_size > - * permanent before actually freeing any blocks it doesn't matter if > - * they get written to. > - */ > - if (ip->i_d.di_nextents > 0) { > - /* > - * If we are not changing the file size then do not update > - * the on-disk file size - we may be called from > - * xfs_inactive_free_eofblocks(). If we update the on-disk > - * file size and then the system crashes before the contents > - * of the file are flushed to disk then the files may be > - * full of holes (ie NULL files bug). > - */ > - if (ip->i_size != new_size) { > - ip->i_d.di_size = new_size; > - ip->i_size = new_size; > - xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE); > - } > - } > - > - error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size); > - if (error) > - return error; > - > - /* > - * If we are not changing the file size then do not update the on-disk > - * file size - we may be called from xfs_inactive_free_eofblocks(). > - * If we update the on-disk file size and then the system crashes > - * before the contents of the file are flushed to disk then the files > - * may be full of holes (ie NULL files bug). > - */ > - xfs_isize_check(ip, new_size); > - if (ip->i_size != new_size) { > - ip->i_d.di_size = new_size; > - ip->i_size = new_size; > - } > - > - ASSERT(new_size != 0 || ip->i_delayed_blks == 0); You didn't pull this assert along with > - ASSERT(new_size != 0 || ip->i_d.di_nextents == 0); this one into xfs_qm_scall_trunc_qfile. Was that intentional? > - > - /* > - * Always re-log the inode so that our permanent transaction can keep > - * on rolling it forward in the log. > - */ > - xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE); > - > - trace_xfs_itruncate_data_end(ip, new_size); > - return 0; > -} > - > /* > * This is called when the inode's link count goes to 0. > * We place the on-disk inode on a list in the AGI. It > Index: xfs/fs/xfs/xfs_inode.h > =================================================================== > --- xfs.orig/fs/xfs/xfs_inode.h 2011-11-30 12:58:08.843372251 +0100 > +++ xfs/fs/xfs/xfs_inode.h 2011-11-30 12:58:51.523141034 +0100 > @@ -491,8 +491,6 @@ int xfs_ifree(struct xfs_trans *, xfs_i > struct xfs_bmap_free *); > int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *, > int, xfs_fsize_t); > -int xfs_itruncate_data(struct xfs_trans **, struct xfs_inode *, > - xfs_fsize_t); > int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); > > void xfs_iext_realloc(xfs_inode_t *, int, int); > Index: xfs/fs/xfs/xfs_iops.c > =================================================================== > --- xfs.orig/fs/xfs/xfs_iops.c 2011-11-30 12:58:07.856710929 +0100 > +++ xfs/fs/xfs/xfs_iops.c 2011-11-30 12:58:51.523141034 +0100 > @@ -750,6 +750,7 @@ xfs_setattr_size( > struct xfs_mount *mp = ip->i_mount; > struct inode *inode = VFS_I(ip); > int mask = iattr->ia_valid; > + xfs_off_t oldsize, newsize; > struct xfs_trans *tp; > int error; > uint lock_flags; > @@ -777,11 +778,13 @@ xfs_setattr_size( > lock_flags |= XFS_IOLOCK_EXCL; > xfs_ilock(ip, lock_flags); > > + oldsize = ip->i_size; > + newsize = iattr->ia_size; > + > /* > * Short circuit the truncate case for zero length files. > */ > - if (iattr->ia_size == 0 && > - ip->i_size == 0 && ip->i_d.di_nextents == 0) { > + if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) { > if (!(mask & (ATTR_CTIME|ATTR_MTIME))) > goto out_unlock; > > @@ -807,14 +810,14 @@ xfs_setattr_size( > * the inode to the transaction, because the inode cannot be unlocked > * once it is a part of the transaction. > */ > - if (iattr->ia_size > ip->i_size) { > + if (newsize > oldsize) { > /* > * Do the first part of growing a file: zero any data in the > * last block that is beyond the old EOF. We need to do this > * before the inode is joined to the transaction to modify > * i_size. > */ > - error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); > + error = xfs_zero_eof(ip, newsize, oldsize); > if (error) > goto out_unlock; > } > @@ -833,8 +836,8 @@ xfs_setattr_size( > * here and prevents waiting for other data not within the range we > * care about here. > */ > - if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) { > - error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, 0, > + if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) { > + error = xfs_flush_pages(ip, ip->i_d.di_size, newsize, 0, > FI_NONE); > if (error) > goto out_unlock; > @@ -845,8 +848,7 @@ xfs_setattr_size( > */ > inode_dio_wait(inode); > > - error = -block_truncate_page(inode->i_mapping, iattr->ia_size, > - xfs_get_blocks); > + error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); > if (error) > goto out_unlock; > > @@ -857,7 +859,7 @@ xfs_setattr_size( > if (error) > goto out_trans_cancel; > > - truncate_setsize(inode, iattr->ia_size); > + truncate_setsize(inode, newsize); > > commit_flags = XFS_TRANS_RELEASE_LOG_RES; > lock_flags |= XFS_ILOCK_EXCL; > @@ -876,19 +878,30 @@ xfs_setattr_size( > * these flags set. For all other operations the VFS set these flags > * explicitly if it wants a timestamp update. > */ > - if (iattr->ia_size != ip->i_size && > - (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { > + if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { > iattr->ia_ctime = iattr->ia_mtime = > current_fs_time(inode->i_sb); > mask |= ATTR_CTIME | ATTR_MTIME; > } > > - if (iattr->ia_size > ip->i_size) { > - ip->i_d.di_size = iattr->ia_size; > - ip->i_size = iattr->ia_size; > - } else if (iattr->ia_size <= ip->i_size || > - (iattr->ia_size == 0 && ip->i_d.di_nextents)) { > - error = xfs_itruncate_data(&tp, ip, iattr->ia_size); > + /* > + * The first thing we do is set the size to new_size permanently on > + * disk. This way we don't have to worry about anyone ever being able > + * to look at the data being freed even in the face of a crash. > + * What we're getting around here is the case where we free a block, it > + * is allocated to another file, it is written to, and then we crash. > + * If the new data gets written to the file but the log buffers > + * containing the free and reallocation don't, then we'd end up with > + * garbage in the blocks being freed. As long as we make the new size > + * permanent before actually freeing any blocks it doesn't matter if > + * they get written to. > + */ > + ip->i_d.di_size = newsize; > + ip->i_size = newsize; > + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); > + > + if (newsize <= oldsize) { > + error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize); > if (error) > goto out_trans_abort; This newsize/oldsize section is really a nice cleanup. > > Index: xfs/fs/xfs/xfs_qm_syscalls.c > =================================================================== > --- xfs.orig/fs/xfs/xfs_qm_syscalls.c 2011-11-30 12:58:07.866710876 +0100 > +++ xfs/fs/xfs/xfs_qm_syscalls.c 2011-11-30 12:58:51.523141034 +0100 > @@ -31,6 +31,7 @@ > #include "xfs_mount.h" > #include "xfs_bmap_btree.h" > #include "xfs_inode.h" > +#include "xfs_inode_item.h" > #include "xfs_itable.h" > #include "xfs_bmap.h" > #include "xfs_rtalloc.h" > @@ -263,13 +264,19 @@ xfs_qm_scall_trunc_qfile( > xfs_ilock(ip, XFS_ILOCK_EXCL); > xfs_trans_ijoin(tp, ip, 0); > > - error = xfs_itruncate_data(&tp, ip, 0); > + ip->i_d.di_size = 0; > + ip->i_size = 0; > + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); > + > + error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0); It it worth a comment that you log the size here because of the possibility of another file being allocated these extents in the face of a crash? > if (error) { > xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | > XFS_TRANS_ABORT); > goto out_unlock; > } > > + ASSERT(ip->i_d.di_nextents == 0); > + > xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); > error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); > > Index: xfs/fs/xfs/xfs_trace.h > =================================================================== > --- xfs.orig/fs/xfs/xfs_trace.h 2011-11-30 12:58:07.876710822 +0100 > +++ xfs/fs/xfs/xfs_trace.h 2011-11-30 12:58:51.523141034 +0100 > @@ -1090,8 +1090,8 @@ DECLARE_EVENT_CLASS(xfs_itrunc_class, > DEFINE_EVENT(xfs_itrunc_class, name, \ > TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ > TP_ARGS(ip, new_size)) > -DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start); > -DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end); > +DEFINE_ITRUNC_EVENT(xfs_itruncate_extents_start); > +DEFINE_ITRUNC_EVENT(xfs_itruncate_extents_end); > > TRACE_EVENT(xfs_pagecache_inval, > TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish), > Index: xfs/fs/xfs/xfs_vnodeops.c > =================================================================== > --- xfs.orig/fs/xfs/xfs_vnodeops.c 2011-11-30 12:58:07.893377397 +0100 > +++ xfs/fs/xfs/xfs_vnodeops.c 2011-11-30 12:58:51.526474350 +0100 > @@ -226,7 +226,14 @@ xfs_free_eofblocks( > xfs_ilock(ip, XFS_ILOCK_EXCL); > xfs_trans_ijoin(tp, ip, 0); > > - error = xfs_itruncate_data(&tp, ip, ip->i_size); > + /* > + * Do not update the on-disk file size. If we update the > + * on-disk file size and then the system crashes before the > + * contents of the file are flushed to disk then the files > + * may be full of holes (ie NULL files bug). > + */ > + error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, > + ip->i_size); > if (error) { > /* > * If we get an error at this point we simply don't > @@ -670,13 +677,19 @@ xfs_inactive( > xfs_ilock(ip, XFS_ILOCK_EXCL); > xfs_trans_ijoin(tp, ip, 0); > > - error = xfs_itruncate_data(&tp, ip, 0); > + ip->i_d.di_size = 0; > + ip->i_size = 0; > + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); Maybe a comment here too? > + > + error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0); > if (error) { > xfs_trans_cancel(tp, > XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); > xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); > return VN_INACTIVE_CACHE; > } > + > + ASSERT(ip->i_d.di_nextents == 0); > } else if (S_ISLNK(ip->i_d.di_mode)) { > > /* > > _______________________________________________ > xfs mailing list > xfs@xxxxxxxxxxx > http://oss.sgi.com/mailman/listinfo/xfs _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs