On Tue, Jan 11, 2011 at 10:37:44AM +1100, Dave Chinner wrote: > From: Dave Chinner <dchinner@xxxxxxxxxx> > > We need to obtain the i_mutex, i_iolock and i_ilock during the read > and write paths. Add a set of wrapper functions to neatly > encapsulate the lock ordering and shared/exclusive semantics to make > the locking easier to follow and get right. > > Note that this changes some of the exclusive locking serialisation in > that serialisation will occur against the i_mutex instead of the > XFS_IOLOCK_EXCL. This does not change any behaviour, and it is > arguably more efficient to use the mutex for such serialisation than > the rw_sem. > > Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> > --- > fs/xfs/linux-2.6/xfs_file.c | 136 ++++++++++++++++++++++++++----------------- > 1 files changed, 82 insertions(+), 54 deletions(-) > > diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c > index c47d7dc0..941d7c2 100644 > --- a/fs/xfs/linux-2.6/xfs_file.c > +++ b/fs/xfs/linux-2.6/xfs_file.c > @@ -41,6 +41,40 @@ > static const struct vm_operations_struct xfs_file_vm_ops; > > /* > + * Locking primitives for read and write IO paths to ensure we consistently use > + * and order the inode->i_mutex, ip->i_lock and ip->i_iolock. 
> + */ > +static inline void > +xfs_rw_ilock( > + struct xfs_inode *ip, > + int type) > +{ > + if (type & XFS_IOLOCK_EXCL) > + mutex_lock(&VFS_I(ip)->i_mutex); > + xfs_ilock(ip, type); > +} > + > +static inline void > +xfs_rw_iunlock( > + struct xfs_inode *ip, > + int type) > +{ > + xfs_iunlock(ip, type); > + if (type & XFS_IOLOCK_EXCL) > + mutex_unlock(&VFS_I(ip)->i_mutex); > +} > + > +static inline void > +xfs_rw_ilock_demote( > + struct xfs_inode *ip, > + int type) > +{ > + xfs_ilock_demote(ip, type); > + if (type & XFS_IOLOCK_EXCL) > + mutex_unlock(&VFS_I(ip)->i_mutex); > +} > + > +/* > * xfs_iozero > * > * xfs_iozero clears the specified range of buffer supplied, > @@ -262,22 +296,21 @@ xfs_file_aio_read( > if (XFS_FORCED_SHUTDOWN(mp)) > return -EIO; > > - if (unlikely(ioflags & IO_ISDIRECT)) > - mutex_lock(&inode->i_mutex); > - xfs_ilock(ip, XFS_IOLOCK_SHARED); > - > if (unlikely(ioflags & IO_ISDIRECT)) { > + xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); > + > if (inode->i_mapping->nrpages) { > ret = -xfs_flushinval_pages(ip, > (iocb->ki_pos & PAGE_CACHE_MASK), > -1, FI_REMAPF_LOCKED); > + if (ret) { > + xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); > + return ret; > + } > } > - mutex_unlock(&inode->i_mutex); > - if (ret) { > - xfs_iunlock(ip, XFS_IOLOCK_SHARED); > - return ret; > - } > - } > + xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); > + } else > + xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); > > trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); > > @@ -285,7 +318,7 @@ xfs_file_aio_read( > if (ret > 0) > XFS_STATS_ADD(xs_read_bytes, ret); > > - xfs_iunlock(ip, XFS_IOLOCK_SHARED); > + xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); > return ret; > } > > @@ -309,7 +342,7 @@ xfs_file_splice_read( > if (XFS_FORCED_SHUTDOWN(ip->i_mount)) > return -EIO; > > - xfs_ilock(ip, XFS_IOLOCK_SHARED); > + xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); > > trace_xfs_file_splice_read(ip, count, *ppos, ioflags); > > @@ -317,7 +350,7 @@ xfs_file_splice_read( > if (ret > 0) > XFS_STATS_ADD(xs_read_bytes, ret); > > - 
xfs_iunlock(ip, XFS_IOLOCK_SHARED); > + xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); > return ret; > } > > @@ -338,10 +371,10 @@ xfs_aio_write_isize_update( > *ppos = isize; > > if (*ppos > ip->i_size) { > - xfs_ilock(ip, XFS_ILOCK_EXCL); > + xfs_rw_ilock(ip, XFS_ILOCK_EXCL); > if (*ppos > ip->i_size) > ip->i_size = *ppos; > - xfs_iunlock(ip, XFS_ILOCK_EXCL); > + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); > } > } > > @@ -356,14 +389,22 @@ xfs_aio_write_newsize_update( > struct xfs_inode *ip) > { > if (ip->i_new_size) { > - xfs_ilock(ip, XFS_ILOCK_EXCL); > + xfs_rw_ilock(ip, XFS_ILOCK_EXCL); > ip->i_new_size = 0; > if (ip->i_d.di_size > ip->i_size) > ip->i_d.di_size = ip->i_size; > - xfs_iunlock(ip, XFS_ILOCK_EXCL); > + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); > } > } > > +/* > + * xfs_file_splice_write() does not use xfs_rw_ilock() because > + * generic_file_splice_write() takes the i_mutex itself. This, in theory, > + * could cause lock inversions between the aio_write path and the splice path > + * if someone is doing concurrent splice(2) based writes and write(2) based > + * writes to the same inode. The only real way to fix this is to re-implement > + * the generic code here with correct locking orders. > + */ > STATIC ssize_t > xfs_file_splice_write( > struct pipe_inode_info *pipe, > @@ -386,14 +427,13 @@ xfs_file_splice_write( > if (XFS_FORCED_SHUTDOWN(ip->i_mount)) > return -EIO; > > - xfs_ilock(ip, XFS_IOLOCK_EXCL); > + xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL); > > new_size = *ppos + count; > > - xfs_ilock(ip, XFS_ILOCK_EXCL); > if (new_size > ip->i_size) > ip->i_new_size = new_size; > - xfs_iunlock(ip, XFS_ILOCK_EXCL); > + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); While using the xfs_rw_iunlock here actually is correct I think it's highly confusing, given that we didn't use it to take the ilock. What about just leaving all the code in xfs_file_splice_write as-is and not touching it at all? 
_______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs