On Tue, Jan 11, 2011 at 10:37:44AM +1100, Dave Chinner wrote: > From: Dave Chinner <dchinner@xxxxxxxxxx> > > We need to obtain the i_mutex, i_iolock and i_ilock during the read > and write paths. Add a set of wrapper functions to neatly > encapsulate the lock ordering and shared/exclusive semantics to make > the locking easier to follow and get right. > > Note that this changes some of the exclusive locking serialisation in > that serialisation will occur against the i_mutex instead of the > XFS_IOLOCK_EXCL. This does not change any behaviour, and it is > arguably more efficient to use the mutex for such serialisation than > the rw_sem. > > Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> > --- > fs/xfs/linux-2.6/xfs_file.c | 136 ++++++++++++++++++++++++++----------------- > 1 files changed, 82 insertions(+), 54 deletions(-) > > diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c > index c47d7dc0..941d7c2 100644 > --- a/fs/xfs/linux-2.6/xfs_file.c > +++ b/fs/xfs/linux-2.6/xfs_file.c > @@ -41,6 +41,40 @@ > static const struct vm_operations_struct xfs_file_vm_ops; > > /* > + * Locking primitives for read and write IO paths to ensure we consistently use > + * and order the inode->i_mutex, ip->i_lock and ip->i_iolock. 
> + */ > +static inline void > +xfs_rw_ilock( > + struct xfs_inode *ip, > + int type) > +{ > + if (type & XFS_IOLOCK_EXCL) > + mutex_lock(&VFS_I(ip)->i_mutex); > + xfs_ilock(ip, type); > +} > + > +static inline void > +xfs_rw_iunlock( > + struct xfs_inode *ip, > + int type) > +{ > + xfs_iunlock(ip, type); > + if (type & XFS_IOLOCK_EXCL) > + mutex_unlock(&VFS_I(ip)->i_mutex); > +} > + > +static inline void > +xfs_rw_ilock_demote( > + struct xfs_inode *ip, > + int type) > +{ > + xfs_ilock_demote(ip, type); > + if (type & XFS_IOLOCK_EXCL) > + mutex_unlock(&VFS_I(ip)->i_mutex); > +} > + > +/* > * xfs_iozero > * > * xfs_iozero clears the specified range of buffer supplied, > @@ -262,22 +296,21 @@ xfs_file_aio_read( > if (XFS_FORCED_SHUTDOWN(mp)) > return -EIO; > > - if (unlikely(ioflags & IO_ISDIRECT)) > - mutex_lock(&inode->i_mutex); > - xfs_ilock(ip, XFS_IOLOCK_SHARED); > - > if (unlikely(ioflags & IO_ISDIRECT)) { > + xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); > + > if (inode->i_mapping->nrpages) { > ret = -xfs_flushinval_pages(ip, > (iocb->ki_pos & PAGE_CACHE_MASK), > -1, FI_REMAPF_LOCKED); > + if (ret) { > + xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); > + return ret; > + } > } > - mutex_unlock(&inode->i_mutex); > - if (ret) { > - xfs_iunlock(ip, XFS_IOLOCK_SHARED); > - return ret; > - } > - } > + xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); > + } else > + xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); > > trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); > > @@ -285,7 +318,7 @@ xfs_file_aio_read( > if (ret > 0) > XFS_STATS_ADD(xs_read_bytes, ret); > > - xfs_iunlock(ip, XFS_IOLOCK_SHARED); > + xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); > return ret; > } > > @@ -309,7 +342,7 @@ xfs_file_splice_read( > if (XFS_FORCED_SHUTDOWN(ip->i_mount)) > return -EIO; > > - xfs_ilock(ip, XFS_IOLOCK_SHARED); > + xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); > > trace_xfs_file_splice_read(ip, count, *ppos, ioflags); > > @@ -317,7 +350,7 @@ xfs_file_splice_read( > if (ret > 0) > XFS_STATS_ADD(xs_read_bytes, ret); > > - 
xfs_iunlock(ip, XFS_IOLOCK_SHARED); > + xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); > return ret; > } > > @@ -338,10 +371,10 @@ xfs_aio_write_isize_update( > *ppos = isize; > > if (*ppos > ip->i_size) { > - xfs_ilock(ip, XFS_ILOCK_EXCL); > + xfs_rw_ilock(ip, XFS_ILOCK_EXCL); > if (*ppos > ip->i_size) > ip->i_size = *ppos; > - xfs_iunlock(ip, XFS_ILOCK_EXCL); > + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); > } > } > > @@ -356,14 +389,22 @@ xfs_aio_write_newsize_update( > struct xfs_inode *ip) > { > if (ip->i_new_size) { > - xfs_ilock(ip, XFS_ILOCK_EXCL); > + xfs_rw_ilock(ip, XFS_ILOCK_EXCL); > ip->i_new_size = 0; > if (ip->i_d.di_size > ip->i_size) > ip->i_d.di_size = ip->i_size; > - xfs_iunlock(ip, XFS_ILOCK_EXCL); > + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); > } > } > > +/* > + * xfs_file_splice_write() does not use xfs_rw_ilock() because > + * generic_file_splice_write() takes the i_mutex itself. This, in theory, > + * could cause lock inversions between the aio_write path and the splice path > + * if someone is doing concurrent splice(2) based writes and write(2) based > + * writes to the same inode. The only real way to fix this is to re-implement > + * the generic code here with correct locking orders. > + */ > STATIC ssize_t > xfs_file_splice_write( > struct pipe_inode_info *pipe, > @@ -386,14 +427,13 @@ xfs_file_splice_write( > if (XFS_FORCED_SHUTDOWN(ip->i_mount)) > return -EIO; > > - xfs_ilock(ip, XFS_IOLOCK_EXCL); > + xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL); > > new_size = *ppos + count; > > - xfs_ilock(ip, XFS_ILOCK_EXCL); > if (new_size > ip->i_size) > ip->i_new_size = new_size; > - xfs_iunlock(ip, XFS_ILOCK_EXCL); > + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); While using the xfs_rw_iunlock here actually is correct I think it's highly confusing, given that we didn't use it to take the ilock. What about just leaving all the code in xfs_file_splice_write as-is and not touching it at all? 
_______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs