Re: [RFC PATCH 1/6] xfs: introduce mmap/truncate lock

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Jan 08, 2015 at 09:25:38AM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@xxxxxxxxxx>
> 
> Right now we cannot serialise mmap against truncate or hole punch
> sanely. ->page_mkwrite is not able to take locks that the read IO
> path normally takes (i.e. the inode iolock) because that could
> result in lock inversions (read - iolock - page fault - page_mkwrite
> - iolock) and so we cannot use an IO path lock to serialise page
> write faults against truncate operations.
> 
> Instead, introduce a new lock that is used *only* in the
> ->page_mkwrite path that is the equivalent of the iolock. The lock
> ordering in a page fault is i_mmaplock -> page lock -> i_ilock,
> and so in truncate we can i_iolock -> i_mmaplock and so lock out
> new write faults during the process of truncation.
> 
> Because i_mmap_lock is outside the page lock, we can hold it across
> all the same operations we hold the i_iolock for. The only
> difference is that we never hold the i_mmaplock in the normal IO
> path and so do not ever have the possibility that we can page fault
> inside it. Hence there are no recursion issues on the i_mmap_lock
> and so we can use it to serialise page fault IO against inode
> modification operations that affect the IO path.
> 
> This patch introduces the i_mmaplock infrastructure, lockdep
> annotations and initialisation/destruction code. Use of the new lock
> will be in subsequent patches.
> 
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> ---
>  fs/xfs/xfs_inode.c | 86 ++++++++++++++++++++++++++++++++++++++++++++----------
>  fs/xfs/xfs_inode.h | 29 +++++++++++++-----
>  fs/xfs/xfs_super.c |  2 ++
>  3 files changed, 95 insertions(+), 22 deletions(-)
> 
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index 400791a..573b49c 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -150,6 +150,8 @@ xfs_ilock(
>  	 */
>  	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
>  	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
> +	ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
> +	       (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));

The comment that precedes xfs_ilock() explains the locks that exist
within the inode, locking order, etc. We should probably update it to
explain how i_mmap_lock fits in as well (e.g., text from the commit log
description would suffice, imo).

>  	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
>  	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
>  	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
> @@ -159,6 +161,11 @@ xfs_ilock(
>  	else if (lock_flags & XFS_IOLOCK_SHARED)
>  		mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
>  
> +	if (lock_flags & XFS_MMAPLOCK_EXCL)
> +		mrupdate_nested(&ip->i_mmaplock, XFS_IOLOCK_DEP(lock_flags));
> +	else if (lock_flags & XFS_MMAPLOCK_SHARED)
> +		mraccess_nested(&ip->i_mmaplock, XFS_IOLOCK_DEP(lock_flags));
> +

XFS_MMAPLOCK_DEP()?

>  	if (lock_flags & XFS_ILOCK_EXCL)
>  		mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
>  	else if (lock_flags & XFS_ILOCK_SHARED)
> @@ -191,6 +198,8 @@ xfs_ilock_nowait(
>  	 */
>  	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
>  	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
> +	ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
> +	       (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
>  	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
>  	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
>  	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
> @@ -202,21 +211,35 @@ xfs_ilock_nowait(
>  		if (!mrtryaccess(&ip->i_iolock))
>  			goto out;
>  	}
> +
> +	if (lock_flags & XFS_MMAPLOCK_EXCL) {
> +		if (!mrtryupdate(&ip->i_mmaplock))
> +			goto out_undo_iolock;
> +	} else if (lock_flags & XFS_MMAPLOCK_SHARED) {
> +		if (!mrtryaccess(&ip->i_mmaplock))
> +			goto out_undo_iolock;
> +	}
> +
>  	if (lock_flags & XFS_ILOCK_EXCL) {
>  		if (!mrtryupdate(&ip->i_lock))
> -			goto out_undo_iolock;
> +			goto out_undo_mmaplock;
>  	} else if (lock_flags & XFS_ILOCK_SHARED) {
>  		if (!mrtryaccess(&ip->i_lock))
> -			goto out_undo_iolock;
> +			goto out_undo_mmaplock;
>  	}
>  	return 1;
>  
> - out_undo_iolock:
> +out_undo_mmaplock:
> +	if (lock_flags & XFS_MMAPLOCK_EXCL)
> +		mrunlock_excl(&ip->i_mmaplock);
> +	else if (lock_flags & XFS_MMAPLOCK_SHARED)
> +		mrunlock_shared(&ip->i_mmaplock);
> +out_undo_iolock:
>  	if (lock_flags & XFS_IOLOCK_EXCL)
>  		mrunlock_excl(&ip->i_iolock);
>  	else if (lock_flags & XFS_IOLOCK_SHARED)
>  		mrunlock_shared(&ip->i_iolock);
> - out:
> +out:
>  	return 0;
>  }
>  
> @@ -244,6 +267,8 @@ xfs_iunlock(
>  	 */
>  	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
>  	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
> +	ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
> +	       (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
>  	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
>  	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
>  	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
> @@ -254,6 +279,11 @@ xfs_iunlock(
>  	else if (lock_flags & XFS_IOLOCK_SHARED)
>  		mrunlock_shared(&ip->i_iolock);
>  
> +	if (lock_flags & XFS_MMAPLOCK_EXCL)
> +		mrunlock_excl(&ip->i_mmaplock);
> +	else if (lock_flags & XFS_MMAPLOCK_SHARED)
> +		mrunlock_shared(&ip->i_mmaplock);
> +
>  	if (lock_flags & XFS_ILOCK_EXCL)
>  		mrunlock_excl(&ip->i_lock);
>  	else if (lock_flags & XFS_ILOCK_SHARED)
> @@ -271,11 +301,14 @@ xfs_ilock_demote(
>  	xfs_inode_t		*ip,
>  	uint			lock_flags)
>  {
> -	ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
> -	ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
> +	ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL));
> +	ASSERT((lock_flags &
> +		~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
>  
>  	if (lock_flags & XFS_ILOCK_EXCL)
>  		mrdemote(&ip->i_lock);
> +	if (lock_flags & XFS_MMAPLOCK_EXCL)
> +		mrdemote(&ip->i_mmaplock);
>  	if (lock_flags & XFS_IOLOCK_EXCL)
>  		mrdemote(&ip->i_iolock);
>  
> @@ -294,6 +327,12 @@ xfs_isilocked(
>  		return rwsem_is_locked(&ip->i_lock.mr_lock);
>  	}
>  
> +	if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
> +		if (!(lock_flags & XFS_MMAPLOCK_SHARED))
> +			return !!ip->i_mmaplock.mr_writer;
> +		return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
> +	}
> +
>  	if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
>  		if (!(lock_flags & XFS_IOLOCK_SHARED))
>  			return !!ip->i_iolock.mr_writer;
> @@ -314,14 +353,27 @@ int xfs_lock_delays;
>  #endif
>  
>  /*
> - * Bump the subclass so xfs_lock_inodes() acquires each lock with
> - * a different value
> + * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
> + * value. This shouldn't be called for page fault locking, but we also need to
> + * ensure we don't overrun the number of lockdep subclasses for the iolock or
> + * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
>   */
>  static inline int
>  xfs_lock_inumorder(int lock_mode, int subclass)
>  {
> -	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
> +	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
> +		ASSERT(subclass + XFS_LOCK_INUMORDER <
> +			(1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
>  		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
> +	}
> +
> +	if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
> +		ASSERT(subclass + XFS_LOCK_INUMORDER <
> +			(1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
> +		lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
> +							XFS_MMAPLOCK_SHIFT;
> +	}
> +
>  	if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
>  		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
>  
> @@ -440,10 +492,10 @@ again:
>  }
>  
>  /*
> - * xfs_lock_two_inodes() can only be used to lock one type of lock
> - * at a time - the iolock or the ilock, but not both at once. If
> - * we lock both at once, lockdep will report false positives saying
> - * we have violated locking orders.
> + * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
> + * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
> + * lock more than one at a time, lockdep will report false positives saying we
> + * have violated locking orders.
>   */
>  void
>  xfs_lock_two_inodes(
> @@ -455,8 +507,12 @@ xfs_lock_two_inodes(
>  	int			attempts = 0;
>  	xfs_log_item_t		*lp;
>  
> -	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
> -		ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
> +	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
> +		ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
> +		ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
> +	} else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
> +		ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
> +

Should this last branch not also check for iolock flags? If not, how is
that consistent with the function comment above?

Brian

>  	ASSERT(ip0->i_ino != ip1->i_ino);
>  
>  	if (ip0->i_ino > ip1->i_ino) {
> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> index de97ccc..8e7a12a 100644
> --- a/fs/xfs/xfs_inode.h
> +++ b/fs/xfs/xfs_inode.h
> @@ -56,6 +56,7 @@ typedef struct xfs_inode {
>  	struct xfs_inode_log_item *i_itemp;	/* logging information */
>  	mrlock_t		i_lock;		/* inode lock */
>  	mrlock_t		i_iolock;	/* inode IO lock */
> +	mrlock_t		i_mmaplock;	/* inode mmap IO lock */
>  	atomic_t		i_pincount;	/* inode pin count */
>  	spinlock_t		i_flags_lock;	/* inode i_flags lock */
>  	/* Miscellaneous state. */
> @@ -263,15 +264,20 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
>  #define	XFS_IOLOCK_SHARED	(1<<1)
>  #define	XFS_ILOCK_EXCL		(1<<2)
>  #define	XFS_ILOCK_SHARED	(1<<3)
> +#define	XFS_MMAPLOCK_EXCL	(1<<4)
> +#define	XFS_MMAPLOCK_SHARED	(1<<5)
>  
>  #define XFS_LOCK_MASK		(XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
> -				| XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
> +				| XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
> +				| XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED)
>  
>  #define XFS_LOCK_FLAGS \
>  	{ XFS_IOLOCK_EXCL,	"IOLOCK_EXCL" }, \
>  	{ XFS_IOLOCK_SHARED,	"IOLOCK_SHARED" }, \
>  	{ XFS_ILOCK_EXCL,	"ILOCK_EXCL" }, \
> -	{ XFS_ILOCK_SHARED,	"ILOCK_SHARED" }
> +	{ XFS_ILOCK_SHARED,	"ILOCK_SHARED" }, \
> +	{ XFS_MMAPLOCK_EXCL,	"MMAPLOCK_EXCL" }, \
> +	{ XFS_MMAPLOCK_SHARED,	"MMAPLOCK_SHARED" }
>  
>  
>  /*
> @@ -302,17 +308,26 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
>  #define XFS_IOLOCK_SHIFT	16
>  #define	XFS_IOLOCK_PARENT	(XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
>  
> +#define XFS_MMAPLOCK_SHIFT	20
> +
>  #define XFS_ILOCK_SHIFT		24
>  #define	XFS_ILOCK_PARENT	(XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
>  #define	XFS_ILOCK_RTBITMAP	(XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
>  #define	XFS_ILOCK_RTSUM		(XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
>  
> -#define XFS_IOLOCK_DEP_MASK	0x00ff0000
> +#define XFS_IOLOCK_DEP_MASK	0x000f0000
> +#define XFS_MMAPLOCK_DEP_MASK	0x00f00000
>  #define XFS_ILOCK_DEP_MASK	0xff000000
> -#define XFS_LOCK_DEP_MASK	(XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK)
> -
> -#define XFS_IOLOCK_DEP(flags)	(((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
> -#define XFS_ILOCK_DEP(flags)	(((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
> +#define XFS_LOCK_DEP_MASK	(XFS_IOLOCK_DEP_MASK | \
> +				 XFS_MMAPLOCK_DEP_MASK | \
> +				 XFS_ILOCK_DEP_MASK)
> +
> +#define XFS_IOLOCK_DEP(flags)	(((flags) & XFS_IOLOCK_DEP_MASK) \
> +					>> XFS_IOLOCK_SHIFT)
> +#define XFS_MMAPLOCK_DEP(flags)	(((flags) & XFS_MMAPLOCK_DEP_MASK) \
> +					>> XFS_MMAPLOCK_SHIFT)
> +#define XFS_ILOCK_DEP(flags)	(((flags) & XFS_ILOCK_DEP_MASK) \
> +					>> XFS_ILOCK_SHIFT)
>  
>  /*
>   * For multiple groups support: if S_ISGID bit is set in the parent
> diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
> index afd6bae..40d2ac5 100644
> --- a/fs/xfs/xfs_super.c
> +++ b/fs/xfs/xfs_super.c
> @@ -986,6 +986,8 @@ xfs_fs_inode_init_once(
>  	atomic_set(&ip->i_pincount, 0);
>  	spin_lock_init(&ip->i_flags_lock);
>  
> +	mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
> +		     "xfsino", ip->i_ino);
>  	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
>  		     "xfsino", ip->i_ino);
>  }
> -- 
> 2.0.0
> 
> _______________________________________________
> xfs mailing list
> xfs@xxxxxxxxxxx
> http://oss.sgi.com/mailman/listinfo/xfs
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux