Re: [PATCH 4/4] xfs: introduce inode unlink log item

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Jun 23, 2020 at 07:50:15PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@xxxxxxxxxx>
> 
> Tracking dirty inodes via cluster buffers creates lock ordering
> issues with logging unlinked inode updates direct to the cluster
> buffer. The unlinked inode list is unordered, so we can lock cluster
> buffers in random orders and that causes deadlocks.
> 
> To solve this problem, we really want to dealy locking the cluster
> buffers until the pre-commit phase where we can order the buffers
> correctly along with all the other inode cluster buffers that are
> locked by the transaction. However, to do this we need to be able to
> tell the transaction which inodes need to have there unlinked list
> updated and what it should be updated to.
> 
> We can delay the buffer update to the pre-commit phase based on the
> fact taht all unlinked inode list updates are serialised by the AGI
> buffer. It will be locked into the transaction before the list
> update starts, and will remain locked until the transaction commits.
> Hence we can lock and update the cluster buffers safely any time
> during the transaction and we are still safe from other racing
> unlinked list updates.
> 
> The iunlink log item currently only exists in memory. we need a log
> item to attach information to the transaction, but it's context
> is completely owned by the transaction. Hence it is never formatted
> or inserted into the CIL, nor is it seen by the journal, the AIL or
> log recovery.
> 
> This makes it a very simple log item, and the changes makes results
> in adding addition buffer log items to the transaction. Hence once
> the iunlink log item has run it's pre-commit operation, it can be
> dropped by the transaction and released.
> 
> The creation of this in-memory intent does not prevent us from
> extending it in future to the journal to replace buffer based
> logging of the unlinked list. Changing the format of the items we
> write to the on disk journal is beyond the scope of this patchset,
> hence we limit it to being in-memory only.
> 
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> ---
>  fs/xfs/Makefile           |   1 +
>  fs/xfs/xfs_inode.c        |  70 +++----------------
>  fs/xfs/xfs_inode_item.c   |   3 +-
>  fs/xfs/xfs_iunlink_item.c | 141 ++++++++++++++++++++++++++++++++++++++
>  fs/xfs/xfs_iunlink_item.h |  24 +++++++
>  fs/xfs/xfs_super.c        |  10 +++
>  6 files changed, 189 insertions(+), 60 deletions(-)
>  create mode 100644 fs/xfs/xfs_iunlink_item.c
>  create mode 100644 fs/xfs/xfs_iunlink_item.h
> 
...
> diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
> index 0494b907c63d..bc1970c37edc 100644
> --- a/fs/xfs/xfs_inode_item.c
> +++ b/fs/xfs/xfs_inode_item.c
> @@ -488,8 +488,9 @@ xfs_inode_item_push(
>  	ASSERT(iip->ili_item.li_buf);
>  
>  	if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp) ||
> -	    (ip->i_flags & XFS_ISTALE))
> +	    (ip->i_flags & XFS_ISTALE)) {
>  		return XFS_ITEM_PINNED;
> +	}

Spurious change..?

>  
>  	/* If the inode is already flush locked, we're already flushing. */
>  	if (xfs_iflags_test(ip, XFS_IFLUSHING))
> diff --git a/fs/xfs/xfs_iunlink_item.c b/fs/xfs/xfs_iunlink_item.c
> new file mode 100644
> index 000000000000..83f1dc81133b
> --- /dev/null
> +++ b/fs/xfs/xfs_iunlink_item.c
> @@ -0,0 +1,141 @@
...
> +
> +static const struct xfs_item_ops xfs_iunlink_item_ops = {
> +	.flags		= XFS_ITEM_RELEASE_WHEN_COMMITTED,
> +	.iop_release	= xfs_iunlink_item_release,

Presumably we need the release callback for transaction abort, but the
flag looks unnecessary. That triggers a release on commit to the on-disk
log, which IIUC should never happen for this item.

> +	.iop_sort	= xfs_iunlink_item_sort,
> +	.iop_precommit	= xfs_iunlink_item_precommit,
> +};
> +
> +
> +/*
> + * Initialize the inode log item for a newly allocated (in-core) inode.
> + *
> + * Inode extents can only reside within an AG. Hence specify the starting
> + * block for the inode chunk by offset within an AG as well as the
> + * length of the allocated extent.
> + *
> + * This joins the item to the transaction and marks it dirty so
> + * that we don't need a separate call to do this, nor does the
> + * caller need to know anything about the iunlink item.
> + */

Looks like some copy/paste remnants in the comment.

Brian

> +void
> +xfs_iunlink_log(
> +	struct xfs_trans	*tp,
> +	struct xfs_inode	*ip)
> +{
> +	struct xfs_iunlink_item	*iup;
> +
> +	iup = kmem_zone_zalloc(xfs_iunlink_zone, 0);
> +
> +	xfs_log_item_init(tp->t_mountp, &iup->iu_item, XFS_LI_IUNLINK,
> +			  &xfs_iunlink_item_ops);
> +
> +	iup->iu_ino = ip->i_ino;
> +	iup->iu_next_unlinked = ip->i_next_unlinked;
> +	iup->iu_imap = ip->i_imap;
> +
> +	xfs_trans_add_item(tp, &iup->iu_item);
> +	tp->t_flags |= XFS_TRANS_DIRTY;
> +	set_bit(XFS_LI_DIRTY, &iup->iu_item.li_flags);
> +}
> diff --git a/fs/xfs/xfs_iunlink_item.h b/fs/xfs/xfs_iunlink_item.h
> new file mode 100644
> index 000000000000..c9e58acf4ccf
> --- /dev/null
> +++ b/fs/xfs/xfs_iunlink_item.h
> @@ -0,0 +1,24 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2020, Red Hat, Inc.
> + * All Rights Reserved.
> + */
> +#ifndef XFS_IUNLINK_ITEM_H
> +#define XFS_IUNLINK_ITEM_H	1
> +
> +struct xfs_trans;
> +struct xfs_inode;
> +
> +/* in memory log item structure */
> +struct xfs_iunlink_item {
> +	struct xfs_log_item	iu_item;
> +	struct xfs_imap		iu_imap;
> +	xfs_ino_t		iu_ino;
> +	xfs_agino_t		iu_next_unlinked;
> +};
> +
> +extern kmem_zone_t *xfs_iunlink_zone;
> +
> +void xfs_iunlink_log(struct xfs_trans *tp, struct xfs_inode *ip);
> +
> +#endif	/* XFS_IUNLINK_ITEM_H */
> diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
> index 5a5d9453cf51..a36dfb0e7e5b 100644
> --- a/fs/xfs/xfs_super.c
> +++ b/fs/xfs/xfs_super.c
> @@ -35,6 +35,7 @@
>  #include "xfs_refcount_item.h"
>  #include "xfs_bmap_item.h"
>  #include "xfs_reflink.h"
> +#include "xfs_iunlink_item.h"
>  
>  #include <linux/magic.h>
>  #include <linux/fs_context.h>
> @@ -1955,8 +1956,16 @@ xfs_init_zones(void)
>  	if (!xfs_bui_zone)
>  		goto out_destroy_bud_zone;
>  
> +	xfs_iunlink_zone = kmem_cache_create("xfs_iul_item",
> +					     sizeof(struct xfs_iunlink_item),
> +					     0, 0, NULL);
> +	if (!xfs_iunlink_zone)
> +		goto out_destroy_bui_zone;
> +
>  	return 0;
>  
> + out_destroy_bui_zone:
> +	kmem_cache_destroy(xfs_bui_zone);
>   out_destroy_bud_zone:
>  	kmem_cache_destroy(xfs_bud_zone);
>   out_destroy_cui_zone:
> @@ -2003,6 +2012,7 @@ xfs_destroy_zones(void)
>  	 * destroy caches.
>  	 */
>  	rcu_barrier();
> +	kmem_cache_destroy(xfs_iunlink_zone);
>  	kmem_cache_destroy(xfs_bui_zone);
>  	kmem_cache_destroy(xfs_bud_zone);
>  	kmem_cache_destroy(xfs_cui_zone);
> -- 
> 2.26.2.761.g0e0b3e54be
> 




[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux