Re: [PATCH v21 06/13] xfs: Implement attr logging and replay

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 7/9/21 6:08 PM, Darrick J. Wong wrote:
On Wed, Jul 07, 2021 at 03:21:04PM -0700, Allison Henderson wrote:
This patch adds the needed routines to create, log and replay attr
intents

I might call that last part "...and recover logged extended attribute
intents."
Sure, will update the commit message here



Signed-off-by: Allison Henderson <allison.henderson@xxxxxxxxxx>
---
  fs/xfs/libxfs/xfs_defer.c  |   1 +
  fs/xfs/libxfs/xfs_defer.h  |   1 +
  fs/xfs/libxfs/xfs_format.h |   4 +-
  fs/xfs/xfs_attr_item.c     | 394 +++++++++++++++++++++++++++++++++++++++++++++
  4 files changed, 399 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index eff4a12..e9caff7 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -178,6 +178,7 @@ static const struct xfs_defer_op_type *defer_op_types[] = {
  	[XFS_DEFER_OPS_TYPE_RMAP]	= &xfs_rmap_update_defer_type,
  	[XFS_DEFER_OPS_TYPE_FREE]	= &xfs_extent_free_defer_type,
  	[XFS_DEFER_OPS_TYPE_AGFL_FREE]	= &xfs_agfl_free_defer_type,
+	[XFS_DEFER_OPS_TYPE_ATTR]	= &xfs_attr_defer_type,
  };
static void
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 0ed9dfa..72a5789 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -19,6 +19,7 @@ enum xfs_defer_ops_type {
  	XFS_DEFER_OPS_TYPE_RMAP,
  	XFS_DEFER_OPS_TYPE_FREE,
  	XFS_DEFER_OPS_TYPE_AGFL_FREE,
+	XFS_DEFER_OPS_TYPE_ATTR,
  	XFS_DEFER_OPS_TYPE_MAX,
  };
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 3a4da111..477e815 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -485,7 +485,9 @@ xfs_sb_has_incompat_feature(
  	return (sbp->sb_features_incompat & feature) != 0;
  }
-#define XFS_SB_FEAT_INCOMPAT_LOG_ALL 0
+#define XFS_SB_FEAT_INCOMPAT_LOG_DELATTR   (1 << 0)	/* Delayed Attributes */
+#define XFS_SB_FEAT_INCOMPAT_LOG_ALL \
+	(XFS_SB_FEAT_INCOMPAT_LOG_DELATTR)
  #define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN	~XFS_SB_FEAT_INCOMPAT_LOG_ALL
  static inline bool
  xfs_sb_has_incompat_log_feature(
diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
index c9033e2..eda6ae3 100644
--- a/fs/xfs/xfs_attr_item.c
+++ b/fs/xfs/xfs_attr_item.c
@@ -282,6 +282,183 @@ xfs_attrd_item_release(
  	xfs_attrd_item_free(attrdp);
  }
+/*
+ * Performs one step of an attribute update intent and marks the attrd item
+ * dirty..  An attr operation may be a set or a remove.  Note that the
+ * transaction is marked dirty regardless of whether the operation succeeds or
+ * fails to support the ATTRI/ATTRD lifecycle rules.
+ */
+int
+xfs_trans_attr_finish_update(
+	struct xfs_delattr_context	*dac,
+	struct xfs_attrd_log_item	*attrdp,
+	struct xfs_buf			**leaf_bp,
+	uint32_t			op_flags)
+{
+	struct xfs_da_args		*args = dac->da_args;
+	int				error;
+
+	error = xfs_qm_dqattach_locked(args->dp, 0);

Hmm, this function is called at runtime and recovery time.  It's
/really/ late to be attaching dquots.

OH, this is for the recovery case, isn't it?

Hmm, xfs_attri_item_recover really ought to share the same
iget/dqattach/set-irecovery thing that bmap and swapext use.  I should
really post that cleanup as a patch.

Sure, I'll put it on my todo was well, that one I think can happen independantly of this set.


+	if (error)
+		return error;
+
+	switch (op_flags) {

op_flags ought to be masked with XFS_ATTR_OP_FLAGS_TYPE_MASK.
Will fix


+	case XFS_ATTR_OP_FLAGS_SET:
+		args->op_flags |= XFS_DA_OP_ADDNAME;
+		error = xfs_attr_set_iter(dac, leaf_bp);
+		break;
+	case XFS_ATTR_OP_FLAGS_REMOVE:
+		ASSERT(XFS_IFORK_Q(args->dp));
+		error = xfs_attr_remove_iter(dac);
+		break;
+	default:
+		error = -EFSCORRUPTED;
+		break;
+	}
+
+	/*
+	 * Mark the transaction dirty, even on error. This ensures the
+	 * transaction is aborted, which:
+	 *
+	 * 1.) releases the ATTRI and frees the ATTRD
+	 * 2.) shuts down the filesystem
+	 */
+	args->trans->t_flags |= XFS_TRANS_DIRTY;
+
+	/*
+	 * attr intent/done items are null when delayed attributes are disabled
+	 */
+	if (attrdp)
+		set_bit(XFS_LI_DIRTY, &attrdp->attrd_item.li_flags);
+
+	return error;
+}
+
+/* Log an attr to the intent item. */
+STATIC void
+xfs_attr_log_item(
+	struct xfs_trans		*tp,
+	struct xfs_attri_log_item	*attrip,
+	struct xfs_attr_item		*attr)
+{
+	struct xfs_attri_log_format	*attrp;
+
+	tp->t_flags |= XFS_TRANS_DIRTY;
+	set_bit(XFS_LI_DIRTY, &attrip->attri_item.li_flags);
+
+	/*
+	 * At this point the xfs_attr_item has been constructed, and we've
+	 * created the log intent. Fill in the attri log item and log format
+	 * structure with fields from this xfs_attr_item
+	 */
+	attrp = &attrip->attri_format;
+	attrp->alfi_ino = attr->xattri_dac.da_args->dp->i_ino;
+	attrp->alfi_op_flags = attr->xattri_op_flags;
+	attrp->alfi_value_len = attr->xattri_dac.da_args->valuelen;
+	attrp->alfi_name_len = attr->xattri_dac.da_args->namelen;
+	attrp->alfi_attr_flags = attr->xattri_dac.da_args->attr_filter;
+
+	attrip->attri_name = (void *)attr->xattri_dac.da_args->name;
+	attrip->attri_value = attr->xattri_dac.da_args->value;
+	attrip->attri_name_len = attr->xattri_dac.da_args->namelen;
+	attrip->attri_value_len = attr->xattri_dac.da_args->valuelen;
+}
+
+/* Get an ATTRI. */
+static struct xfs_log_item *
+xfs_attr_create_intent(
+	struct xfs_trans		*tp,
+	struct list_head		*items,
+	unsigned int			count,
+	bool				sort)
+{
+	struct xfs_mount		*mp = tp->t_mountp;
+	struct xfs_attri_log_item	*attrip;
+	struct xfs_attr_item		*attr;
+
+	ASSERT(count == 1);
+
+	if (!xfs_hasdelattr(mp))
+		return NULL;
+
+	xfs_sb_add_incompat_log_features(&mp->m_sb,
+					 XFS_SB_FEAT_INCOMPAT_LOG_DELATTR);

Um, I think I see a bug here--

I don't see anything in this series that calls xfs_add_incompat_log_feature.

Adding a log incompat feature on a filesystem is a tricky dance, because
one has to write the primary superblock (which means logging the sb
change, forcing the log, and pushing the AIL) /before/ logging the intent.

All this is to prevent an old kernel from tripping over a logged item
that it doesn't understand, because an earlier recovered transaction
contained the bit it needed to conclude that it shouldn't have tried to
recover.


/*
  * Get permission to use log-assisted atomic exchange of file extents.
  *
  * Callers must not be running any transactions or hold any inode locks, and
  * they must release the permission by calling xlog_drop_incompat_feat
  * when they're done.
  */
int
xfs_attr_use_log_assist(
	struct xfs_mount	*mp)
{
	int			error = 0;

	/*
	 * Protect ourselves from an idle log clearing the logged xattrs
	 * log incompat feature bit.
	 */
	xlog_use_incompat_feat(mp->m_log);

	/*
	 * If log-assisted xattrs are already enabled, the caller can use the
	 * log assisted swap functions with the log-incompat reference we got.
	 */
	if (xfs_sb_version_hasdelattr(&mp->m_sb))
		return 0;

	/* Enable log-assisted xattrs. */
	xfs_warn_once(mp,
  "EXPERIMENTAL logged extended attributes feature added. Use at your own risk!");
	error = xfs_add_incompat_log_feature(mp,
			XFS_SB_FEAT_INCOMPAT_LOG_DELATTR);
	if (error)
		goto drop_incompat;

	return 0;
drop_incompat:
	xlog_drop_incompat_feat(mp->m_log);
	return error;
}

...and then in xfs_attr_set, you need something like:

	/* Signal the log that we want to use logged xattrs. */
	use_log_items = xfs_hasdelattr(mp);
	if (use_log_items) {
		error = xfs_attr_use_log_assist(mp);
		if (error)
			goto barf;
	}

	error = xfs_trans_alloc(..., &tp);

	if (valuelen > 0)
		error = xfs_attr_set_args(...);
	else
		error = xfs_attr_remove_args(...);

	error = xfs_trans_commit(...);

	/* Signal the log that we're done with using logged xattrs. */
	if (use_log_items)
		xlog_drop_incompat_feat(mp->m_log);

The xfs_add_incompat_log_feature function ensures that the log has been
pushed out to disk and that the ondisk primary superblock has been
written (manually!) with the new incompat feature set.  The
xlog_use_incompat_feat prevents the log from clearing the feature bit
until we're done using it.

Once xfs_attr_use_log_assist completes, we know that it is safe to begin
using deferred xattrs, because there's no possible way that an old
kernel can screw things up.

Ah, ok. I'll see if I can find a place to work that in. Maybe patch 8 is a good place to do this since that is where the xfs_attr_set modifications happen.


+
+	attrip = xfs_attri_init(mp, 0);
+	if (attrip == NULL)
+		return NULL;
+
+	xfs_trans_add_item(tp, &attrip->attri_item);
+	list_for_each_entry(attr, items, xattri_list)
+		xfs_attr_log_item(tp, attrip, attr);
+	return &attrip->attri_item;
+}
+
+/* Process an attr. */
+STATIC int
+xfs_attr_finish_item(
+	struct xfs_trans		*tp,
+	struct xfs_log_item		*done,
+	struct list_head		*item,
+	struct xfs_btree_cur		**state)
+{
+	struct xfs_attr_item		*attr;
+	struct xfs_attrd_log_item	*done_item = NULL;
+	int				error;
+	struct xfs_delattr_context	*dac;
+
+	attr = container_of(item, struct xfs_attr_item, xattri_list);
+	dac = &attr->xattri_dac;
+	if (done)
+		done_item = ATTRD_ITEM(done);
+
+	/*
+	 * Corner case that can happen during a recovery.  Because the first
+	 * iteration of a multi part delay op happens in xfs_attri_item_recover
+	 * to maintain the order of the log replay items.  But the new
+	 * transactions do not automatically rejoin during a recovery as they do
+	 * in a standard delay op, so we need to catch this here and rejoin the
+	 * leaf to the new transaction
+	 */
+	if (attr->xattri_dac.leaf_bp &&
+	    attr->xattri_dac.leaf_bp->b_transp != tp) {
+		xfs_trans_bjoin(tp, attr->xattri_dac.leaf_bp);
+		xfs_trans_bhold(tp, attr->xattri_dac.leaf_bp);
+	}
+
+	/*
+	 * Always reset trans after EAGAIN cycle
+	 * since the transaction is new
+	 */
+	dac->da_args->trans = tp;
+
+	error = xfs_trans_attr_finish_update(dac, done_item, &dac->leaf_bp,
+					     attr->xattri_op_flags);
+	if (error != -EAGAIN)
+		kmem_free(attr);
+
+	return error;
+}
+
+/* Abort all pending ATTRs. */
+STATIC void
+xfs_attr_abort_intent(
+	struct xfs_log_item		*intent)
+{
+	xfs_attri_release(ATTRI_ITEM(intent));
+}
+
+/* Cancel an attr */
+STATIC void
+xfs_attr_cancel_item(
+	struct list_head		*item)
+{
+	struct xfs_attr_item		*attr;
+
+	attr = container_of(item, struct xfs_attr_item, xattri_list);
+	kmem_free(attr);
+}
+
  STATIC xfs_lsn_t
  xfs_attri_item_committed(
  	struct xfs_log_item		*lip,
@@ -313,6 +490,30 @@ xfs_attri_item_match(
  	return ATTRI_ITEM(lip)->attri_format.alfi_id == intent_id;
  }
+/*
+ * This routine is called to allocate an "attr free done" log item.
+ */
+struct xfs_attrd_log_item *
+xfs_trans_get_attrd(struct xfs_trans		*tp,
+		  struct xfs_attri_log_item	*attrip)
+{
+	struct xfs_attrd_log_item		*attrdp;
+	uint					size;
+
+	ASSERT(tp != NULL);
+
+	size = sizeof(struct xfs_attrd_log_item);
+	attrdp = kmem_zalloc(size, 0);
+
+	xfs_log_item_init(tp->t_mountp, &attrdp->attrd_item, XFS_LI_ATTRD,
+			  &xfs_attrd_item_ops);
+	attrdp->attrd_attrip = attrip;
+	attrdp->attrd_format.alfd_alf_id = attrip->attri_format.alfi_id;
+
+	xfs_trans_add_item(tp, &attrdp->attrd_item);
+	return attrdp;
+}
+
  static const struct xfs_item_ops xfs_attrd_item_ops = {
  	.flags		= XFS_ITEM_RELEASE_WHEN_COMMITTED,
  	.iop_size	= xfs_attrd_item_size,
@@ -320,6 +521,29 @@ static const struct xfs_item_ops xfs_attrd_item_ops = {
  	.iop_release    = xfs_attrd_item_release,
  };
+
+/* Get an ATTRD so we can process all the attrs. */
+static struct xfs_log_item *
+xfs_attr_create_done(
+	struct xfs_trans		*tp,
+	struct xfs_log_item		*intent,
+	unsigned int			count)
+{
+	if (!intent)
+		return NULL;
+
+	return &xfs_trans_get_attrd(tp, ATTRI_ITEM(intent))->attrd_item;
+}
+
+const struct xfs_defer_op_type xfs_attr_defer_type = {
+	.max_items	= 1,
+	.create_intent	= xfs_attr_create_intent,
+	.abort_intent	= xfs_attr_abort_intent,
+	.create_done	= xfs_attr_create_done,
+	.finish_item	= xfs_attr_finish_item,
+	.cancel_item	= xfs_attr_cancel_item,
+};
+
  /* Is this recovered ATTRI ok? */
  static inline bool
  xfs_attri_validate(
@@ -346,13 +570,183 @@ xfs_attri_validate(
  	return xfs_hasdelattr(mp);
  }
+/*
+ * Process an attr intent item that was recovered from the log.  We need to
+ * delete the attr that it describes.
+ */
+STATIC int
+xfs_attri_item_recover(
+	struct xfs_log_item		*lip,
+	struct list_head		*capture_list)
+{
+	struct xfs_attri_log_item	*attrip = ATTRI_ITEM(lip);
+	struct xfs_attr_item		*new_attr;
+	struct xfs_mount		*mp = lip->li_mountp;
+	struct xfs_inode		*ip;
+	struct xfs_da_args		args;
+	struct xfs_da_args		*new_args;
+	struct xfs_trans_res		tres;
+	bool				rsvd;
+	struct xfs_attri_log_format	*attrp;
+	int				error;
+	int				total;
+	int				local;
+	struct xfs_attrd_log_item	*done_item = NULL;
+	struct xfs_attr_item		attr = {
+		.xattri_op_flags	= attrip->attri_format.alfi_op_flags,
+		.xattri_dac.da_args	= &args,
+	};
+
+	/*
+	 * First check the validity of the attr described by the ATTRI.  If any
+	 * are bad, then assume that all are bad and just toss the ATTRI.
+	 */
+	attrp = &attrip->attri_format;
+	if (!xfs_attri_validate(mp, attrip))
+		return -EFSCORRUPTED;
+

Everything from here...

+	error = xfs_iget(mp, 0, attrp->alfi_ino, 0, 0, &ip);
+	if (error)
+		return error;
+
+	if (VFS_I(ip)->i_nlink == 0)
+		xfs_iflags_set(ip, XFS_IRECOVERY);

...to here should be a call to xlog_recover_iget(), which is in this
patch:

https://djwong.org/docs/kernel/171-xfs-log-recovery-refactor-iget.patch
Alrighty, I will pick up that patch as well and add it to the set


+
+	memset(&args, 0, sizeof(struct xfs_da_args));
+	args.dp = ip;
+	args.geo = mp->m_attr_geo;
+	args.op_flags = attrp->alfi_op_flags;
+	args.whichfork = XFS_ATTR_FORK;
+	args.name = attrip->attri_name;
+	args.namelen = attrp->alfi_name_len;
+	args.hashval = xfs_da_hashname(args.name, args.namelen);
+	args.attr_filter = attrp->alfi_attr_flags;
+
+	if (attrp->alfi_op_flags == XFS_ATTR_OP_FLAGS_SET) {
+		args.value = attrip->attri_value;
+		args.valuelen = attrp->alfi_value_len;
+		args.total = xfs_attr_calc_size(&args, &local);
+
+		tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
+				 M_RES(mp)->tr_attrsetrt.tr_logres *
+					args.total;
+		tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
+		tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
+		total = args.total;
+	} else {
+		tres = M_RES(mp)->tr_attrrm;
+		total = XFS_ATTRRM_SPACE_RES(mp);
+	}
+	error = xfs_trans_alloc(mp, &tres, total, 0,
+				rsvd ? XFS_TRANS_RESERVE : 0, &args.trans);
+	if (error)
+		return error;
+
+	done_item = xfs_trans_get_attrd(args.trans, attrip);
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(args.trans, ip, 0);
+
+	error = xfs_trans_attr_finish_update(&attr.xattri_dac, done_item,
+					     &attr.xattri_dac.leaf_bp,
+					     attrp->alfi_op_flags);
+	if (error == -EAGAIN) {
+		/*
+		 * There's more work to do, so make a new xfs_attr_item and add
+		 * it to this transaction.  We don't use xfs_attr_item_init here
+		 * because we need the info stored in the current attr to
+		 * continue with this multi-part operation.  So, alloc space
+		 * for it and the args and copy everything there.
+		 */
+		new_attr = kmem_zalloc(sizeof(struct xfs_attr_item) +
+				       sizeof(struct xfs_da_args), KM_NOFS);
+		new_args = (struct xfs_da_args *)((char *)new_attr +
+			   sizeof(struct xfs_attr_item));
+
+		memcpy(new_args, &args, sizeof(struct xfs_da_args));
+		memcpy(new_attr, &attr, sizeof(struct xfs_attr_item));
+
+		new_attr->xattri_dac.da_args = new_args;
+		memset(&new_attr->xattri_list, 0, sizeof(struct list_head));
+
+		xfs_defer_add(args.trans, XFS_DEFER_OPS_TYPE_ATTR,
+			      &new_attr->xattri_list);

Rather than allocating a huge struct xfs_attr_item (aka the incore state
tracking mechanism) on the stack, filling it out, and copying it if
there's more work to do, why not kmem_zalloc it from the start, and
either attach it to the transaction if there's more work to do, or free
it if (unlikely) we actually finished the xattr update?
That should work too. I guess I just assumed people would want to see alloc/free minimized if it can be, but most likley there will be some back and forth with most attr operations. will update.


+
+		/* Do not send -EAGAIN back to caller */
+		error = 0;
+	} else if (error) {
+		xfs_trans_cancel(args.trans);
+		goto out;
+	}
+
+	xfs_defer_ops_capture_and_commit(args.trans, ip, capture_list);

No error checking here?
will fix.

Thanks for the reviews!
Allison


--D

+
+out:
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	xfs_irele(ip);
+	return error;
+}
+
+/* Re-log an intent item to push the log tail forward. */
+static struct xfs_log_item *
+xfs_attri_item_relog(
+	struct xfs_log_item		*intent,
+	struct xfs_trans		*tp)
+{
+	struct xfs_attrd_log_item	*attrdp;
+	struct xfs_attri_log_item	*old_attrip;
+	struct xfs_attri_log_item	*new_attrip;
+	struct xfs_attri_log_format	*new_attrp;
+	struct xfs_attri_log_format	*old_attrp;
+	int				buffer_size;
+
+	old_attrip = ATTRI_ITEM(intent);
+	old_attrp = &old_attrip->attri_format;
+	buffer_size = old_attrp->alfi_value_len + old_attrp->alfi_name_len;
+
+	tp->t_flags |= XFS_TRANS_DIRTY;
+	attrdp = xfs_trans_get_attrd(tp, old_attrip);
+	set_bit(XFS_LI_DIRTY, &attrdp->attrd_item.li_flags);
+
+	new_attrip = xfs_attri_init(tp->t_mountp, buffer_size);
+	new_attrp = &new_attrip->attri_format;
+
+	new_attrp->alfi_ino = old_attrp->alfi_ino;
+	new_attrp->alfi_op_flags = old_attrp->alfi_op_flags;
+	new_attrp->alfi_value_len = old_attrp->alfi_value_len;
+	new_attrp->alfi_name_len = old_attrp->alfi_name_len;
+	new_attrp->alfi_attr_flags = old_attrp->alfi_attr_flags;
+
+	new_attrip->attri_name_len = old_attrip->attri_name_len;
+	new_attrip->attri_name = ((char *)new_attrip) +
+				 sizeof(struct xfs_attri_log_item);
+	memcpy(new_attrip->attri_name, old_attrip->attri_name,
+		new_attrip->attri_name_len);
+
+	new_attrip->attri_value_len = old_attrip->attri_value_len;
+	if (new_attrip->attri_value_len > 0) {
+		new_attrip->attri_value = new_attrip->attri_name +
+					  new_attrip->attri_name_len;
+
+		memcpy(new_attrip->attri_value, old_attrip->attri_value,
+		       new_attrip->attri_value_len);
+	}
+
+	xfs_trans_add_item(tp, &new_attrip->attri_item);
+	set_bit(XFS_LI_DIRTY, &new_attrip->attri_item.li_flags);
+
+	return &new_attrip->attri_item;
+}
+
  static const struct xfs_item_ops xfs_attri_item_ops = {
  	.iop_size	= xfs_attri_item_size,
  	.iop_format	= xfs_attri_item_format,
  	.iop_unpin	= xfs_attri_item_unpin,
  	.iop_committed	= xfs_attri_item_committed,
  	.iop_release    = xfs_attri_item_release,
+	.iop_recover	= xfs_attri_item_recover,
  	.iop_match	= xfs_attri_item_match,
+	.iop_relog	= xfs_attri_item_relog,
  };
--
2.7.4




[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux