On Fri, Apr 02, 2021 at 02:01:20AM -0700, Allison Henderson wrote: > > > On 4/1/21 9:57 AM, Brian Foster wrote: > > On Thu, Mar 25, 2021 at 05:33:08PM -0700, Allison Henderson wrote: > > > This patch modifies the attr set routines to be delay ready. This means > > > they no longer roll or commit transactions, but instead return -EAGAIN > > > to have the calling routine roll and refresh the transaction. In this > > > series, xfs_attr_set_args has become xfs_attr_set_iter, which uses a > > > state machine like switch to keep track of where it was when EAGAIN was > > > returned. See xfs_attr.h for a more detailed diagram of the states. > > > > > > Two new helper functions have been added: xfs_attr_rmtval_find_space and > > > xfs_attr_rmtval_set_blk. They provide a subset of logic similar to > > > xfs_attr_rmtval_set, but they store the current block in the delay attr > > > context to allow the caller to roll the transaction between allocations. > > > This helps to simplify and consolidate code used by > > > xfs_attr_leaf_addname and xfs_attr_node_addname. xfs_attr_set_args has > > > now become a simple loop to refresh the transaction until the operation > > > is completed. Lastly, xfs_attr_rmtval_remove is no longer used, and is > > > removed. > > > > > > Signed-off-by: Allison Henderson <allison.henderson@xxxxxxxxxx> > > > Reviewed-by: Chandan Babu R <chandanrlinux@xxxxxxxxx> > > > Reviewed-by: Darrick J. Wong <djwong@xxxxxxxxxx> > > > --- > > > fs/xfs/libxfs/xfs_attr.c | 446 ++++++++++++++++++++++++---------------- > > > fs/xfs/libxfs/xfs_attr.h | 241 +++++++++++++++++++++- > > > fs/xfs/libxfs/xfs_attr_remote.c | 100 ++++++--- > > > fs/xfs/libxfs/xfs_attr_remote.h | 5 +- > > > fs/xfs/xfs_trace.h | 1 - > > > 5 files changed, 582 insertions(+), 211 deletions(-) > > > > > > diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c > > > index 4a73691..6a86b62 100644 > > > --- a/fs/xfs/libxfs/xfs_attr.c > > > +++ b/fs/xfs/libxfs/xfs_attr.c > > ... > > > @@ -246,29 +247,55 @@ xfs_attr_trans_roll( > > > return error; > > > } > > > +/* > > > + * Set the attribute specified in @args. > > > + */ > > > +int > > > +xfs_attr_set_args( > > > + struct xfs_da_args *args) > > > +{ > > > + struct xfs_buf *leaf_bp = NULL; > > > + int error = 0; > > > + struct xfs_delattr_context dac = { > > > + .da_args = args, > > > + }; > > > + > > > + do { > > > + error = xfs_attr_set_iter(&dac, &leaf_bp); > > > + if (error != -EAGAIN) > > > + break; > > > + > > > + error = xfs_attr_trans_roll(&dac); > > > + if (error) > > > + return error; > > > + } while (true); > > > + > > > + return error; > > > +} > > > + > > > STATIC int > > > xfs_attr_set_fmt( > > > - struct xfs_da_args *args) > > > + struct xfs_delattr_context *dac, > > > + struct xfs_buf **leaf_bp) > > > { > > > - struct xfs_buf *leaf_bp = NULL; > > > - struct xfs_inode *dp = args->dp; > > > - int error2, error = 0; > > > + struct xfs_da_args *args = dac->da_args; > > > + struct xfs_inode *dp = args->dp; > > > + int error = 0; > > > /* > > > * Try to add the attr to the attribute list in the inode. > > > */ > > > error = xfs_attr_try_sf_addname(dp, args); > > > - if (error != -ENOSPC) { > > > - error2 = xfs_trans_commit(args->trans); > > > - args->trans = NULL; > > > - return error ? error : error2; > > > - } > > > + > > > + /* Should only be 0, -EEXIST or -ENOSPC */ > > > + if (error != -ENOSPC) > > > + return error; > > > > Ok, so it looks like the commit that goes away here is replaced by one > > up the call stack. > > > > > /* > > > * It won't fit in the shortform, transform to a leaf block. > > > * GROT: another possible req'mt for a double-split btree op. > > > */ > > > - error = xfs_attr_shortform_to_leaf(args, &leaf_bp); > > > + error = xfs_attr_shortform_to_leaf(args, leaf_bp); > > > if (error) > > > return error; > > > @@ -277,95 +304,139 @@ xfs_attr_set_fmt( > > > * concurrent AIL push cannot grab the half-baked leaf buffer > > > * and run into problems with the write verifier. > > > */ > > > - xfs_trans_bhold(args->trans, leaf_bp); > > > - error = xfs_defer_finish(&args->trans); > > > - xfs_trans_bhold_release(args->trans, leaf_bp); > > > - if (error) { > > > - xfs_trans_brelse(args->trans, leaf_bp); > > > > What about the xfs_trans_brelse() in the error case that seems to > > disappear? > Hmm, maybe we could add some handler in xfs_attr_set_args such that if the > roll fails, we release the leaf_bp if it is not null? I suspect we need it somewhere. I don't recall the details of this code off the top of my head, but I remember it being tricky so we should probably work to preserve current logic unless there's a clear reason to do otherwise. > > > > > - return error; > > > - } > > > + xfs_trans_bhold(args->trans, *leaf_bp); > > > + /* > > > + * We're still in XFS_DAS_UNINIT state here. We've converted > > > + * the attr fork to leaf format and will restart with the leaf > > > + * add. > > > + */ > > > + dac->flags |= XFS_DAC_DEFER_FINISH; > > > return -EAGAIN; > > > } > > > /* > > > * Set the attribute specified in @args. > > > + * This routine is meant to function as a delayed operation, and may return > > > + * -EAGAIN when the transaction needs to be rolled. Calling functions will need > > > + * to handle this, and recall the function until a successful error code is > > > + * returned. > > > */ > > > int > > > -xfs_attr_set_args( > > > - struct xfs_da_args *args) > > > +xfs_attr_set_iter( > > > + struct xfs_delattr_context *dac, > > > + struct xfs_buf **leaf_bp) > > > { > > > - struct xfs_inode *dp = args->dp; > > > - struct xfs_buf *bp = NULL; > > > - struct xfs_da_state *state = NULL; > > > - int forkoff, error = 0; > > > + struct xfs_da_args *args = dac->da_args; > > > + struct xfs_inode *dp = args->dp; > > > + struct xfs_buf *bp = NULL; > > > + struct xfs_da_state *state = NULL; > > > + int forkoff, error = 0; > > > - /* > > > - * If the attribute list is already in leaf format, jump straight to > > > - * leaf handling. Otherwise, try to add the attribute to the shortform > > > - * list; if there's no room then convert the list to leaf format and try > > > - * again. > > > - */ > > > - if (xfs_attr_is_shortform(dp)) { > > > - error = xfs_attr_set_fmt(args); > > > - if (error != -EAGAIN) > > > - return error; > > > - } > > > + /* State machine switch */ > > > + switch (dac->dela_state) { > > > + case XFS_DAS_UNINIT: > > > + if (xfs_attr_is_shortform(dp)) > > > + return xfs_attr_set_fmt(dac, leaf_bp); > > > - if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { > > > - error = xfs_attr_leaf_try_add(args, bp); > > > - if (error == -ENOSPC) { > > > - /* > > > - * Promote the attribute list to the Btree format. > > > - */ > > > - error = xfs_attr3_leaf_to_node(args); > > > + /* > > > + * After a shortform to leaf conversion, we need to hold the > > > + * leaf and cycle out the transaction. When we get back, > > > + * we need to release the leaf to release the hold on the leaf > > > + * buffer. > > > + */ > > > + if (*leaf_bp != NULL) { > > > + xfs_trans_bhold_release(args->trans, *leaf_bp); > > > + *leaf_bp = NULL; > > > + } > > > + > > > + if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { > > > + error = xfs_attr_leaf_try_add(args, *leaf_bp); > > > + if (error == -ENOSPC) { > > > + /* > > > + * Promote the attribute list to the Btree > > > + * format. > > > + */ > > > + error = xfs_attr3_leaf_to_node(args); > > > + if (error) > > > + return error; > > > + > > > + /* > > > + * Finish any deferred work items and roll the > > > + * transaction once more. The goal here is to > > > + * call node_addname with the inode and > > > + * transaction in the same state (inode locked > > > + * and joined, transaction clean) no matter how > > > + * we got to this step. > > > + * > > > + * At this point, we are still in > > > + * XFS_DAS_UNINIT, but when we come back, we'll > > > + * be a node, so we'll fall down into the node > > > + * handling code below > > > + */ > > > + dac->flags |= XFS_DAC_DEFER_FINISH; > > > + return -EAGAIN; > > > + } > > > + else if (error) > > > > Nit: } else if (error) > > > Sure, will fix > > > > + return error; > > > + } > > > + else { > > > > Nit: } else { > will fix > > > > > > + error = xfs_attr_node_addname_find_attr(dac); > > > if (error) > > > return error; > > > - /* > > > - * Finish any deferred work items and roll the transaction once > > > - * more. The goal here is to call node_addname with the inode > > > - * and transaction in the same state (inode locked and joined, > > > - * transaction clean) no matter how we got to this step. > > > - */ > > > - error = xfs_defer_finish(&args->trans); > > > + error = xfs_attr_node_addname(dac); > > > if (error) > > > return error; > > > > Ok, so these couple of node calls get peeled out of the loop that > > existed prior to this patch and xfs_attr_node_addname() returns -EAGAIN > > to trigger reentry, if necessary. > Right, that is the idea > > > > > > /* > > > - * Commit the current trans (including the inode) and > > > - * start a new one. > > > + * If addname was successful, and we dont need to alloc > > > + * anymore blks, we're done. > > > */ > > > - error = xfs_trans_roll_inode(&args->trans, dp); > > > - if (error) > > > + if (!args->rmtblkno && !args->rmtblkno2) > > > return error; > > > > Is this check new? What about clearing flags and whatnot? > gosh, I remember putting this in, and now I'm struggling to remember the why > that lead to it. It's needed later in the set, but at this point its sort > of an optimization. In the xfs_attr_leaf_try_add, in the case of a rename, > the blocks are saved for later processing. If no blocks were saved, there's > nothing to flip, so the idea is we can stop here. The extra go around isnt a > big deal at this point, but it's a problem in delayed attrs, because you end > up with an extra empty log entry. in the extended set, we fail an assertion > with out it: > Assertion failed: !list_empty(&cil->xc_cil), file: fs/xfs/xfs_log_cil.c, > line: 907 > > On my set up, we seem to get away without it at this point in the set, but > it does have to go in eventually. I think I must have worked through this > bug at one time, and then placed the fix in this patch, as it seems to be a > requirement for becoming "delay ready" > Hmm, that seems like an odd failure to associate with this code. I guess it's hard to grok because the context (i.e. delayed attrs) comes a bit later. If possible, it might be wise to defer this hunk until where it's necessary so the context/purpose is more clear on review (it also might be helpful to explain the purpose in a bit more detail in the comment). > > > > > > > - goto node; > > > - } > > > - else if (error) { > > > - return error; > > > + dac->dela_state = XFS_DAS_FOUND_NBLK; > > > + return -EAGAIN; > > > } > > > - /* > > > - * Commit the transaction that added the attr name so that > > > - * later routines can manage their own transactions. > > > - */ > > > - error = xfs_trans_roll_inode(&args->trans, dp); > > > - if (error) > > > - return error; > > > + dac->dela_state = XFS_DAS_FOUND_LBLK; > > > + return -EAGAIN; > > > > Is there a reason the node branch sets the state in the branch but the > > leaf branch falls out to here? I.e., could we not be consistent and have > > > > if (one_block()) { > > ... > > dac->dela_state = XFS_DAS_FOUND_LBLK; > > } else { > > ... > > dac->dela_state = XFS_DAS_FOUND_NBLK; > > } > > > > return -EAGAIN; > It's just sort of left over from its originally linear code flow that just > sort of fell through in the the leaf logic. I think what you are proposing > is logically analogous tho. Does your example mean to add an extra if/else > at the end here? Or to tuck the return up into the the existing if/else? > Both work, I am fine with either. > Yes, the suggestion was intended to be logically equivalent. I think it improves readability and is slightly less fragile to make the duplicated code (i.e. return -EAGAIN) common and the state assignment as part of the associated branch. Brian > > > > > + case XFS_DAS_FOUND_LBLK: > > > /* > > > * If there was an out-of-line value, allocate the blocks we > > > * identified for its storage and copy the value. This is done > > > * after we create the attribute so that we don't overflow the > > > * maximum size of a transaction and/or hit a deadlock. > > > */ > > > - if (args->rmtblkno > 0) { > > > - error = xfs_attr_rmtval_set(args); > > > + > > > + /* Open coded xfs_attr_rmtval_set without trans handling */ > > > + if ((dac->flags & XFS_DAC_LEAF_ADDNAME_INIT) == 0) { > > > + dac->flags |= XFS_DAC_LEAF_ADDNAME_INIT; > > > + if (args->rmtblkno > 0) { > > > + error = xfs_attr_rmtval_find_space(dac); > > > + if (error) > > > + return error; > > > + } > > > + } > > > + > > > + /* > > > + * Roll through the "value", allocating blocks on disk as > > > + * required. > > > + */ > > > + if (dac->blkcnt > 0) { > > > + error = xfs_attr_rmtval_set_blk(dac); > > > if (error) > > > return error; > > > + > > > + return -EAGAIN; > > > } > > > + error = xfs_attr_rmtval_set_value(args); > > > + if (error) > > > + return error; > > > + > > > if (!(args->op_flags & XFS_DA_OP_RENAME)) { > > > /* > > > * Added a "remote" value, just clear the incomplete > > > @@ -394,22 +465,26 @@ xfs_attr_set_args( > > > * Commit the flag value change and start the next trans in > > > * series. > > > */ > > > - error = xfs_trans_roll_inode(&args->trans, args->dp); > > > - if (error) > > > - return error; > > > - > > > + dac->dela_state = XFS_DAS_FLIP_LFLAG; > > > + return -EAGAIN; > > > + case XFS_DAS_FLIP_LFLAG: > > > /* > > > * Dismantle the "old" attribute/value pair by removing a > > > * "remote" value (if it exists). > > > */ > > > xfs_attr_restore_rmt_blk(args); > > > - if (args->rmtblkno) { > > > - error = xfs_attr_rmtval_invalidate(args); > > > - if (error) > > > - return error; > > > + error = xfs_attr_rmtval_invalidate(args); > > > + if (error) > > > + return error; > > > + > > > + /* Set state in case xfs_attr_rmtval_remove returns -EAGAIN */ > > > + dac->dela_state = XFS_DAS_RM_LBLK; > > > - error = xfs_attr_rmtval_remove(args); > > > + /* fallthrough */ > > > + case XFS_DAS_RM_LBLK: > > > + if (args->rmtblkno) { > > > + error = __xfs_attr_rmtval_remove(dac); > > > if (error) > > > return error; > > > } > > > > This looks like a similar scenario as before where > > xfs_attr_rmtval_remove() would have returned with a clean transaction > > after the final unmap, but here __xfs_attr_rmtval_remove() just returns > > 0 if done == 1. We probably need to roll one more time out of this > > branch and land in a subsequent state..? > I see, ok will add extra -EAGAIN here. > > > > > > > @@ -434,91 +509,114 @@ xfs_attr_set_args( > > > /* bp is gone due to xfs_da_shrink_inode */ > > > return error; > > > - } > > > -node: > > > + case XFS_DAS_FOUND_NBLK: > > > + /* > > > + * If there was an out-of-line value, allocate the blocks we > > > + * identified for its storage and copy the value. This is done > > > + * after we create the attribute so that we don't overflow the > > > + * maximum size of a transaction and/or hit a deadlock. > > > + */ > > > + if (args->rmtblkno > 0) { > > > + /* > > > + * Open coded xfs_attr_rmtval_set without trans > > > + * handling > > > + */ > > > + error = xfs_attr_rmtval_find_space(dac); > > > + if (error) > > > + return error; > > > - do { > > > - error = xfs_attr_node_addname_find_attr(args, &state); > > > - if (error) > > > - return error; > > > - error = xfs_attr_node_addname(args, state); > > > - } while (error == -EAGAIN); > > > - if (error) > > > - return error; > > > + /* > > > + * Roll through the "value", allocating blocks on disk > > > + * as required. Set the state in case of -EAGAIN return > > > + * code > > > + */ > > > + dac->dela_state = XFS_DAS_ALLOC_NODE; > > > + } > > > - /* > > > - * Commit the leaf addition or btree split and start the next > > > - * trans in the chain. > > > - */ > > > - error = xfs_trans_roll_inode(&args->trans, dp); > > > - if (error) > > > - goto out; > > > + /* fallthrough */ > > > + case XFS_DAS_ALLOC_NODE: > > > + if (args->rmtblkno > 0) { > > > + if (dac->blkcnt > 0) { > > > + error = xfs_attr_rmtval_set_blk(dac); > > > + if (error) > > > + return error; > > > - /* > > > - * If there was an out-of-line value, allocate the blocks we > > > - * identified for its storage and copy the value. This is done > > > - * after we create the attribute so that we don't overflow the > > > - * maximum size of a transaction and/or hit a deadlock. > > > - */ > > > - if (args->rmtblkno > 0) { > > > - error = xfs_attr_rmtval_set(args); > > > - if (error) > > > - return error; > > > - } > > > + return -EAGAIN; > > > + } > > > + > > > + error = xfs_attr_rmtval_set_value(args); > > > + if (error) > > > + return error; > > > + } > > > + > > > + if (!(args->op_flags & XFS_DA_OP_RENAME)) { > > > + /* > > > + * Added a "remote" value, just clear the incomplete > > > + * flag. > > > + */ > > > + if (args->rmtblkno > 0) > > > + error = xfs_attr3_leaf_clearflag(args); > > > + goto out; > > > + } > > > - if (!(args->op_flags & XFS_DA_OP_RENAME)) { > > > /* > > > - * Added a "remote" value, just clear the incomplete flag. > > > + * If this is an atomic rename operation, we must "flip" the > > > + * incomplete flags on the "new" and "old" attribute/value pairs > > > + * so that one disappears and one appears atomically. Then we > > > + * must remove the "old" attribute/value pair. > > > + * > > > + * In a separate transaction, set the incomplete flag on the > > > + * "old" attr and clear the incomplete flag on the "new" attr. > > > */ > > > - if (args->rmtblkno > 0) > > > - error = xfs_attr3_leaf_clearflag(args); > > > - goto out; > > > - } > > > - > > > - /* > > > - * If this is an atomic rename operation, we must "flip" the incomplete > > > - * flags on the "new" and "old" attribute/value pairs so that one > > > - * disappears and one appears atomically. Then we must remove the "old" > > > - * attribute/value pair. > > > - * > > > - * In a separate transaction, set the incomplete flag on the "old" attr > > > - * and clear the incomplete flag on the "new" attr. > > > - */ > > > - error = xfs_attr3_leaf_flipflags(args); > > > - if (error) > > > - goto out; > > > - /* > > > - * Commit the flag value change and start the next trans in series > > > - */ > > > - error = xfs_trans_roll_inode(&args->trans, args->dp); > > > - if (error) > > > - goto out; > > > + error = xfs_attr3_leaf_flipflags(args); > > > + if (error) > > > + goto out; > > > + /* > > > + * Commit the flag value change and start the next trans in > > > + * series > > > + */ > > > + dac->dela_state = XFS_DAS_FLIP_NFLAG; > > > + return -EAGAIN; > > > - /* > > > - * Dismantle the "old" attribute/value pair by removing a "remote" value > > > - * (if it exists). > > > - */ > > > - xfs_attr_restore_rmt_blk(args); > > > + case XFS_DAS_FLIP_NFLAG: > > > + /* > > > + * Dismantle the "old" attribute/value pair by removing a > > > + * "remote" value (if it exists). > > > + */ > > > + xfs_attr_restore_rmt_blk(args); > > > - if (args->rmtblkno) { > > > error = xfs_attr_rmtval_invalidate(args); > > > if (error) > > > return error; > > > - error = xfs_attr_rmtval_remove(args); > > > - if (error) > > > - return error; > > > - } > > > + /* Set state in case xfs_attr_rmtval_remove returns -EAGAIN */ > > > + dac->dela_state = XFS_DAS_RM_NBLK; > > > + > > > + /* fallthrough */ > > > + case XFS_DAS_RM_NBLK: > > > + if (args->rmtblkno) { > > > + error = __xfs_attr_rmtval_remove(dac); > > > + if (error) > > > + return error; > > > + } > > > > Similar thing here with __xfs_attr_rmtval_remove()..? > Sure, will add one more -EAGAIN > > > > > > + > > > + error = xfs_attr_node_addname_clear_incomplete(dac); > > > - error = xfs_attr_node_addname_clear_incomplete(args); > > > out: > > > - if (state) > > > - xfs_da_state_free(state); > > > - return error; > > > + if (state) > > > + xfs_da_state_free(state); > > > + return error; > > > > Can we avoid this out label landing inside the switch statement? That > > looks like a landmine. Even if we just duplicated an 'done_out' path > > after the last return in the function, I think that would be preferable. > Sure, can do, that seems like a simple thing to tack on > > > > > All previous feedback aside, I think this patch now looks much more > > digestable in general. Most of the state code is isolated to the _iter() > > function and so it's much easier to follow along and compare against the > > current code flow. I did still have some thoughts with regard to further > > cleanups, possibly clearing up some the logic and/or tweaking the states > > and whatnot, but I think this is at a point where it might be reasonable > > to make such changes on top of this patch instead of continuing to make > > significant changes to it. If I get a chance perhaps I'll take a closer > > look at that once the remaining kinks are worked out.. > > > > Brian > Ok, I will get these last bits updated here. I still need to check into the > issues Darrick is seeing on his set up, but it sounds like we've found an > arrangement people like. And yes, I think switching to cleanups on top is a > good next step. Thanks for the reviews! > > Allison > > > > > > + > > > + default: > > > + ASSERT(dac->dela_state != XFS_DAS_RM_SHRINK); > > > + break; > > > + } > > > + return error; > > > } > > > + > > > /* > > > * Return EEXIST if attr is found, or ENOATTR if not > > > */ > > > @@ -984,18 +1082,18 @@ xfs_attr_node_hasname( > > > STATIC int > > > xfs_attr_node_addname_find_attr( > > > - struct xfs_da_args *args, > > > - struct xfs_da_state **state) > > > + struct xfs_delattr_context *dac) > > > { > > > - int retval; > > > + struct xfs_da_args *args = dac->da_args; > > > + int retval; > > > /* > > > * Search to see if name already exists, and get back a pointer > > > * to where it should go. > > > */ > > > - retval = xfs_attr_node_hasname(args, state); > > > + retval = xfs_attr_node_hasname(args, &dac->da_state); > > > if (retval != -ENOATTR && retval != -EEXIST) > > > - goto error; > > > + return retval; > > > if (retval == -ENOATTR && (args->attr_flags & XATTR_REPLACE)) > > > goto error; > > > @@ -1021,8 +1119,8 @@ xfs_attr_node_addname_find_attr( > > > return 0; > > > error: > > > - if (*state) > > > - xfs_da_state_free(*state); > > > + if (dac->da_state) > > > + xfs_da_state_free(dac->da_state); > > > return retval; > > > } > > > @@ -1035,20 +1133,24 @@ xfs_attr_node_addname_find_attr( > > > * > > > * "Remote" attribute values confuse the issue and atomic rename operations > > > * add a whole extra layer of confusion on top of that. > > > + * > > > + * This routine is meant to function as a delayed operation, and may return > > > + * -EAGAIN when the transaction needs to be rolled. Calling functions will need > > > + * to handle this, and recall the function until a successful error code is > > > + *returned. > > > */ > > > STATIC int > > > xfs_attr_node_addname( > > > - struct xfs_da_args *args, > > > - struct xfs_da_state *state) > > > + struct xfs_delattr_context *dac) > > > { > > > - struct xfs_da_state_blk *blk; > > > - struct xfs_inode *dp; > > > - int error; > > > + struct xfs_da_args *args = dac->da_args; > > > + struct xfs_da_state *state = dac->da_state; > > > + struct xfs_da_state_blk *blk; > > > + int error; > > > trace_xfs_attr_node_addname(args); > > > - dp = args->dp; > > > - blk = &state->path.blk[state->path.active-1]; > > > + blk = &state->path.blk[ state->path.active-1 ]; > > > ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); > > > error = xfs_attr3_leaf_add(blk->bp, state->args); > > > @@ -1064,18 +1166,15 @@ xfs_attr_node_addname( > > > error = xfs_attr3_leaf_to_node(args); > > > if (error) > > > goto out; > > > - error = xfs_defer_finish(&args->trans); > > > - if (error) > > > - goto out; > > > /* > > > - * Commit the node conversion and start the next > > > - * trans in the chain. > > > + * Now that we have converted the leaf to a node, we can > > > + * roll the transaction, and try xfs_attr3_leaf_add > > > + * again on re-entry. No need to set dela_state to do > > > + * this. dela_state is still unset by this function at > > > + * this point. > > > */ > > > - error = xfs_trans_roll_inode(&args->trans, dp); > > > - if (error) > > > - goto out; > > > - > > > + dac->flags |= XFS_DAC_DEFER_FINISH; > > > return -EAGAIN; > > > } > > > @@ -1088,9 +1187,7 @@ xfs_attr_node_addname( > > > error = xfs_da3_split(state); > > > if (error) > > > goto out; > > > - error = xfs_defer_finish(&args->trans); > > > - if (error) > > > - goto out; > > > + dac->flags |= XFS_DAC_DEFER_FINISH; > > > } else { > > > /* > > > * Addition succeeded, update Btree hashvals. > > > @@ -1105,8 +1202,9 @@ xfs_attr_node_addname( > > > STATIC > > > int xfs_attr_node_addname_clear_incomplete( > > > - struct xfs_da_args *args) > > > + struct xfs_delattr_context *dac) > > > { > > > + struct xfs_da_args *args = dac->da_args; > > > struct xfs_da_state *state = NULL; > > > struct xfs_da_state_blk *blk; > > > int retval = 0; > > > diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h > > > index 92a6a50..4e4233d 100644 > > > --- a/fs/xfs/libxfs/xfs_attr.h > > > +++ b/fs/xfs/libxfs/xfs_attr.h > > > @@ -159,6 +159,233 @@ struct xfs_attr_list_context { > > > * v > > > * done > > > * > > > + * > > > + * Below is a state machine diagram for attr set operations. > > > + * > > > + * It seems the challenge with understanding this system comes from trying to > > > + * absorb the state machine all at once, when really one should only be looking > > > + * at it with in the context of a single function. Once a state sensitive > > > + * function is called, the idea is that it "takes ownership" of the > > > + * state machine. It isn't concerned with the states that may have belonged to > > > + * it's calling parent. Only the states relevant to itself or any other > > > + * subroutines there in. Once a calling function hands off the state machine to > > > + * a subroutine, it needs to respect the simple rule that it doesn't "own" the > > > + * state machine anymore, and it's the responsibility of that calling function > > > + * to propagate the -EAGAIN back up the call stack. Upon reentry, it is > > > + * committed to re-calling that subroutine until it returns something other than > > > + * -EAGAIN. Once that subroutine signals completion (by returning anything other > > > + * than -EAGAIN), the calling function can resume using the state machine. > > > + * > > > + * xfs_attr_set_iter() > > > + * │ > > > + * v > > > + * ┌─y─ has an attr fork? > > > + * │ | > > > + * │ n > > > + * │ | > > > + * │ V > > > + * │ add a fork > > > + * │ │ > > > + * └──────────┤ > > > + * │ > > > + * V > > > + * ┌─y─ is shortform? > > > + * │ │ > > > + * │ V > > > + * │ xfs_attr_set_fmt > > > + * │ | > > > + * │ V > > > + * │ xfs_attr_try_sf_addname > > > + * │ │ > > > + * │ V > > > + * │ had enough ──y──> done > > > + * │ space? > > > + * n │ > > > + * │ n > > > + * │ │ > > > + * │ V > > > + * │ transform to leaf > > > + * │ │ > > > + * │ V > > > + * │ hold the leaf buffer > > > + * │ │ > > > + * │ V > > > + * │ return -EAGAIN > > > + * │ Re-enter in > > > + * │ leaf form > > > + * │ > > > + * └─> release leaf buffer > > > + * if needed > > > + * │ > > > + * V > > > + * ┌───n── fork has > > > + * │ only 1 blk? > > > + * │ │ > > > + * │ y > > > + * │ │ > > > + * │ v > > > + * │ xfs_attr_leaf_try_add() > > > + * │ │ > > > + * │ v > > > + * │ had enough ──────────────y───────────────??? > > > + * │ space? │ > > > + * │ │ │ > > > + * │ n │ > > > + * │ │ │ > > > + * │ v │ > > > + * │ return -EAGAIN │ > > > + * │ re-enter in │ > > > + * │ node form │ > > > + * │ │ │ > > > + * └──────────┤ │ > > > + * │ │ > > > + * V │ > > > + * xfs_attr_node_addname_find_attr │ > > > + * determines if this │ > > > + * is create or rename │ > > > + * find space to store attr │ > > > + * │ │ > > > + * v │ > > > + * xfs_attr_node_addname │ > > > + * │ │ > > > + * v │ > > > + * fits in a node leaf? ────n─────??? │ > > > + * │ ^ v │ > > > + * │ │ single leaf node? │ > > > + * │ │ │ │ │ > > > + * y │ y n │ > > > + * │ │ │ │ │ > > > + * v │ v v │ > > > + * update │ grow the leaf split if │ > > > + * hashvals └─── return -EAGAIN needed │ > > > + * │ retry leaf add │ │ > > > + * │ on reentry │ │ > > > + * ├─────────────────────────────┘ │ > > > + * │ │ > > > + * v │ > > > + * need to alloc │ > > > + * ┌─y── or flip flag? │ > > > + * │ │ │ > > > + * │ n │ > > > + * │ │ │ > > > + * │ v │ > > > + * │ done │ > > > + * │ │ > > > + * │ │ > > > + * │ XFS_DAS_FOUND_LBLK <──────────────────┘ > > > + * │ │ > > > + * │ V > > > + * │ xfs_attr_leaf_addname() > > > + * │ │ > > > + * │ v > > > + * │ ┌──first time through? > > > + * │ │ │ > > > + * │ │ y > > > + * │ │ │ > > > + * │ n v > > > + * │ │ if we have rmt blks > > > + * │ │ find space for them > > > + * │ │ │ > > > + * │ └──────────┤ > > > + * │ │ > > > + * │ v > > > + * │ still have > > > + * │ ┌─n─ blks to alloc? <──??? > > > + * │ │ │ │ > > > + * │ │ y │ > > > + * │ │ │ │ > > > + * │ │ v │ > > > + * │ │ alloc one blk │ > > > + * │ │ return -EAGAIN ──┘ > > > + * │ │ re-enter with one > > > + * │ │ less blk to alloc > > > + * │ │ > > > + * │ │ > > > + * │ └───> set the rmt > > > + * │ value > > > + * │ │ > > > + * │ v > > > + * │ was this > > > + * │ a rename? ──n─??? > > > + * │ │ │ > > > + * │ y │ > > > + * │ │ │ > > > + * │ v │ > > > + * │ flip incomplete │ > > > + * │ flag │ > > > + * │ │ │ > > > + * │ v │ > > > + * │ XFS_DAS_FLIP_LFLAG │ > > > + * │ │ │ > > > + * │ v │ > > > + * │ remove │ > > > + * │ ┌───> old name │ > > > + * │ │ │ │ > > > + * │ XFS_DAS_RM_LBLK │ │ > > > + * │ ^ │ │ > > > + * │ │ v │ > > > + * │ └──y── more to │ > > > + * │ remove │ > > > + * │ │ │ > > > + * │ n │ > > > + * │ │ │ > > > + * │ v │ > > > + * │ done <──────┘ > > > + * │ > > > + * └──────> XFS_DAS_FOUND_NBLK > > > + * │ > > > + * v > > > + * ┌─────n── need to > > > + * │ alloc blks? > > > + * │ │ > > > + * │ y > > > + * │ │ > > > + * │ v > > > + * │ find space > > > + * │ │ > > > + * │ v > > > + * │ ┌─>XFS_DAS_ALLOC_NODE > > > + * │ │ │ > > > + * │ │ v > > > + * │ │ alloc blk > > > + * │ │ │ > > > + * │ │ v > > > + * │ └──y── need to alloc > > > + * │ more blocks? > > > + * │ │ > > > + * │ n > > > + * │ │ > > > + * │ v > > > + * │ set the rmt value > > > + * │ │ > > > + * │ v > > > + * │ was this > > > + * └────────> a rename? ──n─??? > > > + * │ │ > > > + * y │ > > > + * │ │ > > > + * v │ > > > + * flip incomplete │ > > > + * flag │ > > > + * │ │ > > > + * v │ > > > + * XFS_DAS_FLIP_NFLAG │ > > > + * │ │ > > > + * v │ > > > + * remove │ > > > + * ┌────────> old name │ > > > + * │ │ │ > > > + * XFS_DAS_RM_NBLK │ │ > > > + * ^ │ │ > > > + * │ v │ > > > + * └──────y── more to │ > > > + * remove │ > > > + * │ │ > > > + * n │ > > > + * │ │ > > > + * v │ > > > + * done <──────┘ > > > + * > > > */ > > > /* > > > @@ -174,12 +401,20 @@ enum xfs_delattr_state { > > > XFS_DAS_UNINIT = 0, /* No state has been set yet */ > > > XFS_DAS_RMTBLK, /* Removing remote blks */ > > > XFS_DAS_RM_SHRINK, /* We are shrinking the tree */ > > > + XFS_DAS_FOUND_LBLK, /* We found leaf blk for attr */ > > > + XFS_DAS_FOUND_NBLK, /* We found node blk for attr */ > > > + XFS_DAS_FLIP_LFLAG, /* Flipped leaf INCOMPLETE attr flag */ > > > + XFS_DAS_RM_LBLK, /* A rename is removing leaf blocks */ > > > + XFS_DAS_ALLOC_NODE, /* We are allocating node blocks */ > > > + XFS_DAS_FLIP_NFLAG, /* Flipped node INCOMPLETE attr flag */ > > > + XFS_DAS_RM_NBLK, /* A rename is removing node blocks */ > > > }; > > > /* > > > * Defines for xfs_delattr_context.flags > > > */ > > > #define XFS_DAC_DEFER_FINISH 0x01 /* finish the transaction */ > > > +#define XFS_DAC_LEAF_ADDNAME_INIT 0x02 /* xfs_attr_leaf_addname init*/ > > > /* > > > * Context used for keeping track of delayed attribute operations > > > @@ -187,6 +422,11 @@ enum xfs_delattr_state { > > > struct xfs_delattr_context { > > > struct xfs_da_args *da_args; > > > + /* Used in xfs_attr_rmtval_set_blk to roll through allocating blocks */ > > > + struct xfs_bmbt_irec map; > > > + xfs_dablk_t lblkno; > > > + int blkcnt; > > > + > > > /* Used in xfs_attr_node_removename to roll through removing blocks */ > > > struct xfs_da_state *da_state; > > > @@ -213,7 +453,6 @@ int xfs_attr_set_args(struct xfs_da_args *args); > > > int xfs_has_attr(struct xfs_da_args *args); > > > int xfs_attr_remove_args(struct xfs_da_args *args); > > > int xfs_attr_remove_iter(struct xfs_delattr_context *dac); > > > -int xfs_attr_trans_roll(struct xfs_delattr_context *dac); > > > bool xfs_attr_namecheck(const void *name, size_t length); > > > void xfs_delattr_context_init(struct xfs_delattr_context *dac, > > > struct xfs_da_args *args); > > > diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c > > > index 908521e7..fc71f10 100644 > > > --- a/fs/xfs/libxfs/xfs_attr_remote.c > > > +++ b/fs/xfs/libxfs/xfs_attr_remote.c > > > @@ -439,9 +439,9 @@ xfs_attr_rmtval_get( > > > /* > > > * Find a "hole" in the attribute address space large enough for us to drop the > > > - * new attribute's value into > > > + * new attributes value into > > > */ > > > -STATIC int > > > +int > > > xfs_attr_rmt_find_hole( > > > struct xfs_da_args *args) > > > { > > > @@ -468,7 +468,7 @@ xfs_attr_rmt_find_hole( > > > return 0; > > > } > > > -STATIC int > > > +int > > > xfs_attr_rmtval_set_value( > > > struct xfs_da_args *args) > > > { > > > @@ -628,6 +628,69 @@ xfs_attr_rmtval_set( > > > } > > > /* > > > + * Find a hole for the attr and store it in the delayed attr context. This > > > + * initializes the context to roll through allocating an attr extent for a > > > + * delayed attr operation > > > + */ > > > +int > > > +xfs_attr_rmtval_find_space( > > > + struct xfs_delattr_context *dac) > > > +{ > > > + struct xfs_da_args *args = dac->da_args; > > > + struct xfs_bmbt_irec *map = &dac->map; > > > + int error; > > > + > > > + dac->lblkno = 0; > > > + dac->blkcnt = 0; > > > + args->rmtblkcnt = 0; > > > + args->rmtblkno = 0; > > > + memset(map, 0, sizeof(struct xfs_bmbt_irec)); > > > + > > > + error = xfs_attr_rmt_find_hole(args); > > > + if (error) > > > + return error; > > > + > > > + dac->blkcnt = args->rmtblkcnt; > > > + dac->lblkno = args->rmtblkno; > > > + > > > + return 0; > > > +} > > > + > > > +/* > > > + * Write one block of the value associated with an attribute into the > > > + * out-of-line buffer that we have defined for it. This is similar to a subset > > > + * of xfs_attr_rmtval_set, but records the current block to the delayed attr > > > + * context, and leaves transaction handling to the caller. > > > + */ > > > +int > > > +xfs_attr_rmtval_set_blk( > > > + struct xfs_delattr_context *dac) > > > +{ > > > + struct xfs_da_args *args = dac->da_args; > > > + struct xfs_inode *dp = args->dp; > > > + struct xfs_bmbt_irec *map = &dac->map; > > > + int nmap; > > > + int error; > > > + > > > + nmap = 1; > > > + error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)dac->lblkno, > > > + dac->blkcnt, XFS_BMAPI_ATTRFORK, args->total, > > > + map, &nmap); > > > + if (error) > > > + return error; > > > + > > > + ASSERT(nmap == 1); > > > + ASSERT((map->br_startblock != DELAYSTARTBLOCK) && > > > + (map->br_startblock != HOLESTARTBLOCK)); > > > + > > > + /* roll attribute extent map forwards */ > > > + dac->lblkno += map->br_blockcount; > > > + dac->blkcnt -= map->br_blockcount; > > > + > > > + return 0; > > > +} > > > + > > > +/* > > > * Remove the value associated with an attribute by deleting the > > > * out-of-line buffer that it is stored on. > > > */ > > > @@ -669,37 +732,6 @@ xfs_attr_rmtval_invalidate( > > > } > > > /* > > > - * Remove the value associated with an attribute by deleting the > > > - * out-of-line buffer that it is stored on. > > > - */ > > > -int > > > -xfs_attr_rmtval_remove( > > > - struct xfs_da_args *args) > > > -{ > > > - int error; > > > - struct xfs_delattr_context dac = { > > > - .da_args = args, > > > - }; > > > - > > > - trace_xfs_attr_rmtval_remove(args); > > > - > > > - /* > > > - * Keep de-allocating extents until the remote-value region is gone. > > > - */ > > > - do { > > > - error = __xfs_attr_rmtval_remove(&dac); > > > - if (error != -EAGAIN) > > > - break; > > > - > > > - error = xfs_attr_trans_roll(&dac); > > > - if (error) > > > - return error; > > > - } while (true); > > > - > > > - return error; > > > -} > > > - > > > -/* > > > * Remove the value associated with an attribute by deleting the out-of-line > > > * buffer that it is stored on. Returns -EAGAIN for the caller to refresh the > > > * transaction and re-call the function > > > diff --git a/fs/xfs/libxfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h > > > index 002fd30..8ad68d5 100644 > > > --- a/fs/xfs/libxfs/xfs_attr_remote.h > > > +++ b/fs/xfs/libxfs/xfs_attr_remote.h > > > @@ -10,9 +10,12 @@ int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen); > > > int xfs_attr_rmtval_get(struct xfs_da_args *args); > > > int xfs_attr_rmtval_set(struct xfs_da_args *args); > > > -int xfs_attr_rmtval_remove(struct xfs_da_args *args); > > > int xfs_attr_rmtval_stale(struct xfs_inode *ip, struct xfs_bmbt_irec *map, > > > xfs_buf_flags_t incore_flags); > > > int xfs_attr_rmtval_invalidate(struct xfs_da_args *args); > > > int __xfs_attr_rmtval_remove(struct xfs_delattr_context *dac); > > > +int xfs_attr_rmt_find_hole(struct xfs_da_args *args); > > > +int xfs_attr_rmtval_set_value(struct xfs_da_args *args); > > > +int xfs_attr_rmtval_set_blk(struct xfs_delattr_context *dac); > > > +int xfs_attr_rmtval_find_space(struct xfs_delattr_context *dac); > > > #endif /* __XFS_ATTR_REMOTE_H__ */ > > > diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h > > > index e74bbb6..0c16d46 100644 > > > --- a/fs/xfs/xfs_trace.h > > > +++ b/fs/xfs/xfs_trace.h > > > @@ -1944,7 +1944,6 @@ DEFINE_ATTR_EVENT(xfs_attr_refillstate); > > > DEFINE_ATTR_EVENT(xfs_attr_rmtval_get); > > > DEFINE_ATTR_EVENT(xfs_attr_rmtval_set); > > > -DEFINE_ATTR_EVENT(xfs_attr_rmtval_remove); > > > #define DEFINE_DA_EVENT(name) \ > > > DEFINE_EVENT(xfs_da_class, name, \ > > > -- > > > 2.7.4 > > > > > >