On Sat, Jun 11, 2022 at 02:41:54AM -0700, Allison Henderson wrote: > This patch modifies xfs_link to add a parent pointer to the inode. > > [bfoster: rebase, use VFS inode fields, fix xfs_bmap_finish() usage] > [achender: rebased, changed __unint32_t to xfs_dir2_dataptr_t, > fixed null pointer bugs] > > Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> > Signed-off-by: Allison Henderson <allison.henderson@xxxxxxxxxx> > --- > fs/xfs/xfs_inode.c | 78 ++++++++++++++++++++++++++++++++++++---------- > fs/xfs/xfs_trans.c | 7 +++-- > fs/xfs/xfs_trans.h | 2 +- > 3 files changed, 67 insertions(+), 20 deletions(-) > > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c > index 6b1e4cb11b5c..41c58df8e568 100644 > --- a/fs/xfs/xfs_inode.c > +++ b/fs/xfs/xfs_inode.c > @@ -1254,14 +1254,28 @@ xfs_create_tmpfile( > > int > xfs_link( > - xfs_inode_t *tdp, > - xfs_inode_t *sip, > - struct xfs_name *target_name) > -{ > - xfs_mount_t *mp = tdp->i_mount; > - xfs_trans_t *tp; > - int error, nospace_error = 0; > - int resblks; > + xfs_inode_t *tdp, > + xfs_inode_t *sip, > + struct xfs_name *target_name) > +{ > + xfs_mount_t *mp = tdp->i_mount; > + xfs_trans_t *tp; > + int error, nospace_error = 0; > + int resblks; > + struct xfs_parent_name_rec rec; > + xfs_dir2_dataptr_t diroffset; > + > + struct xfs_da_args args = { > + .dp = sip, > + .geo = mp->m_attr_geo, > + .whichfork = XFS_ATTR_FORK, > + .attr_filter = XFS_ATTR_PARENT, > + .op_flags = XFS_DA_OP_OKNOENT, > + .name = (const uint8_t *)&rec, > + .namelen = sizeof(rec), > + .value = (void *)target_name->name, > + .valuelen = target_name->len, > + }; Now that I've had a bit of a think about this, this pattern of placing the rec on the stack and then using it as a buffer that is then accessed in xfs_tran_commit() processing feels like a landmine. That is, we pass transaction contexts around functions as they are largely independent constructs, but adding this stack variable to the defer ops attached to the transaction means that the transaction cannot be passed back to a caller for it to be committed - that will corrupt the stack buffer and hence silently corrupt the parent attr that is going to be logged when the transaction is finally committed. Hence I think this needs to be wrapped up as a dynamically allocated structure that is freed when the defer ops are done with it. e.g. struct xfs_parent_defer { struct xfs_parent_name_rec rec; xfs_dir2_dataptr_t diroffset; struct xfs_da_args args; }; and then here: > > trace_xfs_link(tdp, target_name); > > @@ -1278,11 +1292,17 @@ xfs_link( > if (error) > goto std_return; > > + if (xfs_has_larp(mp)) { > + error = xfs_attr_grab_log_assist(mp); > + if (error) > + goto std_return; > + } struct xfs_parent_defer *parent = NULL; ..... error = xfs_parent_init(mp, target_name, &parent); if (error) goto std_return; and xfs_parent_init() looks something like this: int xfs_parent_init( ..... struct xfs_parent_defer **parentp) { struct xfs_parent_defer *parent; if (!xfs_has_parent_pointers(mp)) return 0; error = xfs_attr_grab_log_assist(mp); if (error) return error; parent = kzalloc(sizeof(*parent), GFP_KERNEL); if (!parent) return -ENOMEM; /* init parent da_args */ *parentp = parent; return 0; } With that in place, we then can wrap all this up: > > + /* > + * If we have parent pointers, we now need to add the parent record to > + * the attribute fork of the inode. If this is the initial parent > + * attribute, we need to create it correctly, otherwise we can just add > + * the parent to the inode. > + */ > + if (xfs_sb_version_hasparent(&mp->m_sb)) { > + args.trans = tp; > + xfs_init_parent_name_rec(&rec, tdp, diroffset); > + args.hashval = xfs_da_hashname(args.name, > + args.namelen); > + error = xfs_attr_defer_add(&args); > + if (error) > + goto out_defer_cancel; > + } with: if (parent) { error = xfs_parent_defer_add(tp, tdp, parent, diroffset); if (error) goto out_defer_cancel; } and implement it something like: int xfs_parent_defer_add( struct xfs_trans *tp, struct xfs_inode *ip, struct xfs_parent_defer *parent, xfs_dir2_dataptr_t diroffset) { struct xfs_da_args *args = &parent->args; xfs_init_parent_name_rec(&parent->rec, ip, diroffset) args->trans = tp; args->hashval = xfs_da_hashname(args->name, args->namelen); return xfs_attr_defer_add(args); } > + > /* > * If this is a synchronous mount, make sure that the > * link transaction goes to disk before returning to > @@ -1331,11 +1367,21 @@ xfs_link( > if (xfs_has_wsync(mp) || xfs_has_dirsync(mp)) > xfs_trans_set_sync(tp); > > - return xfs_trans_commit(tp); > + error = xfs_trans_commit(tp); > + xfs_iunlock(tdp, XFS_ILOCK_EXCL); > + xfs_iunlock(sip, XFS_ILOCK_EXCL); with a xfs_parent_free(parent) added here now that we are done with the parent update. > + return error; > > - error_return: > +out_defer_cancel: > + xfs_defer_cancel(tp); > +error_return: > xfs_trans_cancel(tp); > - std_return: > + xfs_iunlock(tdp, XFS_ILOCK_EXCL); > + xfs_iunlock(sip, XFS_ILOCK_EXCL); > +drop_incompat: > + if (xfs_has_larp(mp)) > + xlog_drop_incompat_feat(mp->m_log); And this can be replace with xfs_parent_cancel(mp, parent); that drops the log incompat featuer and frees the parent if it is not null. > +std_return: > if (error == -ENOSPC && nospace_error) > error = nospace_error; > return error; > @@ -2819,7 +2865,7 @@ xfs_remove( > */ > resblks = XFS_REMOVE_SPACE_RES(mp); > error = xfs_trans_alloc_dir(dp, &M_RES(mp)->tr_remove, ip, &resblks, > - &tp, &dontcare); > + &tp, &dontcare, XFS_ILOCK_EXCL); So you add this flag here so that link and remove can do different things in xfs_trans_alloc_dir(), but in the very next patch this gets changed to zero, so both callers only pass 0 to the function. Ideally there should be a patch prior to this one that converts the locking and joining of both link and remove to use external inode locking in a single patch, similar to the change in the second patch that changed the inode locking around xfs_init_new_inode() to require manual unlock. Then all the locking mods in this and the next patch go away, leaving just the parent pointer mods in this patch.... Cheers, Dave. -- Dave Chinner david@xxxxxxxxxxxxx