On Wed, Aug 26, 2020 at 05:35:14PM -0700, Allison Collins wrote: > Currently attributes are modified directly across one or more > transactions. But they are not logged or replayed in the event of an > error. The goal of delayed attributes is to enable logging and replaying > of attribute operations using the existing delayed operations > infrastructure. This will later enable the attributes to become part of > larger multi part operations that also must first be recorded to the > log. This is mostly of interest in the scheme of parent pointers which > would need to maintain an attribute containing parent inode information > any time an inode is moved, created, or removed. Parent pointers would > then be of interest to any feature that would need to quickly derive an > inode path from the mount point. Online scrub, nfs lookups and fs grow > or shrink operations are all features that could take advantage of this. > > This patch adds two new log item types for setting or removing > attributes as deferred operations. The xfs_attri_log_item logs an > intent to set or remove an attribute. The corresponding > xfs_attrd_log_item holds a reference to the xfs_attri_log_item and is > freed once the transaction is done. Both log items use a generic > xfs_attr_log_format structure that contains the attribute name, value, > flags, inode, and an op_flag that indicates if the operations is a set > or remove. > > Signed-off-by: Allison Collins <allison.henderson@xxxxxxxxxx> > --- > fs/xfs/Makefile | 1 + > fs/xfs/libxfs/xfs_attr.c | 7 +- > fs/xfs/libxfs/xfs_attr.h | 39 ++ > fs/xfs/libxfs/xfs_defer.c | 1 + > fs/xfs/libxfs/xfs_defer.h | 3 + > fs/xfs/libxfs/xfs_log_format.h | 44 ++- > fs/xfs/libxfs/xfs_log_recover.h | 2 + > fs/xfs/libxfs/xfs_types.h | 1 + > fs/xfs/scrub/common.c | 2 + > fs/xfs/xfs_acl.c | 2 + > fs/xfs/xfs_attr_item.c | 829 ++++++++++++++++++++++++++++++++++++++++ > fs/xfs/xfs_attr_item.h | 76 ++++ > fs/xfs/xfs_attr_list.c | 1 + > fs/xfs/xfs_ioctl.c | 2 + > fs/xfs/xfs_ioctl32.c | 2 + > fs/xfs/xfs_iops.c | 2 + > fs/xfs/xfs_log.c | 4 + > fs/xfs/xfs_log_recover.c | 2 + > fs/xfs/xfs_ondisk.h | 2 + > fs/xfs/xfs_xattr.c | 1 + > 20 files changed, 1017 insertions(+), 6 deletions(-) > > diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile > index 04611a1..b056cfc 100644 > --- a/fs/xfs/Makefile > +++ b/fs/xfs/Makefile > @@ -102,6 +102,7 @@ xfs-y += xfs_log.o \ > xfs_buf_item_recover.o \ > xfs_dquot_item_recover.o \ > xfs_extfree_item.o \ > + xfs_attr_item.o \ > xfs_icreate_item.o \ > xfs_inode_item.o \ > xfs_inode_item_recover.o \ > diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c > index a8cfe62..cf75742 100644 > --- a/fs/xfs/libxfs/xfs_attr.c > +++ b/fs/xfs/libxfs/xfs_attr.c > @@ -24,6 +24,7 @@ > #include "xfs_quota.h" > #include "xfs_trans_space.h" > #include "xfs_trace.h" > +#include "xfs_attr_item.h" > > /* > * xfs_attr.c > @@ -59,8 +60,6 @@ STATIC int xfs_attr_node_hasname(xfs_da_args_t *args, > STATIC int xfs_attr_fillstate(xfs_da_state_t *state); > STATIC int xfs_attr_refillstate(xfs_da_state_t *state); > STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args, struct xfs_buf *bp); > -STATIC int xfs_attr_set_iter(struct xfs_delattr_context *dac, > - struct xfs_buf **leaf_bp); > > int > xfs_inode_hasattr( > @@ -142,7 +141,7 @@ xfs_attr_get( > /* > * Calculate how many blocks we need for the new attribute, > */ > -STATIC int > +int > xfs_attr_calc_size( > struct xfs_da_args *args, > int *local) > @@ -327,7 +326,7 @@ xfs_attr_set_args( > * to handle this, and recall the function until a successful error code is > * returned. > */ > -STATIC int > +int > xfs_attr_set_iter( > struct xfs_delattr_context *dac, > struct xfs_buf **leaf_bp) > diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h > index 4f6bba8..23b8308 100644 > --- a/fs/xfs/libxfs/xfs_attr.h > +++ b/fs/xfs/libxfs/xfs_attr.h > @@ -247,6 +247,7 @@ enum xfs_delattr_state { > #define XFS_DAC_DEFER_FINISH 0x01 /* finish the transaction */ > #define XFS_DAC_NODE_RMVNAME_INIT 0x02 /* xfs_attr_node_removename init */ > #define XFS_DAC_LEAF_ADDNAME_INIT 0x04 /* xfs_attr_leaf_addname init*/ > +#define XFS_DAC_DELAYED_OP_INIT 0x08 /* delayed operations init*/ > > /* > * Context used for keeping track of delayed attribute operations > @@ -254,6 +255,9 @@ enum xfs_delattr_state { > struct xfs_delattr_context { > struct xfs_da_args *da_args; > > + /* Used by delayed attributes to hold leaf across transactions */ > + struct xfs_buf *leaf_bp; > + > /* Used in xfs_attr_rmtval_set_blk to roll through allocating blocks */ > struct xfs_bmbt_irec map; > xfs_dablk_t lblkno; > @@ -268,6 +272,38 @@ struct xfs_delattr_context { > enum xfs_delattr_state dela_state; > }; I'll start by pasting in the full xfs_delattr_context definition for easier reading: /* * Context used for keeping track of delayed attribute operations */ struct xfs_delattr_context { struct xfs_da_args *da_args; /* Used by delayed attributes to hold leaf across transactions */ struct xfs_buf *leaf_bp; /* Used in xfs_attr_rmtval_set_blk to roll through allocating blocks */ struct xfs_bmbt_irec map; xfs_dablk_t lblkno; int blkcnt; /* Used in xfs_attr_node_removename to roll through removing blocks */ struct xfs_da_state *da_state; struct xfs_da_state_blk *blk; /* Used to keep track of current state of delayed operation */ unsigned int flags; enum xfs_delattr_state dela_state; }; Admittedly, I /am/ conducting a backwards review and zeroing in on the data structures first. > +/* > + * List of attrs to commit later. > + */ > +struct xfs_attr_item { > + struct xfs_inode *xattri_ip; > + void *xattri_value; /* attr value */ > + void *xattri_name; /* attr name */ > + uint32_t xattri_op_flags; /* attr op set or rm */ > + uint32_t xattri_value_len; /* length of value */ > + uint32_t xattri_name_len; /* length of name */ > + uint32_t xattri_flags; /* attr flags */ > + > + /* used to log this item to an intent */ > + struct list_head xattri_list; > + > + /* > + * xfs_delattr_context and xfs_da_args need to remain instantiated > + * across transaction rolls during the defer finish, so store them here > + */ > + struct xfs_da_args xattri_args; > + struct xfs_delattr_context xattri_dac; > + > + /* > + * A byte array follows the header containing the file name and > + * attribute value. > + */ > +}; These two structures (xfs_delattr_context and xfs_attr_item) duplicate a lot of information considering that they both track incore state during an xattr set/remove operation. There's also a lot of duplication between the do-while loop in xfs_attr_set_args and the inner loop of the defer attr set code. To make sure I'm understanding this correctly, let me start by repeating back to you what I think is the code flow through the hasdelattr path and then the !hasdelattr path. Let's call the hasdelattr path (A). First, the caller allocates an xfs_da_args structure and partially initializes it with dp, attr_filter, attr_flags, name, namelen, value, and valuelen set appropriately for the operation it wants. The rest of the struct should be zeroed, because the uninitialized parts are internal state. Second, the *args are passed to xfs_attr_set, which after setting up a transaction calls xfs_attr_set_deferred. This calls xfs_attr_item_init to allocate and initialize a struct xfs_attr_item with dp, name, namelen, attr_filter, value, and valuelen, and passes this incore state tracking structure to the defer ops machinery. Third, the defer ops machinery calls xfs_attr_finish_item to deal with the attr request. If the xfs_delattr_context within the xfs_attr_item is uninitialized it willl set the xfs_da_args state that's within the xfs_attr_item to the values already stored in the xfs_attr_item. Fourth, xfs_attr_finish_item calls xfs_trans_attr to dispatch based on op_flags. For setting, this means we call xfs_attr_set_iter. Fifth, xfs_attr_set_iter dispatches functions based on whatever dela_state in the delattr_context is set to. The functions it calls can set DAC_DEFER_FINISH and/or return -EAGAIN to signal the defer ops machinery that it needs to roll the transaction so that we can repeat steps 3-5 until we're done. The defer ops machinery ought to honor DEFER_FINISH and complete whatever work items we've put on the queue, but... it's buggy and doesn't. I'll come back to this later. Sixth, once we're done, we return out to xfs_attr_set to commit the transaction and exit. Did I understand that correctly? If so, I'll move on to the !hasdelattr case, which we'll call (B). First, the caller allocates an xfs_da_args structure and partially initializes it with dp, attr_filter, attr_flags, name, namelen, value, and valuelen set appropriately for the operation it wants. The rest of the struct should be zeroed, because the uninitialized parts are internal state. This is the same as step A1 above. Second, the *args are passed to xfs_attr_set, which after setting up a transaction calls xfs_attr_set_args. This calls xfs_attr_set_iter, which is the dela_state function dispatcher mentioned in step A5 above. The functions it calls can set DAC_DEFER_FINISH to signal to xfs_attr_set_args that it needs to complete whatever work items we've attached to the transaction. They can also return -EAGAIN to signal to xfs_attr_set_args that it's time to roll the transaction. Third, once we're done, we return out of xfs_attr_set, same as step A6 above. Assuming I understood those two code paths correctly, I'll move on to the attr item recovery case. Call this (C). First, xfs_attri_item_recover is called with a recovered incore log item. It allocates an xfs_da_args and fills out most of the same fields that xfs_attr_set does in A1-A2 and B1-B2 above; and then it allocates a transaction. Second, _recover has its own while loop(!) to call xfs_trans_attr, which calls xfs_attr_set_iter, sort of like what A4 does. I'll come back to this later as well. Third, xfs_attr_set_iter uses dela_state to dispatch functions, similar to what A5 does above. If those functions set DAC_DEFER_FINISH or return -EAGAIN, we'll pass that out to xfs_attr_set_iter to get the transaction rolled so we can move on to the next state. Fourth, when the loop is done we commit the transaction and move on with whatever is next in log recovery. Does that sound right? If so, let's move on to the issues I noted above. I think the first problem is that this patchset adds two more xattr operation state structures. Current xfs_da_args store both the operation arguments (inode, name, value, other flags) and most of the state of the operation (whichfork, hashval, geo, block indices, rmt block indices). The series then adds a xfs_delattr_context that holds more state that needs to survive a transaction roll (leafbp, rmt mappings, da btree state, and dela_state). Then, it adds yet another xfs_attr_item that contains its own xfs_da_args and xfs_delattr_context, and has a bunch more fields xattri_(ip, value, name, opflags, value) that duplicate the fields that already exist in xfs_da_args. This is hard to follow. I don't know what's the difference between xfs_attr_item.xattri_name and xfs_attr_item.xattri_args.name, and I suspect this makes xfs_attr_item much larger than it needs to be. Question 1: Can we break up struct xfs_da_args? Right now its field definition is the union set of everything needed to track both a directory operation and an xattr operation. What do you think of creating separate xfs_dirop_state and xfs_attrop_state structures that each embed an xfs_da_args, and then move the dir and attr-specific pieces out of xfs_da_args and into xfs_{dir,attr}op_state as appropriate? I think Christoph has suggested this elsewhere on the list in the past. (Note that xfs_da_state is its own separate thing for dealing with dabtree operations; that doesn't change.) Question 2: Should we revise the arguments to xfs_attr_[gs]et? Right now the callers of these functions have to initialize the entire xfs_da_state structure even though they only care about 7 of the 26 fields. What do you think of changing the xfs_attr_[gs]et function declarations to pass in the 7 arguments directly? Or you could create a new arguments struct? If you did that, then xfs_args_[gs]et would be responsible for allocating and initializing their internal state. This is cleaner interface-wise, and leads me into... Question 3: Instead of creating separate xfs_delattr_context andxfs_attr_item structs, can you put all the stuff those structures track into xfs_attrop_state? I sense that the duplication and pointer indirection in _delattr_context and _attr_item might be a result of it not being all that clear where the xfs_da_args is actually allocated, and therefore the scoping rules. Would all that be clearer if all the new state was thrown into the same xfs_attrop_state that we dynamically allocate at the start of xfs_attr_[gs]et()? (Yes, this question's existence depends on your answer to Q2.) Question 4: Does xfs_attr_item_init need to allocate space to hold the name and value buffers when it is called from xfs_attr_set? xfs_attr_set does not return until we're completely finished with the deferred xattr processing, which means that the buffers passed into xfs_attr_set cannot go out of scope, right? (I think you /do/ need to allocate separate buffers for log recovery.) My second set of questions revolve around the duplication of attr operation loops between xfs_attr_set_args() and the defer ops code. AFAICT there's no reason to have xfs_attr_set_args, since there is no requirement in the deferred ops machinery to create log intent or log done items. Question 5: Instead of open-coding a do {attrset roll hold} loop in xfs_attr_set_args, what do you think about setting up the deferred op code (xfs_attr_defer_type and the functions assigned to it in patch 4) to do that from the start? By adding the defer op code early, patch 2 would create xfs_attr_set_iter as it does now, and xfs_attr_finish_item would call it directly. Since there's no log item defined yet, the other defer ops functions (create_intent, abort_intent, create_done) can return NULL log item pointers. Once you get to the point whre you have defined the log items, you can add in all the other log item handling (i.e. xfs_attr[id]_item_ops). As an example of a defer op that optionally records changes to its incore operation state with log items, see xfs_swapext_defer_type[1]. [1] https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux.git/commit/?h=atomic-file-updates&id=53c7233842969347174e8d68c8486dbf3efb734c Moving along to the DEFER_FINISH question that I said I'd get back to later -- there's a subtle difference to the order in which deferred log items that are created while trying to make progress on an xattr op are finished. This is due to a design wart of the original defer ops machinery, and Brian and I have discussed this previously. In a nutshell, let's pretend that step 1 of an xattr operation creates new deferred ops ABCD and step 2 creates new deferred ops EFGH. Let's also pretend that step 1 and step 2 both set DEFER_FINISH. In the !delattr case, xfs_attr_set_args -> xfs_attr_trans_roll will run step 1, process A->B->C->D, roll, run step 2, and then process E->F->G->H and commit. In the delattr case, however, the defer ops machinery shoves all the new defer ops to the end of the queue, which means that we run step 1, roll, run step 2, and then run A->B->C->D->E->F->G->H and commit. I would like to fix that, since it seems more logical to me that you'd finish A-D before moving on to the second phase; and the atomic swapext code is going to require that. Question 6: So, uh, can you go have a look at the latest patches[2]? I'll post them soon if I can get past the bigtime review. I don't think this wart of the defer ops mechanism affects your patchset, but you know how deferred attrs work better than I. :) [2] https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux.git/log/?h=defer-ops-stalls I also had a couple questions (observations?) about how log recovery works for attr items, because I noticed that xfs_attri_item_recover also has a do {attrset, roll} loop. HAH, I just realized (while writing Q7) that xfs_defer_move needs to log intent items for each newly scheduled work item because if log recovery crashes after finishing the existing intent items but before it gets to the new intent items, the next attempt at log recovery will not see the missing intents and will /never/ even be aware that it should have finished a chain. That leads to fs corruption! So that series has more work to do, and you can set Q6 aside for now. Question 7: Why is there a do {attrset, roll} loop in the recovery routine? Log intent item recovery functions are only supposed to complete a single transaction's worth of work. If there's more work to do, the recovery function should attach a new defer ops item to the transaction to schedule the rest of the work, and use xfs_defer_move to attach the list of new defer ops to *parent_tp. The reason for this is that log recovery has to finish every unfinished intent item that was in the log before it can move on to new log items that were created as a result of recovering log items. Ok, that's probably enough questions for now. --D > + > +#define XFS_ATTR_ITEM_SIZEOF(namelen, valuelen) \ > + (sizeof(struct xfs_attr_item) + (namelen) + (valuelen)) > + > + > /*======================================================================== > * Function prototypes for the kernel. > *========================================================================*/ > @@ -283,11 +319,14 @@ int xfs_attr_get_ilocked(struct xfs_da_args *args); > int xfs_attr_get(struct xfs_da_args *args); > int xfs_attr_set(struct xfs_da_args *args); > int xfs_attr_set_args(struct xfs_da_args *args); > +int xfs_attr_set_iter(struct xfs_delattr_context *dac, > + struct xfs_buf **leaf_bp); > int xfs_has_attr(struct xfs_da_args *args); > int xfs_attr_remove_args(struct xfs_da_args *args); > int xfs_attr_remove_iter(struct xfs_delattr_context *dac); > bool xfs_attr_namecheck(const void *name, size_t length); > void xfs_delattr_context_init(struct xfs_delattr_context *dac, > struct xfs_da_args *args); > +int xfs_attr_calc_size(struct xfs_da_args *args, int *local); > > #endif /* __XFS_ATTR_H__ */ > diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c > index d8f5862..4392279 100644 > --- a/fs/xfs/libxfs/xfs_defer.c > +++ b/fs/xfs/libxfs/xfs_defer.c > @@ -176,6 +176,7 @@ static const struct xfs_defer_op_type *defer_op_types[] = { > [XFS_DEFER_OPS_TYPE_RMAP] = &xfs_rmap_update_defer_type, > [XFS_DEFER_OPS_TYPE_FREE] = &xfs_extent_free_defer_type, > [XFS_DEFER_OPS_TYPE_AGFL_FREE] = &xfs_agfl_free_defer_type, > + [XFS_DEFER_OPS_TYPE_ATTR] = &xfs_attr_defer_type, > }; > > static void > diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h > index 6b2ca58..193d3bb 100644 > --- a/fs/xfs/libxfs/xfs_defer.h > +++ b/fs/xfs/libxfs/xfs_defer.h > @@ -18,6 +18,7 @@ enum xfs_defer_ops_type { > XFS_DEFER_OPS_TYPE_RMAP, > XFS_DEFER_OPS_TYPE_FREE, > XFS_DEFER_OPS_TYPE_AGFL_FREE, > + XFS_DEFER_OPS_TYPE_ATTR, > XFS_DEFER_OPS_TYPE_MAX, > }; > > @@ -62,5 +63,7 @@ extern const struct xfs_defer_op_type xfs_refcount_update_defer_type; > extern const struct xfs_defer_op_type xfs_rmap_update_defer_type; > extern const struct xfs_defer_op_type xfs_extent_free_defer_type; > extern const struct xfs_defer_op_type xfs_agfl_free_defer_type; > +extern const struct xfs_defer_op_type xfs_attr_defer_type; > + > > #endif /* __XFS_DEFER_H__ */ > diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h > index e3400c9..33b26b6 100644 > --- a/fs/xfs/libxfs/xfs_log_format.h > +++ b/fs/xfs/libxfs/xfs_log_format.h > @@ -117,7 +117,12 @@ struct xfs_unmount_log_format { > #define XLOG_REG_TYPE_CUD_FORMAT 24 > #define XLOG_REG_TYPE_BUI_FORMAT 25 > #define XLOG_REG_TYPE_BUD_FORMAT 26 > -#define XLOG_REG_TYPE_MAX 26 > +#define XLOG_REG_TYPE_ATTRI_FORMAT 27 > +#define XLOG_REG_TYPE_ATTRD_FORMAT 28 > +#define XLOG_REG_TYPE_ATTR_NAME 29 > +#define XLOG_REG_TYPE_ATTR_VALUE 30 > +#define XLOG_REG_TYPE_MAX 30 > + > > /* > * Flags to log operation header > @@ -240,6 +245,8 @@ typedef struct xfs_trans_header { > #define XFS_LI_CUD 0x1243 > #define XFS_LI_BUI 0x1244 /* bmbt update intent */ > #define XFS_LI_BUD 0x1245 > +#define XFS_LI_ATTRI 0x1246 /* attr set/remove intent*/ > +#define XFS_LI_ATTRD 0x1247 /* attr set/remove done */ > > #define XFS_LI_TYPE_DESC \ > { XFS_LI_EFI, "XFS_LI_EFI" }, \ > @@ -255,7 +262,9 @@ typedef struct xfs_trans_header { > { XFS_LI_CUI, "XFS_LI_CUI" }, \ > { XFS_LI_CUD, "XFS_LI_CUD" }, \ > { XFS_LI_BUI, "XFS_LI_BUI" }, \ > - { XFS_LI_BUD, "XFS_LI_BUD" } > + { XFS_LI_BUD, "XFS_LI_BUD" }, \ > + { XFS_LI_ATTRI, "XFS_LI_ATTRI" }, \ > + { XFS_LI_ATTRD, "XFS_LI_ATTRD" } > > /* > * Inode Log Item Format definitions. > @@ -860,4 +869,35 @@ struct xfs_icreate_log { > __be32 icl_gen; /* inode generation number to use */ > }; > > +/* > + * Flags for deferred attribute operations. > + * Upper bits are flags, lower byte is type code > + */ > +#define XFS_ATTR_OP_FLAGS_SET 1 /* Set the attribute */ > +#define XFS_ATTR_OP_FLAGS_REMOVE 2 /* Remove the attribute */ > +#define XFS_ATTR_OP_FLAGS_TYPE_MASK 0x0FF /* Flags type mask */ > + > +/* > + * This is the structure used to lay out an attr log item in the > + * log. > + */ > +struct xfs_attri_log_format { > + uint16_t alfi_type; /* attri log item type */ > + uint16_t alfi_size; /* size of this item */ > + uint32_t __pad; /* pad to 64 bit aligned */ > + uint64_t alfi_id; /* attri identifier */ > + xfs_ino_t alfi_ino; /* the inode for this attr operation */ > + uint32_t alfi_op_flags; /* marks the op as a set or remove */ > + uint32_t alfi_name_len; /* attr name length */ > + uint32_t alfi_value_len; /* attr value length */ > + uint32_t alfi_attr_flags;/* attr flags */ > +}; > + > +struct xfs_attrd_log_format { > + uint16_t alfd_type; /* attrd log item type */ > + uint16_t alfd_size; /* size of this item */ > + uint32_t __pad; /* pad to 64 bit aligned */ > + uint64_t alfd_alf_id; /* id of corresponding attrd */ > +}; > + > #endif /* __XFS_LOG_FORMAT_H__ */ > diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h > index 641132d..b0b8e94 100644 > --- a/fs/xfs/libxfs/xfs_log_recover.h > +++ b/fs/xfs/libxfs/xfs_log_recover.h > @@ -72,6 +72,8 @@ extern const struct xlog_recover_item_ops xlog_rui_item_ops; > extern const struct xlog_recover_item_ops xlog_rud_item_ops; > extern const struct xlog_recover_item_ops xlog_cui_item_ops; > extern const struct xlog_recover_item_ops xlog_cud_item_ops; > +extern const struct xlog_recover_item_ops xlog_attri_item_ops; > +extern const struct xlog_recover_item_ops xlog_attrd_item_ops; > > /* > * Macros, structures, prototypes for internal log manager use. > diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h > index 397d947..860cdd2 100644 > --- a/fs/xfs/libxfs/xfs_types.h > +++ b/fs/xfs/libxfs/xfs_types.h > @@ -11,6 +11,7 @@ typedef uint32_t prid_t; /* project ID */ > typedef uint32_t xfs_agblock_t; /* blockno in alloc. group */ > typedef uint32_t xfs_agino_t; /* inode # within allocation grp */ > typedef uint32_t xfs_extlen_t; /* extent length in blocks */ > +typedef uint32_t xfs_attrlen_t; /* attr length */ > typedef uint32_t xfs_agnumber_t; /* allocation group number */ > typedef int32_t xfs_extnum_t; /* # of extents in a file */ > typedef int16_t xfs_aextnum_t; /* # extents in an attribute fork */ > diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c > index 1887605..9a649d1 100644 > --- a/fs/xfs/scrub/common.c > +++ b/fs/xfs/scrub/common.c > @@ -24,6 +24,8 @@ > #include "xfs_rmap_btree.h" > #include "xfs_log.h" > #include "xfs_trans_priv.h" > +#include "xfs_da_format.h" > +#include "xfs_da_btree.h" > #include "xfs_attr.h" > #include "xfs_reflink.h" > #include "scrub/scrub.h" > diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c > index d4c687b5c..2fa173a 100644 > --- a/fs/xfs/xfs_acl.c > +++ b/fs/xfs/xfs_acl.c > @@ -10,6 +10,8 @@ > #include "xfs_trans_resv.h" > #include "xfs_mount.h" > #include "xfs_inode.h" > +#include "xfs_da_format.h" > +#include "xfs_da_btree.h" > #include "xfs_attr.h" > #include "xfs_trace.h" > #include "xfs_error.h" > diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c > new file mode 100644 > index 0000000..923c288 > --- /dev/null > +++ b/fs/xfs/xfs_attr_item.c > @@ -0,0 +1,829 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > +/* > + * Copyright (C) 2019 Oracle. All Rights Reserved. > + * Author: Allison Collins <allison.henderson@xxxxxxxxxx> > + */ > + > +#include "xfs.h" > +#include "xfs_fs.h" > +#include "xfs_format.h" > +#include "xfs_log_format.h" > +#include "xfs_trans_resv.h" > +#include "xfs_bit.h" > +#include "xfs_shared.h" > +#include "xfs_mount.h" > +#include "xfs_defer.h" > +#include "xfs_trans.h" > +#include "xfs_trans_priv.h" > +#include "xfs_buf_item.h" > +#include "xfs_attr_item.h" > +#include "xfs_log.h" > +#include "xfs_btree.h" > +#include "xfs_rmap.h" > +#include "xfs_inode.h" > +#include "xfs_icache.h" > +#include "xfs_da_format.h" > +#include "xfs_da_btree.h" > +#include "xfs_attr.h" > +#include "xfs_shared.h" > +#include "xfs_attr_item.h" > +#include "xfs_alloc.h" > +#include "xfs_bmap.h" > +#include "xfs_trace.h" > +#include "libxfs/xfs_da_format.h" > +#include "xfs_inode.h" > +#include "xfs_quota.h" > +#include "xfs_log_priv.h" > +#include "xfs_log_recover.h" > + > +static const struct xfs_item_ops xfs_attri_item_ops; > +static const struct xfs_item_ops xfs_attrd_item_ops; > + > +static inline struct xfs_attri_log_item *ATTRI_ITEM(struct xfs_log_item *lip) > +{ > + return container_of(lip, struct xfs_attri_log_item, attri_item); > +} > + > +STATIC void > +xfs_attri_item_free( > + struct xfs_attri_log_item *attrip) > +{ > + kmem_free(attrip->attri_item.li_lv_shadow); > + kmem_free(attrip); > +} > + > +/* > + * Freeing the attrip requires that we remove it from the AIL if it has already > + * been placed there. However, the ATTRI may not yet have been placed in the > + * AIL when called by xfs_attri_release() from ATTRD processing due to the > + * ordering of committed vs unpin operations in bulk insert operations. Hence > + * the reference count to ensure only the last caller frees the ATTRI. > + */ > +STATIC void > +xfs_attri_release( > + struct xfs_attri_log_item *attrip) > +{ > + ASSERT(atomic_read(&attrip->attri_refcount) > 0); > + if (atomic_dec_and_test(&attrip->attri_refcount)) { > + xfs_trans_ail_delete(&attrip->attri_item, > + SHUTDOWN_LOG_IO_ERROR); > + xfs_attri_item_free(attrip); > + } > +} > + > +/* > + * This returns the number of iovecs needed to log the given attri item. We > + * only need 1 iovec for an attri item. It just logs the attr_log_format > + * structure. > + */ > +static inline int > +xfs_attri_item_sizeof( > + struct xfs_attri_log_item *attrip) > +{ > + return sizeof(struct xfs_attri_log_format); > +} > + > +STATIC void > +xfs_attri_item_size( > + struct xfs_log_item *lip, > + int *nvecs, > + int *nbytes) > +{ > + struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip); > + > + *nvecs += 1; > + *nbytes += xfs_attri_item_sizeof(attrip); > + > + /* Attr set and remove operations require a name */ > + ASSERT(attrip->attri_name_len > 0); > + > + *nvecs += 1; > + *nbytes += ATTR_NVEC_SIZE(attrip->attri_name_len); > + > + /* > + * Set ops can accept a value of 0 len to clear an attr value. Remove > + * ops do not need a value at all. So only account for the value > + * when it is needed. > + */ > + if (attrip->attri_value_len > 0) { > + *nvecs += 1; > + *nbytes += ATTR_NVEC_SIZE(attrip->attri_value_len); > + } > +} > + > +/* > + * This is called to fill in the log iovecs for the given attri log > + * item. We use 1 iovec for the attri_format_item, 1 for the name, and > + * another for the value if it is present > + */ > +STATIC void > +xfs_attri_item_format( > + struct xfs_log_item *lip, > + struct xfs_log_vec *lv) > +{ > + struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip); > + struct xfs_log_iovec *vecp = NULL; > + > + attrip->attri_format.alfi_type = XFS_LI_ATTRI; > + attrip->attri_format.alfi_size = 1; > + > + /* > + * This size accounting must be done before copying the attrip into the > + * iovec. If we do it after, the wrong size will be recorded to the log > + * and we trip across assertion checks for bad region sizes later during > + * the log recovery. > + */ > + > + ASSERT(attrip->attri_name_len > 0); > + attrip->attri_format.alfi_size++; > + > + if (attrip->attri_value_len > 0) > + attrip->attri_format.alfi_size++; > + > + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTRI_FORMAT, > + &attrip->attri_format, > + xfs_attri_item_sizeof(attrip)); > + if (attrip->attri_name_len > 0) > + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NAME, > + attrip->attri_name, > + ATTR_NVEC_SIZE(attrip->attri_name_len)); > + > + if (attrip->attri_value_len > 0) > + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_VALUE, > + attrip->attri_value, > + ATTR_NVEC_SIZE(attrip->attri_value_len)); > +} > + > +/* > + * The unpin operation is the last place an ATTRI is manipulated in the log. It > + * is either inserted in the AIL or aborted in the event of a log I/O error. In > + * either case, the ATTRI transaction has been successfully committed to make > + * it this far. Therefore, we expect whoever committed the ATTRI to either > + * construct and commit the ATTRD or drop the ATTRD's reference in the event of > + * error. Simply drop the log's ATTRI reference now that the log is done with > + * it. > + */ > +STATIC void > +xfs_attri_item_unpin( > + struct xfs_log_item *lip, > + int remove) > +{ > + struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip); > + > + xfs_attri_release(attrip); > +} > + > + > +STATIC void > +xfs_attri_item_release( > + struct xfs_log_item *lip) > +{ > + xfs_attri_release(ATTRI_ITEM(lip)); > +} > + > +/* > + * Allocate and initialize an attri item > + */ > +STATIC struct xfs_attri_log_item * > +xfs_attri_init( > + struct xfs_mount *mp) > + > +{ > + struct xfs_attri_log_item *attrip; > + uint size; > + > + size = (uint)(sizeof(struct xfs_attri_log_item)); > + attrip = kmem_zalloc(size, 0); > + > + xfs_log_item_init(mp, &attrip->attri_item, XFS_LI_ATTRI, > + &xfs_attri_item_ops); > + attrip->attri_format.alfi_id = (uintptr_t)(void *)attrip; > + atomic_set(&attrip->attri_refcount, 2); > + > + return attrip; > +} > + > +/* > + * Copy an attr format buffer from the given buf, and into the destination attr > + * format structure. > + */ > +STATIC int > +xfs_attri_copy_format(struct xfs_log_iovec *buf, > + struct xfs_attri_log_format *dst_attr_fmt) > +{ > + struct xfs_attri_log_format *src_attr_fmt = buf->i_addr; > + uint len = sizeof(struct xfs_attri_log_format); > + > + if (buf->i_len != len) > + return -EFSCORRUPTED; > + > + memcpy((char *)dst_attr_fmt, (char *)src_attr_fmt, len); > + return 0; > +} > + > +static inline struct xfs_attrd_log_item *ATTRD_ITEM(struct xfs_log_item *lip) > +{ > + return container_of(lip, struct xfs_attrd_log_item, attrd_item); > +} > + > +STATIC void > +xfs_attrd_item_free(struct xfs_attrd_log_item *attrdp) > +{ > + kmem_free(attrdp->attrd_item.li_lv_shadow); > + kmem_free(attrdp); > +} > + > +/* > + * This returns the number of iovecs needed to log the given attrd item. > + * We only need 1 iovec for an attrd item. It just logs the attr_log_format > + * structure. > + */ > +static inline int > +xfs_attrd_item_sizeof( > + struct xfs_attrd_log_item *attrdp) > +{ > + return sizeof(struct xfs_attrd_log_format); > +} > + > +STATIC void > +xfs_attrd_item_size( > + struct xfs_log_item *lip, > + int *nvecs, > + int *nbytes) > +{ > + struct xfs_attrd_log_item *attrdp = ATTRD_ITEM(lip); > + *nvecs += 1; > + *nbytes += xfs_attrd_item_sizeof(attrdp); > +} > + > +/* > + * This is called to fill in the log iovecs for the given attrd log item. We use > + * only 1 iovec for the attrd_format, and we point that at the attr_log_format > + * structure embedded in the attrd item. > + */ > +STATIC void > +xfs_attrd_item_format( > + struct xfs_log_item *lip, > + struct xfs_log_vec *lv) > +{ > + struct xfs_attrd_log_item *attrdp = ATTRD_ITEM(lip); > + struct xfs_log_iovec *vecp = NULL; > + > + attrdp->attrd_format.alfd_type = XFS_LI_ATTRD; > + attrdp->attrd_format.alfd_size = 1; > + > + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTRD_FORMAT, > + &attrdp->attrd_format, xfs_attrd_item_sizeof(attrdp)); > +} > + > +/* > + * The ATTRD is either committed or aborted if the transaction is cancelled. If > + * the transaction is cancelled, drop our reference to the ATTRI and free the > + * ATTRD. > + */ > +STATIC void > +xfs_attrd_item_release( > + struct xfs_log_item *lip) > +{ > + struct xfs_attrd_log_item *attrdp = ATTRD_ITEM(lip); > + xfs_attri_release(attrdp->attrd_attrip); > + xfs_attrd_item_free(attrdp); > +} > + > +/* > + * Log an ATTRI it to the ATTRD when the attr op is done. An attr operation > + * may be a set or a remove. Note that the transaction is marked dirty > + * regardless of whether the operation succeeds or fails to support the > + * ATTRI/ATTRD lifecycle rules. > + */ > +int > +xfs_trans_attr( > + struct xfs_delattr_context *dac, > + struct xfs_attrd_log_item *attrdp, > + struct xfs_buf **leaf_bp, > + uint32_t op_flags) > +{ > + struct xfs_da_args *args = dac->da_args; > + int error; > + > + error = xfs_qm_dqattach_locked(args->dp, 0); > + if (error) > + return error; > + > + switch (op_flags) { > + case XFS_ATTR_OP_FLAGS_SET: > + args->op_flags |= XFS_DA_OP_ADDNAME; > + error = xfs_attr_set_iter(dac, leaf_bp); > + break; > + case XFS_ATTR_OP_FLAGS_REMOVE: > + ASSERT(XFS_IFORK_Q((args->dp))); > + error = xfs_attr_remove_iter(dac); > + break; > + default: > + error = -EFSCORRUPTED; > + break; > + } > + > + /* > + * Mark the transaction dirty, even on error. This ensures the > + * transaction is aborted, which: > + * > + * 1.) releases the ATTRI and frees the ATTRD > + * 2.) shuts down the filesystem > + */ > + args->trans->t_flags |= XFS_TRANS_DIRTY; > + set_bit(XFS_LI_DIRTY, &attrdp->attrd_item.li_flags); > + > + return error; > +} > + > +/* Log an attr to the intent item. */ > +STATIC void > +xfs_attr_log_item( > + struct xfs_trans *tp, > + struct xfs_attri_log_item *attrip, > + struct xfs_attr_item *attr) > +{ > + struct xfs_attri_log_format *attrp; > + char *name_value; > + > + name_value = ((char *)attr) + sizeof(struct xfs_attr_item); > + > + tp->t_flags |= XFS_TRANS_DIRTY; > + set_bit(XFS_LI_DIRTY, &attrip->attri_item.li_flags); > + > + /* > + * At this point the xfs_attr_item has been constructed, and we've > + * created the log intent. Fill in the attri log item and log format > + * structure with fields from this xfs_attr_item > + */ > + attrp = &attrip->attri_format; > + attrp->alfi_ino = attr->xattri_ip->i_ino; > + attrp->alfi_op_flags = attr->xattri_op_flags; > + attrp->alfi_value_len = attr->xattri_value_len; > + attrp->alfi_name_len = attr->xattri_name_len; > + attrp->alfi_attr_flags = attr->xattri_flags; > + > + attrip->attri_name = name_value; > + attrip->attri_value = &name_value[attr->xattri_name_len]; > + attrip->attri_name_len = attr->xattri_name_len; > + attrip->attri_value_len = attr->xattri_value_len; > +} > + > +/* Get an ATTRI. */ > +static struct xfs_log_item * > +xfs_attr_create_intent( > + struct xfs_trans *tp, > + struct list_head *items, > + unsigned int count, > + bool sort) > +{ > + struct xfs_mount *mp = tp->t_mountp; > + struct xfs_attri_log_item *attrip = xfs_attri_init(mp); > + struct xfs_attr_item *attr; > + > + ASSERT(count == 1); > + > + xfs_trans_add_item(tp, &attrip->attri_item); > + list_for_each_entry(attr, items, xattri_list) > + xfs_attr_log_item(tp, attrip, attr); > + return &attrip->attri_item; > +} > + > +/* Process an attr. */ > +STATIC int > +xfs_attr_finish_item( > + struct xfs_trans *tp, > + struct xfs_log_item *done, > + struct list_head *item, > + struct xfs_btree_cur **state) > +{ > + struct xfs_attr_item *attr; > + int error; > + int local; > + struct xfs_delattr_context *dac; > + struct xfs_da_args *args; > + struct xfs_attrd_log_item *attrdp; > + struct xfs_attri_log_item *attrip; > + > + attr = container_of(item, struct xfs_attr_item, xattri_list); > + dac = &attr->xattri_dac; > + args = &attr->xattri_args; > + > + if (!(dac->flags & XFS_DAC_DELAYED_OP_INIT)) { > + /* Only need to initialize args context once */ > + memset(args, 0, sizeof(*args)); > + args->geo = attr->xattri_ip->i_mount->m_attr_geo; > + args->whichfork = XFS_ATTR_FORK; > + args->dp = attr->xattri_ip; > + args->name = ((const unsigned char *)attr) + > + sizeof(struct xfs_attr_item); > + args->namelen = attr->xattri_name_len; > + args->attr_filter = attr->xattri_flags; > + args->hashval = xfs_da_hashname(args->name, args->namelen); > + args->value = (void *)&args->name[attr->xattri_name_len]; > + args->valuelen = attr->xattri_value_len; > + args->op_flags = XFS_DA_OP_OKNOENT; > + > + /* must match existing transaction block res */ > + args->total = xfs_attr_calc_size(args, &local); > + > + memset(dac, 0, sizeof(struct xfs_delattr_context)); > + dac->flags |= XFS_DAC_DELAYED_OP_INIT; > + dac->da_args = args; > + } > + > + /* > + * Always reset trans after EAGAIN cycle > + * since the transaction is new > + */ > + args->trans = tp; > + > + error = xfs_trans_attr(dac, ATTRD_ITEM(done), &dac->leaf_bp, > + attr->xattri_op_flags); > + /* > + * The attrip refers to xfs_attr_item memory to log the name and value > + * with the intent item. This already occurred when the intent was > + * committed so these fields are no longer accessed. Clear them out of > + * caution since we're about to free the xfs_attr_item. > + */ > + attrdp = (struct xfs_attrd_log_item *)done; > + attrip = attrdp->attrd_attrip; > + attrip->attri_name = NULL; > + attrip->attri_value = NULL; > + > + if (error != -EAGAIN) > + kmem_free(attr); > + > + return error; > +} > + > +/* Abort all pending ATTRs. */ > +STATIC void > +xfs_attr_abort_intent( > + struct xfs_log_item *intent) > +{ > + xfs_attri_release(ATTRI_ITEM(intent)); > +} > + > +/* Cancel an attr */ > +STATIC void > +xfs_attr_cancel_item( > + struct list_head *item) > +{ > + struct xfs_attr_item *attr; > + > + attr = container_of(item, struct xfs_attr_item, xattri_list); > + kmem_free(attr); > +} > + > +/* > + * The ATTRI is logged only once and cannot be moved in the log, so simply > + * return the lsn at which it's been logged. > + */ > +STATIC xfs_lsn_t > +xfs_attri_item_committed( > + struct xfs_log_item *lip, > + xfs_lsn_t lsn) > +{ > + return lsn; > +} > + > +STATIC void > +xfs_attri_item_committing( > + struct xfs_log_item *lip, > + xfs_lsn_t lsn) > +{ > +} > + > +STATIC bool > +xfs_attri_item_match( > + struct xfs_log_item *lip, > + uint64_t intent_id) > +{ > + return ATTRI_ITEM(lip)->attri_format.alfi_id == intent_id; > +} > + > +/* > + * When the attrd item is committed to disk, all we need to do is delete our > + * reference to our partner attri item and then free ourselves. Since we're > + * freeing ourselves we must return -1 to keep the transaction code from > + * further referencing this item. > + */ > +STATIC xfs_lsn_t > +xfs_attrd_item_committed( > + struct xfs_log_item *lip, > + xfs_lsn_t lsn) > +{ > + struct xfs_attrd_log_item *attrdp = ATTRD_ITEM(lip); > + > + /* > + * Drop the ATTRI reference regardless of whether the ATTRD has been > + * aborted. Once the ATTRD transaction is constructed, it is the sole > + * responsibility of the ATTRD to release the ATTRI (even if the ATTRI > + * is aborted due to log I/O error). > + */ > + xfs_attri_release(attrdp->attrd_attrip); > + xfs_attrd_item_free(attrdp); > + > + return NULLCOMMITLSN; > +} > + > +STATIC void > +xfs_attrd_item_committing( > + struct xfs_log_item *lip, > + xfs_lsn_t lsn) > +{ > +} > + > + > +/* > + * Allocate and initialize an attrd item > + */ > +struct xfs_attrd_log_item * > +xfs_attrd_init( > + struct xfs_mount *mp, > + struct xfs_attri_log_item *attrip) > + > +{ > + struct xfs_attrd_log_item *attrdp; > + uint size; > + > + size = (uint)(sizeof(struct xfs_attrd_log_item)); > + attrdp = kmem_zalloc(size, 0); > + memset(attrdp, 0, size); > + > + xfs_log_item_init(mp, &attrdp->attrd_item, XFS_LI_ATTRD, > + &xfs_attrd_item_ops); > + attrdp->attrd_attrip = attrip; > + attrdp->attrd_format.alfd_alf_id = attrip->attri_format.alfi_id; > + > + return attrdp; > +} > + > +/* > + * This routine is called to allocate an "attr free done" log item. > + */ > +struct xfs_attrd_log_item * > +xfs_trans_get_attrd(struct xfs_trans *tp, > + struct xfs_attri_log_item *attrip) > +{ > + struct xfs_attrd_log_item *attrdp; > + > + ASSERT(tp != NULL); > + > + attrdp = xfs_attrd_init(tp->t_mountp, attrip); > + ASSERT(attrdp != NULL); > + > + xfs_trans_add_item(tp, &attrdp->attrd_item); > + return attrdp; > +} > + > +static const struct xfs_item_ops xfs_attrd_item_ops = { > + .iop_size = xfs_attrd_item_size, > + .iop_format = xfs_attrd_item_format, > + .iop_release = xfs_attrd_item_release, > + .iop_committing = xfs_attrd_item_committing, > + .iop_committed = xfs_attrd_item_committed, > +}; > + > + > +/* Get an ATTRD so we can process all the attrs. */ > +static struct xfs_log_item * > +xfs_attr_create_done( > + struct xfs_trans *tp, > + struct xfs_log_item *intent, > + unsigned int count) > +{ > + return &xfs_trans_get_attrd(tp, ATTRI_ITEM(intent))->attrd_item; > +} > + > +const struct xfs_defer_op_type xfs_attr_defer_type = { > + .max_items = 1, > + .create_intent = xfs_attr_create_intent, > + .abort_intent = xfs_attr_abort_intent, > + .create_done = xfs_attr_create_done, > + .finish_item = xfs_attr_finish_item, > + .cancel_item = xfs_attr_cancel_item, > +}; > + > +/* > + * Process an attr intent item that was recovered from the log. We need to > + * delete the attr that it describes. > + */ > +STATIC int > +xfs_attri_item_recover( > + struct xfs_log_item *lip, > + struct xfs_trans *parent_tp) > +{ > + struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip); > + struct xfs_mount *mp = parent_tp->t_mountp; > + struct xfs_inode *ip; > + struct xfs_attrd_log_item *attrdp; > + struct xfs_da_args args; > + struct xfs_attri_log_format *attrp; > + struct xfs_trans_res tres; > + int local; > + int error, err2 = 0; > + int rsvd = 0; > + struct xfs_buf *leaf_bp = NULL; > + struct xfs_delattr_context dac = { > + .da_args = &args, > + }; > + > + /* > + * First check the validity of the attr described by the ATTRI. If any > + * are bad, then assume that all are bad and just toss the ATTRI. > + */ > + attrp = &attrip->attri_format; > + if (!(attrp->alfi_op_flags == XFS_ATTR_OP_FLAGS_SET || > + attrp->alfi_op_flags == XFS_ATTR_OP_FLAGS_REMOVE) || > + (attrp->alfi_value_len > XATTR_SIZE_MAX) || > + (attrp->alfi_name_len > XATTR_NAME_MAX) || > + (attrp->alfi_name_len == 0)) { > + /* > + * This will pull the ATTRI from the AIL and free the memory > + * associated with it. > + */ > + xfs_attri_release(attrip); > + return -EFSCORRUPTED; > + } > + > + error = xfs_iget(mp, 0, attrp->alfi_ino, 0, 0, &ip); > + if (error) > + return error; > + > + memset(&args, 0, sizeof(args)); > + args.geo = ip->i_mount->m_attr_geo; > + args.whichfork = XFS_ATTR_FORK; > + args.dp = ip; > + args.name = attrip->attri_name; > + args.namelen = attrp->alfi_name_len; > + args.attr_filter = attrp->alfi_attr_flags; > + args.hashval = xfs_da_hashname(attrip->attri_name, > + attrp->alfi_name_len); > + args.value = attrip->attri_value; > + args.valuelen = attrp->alfi_value_len; > + args.op_flags = XFS_DA_OP_OKNOENT; > + args.total = xfs_attr_calc_size(&args, &local); > + > + tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres + > + M_RES(mp)->tr_attrsetrt.tr_logres * args.total; > + tres.tr_logcount = XFS_ATTRSET_LOG_COUNT; > + tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; > + > + error = xfs_trans_alloc(mp, &tres, args.total, 0, > + rsvd ? XFS_TRANS_RESERVE : 0, &args.trans); > + if (error) > + goto out_rele; > + attrdp = xfs_trans_get_attrd(args.trans, attrip); > + > + xfs_ilock(ip, XFS_ILOCK_EXCL); > + > + xfs_trans_ijoin(args.trans, ip, 0); > + > + do { > + error = xfs_trans_attr(&dac, attrdp, &leaf_bp, > + attrp->alfi_op_flags); > + if (error && error != -EAGAIN) > + goto abort_error; > + > + xfs_trans_log_inode(args.trans, ip, > + XFS_ILOG_CORE | XFS_ILOG_ADATA); > + > + err2 = xfs_trans_roll(&args.trans); > + if (err2) { > + error = err2; > + goto abort_error; > + } > + > + /* Rejoin inode and leaf if needed */ > + xfs_trans_ijoin(args.trans, ip, 0); > + if (leaf_bp) { > + xfs_trans_bjoin(args.trans, leaf_bp); > + xfs_trans_bhold(args.trans, leaf_bp); > + } > + > + } while (error == -EAGAIN); > + > + error = xfs_trans_commit(args.trans); > + if (error) > + goto abort_error; > + > + xfs_iunlock(ip, XFS_ILOCK_EXCL); > + xfs_irele(ip); > + return error; > + > +abort_error: > + xfs_trans_cancel(args.trans); > + xfs_iunlock(ip, XFS_ILOCK_EXCL); > +out_rele: > + xfs_irele(ip); > + return error; > +} > + > +static const struct xfs_item_ops xfs_attri_item_ops = { > + .iop_size = xfs_attri_item_size, > + .iop_format = xfs_attri_item_format, > + .iop_unpin = xfs_attri_item_unpin, > + .iop_committed = xfs_attri_item_committed, > + .iop_committing = xfs_attri_item_committing, > + .iop_release = xfs_attri_item_release, > + .iop_recover = xfs_attri_item_recover, > + .iop_match = xfs_attri_item_match, > +}; > + > + > + > +STATIC int > +xlog_recover_attri_commit_pass2( > + struct xlog *log, > + struct list_head *buffer_list, > + struct xlog_recover_item *item, > + xfs_lsn_t lsn) > +{ > + int error; > + struct xfs_mount *mp = log->l_mp; > + struct xfs_attri_log_item *attrip; > + struct xfs_attri_log_format *attri_formatp; > + char *name = NULL; > + char *value = NULL; > + int region = 0; > + > + attri_formatp = item->ri_buf[region].i_addr; > + > + attrip = xfs_attri_init(mp); > + error = xfs_attri_copy_format(&item->ri_buf[region], > + &attrip->attri_format); > + if (error) { > + xfs_attri_item_free(attrip); > + return error; > + } > + > + attrip->attri_name_len = attri_formatp->alfi_name_len; > + attrip->attri_value_len = attri_formatp->alfi_value_len; > + attrip = kmem_realloc(attrip, sizeof(struct xfs_attri_log_item) + > + attrip->attri_name_len + attrip->attri_value_len, > + 0); > + > + ASSERT(attrip->attri_name_len > 0); > + region++; > + name = ((char *)attrip) + sizeof(struct xfs_attri_log_item); > + memcpy(name, item->ri_buf[region].i_addr, > + attrip->attri_name_len); > + attrip->attri_name = name; > + > + if (attrip->attri_value_len > 0) { > + region++; > + value = ((char *)attrip) + sizeof(struct xfs_attri_log_item) + > + attrip->attri_name_len; > + memcpy(value, item->ri_buf[region].i_addr, > + attrip->attri_value_len); > + attrip->attri_value = value; > + } > + > + /* > + * The ATTRI has two references. One for the ATTRD and one for ATTRI to > + * ensure it makes it into the AIL. Insert the ATTRI into the AIL > + * directly and drop the ATTRI reference. Note that > + * xfs_trans_ail_update() drops the AIL lock. > + */ > + xfs_trans_ail_insert(log->l_ailp, &attrip->attri_item, lsn); > + xfs_attri_release(attrip); > + return 0; > +} > + > +const struct xlog_recover_item_ops xlog_attri_item_ops = { > + .item_type = XFS_LI_ATTRI, > + .commit_pass2 = xlog_recover_attri_commit_pass2, > +}; > + > +/* > + * This routine is called when an ATTRD format structure is found in a committed > + * transaction in the log. Its purpose is to cancel the corresponding ATTRI if > + * it was still in the log. To do this it searches the AIL for the ATTRI with > + * an id equal to that in the ATTRD format structure. If we find it we drop > + * the ATTRD reference, which removes the ATTRI from the AIL and frees it. > + */ > +STATIC int > +xlog_recover_attrd_commit_pass2( > + struct xlog *log, > + struct list_head *buffer_list, > + struct xlog_recover_item *item, > + xfs_lsn_t lsn) > +{ > + struct xfs_attrd_log_format *attrd_formatp; > + > + attrd_formatp = item->ri_buf[0].i_addr; > + ASSERT((item->ri_buf[0].i_len == > + (sizeof(struct xfs_attrd_log_format)))); > + > + xlog_recover_release_intent(log, XFS_LI_ATTRI, > + attrd_formatp->alfd_alf_id); > + return 0; > +} > + > +const struct xlog_recover_item_ops xlog_attrd_item_ops = { > + .item_type = XFS_LI_ATTRD, > + .commit_pass2 = xlog_recover_attrd_commit_pass2, > +}; > diff --git a/fs/xfs/xfs_attr_item.h b/fs/xfs/xfs_attr_item.h > new file mode 100644 > index 0000000..7dd2572 > --- /dev/null > +++ b/fs/xfs/xfs_attr_item.h > @@ -0,0 +1,76 @@ > +/* SPDX-License-Identifier: GPL-2.0-or-later > + * > + * Copyright (C) 2019 Oracle. All Rights Reserved. > + * Author: Allison Collins <allison.henderson@xxxxxxxxxx> > + */ > +#ifndef __XFS_ATTR_ITEM_H__ > +#define __XFS_ATTR_ITEM_H__ > + > +/* kernel only ATTRI/ATTRD definitions */ > + > +struct xfs_mount; > +struct kmem_zone; > + > +/* > + * Define ATTR flag bits. Manipulated by set/clear/test_bit operators. > + */ > +#define XFS_ATTRI_RECOVERED 1 > + > + > +/* iovec length must be 32-bit aligned */ > +#define ATTR_NVEC_SIZE(size) (size == sizeof(int32_t) ? sizeof(int32_t) : \ > + size + sizeof(int32_t) - \ > + (size % sizeof(int32_t))) > + > +/* > + * This is the "attr intention" log item. It is used to log the fact that some > + * attribute operations need to be processed. An operation is currently either > + * a set or remove. Set or remove operations are described by the xfs_attr_item > + * which may be logged to this intent. Intents are used in conjunction with the > + * "attr done" log item described below. > + * > + * The ATTRI is reference counted so that it is not freed prior to both the > + * ATTRI and ATTRD being committed and unpinned. This ensures the ATTRI is > + * inserted into the AIL even in the event of out of order ATTRI/ATTRD > + * processing. In other words, an ATTRI is born with two references: > + * > + * 1.) an ATTRI held reference to track ATTRI AIL insertion > + * 2.) an ATTRD held reference to track ATTRD commit > + * > + * On allocation, both references are the responsibility of the caller. Once the > + * ATTRI is added to and dirtied in a transaction, ownership of reference one > + * transfers to the transaction. The reference is dropped once the ATTRI is > + * inserted to the AIL or in the event of failure along the way (e.g., commit > + * failure, log I/O error, etc.). Note that the caller remains responsible for > + * the ATTRD reference under all circumstances to this point. The caller has no > + * means to detect failure once the transaction is committed, however. > + * Therefore, an ATTRD is required after this point, even in the event of > + * unrelated failure. > + * > + * Once an ATTRD is allocated and dirtied in a transaction, reference two > + * transfers to the transaction. The ATTRD reference is dropped once it reaches > + * the unpin handler. Similar to the ATTRI, the reference also drops in the > + * event of commit failure or log I/O errors. Note that the ATTRD is not > + * inserted in the AIL, so at this point both the ATTRI and ATTRD are freed. > + */ > +struct xfs_attri_log_item { > + struct xfs_log_item attri_item; > + atomic_t attri_refcount; > + int attri_name_len; > + void *attri_name; > + int attri_value_len; > + void *attri_value; > + struct xfs_attri_log_format attri_format; > +}; > + > +/* > + * This is the "attr done" log item. It is used to log the fact that some attrs > + * earlier mentioned in an attri item have been freed. > + */ > +struct xfs_attrd_log_item { > + struct xfs_attri_log_item *attrd_attrip; > + struct xfs_log_item attrd_item; > + struct xfs_attrd_log_format attrd_format; > +}; > + > +#endif /* __XFS_ATTR_ITEM_H__ */ > diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c > index 50f922c..166b680 100644 > --- a/fs/xfs/xfs_attr_list.c > +++ b/fs/xfs/xfs_attr_list.c > @@ -15,6 +15,7 @@ > #include "xfs_inode.h" > #include "xfs_trans.h" > #include "xfs_bmap.h" > +#include "xfs_da_btree.h" > #include "xfs_attr.h" > #include "xfs_attr_sf.h" > #include "xfs_attr_leaf.h" > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c > index 6f22a66..edc05af 100644 > --- a/fs/xfs/xfs_ioctl.c > +++ b/fs/xfs/xfs_ioctl.c > @@ -15,6 +15,8 @@ > #include "xfs_iwalk.h" > #include "xfs_itable.h" > #include "xfs_error.h" > +#include "xfs_da_format.h" > +#include "xfs_da_btree.h" > #include "xfs_attr.h" > #include "xfs_bmap.h" > #include "xfs_bmap_util.h" > diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c > index c1771e7..62e1534 100644 > --- a/fs/xfs/xfs_ioctl32.c > +++ b/fs/xfs/xfs_ioctl32.c > @@ -17,6 +17,8 @@ > #include "xfs_itable.h" > #include "xfs_fsops.h" > #include "xfs_rtalloc.h" > +#include "xfs_da_format.h" > +#include "xfs_da_btree.h" > #include "xfs_attr.h" > #include "xfs_ioctl.h" > #include "xfs_ioctl32.h" > diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c > index 80a13c8..fe60da1 100644 > --- a/fs/xfs/xfs_iops.c > +++ b/fs/xfs/xfs_iops.c > @@ -13,6 +13,8 @@ > #include "xfs_inode.h" > #include "xfs_acl.h" > #include "xfs_quota.h" > +#include "xfs_da_format.h" > +#include "xfs_da_btree.h" > #include "xfs_attr.h" > #include "xfs_trans.h" > #include "xfs_trace.h" > diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c > index ad0c69ee..6405ce33 100644 > --- a/fs/xfs/xfs_log.c > +++ b/fs/xfs/xfs_log.c > @@ -1975,6 +1975,10 @@ xlog_print_tic_res( > REG_TYPE_STR(CUD_FORMAT, "cud_format"), > REG_TYPE_STR(BUI_FORMAT, "bui_format"), > REG_TYPE_STR(BUD_FORMAT, "bud_format"), > + REG_TYPE_STR(ATTRI_FORMAT, "attri_format"), > + REG_TYPE_STR(ATTRD_FORMAT, "attrd_format"), > + REG_TYPE_STR(ATTR_NAME, "attr_name"), > + REG_TYPE_STR(ATTR_VALUE, "attr_value"), > }; > BUILD_BUG_ON(ARRAY_SIZE(res_type_str) != XLOG_REG_TYPE_MAX + 1); > #undef REG_TYPE_STR > diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c > index e2ec91b..ec31db0 100644 > --- a/fs/xfs/xfs_log_recover.c > +++ b/fs/xfs/xfs_log_recover.c > @@ -1811,6 +1811,8 @@ static const struct xlog_recover_item_ops *xlog_recover_item_ops[] = { > &xlog_cud_item_ops, > &xlog_bui_item_ops, > &xlog_bud_item_ops, > + &xlog_attri_item_ops, > + &xlog_attrd_item_ops, > }; > > static const struct xlog_recover_item_ops * > diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h > index 5f04d8a..0597a04 100644 > --- a/fs/xfs/xfs_ondisk.h > +++ b/fs/xfs/xfs_ondisk.h > @@ -126,6 +126,8 @@ xfs_check_ondisk_structs(void) > XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format, 56); > XFS_CHECK_STRUCT_SIZE(struct xfs_qoff_logformat, 20); > XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header, 16); > + XFS_CHECK_STRUCT_SIZE(struct xfs_attri_log_format, 40); > + XFS_CHECK_STRUCT_SIZE(struct xfs_attrd_log_format, 16); > > /* > * The v5 superblock format extended several v4 header structures with > diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c > index bca48b3..9b0c790 100644 > --- a/fs/xfs/xfs_xattr.c > +++ b/fs/xfs/xfs_xattr.c > @@ -10,6 +10,7 @@ > #include "xfs_log_format.h" > #include "xfs_da_format.h" > #include "xfs_inode.h" > +#include "xfs_da_btree.h" > #include "xfs_attr.h" > #include "xfs_acl.h" > #include "xfs_da_btree.h" > -- > 2.7.4 >