Re: [PATCH v12 4/8] xfs: Set up infastructure for deferred attribute operations

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 9/1/20 5:46 PM, Allison Collins wrote:


On 8/28/20 2:27 PM, Darrick J. Wong wrote:
On Wed, Aug 26, 2020 at 05:35:14PM -0700, Allison Collins wrote:
Currently attributes are modified directly across one or more
transactions. But they are not logged or replayed in the event of an
error. The goal of delayed attributes is to enable logging and replaying
of attribute operations using the existing delayed operations
infrastructure.  This will later enable the attributes to become part of
larger multi part operations that also must first be recorded to the
log.  This is mostly of interest in the scheme of parent pointers which
would need to maintain an attribute containing parent inode information
any time an inode is moved, created, or removed.  Parent pointers would
then be of interest to any feature that would need to quickly derive an
inode path from the mount point. Online scrub, nfs lookups and fs grow
or shrink operations are all features that could take advantage of this.

This patch adds two new log item types for setting or removing
attributes as deferred operations.  The xfs_attri_log_item logs an
intent to set or remove an attribute.  The corresponding
xfs_attrd_log_item holds a reference to the xfs_attri_log_item and is
freed once the transaction is done.  Both log items use a generic
xfs_attr_log_format structure that contains the attribute name, value,
flags, inode, and an op_flag that indicates if the operations is a set
or remove.

Signed-off-by: Allison Collins <allison.henderson@xxxxxxxxxx>
---
  fs/xfs/Makefile                 |   1 +
  fs/xfs/libxfs/xfs_attr.c        |   7 +-
  fs/xfs/libxfs/xfs_attr.h        |  39 ++
  fs/xfs/libxfs/xfs_defer.c       |   1 +
  fs/xfs/libxfs/xfs_defer.h       |   3 +
  fs/xfs/libxfs/xfs_log_format.h  |  44 ++-
  fs/xfs/libxfs/xfs_log_recover.h |   2 +
  fs/xfs/libxfs/xfs_types.h       |   1 +
  fs/xfs/scrub/common.c           |   2 +
  fs/xfs/xfs_acl.c                |   2 +
  fs/xfs/xfs_attr_item.c          | 829 ++++++++++++++++++++++++++++++++++++++++
  fs/xfs/xfs_attr_item.h          |  76 ++++
  fs/xfs/xfs_attr_list.c          |   1 +
  fs/xfs/xfs_ioctl.c              |   2 +
  fs/xfs/xfs_ioctl32.c            |   2 +
  fs/xfs/xfs_iops.c               |   2 +
  fs/xfs/xfs_log.c                |   4 +
  fs/xfs/xfs_log_recover.c        |   2 +
  fs/xfs/xfs_ondisk.h             |   2 +
  fs/xfs/xfs_xattr.c              |   1 +
  20 files changed, 1017 insertions(+), 6 deletions(-)

diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 04611a1..b056cfc 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -102,6 +102,7 @@ xfs-y                += xfs_log.o \
                     xfs_buf_item_recover.o \
                     xfs_dquot_item_recover.o \
                     xfs_extfree_item.o \
+                   xfs_attr_item.o \
                     xfs_icreate_item.o \
                     xfs_inode_item.o \
                     xfs_inode_item_recover.o \
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index a8cfe62..cf75742 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -24,6 +24,7 @@
  #include "xfs_quota.h"
  #include "xfs_trans_space.h"
  #include "xfs_trace.h"
+#include "xfs_attr_item.h"
  /*
   * xfs_attr.c
@@ -59,8 +60,6 @@ STATIC int xfs_attr_node_hasname(xfs_da_args_t *args,
  STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
  STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
  STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args, struct xfs_buf *bp);
-STATIC int xfs_attr_set_iter(struct xfs_delattr_context *dac,
-                 struct xfs_buf **leaf_bp);
  int
  xfs_inode_hasattr(
@@ -142,7 +141,7 @@ xfs_attr_get(
  /*
   * Calculate how many blocks we need for the new attribute,
   */
-STATIC int
+int
  xfs_attr_calc_size(
      struct xfs_da_args    *args,
      int            *local)
@@ -327,7 +326,7 @@ xfs_attr_set_args(
   * to handle this, and recall the function until a successful error code is
   * returned.
   */
-STATIC int
+int
  xfs_attr_set_iter(
      struct xfs_delattr_context    *dac,
      struct xfs_buf            **leaf_bp)
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index 4f6bba8..23b8308 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -247,6 +247,7 @@ enum xfs_delattr_state {
  #define XFS_DAC_DEFER_FINISH        0x01 /* finish the transaction */
  #define XFS_DAC_NODE_RMVNAME_INIT    0x02 /* xfs_attr_node_removename init */   #define XFS_DAC_LEAF_ADDNAME_INIT    0x04 /* xfs_attr_leaf_addname init*/ +#define XFS_DAC_DELAYED_OP_INIT        0x08 /* delayed operations init*/
  /*
   * Context used for keeping track of delayed attribute operations
@@ -254,6 +255,9 @@ enum xfs_delattr_state {
  struct xfs_delattr_context {
      struct xfs_da_args      *da_args;
+    /* Used by delayed attributes to hold leaf across transactions */
+    struct xfs_buf        *leaf_bp;
+
      /* Used in xfs_attr_rmtval_set_blk to roll through allocating blocks */
      struct xfs_bmbt_irec    map;
      xfs_dablk_t        lblkno;
@@ -268,6 +272,38 @@ struct xfs_delattr_context {
      enum xfs_delattr_state  dela_state;
  };

I'll start by pasting in the full xfs_delattr_context definition for
easier reading:

/*
  * Context used for keeping track of delayed attribute operations
  */
struct xfs_delattr_context {
    struct xfs_da_args      *da_args;

    /* Used by delayed attributes to hold leaf across transactions */
    struct xfs_buf        *leaf_bp;

    /* Used in xfs_attr_rmtval_set_blk to roll through allocating blocks */
    struct xfs_bmbt_irec    map;
    xfs_dablk_t        lblkno;
    int            blkcnt;

    /* Used in xfs_attr_node_removename to roll through removing blocks */
    struct xfs_da_state     *da_state;
    struct xfs_da_state_blk *blk;

    /* Used to keep track of current state of delayed operation */
    unsigned int            flags;
    enum xfs_delattr_state  dela_state;
};

Admittedly, I /am/ conducting a backwards review and zeroing in on the
data structures first.

+/*
+ * List of attrs to commit later.
+ */
+struct xfs_attr_item {
+    struct xfs_inode    *xattri_ip;
+    void            *xattri_value;        /* attr value */
+    void            *xattri_name;        /* attr name */
+    uint32_t        xattri_op_flags;    /* attr op set or rm */
+    uint32_t        xattri_value_len;    /* length of value */
+    uint32_t        xattri_name_len;    /* length of name */
+    uint32_t        xattri_flags;        /* attr flags */
+
+    /* used to log this item to an intent */
+    struct list_head    xattri_list;
+
+    /*
+     * xfs_delattr_context and xfs_da_args need to remain instantiated
+     * across transaction rolls during the defer finish, so store them here
+     */
+    struct xfs_da_args        xattri_args;
+    struct xfs_delattr_context    xattri_dac;
+
+    /*
+     * A byte array follows the header containing the file name and
+     * attribute value.
+     */
+};

These two structures (xfs_delattr_context and xfs_attr_item) duplicate a
lot of information considering that they both track incore state during
an xattr set/remove operation.  There's also a lot of duplication
between the do-while loop in xfs_attr_set_args and the inner loop of the
defer attr set code.
Yes, to clarify a bit of the history: most of this was sort of adopted from the efi/efd code as a sort model.  Most of the fields in the xfs_attr_item are sort of like the parameters needed to kick off a delayed operation.

The xfs_da_args and the xfs_delattr_context are an exception to that model.  Most of the time, they're not even populated.  The reason they are here is because they need to be instantiated somewhere not inside the call stack of the delayed attr operations.  Otherwise we'd loose them every time we come back with -EAGAIN.  In the non delayed attr code, they are kept in the xfs_attr_*_args functions.  In the delayed attr code path, they are kept here.

IOW: I had to plop them somewhere, and discovered that the *_items remain instantiated across their corresponding delayed operations.  So it seemed like a reasonable place?  I suspect attrs are the first delayed operation to require a context of sorts as I did not see any of the other delayed operations needing to deal with this issue.  So attr operations a little unique in this way.



To make sure I'm understanding this correctly, let me start by repeating
back to you what I think is the code flow through the hasdelattr path
and then the !hasdelattr path.  Let's call the hasdelattr path (A).

First, the caller allocates an xfs_da_args structure and partially
initializes it with dp, attr_filter, attr_flags, name, namelen, value,
and valuelen set appropriately for the operation it wants.  The rest of
the struct should be zeroed, because the uninitialized parts are
internal state.

Second, the *args are passed to xfs_attr_set, which after setting up a
transaction calls xfs_attr_set_deferred.  This calls xfs_attr_item_init
to allocate and initialize a struct xfs_attr_item with dp, name,
namelen, attr_filter, value, and valuelen, and passes this incore state
tracking structure to the defer ops machinery.

Third, the defer ops machinery calls xfs_attr_finish_item to deal with
the attr request.  If the xfs_delattr_context within the xfs_attr_item
is uninitialized it willl set the xfs_da_args state that's within the
xfs_attr_item to the values already stored in the xfs_attr_item.

Fourth, xfs_attr_finish_item calls xfs_trans_attr to dispatch based on
op_flags.  For setting, this means we call xfs_attr_set_iter.

Fifth, xfs_attr_set_iter dispatches functions based on whatever
dela_state in the delattr_context is set to.  The functions it calls can
set DAC_DEFER_FINISH and/or return -EAGAIN to signal the defer ops
machinery that it needs to roll the transaction so that we can repeat
steps 3-5 until we're done.  The defer ops machinery ought to honor
DEFER_FINISH and complete whatever work items we've put on the queue,
but... it's buggy and doesn't.  I'll come back to this later.
Oh you are right... this should be updated.  So the history was: at some time during the review of the delayed ready series, it was proposed that we have a top level loop that rolls the transactions, rather than trying to plumb in an "off switch" for the transactions.  This looping concept was already modeled here, so I had adopted it for use in the delay ready series.

I did however, forget to go back and update it with the DEFER_FINISH flags that we added later.  Or consolidate them, which I suspect is where you are going with this... :-)


Sixth, once we're done, we return out to xfs_attr_set to commit the
transaction and exit.

Did I understand that correctly?
That sounds about right

If so, I'll move on to the !hasdelattr
case, which we'll call (B).

First, the caller allocates an xfs_da_args structure and partially
initializes it with dp, attr_filter, attr_flags, name, namelen, value,
and valuelen set appropriately for the operation it wants.  The rest of
the struct should be zeroed, because the uninitialized parts are
internal state.  This is the same as step A1 above.

Second, the *args are passed to xfs_attr_set, which after setting up a
transaction calls xfs_attr_set_args.  This calls xfs_attr_set_iter,
which is the dela_state function dispatcher mentioned in step A5 above.
The functions it calls can set DAC_DEFER_FINISH to signal to
xfs_attr_set_args that it needs to complete whatever work items we've
attached to the transaction.  They can also return -EAGAIN to signal
to xfs_attr_set_args that it's time to roll the transaction.

Third, once we're done, we return out of xfs_attr_set, same as step A6
above.

Assuming I understood those two code paths correctly, I'll move on to
the attr item recovery case.  Call this (C).
Sounds about right


First, xfs_attri_item_recover is called with a recovered incore log
item.  It allocates an xfs_da_args and fills out most of the same
fields that xfs_attr_set does in A1-A2 and B1-B2 above; and then it
allocates a transaction.

Second, _recover has its own while loop(!) to call xfs_trans_attr, which
calls xfs_attr_set_iter, sort of like what A4 does.  I'll come back to
this later as well.

Third, xfs_attr_set_iter uses dela_state to dispatch functions, similar
to what A5 does above.  If those functions set DAC_DEFER_FINISH or
return -EAGAIN, we'll pass that out to xfs_attr_set_iter to get the
transaction rolled so we can move on to the next state.
Mmm, xfs_attr_set_iter doesn't roll transactions. xfs_attr_*_args does. Perhaps the *_recover should follow suit, or be consolidated.


Fourth, when the loop is done we commit the transaction and move on with
whatever is next in log recovery.

Does that sound right?  If so, let's move on to the issues I noted
above.

I think the first problem is that this patchset adds two more xattr
operation state structures.  Current xfs_da_args store both the
operation arguments (inode, name, value, other flags) and most of the
state of the operation (whichfork, hashval, geo, block indices, rmt
block indices).  The series then adds a xfs_delattr_context that holds
more state that needs to survive a transaction roll (leafbp, rmt
mappings, da btree state, and dela_state).  Then, it adds yet another
xfs_attr_item that contains its own xfs_da_args and xfs_delattr_context,
and has a bunch more fields xattri_(ip, value, name, opflags, value)
that duplicate the fields that already exist in xfs_da_args.

This is hard to follow.  I don't know what's the difference between
xfs_attr_item.xattri_name and xfs_attr_item.xattri_args.name, and I
suspect this makes xfs_attr_item much larger than it needs to be.
Hmm, ok.  Let me see if I could get away with having just having args and dac.  That might eliminate some of the overlap.


Question 1: Can we break up struct xfs_da_args?  Right now its field
definition is the union set of everything needed to track both a
directory operation and an xattr operation.  What do you think of
creating separate xfs_dirop_state and xfs_attrop_state structures that
each embed an xfs_da_args, and then move the dir and attr-specific
pieces out of xfs_da_args and into xfs_{dir,attr}op_state as
appropriate?  I think Christoph has suggested this elsewhere on the list
in the past.

(Note that xfs_da_state is its own separate thing for dealing with
dabtree operations; that doesn't change.)
Sure, let me dig around and see if I can better modularize args so that we're not carrying around all the dir op stuff through all the attr op stuff.


Question 2: Should we revise the arguments to xfs_attr_[gs]et?  Right
now the callers of these functions have to initialize the entire
xfs_da_state structure even though they only care about 7 of the 26
fields.  What do you think of changing the xfs_attr_[gs]et function
declarations to pass in the 7 arguments directly?  Or you could create a
new arguments struct?  If you did that, then xfs_args_[gs]et would be
responsible for allocating and initializing their internal state.  This
is cleaner interface-wise,
I can dig around and see if I can get something like that to work.  I like the mini struct idea.  I suspect we'll end up with a few routines with similar set of 7 params, so the struct makes sense

and leads me into...

Question 3: Instead of creating separate xfs_delattr_context
andxfs_attr_item structs, can you put all the stuff those structures
track into xfs_attrop_state?
Where xfs_attrop_state is the combination of xfs_delattr_context, xfs_attr_item, and a subset of xfs_da_args?

I sense that the duplication and pointer
indirection in _delattr_context and _attr_item might be a result of it
not being all that clear where the xfs_da_args is actually allocated,
and therefore the scoping rules.  Would all that be clearer if all the
new state was thrown into the same xfs_attrop_state that we dynamically
allocate at the start of xfs_attr_[gs]et()?  (Yes, this question's
existence depends on your answer to Q2.)
I suppose it could me made to work?  I think we're starting to glob together members that belong to slightly similar scopes, so the real question would be: Are people going to be amicable to seeing it that way?

The xfs_attr_item was sort of modeled from xfs_extent_free_item.  In general these structs are meant to function as sort of items in a list of log items.  Hats why they all have the list_head field at a minimum. (xfs_bmap_intent, xfs_extent_free_item, xfs_refcount_intent, xfs_rmap_intent, are similar in this way.  They all have their own corresponding *_finish_item routines whose purpose is to unpack the item and hand it off to it's corresponding *_trans routines).  It seemed to be a sort of established pattern, so I figured I should fall in line with that.

xfs_delattr_context is a bit of an odd ball.  It doesn't really represent a set of associated properties like names and values and such.  It's really more about bookmarking a position in a sequence of events. So their their contents really have no meaning outside this context. For example, blkcnt is used during attr grow an shrink operations, but an attr doesn't otherwise normally have a block count associated with it.

I personally think it's a bit messy to try and lump all of that together, but it's really an aesthetic thing. Ultimately that is going to come down to the cross section of opinions that people are most comfortable seeing. :-)


Question 4: Does xfs_attr_item_init need to allocate space to hold the
name and value buffers when it is called from xfs_attr_set?
It would if it were ever used in a routine that passed name and value as local params, and then exited before finishing the transaction, or if it in anyway manipulated their values in a way not meant to be reflected in the delayed operation.

I'm not sure if there's an instance where we do this, but I'd really have to try it and see. Will verify.

xfs_attr_set does not return until we're completely finished with the
deferred xattr processing, which means that the buffers passed into
xfs_attr_set cannot go out of scope, right?

(I think you /do/ need to allocate separate buffers for log recovery.)
Yes, the on disk structs are xfs_attri_log_format and xfs_attrd_log_format


My second set of questions revolve around the duplication of attr
operation loops between xfs_attr_set_args() and the defer ops code.
AFAICT there's no reason to have xfs_attr_set_args, since there is no
requirement in the deferred ops machinery to create log intent or log
done items.
Yes, let me see if I can consolidate that, I forgot that I had nabbed the looping code from later in the set and pulled it downwards some time ago.   I'm thinking maybe xfs_attri_item_recover should just unpack the log item and pipe it through to xfs_attr_set_args


Question 5: Instead of open-coding a do {attrset roll hold} loop in
xfs_attr_set_args, what do you think about setting up the deferred op
code (xfs_attr_defer_type and the functions assigned to it in patch 4)
to do that from the start?  By adding the defer op code early, patch 2
would create xfs_attr_set_iter as it does now, and xfs_attr_finish_item
would call it directly.  Since there's no log item defined yet, the
other defer ops functions (create_intent, abort_intent, create_done) can
return NULL log item pointers.

K, this one I'm having a hard time following.... the purpose of xis_tart_finish_item is to unpack the xis_tart_item, and must also adhere to the xfs_defer_op_type.finish_item signature.  It doesn't loop like xfs_attr_set_args does because the calling delayed operation infrastructure already does that (see the loop in xfs_defer_finish_one).  Without the delayed operation machinery in the picture, a non delayed attribute is going to need an equivalent loop somewhere.  We can consolidate it with the loop we see in the recovery path, but we do need to keep at least one loop somewhere, and it cant be in xfs_attr_finish_item.
Sorry xis_tart_finish_item should read xfs_attr_finish_item and xis_tart_item should be xis_attr_item.

need to keep closer eye on spell checker...


Once you get to the point whre you have defined the log items, you can
add in all the other log item handling (i.e. xfs_attr[id]_item_ops).  As
an example of a defer op that optionally records changes to its incore
operation state with log items, see xfs_swapext_defer_type[1].

[1] https://urldefense.com/v3/__https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux.git/commit/?h=atomic-file-updates&id=53c7233842969347174e8d68c8486dbf3efb734c__;!!GqivPVa7Brio!N0NsMhrByLcmev5aFDxTBS_VvzIbprn_VmNb4kY_gw4ADR0q1ERQzlaQe-tjnhXmUNhu$
Ok, let me go through that, maybe I'm missing something here


Moving along to the DEFER_FINISH question that I said I'd get back to
later -- there's a subtle difference to the order in which deferred log
items that are created while trying to make progress on an xattr op are
finished.  This is due to a design wart of the original defer ops
machinery, and Brian and I have discussed this previously.

In a nutshell, let's pretend that step 1 of an xattr operation creates
new deferred ops ABCD and step 2 creates new deferred ops EFGH.  Let's
also pretend that step 1 and step 2 both set DEFER_FINISH.  In the
!delattr case, xfs_attr_set_args -> xfs_attr_trans_roll will run step 1,
process A->B->C->D, roll, run step 2, and then process E->F->G->H and
commit.

In the delattr case, however, the defer ops machinery shoves all the new
defer ops to the end of the queue, which means that we run step 1, roll,
run step 2, and then run A->B->C->D->E->F->G->H and commit.  I would
like to fix that, since it seems more logical to me that you'd finish
A-D before moving on to the second phase; and the atomic swapext code is
going to require that.

Question 6: So, uh, can you go have a look at the latest patches[2]?
I'll post them soon if I can get past the bigtime review.  I don't think
this wart of the defer ops mechanism affects your patchset, but you know
how deferred attrs work better than I. :)

[2] https://urldefense.com/v3/__https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux.git/log/?h=defer-ops-stalls__;!!GqivPVa7Brio!N0NsMhrByLcmev5aFDxTBS_VvzIbprn_VmNb4kY_gw4ADR0q1ERQzlaQe-tjnqXvaBRR$
Oh ok.  Sure, will take a look


I also had a couple questions (observations?) about how log recovery
works for attr items, because I noticed that xfs_attri_item_recover also
has a do {attrset, roll} loop.

HAH, I just realized (while writing Q7) that xfs_defer_move needs to log
intent items for each newly scheduled work item because if log recovery
crashes after finishing the existing intent items but before it gets to
the new intent items, the next attempt at log recovery will not see the
missing intents and will /never/ even be aware that it should have
finished a chain.  That leads to fs corruption!  So that series has more
work to do, and you can set Q6 aside for now.
:-)  I may take a peek anyway


Question 7: Why is there a do {attrset, roll} loop in the recovery
routine?  Log intent item recovery functions are only supposed to
complete a single transaction's worth of work.  If there's more work to
do, the recovery function should attach a new defer ops item to the
transaction to schedule the rest of the work, and use xfs_defer_move
to attach the list of new defer ops to *parent_tp.
Oh ok.  I wasnt aware of how that was supposed to work.  Will update.


The reason for this is that log recovery has to finish every unfinished
intent item that was in the log before it can move on to new log items
that were created as a result of recovering log items.
Ok, so maybe we dont consolidate the loops since this one will need to go away.  Thanks for the catch though!


Ok, that's probably enough questions for now.

--D

Thanks!  I know it's a lot!!
Allison


+
+#define XFS_ATTR_ITEM_SIZEOF(namelen, valuelen)    \
+    (sizeof(struct xfs_attr_item) + (namelen) + (valuelen))

+
+
/*========================================================================
   * Function prototypes for the kernel.
*========================================================================*/ @@ -283,11 +319,14 @@ int xfs_attr_get_ilocked(struct xfs_da_args *args);
  int xfs_attr_get(struct xfs_da_args *args);
  int xfs_attr_set(struct xfs_da_args *args);
  int xfs_attr_set_args(struct xfs_da_args *args);
+int xfs_attr_set_iter(struct xfs_delattr_context *dac,
+              struct xfs_buf **leaf_bp);
  int xfs_has_attr(struct xfs_da_args *args);
  int xfs_attr_remove_args(struct xfs_da_args *args);
  int xfs_attr_remove_iter(struct xfs_delattr_context *dac);
  bool xfs_attr_namecheck(const void *name, size_t length);
  void xfs_delattr_context_init(struct xfs_delattr_context *dac,
                    struct xfs_da_args *args);
+int xfs_attr_calc_size(struct xfs_da_args *args, int *local);
  #endif    /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index d8f5862..4392279 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -176,6 +176,7 @@ static const struct xfs_defer_op_type *defer_op_types[] = {
      [XFS_DEFER_OPS_TYPE_RMAP]    = &xfs_rmap_update_defer_type,
      [XFS_DEFER_OPS_TYPE_FREE]    = &xfs_extent_free_defer_type,
      [XFS_DEFER_OPS_TYPE_AGFL_FREE]    = &xfs_agfl_free_defer_type,
+    [XFS_DEFER_OPS_TYPE_ATTR]    = &xfs_attr_defer_type,
  };
  static void
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 6b2ca58..193d3bb 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -18,6 +18,7 @@ enum xfs_defer_ops_type {
      XFS_DEFER_OPS_TYPE_RMAP,
      XFS_DEFER_OPS_TYPE_FREE,
      XFS_DEFER_OPS_TYPE_AGFL_FREE,
+    XFS_DEFER_OPS_TYPE_ATTR,
      XFS_DEFER_OPS_TYPE_MAX,
  };
@@ -62,5 +63,7 @@ extern const struct xfs_defer_op_type xfs_refcount_update_defer_type;
  extern const struct xfs_defer_op_type xfs_rmap_update_defer_type;
  extern const struct xfs_defer_op_type xfs_extent_free_defer_type;
  extern const struct xfs_defer_op_type xfs_agfl_free_defer_type;
+extern const struct xfs_defer_op_type xfs_attr_defer_type;
+
  #endif /* __XFS_DEFER_H__ */
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index e3400c9..33b26b6 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -117,7 +117,12 @@ struct xfs_unmount_log_format {
  #define XLOG_REG_TYPE_CUD_FORMAT    24
  #define XLOG_REG_TYPE_BUI_FORMAT    25
  #define XLOG_REG_TYPE_BUD_FORMAT    26
-#define XLOG_REG_TYPE_MAX        26
+#define XLOG_REG_TYPE_ATTRI_FORMAT    27
+#define XLOG_REG_TYPE_ATTRD_FORMAT    28
+#define XLOG_REG_TYPE_ATTR_NAME    29
+#define XLOG_REG_TYPE_ATTR_VALUE    30
+#define XLOG_REG_TYPE_MAX        30
+
  /*
   * Flags to log operation header
@@ -240,6 +245,8 @@ typedef struct xfs_trans_header {
  #define    XFS_LI_CUD        0x1243
  #define    XFS_LI_BUI        0x1244    /* bmbt update intent */
  #define    XFS_LI_BUD        0x1245
+#define    XFS_LI_ATTRI        0x1246  /* attr set/remove intent*/
+#define    XFS_LI_ATTRD        0x1247  /* attr set/remove done */
  #define XFS_LI_TYPE_DESC \
      { XFS_LI_EFI,        "XFS_LI_EFI" }, \
@@ -255,7 +262,9 @@ typedef struct xfs_trans_header {
      { XFS_LI_CUI,        "XFS_LI_CUI" }, \
      { XFS_LI_CUD,        "XFS_LI_CUD" }, \
      { XFS_LI_BUI,        "XFS_LI_BUI" }, \
-    { XFS_LI_BUD,        "XFS_LI_BUD" }
+    { XFS_LI_BUD,        "XFS_LI_BUD" }, \
+    { XFS_LI_ATTRI,        "XFS_LI_ATTRI" }, \
+    { XFS_LI_ATTRD,        "XFS_LI_ATTRD" }
  /*
   * Inode Log Item Format definitions.
@@ -860,4 +869,35 @@ struct xfs_icreate_log {
      __be32        icl_gen;    /* inode generation number to use */
  };
+/*
+ * Flags for deferred attribute operations.
+ * Upper bits are flags, lower byte is type code
+ */
+#define XFS_ATTR_OP_FLAGS_SET        1    /* Set the attribute */
+#define XFS_ATTR_OP_FLAGS_REMOVE    2    /* Remove the attribute */
+#define XFS_ATTR_OP_FLAGS_TYPE_MASK    0x0FF    /* Flags type mask */
+
+/*
+ * This is the structure used to lay out an attr log item in the
+ * log.
+ */
+struct xfs_attri_log_format {
+    uint16_t    alfi_type;    /* attri log item type */
+    uint16_t    alfi_size;    /* size of this item */
+    uint32_t    __pad;        /* pad to 64 bit aligned */
+    uint64_t    alfi_id;    /* attri identifier */
+    xfs_ino_t       alfi_ino;    /* the inode for this attr operation */ +    uint32_t        alfi_op_flags;    /* marks the op as a set or remove */
+    uint32_t        alfi_name_len;    /* attr name length */
+    uint32_t        alfi_value_len;    /* attr value length */
+    uint32_t        alfi_attr_flags;/* attr flags */
+};
+
+struct xfs_attrd_log_format {
+    uint16_t    alfd_type;    /* attrd log item type */
+    uint16_t    alfd_size;    /* size of this item */
+    uint32_t    __pad;        /* pad to 64 bit aligned */
+    uint64_t    alfd_alf_id;    /* id of corresponding attrd */
+};
+
  #endif /* __XFS_LOG_FORMAT_H__ */
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 641132d..b0b8e94 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -72,6 +72,8 @@ extern const struct xlog_recover_item_ops xlog_rui_item_ops;
  extern const struct xlog_recover_item_ops xlog_rud_item_ops;
  extern const struct xlog_recover_item_ops xlog_cui_item_ops;
  extern const struct xlog_recover_item_ops xlog_cud_item_ops;
+extern const struct xlog_recover_item_ops xlog_attri_item_ops;
+extern const struct xlog_recover_item_ops xlog_attrd_item_ops;
  /*
   * Macros, structures, prototypes for internal log manager use.
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 397d947..860cdd2 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -11,6 +11,7 @@ typedef uint32_t    prid_t;        /* project ID */
  typedef uint32_t    xfs_agblock_t;    /* blockno in alloc. group */
  typedef uint32_t    xfs_agino_t;    /* inode # within allocation grp */
  typedef uint32_t    xfs_extlen_t;    /* extent length in blocks */
+typedef uint32_t    xfs_attrlen_t;    /* attr length */
  typedef uint32_t    xfs_agnumber_t;    /* allocation group number */
  typedef int32_t        xfs_extnum_t;    /* # of extents in a file */
  typedef int16_t        xfs_aextnum_t;    /* # extents in an attribute fork */
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 1887605..9a649d1 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -24,6 +24,8 @@
  #include "xfs_rmap_btree.h"
  #include "xfs_log.h"
  #include "xfs_trans_priv.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
  #include "xfs_attr.h"
  #include "xfs_reflink.h"
  #include "scrub/scrub.h"
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index d4c687b5c..2fa173a 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -10,6 +10,8 @@
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
  #include "xfs_inode.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
  #include "xfs_attr.h"
  #include "xfs_trace.h"
  #include "xfs_error.h"
diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
new file mode 100644
index 0000000..923c288
--- /dev/null
+++ b/fs/xfs/xfs_attr_item.c
@@ -0,0 +1,829 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Allison Collins <allison.henderson@xxxxxxxxxx>
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_shared.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
+#include "xfs_buf_item.h"
+#include "xfs_attr_item.h"
+#include "xfs_log.h"
+#include "xfs_btree.h"
+#include "xfs_rmap.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_attr.h"
+#include "xfs_shared.h"
+#include "xfs_attr_item.h"
+#include "xfs_alloc.h"
+#include "xfs_bmap.h"
+#include "xfs_trace.h"
+#include "libxfs/xfs_da_format.h"
+#include "xfs_inode.h"
+#include "xfs_quota.h"
+#include "xfs_log_priv.h"
+#include "xfs_log_recover.h"
+
+static const struct xfs_item_ops xfs_attri_item_ops;
+static const struct xfs_item_ops xfs_attrd_item_ops;
+
+static inline struct xfs_attri_log_item *ATTRI_ITEM(struct xfs_log_item *lip)
+{
+    return container_of(lip, struct xfs_attri_log_item, attri_item);
+}
+
+STATIC void
+xfs_attri_item_free(
+    struct xfs_attri_log_item    *attrip)
+{
+    kmem_free(attrip->attri_item.li_lv_shadow);
+    kmem_free(attrip);
+}
+
+/*
+ * Freeing the attrip requires that we remove it from the AIL if it has already + * been placed there. However, the ATTRI may not yet have been placed in the + * AIL when called by xfs_attri_release() from ATTRD processing due to the + * ordering of committed vs unpin operations in bulk insert operations. Hence
+ * the reference count to ensure only the last caller frees the ATTRI.
+ */
+STATIC void
+xfs_attri_release(
+    struct xfs_attri_log_item    *attrip)
+{
+    ASSERT(atomic_read(&attrip->attri_refcount) > 0);
+    if (atomic_dec_and_test(&attrip->attri_refcount)) {
+        xfs_trans_ail_delete(&attrip->attri_item,
+                     SHUTDOWN_LOG_IO_ERROR);
+        xfs_attri_item_free(attrip);
+    }
+}
+
+/*
+ * This returns the number of iovecs needed to log the given attri item. We + * only need 1 iovec for an attri item.  It just logs the attr_log_format
+ * structure.
+ */
+static inline int
+xfs_attri_item_sizeof(
+    struct xfs_attri_log_item *attrip)
+{
+    return sizeof(struct xfs_attri_log_format);
+}
+
+STATIC void
+xfs_attri_item_size(
+    struct xfs_log_item    *lip,
+    int            *nvecs,
+    int            *nbytes)
+{
+    struct xfs_attri_log_item       *attrip = ATTRI_ITEM(lip);
+
+    *nvecs += 1;
+    *nbytes += xfs_attri_item_sizeof(attrip);
+
+    /* Attr set and remove operations require a name */
+    ASSERT(attrip->attri_name_len > 0);
+
+    *nvecs += 1;
+    *nbytes += ATTR_NVEC_SIZE(attrip->attri_name_len);
+
+    /*
+     * Set ops can accept a value of 0 len to clear an attr value. Remove
+     * ops do not need a value at all.  So only account for the value
+     * when it is needed.
+     */
+    if (attrip->attri_value_len > 0) {
+        *nvecs += 1;
+        *nbytes += ATTR_NVEC_SIZE(attrip->attri_value_len);
+    }
+}
+
+/*
+ * This is called to fill in the log iovecs for the given attri log
+ * item. We use  1 iovec for the attri_format_item, 1 for the name, and
+ * another for the value if it is present
+ */
+STATIC void
+xfs_attri_item_format(
+    struct xfs_log_item    *lip,
+    struct xfs_log_vec    *lv)
+{
+    struct xfs_attri_log_item    *attrip = ATTRI_ITEM(lip);
+    struct xfs_log_iovec        *vecp = NULL;
+
+    attrip->attri_format.alfi_type = XFS_LI_ATTRI;
+    attrip->attri_format.alfi_size = 1;
+
+    /*
+     * This size accounting must be done before copying the attrip into the +     * iovec.  If we do it after, the wrong size will be recorded to the log +     * and we trip across assertion checks for bad region sizes later during
+     * the log recovery.
+     */
+
+    ASSERT(attrip->attri_name_len > 0);
+    attrip->attri_format.alfi_size++;
+
+    if (attrip->attri_value_len > 0)
+        attrip->attri_format.alfi_size++;
+
+    xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTRI_FORMAT,
+            &attrip->attri_format,
+            xfs_attri_item_sizeof(attrip));
+    if (attrip->attri_name_len > 0)
+        xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NAME,
+                attrip->attri_name,
+                ATTR_NVEC_SIZE(attrip->attri_name_len));
+
+    if (attrip->attri_value_len > 0)
+        xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_VALUE,
+                attrip->attri_value,
+                ATTR_NVEC_SIZE(attrip->attri_value_len));
+}
+
+/*
+ * The unpin operation is the last place an ATTRI is manipulated in the log. It + * is either inserted in the AIL or aborted in the event of a log I/O error. In + * either case, the ATTRI transaction has been successfully committed to make + * it this far. Therefore, we expect whoever committed the ATTRI to either + * construct and commit the ATTRD or drop the ATTRD's reference in the event of + * error. Simply drop the log's ATTRI reference now that the log is done with
+ * it.
+ */
+STATIC void
+xfs_attri_item_unpin(
+    struct xfs_log_item    *lip,
+    int            remove)
+{
+    struct xfs_attri_log_item    *attrip = ATTRI_ITEM(lip);
+
+    xfs_attri_release(attrip);
+}
+
+
+STATIC void
+xfs_attri_item_release(
+    struct xfs_log_item    *lip)
+{
+    xfs_attri_release(ATTRI_ITEM(lip));
+}
+
+/*
+ * Allocate and initialize an attri item
+ */
+STATIC struct xfs_attri_log_item *
+xfs_attri_init(
+    struct xfs_mount    *mp)
+
+{
+    struct xfs_attri_log_item    *attrip;
+    uint                size;
+
+    size = (uint)(sizeof(struct xfs_attri_log_item));
+    attrip = kmem_zalloc(size, 0);
+
+    xfs_log_item_init(mp, &attrip->attri_item, XFS_LI_ATTRI,
+              &xfs_attri_item_ops);
+    attrip->attri_format.alfi_id = (uintptr_t)(void *)attrip;
+    atomic_set(&attrip->attri_refcount, 2);
+
+    return attrip;
+}
+
+/*
+ * Copy an attr format buffer from the given buf, and into the destination attr
+ * format structure.
+ */
+STATIC int
+xfs_attri_copy_format(struct xfs_log_iovec *buf,
+              struct xfs_attri_log_format *dst_attr_fmt)
+{
+    struct xfs_attri_log_format *src_attr_fmt = buf->i_addr;
+    uint len = sizeof(struct xfs_attri_log_format);
+
+    if (buf->i_len != len)
+        return -EFSCORRUPTED;
+
+    memcpy((char *)dst_attr_fmt, (char *)src_attr_fmt, len);
+    return 0;
+}
+
+static inline struct xfs_attrd_log_item *ATTRD_ITEM(struct xfs_log_item *lip)
+{
+    return container_of(lip, struct xfs_attrd_log_item, attrd_item);
+}
+
+STATIC void
+xfs_attrd_item_free(struct xfs_attrd_log_item *attrdp)
+{
+    kmem_free(attrdp->attrd_item.li_lv_shadow);
+    kmem_free(attrdp);
+}
+
+/*
+ * This returns the number of iovecs needed to log the given attrd item. + * We only need 1 iovec for an attrd item.  It just logs the attr_log_format
+ * structure.
+ */
+static inline int
+xfs_attrd_item_sizeof(
+    struct xfs_attrd_log_item *attrdp)
+{
+    return sizeof(struct xfs_attrd_log_format);
+}
+
+STATIC void
+xfs_attrd_item_size(
+    struct xfs_log_item    *lip,
+    int            *nvecs,
+    int            *nbytes)
+{
+    struct xfs_attrd_log_item    *attrdp = ATTRD_ITEM(lip);
+    *nvecs += 1;
+    *nbytes += xfs_attrd_item_sizeof(attrdp);
+}
+
+/*
+ * This is called to fill in the log iovecs for the given attrd log item. We use + * only 1 iovec for the attrd_format, and we point that at the attr_log_format
+ * structure embedded in the attrd item.
+ */
+STATIC void
+xfs_attrd_item_format(
+    struct xfs_log_item    *lip,
+    struct xfs_log_vec    *lv)
+{
+    struct xfs_attrd_log_item    *attrdp = ATTRD_ITEM(lip);
+    struct xfs_log_iovec        *vecp = NULL;
+
+    attrdp->attrd_format.alfd_type = XFS_LI_ATTRD;
+    attrdp->attrd_format.alfd_size = 1;
+
+    xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTRD_FORMAT,
+            &attrdp->attrd_format, xfs_attrd_item_sizeof(attrdp));
+}
+
+/*
+ * The ATTRD is either committed or aborted if the transaction is cancelled. If + * the transaction is cancelled, drop our reference to the ATTRI and free the
+ * ATTRD.
+ */
+STATIC void
+xfs_attrd_item_release(
+    struct xfs_log_item     *lip)
+{
+    struct xfs_attrd_log_item *attrdp = ATTRD_ITEM(lip);
+    xfs_attri_release(attrdp->attrd_attrip);
+    xfs_attrd_item_free(attrdp);
+}
+
+/*
+ * Log an ATTRI it to the ATTRD when the attr op is done.  An attr operation
+ * may be a set or a remove.  Note that the transaction is marked dirty
+ * regardless of whether the operation succeeds or fails to support the
+ * ATTRI/ATTRD lifecycle rules.
+ */
+int
+xfs_trans_attr(
+    struct xfs_delattr_context    *dac,
+    struct xfs_attrd_log_item    *attrdp,
+    struct xfs_buf            **leaf_bp,
+    uint32_t            op_flags)
+{
+    struct xfs_da_args        *args = dac->da_args;
+    int                error;
+
+    error = xfs_qm_dqattach_locked(args->dp, 0);
+    if (error)
+        return error;
+
+    switch (op_flags) {
+    case XFS_ATTR_OP_FLAGS_SET:
+        args->op_flags |= XFS_DA_OP_ADDNAME;
+        error = xfs_attr_set_iter(dac, leaf_bp);
+        break;
+    case XFS_ATTR_OP_FLAGS_REMOVE:
+        ASSERT(XFS_IFORK_Q((args->dp)));
+        error = xfs_attr_remove_iter(dac);
+        break;
+    default:
+        error = -EFSCORRUPTED;
+        break;
+    }
+
+    /*
+     * Mark the transaction dirty, even on error. This ensures the
+     * transaction is aborted, which:
+     *
+     * 1.) releases the ATTRI and frees the ATTRD
+     * 2.) shuts down the filesystem
+     */
+    args->trans->t_flags |= XFS_TRANS_DIRTY;
+    set_bit(XFS_LI_DIRTY, &attrdp->attrd_item.li_flags);
+
+    return error;
+}
+
+/* Log an attr to the intent item. */
+STATIC void
+xfs_attr_log_item(
+    struct xfs_trans        *tp,
+    struct xfs_attri_log_item    *attrip,
+    struct xfs_attr_item        *attr)
+{
+    struct xfs_attri_log_format    *attrp;
+    char                *name_value;
+
+    name_value = ((char *)attr) + sizeof(struct xfs_attr_item);
+
+    tp->t_flags |= XFS_TRANS_DIRTY;
+    set_bit(XFS_LI_DIRTY, &attrip->attri_item.li_flags);
+
+    /*
+     * At this point the xfs_attr_item has been constructed, and we've
+     * created the log intent. Fill in the attri log item and log format
+     * structure with fields from this xfs_attr_item
+     */
+    attrp = &attrip->attri_format;
+    attrp->alfi_ino = attr->xattri_ip->i_ino;
+    attrp->alfi_op_flags = attr->xattri_op_flags;
+    attrp->alfi_value_len = attr->xattri_value_len;
+    attrp->alfi_name_len = attr->xattri_name_len;
+    attrp->alfi_attr_flags = attr->xattri_flags;
+
+    attrip->attri_name = name_value;
+    attrip->attri_value = &name_value[attr->xattri_name_len];
+    attrip->attri_name_len = attr->xattri_name_len;
+    attrip->attri_value_len = attr->xattri_value_len;
+}
+
+/* Get an ATTRI. */
+static struct xfs_log_item *
+xfs_attr_create_intent(
+    struct xfs_trans        *tp,
+    struct list_head        *items,
+    unsigned int            count,
+    bool                sort)
+{
+    struct xfs_mount        *mp = tp->t_mountp;
+    struct xfs_attri_log_item    *attrip = xfs_attri_init(mp);
+    struct xfs_attr_item        *attr;
+
+    ASSERT(count == 1);
+
+    xfs_trans_add_item(tp, &attrip->attri_item);
+    list_for_each_entry(attr, items, xattri_list)
+        xfs_attr_log_item(tp, attrip, attr);
+    return &attrip->attri_item;
+}
+
+/* Process an attr. */
+STATIC int
+xfs_attr_finish_item(
+    struct xfs_trans        *tp,
+    struct xfs_log_item        *done,
+    struct list_head        *item,
+    struct xfs_btree_cur        **state)
+{
+    struct xfs_attr_item        *attr;
+    int                error;
+    int                local;
+    struct xfs_delattr_context    *dac;
+    struct xfs_da_args        *args;
+    struct xfs_attrd_log_item    *attrdp;
+    struct xfs_attri_log_item    *attrip;
+
+    attr = container_of(item, struct xfs_attr_item, xattri_list);
+    dac = &attr->xattri_dac;
+    args = &attr->xattri_args;
+
+    if (!(dac->flags & XFS_DAC_DELAYED_OP_INIT)) {
+        /* Only need to initialize args context once */
+        memset(args, 0, sizeof(*args));
+        args->geo = attr->xattri_ip->i_mount->m_attr_geo;
+        args->whichfork = XFS_ATTR_FORK;
+        args->dp = attr->xattri_ip;
+        args->name = ((const unsigned char *)attr) +
+                  sizeof(struct xfs_attr_item);
+        args->namelen = attr->xattri_name_len;
+        args->attr_filter = attr->xattri_flags;
+        args->hashval = xfs_da_hashname(args->name, args->namelen);
+        args->value = (void *)&args->name[attr->xattri_name_len];
+        args->valuelen = attr->xattri_value_len;
+        args->op_flags = XFS_DA_OP_OKNOENT;
+
+        /* must match existing transaction block res */
+        args->total = xfs_attr_calc_size(args, &local);
+
+        memset(dac, 0, sizeof(struct xfs_delattr_context));
+        dac->flags |= XFS_DAC_DELAYED_OP_INIT;
+        dac->da_args = args;
+    }
+
+    /*
+     * Always reset trans after EAGAIN cycle
+     * since the transaction is new
+     */
+    args->trans = tp;
+
+    error = xfs_trans_attr(dac, ATTRD_ITEM(done), &dac->leaf_bp,
+                   attr->xattri_op_flags);
+    /*
+     * The attrip refers to xfs_attr_item memory to log the name and value
+     * with the intent item. This already occurred when the intent was
+     * committed so these fields are no longer accessed. Clear them out of
+     * caution since we're about to free the xfs_attr_item.
+     */
+    attrdp = (struct xfs_attrd_log_item *)done;
+    attrip = attrdp->attrd_attrip;
+    attrip->attri_name = NULL;
+    attrip->attri_value = NULL;
+
+    if (error != -EAGAIN)
+        kmem_free(attr);
+
+    return error;
+}
+
+/* Abort all pending ATTRs. */
+STATIC void
+xfs_attr_abort_intent(
+    struct xfs_log_item        *intent)
+{
+    xfs_attri_release(ATTRI_ITEM(intent));
+}
+
+/* Cancel an attr */
+STATIC void
+xfs_attr_cancel_item(
+    struct list_head        *item)
+{
+    struct xfs_attr_item        *attr;
+
+    attr = container_of(item, struct xfs_attr_item, xattri_list);
+    kmem_free(attr);
+}
+
+/*
+ * The ATTRI is logged only once and cannot be moved in the log, so simply
+ * return the lsn at which it's been logged.
+ */
+STATIC xfs_lsn_t
+xfs_attri_item_committed(
+    struct xfs_log_item    *lip,
+    xfs_lsn_t        lsn)
+{
+    return lsn;
+}
+
+STATIC void
+xfs_attri_item_committing(
+    struct xfs_log_item    *lip,
+    xfs_lsn_t        lsn)
+{
+}
+
+STATIC bool
+xfs_attri_item_match(
+    struct xfs_log_item    *lip,
+    uint64_t        intent_id)
+{
+    return ATTRI_ITEM(lip)->attri_format.alfi_id == intent_id;
+}
+
+/*
+ * When the attrd item is committed to disk, all we need to do is delete our + * reference to our partner attri item and then free ourselves. Since we're + * freeing ourselves we must return -1 to keep the transaction code from
+ * further referencing this item.
+ */
+STATIC xfs_lsn_t
+xfs_attrd_item_committed(
+    struct xfs_log_item    *lip,
+    xfs_lsn_t        lsn)
+{
+    struct xfs_attrd_log_item    *attrdp = ATTRD_ITEM(lip);
+
+    /*
+     * Drop the ATTRI reference regardless of whether the ATTRD has been +     * aborted. Once the ATTRD transaction is constructed, it is the sole +     * responsibility of the ATTRD to release the ATTRI (even if the ATTRI
+     * is aborted due to log I/O error).
+     */
+    xfs_attri_release(attrdp->attrd_attrip);
+    xfs_attrd_item_free(attrdp);
+
+    return NULLCOMMITLSN;
+}
+
+STATIC void
+xfs_attrd_item_committing(
+    struct xfs_log_item    *lip,
+    xfs_lsn_t        lsn)
+{
+}
+
+
+/*
+ * Allocate and initialize an attrd item
+ */
+struct xfs_attrd_log_item *
+xfs_attrd_init(
+    struct xfs_mount        *mp,
+    struct xfs_attri_log_item    *attrip)
+
+{
+    struct xfs_attrd_log_item    *attrdp;
+    uint                size;
+
+    size = (uint)(sizeof(struct xfs_attrd_log_item));
+    attrdp = kmem_zalloc(size, 0);
+    memset(attrdp, 0, size);
+
+    xfs_log_item_init(mp, &attrdp->attrd_item, XFS_LI_ATTRD,
+              &xfs_attrd_item_ops);
+    attrdp->attrd_attrip = attrip;
+    attrdp->attrd_format.alfd_alf_id = attrip->attri_format.alfi_id;
+
+    return attrdp;
+}
+
+/*
+ * This routine is called to allocate an "attr free done" log item.
+ */
+struct xfs_attrd_log_item *
+xfs_trans_get_attrd(struct xfs_trans        *tp,
+          struct xfs_attri_log_item    *attrip)
+{
+    struct xfs_attrd_log_item        *attrdp;
+
+    ASSERT(tp != NULL);
+
+    attrdp = xfs_attrd_init(tp->t_mountp, attrip);
+    ASSERT(attrdp != NULL);
+
+    xfs_trans_add_item(tp, &attrdp->attrd_item);
+    return attrdp;
+}
+
+static const struct xfs_item_ops xfs_attrd_item_ops = {
+    .iop_size    = xfs_attrd_item_size,
+    .iop_format    = xfs_attrd_item_format,
+    .iop_release    = xfs_attrd_item_release,
+    .iop_committing    = xfs_attrd_item_committing,
+    .iop_committed    = xfs_attrd_item_committed,
+};
+
+
+/* Get an ATTRD so we can process all the attrs. */
+static struct xfs_log_item *
+xfs_attr_create_done(
+    struct xfs_trans        *tp,
+    struct xfs_log_item        *intent,
+    unsigned int            count)
+{
+    return &xfs_trans_get_attrd(tp, ATTRI_ITEM(intent))->attrd_item;
+}
+
+const struct xfs_defer_op_type xfs_attr_defer_type = {
+    .max_items    = 1,
+    .create_intent    = xfs_attr_create_intent,
+    .abort_intent    = xfs_attr_abort_intent,
+    .create_done    = xfs_attr_create_done,
+    .finish_item    = xfs_attr_finish_item,
+    .cancel_item    = xfs_attr_cancel_item,
+};
+
+/*
+ * Process an attr intent item that was recovered from the log.  We need to
+ * delete the attr that it describes.
+ */
+STATIC int
+xfs_attri_item_recover(
+    struct xfs_log_item        *lip,
+    struct xfs_trans        *parent_tp)
+{
+    struct xfs_attri_log_item    *attrip = ATTRI_ITEM(lip);
+    struct xfs_mount        *mp = parent_tp->t_mountp;
+    struct xfs_inode        *ip;
+    struct xfs_attrd_log_item    *attrdp;
+    struct xfs_da_args        args;
+    struct xfs_attri_log_format    *attrp;
+    struct xfs_trans_res        tres;
+    int                local;
+    int                error, err2 = 0;
+    int                rsvd = 0;
+    struct xfs_buf            *leaf_bp = NULL;
+    struct xfs_delattr_context    dac = {
+        .da_args    = &args,
+    };
+
+    /*
+     * First check the validity of the attr described by the ATTRI. If any
+     * are bad, then assume that all are bad and just toss the ATTRI.
+     */
+    attrp = &attrip->attri_format;
+    if (!(attrp->alfi_op_flags == XFS_ATTR_OP_FLAGS_SET ||
+          attrp->alfi_op_flags == XFS_ATTR_OP_FLAGS_REMOVE) ||
+        (attrp->alfi_value_len > XATTR_SIZE_MAX) ||
+        (attrp->alfi_name_len > XATTR_NAME_MAX) ||
+        (attrp->alfi_name_len == 0)) {
+        /*
+         * This will pull the ATTRI from the AIL and free the memory
+         * associated with it.
+         */
+        xfs_attri_release(attrip);
+        return -EFSCORRUPTED;
+    }
+
+    error = xfs_iget(mp, 0, attrp->alfi_ino, 0, 0, &ip);
+    if (error)
+        return error;
+
+    memset(&args, 0, sizeof(args));
+    args.geo = ip->i_mount->m_attr_geo;
+    args.whichfork = XFS_ATTR_FORK;
+    args.dp = ip;
+    args.name = attrip->attri_name;
+    args.namelen = attrp->alfi_name_len;
+    args.attr_filter = attrp->alfi_attr_flags;
+    args.hashval = xfs_da_hashname(attrip->attri_name,
+                    attrp->alfi_name_len);
+    args.value = attrip->attri_value;
+    args.valuelen = attrp->alfi_value_len;
+    args.op_flags = XFS_DA_OP_OKNOENT;
+    args.total = xfs_attr_calc_size(&args, &local);
+
+    tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
+            M_RES(mp)->tr_attrsetrt.tr_logres * args.total;
+    tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
+    tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
+
+    error = xfs_trans_alloc(mp, &tres, args.total,  0,
+                rsvd ? XFS_TRANS_RESERVE : 0, &args.trans);
+    if (error)
+        goto out_rele;
+    attrdp = xfs_trans_get_attrd(args.trans, attrip);
+
+    xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+    xfs_trans_ijoin(args.trans, ip, 0);
+
+    do {
+        error = xfs_trans_attr(&dac, attrdp, &leaf_bp,
+                       attrp->alfi_op_flags);
+        if (error && error != -EAGAIN)
+            goto abort_error;
+
+        xfs_trans_log_inode(args.trans, ip,
+                XFS_ILOG_CORE | XFS_ILOG_ADATA);
+
+        err2 = xfs_trans_roll(&args.trans);
+        if (err2) {
+            error = err2;
+            goto abort_error;
+        }
+
+        /* Rejoin inode and leaf if needed */
+        xfs_trans_ijoin(args.trans, ip, 0);
+        if (leaf_bp) {
+            xfs_trans_bjoin(args.trans, leaf_bp);
+            xfs_trans_bhold(args.trans, leaf_bp);
+        }
+
+    } while (error == -EAGAIN);
+
+    error = xfs_trans_commit(args.trans);
+    if (error)
+        goto abort_error;
+
+    xfs_iunlock(ip, XFS_ILOCK_EXCL);
+    xfs_irele(ip);
+    return error;
+
+abort_error:
+    xfs_trans_cancel(args.trans);
+    xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_rele:
+    xfs_irele(ip);
+    return error;
+}
+
+static const struct xfs_item_ops xfs_attri_item_ops = {
+    .iop_size    = xfs_attri_item_size,
+    .iop_format    = xfs_attri_item_format,
+    .iop_unpin    = xfs_attri_item_unpin,
+    .iop_committed    = xfs_attri_item_committed,
+    .iop_committing = xfs_attri_item_committing,
+    .iop_release    = xfs_attri_item_release,
+    .iop_recover    = xfs_attri_item_recover,
+    .iop_match    = xfs_attri_item_match,
+};
+
+
+
+STATIC int
+xlog_recover_attri_commit_pass2(
+    struct xlog                     *log,
+    struct list_head        *buffer_list,
+    struct xlog_recover_item        *item,
+    xfs_lsn_t                       lsn)
+{
+    int                             error;
+    struct xfs_mount                *mp = log->l_mp;
+    struct xfs_attri_log_item       *attrip;
+    struct xfs_attri_log_format     *attri_formatp;
+    char                *name = NULL;
+    char                *value = NULL;
+    int                region = 0;
+
+    attri_formatp = item->ri_buf[region].i_addr;
+
+    attrip = xfs_attri_init(mp);
+    error = xfs_attri_copy_format(&item->ri_buf[region],
+                      &attrip->attri_format);
+    if (error) {
+        xfs_attri_item_free(attrip);
+        return error;
+    }
+
+    attrip->attri_name_len = attri_formatp->alfi_name_len;
+    attrip->attri_value_len = attri_formatp->alfi_value_len;
+    attrip = kmem_realloc(attrip, sizeof(struct xfs_attri_log_item) +
+                  attrip->attri_name_len + attrip->attri_value_len,
+                  0);
+
+    ASSERT(attrip->attri_name_len > 0);
+    region++;
+    name = ((char *)attrip) + sizeof(struct xfs_attri_log_item);
+    memcpy(name, item->ri_buf[region].i_addr,
+           attrip->attri_name_len);
+    attrip->attri_name = name;
+
+    if (attrip->attri_value_len > 0) {
+        region++;
+        value = ((char *)attrip) + sizeof(struct xfs_attri_log_item) +
+            attrip->attri_name_len;
+        memcpy(value, item->ri_buf[region].i_addr,
+            attrip->attri_value_len);
+        attrip->attri_value = value;
+    }
+
+    /*
+     * The ATTRI has two references. One for the ATTRD and one for ATTRI to
+     * ensure it makes it into the AIL. Insert the ATTRI into the AIL
+     * directly and drop the ATTRI reference. Note that
+     * xfs_trans_ail_update() drops the AIL lock.
+     */
+    xfs_trans_ail_insert(log->l_ailp, &attrip->attri_item, lsn);
+    xfs_attri_release(attrip);
+    return 0;
+}
+
+const struct xlog_recover_item_ops xlog_attri_item_ops = {
+    .item_type    = XFS_LI_ATTRI,
+    .commit_pass2    = xlog_recover_attri_commit_pass2,
+};
+
+/*
+ * This routine is called when an ATTRD format structure is found in a committed + * transaction in the log. Its purpose is to cancel the corresponding ATTRI if + * it was still in the log. To do this it searches the AIL for the ATTRI with + * an id equal to that in the ATTRD format structure. If we find it we drop + * the ATTRD reference, which removes the ATTRI from the AIL and frees it.
+ */
+STATIC int
+xlog_recover_attrd_commit_pass2(
+    struct xlog            *log,
+    struct list_head        *buffer_list,
+    struct xlog_recover_item    *item,
+    xfs_lsn_t            lsn)
+{
+    struct xfs_attrd_log_format    *attrd_formatp;
+
+    attrd_formatp = item->ri_buf[0].i_addr;
+    ASSERT((item->ri_buf[0].i_len ==
+                (sizeof(struct xfs_attrd_log_format))));
+
+    xlog_recover_release_intent(log, XFS_LI_ATTRI,
+                    attrd_formatp->alfd_alf_id);
+    return 0;
+}
+
+const struct xlog_recover_item_ops xlog_attrd_item_ops = {
+    .item_type    = XFS_LI_ATTRD,
+    .commit_pass2    = xlog_recover_attrd_commit_pass2,
+};
diff --git a/fs/xfs/xfs_attr_item.h b/fs/xfs/xfs_attr_item.h
new file mode 100644
index 0000000..7dd2572
--- /dev/null
+++ b/fs/xfs/xfs_attr_item.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Allison Collins <allison.henderson@xxxxxxxxxx>
+ */
+#ifndef    __XFS_ATTR_ITEM_H__
+#define    __XFS_ATTR_ITEM_H__
+
+/* kernel only ATTRI/ATTRD definitions */
+
+struct xfs_mount;
+struct kmem_zone;
+
+/*
+ * Define ATTR flag bits. Manipulated by set/clear/test_bit operators.
+ */
+#define    XFS_ATTRI_RECOVERED    1
+
+
+/* iovec length must be 32-bit aligned */
+#define ATTR_NVEC_SIZE(size) (size == sizeof(int32_t) ? sizeof(int32_t) : \
+                size + sizeof(int32_t) - \
+                (size % sizeof(int32_t)))
+
+/*
+ * This is the "attr intention" log item.  It is used to log the fact that some + * attribute operations need to be processed.  An operation is currently either + * a set or remove.  Set or remove operations are described by the xfs_attr_item + * which may be logged to this intent.  Intents are used in conjunction with the
+ * "attr done" log item described below.
+ *
+ * The ATTRI is reference counted so that it is not freed prior to both the + * ATTRI and ATTRD being committed and unpinned. This ensures the ATTRI is
+ * inserted into the AIL even in the event of out of order ATTRI/ATTRD
+ * processing. In other words, an ATTRI is born with two references:
+ *
+ *      1.) an ATTRI held reference to track ATTRI AIL insertion
+ *      2.) an ATTRD held reference to track ATTRD commit
+ *
+ * On allocation, both references are the responsibility of the caller. Once the + * ATTRI is added to and dirtied in a transaction, ownership of reference one + * transfers to the transaction. The reference is dropped once the ATTRI is + * inserted to the AIL or in the event of failure along the way (e.g., commit + * failure, log I/O error, etc.). Note that the caller remains responsible for + * the ATTRD reference under all circumstances to this point. The caller has no
+ * means to detect failure once the transaction is committed, however.
+ * Therefore, an ATTRD is required after this point, even in the event of
+ * unrelated failure.
+ *
+ * Once an ATTRD is allocated and dirtied in a transaction, reference two + * transfers to the transaction. The ATTRD reference is dropped once it reaches + * the unpin handler. Similar to the ATTRI, the reference also drops in the + * event of commit failure or log I/O errors. Note that the ATTRD is not + * inserted in the AIL, so at this point both the ATTRI and ATTRD are freed.
+ */
+struct xfs_attri_log_item {
+    struct xfs_log_item        attri_item;
+    atomic_t            attri_refcount;
+    int                attri_name_len;
+    void                *attri_name;
+    int                attri_value_len;
+    void                *attri_value;
+    struct xfs_attri_log_format    attri_format;
+};
+
+/*
+ * This is the "attr done" log item.  It is used to log the fact that some attrs
+ * earlier mentioned in an attri item have been freed.
+ */
+struct xfs_attrd_log_item {
+    struct xfs_attri_log_item    *attrd_attrip;
+    struct xfs_log_item        attrd_item;
+    struct xfs_attrd_log_format    attrd_format;
+};
+
+#endif    /* __XFS_ATTR_ITEM_H__ */
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 50f922c..166b680 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -15,6 +15,7 @@
  #include "xfs_inode.h"
  #include "xfs_trans.h"
  #include "xfs_bmap.h"
+#include "xfs_da_btree.h"
  #include "xfs_attr.h"
  #include "xfs_attr_sf.h"
  #include "xfs_attr_leaf.h"
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 6f22a66..edc05af 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -15,6 +15,8 @@
  #include "xfs_iwalk.h"
  #include "xfs_itable.h"
  #include "xfs_error.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
  #include "xfs_attr.h"
  #include "xfs_bmap.h"
  #include "xfs_bmap_util.h"
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index c1771e7..62e1534 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -17,6 +17,8 @@
  #include "xfs_itable.h"
  #include "xfs_fsops.h"
  #include "xfs_rtalloc.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
  #include "xfs_attr.h"
  #include "xfs_ioctl.h"
  #include "xfs_ioctl32.h"
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 80a13c8..fe60da1 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -13,6 +13,8 @@
  #include "xfs_inode.h"
  #include "xfs_acl.h"
  #include "xfs_quota.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
  #include "xfs_attr.h"
  #include "xfs_trans.h"
  #include "xfs_trace.h"
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ad0c69ee..6405ce33 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1975,6 +1975,10 @@ xlog_print_tic_res(
          REG_TYPE_STR(CUD_FORMAT, "cud_format"),
          REG_TYPE_STR(BUI_FORMAT, "bui_format"),
          REG_TYPE_STR(BUD_FORMAT, "bud_format"),
+        REG_TYPE_STR(ATTRI_FORMAT, "attri_format"),
+        REG_TYPE_STR(ATTRD_FORMAT, "attrd_format"),
+        REG_TYPE_STR(ATTR_NAME, "attr_name"),
+        REG_TYPE_STR(ATTR_VALUE, "attr_value"),
      };
      BUILD_BUG_ON(ARRAY_SIZE(res_type_str) != XLOG_REG_TYPE_MAX + 1);
  #undef REG_TYPE_STR
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index e2ec91b..ec31db0 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1811,6 +1811,8 @@ static const struct xlog_recover_item_ops *xlog_recover_item_ops[] = {
      &xlog_cud_item_ops,
      &xlog_bui_item_ops,
      &xlog_bud_item_ops,
+    &xlog_attri_item_ops,
+    &xlog_attrd_item_ops,
  };
  static const struct xlog_recover_item_ops *
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index 5f04d8a..0597a04 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -126,6 +126,8 @@ xfs_check_ondisk_structs(void)
      XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format,    56);
      XFS_CHECK_STRUCT_SIZE(struct xfs_qoff_logformat,    20);
      XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header,        16);
+    XFS_CHECK_STRUCT_SIZE(struct xfs_attri_log_format,    40);
+    XFS_CHECK_STRUCT_SIZE(struct xfs_attrd_log_format,    16);
      /*
       * The v5 superblock format extended several v4 header structures with
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index bca48b3..9b0c790 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -10,6 +10,7 @@
  #include "xfs_log_format.h"
  #include "xfs_da_format.h"
  #include "xfs_inode.h"
+#include "xfs_da_btree.h"
  #include "xfs_attr.h"
  #include "xfs_acl.h"
  #include "xfs_da_btree.h"
--
2.7.4





[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux