Andreas, Emoly, I'd appreciate your thoughts; what do you think about Tahsin's patch. Could you give it a try on a file system created with Lustre and let me know how it works for you? Many thanks!! - Ted On Mon, Jul 24, 2017 at 01:13:08PM -0700, Tahsin Erdogan wrote: > Original Lustre ea_inode feature did not have ref counts on xattr inodes > because there was always one parent that referenced it. New > implementation expects ref count to be initialized which is not true for > Lustre case. Handle this by detecting Lustre created xattr inode and set > its ref count to 1. > > The quota handling of xattr inodes have also changed with deduplication > support. New implementation manually manages quotas to support sharing > across multiple users. A consequence is that, a referencing inode > incorporates the blocks of xattr inode into its own i_block field. > > We need to know how a xattr inode was created so that we can reverse the > block charges during reference removal. This is handled by introducing a > EXT4_STATE_LUSTRE_EA_INODE flag. The flag is set on a xattr inode if > inode appears to have been created by Lustre. During xattr inode reference > removal, the manual quota uncharge is skipped if the flag is set. > > Signed-off-by: Tahsin Erdogan <tahsin@xxxxxxxxxx> > --- > fs/ext4/ext4.h | 1 + > fs/ext4/inode.c | 8 ---- > fs/ext4/xattr.c | 141 +++++++++++++++++++++++++++++++++++++------------------- > 3 files changed, 94 insertions(+), 56 deletions(-) > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index e9440ed605c0..21e8b1dea958 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -1569,6 +1569,7 @@ enum { > nolocking */ > EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ > EXT4_STATE_EXT_PRECACHED, /* extents have been precached */ > + EXT4_STATE_LUSTRE_EA_INODE, /* Lustre-style ea_inode */ > }; > > #define EXT4_INODE_BIT_FNS(name, field, offset) \ > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c > index 70699940e20d..cebb6e60a8af 100644 > --- a/fs/ext4/inode.c > +++ b/fs/ext4/inode.c > @@ -4897,14 +4897,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) > brelse(iloc.bh); > ext4_set_inode_flags(inode); > > - if (ei->i_flags & EXT4_EA_INODE_FL) { > - ext4_xattr_inode_set_class(inode); > - > - inode_lock(inode); > - inode->i_flags |= S_NOQUOTA; > - inode_unlock(inode); > - } > - > unlock_new_inode(inode); > return inode; > > diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c > index 949b4ea3ff58..415be4a88cc3 100644 > --- a/fs/ext4/xattr.c > +++ b/fs/ext4/xattr.c > @@ -354,8 +354,10 @@ static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size) > return ret; > } > > +#define EXT4_XATTR_INODE_GET_PARENT(inode) ((__u32)(inode)->i_mtime.tv_sec) > + > static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, > - struct inode **ea_inode) > + u32 ea_inode_hash, struct inode **ea_inode) > { > struct inode *inode; > int err; > @@ -385,6 +387,24 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, > goto error; > } > > + ext4_xattr_inode_set_class(inode); > + > + /* > + * Check whether this is an old Lustre-style xattr inode. Lustre > + * implementation does not have hash validation, rather it has a > + * backpointer from ea_inode to the parent inode. > + */ > + if (ea_inode_hash != ext4_xattr_inode_get_hash(inode) && > + EXT4_XATTR_INODE_GET_PARENT(inode) == parent->i_ino && > + inode->i_generation == parent->i_generation) { > + ext4_set_inode_state(inode, EXT4_STATE_LUSTRE_EA_INODE); > + ext4_xattr_inode_set_ref(inode, 1); > + } else { > + inode_lock(inode); > + inode->i_flags |= S_NOQUOTA; > + inode_unlock(inode); > + } > + > *ea_inode = inode; > return 0; > error: > @@ -417,8 +437,6 @@ ext4_xattr_inode_verify_hashes(struct inode *ea_inode, > return 0; > } > > -#define EXT4_XATTR_INODE_GET_PARENT(inode) ((__u32)(inode)->i_mtime.tv_sec) > - > /* > * Read xattr value from the EA inode. > */ > @@ -431,7 +449,7 @@ ext4_xattr_inode_get(struct inode *inode, struct ext4_xattr_entry *entry, > int err; > > err = ext4_xattr_inode_iget(inode, le32_to_cpu(entry->e_value_inum), > - &ea_inode); > + le32_to_cpu(entry->e_hash), &ea_inode); > if (err) { > ea_inode = NULL; > goto out; > @@ -449,29 +467,20 @@ ext4_xattr_inode_get(struct inode *inode, struct ext4_xattr_entry *entry, > if (err) > goto out; > > - err = ext4_xattr_inode_verify_hashes(ea_inode, entry, buffer, size); > - /* > - * Compatibility check for old Lustre ea_inode implementation. Old > - * version does not have hash validation, but it has a backpointer > - * from ea_inode to the parent inode. > - */ > - if (err == -EFSCORRUPTED) { > - if (EXT4_XATTR_INODE_GET_PARENT(ea_inode) != inode->i_ino || > - ea_inode->i_generation != inode->i_generation) { > + if (!ext4_test_inode_state(ea_inode, EXT4_STATE_LUSTRE_EA_INODE)) { > + err = ext4_xattr_inode_verify_hashes(ea_inode, entry, buffer, > + size); > + if (err) { > ext4_warning_inode(ea_inode, > "EA inode hash validation failed"); > goto out; > } > - /* Do not add ea_inode to the cache. */ > - ea_inode_cache = NULL; > - err = 0; > - } else if (err) > - goto out; > > - if (ea_inode_cache) > - mb_cache_entry_create(ea_inode_cache, GFP_NOFS, > - ext4_xattr_inode_get_hash(ea_inode), > - ea_inode->i_ino, true /* reusable */); > + if (ea_inode_cache) > + mb_cache_entry_create(ea_inode_cache, GFP_NOFS, > + ext4_xattr_inode_get_hash(ea_inode), > + ea_inode->i_ino, true /* reusable */); > + } > out: > iput(ea_inode); > return err; > @@ -838,10 +847,15 @@ static int ext4_xattr_inode_alloc_quota(struct inode *inode, size_t len) > return err; > } > > -static void ext4_xattr_inode_free_quota(struct inode *inode, size_t len) > +static void ext4_xattr_inode_free_quota(struct inode *parent, > + struct inode *ea_inode, > + size_t len) > { > - dquot_free_space_nodirty(inode, round_up_cluster(inode, len)); > - dquot_free_inode(inode); > + if (ea_inode && > + ext4_test_inode_state(ea_inode, EXT4_STATE_LUSTRE_EA_INODE)) > + return; > + dquot_free_space_nodirty(parent, round_up_cluster(parent, len)); > + dquot_free_inode(parent); > } > > int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode, > @@ -1071,7 +1085,9 @@ static int ext4_xattr_inode_inc_ref_all(handle_t *handle, struct inode *parent, > if (!entry->e_value_inum) > continue; > ea_ino = le32_to_cpu(entry->e_value_inum); > - err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode); > + err = ext4_xattr_inode_iget(parent, ea_ino, > + le32_to_cpu(entry->e_hash), > + &ea_inode); > if (err) > goto cleanup; > err = ext4_xattr_inode_inc_ref(handle, ea_inode); > @@ -1093,7 +1109,9 @@ static int ext4_xattr_inode_inc_ref_all(handle_t *handle, struct inode *parent, > if (!entry->e_value_inum) > continue; > ea_ino = le32_to_cpu(entry->e_value_inum); > - err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode); > + err = ext4_xattr_inode_iget(parent, ea_ino, > + le32_to_cpu(entry->e_hash), > + &ea_inode); > if (err) { > ext4_warning(parent->i_sb, > "cleanup ea_ino %u iget error %d", ea_ino, > @@ -1131,7 +1149,9 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent, > if (!entry->e_value_inum) > continue; > ea_ino = le32_to_cpu(entry->e_value_inum); > - err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode); > + err = ext4_xattr_inode_iget(parent, ea_ino, > + le32_to_cpu(entry->e_hash), > + &ea_inode); > if (err) > continue; > > @@ -1159,7 +1179,7 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent, > } > > if (!skip_quota) > - ext4_xattr_inode_free_quota(parent, > + ext4_xattr_inode_free_quota(parent, ea_inode, > le32_to_cpu(entry->e_value_size)); > > /* > @@ -1591,6 +1611,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, > if (!s->not_found && here->e_value_inum) { > ret = ext4_xattr_inode_iget(inode, > le32_to_cpu(here->e_value_inum), > + le32_to_cpu(here->e_hash), > &old_ea_inode); > if (ret) { > old_ea_inode = NULL; > @@ -1609,7 +1630,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, > &new_ea_inode); > if (ret) { > new_ea_inode = NULL; > - ext4_xattr_inode_free_quota(inode, i->value_len); > + ext4_xattr_inode_free_quota(inode, NULL, i->value_len); > goto out; > } > } > @@ -1628,13 +1649,13 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, > ext4_warning_inode(new_ea_inode, > "dec ref new_ea_inode err=%d", > err); > - ext4_xattr_inode_free_quota(inode, > + ext4_xattr_inode_free_quota(inode, new_ea_inode, > i->value_len); > } > goto out; > } > > - ext4_xattr_inode_free_quota(inode, > + ext4_xattr_inode_free_quota(inode, old_ea_inode, > le32_to_cpu(here->e_value_size)); > } > > @@ -1803,8 +1824,10 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, > struct mb_cache_entry *ce = NULL; > int error = 0; > struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); > - struct inode *ea_inode = NULL; > - size_t old_ea_inode_size = 0; > + struct inode *ea_inode = NULL, *tmp_inode; > + size_t old_ea_inode_quota = 0; > + unsigned int ea_ino; > + > > #define header(x) ((struct ext4_xattr_header *)(x)) > > @@ -1866,12 +1889,24 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, > * like it has an empty value. > */ > if (!s->not_found && s->here->e_value_inum) { > - /* > - * Defer quota free call for previous inode > - * until success is guaranteed. > - */ > - old_ea_inode_size = le32_to_cpu( > + ea_ino = le32_to_cpu(s->here->e_value_inum); > + error = ext4_xattr_inode_iget(inode, ea_ino, > + le32_to_cpu(s->here->e_hash), > + &tmp_inode); > + if (error) > + goto cleanup; > + > + if (!ext4_test_inode_state(tmp_inode, > + EXT4_STATE_LUSTRE_EA_INODE)) { > + /* > + * Defer quota free call for previous > + * inode until success is guaranteed. > + */ > + old_ea_inode_quota = le32_to_cpu( > s->here->e_value_size); > + } > + iput(tmp_inode); > + > s->here->e_value_inum = 0; > s->here->e_value_size = 0; > } > @@ -1898,8 +1933,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, > goto cleanup; > > if (i->value && s->here->e_value_inum) { > - unsigned int ea_ino; > - > /* > * A ref count on ea_inode has been taken as part of the call to > * ext4_xattr_set_entry() above. We would like to drop this > @@ -1907,7 +1940,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, > * initialized and has its own ref count on the ea_inode. > */ > ea_ino = le32_to_cpu(s->here->e_value_inum); > - error = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode); > + error = ext4_xattr_inode_iget(inode, ea_ino, > + le32_to_cpu(s->here->e_hash), > + &ea_inode); > if (error) { > ea_inode = NULL; > goto cleanup; > @@ -2056,8 +2091,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, > } > } > > - if (old_ea_inode_size) > - ext4_xattr_inode_free_quota(inode, old_ea_inode_size); > + if (old_ea_inode_quota) > + ext4_xattr_inode_free_quota(inode, NULL, old_ea_inode_quota); > > /* Update the inode. */ > EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; > @@ -2084,7 +2119,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, > > /* If there was an error, revert the quota charge. */ > if (error) > - ext4_xattr_inode_free_quota(inode, > + ext4_xattr_inode_free_quota(inode, ea_inode, > i_size_read(ea_inode)); > iput(ea_inode); > } > @@ -2807,6 +2842,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, > struct ext4_xattr_ibody_header *header; > struct ext4_iloc iloc = { .bh = NULL }; > struct ext4_xattr_entry *entry; > + struct inode *ea_inode; > int error; > > error = ext4_xattr_ensure_credits(handle, inode, extra_credits, > @@ -2861,10 +2897,19 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, > > if (ext4_has_feature_ea_inode(inode->i_sb)) { > for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry); > - entry = EXT4_XATTR_NEXT(entry)) > - if (entry->e_value_inum) > - ext4_xattr_inode_free_quota(inode, > + entry = EXT4_XATTR_NEXT(entry)) { > + if (!entry->e_value_inum) > + continue; > + error = ext4_xattr_inode_iget(inode, > + le32_to_cpu(entry->e_value_inum), > + le32_to_cpu(entry->e_hash), > + &ea_inode); > + if (error) > + continue; > + ext4_xattr_inode_free_quota(inode, ea_inode, > le32_to_cpu(entry->e_value_size)); > + iput(ea_inode); > + } > > } > > -- > 2.14.0.rc0.284.gd933b75aa4-goog >