Re: [PATCH] ext4: backward compatibility support for Lustre ea_inode implementation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Andreas, Emoly,

I'd appreciate your thoughts; what do you think about Tahsin's patch.
Could you give it a try on a file system created with Lustre and let
me know how it works for you?

Many thanks!!

					- Ted

On Mon, Jul 24, 2017 at 01:13:08PM -0700, Tahsin Erdogan wrote:
> Original Lustre ea_inode feature did not have ref counts on xattr inodes
> because there was always one parent that referenced it. New
> implementation expects ref count to be initialized which is not true for
> Lustre case. Handle this by detecting Lustre created xattr inode and set
> its ref count to 1.
> 
> The quota handling of xattr inodes have also changed with deduplication
> support. New implementation manually manages quotas to support sharing
> across multiple users. A consequence is that, a referencing inode
> incorporates the blocks of xattr inode into its own i_block field.
> 
> We need to know how a xattr inode was created so that we can reverse the
> block charges during reference removal. This is handled by introducing a
> EXT4_STATE_LUSTRE_EA_INODE flag. The flag is set on a xattr inode if
> inode appears to have been created by Lustre. During xattr inode reference
> removal, the manual quota uncharge is skipped if the flag is set.
> 
> Signed-off-by: Tahsin Erdogan <tahsin@xxxxxxxxxx>
> ---
>  fs/ext4/ext4.h  |   1 +
>  fs/ext4/inode.c |   8 ----
>  fs/ext4/xattr.c | 141 +++++++++++++++++++++++++++++++++++++-------------------
>  3 files changed, 94 insertions(+), 56 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index e9440ed605c0..21e8b1dea958 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1569,6 +1569,7 @@ enum {
>  					   nolocking */
>  	EXT4_STATE_MAY_INLINE_DATA,	/* may have in-inode data */
>  	EXT4_STATE_EXT_PRECACHED,	/* extents have been precached */
> +	EXT4_STATE_LUSTRE_EA_INODE,	/* Lustre-style ea_inode */
>  };
>  
>  #define EXT4_INODE_BIT_FNS(name, field, offset)				\
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 70699940e20d..cebb6e60a8af 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4897,14 +4897,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
>  	brelse(iloc.bh);
>  	ext4_set_inode_flags(inode);
>  
> -	if (ei->i_flags & EXT4_EA_INODE_FL) {
> -		ext4_xattr_inode_set_class(inode);
> -
> -		inode_lock(inode);
> -		inode->i_flags |= S_NOQUOTA;
> -		inode_unlock(inode);
> -	}
> -
>  	unlock_new_inode(inode);
>  	return inode;
>  
> diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
> index 949b4ea3ff58..415be4a88cc3 100644
> --- a/fs/ext4/xattr.c
> +++ b/fs/ext4/xattr.c
> @@ -354,8 +354,10 @@ static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size)
>  	return ret;
>  }
>  
> +#define EXT4_XATTR_INODE_GET_PARENT(inode) ((__u32)(inode)->i_mtime.tv_sec)
> +
>  static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
> -				 struct inode **ea_inode)
> +				 u32 ea_inode_hash, struct inode **ea_inode)
>  {
>  	struct inode *inode;
>  	int err;
> @@ -385,6 +387,24 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
>  		goto error;
>  	}
>  
> +	ext4_xattr_inode_set_class(inode);
> +
> +	/*
> +	 * Check whether this is an old Lustre-style xattr inode. Lustre
> +	 * implementation does not have hash validation, rather it has a
> +	 * backpointer from ea_inode to the parent inode.
> +	 */
> +	if (ea_inode_hash != ext4_xattr_inode_get_hash(inode) &&
> +	    EXT4_XATTR_INODE_GET_PARENT(inode) == parent->i_ino &&
> +	    inode->i_generation == parent->i_generation) {
> +		ext4_set_inode_state(inode, EXT4_STATE_LUSTRE_EA_INODE);
> +		ext4_xattr_inode_set_ref(inode, 1);
> +	} else {
> +		inode_lock(inode);
> +		inode->i_flags |= S_NOQUOTA;
> +		inode_unlock(inode);
> +	}
> +
>  	*ea_inode = inode;
>  	return 0;
>  error:
> @@ -417,8 +437,6 @@ ext4_xattr_inode_verify_hashes(struct inode *ea_inode,
>  	return 0;
>  }
>  
> -#define EXT4_XATTR_INODE_GET_PARENT(inode) ((__u32)(inode)->i_mtime.tv_sec)
> -
>  /*
>   * Read xattr value from the EA inode.
>   */
> @@ -431,7 +449,7 @@ ext4_xattr_inode_get(struct inode *inode, struct ext4_xattr_entry *entry,
>  	int err;
>  
>  	err = ext4_xattr_inode_iget(inode, le32_to_cpu(entry->e_value_inum),
> -				    &ea_inode);
> +				    le32_to_cpu(entry->e_hash), &ea_inode);
>  	if (err) {
>  		ea_inode = NULL;
>  		goto out;
> @@ -449,29 +467,20 @@ ext4_xattr_inode_get(struct inode *inode, struct ext4_xattr_entry *entry,
>  	if (err)
>  		goto out;
>  
> -	err = ext4_xattr_inode_verify_hashes(ea_inode, entry, buffer, size);
> -	/*
> -	 * Compatibility check for old Lustre ea_inode implementation. Old
> -	 * version does not have hash validation, but it has a backpointer
> -	 * from ea_inode to the parent inode.
> -	 */
> -	if (err == -EFSCORRUPTED) {
> -		if (EXT4_XATTR_INODE_GET_PARENT(ea_inode) != inode->i_ino ||
> -		    ea_inode->i_generation != inode->i_generation) {
> +	if (!ext4_test_inode_state(ea_inode, EXT4_STATE_LUSTRE_EA_INODE)) {
> +		err = ext4_xattr_inode_verify_hashes(ea_inode, entry, buffer,
> +						     size);
> +		if (err) {
>  			ext4_warning_inode(ea_inode,
>  					   "EA inode hash validation failed");
>  			goto out;
>  		}
> -		/* Do not add ea_inode to the cache. */
> -		ea_inode_cache = NULL;
> -		err = 0;
> -	} else if (err)
> -		goto out;
>  
> -	if (ea_inode_cache)
> -		mb_cache_entry_create(ea_inode_cache, GFP_NOFS,
> -				      ext4_xattr_inode_get_hash(ea_inode),
> -				      ea_inode->i_ino, true /* reusable */);
> +		if (ea_inode_cache)
> +			mb_cache_entry_create(ea_inode_cache, GFP_NOFS,
> +					ext4_xattr_inode_get_hash(ea_inode),
> +					ea_inode->i_ino, true /* reusable */);
> +	}
>  out:
>  	iput(ea_inode);
>  	return err;
> @@ -838,10 +847,15 @@ static int ext4_xattr_inode_alloc_quota(struct inode *inode, size_t len)
>  	return err;
>  }
>  
> -static void ext4_xattr_inode_free_quota(struct inode *inode, size_t len)
> +static void ext4_xattr_inode_free_quota(struct inode *parent,
> +					struct inode *ea_inode,
> +					size_t len)
>  {
> -	dquot_free_space_nodirty(inode, round_up_cluster(inode, len));
> -	dquot_free_inode(inode);
> +	if (ea_inode &&
> +	    ext4_test_inode_state(ea_inode, EXT4_STATE_LUSTRE_EA_INODE))
> +		return;
> +	dquot_free_space_nodirty(parent, round_up_cluster(parent, len));
> +	dquot_free_inode(parent);
>  }
>  
>  int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
> @@ -1071,7 +1085,9 @@ static int ext4_xattr_inode_inc_ref_all(handle_t *handle, struct inode *parent,
>  		if (!entry->e_value_inum)
>  			continue;
>  		ea_ino = le32_to_cpu(entry->e_value_inum);
> -		err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
> +		err = ext4_xattr_inode_iget(parent, ea_ino,
> +					    le32_to_cpu(entry->e_hash),
> +					    &ea_inode);
>  		if (err)
>  			goto cleanup;
>  		err = ext4_xattr_inode_inc_ref(handle, ea_inode);
> @@ -1093,7 +1109,9 @@ static int ext4_xattr_inode_inc_ref_all(handle_t *handle, struct inode *parent,
>  		if (!entry->e_value_inum)
>  			continue;
>  		ea_ino = le32_to_cpu(entry->e_value_inum);
> -		err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
> +		err = ext4_xattr_inode_iget(parent, ea_ino,
> +					    le32_to_cpu(entry->e_hash),
> +					    &ea_inode);
>  		if (err) {
>  			ext4_warning(parent->i_sb,
>  				     "cleanup ea_ino %u iget error %d", ea_ino,
> @@ -1131,7 +1149,9 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
>  		if (!entry->e_value_inum)
>  			continue;
>  		ea_ino = le32_to_cpu(entry->e_value_inum);
> -		err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
> +		err = ext4_xattr_inode_iget(parent, ea_ino,
> +					    le32_to_cpu(entry->e_hash),
> +					    &ea_inode);
>  		if (err)
>  			continue;
>  
> @@ -1159,7 +1179,7 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
>  		}
>  
>  		if (!skip_quota)
> -			ext4_xattr_inode_free_quota(parent,
> +			ext4_xattr_inode_free_quota(parent, ea_inode,
>  					      le32_to_cpu(entry->e_value_size));
>  
>  		/*
> @@ -1591,6 +1611,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
>  	if (!s->not_found && here->e_value_inum) {
>  		ret = ext4_xattr_inode_iget(inode,
>  					    le32_to_cpu(here->e_value_inum),
> +					    le32_to_cpu(here->e_hash),
>  					    &old_ea_inode);
>  		if (ret) {
>  			old_ea_inode = NULL;
> @@ -1609,7 +1630,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
>  						     &new_ea_inode);
>  		if (ret) {
>  			new_ea_inode = NULL;
> -			ext4_xattr_inode_free_quota(inode, i->value_len);
> +			ext4_xattr_inode_free_quota(inode, NULL, i->value_len);
>  			goto out;
>  		}
>  	}
> @@ -1628,13 +1649,13 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
>  					ext4_warning_inode(new_ea_inode,
>  						  "dec ref new_ea_inode err=%d",
>  						  err);
> -				ext4_xattr_inode_free_quota(inode,
> +				ext4_xattr_inode_free_quota(inode, new_ea_inode,
>  							    i->value_len);
>  			}
>  			goto out;
>  		}
>  
> -		ext4_xattr_inode_free_quota(inode,
> +		ext4_xattr_inode_free_quota(inode, old_ea_inode,
>  					    le32_to_cpu(here->e_value_size));
>  	}
>  
> @@ -1803,8 +1824,10 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
>  	struct mb_cache_entry *ce = NULL;
>  	int error = 0;
>  	struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
> -	struct inode *ea_inode = NULL;
> -	size_t old_ea_inode_size = 0;
> +	struct inode *ea_inode = NULL, *tmp_inode;
> +	size_t old_ea_inode_quota = 0;
> +	unsigned int ea_ino;
> +
>  
>  #define header(x) ((struct ext4_xattr_header *)(x))
>  
> @@ -1866,12 +1889,24 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
>  			 * like it has an empty value.
>  			 */
>  			if (!s->not_found && s->here->e_value_inum) {
> -				/*
> -				 * Defer quota free call for previous inode
> -				 * until success is guaranteed.
> -				 */
> -				old_ea_inode_size = le32_to_cpu(
> +				ea_ino = le32_to_cpu(s->here->e_value_inum);
> +				error = ext4_xattr_inode_iget(inode, ea_ino,
> +					      le32_to_cpu(s->here->e_hash),
> +					      &tmp_inode);
> +				if (error)
> +					goto cleanup;
> +
> +				if (!ext4_test_inode_state(tmp_inode,
> +						EXT4_STATE_LUSTRE_EA_INODE)) {
> +					/*
> +					 * Defer quota free call for previous
> +					 * inode until success is guaranteed.
> +					 */
> +					old_ea_inode_quota = le32_to_cpu(
>  							s->here->e_value_size);
> +				}
> +				iput(tmp_inode);
> +
>  				s->here->e_value_inum = 0;
>  				s->here->e_value_size = 0;
>  			}
> @@ -1898,8 +1933,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
>  		goto cleanup;
>  
>  	if (i->value && s->here->e_value_inum) {
> -		unsigned int ea_ino;
> -
>  		/*
>  		 * A ref count on ea_inode has been taken as part of the call to
>  		 * ext4_xattr_set_entry() above. We would like to drop this
> @@ -1907,7 +1940,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
>  		 * initialized and has its own ref count on the ea_inode.
>  		 */
>  		ea_ino = le32_to_cpu(s->here->e_value_inum);
> -		error = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode);
> +		error = ext4_xattr_inode_iget(inode, ea_ino,
> +					      le32_to_cpu(s->here->e_hash),
> +					      &ea_inode);
>  		if (error) {
>  			ea_inode = NULL;
>  			goto cleanup;
> @@ -2056,8 +2091,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
>  		}
>  	}
>  
> -	if (old_ea_inode_size)
> -		ext4_xattr_inode_free_quota(inode, old_ea_inode_size);
> +	if (old_ea_inode_quota)
> +		ext4_xattr_inode_free_quota(inode, NULL, old_ea_inode_quota);
>  
>  	/* Update the inode. */
>  	EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
> @@ -2084,7 +2119,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
>  
>  		/* If there was an error, revert the quota charge. */
>  		if (error)
> -			ext4_xattr_inode_free_quota(inode,
> +			ext4_xattr_inode_free_quota(inode, ea_inode,
>  						    i_size_read(ea_inode));
>  		iput(ea_inode);
>  	}
> @@ -2807,6 +2842,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
>  	struct ext4_xattr_ibody_header *header;
>  	struct ext4_iloc iloc = { .bh = NULL };
>  	struct ext4_xattr_entry *entry;
> +	struct inode *ea_inode;
>  	int error;
>  
>  	error = ext4_xattr_ensure_credits(handle, inode, extra_credits,
> @@ -2861,10 +2897,19 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
>  
>  		if (ext4_has_feature_ea_inode(inode->i_sb)) {
>  			for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
> -			     entry = EXT4_XATTR_NEXT(entry))
> -				if (entry->e_value_inum)
> -					ext4_xattr_inode_free_quota(inode,
> +			     entry = EXT4_XATTR_NEXT(entry)) {
> +				if (!entry->e_value_inum)
> +					continue;
> +				error = ext4_xattr_inode_iget(inode,
> +					      le32_to_cpu(entry->e_value_inum),
> +					      le32_to_cpu(entry->e_hash),
> +					      &ea_inode);
> +				if (error)
> +					continue;
> +				ext4_xattr_inode_free_quota(inode, ea_inode,
>  					      le32_to_cpu(entry->e_value_size));
> +				iput(ea_inode);
> +			}
>  
>  		}
>  
> -- 
> 2.14.0.rc0.284.gd933b75aa4-goog
> 



[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux