Re: [PATCH v2 26/28] ext4: cleanup transaction restarts during inode deletion

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Jun 14, 2017, at 8:17 AM, Tahsin Erdogan <tahsin@xxxxxxxxxx> wrote:
> 
> During inode deletion, the journal credits that will be needed are hard to
> determine, which is why we have journal extend/restart calls in several
> places. Whenever a transaction is restarted, the filesystem must be in a
> consistent state because there is no atomicity guarantee beyond a
> restart call.
> 
> Add ext4_xattr_ensure_credits() helper function which takes care of
> journal extend/restart logic. It also handles getting jbd2 write access
> and dirty metadata calls. This function is called at every iteration of
> handling an ea_inode reference.

Another option that might be less complex is to just add the xattr inodes
to the orphan list in the main transaction (which should be a fixed number
of credits), and then truncate/unlink the xattr inodes after the main
transaction has completed rather than making the transactions arbitrarily
large.  At one point we even had a separate unlink thread to handle this
in the background to reduce the unlink latency for very large files, which
also avoids issues with nested transactions.

Cheers, Andreas

> Signed-off-by: Tahsin Erdogan <tahsin@xxxxxxxxxx>
> ---
> v2: made ext4_xattr_ensure_credits() static
> 
> fs/ext4/inode.c |  66 ++++-----------
> fs/ext4/xattr.c | 257 ++++++++++++++++++++++++++++++++++++--------------------
> fs/ext4/xattr.h |   3 +-
> 3 files changed, 183 insertions(+), 143 deletions(-)
> 
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index cf91532765a4..4d6936f0d8a4 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -239,7 +239,11 @@ void ext4_evict_inode(struct inode *inode)
> 	 */
> 	sb_start_intwrite(inode->i_sb);
> 
> -	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, extra_credits);
> +	if (!IS_NOQUOTA(inode))
> +		extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb);
> +
> +	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
> +				 ext4_blocks_for_truncate(inode)+extra_credits);
> 	if (IS_ERR(handle)) {
> 		ext4_std_error(inode->i_sb, PTR_ERR(handle));
> 		/*
> @@ -251,36 +255,9 @@ void ext4_evict_inode(struct inode *inode)
> 		sb_end_intwrite(inode->i_sb);
> 		goto no_delete;
> 	}
> +
> 	if (IS_SYNC(inode))
> 		ext4_handle_sync(handle);
> -
> -	/*
> -	 * Delete xattr inode before deleting the main inode.
> -	 */
> -	err = ext4_xattr_delete_inode(handle, inode, &ea_inode_array);
> -	if (err) {
> -		ext4_warning(inode->i_sb,
> -			     "couldn't delete inode's xattr (err %d)", err);
> -		goto stop_handle;
> -	}
> -
> -	if (!IS_NOQUOTA(inode))
> -		extra_credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb);
> -
> -	if (!ext4_handle_has_enough_credits(handle,
> -			ext4_blocks_for_truncate(inode) + extra_credits)) {
> -		err = ext4_journal_extend(handle,
> -			ext4_blocks_for_truncate(inode) + extra_credits);
> -		if (err > 0)
> -			err = ext4_journal_restart(handle,
> -			ext4_blocks_for_truncate(inode) + extra_credits);
> -		if (err != 0) {
> -			ext4_warning(inode->i_sb,
> -				     "couldn't extend journal (err %d)", err);
> -			goto stop_handle;
> -		}
> -	}
> -
> 	inode->i_size = 0;
> 	err = ext4_mark_inode_dirty(handle, inode);
> 	if (err) {
> @@ -298,25 +275,17 @@ void ext4_evict_inode(struct inode *inode)
> 		}
> 	}
> 
> -	/*
> -	 * ext4_ext_truncate() doesn't reserve any slop when it
> -	 * restarts journal transactions; therefore there may not be
> -	 * enough credits left in the handle to remove the inode from
> -	 * the orphan list and set the dtime field.
> -	 */
> -	if (!ext4_handle_has_enough_credits(handle, extra_credits)) {
> -		err = ext4_journal_extend(handle, extra_credits);
> -		if (err > 0)
> -			err = ext4_journal_restart(handle, extra_credits);
> -		if (err != 0) {
> -			ext4_warning(inode->i_sb,
> -				     "couldn't extend journal (err %d)", err);
> -		stop_handle:
> -			ext4_journal_stop(handle);
> -			ext4_orphan_del(NULL, inode);
> -			sb_end_intwrite(inode->i_sb);
> -			goto no_delete;
> -		}
> +	/* Remove xattr references. */
> +	err = ext4_xattr_delete_inode(handle, inode, &ea_inode_array,
> +				      extra_credits);
> +	if (err) {
> +		ext4_warning(inode->i_sb, "xattr delete (err %d)", err);
> +	stop_handle:
> +		ext4_journal_stop(handle);
> +		ext4_orphan_del(NULL, inode);
> +		sb_end_intwrite(inode->i_sb);
> +		ext4_xattr_inode_array_free(ea_inode_array);
> +		goto no_delete;
> 	}
> 
> 	/*
> @@ -342,7 +311,6 @@ void ext4_evict_inode(struct inode *inode)
> 		ext4_clear_inode(inode);
> 	else
> 		ext4_free_inode(handle, inode);
> -
> 	ext4_journal_stop(handle);
> 	sb_end_intwrite(inode->i_sb);
> 	ext4_xattr_inode_array_free(ea_inode_array);
> diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
> index 3ee7e2f68476..abc7d5f84e5f 100644
> --- a/fs/ext4/xattr.c
> +++ b/fs/ext4/xattr.c
> @@ -108,6 +108,10 @@ const struct xattr_handler *ext4_xattr_handlers[] = {
> #define EXT4_GET_MB_CACHE(inode)	(((struct ext4_sb_info *) \
> 				inode->i_sb->s_fs_info)->s_mb_cache)
> 
> +static int
> +ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
> +			struct inode *inode);
> +
> #ifdef CONFIG_LOCKDEP
> void ext4_xattr_inode_set_class(struct inode *ea_inode)
> {
> @@ -653,6 +657,127 @@ static void ext4_xattr_update_super_block(handle_t *handle,
> 	}
> }
> 
> +static int ext4_xattr_ensure_credits(handle_t *handle, struct inode *inode,
> +				     int credits, struct buffer_head *bh,
> +				     bool dirty, bool block_csum)
> +{
> +	int error;
> +
> +	if (!ext4_handle_valid(handle))
> +		return 0;
> +
> +	if (handle->h_buffer_credits >= credits)
> +		return 0;
> +
> +	error = ext4_journal_extend(handle, credits - handle->h_buffer_credits);
> +	if (!error)
> +		return 0;
> +	if (error < 0) {
> +		ext4_warning(inode->i_sb, "Extend journal (error %d)", error);
> +		return error;
> +	}
> +
> +	if (bh && dirty) {
> +		if (block_csum)
> +			ext4_xattr_block_csum_set(inode, bh);
> +		error = ext4_handle_dirty_metadata(handle, NULL, bh);
> +		if (error) {
> +			ext4_warning(inode->i_sb, "Handle metadata (error %d)",
> +				     error);
> +			return error;
> +		}
> +	}
> +
> +	error = ext4_journal_restart(handle, credits);
> +	if (error) {
> +		ext4_warning(inode->i_sb, "Restart journal (error %d)", error);
> +		return error;
> +	}
> +
> +	if (bh) {
> +		error = ext4_journal_get_write_access(handle, bh);
> +		if (error) {
> +			ext4_warning(inode->i_sb,
> +				     "Get write access failed (error %d)",
> +				     error);
> +			return error;
> +		}
> +	}
> +	return 0;
> +}
> +
> +static void
> +ext4_xattr_inode_remove_all(handle_t *handle, struct inode *parent,
> +			    struct buffer_head *bh,
> +			    struct ext4_xattr_entry *first, bool block_csum,
> +			    struct ext4_xattr_inode_array **ea_inode_array,
> +			    int extra_credits)
> +{
> +	struct inode *ea_inode;
> +	struct ext4_xattr_entry *entry;
> +	bool dirty = false;
> +	unsigned int ea_ino;
> +	int err;
> +	int credits;
> +
> +	/* One credit for dec ref on ea_inode, one for orphan list addition. */
> +	credits = 2 + extra_credits;
> +
> +	for (entry = first; !IS_LAST_ENTRY(entry);
> +	     entry = EXT4_XATTR_NEXT(entry)) {
> +		if (!entry->e_value_inum)
> +			continue;
> +		ea_ino = le32_to_cpu(entry->e_value_inum);
> +		err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
> +		if (err)
> +			continue;
> +
> +		err = ext4_expand_inode_array(ea_inode_array, ea_inode);
> +		if (err) {
> +			ext4_warning_inode(ea_inode,
> +					   "Expand inode array err=%d", err);
> +			iput(ea_inode);
> +			continue;
> +		}
> +
> +		err = ext4_xattr_ensure_credits(handle, parent, credits, bh,
> +						dirty, block_csum);
> +		if (err) {
> +			ext4_warning_inode(ea_inode, "Ensure credits err=%d",
> +					   err);
> +			continue;
> +		}
> +
> +		inode_lock(ea_inode);
> +		clear_nlink(ea_inode);
> +		ext4_orphan_add(handle, ea_inode);
> +		inode_unlock(ea_inode);
> +
> +		/*
> +		 * Forget about ea_inode within the same transaction that decrements the ref
> +		 * count. This avoids duplicate decrements in case the rest of the work
> +		 * spills over to subsequent transactions.
> +		 */
> +		entry->e_value_inum = 0;
> +		entry->e_value_size = 0;
> +
> +		dirty = true;
> +	}
> +
> +	if (dirty) {
> +		/*
> +		 * Note that we are deliberately skipping csum calculation for
> +		 * the final update because we do not expect any journal
> +		 * restarts until xattr block is freed.
> +		 */
> +
> +		err = ext4_handle_dirty_metadata(handle, NULL, bh);
> +		if (err)
> +			ext4_warning_inode(parent,
> +					   "handle dirty metadata err=%d", err);
> +	}
> +}
> +
> /*
>  * Release the xattr block BH: If the reference count is > 1, decrement it;
>  * otherwise free the block.
> @@ -1985,42 +2110,6 @@ ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
> 	return 0;
> }
> 
> -/**
> - * Add xattr inode to orphan list
> - */
> -static int
> -ext4_xattr_inode_orphan_add(handle_t *handle, struct inode *inode, int credits,
> -			    struct ext4_xattr_inode_array *ea_inode_array)
> -{
> -	int idx = 0, error = 0;
> -	struct inode *ea_inode;
> -
> -	if (ea_inode_array == NULL)
> -		return 0;
> -
> -	for (; idx < ea_inode_array->count; ++idx) {
> -		if (!ext4_handle_has_enough_credits(handle, credits)) {
> -			error = ext4_journal_extend(handle, credits);
> -			if (error > 0)
> -				error = ext4_journal_restart(handle, credits);
> -
> -			if (error != 0) {
> -				ext4_warning(inode->i_sb,
> -					"couldn't extend journal "
> -					"(err %d)", error);
> -				return error;
> -			}
> -		}
> -		ea_inode = ea_inode_array->inodes[idx];
> -		inode_lock(ea_inode);
> -		ext4_orphan_add(handle, ea_inode);
> -		inode_unlock(ea_inode);
> -		/* the inode's i_count will be released by caller */
> -	}
> -
> -	return 0;
> -}
> -
> /*
>  * ext4_xattr_delete_inode()
>  *
> @@ -2033,16 +2122,23 @@ ext4_xattr_inode_orphan_add(handle_t *handle, struct inode *inode, int credits,
>  */
> int
> ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
> -			struct ext4_xattr_inode_array **ea_inode_array)
> +			struct ext4_xattr_inode_array **ea_inode_array,
> +			int extra_credits)
> {
> 	struct buffer_head *bh = NULL;
> 	struct ext4_xattr_ibody_header *header;
> 	struct ext4_inode *raw_inode;
> -	struct ext4_iloc iloc;
> -	struct ext4_xattr_entry *entry;
> -	struct inode *ea_inode;
> -	unsigned int ea_ino;
> -	int credits = 3, error = 0;
> +	struct ext4_iloc iloc = { .bh = NULL };
> +	int error;
> +
> +	error = ext4_xattr_ensure_credits(handle, inode, extra_credits,
> +					  NULL /* bh */,
> +					  false /* dirty */,
> +					  false /* block_csum */);
> +	if (error) {
> +		EXT4_ERROR_INODE(inode, "ensure credits (error %d)", error);
> +		goto cleanup;
> +	}
> 
> 	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
> 		goto delete_external_ea;
> @@ -2050,31 +2146,20 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
> 	error = ext4_get_inode_loc(inode, &iloc);
> 	if (error)
> 		goto cleanup;
> +
> +	error = ext4_journal_get_write_access(handle, iloc.bh);
> +	if (error)
> +		goto cleanup;
> +
> 	raw_inode = ext4_raw_inode(&iloc);
> 	header = IHDR(inode, raw_inode);
> -	for (entry = IFIRST(header); !IS_LAST_ENTRY(entry);
> -	     entry = EXT4_XATTR_NEXT(entry)) {
> -		if (!entry->e_value_inum)
> -			continue;
> -		ea_ino = le32_to_cpu(entry->e_value_inum);
> -		error = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode);
> -		if (error)
> -			continue;
> -		error = ext4_expand_inode_array(ea_inode_array, ea_inode);
> -		if (error) {
> -			iput(ea_inode);
> -			brelse(iloc.bh);
> -			goto cleanup;
> -		}
> -		entry->e_value_inum = 0;
> -	}
> -	brelse(iloc.bh);
> +	ext4_xattr_inode_remove_all(handle, inode, iloc.bh, IFIRST(header),
> +				    false /* block_csum */, ea_inode_array,
> +				    extra_credits);
> 
> delete_external_ea:
> 	if (!EXT4_I(inode)->i_file_acl) {
> -		/* add xattr inode to orphan list */
> -		error = ext4_xattr_inode_orphan_add(handle, inode, credits,
> -						    *ea_inode_array);
> +		error = 0;
> 		goto cleanup;
> 	}
> 	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
> @@ -2092,46 +2177,32 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
> 		goto cleanup;
> 	}
> 
> -	for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
> -	     entry = EXT4_XATTR_NEXT(entry)) {
> -		if (!entry->e_value_inum)
> -			continue;
> -		ea_ino = le32_to_cpu(entry->e_value_inum);
> -		error = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode);
> -		if (error)
> -			continue;
> -		error = ext4_expand_inode_array(ea_inode_array, ea_inode);
> -		if (error)
> -			goto cleanup;
> -		entry->e_value_inum = 0;
> -	}
> -
> -	/* add xattr inode to orphan list */
> -	error = ext4_xattr_inode_orphan_add(handle, inode, credits,
> -					*ea_inode_array);
> -	if (error)
> -		goto cleanup;
> -
> -	if (!IS_NOQUOTA(inode))
> -		credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb);
> -
> -	if (!ext4_handle_has_enough_credits(handle, credits)) {
> -		error = ext4_journal_extend(handle, credits);
> -		if (error > 0)
> -			error = ext4_journal_restart(handle, credits);
> +	if (ext4_has_feature_ea_inode(inode->i_sb)) {
> +		error = ext4_journal_get_write_access(handle, bh);
> 		if (error) {
> -			ext4_warning(inode->i_sb,
> -				"couldn't extend journal (err %d)", error);
> +			EXT4_ERROR_INODE(inode, "write access %llu",
> +					 EXT4_I(inode)->i_file_acl);
> 			goto cleanup;
> 		}
> +		ext4_xattr_inode_remove_all(handle, inode, bh,
> +					    BFIRST(bh),
> +					    true /* block_csum */,
> +					    ea_inode_array,
> +					    extra_credits);
> 	}
> 
> 	ext4_xattr_release_block(handle, inode, bh);
> +	/* Update i_file_acl within the same transaction that releases block. */
> 	EXT4_I(inode)->i_file_acl = 0;
> -
> +	error = ext4_mark_inode_dirty(handle, inode);
> +	if (error) {
> +		EXT4_ERROR_INODE(inode, "mark inode dirty (error %d)",
> +				 error);
> +		goto cleanup;
> +	}
> cleanup:
> +	brelse(iloc.bh);
> 	brelse(bh);
> -
> 	return error;
> }
> 
> diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
> index adf761518a73..b2005a2716d9 100644
> --- a/fs/ext4/xattr.h
> +++ b/fs/ext4/xattr.h
> @@ -169,7 +169,8 @@ extern int ext4_xattr_set_credits(struct inode *inode, size_t value_len);
> 
> extern int ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino);
> extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
> -				   struct ext4_xattr_inode_array **array);
> +				   struct ext4_xattr_inode_array **array,
> +				   int extra_credits);
> extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array);
> 
> extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
> --
> 2.13.1.508.gb3defc5cc-goog
> 


Cheers, Andreas





Attachment: signature.asc
Description: Message signed with OpenPGP


[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux