Re: [PATCH] Btrfs: make sure logged extents complete in the current transaction

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Nov 18, 2014 at 05:19:41PM -0500, Josef Bacik wrote:
> Liu Bo pointed out that my previous fix would lose the generation update in the
> scenario I described.  It is actually much worse than that, we could lose the
> entire extent if we lose power right after the transaction commits.  Consider
> the following
> 
> write extent 0-4k
> log extent in log tree
> commit transaction
> 	< power fail happens here
> ordered extent completes
> 
> We would lose the 0-4k extent because it hasn't updated the actual fs tree, and
> the transaction commit will reset the log so it isn't replayed.  If we lose
> power before the transaction commit we are save, otherwise we are not.
> 
> Fix this by keeping track of all extents we logged in this transaction.  Then
> when we go to commit the transaction make sure we wait for all of those ordered
> extents to complete before proceeding.  This will make sure that if we lose
> power after the transaction commit we still have our data.  This also fixes the
> problem of the improperly updated extent generation.  Thanks,

This looks saner.

Reviewed-by: Liu Bo <bo.li.liu@xxxxxxxxxx>

thanks,
-liubo

> 
> cc: stable@xxxxxxxxxxxxxxx
> Signed-off-by: Josef Bacik <jbacik@xxxxxx>
> ---
>  fs/btrfs/ordered-data.c |  6 ++++--
>  fs/btrfs/ordered-data.h |  6 +++++-
>  fs/btrfs/transaction.c  | 33 +++++++++++++++++++++++++++++++++
>  fs/btrfs/transaction.h  |  2 ++
>  fs/btrfs/tree-log.c     |  6 +++---
>  5 files changed, 47 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
> index ac734ec..7c2dd7a 100644
> --- a/fs/btrfs/ordered-data.c
> +++ b/fs/btrfs/ordered-data.c
> @@ -220,6 +220,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
>  	INIT_LIST_HEAD(&entry->work_list);
>  	init_completion(&entry->completion);
>  	INIT_LIST_HEAD(&entry->log_list);
> +	INIT_LIST_HEAD(&entry->trans_list);
>  
>  	trace_btrfs_ordered_extent_add(inode, entry);
>  
> @@ -472,7 +473,8 @@ void btrfs_submit_logged_extents(struct list_head *logged_list,
>  	spin_unlock_irq(&log->log_extents_lock[index]);
>  }
>  
> -void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid)
> +void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
> +			       struct btrfs_root *log, u64 transid)
>  {
>  	struct btrfs_ordered_extent *ordered;
>  	int index = transid % 2;
> @@ -497,7 +499,7 @@ void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid)
>  		wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
>  						   &ordered->flags));
>  
> -		btrfs_put_ordered_extent(ordered);
> +		list_add_tail(&ordered->trans_list, &trans->ordered);
>  		spin_lock_irq(&log->log_extents_lock[index]);
>  	}
>  	spin_unlock_irq(&log->log_extents_lock[index]);
> diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
> index d81a274..171a841 100644
> --- a/fs/btrfs/ordered-data.h
> +++ b/fs/btrfs/ordered-data.h
> @@ -121,6 +121,9 @@ struct btrfs_ordered_extent {
>  	/* If we need to wait on this to be done */
>  	struct list_head log_list;
>  
> +	/* If the transaction needs to wait on this ordered extent */
> +	struct list_head trans_list;
> +
>  	/* used to wait for the BTRFS_ORDERED_COMPLETE bit */
>  	wait_queue_head_t wait;
>  
> @@ -197,7 +200,8 @@ void btrfs_get_logged_extents(struct inode *inode,
>  void btrfs_put_logged_extents(struct list_head *logged_list);
>  void btrfs_submit_logged_extents(struct list_head *logged_list,
>  				 struct btrfs_root *log);
> -void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid);
> +void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
> +			       struct btrfs_root *log, u64 transid);
>  void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
>  int __init ordered_data_init(void);
>  void ordered_data_exit(void);
> diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
> index dcaae36..63c6d05 100644
> --- a/fs/btrfs/transaction.c
> +++ b/fs/btrfs/transaction.c
> @@ -220,6 +220,7 @@ loop:
>  	INIT_LIST_HEAD(&cur_trans->pending_snapshots);
>  	INIT_LIST_HEAD(&cur_trans->pending_chunks);
>  	INIT_LIST_HEAD(&cur_trans->switch_commits);
> +	INIT_LIST_HEAD(&cur_trans->pending_ordered);
>  	list_add_tail(&cur_trans->list, &fs_info->trans_list);
>  	extent_io_tree_init(&cur_trans->dirty_pages,
>  			     fs_info->btree_inode->i_mapping);
> @@ -488,6 +489,7 @@ again:
>  	h->sync = false;
>  	INIT_LIST_HEAD(&h->qgroup_ref_list);
>  	INIT_LIST_HEAD(&h->new_bgs);
> +	INIT_LIST_HEAD(&h->ordered);
>  
>  	smp_mb();
>  	if (cur_trans->state >= TRANS_STATE_BLOCKED &&
> @@ -719,6 +721,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
>  	if (!list_empty(&trans->new_bgs))
>  		btrfs_create_pending_block_groups(trans, root);
>  
> +	if (!list_empty(&trans->ordered)) {
> +		spin_lock(&info->trans_lock);
> +		list_splice(&trans->ordered, &cur_trans->pending_ordered);
> +		spin_unlock(&info->trans_lock);
> +	}
> +
>  	trans->delayed_ref_updates = 0;
>  	if (!trans->sync) {
>  		must_run_delayed_refs =
> @@ -1652,6 +1660,28 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
>  		btrfs_wait_ordered_roots(fs_info, -1);
>  }
>  
> +static inline void
> +btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans,
> +			   struct btrfs_fs_info *fs_info)
> +{
> +	struct btrfs_ordered_extent *ordered;
> +
> +	spin_lock(&fs_info->trans_lock);
> +	while (!list_empty(&cur_trans->pending_ordered)) {
> +		ordered = list_first_entry(&cur_trans->pending_ordered,
> +					   struct btrfs_ordered_extent,
> +					   trans_list);
> +		list_del_init(&ordered->trans_list);
> +		spin_unlock(&fs_info->trans_lock);
> +
> +		wait_event(ordered->wait, test_bit(BTRFS_ORDERED_COMPLETE,
> +						   &ordered->flags));
> +		btrfs_put_ordered_extent(ordered);
> +		spin_lock(&fs_info->trans_lock);
> +	}
> +	spin_unlock(&fs_info->trans_lock);
> +}
> +
>  int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
>  			     struct btrfs_root *root)
>  {
> @@ -1702,6 +1732,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
>  	}
>  
>  	spin_lock(&root->fs_info->trans_lock);
> +	list_splice(&trans->ordered, &cur_trans->pending_ordered);
>  	if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
>  		spin_unlock(&root->fs_info->trans_lock);
>  		atomic_inc(&cur_trans->use_count);
> @@ -1754,6 +1785,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
>  
>  	btrfs_wait_delalloc_flush(root->fs_info);
>  
> +	btrfs_wait_pending_ordered(cur_trans, root->fs_info);
> +
>  	btrfs_scrub_pause(root);
>  	/*
>  	 * Ok now we need to make sure to block out any other joins while we
> diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
> index d8f40e1..1ba9c3e 100644
> --- a/fs/btrfs/transaction.h
> +++ b/fs/btrfs/transaction.h
> @@ -56,6 +56,7 @@ struct btrfs_transaction {
>  	wait_queue_head_t commit_wait;
>  	struct list_head pending_snapshots;
>  	struct list_head pending_chunks;
> +	struct list_head pending_ordered;
>  	struct list_head switch_commits;
>  	struct btrfs_delayed_ref_root delayed_refs;
>  	int aborted;
> @@ -105,6 +106,7 @@ struct btrfs_trans_handle {
>  	 */
>  	struct btrfs_root *root;
>  	struct seq_list delayed_ref_elem;
> +	struct list_head ordered;
>  	struct list_head qgroup_ref_list;
>  	struct list_head new_bgs;
>  };
> diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
> index 70f99b1..337e4bc 100644
> --- a/fs/btrfs/tree-log.c
> +++ b/fs/btrfs/tree-log.c
> @@ -2600,7 +2600,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
>  	if (atomic_read(&log_root_tree->log_commit[index2])) {
>  		blk_finish_plug(&plug);
>  		btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
> -		btrfs_wait_logged_extents(log, log_transid);
> +		btrfs_wait_logged_extents(trans, log, log_transid);
>  		wait_log_commit(trans, log_root_tree,
>  				root_log_ctx.log_transid);
>  		mutex_unlock(&log_root_tree->log_mutex);
> @@ -2645,7 +2645,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
>  	btrfs_wait_marked_extents(log_root_tree,
>  				  &log_root_tree->dirty_log_pages,
>  				  EXTENT_NEW | EXTENT_DIRTY);
> -	btrfs_wait_logged_extents(log, log_transid);
> +	btrfs_wait_logged_extents(trans, log, log_transid);
>  
>  	btrfs_set_super_log_root(root->fs_info->super_for_commit,
>  				log_root_tree->node->start);
> @@ -3766,7 +3766,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
>  	fi = btrfs_item_ptr(leaf, path->slots[0],
>  			    struct btrfs_file_extent_item);
>  
> -	btrfs_set_token_file_extent_generation(leaf, fi, em->generation,
> +	btrfs_set_token_file_extent_generation(leaf, fi, trans->transid,
>  					       &token);
>  	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
>  		btrfs_set_token_file_extent_type(leaf, fi,
> -- 
> 1.8.3.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe stable" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]