On Tue, Nov 18, 2014 at 05:19:41PM -0500, Josef Bacik wrote: > Liu Bo pointed out that my previous fix would lose the generation update in the > scenario I described. It is actually much worse than that, we could lose the > entire extent if we lose power right after the transaction commits. Consider > the following > > write extent 0-4k > log extent in log tree > commit transaction > < power fail happens here > ordered extent completes > > We would lose the 0-4k extent because it hasn't updated the actual fs tree, and > the transaction commit will reset the log so it isn't replayed. If we lose > power before the transaction commit we are save, otherwise we are not. > > Fix this by keeping track of all extents we logged in this transaction. Then > when we go to commit the transaction make sure we wait for all of those ordered > extents to complete before proceeding. This will make sure that if we lose > power after the transaction commit we still have our data. This also fixes the > problem of the improperly updated extent generation. Thanks, This looks saner. Reviewed-by: Liu Bo <bo.li.liu@xxxxxxxxxx> thanks, -liubo > > cc: stable@xxxxxxxxxxxxxxx > Signed-off-by: Josef Bacik <jbacik@xxxxxx> > --- > fs/btrfs/ordered-data.c | 6 ++++-- > fs/btrfs/ordered-data.h | 6 +++++- > fs/btrfs/transaction.c | 33 +++++++++++++++++++++++++++++++++ > fs/btrfs/transaction.h | 2 ++ > fs/btrfs/tree-log.c | 6 +++--- > 5 files changed, 47 insertions(+), 6 deletions(-) > > diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c > index ac734ec..7c2dd7a 100644 > --- a/fs/btrfs/ordered-data.c > +++ b/fs/btrfs/ordered-data.c > @@ -220,6 +220,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, > INIT_LIST_HEAD(&entry->work_list); > init_completion(&entry->completion); > INIT_LIST_HEAD(&entry->log_list); > + INIT_LIST_HEAD(&entry->trans_list); > > trace_btrfs_ordered_extent_add(inode, entry); > > @@ -472,7 +473,8 @@ void btrfs_submit_logged_extents(struct list_head *logged_list, > spin_unlock_irq(&log->log_extents_lock[index]); > } > > -void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) > +void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans, > + struct btrfs_root *log, u64 transid) > { > struct btrfs_ordered_extent *ordered; > int index = transid % 2; > @@ -497,7 +499,7 @@ void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) > wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, > &ordered->flags)); > > - btrfs_put_ordered_extent(ordered); > + list_add_tail(&ordered->trans_list, &trans->ordered); > spin_lock_irq(&log->log_extents_lock[index]); > } > spin_unlock_irq(&log->log_extents_lock[index]); > diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h > index d81a274..171a841 100644 > --- a/fs/btrfs/ordered-data.h > +++ b/fs/btrfs/ordered-data.h > @@ -121,6 +121,9 @@ struct btrfs_ordered_extent { > /* If we need to wait on this to be done */ > struct list_head log_list; > > + /* If the transaction needs to wait on this ordered extent */ > + struct list_head trans_list; > + > /* used to wait for the BTRFS_ORDERED_COMPLETE bit */ > wait_queue_head_t wait; > > @@ -197,7 +200,8 @@ void btrfs_get_logged_extents(struct inode *inode, > void btrfs_put_logged_extents(struct list_head *logged_list); > void btrfs_submit_logged_extents(struct list_head *logged_list, > struct btrfs_root *log); > -void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); > +void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans, > + struct btrfs_root *log, u64 transid); > void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); > int __init ordered_data_init(void); > void ordered_data_exit(void); > diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c > index dcaae36..63c6d05 100644 > --- a/fs/btrfs/transaction.c > +++ b/fs/btrfs/transaction.c > @@ -220,6 +220,7 @@ loop: > INIT_LIST_HEAD(&cur_trans->pending_snapshots); > INIT_LIST_HEAD(&cur_trans->pending_chunks); > INIT_LIST_HEAD(&cur_trans->switch_commits); > + INIT_LIST_HEAD(&cur_trans->pending_ordered); > list_add_tail(&cur_trans->list, &fs_info->trans_list); > extent_io_tree_init(&cur_trans->dirty_pages, > fs_info->btree_inode->i_mapping); > @@ -488,6 +489,7 @@ again: > h->sync = false; > INIT_LIST_HEAD(&h->qgroup_ref_list); > INIT_LIST_HEAD(&h->new_bgs); > + INIT_LIST_HEAD(&h->ordered); > > smp_mb(); > if (cur_trans->state >= TRANS_STATE_BLOCKED && > @@ -719,6 +721,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, > if (!list_empty(&trans->new_bgs)) > btrfs_create_pending_block_groups(trans, root); > > + if (!list_empty(&trans->ordered)) { > + spin_lock(&info->trans_lock); > + list_splice(&trans->ordered, &cur_trans->pending_ordered); > + spin_unlock(&info->trans_lock); > + } > + > trans->delayed_ref_updates = 0; > if (!trans->sync) { > must_run_delayed_refs = > @@ -1652,6 +1660,28 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) > btrfs_wait_ordered_roots(fs_info, -1); > } > > +static inline void > +btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans, > + struct btrfs_fs_info *fs_info) > +{ > + struct btrfs_ordered_extent *ordered; > + > + spin_lock(&fs_info->trans_lock); > + while (!list_empty(&cur_trans->pending_ordered)) { > + ordered = list_first_entry(&cur_trans->pending_ordered, > + struct btrfs_ordered_extent, > + trans_list); > + list_del_init(&ordered->trans_list); > + spin_unlock(&fs_info->trans_lock); > + > + wait_event(ordered->wait, test_bit(BTRFS_ORDERED_COMPLETE, > + &ordered->flags)); > + btrfs_put_ordered_extent(ordered); > + spin_lock(&fs_info->trans_lock); > + } > + spin_unlock(&fs_info->trans_lock); > +} > + > int btrfs_commit_transaction(struct btrfs_trans_handle *trans, > struct btrfs_root *root) > { > @@ -1702,6 +1732,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, > } > > spin_lock(&root->fs_info->trans_lock); > + list_splice(&trans->ordered, &cur_trans->pending_ordered); > if (cur_trans->state >= TRANS_STATE_COMMIT_START) { > spin_unlock(&root->fs_info->trans_lock); > atomic_inc(&cur_trans->use_count); > @@ -1754,6 +1785,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, > > btrfs_wait_delalloc_flush(root->fs_info); > > + btrfs_wait_pending_ordered(cur_trans, root->fs_info); > + > btrfs_scrub_pause(root); > /* > * Ok now we need to make sure to block out any other joins while we > diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h > index d8f40e1..1ba9c3e 100644 > --- a/fs/btrfs/transaction.h > +++ b/fs/btrfs/transaction.h > @@ -56,6 +56,7 @@ struct btrfs_transaction { > wait_queue_head_t commit_wait; > struct list_head pending_snapshots; > struct list_head pending_chunks; > + struct list_head pending_ordered; > struct list_head switch_commits; > struct btrfs_delayed_ref_root delayed_refs; > int aborted; > @@ -105,6 +106,7 @@ struct btrfs_trans_handle { > */ > struct btrfs_root *root; > struct seq_list delayed_ref_elem; > + struct list_head ordered; > struct list_head qgroup_ref_list; > struct list_head new_bgs; > }; > diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c > index 70f99b1..337e4bc 100644 > --- a/fs/btrfs/tree-log.c > +++ b/fs/btrfs/tree-log.c > @@ -2600,7 +2600,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, > if (atomic_read(&log_root_tree->log_commit[index2])) { > blk_finish_plug(&plug); > btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); > - btrfs_wait_logged_extents(log, log_transid); > + btrfs_wait_logged_extents(trans, log, log_transid); > wait_log_commit(trans, log_root_tree, > root_log_ctx.log_transid); > mutex_unlock(&log_root_tree->log_mutex); > @@ -2645,7 +2645,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, > btrfs_wait_marked_extents(log_root_tree, > &log_root_tree->dirty_log_pages, > EXTENT_NEW | EXTENT_DIRTY); > - btrfs_wait_logged_extents(log, log_transid); > + btrfs_wait_logged_extents(trans, log, log_transid); > > btrfs_set_super_log_root(root->fs_info->super_for_commit, > log_root_tree->node->start); > @@ -3766,7 +3766,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, > fi = btrfs_item_ptr(leaf, path->slots[0], > struct btrfs_file_extent_item); > > - btrfs_set_token_file_extent_generation(leaf, fi, em->generation, > + btrfs_set_token_file_extent_generation(leaf, fi, trans->transid, > &token); > if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) > btrfs_set_token_file_extent_type(leaf, fi, > -- > 1.8.3.1 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe stable" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html