On Mon, Apr 08, 2013 at 11:32:10PM +0200, Jan Kara wrote: > When writing metadata to the journal, we create temporary buffer heads > for that task. We also attach journal heads to these buffer heads but > the only purpose of the journal heads is to keep buffers linked in > transaction's BJ_IO list. We remove the need for journal heads by > reusing buffer_head's b_assoc_buffers list for that purpose. Also since > BJ_IO list is just a temporary list for transaction commit, we use a > private list in jbd2_journal_commit_transaction() for that thus removing > BJ_IO list from transaction completely. > > Signed-off-by: Jan Kara <jack@xxxxxxx> Reviewed-by: Zheng Liu <wenqing.lz@xxxxxxxxxx> Regards, - Zheng > --- > fs/jbd2/checkpoint.c | 1 - > fs/jbd2/commit.c | 65 ++++++++++++++++-------------------------------- > fs/jbd2/journal.c | 36 ++++++++++---------------- > fs/jbd2/transaction.c | 14 +++------- > include/linux/jbd2.h | 32 ++++++++++++------------ > 5 files changed, 56 insertions(+), 92 deletions(-) > > diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c > index c78841e..2735fef 100644 > --- a/fs/jbd2/checkpoint.c > +++ b/fs/jbd2/checkpoint.c > @@ -690,7 +690,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact > J_ASSERT(transaction->t_state == T_FINISHED); > J_ASSERT(transaction->t_buffers == NULL); > J_ASSERT(transaction->t_forget == NULL); > - J_ASSERT(transaction->t_iobuf_list == NULL); > J_ASSERT(transaction->t_shadow_list == NULL); > J_ASSERT(transaction->t_log_list == NULL); > J_ASSERT(transaction->t_checkpoint_list == NULL); > diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c > index 750c701..c503df6 100644 > --- a/fs/jbd2/commit.c > +++ b/fs/jbd2/commit.c > @@ -368,7 +368,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) > { > struct transaction_stats_s stats; > transaction_t *commit_transaction; > - struct journal_head *jh, *new_jh, *descriptor; > + struct journal_head *jh, *descriptor; > struct buffer_head **wbuf = journal->j_wbuf; > int bufs; > int flags; > @@ -392,6 +392,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) > tid_t first_tid; > int update_tail; > int csum_size = 0; > + LIST_HEAD(io_bufs); > > if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) > csum_size = sizeof(struct jbd2_journal_block_tail); > @@ -658,29 +659,22 @@ void jbd2_journal_commit_transaction(journal_t *journal) > > /* Bump b_count to prevent truncate from stumbling over > the shadowed buffer! @@@ This can go if we ever get > - rid of the BJ_IO/BJ_Shadow pairing of buffers. */ > + rid of the shadow pairing of buffers. */ > atomic_inc(&jh2bh(jh)->b_count); > > - /* Make a temporary IO buffer with which to write it out > - (this will requeue both the metadata buffer and the > - temporary IO buffer). new_bh goes on BJ_IO*/ > - > - set_bit(BH_JWrite, &jh2bh(jh)->b_state); > /* > - * akpm: jbd2_journal_write_metadata_buffer() sets > - * new_bh->b_transaction to commit_transaction. > - * We need to clean this up before we release new_bh > - * (which is of type BJ_IO) > + * Make a temporary IO buffer with which to write it out > + * (this will requeue the metadata buffer to BJ_Shadow). > */ > + set_bit(BH_JWrite, &jh2bh(jh)->b_state); > JBUFFER_TRACE(jh, "ph3: write metadata"); > flags = jbd2_journal_write_metadata_buffer(commit_transaction, > - jh, &new_jh, blocknr); > + jh, &wbuf[bufs], blocknr); > if (flags < 0) { > jbd2_journal_abort(journal, flags); > continue; > } > - set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); > - wbuf[bufs++] = jh2bh(new_jh); > + jbd2_file_log_bh(&io_bufs, wbuf[bufs]); > > /* Record the new block's tag in the current descriptor > buffer */ > @@ -694,10 +688,11 @@ void jbd2_journal_commit_transaction(journal_t *journal) > tag = (journal_block_tag_t *) tagp; > write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); > tag->t_flags = cpu_to_be16(tag_flag); > - jbd2_block_tag_csum_set(journal, tag, jh2bh(new_jh), > + jbd2_block_tag_csum_set(journal, tag, wbuf[bufs], > commit_transaction->t_tid); > tagp += tag_bytes; > space_left -= tag_bytes; > + bufs++; > > if (first_tag) { > memcpy (tagp, journal->j_uuid, 16); > @@ -809,7 +804,7 @@ start_journal_io: > the log. Before we can commit it, wait for the IO so far to > complete. Control buffers being written are on the > transaction's t_log_list queue, and metadata buffers are on > - the t_iobuf_list queue. > + the io_bufs list. > > Wait for the buffers in reverse order. That way we are > less likely to be woken up until all IOs have completed, and > @@ -818,46 +813,31 @@ start_journal_io: > > jbd_debug(3, "JBD2: commit phase 3\n"); > > - /* > - * akpm: these are BJ_IO, and j_list_lock is not needed. > - * See __journal_try_to_free_buffer. > - */ > -wait_for_iobuf: > - while (commit_transaction->t_iobuf_list != NULL) { > - struct buffer_head *bh; > + while (!list_empty(&io_bufs)) { > + struct buffer_head *bh = list_entry(io_bufs.prev, > + struct buffer_head, > + b_assoc_buffers); > > - jh = commit_transaction->t_iobuf_list->b_tprev; > - bh = jh2bh(jh); > - if (buffer_locked(bh)) { > - wait_on_buffer(bh); > - goto wait_for_iobuf; > - } > - if (cond_resched()) > - goto wait_for_iobuf; > + wait_on_buffer(bh); > + cond_resched(); > > if (unlikely(!buffer_uptodate(bh))) > err = -EIO; > - > - clear_buffer_jwrite(bh); > - > - JBUFFER_TRACE(jh, "ph4: unfile after journal write"); > - jbd2_journal_unfile_buffer(journal, jh); > + jbd2_unfile_log_bh(bh); > > /* > - * ->t_iobuf_list should contain only dummy buffer_heads > - * which were created by jbd2_journal_write_metadata_buffer(). > + * The list contains temporary buffer heads created by > + * jbd2_journal_write_metadata_buffer(). > */ > BUFFER_TRACE(bh, "dumping temporary bh"); > - jbd2_journal_put_journal_head(jh); > __brelse(bh); > J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0); > free_buffer_head(bh); > > - /* We also have to unlock and free the corresponding > - shadowed buffer */ > + /* We also have to refile the corresponding shadowed buffer */ > jh = commit_transaction->t_shadow_list->b_tprev; > bh = jh2bh(jh); > - clear_bit(BH_JWrite, &bh->b_state); > + clear_buffer_jwrite(bh); > J_ASSERT_BH(bh, buffer_jbddirty(bh)); > > /* The metadata is now released for reuse, but we need > @@ -952,7 +932,6 @@ wait_for_iobuf: > J_ASSERT(list_empty(&commit_transaction->t_inode_list)); > J_ASSERT(commit_transaction->t_buffers == NULL); > J_ASSERT(commit_transaction->t_checkpoint_list == NULL); > - J_ASSERT(commit_transaction->t_iobuf_list == NULL); > J_ASSERT(commit_transaction->t_shadow_list == NULL); > J_ASSERT(commit_transaction->t_log_list == NULL); > > diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c > index ed10991..eb6272b 100644 > --- a/fs/jbd2/journal.c > +++ b/fs/jbd2/journal.c > @@ -310,14 +310,12 @@ static void journal_kill_thread(journal_t *journal) > * > * If the source buffer has already been modified by a new transaction > * since we took the last commit snapshot, we use the frozen copy of > - * that data for IO. If we end up using the existing buffer_head's data > - * for the write, then we *have* to lock the buffer to prevent anyone > - * else from using and possibly modifying it while the IO is in > - * progress. > + * that data for IO. If we end up using the existing buffer_head's data > + * for the write, then we have to make sure nobody modifies it while the > + * IO is in progress. do_get_write_access() handles this. > * > - * The function returns a pointer to the buffer_heads to be used for IO. > - * > - * We assume that the journal has already been locked in this function. > + * The function returns a pointer to the buffer_head to be used for IO. > + * > * > * Return value: > * <0: Error > @@ -330,15 +328,14 @@ static void journal_kill_thread(journal_t *journal) > > int jbd2_journal_write_metadata_buffer(transaction_t *transaction, > struct journal_head *jh_in, > - struct journal_head **jh_out, > - unsigned long long blocknr) > + struct buffer_head **bh_out, > + sector_t blocknr) > { > int need_copy_out = 0; > int done_copy_out = 0; > int do_escape = 0; > char *mapped_data; > struct buffer_head *new_bh; > - struct journal_head *new_jh; > struct page *new_page; > unsigned int new_offset; > struct buffer_head *bh_in = jh2bh(jh_in); > @@ -370,14 +367,13 @@ retry_alloc: > new_bh->b_state = 0; > init_buffer(new_bh, NULL, NULL); > atomic_set(&new_bh->b_count, 1); > - new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */ > > + jbd_lock_bh_state(bh_in); > +repeat: > /* > * If a new transaction has already done a buffer copy-out, then > * we use that version of the data for the commit. > */ > - jbd_lock_bh_state(bh_in); > -repeat: > if (jh_in->b_frozen_data) { > done_copy_out = 1; > new_page = virt_to_page(jh_in->b_frozen_data); > @@ -417,7 +413,7 @@ repeat: > jbd_unlock_bh_state(bh_in); > tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); > if (!tmp) { > - jbd2_journal_put_journal_head(new_jh); > + brelse(new_bh); > return -ENOMEM; > } > jbd_lock_bh_state(bh_in); > @@ -428,7 +424,7 @@ repeat: > > jh_in->b_frozen_data = tmp; > mapped_data = kmap_atomic(new_page); > - memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); > + memcpy(tmp, mapped_data + new_offset, bh_in->b_size); > kunmap_atomic(mapped_data); > > new_page = virt_to_page(tmp); > @@ -454,14 +450,13 @@ repeat: > } > > set_bh_page(new_bh, new_page, new_offset); > - new_jh->b_transaction = NULL; > - new_bh->b_size = jh2bh(jh_in)->b_size; > - new_bh->b_bdev = transaction->t_journal->j_dev; > + new_bh->b_size = bh_in->b_size; > + new_bh->b_bdev = journal->j_dev; > new_bh->b_blocknr = blocknr; > set_buffer_mapped(new_bh); > set_buffer_dirty(new_bh); > > - *jh_out = new_jh; > + *bh_out = new_bh; > > /* > * The to-be-written buffer needs to get moved to the io queue, > @@ -474,9 +469,6 @@ repeat: > spin_unlock(&journal->j_list_lock); > jbd_unlock_bh_state(bh_in); > > - JBUFFER_TRACE(new_jh, "file as BJ_IO"); > - jbd2_journal_file_buffer(new_jh, transaction, BJ_IO); > - > return do_escape | (done_copy_out << 1); > } > > diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c > index d6ee5ae..3be34c7 100644 > --- a/fs/jbd2/transaction.c > +++ b/fs/jbd2/transaction.c > @@ -1589,10 +1589,10 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh) > * Remove a buffer from the appropriate transaction list. > * > * Note that this function can *change* the value of > - * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list, > - * t_log_list or t_reserved_list. If the caller is holding onto a copy of one > - * of these pointers, it could go bad. Generally the caller needs to re-read > - * the pointer from the transaction_t. > + * bh->b_transaction->t_buffers, t_forget, t_shadow_list, t_log_list or > + * t_reserved_list. If the caller is holding onto a copy of one of these > + * pointers, it could go bad. Generally the caller needs to re-read the > + * pointer from the transaction_t. > * > * Called under j_list_lock. > */ > @@ -1622,9 +1622,6 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) > case BJ_Forget: > list = &transaction->t_forget; > break; > - case BJ_IO: > - list = &transaction->t_iobuf_list; > - break; > case BJ_Shadow: > list = &transaction->t_shadow_list; > break; > @@ -2126,9 +2123,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, > case BJ_Forget: > list = &transaction->t_forget; > break; > - case BJ_IO: > - list = &transaction->t_iobuf_list; > - break; > case BJ_Shadow: > list = &transaction->t_shadow_list; > break; > diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h > index 50e5a5e..a670595 100644 > --- a/include/linux/jbd2.h > +++ b/include/linux/jbd2.h > @@ -523,12 +523,6 @@ struct transaction_s > struct journal_head *t_checkpoint_io_list; > > /* > - * Doubly-linked circular list of temporary buffers currently undergoing > - * IO in the log [j_list_lock] > - */ > - struct journal_head *t_iobuf_list; > - > - /* > * Doubly-linked circular list of metadata buffers being shadowed by log > * IO. The IO buffers on the iobuf list and the shadow buffers on this > * list match each other one for one at all times. [j_list_lock] > @@ -990,6 +984,14 @@ extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, i > extern void __journal_free_buffer(struct journal_head *bh); > extern void jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); > extern void __journal_clean_data_list(transaction_t *transaction); > +static inline void jbd2_file_log_bh(struct list_head *head, struct buffer_head *bh) > +{ > + list_add_tail(&bh->b_assoc_buffers, head); > +} > +static inline void jbd2_unfile_log_bh(struct buffer_head *bh) > +{ > + list_del_init(&bh->b_assoc_buffers); > +} > > /* Log buffer allocation */ > extern struct journal_head * jbd2_journal_get_descriptor_buffer(journal_t *); > @@ -1038,11 +1040,10 @@ extern void jbd2_buffer_abort_trigger(struct journal_head *jh, > struct jbd2_buffer_trigger_type *triggers); > > /* Buffer IO */ > -extern int > -jbd2_journal_write_metadata_buffer(transaction_t *transaction, > - struct journal_head *jh_in, > - struct journal_head **jh_out, > - unsigned long long blocknr); > +extern int jbd2_journal_write_metadata_buffer(transaction_t *transaction, > + struct journal_head *jh_in, > + struct buffer_head **bh_out, > + sector_t blocknr); > > /* Transaction locking */ > extern void __wait_on_journal (journal_t *); > @@ -1284,11 +1285,10 @@ static inline int jbd_space_needed(journal_t *journal) > #define BJ_None 0 /* Not journaled */ > #define BJ_Metadata 1 /* Normal journaled metadata */ > #define BJ_Forget 2 /* Buffer superseded by this transaction */ > -#define BJ_IO 3 /* Buffer is for temporary IO use */ > -#define BJ_Shadow 4 /* Buffer contents being shadowed to the log */ > -#define BJ_LogCtl 5 /* Buffer contains log descriptors */ > -#define BJ_Reserved 6 /* Buffer is reserved for access by journal */ > -#define BJ_Types 7 > +#define BJ_Shadow 3 /* Buffer contents being shadowed to the log */ > +#define BJ_LogCtl 4 /* Buffer contains log descriptors */ > +#define BJ_Reserved 5 /* Buffer is reserved for access by journal */ > +#define BJ_Types 6 > > extern int jbd_blocks_per_page(struct inode *inode); > > -- > 1.7.1 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html