On Aug 8, 2019, at 9:45 PM, Harshad Shirwadkar <harshadshirwadkar@xxxxxxxxx> wrote: > > This patch adds core fast-commit commit path changes. This patch also > modifies existing JBD2 APIs to allow usage of fast commits. If fast > commits are enabled and journal->j_do_full_commit is not set, the > commit routine tries the file system specific fast commmit first. Only > if it fails, it falls back to the full commit. Commit start and wait > APIs now take an additional argument which indicates if fast commits > are allowed or not. > > In this patch we also add a new entry to journal->stats which counts > the number of fast commits performed. > > Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@xxxxxxxxx> It would be better to rename the existing function something like jbd2_log_start_commit_full() and add wrappers jbd2_log_start_commit() and jbd2_log_start_commit_fast() to avoid to change all of the callsites to add the same parameter: int jbd2_log_start_commit_fast(journal_t *journal, tid_t tid) { return jbd2_log_start_commit_full(journal, tid, false); } EXPORT_SYMBOL(jbd2_log_start_commit_fast); int jbd2_log_start_commit(journal_t *journal, tid_t tid) { return jbd2_log_start_commit_full(journal, tid, true); } EXPORT_SYMBOL(jbd2_log_start_commit); That makes it much more clear for the few callsites that need the "fast" variant what is being done, unlike a "true" or "false" argument to a function that isn't very clear what meaning it has. Cheers, Andreas > --- > > Changelog: > > V2: JBD2 commit routine passes stats to the fast commit callbac. Also, > added a new entry to journal->stats and its tracking. > --- > fs/ext4/super.c | 2 +- > fs/jbd2/checkpoint.c | 2 +- > fs/jbd2/commit.c | 47 +++++++++++++++++++++++-- > fs/jbd2/journal.c | 81 +++++++++++++++++++++++++++++++++++-------- > fs/jbd2/transaction.c | 6 ++-- > fs/ocfs2/alloc.c | 2 +- > fs/ocfs2/super.c | 2 +- > include/linux/jbd2.h | 9 +++-- > 8 files changed, 124 insertions(+), 27 deletions(-) > > diff --git a/fs/ext4/super.c b/fs/ext4/super.c > index 81c3ec165822..6bab59ae81f7 100644 > --- a/fs/ext4/super.c > +++ b/fs/ext4/super.c > @@ -5148,7 +5148,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait) > !jbd2_trans_will_send_data_barrier(sbi->s_journal, target)) > needs_barrier = true; > > - if (jbd2_journal_start_commit(sbi->s_journal, &target)) { > + if (jbd2_journal_start_commit(sbi->s_journal, &target, true)) { > if (wait) > ret = jbd2_log_wait_commit(sbi->s_journal, > target); > diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c > index a1909066bde6..6297978ae3bc 100644 > --- a/fs/jbd2/checkpoint.c > +++ b/fs/jbd2/checkpoint.c > @@ -277,7 +277,7 @@ int jbd2_log_do_checkpoint(journal_t *journal) > > if (batch_count) > __flush_batch(journal, &batch_count); > - jbd2_log_start_commit(journal, tid); > + jbd2_log_start_commit(journal, tid, true); > /* > * jbd2_journal_commit_transaction() may want > * to take the checkpoint_mutex if JBD2_FLUSHED > diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c > index 132fb92098c7..9281814606e7 100644 > --- a/fs/jbd2/commit.c > +++ b/fs/jbd2/commit.c > @@ -351,8 +351,12 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, > * > * The primary function for committing a transaction to the log. This > * function is called by the journal thread to begin a complete commit. > + * > + * fc is input / output parameter. If fc is non-null and is set to true, this > + * function tries to perform fast commit. If the fast commit is successfully > + * performed, *fc is set to true. > */ > -void jbd2_journal_commit_transaction(journal_t *journal) > +void jbd2_journal_commit_transaction(journal_t *journal, bool *fc) > { > struct transaction_stats_s stats; > transaction_t *commit_transaction; > @@ -380,6 +384,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) > tid_t first_tid; > int update_tail; > int csum_size = 0; > + bool full_commit; > LIST_HEAD(io_bufs); > LIST_HEAD(log_bufs); > > @@ -413,6 +418,40 @@ void jbd2_journal_commit_transaction(journal_t *journal) > J_ASSERT(journal->j_running_transaction != NULL); > J_ASSERT(journal->j_committing_transaction == NULL); > > + read_lock(&journal->j_state_lock); > + full_commit = journal->j_do_full_commit; > + read_unlock(&journal->j_state_lock); > + > + /* Let file-system try its own fast commit */ > + if (jbd2_has_feature_fast_commit(journal)) { > + if (!full_commit && fc && *fc == true && > + journal->j_fc_commit_callback && > + !journal->j_fc_commit_callback( > + journal, journal->j_running_transaction->t_tid, > + journal->j_subtid, &stats.run)) { > + jbd_debug(3, "fast commit success.\n"); > + if (journal->j_fc_cleanup_callback) > + journal->j_fc_cleanup_callback(journal); > + write_lock(&journal->j_state_lock); > + journal->j_subtid++; > + if (fc) > + *fc = true; > + write_unlock(&journal->j_state_lock); > + goto update_overall_stats; > + } > + if (journal->j_fc_cleanup_callback) > + journal->j_fc_cleanup_callback(journal); > + write_lock(&journal->j_state_lock); > + journal->j_fc_off = 0; > + journal->j_subtid = 0; > + journal->j_do_full_commit = false; > + write_unlock(&journal->j_state_lock); > + } > + > + jbd_debug(3, "fast commit not performed, trying full.\n"); > + if (fc) > + *fc = false; > + > commit_transaction = journal->j_running_transaction; > > trace_jbd2_start_commit(journal, commit_transaction); > @@ -1129,8 +1168,12 @@ void jbd2_journal_commit_transaction(journal_t *journal) > /* > * Calculate overall stats > */ > +update_overall_stats: > spin_lock(&journal->j_history_lock); > - journal->j_stats.ts_tid++; > + if (fc && *fc == true) > + journal->j_stats.ts_num_fast_commits++; > + else > + journal->j_stats.ts_tid++; > journal->j_stats.ts_requested += stats.ts_requested; > journal->j_stats.run.rs_wait += stats.run.rs_wait; > journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay; > diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c > index 59ad709154a3..ab05e47ed2d4 100644 > --- a/fs/jbd2/journal.c > +++ b/fs/jbd2/journal.c > @@ -160,7 +160,13 @@ static void commit_timeout(struct timer_list *t) > * > * 1) COMMIT: Every so often we need to commit the current state of the > * filesystem to disk. The journal thread is responsible for writing > - * all of the metadata buffers to disk. > + * all of the metadata buffers to disk. If fast commits are allowed, > + * journal thread passes the control to the file system and file system > + * is then responsible for writing metadata buffers to disk (in whichever > + * format it wants). If fast commit succeds, journal thread won't perform > + * a normal commit. In case the fast commit fails, journal thread performs > + * full commit as normal. > + * > * > * 2) CHECKPOINT: We cannot reuse a used section of the log file until all > * of the data in that part of the log has been rewritten elsewhere on > @@ -172,6 +178,7 @@ static int kjournald2(void *arg) > { > journal_t *journal = arg; > transaction_t *transaction; > + bool fc_flag = true, fc_flag_save; > > /* > * Set up an interval timer which can be used to trigger a commit wakeup > @@ -209,9 +216,14 @@ static int kjournald2(void *arg) > jbd_debug(1, "OK, requests differ\n"); > write_unlock(&journal->j_state_lock); > del_timer_sync(&journal->j_commit_timer); > - jbd2_journal_commit_transaction(journal); > + fc_flag_save = fc_flag; > + jbd2_journal_commit_transaction(journal, &fc_flag); > write_lock(&journal->j_state_lock); > - goto loop; > + if (!fc_flag) { > + /* fast commit not performed */ > + fc_flag = fc_flag_save; > + goto loop; > + } > } > > wake_up(&journal->j_wait_done_commit); > @@ -235,16 +247,18 @@ static int kjournald2(void *arg) > > prepare_to_wait(&journal->j_wait_commit, &wait, > TASK_INTERRUPTIBLE); > - if (journal->j_commit_sequence != journal->j_commit_request) > + if (!fc_flag && > + journal->j_commit_sequence != journal->j_commit_request) > should_sleep = 0; > transaction = journal->j_running_transaction; > if (transaction && time_after_eq(jiffies, > - transaction->t_expires)) > + transaction->t_expires)) > should_sleep = 0; > if (journal->j_flags & JBD2_UNMOUNT) > should_sleep = 0; > if (should_sleep) { > write_unlock(&journal->j_state_lock); > + jbd_debug(1, "%s sleeps\n", __func__); > schedule(); > write_lock(&journal->j_state_lock); > } > @@ -259,7 +273,10 @@ static int kjournald2(void *arg) > transaction = journal->j_running_transaction; > if (transaction && time_after_eq(jiffies, transaction->t_expires)) { > journal->j_commit_request = transaction->t_tid; > + fc_flag = false; > jbd_debug(1, "woke because of timeout\n"); > + } else { > + fc_flag = true; > } > goto loop; > > @@ -517,11 +534,17 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target) > return 0; > } > > -int jbd2_log_start_commit(journal_t *journal, tid_t tid) > +int jbd2_log_start_commit(journal_t *journal, tid_t tid, bool full_commit) > { > int ret; > > write_lock(&journal->j_state_lock); > + /* > + * If someone has already requested a full commit, > + * we have to honor it. > + */ > + if (!journal->j_do_full_commit) > + journal->j_do_full_commit = full_commit; > ret = __jbd2_log_start_commit(journal, tid); > write_unlock(&journal->j_state_lock); > return ret; > @@ -556,7 +579,7 @@ static int __jbd2_journal_force_commit(journal_t *journal) > tid = transaction->t_tid; > read_unlock(&journal->j_state_lock); > if (need_to_start) > - jbd2_log_start_commit(journal, tid); > + jbd2_log_start_commit(journal, tid, true); > ret = jbd2_log_wait_commit(journal, tid); > if (!ret) > ret = 1; > @@ -603,11 +626,14 @@ int jbd2_journal_force_commit(journal_t *journal) > * if a transaction is going to be committed (or is currently already > * committing), and fills its tid in at *ptid > */ > -int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) > +int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid, bool full_commit) > { > int ret = 0; > > write_lock(&journal->j_state_lock); > + if (!journal->j_do_full_commit) > + journal->j_do_full_commit = full_commit; > + > if (journal->j_running_transaction) { > tid_t tid = journal->j_running_transaction->t_tid; > > @@ -675,7 +701,7 @@ EXPORT_SYMBOL(jbd2_trans_will_send_data_barrier); > * Wait for a specified commit to complete. > * The caller may not hold the journal lock. > */ > -int jbd2_log_wait_commit(journal_t *journal, tid_t tid) > +int __jbd2_log_wait_commit(journal_t *journal, tid_t tid, tid_t subtid) > { > int err = 0; > > @@ -702,12 +728,25 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) > } > #endif > while (tid_gt(tid, journal->j_commit_sequence)) { > - jbd_debug(1, "JBD2: want %u, j_commit_sequence=%u\n", > - tid, journal->j_commit_sequence); > + if ((!journal->j_do_full_commit) && > + !tid_geq(subtid, journal->j_subtid)) > + break; > + jbd_debug(1, "JBD2: want full commit %u %s %u, ", > + tid, journal->j_do_full_commit ? > + "and ignoring fast commit request for " : > + "or want fast commit", > + journal->j_subtid); > + jbd_debug(1, "j_commit_sequence=%u, j_subtid=%u\n", > + journal->j_commit_sequence, journal->j_subtid); > read_unlock(&journal->j_state_lock); > wake_up(&journal->j_wait_commit); > - wait_event(journal->j_wait_done_commit, > - !tid_gt(tid, journal->j_commit_sequence)); > + if (journal->j_do_full_commit) > + wait_event(journal->j_wait_done_commit, > + !tid_gt(tid, journal->j_commit_sequence)); > + else > + wait_event(journal->j_wait_done_commit, > + !tid_gt(tid, journal->j_commit_sequence) || > + !tid_geq(subtid, journal->j_subtid)); > read_lock(&journal->j_state_lock); > } > read_unlock(&journal->j_state_lock); > @@ -717,6 +756,13 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) > return err; > } > > +int jbd2_log_wait_commit(journal_t *journal, tid_t tid) > +{ > + journal->j_do_full_commit = true; > + return __jbd2_log_wait_commit(journal, tid, 0); > +} > + > + > /* Return 1 when transaction with given tid has already committed. */ > int jbd2_transaction_committed(journal_t *journal, tid_t tid) > { > @@ -751,7 +797,7 @@ int jbd2_complete_transaction(journal_t *journal, tid_t tid) > if (journal->j_commit_request != tid) { > /* transaction not yet started, so request it */ > read_unlock(&journal->j_state_lock); > - jbd2_log_start_commit(journal, tid); > + jbd2_log_start_commit(journal, tid, true); > goto wait_commit; > } > } else if (!(journal->j_committing_transaction && > @@ -996,6 +1042,8 @@ static int jbd2_seq_info_show(struct seq_file *seq, void *v) > "each up to %u blocks\n", > s->stats->ts_tid, s->stats->ts_requested, > s->journal->j_max_transaction_buffers); > + seq_printf(seq, "%lu fast commits performed\n", > + s->stats->ts_num_fast_commits); > if (s->stats->ts_tid == 0) > return 0; > seq_printf(seq, "average: \n %ums waiting for transaction\n", > @@ -1020,6 +1068,9 @@ static int jbd2_seq_info_show(struct seq_file *seq, void *v) > s->stats->run.rs_blocks / s->stats->ts_tid); > seq_printf(seq, " %lu logged blocks per transaction\n", > s->stats->run.rs_blocks_logged / s->stats->ts_tid); > + seq_printf(seq, " %lu logged blocks per commit\n", > + s->stats->run.rs_blocks_logged / > + (s->stats->ts_tid + s->stats->ts_num_fast_commits)); > return 0; > } > > @@ -1741,7 +1792,7 @@ int jbd2_journal_destroy(journal_t *journal) > > /* Force a final log commit */ > if (journal->j_running_transaction) > - jbd2_journal_commit_transaction(journal); > + jbd2_journal_commit_transaction(journal, NULL); > > /* Force any old transactions to disk */ > > diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c > index 990e7b5062e7..87f6627d78aa 100644 > --- a/fs/jbd2/transaction.c > +++ b/fs/jbd2/transaction.c > @@ -154,7 +154,7 @@ static void wait_transaction_locked(journal_t *journal) > need_to_start = !tid_geq(journal->j_commit_request, tid); > read_unlock(&journal->j_state_lock); > if (need_to_start) > - jbd2_log_start_commit(journal, tid); > + jbd2_log_start_commit(journal, tid, true); > jbd2_might_wait_for_commit(journal); > schedule(); > finish_wait(&journal->j_wait_transaction_locked, &wait); > @@ -708,7 +708,7 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) > need_to_start = !tid_geq(journal->j_commit_request, tid); > read_unlock(&journal->j_state_lock); > if (need_to_start) > - jbd2_log_start_commit(journal, tid); > + jbd2_log_start_commit(journal, tid, true); > > rwsem_release(&journal->j_trans_commit_map, 1, _THIS_IP_); > handle->h_buffer_credits = nblocks; > @@ -1822,7 +1822,7 @@ int jbd2_journal_stop(handle_t *handle) > jbd_debug(2, "transaction too old, requesting commit for " > "handle %p\n", handle); > /* This is non-blocking */ > - jbd2_log_start_commit(journal, transaction->t_tid); > + jbd2_log_start_commit(journal, transaction->t_tid, true); > > /* > * Special case: JBD2_SYNC synchronous updates require us > diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c > index 0c335b51043d..df41c43573b7 100644 > --- a/fs/ocfs2/alloc.c > +++ b/fs/ocfs2/alloc.c > @@ -6117,7 +6117,7 @@ int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb, > goto out; > } > > - if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) { > + if (jbd2_journal_start_commit(osb->journal->j_journal, &target, true)) { > jbd2_log_wait_commit(osb->journal->j_journal, target); > ret = 1; > } > diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c > index 8b2f39506648..60ecc51759ae 100644 > --- a/fs/ocfs2/super.c > +++ b/fs/ocfs2/super.c > @@ -410,7 +410,7 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait) > } > > if (jbd2_journal_start_commit(osb->journal->j_journal, > - &target)) { > + &target, true)) { > if (wait) > jbd2_log_wait_commit(osb->journal->j_journal, > target); > diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h > index 153840b422cc..535f88dff653 100644 > --- a/include/linux/jbd2.h > +++ b/include/linux/jbd2.h > @@ -742,6 +742,7 @@ struct transaction_run_stats_s { > > struct transaction_stats_s { > unsigned long ts_tid; > + unsigned long ts_num_fast_commits; > unsigned long ts_requested; > struct transaction_run_stats_s run; > }; > @@ -1364,7 +1365,8 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); > void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); > > /* Commit management */ > -extern void jbd2_journal_commit_transaction(journal_t *); > +extern void jbd2_journal_commit_transaction(journal_t *journal, > + bool *full_commit); > > /* Checkpoint list management */ > void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy); > @@ -1571,9 +1573,10 @@ extern void jbd2_clear_buffer_revoked_flags(journal_t *journal); > * transitions on demand. > */ > > -int jbd2_log_start_commit(journal_t *journal, tid_t tid); > +int jbd2_log_start_commit(journal_t *journal, tid_t tid, bool full_commit); > int __jbd2_log_start_commit(journal_t *journal, tid_t tid); > -int jbd2_journal_start_commit(journal_t *journal, tid_t *tid); > +int jbd2_journal_start_commit(journal_t *journal, tid_t *tid, > + bool full_commit); > int jbd2_log_wait_commit(journal_t *journal, tid_t tid); > int jbd2_transaction_committed(journal_t *journal, tid_t tid); > int jbd2_complete_transaction(journal_t *journal, tid_t tid); > -- > 2.23.0.rc1.153.gdeed80330f-goog > Cheers, Andreas
Attachment:
signature.asc
Description: Message signed with OpenPGP