On Wed, Mar 26, 2008 at 3:31 AM, Manish Katiyar <mkatiyar@xxxxxxxxx> wrote: > Other than the source code, are there any links/resources for the new > jbd2 design ? > > -- > Thanks & Regards, > ******************************************** > Manish Katiyar ( http://mkatiyar.googlepages.com ) As jbd2 is still under development and works in conjunction with ext4 (which is currently named ext4dev, if you look at the .config file), you will have difficulty finding official documentation - possibly something is available in the Linux Symposium proceedings: http://www.google.com/search?q=jbd2+linux+symposium&btnG=Google+Search So here are some questions you can use to build your FAQ on jbd2 (or an ext4 FAQ) - hopefully everyone can contribute MORE QUESTIONS and we can compile them together to update the ext4 wiki: What is ext4/jbd2? How is ext4 different from ext3/ext2? Similarly, how is jbd2 different from jbd? Performance/delays/async vs sync mechanism/size of logging/configurability of logging/periodicity of logging/recovery of corrupted data and its mechanics - any differences etc. But pertaining exactly to your question, I did a diff between jbd/*.c and the corresponding files in jbd2/*.c; here is a summary of my observations: a. checksumming on journal logs (commit.c): as shown below, jbd2 has a checksum mechanism on the journal, whereas jbd does not. 125,135c120,123 < tmp = (struct commit_header *)bh->b_data; < tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); < tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); < tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); < < if (JBD2_HAS_COMPAT_FEATURE(journal, < JBD2_FEATURE_COMPAT_CHECKSUM)) { < tmp->h_chksum_type = JBD2_CRC32_CHKSUM; < tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE; < tmp->h_chksum[0] = cpu_to_be32(crc32_sum); < } --- > header = (journal_header_t *)(bh->b_data); > header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); > header->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK); > header->h_sequence = cpu_to_be32(commit_transaction->t_tid); 137,139c125 b. 
async vs sync mechanism of logging (commit.c): because of its asynchronous operation, jbd2 waits for the I/O to complete only after it has done other things (if it is busy), whereas in jbd the waiting is implicit - the I/O must necessarily complete before it can do anything else. < *cbh = bh; < return ret; < } < < /* < * This function along with journal_submit_commit_record < * allows to write the commit record asynchronously. < */ < static int journal_wait_on_commit_record(struct buffer_head *bh) < { < int ret = 0; < < clear_buffer_dirty(bh); < wait_on_buffer(bh); < < if (unlikely(!buffer_uptodate(bh))) < ret = -EIO; < put_bh(bh); /* One for getblk() */ < jbd2_journal_put_journal_head(bh2jh(bh)); --- > put_bh(bh); /* One for getblk() */ > journal_put_journal_head(descriptor); And this block is new in jbd2: < /* < * Wait for all submitted IO to complete. < */ < static int journal_wait_on_locked_list(journal_t *journal, < transaction_t *commit_transaction) < { < int ret = 0; < struct journal_head *jh; < < while (commit_transaction->t_locked_list) { < struct buffer_head *bh; < < jh = commit_transaction->t_locked_list->b_tprev; < bh = jh2bh(jh); < get_bh(bh); < if (buffer_locked(bh)) { < spin_unlock(&journal->j_list_lock); < wait_on_buffer(bh); < if (unlikely(!buffer_uptodate(bh))) < ret = -EIO; < spin_lock(&journal->j_list_lock); < } < if (!inverted_lock(journal, bh)) { < put_bh(bh); < spin_lock(&journal->j_list_lock); < continue; < } < if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { < __jbd2_journal_unfile_buffer(jh); < jbd_unlock_bh_state(bh); < jbd2_journal_remove_journal_head(bh); < put_bh(bh); < } else { < jbd_unlock_bh_state(bh); < } < put_bh(bh); < cond_resched_lock(&journal->j_list_lock); < } c. 
This diff I cannot explain: < jbd2_journal_switch_revoke_table(journal); < < stats.u.run.rs_flushing = jiffies; < stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked, < stats.u.run.rs_flushing); --- > journal_switch_revoke_table(journal); And, similar to the above, the following block is new in jbd2: < stats.u.run.rs_logging = jiffies; < stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing, < stats.u.run.rs_logging); < stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits; < stats.u.run.rs_blocks_logged = 0; < d. This block is removed in jbd2 (the same loop body, with err renamed to ret and the jbd2_ function names, now appears inside the new journal_wait_on_locked_list() shown under b.): > jh = commit_transaction->t_locked_list->b_tprev; > bh = jh2bh(jh); > get_bh(bh); > if (buffer_locked(bh)) { > spin_unlock(&journal->j_list_lock); > wait_on_buffer(bh); > if (unlikely(!buffer_uptodate(bh))) > err = -EIO; > spin_lock(&journal->j_list_lock); > } > if (!inverted_lock(journal, bh)) { > put_bh(bh); > spin_lock(&journal->j_list_lock); > continue; > } > if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { > __journal_unfile_buffer(jh); > jbd_unlock_bh_state(bh); > journal_remove_journal_head(bh); > put_bh(bh); > } else { > jbd_unlock_bh_state(bh); > } > put_bh(bh); > cond_resched_lock(&journal->j_list_lock); > } e. checksum again: < /* < * Compute checksum. < */ < if (JBD2_HAS_COMPAT_FEATURE(journal, < JBD2_FEATURE_COMPAT_CHECKSUM)) { < crc32_sum = < jbd2_checksum_data(crc32_sum, bh); < } < f. async commit for logging again: < /* Done it all: now write the commit record asynchronously. 
*/ < < if (JBD2_HAS_INCOMPAT_FEATURE(journal, < JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { < err = journal_submit_commit_record(journal, commit_transaction, < &cbh, crc32_sum); < if (err) < __jbd2_journal_abort_hard(journal); < < spin_lock(&journal->j_list_lock); < err = journal_wait_on_locked_list(journal, < commit_transaction); < spin_unlock(&journal->j_list_lock); < if (err) < __jbd2_journal_abort_hard(journal); < } < and this: < if (!JBD2_HAS_INCOMPAT_FEATURE(journal, < JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { < err = journal_submit_commit_record(journal, commit_transaction, < &cbh, crc32_sum); < if (err) < __jbd2_journal_abort_hard(journal); < } < if (!err && !is_journal_aborted(journal)) < err = journal_wait_on_commit_record(cbh); --- > if (journal_write_commit_record(journal, commit_transaction)) > err = -EIO; g. other than statistics calculation, what is this history thing: < commit_transaction->t_start = jiffies; < stats.u.run.rs_logging = jbd2_time_diff(stats.u.run.rs_logging, < commit_transaction->t_start); < < /* < * File the transaction for history < */ < stats.ts_type = JBD2_STATS_RUN; < stats.ts_tid = commit_transaction->t_tid; < stats.u.run.rs_handle_count = commit_transaction->t_handle_count; < spin_lock(&journal->j_history_lock); < memcpy(journal->j_history + journal->j_history_cur, &stats, < sizeof(stats)); < if (++journal->j_history_cur == journal->j_history_max) < journal->j_history_cur = 0; < < /* < * Calculate overall stats < */ < journal->j_stats.ts_tid++; < journal->j_stats.u.run.rs_wait += stats.u.run.rs_wait; < journal->j_stats.u.run.rs_running += stats.u.run.rs_running; < journal->j_stats.u.run.rs_locked += stats.u.run.rs_locked; < journal->j_stats.u.run.rs_flushing += stats.u.run.rs_flushing; < journal->j_stats.u.run.rs_logging += stats.u.run.rs_logging; < journal->j_stats.u.run.rs_handle_count += stats.u.run.rs_handle_count; < journal->j_stats.u.run.rs_blocks += stats.u.run.rs_blocks; < journal->j_stats.u.run.rs_blocks_logged += 
stats.u.run.rs_blocks_logged; < spin_unlock(&journal->j_history_lock); < My question: Since it is async-based, does it mean that a timeout is needed? Can it ever happen that the CPU is kept so busy that it never comes back to complete the I/O operation for logging (thus necessitating a timeout)? h. Why is it providing a /proc entry for querying internal information (highly truncated due to voluminous chunks) - these functions / components are new in jbd2: < static void *jbd2_history_skip_empty(struct jbd2_stats_proc_session *s, < static void *jbd2_seq_history_start(struct seq_file *seq, loff_t *pos) < static void *jbd2_seq_history_next(struct seq_file *seq, void *v, loff_t *pos) < static int jbd2_seq_history_show(struct seq_file *seq, void *v) < static void jbd2_seq_history_stop(struct seq_file *seq, void *v) < static struct seq_operations jbd2_seq_history_ops = { < .start = jbd2_seq_history_start, < .next = jbd2_seq_history_next, < .stop = jbd2_seq_history_stop, < .show = jbd2_seq_history_show, < }; < < static int jbd2_seq_history_open(struct inode *inode, struct file *file) < static int jbd2_seq_history_release(struct inode *inode, struct file *file) < static struct file_operations jbd2_seq_history_fops = { < .owner = THIS_MODULE, < .open = jbd2_seq_history_open, < .read = seq_read, < .llseek = seq_lseek, < .release = jbd2_seq_history_release, < }; < < static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos) < static int jbd2_seq_info_show(struct seq_file *seq, void *v) < static void jbd2_seq_info_stop(struct seq_file *seq, void *v) < static struct seq_operations jbd2_seq_info_ops = { < .start = jbd2_seq_info_start, < .next = jbd2_seq_info_next, < .stop = jbd2_seq_info_stop, < .show = jbd2_seq_info_show, < }; < < static int jbd2_seq_info_open(struct inode *inode, struct file *file) < static int jbd2_seq_info_release(struct inode *inode, struct file *file) < static struct file_operations jbd2_seq_info_fops = { < .owner = THIS_MODULE, < .open = 
jbd2_seq_info_open, < .read = seq_read, < .llseek = seq_lseek, < .release = jbd2_seq_info_release, < }; < < static struct proc_dir_entry *proc_jbd2_stats; < < static void jbd2_stats_proc_init(journal_t *journal) < static void jbd2_stats_proc_exit(journal_t *journal) < static void journal_init_stats(journal_t *journal) /proc/fs/ext4/sdb2>d group_prealloc max_to_scan mb_groups mb_history min_to_scan order2_req stats stream_req ./ Every time a new file is copied, a new entry is added here (and it is added asynchronously - not immediately, but after a short while): /proc/fs/jbd2/sdb2>cat history R/C tid wait run lock flush log hndls block inlog ctime write drop close R 2 0 107692 0 0 0 1 1 2 R 3 0 5471 0 0 0 1 1 2 R 4 0 11 0 0 16 1 6 7 I am tired... There are several more differences, but only a few - the majority of the others are just name changes. -- Regards, Peter Teoh -- To unsubscribe from this list: send an email with "unsubscribe kernelnewbies" to ecartis@xxxxxxxxxxxx Please read the FAQ at http://kernelnewbies.org/FAQ