A resizable hashtable should improve journal replay time when we have millions of revoke records. Note that the rhashtable is used during replay only; regular processing still uses the existing list-based hash table, since removal with list_del() is less expensive. before: 1048576 records - 95 seconds 2097152 records - 580 seconds after: 1048576 records - 2 seconds 2097152 records - 3 seconds 4194304 records - 7 seconds Signed-off-by: Alex Zhuravlev <bzzz@xxxxxxxxxxxxx> Signed-off-by: Li Dongyang <dongyangli@xxxxxxx> --- v1->v2: include rhashtable header in jbd2.h --- fs/jbd2/recovery.c | 4 +++ fs/jbd2/revoke.c | 65 +++++++++++++++++++++++++++++++------------- include/linux/jbd2.h | 7 +++++ 3 files changed, 57 insertions(+), 19 deletions(-) diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 667f67342c52..d9287439171c 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -294,6 +294,10 @@ int jbd2_journal_recover(journal_t *journal) memset(&info, 0, sizeof(info)); sb = journal->j_superblock; + err = jbd2_journal_init_recovery_revoke(journal); + if (err) + return err; + /* * The journal superblock's s_start field (the current log head) * is always zero if, and only if, the journal was cleanly diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 4556e4689024..d6e96099e9c9 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -90,6 +90,7 @@ #include <linux/bio.h> #include <linux/log2.h> #include <linux/hash.h> +#include <linux/rhashtable.h> #endif static struct kmem_cache *jbd2_revoke_record_cache; @@ -101,7 +102,10 @@ static struct kmem_cache *jbd2_revoke_table_cache; struct jbd2_revoke_record_s { - struct list_head hash; + union { + struct list_head hash; + struct rhash_head linkage; + }; tid_t sequence; /* Used for recovery only */ unsigned long long blocknr; }; @@ -680,13 +684,22 @@ static void flush_descriptor(journal_t *journal, * single block. 
*/ +static const struct rhashtable_params revoke_rhashtable_params = { + .key_len = sizeof(unsigned long long), + .key_offset = offsetof(struct jbd2_revoke_record_s, blocknr), + .head_offset = offsetof(struct jbd2_revoke_record_s, linkage), +}; + int jbd2_journal_set_revoke(journal_t *journal, unsigned long long blocknr, tid_t sequence) { struct jbd2_revoke_record_s *record; + gfp_t gfp_mask = GFP_NOFS; + int err; - record = find_revoke_record(journal, blocknr); + record = rhashtable_lookup(&journal->j_revoke_rhtable, &blocknr, + revoke_rhashtable_params); if (record) { /* If we have multiple occurrences, only record the * latest sequence number in the hashed record */ @@ -694,7 +707,22 @@ int jbd2_journal_set_revoke(journal_t *journal, record->sequence = sequence; return 0; } - return insert_revoke_hash(journal, blocknr, sequence); + + if (journal_oom_retry) + gfp_mask |= __GFP_NOFAIL; + record = kmem_cache_alloc(jbd2_revoke_record_cache, gfp_mask); + if (!record) + return -ENOMEM; + + record->sequence = sequence; + record->blocknr = blocknr; + err = rhashtable_lookup_insert_fast(&journal->j_revoke_rhtable, + &record->linkage, + revoke_rhashtable_params); + if (err) + kmem_cache_free(jbd2_revoke_record_cache, record); + + return err; } /* @@ -710,7 +738,8 @@ int jbd2_journal_test_revoke(journal_t *journal, { struct jbd2_revoke_record_s *record; - record = find_revoke_record(journal, blocknr); + record = rhashtable_lookup(&journal->j_revoke_rhtable, &blocknr, + revoke_rhashtable_params); if (!record) return 0; if (tid_gt(sequence, record->sequence)) @@ -718,6 +747,17 @@ int jbd2_journal_test_revoke(journal_t *journal, return 1; } +int jbd2_journal_init_recovery_revoke(journal_t *journal) +{ + return rhashtable_init(&journal->j_revoke_rhtable, + &revoke_rhashtable_params); +} + +static void jbd2_revoke_record_free(void *ptr, void *arg) +{ + kmem_cache_free(jbd2_revoke_record_cache, ptr); +} + /* * Finally, once recovery is over, we need to clear the revoke table so 
* that it can be reused by the running filesystem. @@ -725,19 +765,6 @@ int jbd2_journal_test_revoke(journal_t *journal, void jbd2_journal_clear_revoke(journal_t *journal) { - int i; - struct list_head *hash_list; - struct jbd2_revoke_record_s *record; - struct jbd2_revoke_table_s *revoke; - - revoke = journal->j_revoke; - - for (i = 0; i < revoke->hash_size; i++) { - hash_list = &revoke->hash_table[i]; - while (!list_empty(hash_list)) { - record = (struct jbd2_revoke_record_s*) hash_list->next; - list_del(&record->hash); - kmem_cache_free(jbd2_revoke_record_cache, record); - } - } + rhashtable_free_and_destroy(&journal->j_revoke_rhtable, + jbd2_revoke_record_free, NULL); } diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 8aef9bb6ad57..2b0aa1e159b8 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -28,6 +28,7 @@ #include <linux/slab.h> #include <linux/bit_spinlock.h> #include <linux/blkdev.h> +#include <linux/rhashtable-types.h> #include <crypto/hash.h> #endif @@ -1122,6 +1123,11 @@ struct journal_s */ struct jbd2_revoke_table_s *j_revoke_table[2]; + /** + * @j_revoke_rhtable: rhashtable for revoke records during recovery + */ + struct rhashtable j_revoke_rhtable; + /** * @j_wbuf: Array of bhs for jbd2_journal_commit_transaction. */ @@ -1644,6 +1650,7 @@ extern void jbd2_journal_write_revoke_records(transaction_t *transaction, /* Recovery revoke support */ extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t); extern int jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t); +extern int jbd2_journal_init_recovery_revoke(journal_t *); extern void jbd2_journal_clear_revoke(journal_t *); extern void jbd2_journal_switch_revoke_table(journal_t *journal); extern void jbd2_clear_buffer_revoked_flags(journal_t *journal); -- 2.47.0