[PATCH] jbd2: use rhashtable for revoke records during replay

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



A resizable hashtable should improve journal replay time when
we have millions of revoke records.
Note that the rhashtable is used during replay only: regular
processing still uses the existing list-based hash table, because
removal with list_del() is less expensive there.

before:
1048576 records - 95 seconds
2097152 records - 580 seconds

after:
1048576 records - 2 seconds
2097152 records - 3 seconds
4194304 records - 7 seconds

Signed-off-by: Alex Zhuravlev <bzzz@xxxxxxxxxxxxx>
Signed-off-by: Li Dongyang <dongyangli@xxxxxxx>
---
 fs/jbd2/recovery.c   |  4 +++
 fs/jbd2/revoke.c     | 65 +++++++++++++++++++++++++++++++-------------
 include/linux/jbd2.h |  6 ++++
 3 files changed, 56 insertions(+), 19 deletions(-)

diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 667f67342c52..d9287439171c 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -294,6 +294,10 @@ int jbd2_journal_recover(journal_t *journal)
 	memset(&info, 0, sizeof(info));
 	sb = journal->j_superblock;
 
+	err = jbd2_journal_init_recovery_revoke(journal);
+	if (err)
+		return err;
+
 	/*
 	 * The journal superblock's s_start field (the current log head)
 	 * is always zero if, and only if, the journal was cleanly
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 4556e4689024..d6e96099e9c9 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -90,6 +90,7 @@
 #include <linux/bio.h>
 #include <linux/log2.h>
 #include <linux/hash.h>
+#include <linux/rhashtable.h>
 #endif
 
 static struct kmem_cache *jbd2_revoke_record_cache;
@@ -101,7 +102,10 @@ static struct kmem_cache *jbd2_revoke_table_cache;
 
 struct jbd2_revoke_record_s
 {
-	struct list_head  hash;
+	union {
+		struct list_head  hash;		/* Used for regular processing */
+		struct rhash_head linkage;	/* Used for recovery only */
+	};
 	tid_t		  sequence;	/* Used for recovery only */
 	unsigned long long	  blocknr;
 };
@@ -680,13 +684,22 @@ static void flush_descriptor(journal_t *journal,
  * single block.
  */
 
+static const struct rhashtable_params revoke_rhashtable_params = {
+	.head_offset = offsetof(struct jbd2_revoke_record_s, linkage),
+	.key_offset  = offsetof(struct jbd2_revoke_record_s, blocknr),
+	.key_len     = sizeof(unsigned long long),	/* width of ->blocknr */
+};
+
 int jbd2_journal_set_revoke(journal_t *journal,
 		       unsigned long long blocknr,
 		       tid_t sequence)
 {
 	struct jbd2_revoke_record_s *record;
+	gfp_t gfp_mask = GFP_NOFS;
+	int err;
 
-	record = find_revoke_record(journal, blocknr);
+	record = rhashtable_lookup_fast(&journal->j_revoke_rhtable, &blocknr,
+					revoke_rhashtable_params); /* takes RCU lock */
 	if (record) {
 		/* If we have multiple occurrences, only record the
 		 * latest sequence number in the hashed record */
@@ -694,7 +707,22 @@ int jbd2_journal_set_revoke(journal_t *journal,
 			record->sequence = sequence;
 		return 0;
 	}
-	return insert_revoke_hash(journal, blocknr, sequence);
+	/* No record for this block yet: allocate one and hash it. */
+	if (journal_oom_retry)
+		gfp_mask |= __GFP_NOFAIL;
+	record = kmem_cache_alloc(jbd2_revoke_record_cache, gfp_mask);
+	if (!record)
+		return -ENOMEM;
+
+	record->sequence = sequence;
+	record->blocknr = blocknr;
+	err = rhashtable_lookup_insert_fast(&journal->j_revoke_rhtable,
+					    &record->linkage,
+					    revoke_rhashtable_params);
+	if (err)	/* not inserted: release the unused record */
+		kmem_cache_free(jbd2_revoke_record_cache, record);
+
+	return err;
 }
 
 /*
@@ -710,7 +738,8 @@ int jbd2_journal_test_revoke(journal_t *journal,
 {
 	struct jbd2_revoke_record_s *record;
 
-	record = find_revoke_record(journal, blocknr);
+	record = rhashtable_lookup(&journal->j_revoke_rhtable, &blocknr,
+				   revoke_rhashtable_params);
 	if (!record)
 		return 0;
 	if (tid_gt(sequence, record->sequence))
@@ -718,6 +747,17 @@ int jbd2_journal_test_revoke(journal_t *journal,
 	return 1;
 }
 
+int jbd2_journal_init_recovery_revoke(journal_t *journal)
+{
+	/* The caller treats any non-zero result as a failed setup. */
+	return rhashtable_init(&journal->j_revoke_rhtable, &revoke_rhashtable_params);
+}
+
+static void jbd2_revoke_record_free(void *ptr, void *arg)
+{
+	kmem_cache_free(jbd2_revoke_record_cache, ptr);	/* @arg is not used */
+}
+
 /*
  * Finally, once recovery is over, we need to clear the revoke table so
  * that it can be reused by the running filesystem.
@@ -725,19 +765,6 @@ int jbd2_journal_test_revoke(journal_t *journal,
 
 void jbd2_journal_clear_revoke(journal_t *journal)
 {
-	int i;
-	struct list_head *hash_list;
-	struct jbd2_revoke_record_s *record;
-	struct jbd2_revoke_table_s *revoke;
-
-	revoke = journal->j_revoke;
-
-	for (i = 0; i < revoke->hash_size; i++) {
-		hash_list = &revoke->hash_table[i];
-		while (!list_empty(hash_list)) {
-			record = (struct jbd2_revoke_record_s*) hash_list->next;
-			list_del(&record->hash);
-			kmem_cache_free(jbd2_revoke_record_cache, record);
-		}
-	}
+	rhashtable_free_and_destroy(&journal->j_revoke_rhtable,
+				    jbd2_revoke_record_free, NULL); /* frees each record */
 }
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 8aef9bb6ad57..34e8a4ba9c2e 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1122,6 +1122,11 @@ struct journal_s
 	 */
 	struct jbd2_revoke_table_s *j_revoke_table[2];
 
+	/**
+	 * @j_revoke_rhtable:	rhashtable for revoke records during recovery
+	 */
+	struct rhashtable	j_revoke_rhtable;
+
 	/**
 	 * @j_wbuf: Array of bhs for jbd2_journal_commit_transaction.
 	 */
@@ -1644,6 +1649,7 @@ extern void	   jbd2_journal_write_revoke_records(transaction_t *transaction,
 /* Recovery revoke support */
 extern int	jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t);
 extern int	jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t);
+extern int	jbd2_journal_init_recovery_revoke(journal_t *);
 extern void	jbd2_journal_clear_revoke(journal_t *);
 extern void	jbd2_journal_switch_revoke_table(journal_t *journal);
 extern void	jbd2_clear_buffer_revoked_flags(journal_t *journal);
-- 
2.47.0





[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux