[PATCH] md/persistent-data: Set tm hash size via a module_param

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Enlarging the hash table can significantly speed up transaction manager
hash operations. Add a tm_hash_table_size module parameter that sets the
hash-table size (the number of buckets).

For demonstration, I created a thin volume, filled it with 4.8TiB of
sequential data and took a snapshot. Then I overwrote the volume in a
way that caused the maximum number of btree nodes to be allocated. I
repeated the process for 5 snapshots and measured the deletion time of
the first snapshot.

When using a 128k hash size (instead of the default value of 256) I was
able to reduce the snapshot deletion time from 42min to 4min (a 10x
improvement).

Signed-off-by: Meir Elisha <meir.elisha@xxxxxxxxxxx>
---

Script used for demonstration attached below.

 .../persistent-data/dm-transaction-manager.c  | 51 ++++++++++++++++---
 1 file changed, 43 insertions(+), 8 deletions(-)

diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index c7ba4e6cbbc7..8d486b1e6693 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -10,6 +10,7 @@
 #include "dm-space-map-metadata.h"
 #include "dm-persistent-data-internal.h"
 
+#include <linux/moduleparam.h>
 #include <linux/export.h>
 #include <linux/mutex.h>
 #include <linux/hash.h>
@@ -84,8 +85,35 @@ struct shadow_info {
 /*
  * It would be nice if we scaled with the size of transaction.
  */
-#define DM_HASH_SIZE 256
-#define DM_HASH_MASK (DM_HASH_SIZE - 1)
+static unsigned int tm_hash_table_size = 256;
+
+static int param_set_hash_size(const char *val, const struct kernel_param *kp)
+{
+	unsigned int num;
+	int ret;
+
+	if (!val)
+		return -EINVAL;
+
+	ret = kstrtouint(val, 0, &num);
+	if (ret)
+		return ret;
+
+	/* Must be a non-zero power of 2, so (size - 1) works as a bucket mask */
+	if (!(num && !(num & (num - 1))))
+		return -EINVAL;
+
+	*((unsigned int *)kp->arg) = num;
+	return 0;
+}
+
+static const struct kernel_param_ops tm_hash_table_size_ops = {
+	.set = param_set_hash_size,
+	.get = param_get_uint
+};
+
+module_param_cb(tm_hash_table_size, &tm_hash_table_size_ops, &tm_hash_table_size, 0644);
+MODULE_PARM_DESC(tm_hash_table_size, "transaction manager hash size (must be a power of 2)");
 
 struct dm_transaction_manager {
 	int is_clone;
@@ -95,8 +123,9 @@ struct dm_transaction_manager {
 	struct dm_space_map *sm;
 
 	spinlock_t lock;
-	struct hlist_head buckets[DM_HASH_SIZE];
-
+	struct hlist_head *buckets;
+	unsigned int hash_size;
+	unsigned int hash_mask;
 	struct prefetch_set prefetches;
 };
 
@@ -105,7 +134,7 @@ struct dm_transaction_manager {
 static int is_shadow(struct dm_transaction_manager *tm, dm_block_t b)
 {
 	int r = 0;
-	unsigned int bucket = dm_hash_block(b, DM_HASH_MASK);
+	unsigned int bucket = dm_hash_block(b, tm->hash_mask);
 	struct shadow_info *si;
 
 	spin_lock(&tm->lock);
@@ -131,7 +160,7 @@ static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b)
 	si = kmalloc(sizeof(*si), GFP_NOIO);
 	if (si) {
 		si->where = b;
-		bucket = dm_hash_block(b, DM_HASH_MASK);
+		bucket = dm_hash_block(b, tm->hash_mask);
 		spin_lock(&tm->lock);
 		hlist_add_head(&si->hlist, tm->buckets + bucket);
 		spin_unlock(&tm->lock);
@@ -146,7 +175,7 @@ static void wipe_shadow_table(struct dm_transaction_manager *tm)
 	int i;
 
 	spin_lock(&tm->lock);
-	for (i = 0; i < DM_HASH_SIZE; i++) {
+	for (i = 0; i < tm->hash_size; i++) {
 		bucket = tm->buckets + i;
 		hlist_for_each_entry_safe(si, tmp, bucket, hlist)
 			kfree(si);
@@ -169,13 +198,21 @@ static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm,
 	if (!tm)
 		return ERR_PTR(-ENOMEM);
 
+	tm->hash_size = tm_hash_table_size;
+	tm->buckets = kmalloc_array(tm->hash_size, sizeof(*tm->buckets), GFP_KERNEL);
+	if (!tm->buckets) {
+		kfree(tm);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	tm->is_clone = 0;
 	tm->real = NULL;
 	tm->bm = bm;
 	tm->sm = sm;
+	tm->hash_mask = tm->hash_size - 1;
 
 	spin_lock_init(&tm->lock);
-	for (i = 0; i < DM_HASH_SIZE; i++)
+	for (i = 0; i < tm->hash_size; i++)
 		INIT_HLIST_HEAD(tm->buckets + i);
 
 	prefetch_init(&tm->prefetches);
-- 
2.34.1

Script used for testing(assumes PAGE_SIZE=4096):

# Create volume group
vgcreate --physicalextentsize 16384k vg1 /dev/nvme1n1
# Create thin pool
lvcreate --yes --extents 320002 --poolmetadatasize 104860412 --chunksize 128k --thinpool tp vg1
# Create thin volume
lvcreate --yes --virtualsize 5868826560512b --thin --name lv1 vg1/tp

# Fill volume with initial data
fio --name=write_data --ioengine=libaio \
        --direct=1 \
        --randrepeat=0 \
        --numjobs=1 \
        --bs=128k \
        --filename=/dev/vg1/lv1 \
        --size=4800G \
        --iodepth=8 \
        --rw=randwrite

for i in {1..5}; do
        # Create a snapshot
        lvcreate --name snap$i --snapshot vg1/lv1
        # Fill snap tree with the worst case scenario
        # write 1 chunk at every btree leaf to enforce new tree for each snapshot
        fio --name=write --filename="/dev/vg1/lv1" --rw=write:32128k --rw_sequencer=sequential \
            --bs=128k --size=4800G --direct=1 --ioengine=libaio

        sleep 1
done

# Remove a snapshot
time lvremove vg1/snap1

-- 
2.34.1




[Index of Archives]     [DM Crypt]     [Fedora Desktop]     [ATA RAID]     [Fedora Marketing]     [Fedora Packaging]     [Fedora SELinux]     [Yosemite Discussion]     [KDE Users]     [Fedora Docs]

  Powered by Linux