Zdenek reported that for high number of dm devices, the 64-entry hash table has non-trivial overhead. This patch replaces the hash table with a red-black tree. Signed-off-by: Mikulas Patocka <mpatocka@xxxxxxxxxx> --- drivers/md/dm-ioctl.c | 256 +++++++++++++++++++++++++++----------------------- 1 file changed, 143 insertions(+), 113 deletions(-) Index: linux-2.6/drivers/md/dm-ioctl.c =================================================================== --- linux-2.6.orig/drivers/md/dm-ioctl.c 2021-02-05 20:21:22.000000000 +0100 +++ linux-2.6/drivers/md/dm-ioctl.c 2021-03-09 21:04:07.000000000 +0100 @@ -14,6 +14,7 @@ #include <linux/init.h> #include <linux/wait.h> #include <linux/slab.h> +#include <linux/rbtree.h> #include <linux/dm-ioctl.h> #include <linux/hdreg.h> #include <linux/compat.h> @@ -36,8 +37,10 @@ struct dm_file { * name or uuid. *---------------------------------------------------------------*/ struct hash_cell { - struct list_head name_list; - struct list_head uuid_list; + struct rb_node name_node; + struct rb_node uuid_node; + bool name_set; + bool uuid_set; char *name; char *uuid; @@ -53,10 +56,8 @@ struct vers_iter { }; -#define NUM_BUCKETS 64 -#define MASK_BUCKETS (NUM_BUCKETS - 1) -static struct list_head _name_buckets[NUM_BUCKETS]; -static struct list_head _uuid_buckets[NUM_BUCKETS]; +static struct rb_root name_rb_tree = RB_ROOT; +static struct rb_root uuid_rb_tree = RB_ROOT; static void dm_hash_remove_all(bool keep_open_devices, bool mark_deferred, bool only_deferred); @@ -70,73 +71,110 @@ static DECLARE_RWSEM(_hash_lock); */ static DEFINE_MUTEX(dm_hash_cells_mutex); -static void init_buckets(struct list_head *buckets) -{ - unsigned int i; - - for (i = 0; i < NUM_BUCKETS; i++) - INIT_LIST_HEAD(buckets + i); -} - -static int dm_hash_init(void) -{ - init_buckets(_name_buckets); - init_buckets(_uuid_buckets); - return 0; -} - static void dm_hash_exit(void) { dm_hash_remove_all(false, false, false); } /*----------------------------------------------------------------- - * Hash function: - * We're not really concerned with the str hash function being - * fast since it's only used by the ioctl interface. - *---------------------------------------------------------------*/ -static unsigned int hash_str(const char *str) -{ - const unsigned int hash_mult = 2654435387U; - unsigned int h = 0; - - while (*str) - h = (h + (unsigned int) *str++) * hash_mult; - - return h & MASK_BUCKETS; -} - -/*----------------------------------------------------------------- * Code for looking up a device by name *---------------------------------------------------------------*/ static struct hash_cell *__get_name_cell(const char *str) { - struct hash_cell *hc; - unsigned int h = hash_str(str); + struct rb_node *n = name_rb_tree.rb_node; - list_for_each_entry (hc, _name_buckets + h, name_list) - if (!strcmp(hc->name, str)) { + while (n) { + struct hash_cell *hc = container_of(n, struct hash_cell, name_node); + int c = strcmp(hc->name, str); + if (!c) { dm_get(hc->md); return hc; } + n = c >= 0 ? n->rb_left : n->rb_right; + } return NULL; } static struct hash_cell *__get_uuid_cell(const char *str) { - struct hash_cell *hc; - unsigned int h = hash_str(str); + struct rb_node *n = uuid_rb_tree.rb_node; - list_for_each_entry (hc, _uuid_buckets + h, uuid_list) - if (!strcmp(hc->uuid, str)) { + while (n) { + struct hash_cell *hc = container_of(n, struct hash_cell, uuid_node); + int c = strcmp(hc->uuid, str); + if (!c) { dm_get(hc->md); return hc; } + n = c >= 0 ? n->rb_left : n->rb_right; + } return NULL; } +static void __unlink_name(struct hash_cell *hc) +{ + if (hc->name_set) { + hc->name_set = false; + rb_erase(&hc->name_node, &name_rb_tree); + } +} + +static void __unlink_uuid(struct hash_cell *hc) +{ + if (hc->uuid_set) { + hc->uuid_set = false; + rb_erase(&hc->uuid_node, &uuid_rb_tree); + } +} + +static void __link_name(struct hash_cell *new_hc) +{ + struct rb_node **n, *parent; + + __unlink_name(new_hc); + + new_hc->name_set = true; + + n = &name_rb_tree.rb_node; + parent = NULL; + + while (*n) { + struct hash_cell *hc = container_of(*n, struct hash_cell, name_node); + int c = strcmp(hc->name, new_hc->name); + BUG_ON(!c); + parent = *n; + n = c >= 0 ? &hc->name_node.rb_left : &hc->name_node.rb_right; + } + + rb_link_node(&new_hc->name_node, parent, n); + rb_insert_color(&new_hc->name_node, &name_rb_tree); +} + +static void __link_uuid(struct hash_cell *new_hc) +{ + struct rb_node **n, *parent; + + __unlink_uuid(new_hc); + + new_hc->uuid_set = true; + + n = &uuid_rb_tree.rb_node; + parent = NULL; + + while (*n) { + struct hash_cell *hc = container_of(*n, struct hash_cell, uuid_node); + int c = strcmp(hc->uuid, new_hc->uuid); + BUG_ON(!c); + parent = *n; + n = c > 0 ? &hc->uuid_node.rb_left : &hc->uuid_node.rb_right; + } + + rb_link_node(&new_hc->uuid_node, parent, n); + rb_insert_color(&new_hc->uuid_node, &uuid_rb_tree); +} + static struct hash_cell *__get_dev_cell(uint64_t dev) { struct mapped_device *md; @@ -185,8 +223,7 @@ static struct hash_cell *alloc_cell(cons } } - INIT_LIST_HEAD(&hc->name_list); - INIT_LIST_HEAD(&hc->uuid_list); + hc->name_set = hc->uuid_set = false; hc->md = md; hc->new_map = NULL; return hc; @@ -226,16 +263,16 @@ static int dm_hash_insert(const char *na goto bad; } - list_add(&cell->name_list, _name_buckets + hash_str(name)); + __link_name(cell); if (uuid) { hc = __get_uuid_cell(uuid); if (hc) { - list_del(&cell->name_list); + __unlink_name(cell); dm_put(hc->md); goto bad; } - list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid)); + __link_uuid(cell); } dm_get(md); mutex_lock(&dm_hash_cells_mutex); @@ -256,9 +293,9 @@ static struct dm_table *__hash_remove(st struct dm_table *table; int srcu_idx; - /* remove from the dev hash */ - list_del(&hc->uuid_list); - list_del(&hc->name_list); + /* remove from the dev trees */ + __unlink_name(hc); + __unlink_uuid(hc); mutex_lock(&dm_hash_cells_mutex); dm_set_mdptr(hc->md, NULL); mutex_unlock(&dm_hash_cells_mutex); @@ -279,7 +316,8 @@ static struct dm_table *__hash_remove(st static void dm_hash_remove_all(bool keep_open_devices, bool mark_deferred, bool only_deferred) { - int i, dev_skipped; + int dev_skipped; + struct rb_node *n; struct hash_cell *hc; struct mapped_device *md; struct dm_table *t; @@ -289,40 +327,39 @@ retry: down_write(&_hash_lock); - for (i = 0; i < NUM_BUCKETS; i++) { - list_for_each_entry(hc, _name_buckets + i, name_list) { - md = hc->md; - dm_get(md); - - if (keep_open_devices && - dm_lock_for_deletion(md, mark_deferred, only_deferred)) { - dm_put(md); - dev_skipped++; - continue; - } + for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) { + hc = container_of(n, struct hash_cell, name_node); + md = hc->md; + dm_get(md); + + if (keep_open_devices && + dm_lock_for_deletion(md, mark_deferred, only_deferred)) { + dm_put(md); + dev_skipped++; + continue; + } - t = __hash_remove(hc); + t = __hash_remove(hc); - up_write(&_hash_lock); + up_write(&_hash_lock); - if (t) { - dm_sync_table(md); - dm_table_destroy(t); - } - dm_put(md); - if (likely(keep_open_devices)) - dm_destroy(md); - else - dm_destroy_immediate(md); - - /* - * Some mapped devices may be using other mapped - * devices, so repeat until we make no further - * progress. If a new mapped device is created - * here it will also get removed. - */ - goto retry; + if (t) { + dm_sync_table(md); + dm_table_destroy(t); } + dm_put(md); + if (likely(keep_open_devices)) + dm_destroy(md); + else + dm_destroy_immediate(md); + + /* + * Some mapped devices may be using other mapped + * devices, so repeat until we make no further + * progress. If a new mapped device is created + * here it will also get removed. + */ + goto retry; } up_write(&_hash_lock); @@ -340,7 +377,7 @@ static void __set_cell_uuid(struct hash_ hc->uuid = new_uuid; mutex_unlock(&dm_hash_cells_mutex); - list_add(&hc->uuid_list, _uuid_buckets + hash_str(new_uuid)); + __link_uuid(hc); } /* @@ -354,14 +391,14 @@ static char *__change_cell_name(struct h /* * Rename and move the name cell. */ - list_del(&hc->name_list); + __unlink_name(hc); old_name = hc->name; mutex_lock(&dm_hash_cells_mutex); hc->name = new_name; mutex_unlock(&dm_hash_cells_mutex); - list_add(&hc->name_list, _name_buckets + hash_str(new_name)); + __link_name(hc); return old_name; } @@ -505,7 +542,7 @@ static void *get_result_buffer(struct dm static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_size) { - unsigned int i; + struct rb_node *n; struct hash_cell *hc; size_t len, needed = 0; struct gendisk *disk; @@ -518,11 +555,10 @@ static int list_devices(struct file *fil * Loop through all the devices working out how much * space we need. */ - for (i = 0; i < NUM_BUCKETS; i++) { - list_for_each_entry (hc, _name_buckets + i, name_list) { - needed += align_val(offsetof(struct dm_name_list, name) + strlen(hc->name) + 1); - needed += align_val(sizeof(uint32_t)); - } + for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) { + hc = container_of(n, struct hash_cell, name_node); + needed += align_val(offsetof(struct dm_name_list, name) + strlen(hc->name) + 1); + needed += align_val(sizeof(uint32_t)); } /* @@ -540,21 +576,20 @@ static int list_devices(struct file *fil /* * Now loop through filling out the names. */ - for (i = 0; i < NUM_BUCKETS; i++) { - list_for_each_entry (hc, _name_buckets + i, name_list) { - if (old_nl) - old_nl->next = (uint32_t) ((void *) nl - - (void *) old_nl); - disk = dm_disk(hc->md); - nl->dev = huge_encode_dev(disk_devt(disk)); - nl->next = 0; - strcpy(nl->name, hc->name); - - old_nl = nl; - event_nr = align_ptr(nl->name + strlen(hc->name) + 1); - *event_nr = dm_get_event_nr(hc->md); - nl = align_ptr(event_nr + 1); - } + for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) { + hc = container_of(n, struct hash_cell, name_node); + if (old_nl) + old_nl->next = (uint32_t) ((void *) nl - + (void *) old_nl); + disk = dm_disk(hc->md); + nl->dev = huge_encode_dev(disk_devt(disk)); + nl->next = 0; + strcpy(nl->name, hc->name); + + old_nl = nl; + event_nr = align_ptr(nl->name + strlen(hc->name) + 1); + *event_nr = dm_get_event_nr(hc->md); + nl = align_ptr(event_nr + 1); } /* * If mismatch happens, security may be compromised due to buffer @@ -1991,14 +2026,9 @@ int __init dm_interface_init(void) { int r; - r = dm_hash_init(); - if (r) - return r; - r = misc_register(&_dm_misc); if (r) { DMERR("misc_register failed for control device"); - dm_hash_exit(); return r; } -- dm-devel mailing list dm-devel@xxxxxxxxxx https://listman.redhat.com/mailman/listinfo/dm-devel