vread() has been linearly searching vmap_area_list to look up the
vmalloc areas it reads from. The same areas are also tracked by an
rb-tree (vmap_area_root), which offers logarithmic lookup. This patch
modifies vread() to use the rb-tree instead of the list; the speedup
for heavy /proc/kcore readers can be significant. Below are wall-clock
measurements of a Python application that uses the drgn debugging
library to read and interpret data from /proc/kcore.

Before the patch:
-----
$ time sudo sdb -e 'dbuf | head 2500 | wc'
(unsigned long)2500

real	0m21.128s
user	0m2.321s
sys	0m19.227s
-----

With the patch:
-----
$ time sudo sdb -e 'dbuf | head 2500 | wc'
(unsigned long)2500

real	0m1.870s
user	0m1.628s
sys	0m0.660s
-----

Signed-off-by: Serapheim Dimitropoulos <serapheim@xxxxxxxxxxx>
---
 mm/vmalloc.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 49ab9b6c001d..86343b879938 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2851,6 +2851,7 @@ long vread(char *buf, char *addr, unsigned long count)
 {
 	struct vmap_area *va;
 	struct vm_struct *vm;
+	struct rb_node *node;
 	char *vaddr, *buf_start = buf;
 	unsigned long buflen = count;
 	unsigned long n;
@@ -2860,17 +2861,15 @@ long vread(char *buf, char *addr, unsigned long count)
 		count = -(unsigned long) addr;
 
 	spin_lock(&vmap_area_lock);
-	list_for_each_entry(va, &vmap_area_list, list) {
-		if (!count)
-			break;
-
+	va = __find_vmap_area((unsigned long)addr);
+	if (!va)
+		goto finished;
+	while (count) {
 		if (!va->vm)
-			continue;
+			goto next_node;
 
 		vm = va->vm;
 		vaddr = (char *) vm->addr;
-		if (addr >= vaddr + get_vm_area_size(vm))
-			continue;
 		while (addr < vaddr) {
 			if (count == 0)
 				goto finished;
@@ -2889,6 +2888,12 @@
 		buf += n;
 		addr += n;
 		count -= n;
+
+next_node:
+		node = rb_next(&va->rb_node);
+		if (!node)
+			break;
+		va = rb_entry(node, struct vmap_area, rb_node);
 	}
 finished:
 	spin_unlock(&vmap_area_lock);
-- 
2.17.1
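
For context, here is a minimal userspace sketch of the lookup pattern the
patch adopts: find the starting area with a logarithmic tree lookup, then
walk the remaining areas in ascending address order via the in-order
successor. The names struct area, find_area() and next_area() are
hypothetical stand-ins used purely for illustration; the kernel code uses
struct vmap_area, __find_vmap_area(), rb_next() and rb_entry() under
vmap_area_lock, and its rb-tree is self-balancing, which this toy BST is not.

-----
/* Toy model of "logarithmic find, then walk forward in address order". */
#include <stdio.h>
#include <stdlib.h>

struct area {
	unsigned long start, size;
	struct area *left, *right, *parent;
};

/* Insert an area into a BST keyed by start address. */
static struct area *insert(struct area *root, struct area *a)
{
	if (!root)
		return a;
	if (a->start < root->start) {
		root->left = insert(root->left, a);
		root->left->parent = root;
	} else {
		root->right = insert(root->right, a);
		root->right->parent = root;
	}
	return root;
}

/* O(log n) on a balanced tree: find the area containing addr, if any. */
static struct area *find_area(struct area *root, unsigned long addr)
{
	while (root) {
		if (addr < root->start)
			root = root->left;
		else if (addr >= root->start + root->size)
			root = root->right;
		else
			return root;
	}
	return NULL;
}

/* In-order successor: the next area in ascending address order. */
static struct area *next_area(struct area *a)
{
	struct area *p;

	if (a->right) {
		a = a->right;
		while (a->left)
			a = a->left;
		return a;
	}
	for (p = a->parent; p && a == p->right; a = p, p = p->parent)
		;
	return p;
}

int main(void)
{
	struct area areas[] = {
		{ .start = 0x1000, .size = 0x1000 },
		{ .start = 0x3000, .size = 0x2000 },
		{ .start = 0x8000, .size = 0x1000 },
	};
	struct area *root = NULL, *a;

	for (size_t i = 0; i < sizeof(areas) / sizeof(areas[0]); i++)
		root = insert(root, &areas[i]);

	/* Start at the area covering 0x3800, then walk forward. */
	for (a = find_area(root, 0x3800); a; a = next_area(a))
		printf("area [%#lx, %#lx)\n", a->start, a->start + a->size);

	return 0;
}
-----

Printed output of the sketch would be the two areas at and above 0x3800;
vread() does the analogous thing, starting at the vmap_area that covers
addr and stepping through successors until count bytes have been copied.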