Store busy-VA objects in per-CPU zones. A va->va_start address is
mapped to the zone it belongs to; the addr_to_cvz() helper performs
that address-to-zone conversion. This spreads VAs across CPUs, so
access scales with the number of CPUs in the system.

Please note: since the zone size is currently set to ULONG_MAX,
everything is still bound to, and therefore accessed through, CPU_0,
so this patch does not change the existing behavior yet.

The global vmap_area_lock and vmap_area_root are removed, as they are
no longer needed. The vmap_area_list is kept but remains _empty_; it
is exported for kexec only.

vmallocinfo and vread() have to be reworked to handle multiple zones.
As of this patch they handle only one zone, i.e. the case when the
cache is disabled.

Signed-off-by: Uladzislau Rezki (Sony) <urezki@xxxxxxxxx>
---

(For illustration, a small user-space sketch of the per-zone locking
pattern is appended after the patch.)

 mm/vmalloc.c | 127 ++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 84 insertions(+), 43 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f6da2590b4de..a9170fe19909 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -729,11 +729,9 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
 
 #define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0
 
-static DEFINE_SPINLOCK(vmap_area_lock);
 static DEFINE_SPINLOCK(free_vmap_area_lock);
 /* Export for kexec only */
 LIST_HEAD(vmap_area_list);
-static struct rb_root vmap_area_root = RB_ROOT;
 static bool vmap_initialized __read_mostly;
 
 static struct rb_root purge_vmap_area_root = RB_ROOT;
@@ -823,7 +821,7 @@ addr_to_cpu(unsigned long addr)
 }
 
 static inline struct cpu_vmap_zone *
-__maybe_unused addr_to_cvz(unsigned long addr)
+addr_to_cvz(unsigned long addr)
 {
 	return &per_cpu(cpu_vmap_zone, addr_to_cpu(addr));
 }
@@ -859,10 +857,10 @@ unsigned long vmalloc_nr_pages(void)
 }
 
 /* Look up the first VA which satisfies addr < va_end, NULL if none. */
-static struct vmap_area *find_vmap_area_exceed_addr(unsigned long addr)
+static struct vmap_area *find_vmap_area_exceed_addr(unsigned long addr, struct rb_root *root)
 {
 	struct vmap_area *va = NULL;
-	struct rb_node *n = vmap_area_root.rb_node;
+	struct rb_node *n = root->rb_node;
 
 	addr = (unsigned long)kasan_reset_tag((void *)addr);
 
@@ -1608,12 +1606,14 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
  */
 static void free_vmap_area(struct vmap_area *va)
 {
+	struct cpu_vmap_zone *z = addr_to_cvz(va->va_start);
+
 	/*
 	 * Remove from the busy tree/list.
	 */
-	spin_lock(&vmap_area_lock);
-	unlink_va(va, &vmap_area_root);
-	spin_unlock(&vmap_area_lock);
+	fbl_lock(z, BUSY);
+	unlink_va(va, &fbl_root(z, BUSY));
+	fbl_unlock(z, BUSY);
 
 	/*
 	 * Insert/Merge it back to the free tree/list.
@@ -1656,6 +1656,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 				int node, gfp_t gfp_mask,
 				unsigned long va_flags)
 {
+	struct cpu_vmap_zone *z;
 	struct vmap_area *va;
 	unsigned long freed;
 	unsigned long addr;
@@ -1701,9 +1702,11 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 	va->vm = NULL;
 	va->flags = va_flags;
 
-	spin_lock(&vmap_area_lock);
-	insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
-	spin_unlock(&vmap_area_lock);
+	z = addr_to_cvz(va->va_start);
+
+	fbl_lock(z, BUSY);
+	insert_vmap_area(va, &fbl_root(z, BUSY), &fbl_head(z, BUSY));
+	fbl_unlock(z, BUSY);
 
 	BUG_ON(!IS_ALIGNED(va->va_start, align));
 	BUG_ON(va->va_start < vstart);
@@ -1926,24 +1929,26 @@ static void free_unmap_vmap_area(struct vmap_area *va)
 
 struct vmap_area *find_vmap_area(unsigned long addr)
 {
+	struct cpu_vmap_zone *z = addr_to_cvz(addr);
 	struct vmap_area *va;
 
-	spin_lock(&vmap_area_lock);
-	va = __find_vmap_area(addr, &vmap_area_root);
-	spin_unlock(&vmap_area_lock);
+	fbl_lock(z, BUSY);
+	va = __find_vmap_area(addr, &fbl_root(z, BUSY));
+	fbl_unlock(z, BUSY);
 
 	return va;
 }
 
 static struct vmap_area *find_unlink_vmap_area(unsigned long addr)
 {
+	struct cpu_vmap_zone *z = addr_to_cvz(addr);
 	struct vmap_area *va;
 
-	spin_lock(&vmap_area_lock);
-	va = __find_vmap_area(addr, &vmap_area_root);
+	fbl_lock(z, BUSY);
+	va = __find_vmap_area(addr, &fbl_root(z, BUSY));
 	if (va)
-		unlink_va(va, &vmap_area_root);
-	spin_unlock(&vmap_area_lock);
+		unlink_va(va, &fbl_root(z, BUSY));
+	fbl_unlock(z, BUSY);
 
 	return va;
 }
@@ -2095,14 +2100,17 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
 
 static void free_vmap_block(struct vmap_block *vb)
 {
+	struct cpu_vmap_zone *z;
 	struct vmap_block *tmp;
 
 	tmp = xa_erase(&vmap_blocks, addr_to_vb_idx(vb->va->va_start));
 	BUG_ON(tmp != vb);
 
-	spin_lock(&vmap_area_lock);
-	unlink_va(vb->va, &vmap_area_root);
-	spin_unlock(&vmap_area_lock);
+	z = addr_to_cvz(vb->va->va_start);
+
+	fbl_lock(z, BUSY);
+	unlink_va(vb->va, &fbl_root(z, BUSY));
+	fbl_unlock(z, BUSY);
 
 	free_vmap_area_noflush(vb->va);
 	kfree_rcu(vb, rcu_head);
@@ -2484,9 +2492,11 @@ static inline void setup_vmalloc_vm_locked(struct vm_struct *vm,
 static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
 			      unsigned long flags, const void *caller)
 {
-	spin_lock(&vmap_area_lock);
+	struct cpu_vmap_zone *z = addr_to_cvz(va->va_start);
+
+	fbl_lock(z, BUSY);
 	setup_vmalloc_vm_locked(vm, va, flags, caller);
-	spin_unlock(&vmap_area_lock);
+	fbl_unlock(z, BUSY);
 }
 
 static void clear_vm_uninitialized_flag(struct vm_struct *vm)
@@ -3605,6 +3615,7 @@ static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags
  */
 long vread(char *buf, char *addr, unsigned long count)
 {
+	struct cpu_vmap_zone *z;
 	struct vmap_area *va;
 	struct vm_struct *vm;
 	char *vaddr, *buf_start = buf;
@@ -3617,8 +3628,11 @@ long vread(char *buf, char *addr, unsigned long count)
 	if ((unsigned long) addr + count < count)
 		count = -(unsigned long) addr;
 
-	spin_lock(&vmap_area_lock);
-	va = find_vmap_area_exceed_addr((unsigned long)addr);
+	/* Hooked to CPU0 because a cache is not activated. */
+	z = &per_cpu(cpu_vmap_zone, 0);
+	fbl_lock(z, BUSY);
+
+	va = find_vmap_area_exceed_addr((unsigned long)addr, &fbl_root(z, BUSY));
 	if (!va)
 		goto finished;
 
@@ -3626,7 +3640,7 @@ long vread(char *buf, char *addr, unsigned long count)
 	if ((unsigned long)addr + count <= va->va_start)
 		goto finished;
 
-	list_for_each_entry_from(va, &vmap_area_list, list) {
+	list_for_each_entry_from(va, &fbl_head(z, BUSY), list) {
 		if (!count)
 			break;
 
@@ -3674,7 +3688,7 @@ long vread(char *buf, char *addr, unsigned long count)
 		count -= n;
 	}
 finished:
-	spin_unlock(&vmap_area_lock);
+	fbl_unlock(z, BUSY);
 
 	if (buf == buf_start)
 		return 0;
@@ -4014,14 +4028,15 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 	}
 
 	/* insert all vm's */
-	spin_lock(&vmap_area_lock);
 	for (area = 0; area < nr_vms; area++) {
-		insert_vmap_area(vas[area], &vmap_area_root, &vmap_area_list);
+		struct cpu_vmap_zone *z = addr_to_cvz(vas[area]->va_start);
 
+		fbl_lock(z, BUSY);
+		insert_vmap_area(vas[area], &fbl_root(z, BUSY), &fbl_head(z, BUSY));
 		setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC,
 				 pcpu_get_vm_areas);
+		fbl_unlock(z, BUSY);
 	}
-	spin_unlock(&vmap_area_lock);
 
 	/*
 	 * Mark allocated areas as accessible. Do it now as a best-effort
@@ -4145,24 +4160,24 @@ bool vmalloc_dump_obj(void *object)
 #ifdef CONFIG_PROC_FS
 static void *s_start(struct seq_file *m, loff_t *pos)
 	__acquires(&vmap_purge_lock)
-	__acquires(&vmap_area_lock)
+	__acquires(&fbl(&per_cpu(cpu_vmap_zone, 0), BUSY, lock))
 {
 	mutex_lock(&vmap_purge_lock);
-	spin_lock(&vmap_area_lock);
+	fbl_lock((&per_cpu(cpu_vmap_zone, 0)), BUSY);
 
-	return seq_list_start(&vmap_area_list, *pos);
+	return seq_list_start(&fbl_head((&per_cpu(cpu_vmap_zone, 0)), BUSY), *pos);
 }
 
 static void *s_next(struct seq_file *m, void *p, loff_t *pos)
 {
-	return seq_list_next(p, &vmap_area_list, pos);
+	return seq_list_next(p, &fbl_head((&per_cpu(cpu_vmap_zone, 0)), BUSY), pos);
 }
 
 static void s_stop(struct seq_file *m, void *p)
-	__releases(&vmap_area_lock)
+	__releases(&fbl(&per_cpu(cpu_vmap_zone, 0), BUSY, lock))
 	__releases(&vmap_purge_lock)
 {
-	spin_unlock(&vmap_area_lock);
+	fbl_unlock((&per_cpu(cpu_vmap_zone, 0)), BUSY);
 	mutex_unlock(&vmap_purge_lock);
 }
 
@@ -4258,7 +4273,7 @@ static int s_show(struct seq_file *m, void *p)
 	 * As a final step, dump "unpurged" areas.
 	 */
 final:
-	if (list_is_last(&va->list, &vmap_area_list))
+	if (list_is_last(&va->list, &fbl_head((&per_cpu(cpu_vmap_zone, 0)), BUSY)))
 		show_purge_info(m);
 
 	return 0;
@@ -4289,7 +4304,8 @@ static void vmap_init_free_space(void)
 {
 	unsigned long vmap_start = 1;
 	const unsigned long vmap_end = ULONG_MAX;
-	struct vmap_area *busy, *free;
+	struct vmap_area *free;
+	struct vm_struct *busy;
 
 	/*
 	 *     B     F     B     B     B     F
@@ -4297,12 +4313,12 @@ static void vmap_init_free_space(void)
 	 *  |           The KVA space           |
 	 *  |<--------------------------------->|
 	 */
-	list_for_each_entry(busy, &vmap_area_list, list) {
-		if (busy->va_start - vmap_start > 0) {
+	for (busy = vmlist; busy; busy = busy->next) {
+		if (busy->addr - vmap_start > 0) {
 			free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
 			if (!WARN_ON_ONCE(!free)) {
 				free->va_start = vmap_start;
-				free->va_end = busy->va_start;
+				free->va_end = (unsigned long) busy->addr;
 
 				insert_vmap_area_augment(free, NULL,
 					&free_vmap_area_root,
@@ -4310,7 +4326,7 @@ static void vmap_init_free_space(void)
 			}
 		}
 
-		vmap_start = busy->va_end;
+		vmap_start = (unsigned long) busy->addr + busy->size;
 	}
 
 	if (vmap_end - vmap_start > 0) {
@@ -4326,6 +4342,22 @@ static void vmap_init_free_space(void)
 	}
 }
 
+static void vmap_init_pcpu_zones(void)
+{
+	struct cpu_vmap_zone *z;
+	int i, j;
+
+	for_each_possible_cpu(i) {
+		z = per_cpu_ptr(&cpu_vmap_zone, i);
+
+		for (j = 0; j < ARRAY_SIZE(z->fbl); j++) {
+			INIT_LIST_HEAD(&z->fbl[j].head);
+			z->fbl[j].root = RB_ROOT;
+			spin_lock_init(&z->fbl[j].lock);
+		}
+	}
+}
+
 void __init vmalloc_init(void)
 {
 	struct vmap_area *va;
@@ -4349,8 +4381,15 @@ void __init vmalloc_init(void)
 		INIT_WORK(&p->wq, delayed_vfree_work);
 	}
 
+	/*
+	 * Setup per-cpu data before importing vmlist.
+	 */
+	vmap_init_pcpu_zones();
+
 	/* Import existing vmlist entries. */
 	for (tmp = vmlist; tmp; tmp = tmp->next) {
+		struct cpu_vmap_zone *z;
+
 		va = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
 		if (WARN_ON_ONCE(!va))
 			continue;
@@ -4358,7 +4397,9 @@ void __init vmalloc_init(void)
 		va->va_start = (unsigned long)tmp->addr;
 		va->va_end = va->va_start + tmp->size;
 		va->vm = tmp;
-		insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
+
+		z = addr_to_cvz(va->va_start);
+		insert_vmap_area(va, &fbl_root(z, BUSY), &fbl_head(z, BUSY));
 	}
 
 	/*
-- 
2.30.2
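
As promised above, here is a small, self-contained user-space sketch
of the per-zone idea, for readers who want to see it outside of
mm/vmalloc.c. It is illustrative only: NR_ZONES, ZONE_SIZE, struct
vmap_zone, zone_of(), insert_busy_va() and find_busy_va() are made up
for this example and do not exist in the kernel. A pthread mutex and
a singly linked list stand in for the per-zone BUSY spinlock and the
rb-tree/list pair, and zone_of() mimics what addr_to_cvz() does with
the real per-CPU zone array.

/*
 * Illustrative user-space model of per-zone busy-VA storage.
 * Build: cc -pthread zone_sketch.c
 */
#include <pthread.h>
#include <stdio.h>

#define NR_ZONES	4		/* stands in for the number of CPUs */
#define ZONE_SIZE	(1UL << 20)	/* stands in for the per-zone VA stride */

struct busy_va {
	unsigned long va_start;
	unsigned long va_end;
	struct busy_va *next;
};

struct vmap_zone {
	pthread_mutex_t lock;		/* per-zone "BUSY" lock */
	struct busy_va *busy_head;	/* per-zone busy list */
};

static struct vmap_zone zones[NR_ZONES];

/* Rough counterpart of vmap_init_pcpu_zones(): init each zone's lock/list. */
static void init_zones(void)
{
	for (int i = 0; i < NR_ZONES; i++) {
		pthread_mutex_init(&zones[i].lock, NULL);
		zones[i].busy_head = NULL;
	}
}

/* Rough counterpart of addr_to_cvz(): map an address to its zone. */
static struct vmap_zone *zone_of(unsigned long addr)
{
	return &zones[(addr / ZONE_SIZE) % NR_ZONES];
}

/* Insert a busy VA while holding only the owning zone's lock. */
static void insert_busy_va(struct busy_va *va)
{
	struct vmap_zone *z = zone_of(va->va_start);

	pthread_mutex_lock(&z->lock);	/* fbl_lock(z, BUSY) in the patch */
	va->next = z->busy_head;
	z->busy_head = va;
	pthread_mutex_unlock(&z->lock);	/* fbl_unlock(z, BUSY) */
}

/* Look up a busy VA; again only the owning zone is locked. */
static struct busy_va *find_busy_va(unsigned long addr)
{
	struct vmap_zone *z = zone_of(addr);
	struct busy_va *va;

	pthread_mutex_lock(&z->lock);
	for (va = z->busy_head; va; va = va->next)
		if (addr >= va->va_start && addr < va->va_end)
			break;
	pthread_mutex_unlock(&z->lock);

	return va;
}

int main(void)
{
	struct busy_va a = { .va_start = 0x100000, .va_end = 0x101000 };
	struct busy_va b = { .va_start = 0x200000, .va_end = 0x202000 };

	init_zones();
	insert_busy_va(&a);	/* lands in one zone */
	insert_busy_va(&b);	/* lands in a different zone */

	printf("0x100800 -> %p\n", (void *)find_busy_va(0x100800));
	printf("0x700000 -> %p\n", (void *)find_busy_va(0x700000));
	return 0;
}

Two threads working on addresses that map to different zones never
touch the same lock in this model. With the zone stride effectively
set to ULONG_MAX, as in this patch, every address would fall into
zone 0 and the model degenerates to a single global lock, which is
why the patch is behavior-neutral until a later change reduces the
zone size.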