Introduce a fast path for the allocation sequence. It consists of a
per-cpu path and a fallback mechanism that is used when a request
cannot be satisfied by the fast track.

The fast track pre-loads a chunk from the global vmap heap directly
into its per-cpu zone and then clips that chunk according to the
allocation request.

This technique offloads the global free_vmap_area_lock, so that the
allocation path is serialized per CPU instead of behind one
system-wide lock.

Signed-off-by: Uladzislau Rezki (Sony) <urezki@xxxxxxxxx>
---
 mm/vmalloc.c | 127 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 123 insertions(+), 4 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index fe993c0561dd..8054b8bf6c18 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1642,6 +1642,93 @@ preload_this_cpu_lock(spinlock_t *lock, gfp_t gfp_mask, int node)
 		kmem_cache_free(vmap_area_cachep, va);
 }
 
+static unsigned long
+this_cpu_zone_alloc_fill(struct cpu_vmap_zone *z,
+		unsigned long size, unsigned long align,
+		gfp_t gfp_mask, int node)
+{
+	unsigned long addr = VMALLOC_END;
+	struct vmap_area *va;
+
+	/*
+	 * This can still race: one task sets the progress
+	 * flag to 1, a second one gets preempted on entry,
+	 * the first clears the flag and the second proceeds
+	 * with an extra prefetch.
+	 */
+	if (atomic_xchg(&z->fill_in_progress, 1))
+		return addr;
+
+	va = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node);
+	if (unlikely(!va))
+		goto out;
+
+	spin_lock(&free_vmap_area_lock);
+	addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list,
+		cvz_size, 1, VMALLOC_START, VMALLOC_END);
+	spin_unlock(&free_vmap_area_lock);
+
+	if (addr == VMALLOC_END) {
+		kmem_cache_free(vmap_area_cachep, va);
+		goto out;
+	}
+
+	va->va_start = addr;
+	va->va_end = addr + cvz_size;
+
+	fbl_lock(z, FREE);
+	va = merge_or_add_vmap_area_augment(va,
+		&fbl_root(z, FREE), &fbl_head(z, FREE));
+	addr = va_alloc(va, &fbl_root(z, FREE), &fbl_head(z, FREE),
+		size, align, VMALLOC_START, VMALLOC_END);
+	fbl_unlock(z, FREE);
+
+out:
+	atomic_set(&z->fill_in_progress, 0);
+	return addr;
+}
+
+static unsigned long
+this_cpu_zone_alloc(unsigned long size, unsigned long align, gfp_t gfp_mask, int node)
+{
+	struct cpu_vmap_zone *z = raw_cpu_ptr(&cpu_vmap_zone);
+	unsigned long extra = align > PAGE_SIZE ? align : 0;
+	unsigned long addr = VMALLOC_END, left = 0;
+
+	/*
+	 * Per-cpu zones are disabled, fall back to the global heap.
+	 */
+	if (cvz_size == ULONG_MAX)
+		return addr;
+
+	/*
+	 * Any allocation that does not fit at least three
+	 * times into a zone falls back to the global heap.
+	 */
+	if (cvz_size / (size + extra) < 3)
+		return addr;
+
+	if (RB_EMPTY_ROOT(&fbl_root(z, FREE)))
+		goto fill;
+
+	fbl_lock(z, FREE);
+	addr = __alloc_vmap_area(&fbl_root(z, FREE), &fbl_head(z, FREE),
+		size, align, VMALLOC_START, VMALLOC_END);
+
+	if (addr == VMALLOC_END)
+		left = get_subtree_max_size(fbl_root(z, FREE).rb_node);
+	fbl_unlock(z, FREE);
+
+fill:
+	/*
+	 * Refill below the low watermark (less than four pages left).
+	 */
+	if (addr == VMALLOC_END && left < 4 * PAGE_SIZE)
+		addr = this_cpu_zone_alloc_fill(z, size, align, gfp_mask, node);
+
+	return addr;
+}
+
 /*
  * Allocate a region of KVA of the specified size and alignment, within the
  * vstart and vend.
@@ -1678,11 +1765,21 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 	 */
 	kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask);
 
+	/*
+	 * Try the per-cpu fast path first.
+	 */
+	if (vstart == VMALLOC_START && vend == VMALLOC_END)
+		addr = this_cpu_zone_alloc(size, align, gfp_mask, node);
+	else
+		addr = vend;
+
 retry:
-	preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node);
-	addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list,
-		size, align, vstart, vend);
-	spin_unlock(&free_vmap_area_lock);
+	if (addr == vend) {
+		preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node);
+		addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list,
+			size, align, vstart, vend);
+		spin_unlock(&free_vmap_area_lock);
+	}
 
 	trace_alloc_vmap_area(addr, size, align, vstart, vend,
 		addr == vend);
@@ -1827,6 +1924,27 @@ purge_cpu_vmap_zone(struct cpu_vmap_zone *z)
 	return num_purged_areas;
 }
 
+static void
+drop_cpu_vmap_cache(struct cpu_vmap_zone *z)
+{
+	struct vmap_area *va, *n_va;
+	LIST_HEAD(free_head);
+
+	if (RB_EMPTY_ROOT(&fbl_root(z, FREE)))
+		return;
+
+	fbl_lock(z, FREE);
+	WRITE_ONCE(fbl(z, FREE, root.rb_node), NULL);
+	list_replace_init(&fbl_head(z, FREE), &free_head);
+	fbl_unlock(z, FREE);
+
+	spin_lock(&free_vmap_area_lock);
+	list_for_each_entry_safe(va, n_va, &free_head, list)
+		merge_or_add_vmap_area_augment(va,
+			&free_vmap_area_root, &free_vmap_area_list);
+	spin_unlock(&free_vmap_area_lock);
+}
+
 /*
  * Purges all lazily-freed vmap areas.
  */
@@ -1868,6 +1986,7 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
 		for_each_possible_cpu(i) {
 			z = per_cpu_ptr(&cpu_vmap_zone, i);
 			num_purged_areas += purge_cpu_vmap_zone(z);
+			drop_cpu_vmap_cache(z);
 		}
 	}
 
-- 
2.30.2
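
For reference, the batching idea behind the fast path can be illustrated
with a small userspace sketch. This is not the kernel code: the names
(zone_alloc, zone_refill, ZONE_CHUNK, global_base) are made up for the
illustration, a mutex plus a bump pointer stand in for
free_vmap_area_lock and the free-area tree, and a __thread variable
stands in for a per-cpu zone. It only shows why pre-loading a chunk into
a local zone cuts the number of times the global lock is taken.

/*
 * Minimal sketch of the per-cpu fast-path idea, assuming a trivial
 * bump allocator as the "global heap". All names are hypothetical.
 */
#include <pthread.h>
#include <stdio.h>

#define ZONE_CHUNK	(64 * 4096UL)	/* space pre-loaded into a zone at once */

static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long global_base = 0x100000UL;	/* stand-in for the global vmap heap */
static unsigned long global_lock_taken;		/* slow-path counter */

/* A __thread zone stands in for a per-cpu zone. */
static __thread struct {
	unsigned long start;
	unsigned long left;
} zone;

/* Slow path: carve a whole chunk out of the global heap under the lock. */
static void zone_refill(void)
{
	pthread_mutex_lock(&global_lock);
	zone.start = global_base;
	global_base += ZONE_CHUNK;
	global_lock_taken++;
	pthread_mutex_unlock(&global_lock);

	zone.left = ZONE_CHUNK;
}

/* Fast path: clip the request out of the local zone, no global lock taken. */
static unsigned long zone_alloc(unsigned long size)
{
	unsigned long addr;

	if (zone.left < size)
		zone_refill();

	addr = zone.start;
	zone.start += size;
	zone.left -= size;

	return addr;
}

int main(void)
{
	int i;

	/* 1024 page-sized requests refill the zone, and thus lock, only 16 times. */
	for (i = 0; i < 1024; i++)
		(void)zone_alloc(4096);

	printf("global lock taken %lu times for 1024 allocations\n",
	       global_lock_taken);
	return 0;
}

The patch makes the same trade-off: the fill path still takes
free_vmap_area_lock, but only once per cvz_size chunk rather than once
per request, and the fill_in_progress flag avoids redundant refills when
several tasks miss in the same zone at the same time.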