Re: KASAN-related VMAP allocation errors in debug kernels with many logical CPUs

> 
> Was lucky to grab that system again. Compiled a custom 6.0 kernel in which I printk all vmap allocation errors, including the range, similar to what you suggested above (but printk only on the failure path).
> 
> So these are the failing allocations:
> 
> # dmesg | grep " -> alloc"
> [  168.862511] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.863020] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.863841] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.864562] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.864646] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.865688] -> alloc 319488 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.865718] -> alloc 319488 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.866098] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.866551] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.866752] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.867147] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.867210] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.867312] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.867650] -> alloc 319488 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.867767] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.867815] -> alloc 319488 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.867815] -> alloc 319488 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.868059] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.868463] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.868822] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.868919] -> alloc 319488 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.869843] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.869854] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.870174] -> alloc 319488 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.870611] -> alloc 319488 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.870806] -> alloc 319488 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.870982] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  168.879000] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.449101] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.449834] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.450667] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.451539] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.452326] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.453239] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.454052] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.454697] -> alloc 319488 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.454811] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.455575] -> alloc 319488 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.455754] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.461450] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.805223] -> alloc 319488 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.805507] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.929577] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.930389] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.931244] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.932035] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.932796] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.933592] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.934470] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.935344] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  169.970641] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  170.191600] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  170.191875] -> alloc 40960 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  170.241901] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  170.242708] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  170.243465] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  170.244211] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  170.245060] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  170.245868] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  170.246433] -> alloc 40960 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  170.246657] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  170.247451] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  170.248226] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  170.248902] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  170.249704] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  170.250497] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  170.251244] -> alloc 319488 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  170.252076] -> alloc 319488 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  170.587168] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  170.598995] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  171.865721] -> alloc 2506752 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> [  172.138557] -> alloc 917504 size, align: 4096, vstart: 18446744072639352832, vend: 18446744073692774400
> 
OK. It is related to a module vmap space allocation when a module is
inserted. I wonder why a module requires 2.5 MB, though? That seems
like a lot to me.
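(As a quick sanity check, a suggestion not from this thread: the second
column of "lsmod" output is each module's core size in bytes, so

# lsmod | sort -k2 -rn | head

on the affected system would show whether ~2.5 MB per module is typical
there. KASAN-instrumented debug kernels inflate module text
considerably, so sizes in that range would not be surprising.)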

> 
> Really looks like only module vmap space. ~ 1 GiB of vmap module space ...
> 
If each module allocation request is 2.5 MB, 1 GB of address space fits
roughly 400 modules (1 GiB / 2.5 MB ~= 400).

"lsmod | wc -l"? How many modules your system has?

> I did try:
> 
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index dd6cdb201195..199154a2228a 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -72,6 +72,8 @@ early_param("nohugevmalloc", set_nohugevmalloc);
>  static const bool vmap_allow_huge = false;
>  #endif /* CONFIG_HAVE_ARCH_HUGE_VMALLOC */
> 
> +static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0);
> +
>  bool is_vmalloc_addr(const void *x)
>  {
>         unsigned long addr = (unsigned long)kasan_reset_tag(x);
> @@ -1574,7 +1576,6 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
>         struct vmap_area *va;
>         unsigned long freed;
>         unsigned long addr;
> -       int purged = 0;
>         int ret;
> 
>         BUG_ON(!size);
> @@ -1631,23 +1632,22 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
>         return va;
> 
>  overflow:
> -       if (!purged) {
> +       if (atomic_long_read(&vmap_lazy_nr)) {
>                 purge_vmap_area_lazy();
> -               purged = 1;
>                 goto retry;
>         }
> 
>         freed = 0;
>         blocking_notifier_call_chain(&vmap_notify_list, 0, &freed);
> 
> -       if (freed > 0) {
> -               purged = 0;
> +       if (freed > 0)
>                 goto retry;
> -       }
> 
> -       if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
> +       if (!(gfp_mask & __GFP_NOWARN)) {
>                 pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
>                         size);
> +               printk("-> alloc %lu size, align: %lu, vstart: %lu, vend: %lu\n", size, align, vstart, vend);
> +       }
> 
>         kmem_cache_free(vmap_area_cachep, va);
>         return ERR_PTR(-EBUSY);
> @@ -1690,8 +1690,6 @@ static unsigned long lazy_max_pages(void)
>         return log * (32UL * 1024 * 1024 / PAGE_SIZE);
>  }
> 
> -static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0);
> -
> 
> 
> But that didn't help at all. That system is crazy:
> 
If an allocation fails, the next step is to drain outstanding
lazily-freed vmap areas. The caller does this from its own context,
retries one more time, and only after that is a failure message printed.
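
For reference, this is the stock v6.0 failure path in alloc_vmap_area()
that implements the sequence above (condensed from mm/vmalloc.c,
comments mine):

<snip v6.0, condensed>
overflow:
	if (!purged) {
		/* 1) Drain lazily-freed vmap areas and retry the search. */
		purge_vmap_area_lazy();
		purged = 1;
		goto retry;
	}

	/* 2) Ask registered notifiers to free something and retry again. */
	freed = 0;
	blocking_notifier_call_chain(&vmap_notify_list, 0, &freed);
	if (freed > 0) {
		purged = 0;
		goto retry;
	}

	/* 3) Only after both retries fail is the warning printed. */
	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
		pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
			size);
<snip>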

>
> # lspci | wc -l
> 1117
> 
So you probably need a lot of modules to make your HW fully functional :)

> 
> What I find interesting is that we have these recurring allocations of similar sizes failing.
> I wonder if user space is capable of loading the same kernel module concurrently, triggering
> a massive number of allocations, with the module loading code only figuring out later that
> the module has already been loaded and backing off.
> 
A request to allocate memory is expected to succeed unless there is a
real error, such as running out of address space or memory.

>
> My best guess would be that module loading is serialized completely, but for some reason,
> something seems to go wrong with a lot of concurrency ...
> 
lazy_max_pages() depends on the number of online CPUs. Probably something
related...
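
For reference, lazy_max_pages() in v6.0 scales with the logarithm of the
number of online CPUs:

<snip v6.0>
static unsigned long lazy_max_pages(void)
{
	unsigned int log;

	log = fls(num_online_cpus());

	return log * (32UL * 1024 * 1024 / PAGE_SIZE);
}
<snip>

With hundreds of logical CPUs that is a threshold of several hundred MB
worth of lazily-freed pages, which may sit unpurged in exactly the range
a module allocation needs.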

I wrote a small patch to dump the modules' address space when a failure occurs:

<snip v6.0>
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 83b54beb12fa..88d323310df5 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1580,6 +1580,37 @@ preload_this_cpu_lock(spinlock_t *lock, gfp_t gfp_mask, int node)
 		kmem_cache_free(vmap_area_cachep, va);
 }
 
+static void
+dump_modules_free_space(unsigned long vstart, unsigned long vend)
+{
+	unsigned long va_start, va_end;
+	unsigned int total = 0;
+	struct vmap_area *va;
+
+	if (vend != MODULES_END)
+		return;
+
+	trace_printk("--- Dump a modules address space: 0x%lx - 0x%lx\n", vstart, vend);
+
+	spin_lock(&free_vmap_area_lock);
+	list_for_each_entry(va, &free_vmap_area_list, list) {
+		va_start = (va->va_start > vstart) ? va->va_start : vstart;
+		va_end = (va->va_end < vend) ? va->va_end : vend;
+
+		if (va_start >= va_end)
+			continue;
+
+		if (va_start >= vstart && va_end <= vend) {
+			trace_printk(" va_free: 0x%lx - 0x%lx size=%lu\n",
+				va_start, va_end, va_end - va_start);
+			total += (va_end - va_start);
+		}
+	}
+
+	spin_unlock(&free_vmap_area_lock);
+	trace_printk("--- Total free: %u ---\n", total);
+}
+
 /*
  * Allocate a region of KVA of the specified size and alignment, within the
  * vstart and vend.
@@ -1663,10 +1694,13 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 		goto retry;
 	}
 
-	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
+	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
 		pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
 			size);
 
+		dump_modules_free_space(vstart, vend);
+	}
+
 	kmem_cache_free(vmap_area_cachep, va);
 	return ERR_PTR(-EBUSY);
 }
<snip>
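
Note that the dumps go through trace_printk(), so after reproducing the
failure they can be read from the ftrace ring buffer:

# cat /sys/kernel/debug/tracing/trace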

It would be good to understand whether we really run out of space there.
Adding a print of lazy_max_pages() and vmap_lazy_nr would also be good.
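
Something like this on the failure path would do (just a sketch; note
that lazy_max_pages() and vmap_lazy_nr are both defined below
alloc_vmap_area() in mm/vmalloc.c, so forward declarations are needed,
as your earlier diff already worked around for vmap_lazy_nr):

<snip>
	/* Sketch: compare the lazy-free threshold with what is pending. */
	pr_warn("lazy_max_pages: %lu, vmap_lazy_nr: %ld\n",
		lazy_max_pages(), atomic_long_read(&vmap_lazy_nr));
<snip>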

--
Uladzislau Rezki



