vms_abort_munmap_vmas() is a recovery path where, on entry, some VMAs have already been torn down halfway (in a way we can't undo) but are still present in the maple tree. At this point, we *must* remove the VMAs from the VMA tree, otherwise we get UAF. Because removing VMA tree nodes can require memory allocation, the existing code has an error path which tries to handle this by reattaching the VMAs; but that can't be done safely. A nicer way to fix it would probably be to preallocate enough maple tree nodes for the removal before the point of no return, or something like that; but for now, fix it the easy and kinda ugly way, by marking this allocation __GFP_NOFAIL. Fixes: 4f87153e82c4 ("mm: change failure of MAP_FIXED to restoring the gap on failure") Signed-off-by: Jann Horn <jannh@xxxxxxxxxx> --- This can be tested with the following reproducer (on a kernel built with CONFIG_KASAN=y, CONFIG_FAILSLAB=y, CONFIG_FAULT_INJECTION_DEBUG_FS=y, with the reproducer running as root): ``` typeof(x) __res = (x); \ if (__res == (typeof(x))-1) \ err(1, "SYSCHK(" #x ")"); \ __res; \ }) static void write_file(char *name, char *buf) { int fd = open(name, O_WRONLY); if (fd == -1) err(1, "unable to open for writing: %s", name); if (SYSCHK(write(fd, buf, strlen(buf))) != strlen(buf)) errx(1, "write %s", name); SYSCHK(close(fd)); } int main(void) { // make a large area with a bunch of VMAs char *area = SYSCHK(mmap(NULL, AREA_SIZE, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)); for (int off=0; off<AREA_SIZE; off += 0x2000) SYSCHK(mmap(area+off, 0x1000, PROT_READ, MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)); // open a file whose mappings use usbdev_vm_ops, and map it in part of this area int map_fd = SYSCHK(open("/dev/bus/usb/001/001", O_RDONLY)); void *map = SYSCHK(mmap(area, 0x1000, PROT_READ, MAP_SHARED|MAP_FIXED, map_fd, 0)); close(map_fd); // make RWX mapping request fail late SYSCHK(prctl(65/*PR_SET_MDWE*/, 1/*PR_MDWE_REFUSE_EXEC_GAIN*/, 0, 0, 0)); // some random other file int fd = SYSCHK(open("/etc/passwd", O_RDONLY)); /* disarm for now */ write_file("/sys/kernel/debug/failslab/probability", "0"); /* fail allocations of maple_node... */ write_file("/sys/kernel/debug/failslab/cache-filter", "Y"); write_file("/sys/kernel/slab/maple_node/failslab", "1"); /* ... even though it's a sleepable allocation... */ write_file("/sys/kernel/debug/failslab/ignore-gfp-wait", "N"); /* ... in this task... */ write_file("/sys/kernel/debug/failslab/task-filter", "Y"); write_file("/proc/self/make-it-fail", "1"); /* ... every time... */ write_file("/sys/kernel/debug/failslab/times", "-1"); /* ... after 8 successful allocations (value chosen experimentally)... */ write_file("/sys/kernel/debug/failslab/space", "2048"); // one object is 256 /* ... and print where the allocations actually failed... */ write_file("/sys/kernel/debug/failslab/verbose", "2"); /* ... and that's it, arm it */ write_file("/sys/kernel/debug/failslab/probability", "100"); // This mmap request will fail late due to MDWE. // The error recovery path will attempt to clear out the VMA pointers with a // spanning_store (which will be blocked by error injection). mmap(area, AREA_SIZE, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, fd, 0); /* disarm */ write_file("/sys/kernel/debug/failslab/probability", "0"); SYSCHK(munmap(map, 0x1000)); // UAF expected here system("cat /proc/$PPID/maps"); } ``` Result: ``` FAULT_INJECTION: forcing a failure. name failslab, interval 1, probability 100, space 256, times -1 CPU: 3 UID: 0 PID: 607 Comm: unmap-fail Not tainted 6.12.0-rc3-00013-geca631b8fe80 #518 [...] Call Trace: <TASK> dump_stack_lvl+0x80/0xa0 should_fail_ex+0x4d3/0x5c0 [...] should_failslab+0xc7/0x130 kmem_cache_alloc_noprof+0x73/0x3a0 [...] mas_alloc_nodes+0x3a3/0x690 mas_nomem+0xaa/0x1d0 mas_store_gfp+0x515/0xa80 [...] mmap_region+0xa96/0x2590 [...] do_mmap+0x71e/0xfe0 [...] vm_mmap_pgoff+0x17a/0x2f0 [...] ksys_mmap_pgoff+0x2ee/0x460 do_syscall_64+0x68/0x140 entry_SYSCALL_64_after_hwframe+0x76/0x7e [...] </TASK> mmap: unmap-fail: (607) Unable to abort munmap() operation ================================================================== BUG: KASAN: slab-use-after-free in dec_usb_memory_use_count+0x365/0x430 Read of size 8 at addr ffff88810e9ba8b8 by task unmap-fail/607 CPU: 3 UID: 0 PID: 607 Comm: unmap-fail Not tainted 6.12.0-rc3-00013-geca631b8fe80 #518 [...] Call Trace: <TASK> dump_stack_lvl+0x66/0xa0 print_report+0xce/0x670 [...] kasan_report+0xf7/0x130 [...] dec_usb_memory_use_count+0x365/0x430 remove_vma+0x76/0x120 vms_complete_munmap_vmas+0x447/0x750 do_vmi_align_munmap+0x4b9/0x700 [...] do_vmi_munmap+0x164/0x2e0 __vm_munmap+0x128/0x2a0 [...] __x64_sys_munmap+0x59/0x80 do_syscall_64+0x68/0x140 entry_SYSCALL_64_after_hwframe+0x76/0x7e [...] </TASK> Allocated by task 607: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 __kasan_kmalloc+0xaa/0xb0 usbdev_mmap+0x1a0/0xaf0 mmap_region+0xf6e/0x2590 do_mmap+0x71e/0xfe0 vm_mmap_pgoff+0x17a/0x2f0 ksys_mmap_pgoff+0x2ee/0x460 do_syscall_64+0x68/0x140 entry_SYSCALL_64_after_hwframe+0x76/0x7e Freed by task 607: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x60 __kasan_slab_free+0x4f/0x70 kfree+0x148/0x450 vms_clean_up_area+0x188/0x220 mmap_region+0xf1b/0x2590 do_mmap+0x71e/0xfe0 vm_mmap_pgoff+0x17a/0x2f0 ksys_mmap_pgoff+0x2ee/0x460 do_syscall_64+0x68/0x140 entry_SYSCALL_64_after_hwframe+0x76/0x7e [...] ================================================================== ``` --- mm/vma.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/mm/vma.h b/mm/vma.h index 819f994cf727..ebd78f1577f3 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -241,15 +241,9 @@ static inline void vms_abort_munmap_vmas(struct vma_munmap_struct *vms, * failure method of leaving a gap where the MAP_FIXED mapping failed. */ mas_set_range(mas, vms->start, vms->end - 1); - if (unlikely(mas_store_gfp(mas, NULL, GFP_KERNEL))) { - pr_warn_once("%s: (%d) Unable to abort munmap() operation\n", - current->comm, current->pid); - /* Leaving vmas detached and in-tree may hamper recovery */ - reattach_vmas(mas_detach); - } else { - /* Clean up the insertion of the unfortunate gap */ - vms_complete_munmap_vmas(vms, mas_detach); - } + mas_store_gfp(mas, NULL, GFP_KERNEL|__GFP_NOFAIL); + /* Clean up the insertion of the unfortunate gap */ + vms_complete_munmap_vmas(vms, mas_detach); } int --- base-commit: eca631b8fe808748d7585059c4307005ca5c5820 change-id: 20241016-fix-munmap-abort-2330b61332aa -- Jann Horn <jannh@xxxxxxxxxx>