Hello,I have found a memory leak bug in kvm module by syzkaller.It was found in linux-5.4 but it also could be reproduced in the latest linux version. Here is the bug info: ioctl$KVM_CREATE_PIT2(r1, 0x4040ae77, &(0x7f0000000040)) ioctl$KVM_CREATE_IRQCHIP(r1, 0xae60) ioctl$KVM_CREATE_IRQCHIP(r1, 0xae60) ioctl$KVM_SET_PIT2(r1, 0x4070aea0, &(0x7f0000000080)) BUG: memory leak unreferenced object 0xffff888112a54880 (size 64): comm "syz-executor.2", pid 5258, jiffies 4297861402 (age 14.129s) hex dump (first 32 bytes): 38 c7 67 15 00 c9 ff ff 38 c7 67 15 00 c9 ff ff 8.g.....8.g..... e0 c7 e1 83 ff ff ff ff 00 30 67 15 00 c9 ff ff .........0g..... backtrace: [<0000000006995a8a>] kmalloc include/linux/slab.h:556 [inline] [<0000000006995a8a>] kzalloc include/linux/slab.h:690 [inline] [<0000000006995a8a>] kvm_vm_ioctl_register_coalesced_mmio+0x8e/0x3d0 arch/x86/kvm/../../../virt/kvm/coalesced_mmio.c:150 [<00000000022550c2>] kvm_vm_ioctl+0x47d/0x1600 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3323 [<000000008a75102f>] vfs_ioctl fs/ioctl.c:46 [inline] [<000000008a75102f>] file_ioctl fs/ioctl.c:509 [inline] [<000000008a75102f>] do_vfs_ioctl+0xbab/0x1160 fs/ioctl.c:696 [<0000000080e3f669>] ksys_ioctl+0x76/0xa0 fs/ioctl.c:713 [<0000000059ef4888>] __do_sys_ioctl fs/ioctl.c:720 [inline] [<0000000059ef4888>] __se_sys_ioctl fs/ioctl.c:718 [inline] [<0000000059ef4888>] __x64_sys_ioctl+0x6f/0xb0 fs/ioctl.c:718 [<000000006444fa05>] do_syscall_64+0x9f/0x4e0 arch/x86/entry/common.c:290 [<000000009a4ed50b>] entry_SYSCALL_64_after_hwframe+0x49/0xbe BUG: leak checking failed And here is the reproduce code by syzkaller,the bug can be reproduced by open kmemleak config: // autogenerated by syzkaller (https://github.com/google/syzkaller) #define _GNU_SOURCE #include <dirent.h> #include <endian.h> #include <errno.h> #include <fcntl.h> #include <signal.h> #include <stdarg.h> #include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/prctl.h> #include <sys/stat.h> #include <sys/syscall.h> #include <sys/types.h> #include <sys/wait.h> #include <time.h> #include <unistd.h> static void sleep_ms(uint64_t ms) { usleep(ms * 1000); } static uint64_t current_time_ms(void) { struct timespec ts; if (clock_gettime(CLOCK_MONOTONIC, &ts)) exit(1); return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000; } static bool write_file(const char* file, const char* what, ...) { char buf[1024]; va_list args; va_start(args, what); vsnprintf(buf, sizeof(buf), what, args); va_end(args); buf[sizeof(buf) - 1] = 0; int len = strlen(buf); int fd = open(file, O_WRONLY | O_CLOEXEC); if (fd == -1) return false; if (write(fd, buf, len) != len) { int err = errno; close(fd); errno = err; return false; } close(fd); return true; } static int inject_fault(int nth) { int fd; fd = open("/proc/thread-self/fail-nth", O_RDWR); if (fd == -1) exit(1); char buf[16]; sprintf(buf, "%d", nth + 1); if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf)) exit(1); return fd; } static void kill_and_wait(int pid, int* status) { kill(-pid, SIGKILL); kill(pid, SIGKILL); for (int i = 0; i < 100; i++) { if (waitpid(-1, status, WNOHANG | __WALL) == pid) return; usleep(1000); } DIR* dir = opendir("/sys/fs/fuse/connections"); if (dir) { for (;;) { struct dirent* ent = readdir(dir); if (!ent) break; if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue; char abort[300]; snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort", ent->d_name); int fd = open(abort, O_WRONLY); if (fd == -1) { continue; } if (write(fd, abort, 1) < 0) { } close(fd); } closedir(dir); } else { } while (waitpid(-1, status, __WALL) != pid) { } } static void setup_test() { prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); setpgrp(); write_file("/proc/self/oom_score_adj", "1000"); } static void setup_fault() { static struct { const char* file; const char* val; bool fatal; } files[] = { {"/sys/kernel/debug/failslab/ignore-gfp-wait", "N", true}, {"/sys/kernel/debug/fail_futex/ignore-private", "N", false}, {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem", "N", false}, {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-wait", "N", false}, {"/sys/kernel/debug/fail_page_alloc/min-order", "0", false}, }; unsigned i; for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) { if (!write_file(files[i].file, files[i].val)) { if (files[i].fatal) exit(1); } } } #define KMEMLEAK_FILE "/sys/kernel/debug/kmemleak" static void setup_leak() { if (!write_file(KMEMLEAK_FILE, "scan")) exit(1); sleep(5); if (!write_file(KMEMLEAK_FILE, "scan")) exit(1); if (!write_file(KMEMLEAK_FILE, "clear")) exit(1); } static void check_leaks(void) { int fd = open(KMEMLEAK_FILE, O_RDWR); if (fd == -1) exit(1); uint64_t start = current_time_ms(); if (write(fd, "scan", 4) != 4) exit(1); sleep(1); while (current_time_ms() - start < 4 * 1000) sleep(1); if (write(fd, "scan", 4) != 4) exit(1); static char buf[128 << 10]; ssize_t n = read(fd, buf, sizeof(buf) - 1); if (n < 0) exit(1); int nleaks = 0; if (n != 0) { sleep(1); if (write(fd, "scan", 4) != 4) exit(1); if (lseek(fd, 0, SEEK_SET) < 0) exit(1); n = read(fd, buf, sizeof(buf) - 1); if (n < 0) exit(1); buf[n] = 0; char* pos = buf; char* end = buf + n; while (pos < end) { char* next = strstr(pos + 1, "unreferenced object"); if (!next) next = end; char prev = *next; *next = 0; fprintf(stderr, "BUG: memory leak\n%s\n", pos); *next = prev; pos = next; nleaks++; } } if (write(fd, "clear", 5) != 5) exit(1); close(fd); if (nleaks) exit(1); } static void execute_one(void); #define WAIT_FLAGS __WALL static void loop(void) { int iter = 0; for (;; iter++) { int pid = fork(); if (pid < 0) exit(1); if (pid == 0) { setup_test(); execute_one(); exit(0); } int status = 0; uint64_t start = current_time_ms(); for (;;) { if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid) break; sleep_ms(1); if (current_time_ms() - start < 5000) { continue; } kill_and_wait(pid, &status); break; } check_leaks(); } } uint64_t r[2] = {0xffffffffffffffff, 0xffffffffffffffff}; void execute_one(void) { intptr_t res = 0; memcpy((void*)0x20000080, "/dev/kvm\000", 9); res = syscall(__NR_openat, 0xffffffffffffff9cul, 0x20000080ul, 0ul, 0ul); if (res != -1) r[0] = res; res = syscall(__NR_ioctl, r[0], 0xae01, 0ul); if (res != -1) r[1] = res; *(uint64_t*)0x20000100 = 0; *(uint32_t*)0x20000108 = 0; *(uint32_t*)0x2000010c = 0; syscall(__NR_ioctl, r[1], 0x4010ae67, 0x20000100ul); *(uint64_t*)0x20000000 = 0; *(uint32_t*)0x20000008 = 0x101000; *(uint32_t*)0x2000000c = 0; syscall(__NR_ioctl, r[1], 0x4010ae67, 0x20000000ul); *(uint64_t*)0x200000c0 = 0; *(uint32_t*)0x200000c8 = 0x10000; *(uint32_t*)0x200000cc = 0; inject_fault(0); syscall(__NR_ioctl, r[1], 0x4010ae68, 0x200000c0ul); } int main(void) { syscall(__NR_mmap, 0x1ffff000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul); syscall(__NR_mmap, 0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul); syscall(__NR_mmap, 0x21000000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul); setup_leak(); setup_fault(); loop(); return 0; } Here is the memory leak function int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, struct kvm_coalesced_mmio_zone *zone) { int ret; struct kvm_coalesced_mmio_dev *dev; if (zone->pio != 1 && zone->pio != 0) return -EINVAL; dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev),//Here is the memory leak alloc GFP_KERNEL_ACCOUNT); if (!dev) return -ENOMEM; kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); dev->kvm = kvm; dev->zone = *zone; mutex_lock(&kvm->slots_lock); ret = kvm_io_bus_register_dev(kvm, zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS, zone->addr, zone->size, &dev->dev); if (ret < 0) goto out_free_dev; list_add_tail(&dev->list, &kvm->coalesced_zones); mutex_unlock(&kvm->slots_lock); return 0; out_free_dev: mutex_unlock(&kvm->slots_lock); kfree(dev); return ret; }