[MM list + secretmem author CC-d] On Thu, 21 Mar 2024 at 08:52, xingwei lee <xrivendell7@xxxxxxxxx> wrote: > > Hello, I found a bug titled "BUG: unable to handle kernel paging > request in fuse_copy_do" with modified syzkaller, and maybe it is > related to fs/fuse. > I also confirmed it on the latest upstream. > > If you fix this issue, please add the following tag to the commit: > Reported-by: xingwei lee <xrivendell7@xxxxxxxxx> > Reported-by: yue sun <samsun1006219@xxxxxxxxx> Thanks for the report. This looks like a secretmem vs get_user_pages issue. I reduced the syz reproducer to a minimal one that isn't dependent on fuse: === repro.c === #define _GNU_SOURCE #include <fcntl.h> #include <unistd.h> #include <sys/mman.h> #include <sys/syscall.h> #include <sys/socket.h> int main(void) { int fd1, fd2, fd3; int pip[2]; struct iovec iov; void *addr; fd1 = syscall(__NR_memfd_secret, 0); addr = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd1, 0); ftruncate(fd1, 7); fd2 = socket(AF_INET, SOCK_DGRAM, 0); getsockopt(fd2, 0, 0, NULL, addr); pipe(pip); iov.iov_base = addr; iov.iov_len = 0x50; vmsplice(pip[1], &iov, 1, 0); fd3 = open("/tmp/repro-secretmem.test", O_RDWR | O_CREAT, 0x600); splice(pip[0], NULL, fd3, NULL, 0x50, 0); return 0; } ======= Thanks, Miklos > > kernel: upstream 23956900041d968f9ad0f30db6dede4daccd7aa9 > kernel config: https://syzkaller.appspot.com/text?tag=KernelConfig&x=9f47e8dfa53b0b11 > with KASAN enabled > compiler: gcc (Debian 12.2.0-14) 12.2.0 > > BUG: unable to handle kernel paging request in fuse_copy_do > UDPLite: UDP-Lite is deprecated and scheduled to be removed in 2025, > please contact the netdev mailing list > BUG: unable to handle page fault for address: ffff88802c29c000 > #PF: supervisor read access in kernel mode > #PF: error_code(0x0000) - not-present page > PGD 13001067 P4D 13001067 PUD 13002067 PMD 24c8d063 PTE 800fffffd3d63060 > Oops: 0000 [#1] PREEMPT SMP KASAN NOPTI > CPU: 1 PID: 8221 Comm: 1e9 Not tainted 6.8.0-05202-g9187210eee7d-dirty 
#21 > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS > 1.16.2-1.fc38 04/01/2014 > RIP: 0010:memcpy+0xc/0x20 arch/x86/lib/memcpy_64.S:38 > Code: 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 90 90 90 90 90 90 90 > 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 66 90 48 89 f80 > RSP: 0018:ffffc9001065f9c8 EFLAGS: 00010246 > RAX: ffffc9001065fb10 RBX: ffffc9001065fc78 RCX: 0000000000000010 > RDX: 0000000000000010 RSI: ffff88802c29c000 RDI: ffffc9001065fb10 > RBP: 0000000000000010 R08: ffff88802c29c000 R09: 0000000000000001 > R10: ffffffff8ea82ed7 R11: ffffc9001065fd98 R12: ffffc9001065fac0 > R13: 0000000000000010 R14: ffffc9001065faf0 R15: ffffc9001065fcbc > FS: 000000000f82d480(0000) GS:ffff88823bc00000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: ffff88802c29c000 CR3: 000000002dd7c000 CR4: 0000000000750ef0 > PKRU: 55555554 > Call Trace: > <TASK> > fuse_copy_do+0x152/0x340 fs/fuse/dev.c:758 > fuse_copy_one fs/fuse/dev.c:1007 [inline] > fuse_dev_do_write+0x1df/0x26a0 fs/fuse/dev.c:1863 > fuse_dev_write+0x129/0x1b0 fs/fuse/dev.c:1960 > call_write_iter include/linux/fs.h:2108 [inline] > new_sync_write fs/read_write.c:497 [inline] > vfs_write+0x62e/0x10a0 fs/read_write.c:590 > ksys_write+0xf6/0x1d0 fs/read_write.c:643 > do_syscall_x64 arch/x86/entry/common.c:52 [inline] > do_syscall_64+0x7c/0x1d0 arch/x86/entry/common.c:83 > entry_SYSCALL_64_after_hwframe+0x6c/0x74 > > =* repro.c =* > #define _GNU_SOURCE > > #include <dirent.h> > #include <endian.h> > #include <errno.h> > #include <fcntl.h> > #include <setjmp.h> > #include <signal.h> > #include <stdarg.h> > #include <stdbool.h> > #include <stdint.h> > #include <stdio.h> > #include <stdlib.h> > #include <string.h> > #include <sys/prctl.h> > #include <sys/stat.h> > #include <sys/syscall.h> > #include <sys/types.h> > #include <sys/wait.h> > #include <time.h> > #include <unistd.h> > > #ifndef __NR_memfd_secret > #define __NR_memfd_secret 447 > #endif > > static __thread int 
clone_ongoing; > static __thread int skip_segv; > static __thread jmp_buf segv_env; > > static void segv_handler(int sig, siginfo_t* info, void* ctx) { > if (__atomic_load_n(&clone_ongoing, __ATOMIC_RELAXED) != 0) { > exit(sig); > } > uintptr_t addr = (uintptr_t)info->si_addr; > const uintptr_t prog_start = 1 << 20; > const uintptr_t prog_end = 100 << 20; > int skip = __atomic_load_n(&skip_segv, __ATOMIC_RELAXED) != 0; > int valid = addr < prog_start || addr > prog_end; > if (skip && valid) { > _longjmp(segv_env, 1); > } > exit(sig); > } > > static void install_segv_handler(void) { > struct sigaction sa; > memset(&sa, 0, sizeof(sa)); > sa.sa_handler = SIG_IGN; > syscall(SYS_rt_sigaction, 0x20, &sa, NULL, 8); > syscall(SYS_rt_sigaction, 0x21, &sa, NULL, 8); > memset(&sa, 0, sizeof(sa)); > sa.sa_sigaction = segv_handler; > sa.sa_flags = SA_NODEFER | SA_SIGINFO; > sigaction(SIGSEGV, &sa, NULL); > sigaction(SIGBUS, &sa, NULL); > } > > #define NONFAILING(...) \ > ({ \ > int ok = 1; \ > __atomic_fetch_add(&skip_segv, 1, __ATOMIC_SEQ_CST); \ > if (_setjmp(segv_env) == 0) { \ > __VA_ARGS__; \ > } else \ > ok = 0; \ > __atomic_fetch_sub(&skip_segv, 1, __ATOMIC_SEQ_CST); \ > ok; \ > }) > > static void sleep_ms(uint64_t ms) { > usleep(ms * 1000); > } > > static uint64_t current_time_ms(void) { > struct timespec ts; > if (clock_gettime(CLOCK_MONOTONIC, &ts)) > exit(1); > return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000; > } > > static bool write_file(const char* file, const char* what, ...) 
{ > char buf[1024]; > va_list args; > va_start(args, what); > vsnprintf(buf, sizeof(buf), what, args); > va_end(args); > buf[sizeof(buf) - 1] = 0; > int len = strlen(buf); > int fd = open(file, O_WRONLY | O_CLOEXEC); > if (fd == -1) > return false; > if (write(fd, buf, len) != len) { > int err = errno; > close(fd); > errno = err; > return false; > } > close(fd); > return true; > } > > static void kill_and_wait(int pid, int* status) { > kill(-pid, SIGKILL); > kill(pid, SIGKILL); > for (int i = 0; i < 100; i++) { > if (waitpid(-1, status, WNOHANG | __WALL) == pid) > return; > usleep(1000); > } > DIR* dir = opendir("/sys/fs/fuse/connections"); > if (dir) { > for (;;) { > struct dirent* ent = readdir(dir); > if (!ent) > break; > if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) > continue; > char abort[300]; > snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort", > ent->d_name); > int fd = open(abort, O_WRONLY); > if (fd == -1) { > continue; > } > if (write(fd, abort, 1) < 0) { > } > close(fd); > } > closedir(dir); > } else { > } > while (waitpid(-1, status, __WALL) != pid) { > } > } > > static void setup_test() { > prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); > setpgrp(); > write_file("/proc/self/oom_score_adj", "1000"); > } > > static void execute_one(void); > > #define WAIT_FLAGS __WALL > > static void loop(void) { > int iter = 0; > for (;; iter++) { > int pid = fork(); > if (pid < 0) > exit(1); > if (pid == 0) { > setup_test(); > execute_one(); > exit(0); > } > int status = 0; > uint64_t start = current_time_ms(); > for (;;) { > if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid) > break; > sleep_ms(1); > if (current_time_ms() - start < 5000) > continue; > kill_and_wait(pid, &status); > break; > } > } > } > > uint64_t r[3] = {0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff}; > > void execute_one(void) { > intptr_t res = 0; > NONFAILING(memcpy((void*)0x20002040, "./file0\000", 8)); > syscall(__NR_mkdirat, 
/*fd=*/0xffffff9c, /*path=*/0x20002040ul, /*mode=*/0ul); > NONFAILING(memcpy((void*)0x20002080, "/dev/fuse\000", 10)); > res = syscall(__NR_openat, /*fd=*/0xffffffffffffff9cul, /*file=*/0x20002080ul, > /*flags=*/2ul, /*mode=*/0ul); > if (res != -1) > r[0] = res; > NONFAILING(memcpy((void*)0x200020c0, "./file0\000", 8)); > NONFAILING(memcpy((void*)0x20002100, "fuse\000", 5)); > NONFAILING(memcpy((void*)0x20002140, "fd", 2)); > NONFAILING(*(uint8_t*)0x20002142 = 0x3d); > NONFAILING(sprintf((char*)0x20002143, "0x%016llx", (long long)r[0])); > NONFAILING(*(uint8_t*)0x20002155 = 0x2c); > NONFAILING(memcpy((void*)0x20002156, "rootmode", 8)); > NONFAILING(*(uint8_t*)0x2000215e = 0x3d); > NONFAILING(sprintf((char*)0x2000215f, "%023llo", (long long)0x4000)); > NONFAILING(*(uint8_t*)0x20002176 = 0x2c); > NONFAILING(memcpy((void*)0x20002177, "user_id", 7)); > NONFAILING(*(uint8_t*)0x2000217e = 0x3d); > NONFAILING(sprintf((char*)0x2000217f, "%020llu", (long long)0)); > NONFAILING(*(uint8_t*)0x20002193 = 0x2c); > NONFAILING(memcpy((void*)0x20002194, "group_id", 8)); > NONFAILING(*(uint8_t*)0x2000219c = 0x3d); > NONFAILING(sprintf((char*)0x2000219d, "%020llu", (long long)0)); > NONFAILING(*(uint8_t*)0x200021b1 = 0x2c); > NONFAILING(*(uint8_t*)0x200021b2 = 0); > syscall(__NR_mount, /*src=*/0ul, /*dst=*/0x200020c0ul, /*type=*/0x20002100ul, > /*flags=*/0ul, /*opts=*/0x20002140ul); > res = syscall(__NR_memfd_secret, /*flags=*/0ul); > if (res != -1) > r[1] = res; > syscall(__NR_mmap, /*addr=*/0x20000000ul, /*len=*/0xb36000ul, > /*prot=PROT_GROWSUP|PROT_READ*/ 0x2000001ul, > /*flags=MAP_STACK|MAP_POPULATE|MAP_FIXED|MAP_SHARED*/ 0x28011ul, > /*fd=*/r[1], /*offset=*/0ul); > syscall(__NR_ftruncate, /*fd=*/r[1], /*len=*/7ul); > res = syscall(__NR_socket, /*domain=*/2ul, /*type=*/2ul, /*proto=*/0x88); > if (res != -1) > r[2] = res; > NONFAILING(*(uint32_t*)0x20000280 = 0); > syscall(__NR_getsockopt, /*fd=*/r[2], /*level=*/1, /*optname=*/0x11, > /*optval=*/0ul, /*optlen=*/0x20000280ul); > 
NONFAILING(*(uint32_t*)0x20000000 = 0x50); > NONFAILING(*(uint32_t*)0x20000004 = 0); > NONFAILING(*(uint64_t*)0x20000008 = 0); > NONFAILING(*(uint32_t*)0x20000010 = 7); > NONFAILING(*(uint32_t*)0x20000014 = 0x27); > NONFAILING(*(uint32_t*)0x20000018 = 0); > NONFAILING(*(uint32_t*)0x2000001c = 0); > NONFAILING(*(uint16_t*)0x20000020 = 0); > NONFAILING(*(uint16_t*)0x20000022 = 0); > NONFAILING(*(uint32_t*)0x20000024 = 0); > NONFAILING(*(uint32_t*)0x20000028 = 0); > NONFAILING(*(uint16_t*)0x2000002c = 0); > NONFAILING(*(uint16_t*)0x2000002e = 0); > NONFAILING(memset((void*)0x20000030, 0, 32)); > syscall(__NR_write, /*fd=*/r[0], /*arg=*/0x20000000ul, /*len=*/0x50ul); > } > int main(void) { > syscall(__NR_mmap, /*addr=*/0x1ffff000ul, /*len=*/0x1000ul, /*prot=*/0ul, > /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1, > /*offset=*/0ul); > syscall(__NR_mmap, /*addr=*/0x20000000ul, /*len=*/0x1000000ul, > /*prot=PROT_WRITE|PROT_READ|PROT_EXEC*/ 7ul, > /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1, > /*offset=*/0ul); > syscall(__NR_mmap, /*addr=*/0x21000000ul, /*len=*/0x1000ul, /*prot=*/0ul, > /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1, > /*offset=*/0ul); > install_segv_handler(); > loop(); > return 0; > } > > =* repro.txt =* > mkdirat(0xffffffffffffff9c, &(0x7f0000002040)='./file0\x00', 0x0) > r0 = openat$fuse(0xffffffffffffff9c, &(0x7f0000002080), 0x2, 0x0) > mount$fuse(0x0, &(0x7f00000020c0)='./file0\x00', &(0x7f0000002100), > 0x0, &(0x7f0000002140)={{'fd', 0x3d, r0}, 0x2c, {'rootmode', 0x3d, > 0x4000}}) > r1 = memfd_secret(0x0) > mmap(&(0x7f0000000000/0xb36000)=nil, 0xb36000, 0x2000001, 0x28011, r1, 0x0) > ftruncate(r1, 0x7) > r2 = socket$inet_udplite(0x2, 0x2, 0x88) > getsockopt$sock_cred(r2, 0x1, 0x11, 0x0, &(0x7f0000000280)) > write$FUSE_INIT(r0, &(0x7f0000000000)={0x50}, 0x50) > > > see also https://gist.github.com/xrivendell7/961be96ae091c9671bb56efea902cec4. > > I hope it helps. > best regards. > xingwei Lee