On Wed, 2019-01-02 at 19:54 +0100, Dmitry Vyukov wrote: > On Wed, Jan 2, 2019 at 7:51 PM Dmitry Vyukov <dvyukov@xxxxxxxxxx> wrote: > > On Wed, Jan 2, 2019 at 7:20 PM Jeff Layton <jlayton@xxxxxxxxxx> wrote: > > > On Wed, 2019-01-02 at 02:31 -0800, syzbot wrote: > > > > Hello, > > > > > > > > syzbot found the following crash on: > > > > > > > > HEAD commit: e1ef035d272e Merge tag 'armsoc-defconfig' of git://git.ker.. > > > > git tree: upstream > > > > console output: https://syzkaller.appspot.com/x/log.txt?x=16bb4c4b400000 > > > > kernel config: https://syzkaller.appspot.com/x/.config?x=9c6a26e22579190b > > > > dashboard link: https://syzkaller.appspot.com/bug?extid=239d99847eb49ecb3899 > > > > compiler: gcc (GCC) 9.0.0 20181231 (experimental) > > > > syz repro: https://syzkaller.appspot.com/x/repro.syz?x=128aa377400000 > > > > > > > > IMPORTANT: if you fix the bug, please add the following tag to the commit: > > > > Reported-by: syzbot+239d99847eb49ecb3899@xxxxxxxxxxxxxxxxxxxxxxxxx > > > > > > > > IPv6: ADDRCONF(NETDEV_UP): vxcan1: link is not ready > > > > IPv6: ADDRCONF(NETDEV_UP): vxcan1: link is not ready > > > > 8021q: adding VLAN 0 to HW filter on device batadv0 > > > > 8021q: adding VLAN 0 to HW filter on device batadv0 > > > > ================================================================== > > > > BUG: KASAN: use-after-free in what_owner_is_waiting_for fs/locks.c:1000 > > > > [inline] > > > > BUG: KASAN: use-after-free in posix_locks_deadlock fs/locks.c:1023 [inline] > > > > BUG: KASAN: use-after-free in posix_lock_inode+0x1f9e/0x2750 fs/locks.c:1163 > > > > Read of size 8 at addr ffff88808791b000 by task syz-executor2/10100 > > > > > > > > CPU: 1 PID: 10100 Comm: syz-executor2 Not tainted 4.20.0+ #3 > > > > Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS > > > > Google 01/01/2011 > > > > Call Trace: > > > > __dump_stack lib/dump_stack.c:77 [inline] > > > > dump_stack+0x1db/0x2d0 lib/dump_stack.c:113 > > > > 
print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187 > > > > kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317 > > > > __asan_report_load8_noabort+0x14/0x20 mm/kasan/generic_report.c:135 > > > > what_owner_is_waiting_for fs/locks.c:1000 [inline] > > > > posix_locks_deadlock fs/locks.c:1023 [inline] > > > > posix_lock_inode+0x1f9e/0x2750 fs/locks.c:1163 > > > > posix_lock_file fs/locks.c:1346 [inline] > > > > vfs_lock_file fs/locks.c:2314 [inline] > > > > vfs_lock_file+0xc7/0xf0 fs/locks.c:2309 > > > > do_lock_file_wait.part.0+0xe5/0x260 fs/locks.c:2328 > > > > do_lock_file_wait fs/locks.c:2324 [inline] > > > > fcntl_setlk+0x2f1/0xfe0 fs/locks.c:2413 > > > > do_fcntl+0x843/0x12b0 fs/fcntl.c:370 > > > > __do_sys_fcntl fs/fcntl.c:463 [inline] > > > > __se_sys_fcntl fs/fcntl.c:448 [inline] > > > > __x64_sys_fcntl+0x16d/0x1e0 fs/fcntl.c:448 > > > > do_syscall_64+0x1a3/0x800 arch/x86/entry/common.c:290 > > > > entry_SYSCALL_64_after_hwframe+0x49/0xbe > > > > RIP: 0033:0x457ec9 > > > > Code: 6d b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 > > > > 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff > > > > ff 0f 83 3b b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 > > > > RSP: 002b:00007f58bbb50c78 EFLAGS: 00000246 ORIG_RAX: 0000000000000048 > > > > RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457ec9 > > > > RDX: 0000000020000140 RSI: 0000000000000007 RDI: 0000000000000003 > > > > RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 > > > > R10: 0000000000000000 R11: 0000000000000246 R12: 00007f58bbb516d4 > > > > R13: 00000000004be5f0 R14: 00000000004ceab0 R15: 00000000ffffffff > > > > > > > > Allocated by task 10100: > > > > save_stack+0x45/0xd0 mm/kasan/common.c:73 > > > > set_track mm/kasan/common.c:85 [inline] > > > > kasan_kmalloc mm/kasan/common.c:482 [inline] > > > > kasan_kmalloc+0xcf/0xe0 mm/kasan/common.c:455 > > > > kasan_slab_alloc+0xf/0x20 mm/kasan/common.c:397 > > > > 
kmem_cache_alloc+0x12d/0x710 mm/slab.c:3541 > > > > kmem_cache_zalloc include/linux/slab.h:730 [inline] > > > > locks_alloc_lock+0x8e/0x2f0 fs/locks.c:344 > > > > fcntl_setlk+0xa9/0xfe0 fs/locks.c:2362 > > > > do_fcntl+0x843/0x12b0 fs/fcntl.c:370 > > > > __do_sys_fcntl fs/fcntl.c:463 [inline] > > > > __se_sys_fcntl fs/fcntl.c:448 [inline] > > > > __x64_sys_fcntl+0x16d/0x1e0 fs/fcntl.c:448 > > > > do_syscall_64+0x1a3/0x800 arch/x86/entry/common.c:290 > > > > entry_SYSCALL_64_after_hwframe+0x49/0xbe > > > > > > > > Freed by task 10100: > > > > save_stack+0x45/0xd0 mm/kasan/common.c:73 > > > > set_track mm/kasan/common.c:85 [inline] > > > > __kasan_slab_free+0x102/0x150 mm/kasan/common.c:444 > > > > kasan_slab_free+0xe/0x10 mm/kasan/common.c:452 > > > > __cache_free mm/slab.c:3485 [inline] > > > > kmem_cache_free+0x86/0x260 mm/slab.c:3747 > > > > locks_free_lock+0x27a/0x3f0 fs/locks.c:381 > > > > fcntl_setlk+0x7b5/0xfe0 fs/locks.c:2439 > > > > do_fcntl+0x843/0x12b0 fs/fcntl.c:370 > > > > __do_sys_fcntl fs/fcntl.c:463 [inline] > > > > __se_sys_fcntl fs/fcntl.c:448 [inline] > > > > __x64_sys_fcntl+0x16d/0x1e0 fs/fcntl.c:448 > > > > do_syscall_64+0x1a3/0x800 arch/x86/entry/common.c:290 > > > > entry_SYSCALL_64_after_hwframe+0x49/0xbe > > > > > > > > The buggy address belongs to the object at ffff88808791b000 > > > > which belongs to the cache file_lock_cache of size 264 > > > > The buggy address is located 0 bytes inside of > > > > 264-byte region [ffff88808791b000, ffff88808791b108) > > > > The buggy address belongs to the page: > > > > page:ffffea00021e46c0 count:1 mapcount:0 mapping:ffff8880aa16a1c0 index:0x0 > > > > flags: 0x1fffc0000000200(slab) > > > > raw: 01fffc0000000200 ffffea0002333508 ffffea00021d76c8 ffff8880aa16a1c0 > > > > raw: 0000000000000000 ffff88808791b000 000000010000000c 0000000000000000 > > > > page dumped because: kasan: bad access detected > > > > > > > > Memory state around the buggy address: > > > > ffff88808791af00: fb fb fb fb fb fb fb fc fc 
fc fc fc fc fc fc fc > > > > ffff88808791af80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc > > > > > ffff88808791b000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb > > > > ^ > > > > ffff88808791b080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb > > > > ffff88808791b100: fb fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb > > > > ================================================================== > > > > > > > > > > > > > > I've given this a harder look and I really don't quite grok what > > > this output is telling me: > > > > > > All 3 stack traces say they come from PID 10100, but the use-after-free > > > seems to occur well before the free could have occurred in the context > > > of the current fcntl call. > > > > Interestingly it is the case in all crashes for this bug. It may be > > something inherent, or maybe just the particular program that > > triggered this is such that these accesses happen in the same thread. > > > > > So, I guess that leaves the possibility that we freed a lock request > > > from an earlier fcntl call without removing it properly from the tree, > > > but (a) I don't see how that could happen, _and_ (b) why didn't that > > > trip the BUG_ONs in locks_free_lock? > > > > > > I'll keep looking at this, but I'm a bit stumped at the moment. > > > > The simplest repro for this is: > > > > # See https://goo.gl/kgGztJ for information about syzkaller reproducers. > > #{"threaded":true,"collide":true,"repeat":true,"procs":6,"sandbox":"none","fault_call":-1,"tun":true,"tmpdir":true,"cgroups":true,"netdev":true,"resetnet":true,"segv":true} > > r0 = epoll_create1(0x0) > > fcntl$lock(r0, 0x7, &(0x7f0000000080)) > > fcntl$lock(r0, 0x7, &(0x7f0000000140)={0x1000000000001, 0x0, 0x1000000}) > > > > "collide":true means that the 2 fcntl's were executed in parallel. But > > still the alloc/free/access always happened in the same thread, so the > > thread interaction seems to be somewhat unusual. 
> > Looking at the frequency of this crash, the repro properties and some other > signals, my money is on a race/atomicity violation with a narrow > inconsistency window. E.g. unlock something and then expect that things > have not changed after re-lock, or remove from a list and reset a > pointer a few instructions later. Thanks Dmitry, The good news is that it's quite reproducible. I used syz-prog2c on the reproducer and got the attached program (build with -lpthread). I didn't have KASAN enabled on my throwaway VM, but I got a GPF and a stack trace much like the one above when I ran this on a kernel with the thundering-herd patch set. A kernel based on a commit from earlier in the merge window didn't show the problem. I tested commenting out some lockless shortcuts, but that didn't fix it. I'll keep playing with it. Cheers, -- Jeff Layton <jlayton@xxxxxxxxxx>
// autogenerated by syzkaller (https://github.com/google/syzkaller)

#define _GNU_SOURCE

#include <arpa/inet.h>
#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <netinet/in.h>
#include <pthread.h>
#include <sched.h>
#include <setjmp.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

#include <linux/capability.h>
#include <linux/futex.h>
#include <linux/if_addr.h>
#include <linux/if_ether.h>
#include <linux/if_link.h>
#include <linux/if_tun.h>
#include <linux/in6.h>
#include <linux/ip.h>
#include <linux/neighbour.h>
#include <linux/net.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/tcp.h>
#include <linux/veth.h>

// Index of this executor process; used below to derive per-process device
// names and addresses.
unsigned long long procid;

// Per-thread nesting depth of NONFAILING sections and the jump target used to
// leave a faulting one.
static __thread int skip_segv;
static __thread jmp_buf segv_env;

// SIGSEGV/SIGBUS handler. When the current thread is inside a NONFAILING
// section and the faulting address lies outside [1 MiB, 100 MiB], control
// jumps back to the section's _setjmp point; otherwise the process exits with
// the signal number as its status.
static void segv_handler(int sig, siginfo_t* info, void* ctx)
{
  uintptr_t addr = (uintptr_t)info->si_addr;
  const uintptr_t prog_start = 1 << 20;
  const uintptr_t prog_end = 100 << 20;
  if (__atomic_load_n(&skip_segv, __ATOMIC_RELAXED) &&
      (addr < prog_start || addr > prog_end)) {
    _longjmp(segv_env, 1);
  }
  exit(sig);
}

// Installs segv_handler for SIGSEGV and SIGBUS. Signals 0x20 and 0x21 are set
// to SIG_IGN through the raw rt_sigaction syscall first.
// NOTE(review): presumably the raw syscall is used because the libc wrapper
// rejects these signal numbers - confirm.
static void install_segv_handler(void)
{
  struct sigaction sa;
  memset(&sa, 0, sizeof(sa));
  sa.sa_handler = SIG_IGN;
  syscall(SYS_rt_sigaction, 0x20, &sa, NULL, 8);
  syscall(SYS_rt_sigaction, 0x21, &sa, NULL, 8);
  memset(&sa, 0, sizeof(sa));
  sa.sa_sigaction = segv_handler;
  sa.sa_flags = SA_NODEFER | SA_SIGINFO;
  sigaction(SIGSEGV, &sa, NULL);
  sigaction(SIGBUS, &sa, NULL);
}

// Runs the given statements with skip_segv raised, so that a fault inside
// them is routed by segv_handler back to the _setjmp point instead of
// terminating the process.
#define NONFAILING(...)                                  \
  {                                                      \
    __atomic_fetch_add(&skip_segv, 1, __ATOMIC_SEQ_CST); \
    if (_setjmp(segv_env) == 0) {                        \
      __VA_ARGS__;                                       \
    }                                                    \
    __atomic_fetch_sub(&skip_segv, 1, __ATOMIC_SEQ_CST); \
  }

// Sleeps for the given number of milliseconds.
static void sleep_ms(uint64_t ms)
{
  usleep(ms * 1000);
}

// Returns CLOCK_MONOTONIC time in milliseconds; exits the process if the
// clock cannot be read.
static uint64_t current_time_ms(void)
{
  struct timespec ts;
  if (clock_gettime(CLOCK_MONOTONIC, &ts))
    exit(1);
  return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
}

// Creates a fresh "./syzkaller.XXXXXX" directory, makes it world-accessible
// and changes into it. Any failure exits the process.
static void use_temporary_dir(void)
{
  char tmpdir_template[] = "./syzkaller.XXXXXX";
  char* tmpdir = mkdtemp(tmpdir_template);
  if (!tmpdir)
    exit(1);
  if (chmod(tmpdir, 0777))
    exit(1);
  if (chdir(tmpdir))
    exit(1);
}

// Starts fn(arg) on a new thread with a 128 KiB stack. The thread handle is
// not retained; creation failure exits the process.
static void thread_start(void* (*fn)(void*), void* arg)
{
  pthread_t th;
  pthread_attr_t attr;
  pthread_attr_init(&attr);
  pthread_attr_setstacksize(&attr, 128 << 10);
  if (pthread_create(&th, &attr, fn, arg))
    exit(1);
  pthread_attr_destroy(&attr);
}

// One-shot event built on a futex word: state is 0 (unset) or 1 (set).
typedef struct {
  int state;
} event_t;

// Puts the event into the unset state.
static void event_init(event_t* ev)
{
  ev->state = 0;
}

// Returns the event to the unset state.
static void event_reset(event_t* ev)
{
  ev->state = 0;
}

// Sets the event and wakes waiters. Setting an already-set event exits the
// process.
static void event_set(event_t* ev)
{
  if (ev->state)
    exit(1);
  __atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE);
  // FUTEX_WAKE takes the maximum number of waiters to wake as its third
  // argument; pass it explicitly instead of leaving it to whatever happens
  // to be in the argument register.
  syscall(SYS_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1000000);
}

// Blocks until the event is set.
static void event_wait(event_t* ev)
{
  while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
    syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0);
}

// Returns non-zero if the event is currently set.
static int event_isset(event_t* ev)
{
  return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
}

// Waits up to `timeout` milliseconds for the event. Returns 1 if the event
// was observed set, 0 once more than `timeout` ms have elapsed.
static int event_timedwait(event_t* ev, uint64_t timeout)
{
  uint64_t start = current_time_ms();
  uint64_t now = start;
  for (;;) {
    // now - start never exceeds timeout here: the loop returns below as soon
    // as it does, so the subtraction cannot wrap.
    uint64_t remain = timeout - (now - start);
    struct timespec ts;
    ts.tv_sec = remain / 1000;
    ts.tv_nsec = (remain % 1000) * 1000 * 1000;
    syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts);
    if (__atomic_load_n(&ev->state, __ATOMIC_RELAXED))
      return 1;
    now = current_time_ms();
    if (now - start > timeout)
      return 0;
  }
}

// Formats `what` printf-style (at most 1023 bytes) and writes it to the
// existing file `file`. Returns true only if the whole string was written;
// on a short or failed write errno from write() is preserved across close().
static bool write_file(const char* file, const char* what, ...)
{
  char buf[1024];
  va_list args;
  va_start(args, what);
  vsnprintf(buf, sizeof(buf), what, args);
  va_end(args);
  buf[sizeof(buf) - 1] = 0;
  int len = strlen(buf);
  int fd = open(file, O_WRONLY | O_CLOEXEC);
  if (fd == -1)
    return false;
  if (write(fd, buf, len) != len) {
    int err = errno;
    close(fd);
    errno = err;
    return false;
  }
  close(fd);
  return true;
}

// Single shared netlink request under construction: `pos` is the write
// cursor into `buf`, `nested` is the stack of currently open nested
// attributes.
static struct {
  char* pos;
  int nesting;
  struct nlattr* nested[8];
  char buf[1024];
} nlmsg;

// Starts a new request of type `typ` with NLM_F_REQUEST | NLM_F_ACK | flags
// and copies `size` bytes of family-specific header from `data` after the
// nlmsghdr.
static void netlink_init(int typ, int flags, const void* data, int size)
{
  memset(&nlmsg, 0, sizeof(nlmsg));
  struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg.buf;
  hdr->nlmsg_type = typ;
  hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
  memcpy(hdr + 1, data, size);
  nlmsg.pos = (char*)(hdr + 1) + NLMSG_ALIGN(size);
}

// Appends one attribute of type `typ` carrying `size` bytes from `data`.
static void netlink_attr(int typ, const void* data, int size)
{
  struct nlattr* attr = (struct nlattr*)nlmsg.pos;
  attr->nla_len = sizeof(*attr) + size;
  attr->nla_type = typ;
  memcpy(attr + 1, data, size);
  nlmsg.pos += NLMSG_ALIGN(attr->nla_len);
}

// Opens a nested attribute of type `typ`; its length is filled in by the
// matching netlink_done().
static void netlink_nest(int typ)
{
  struct nlattr* attr = (struct nlattr*)nlmsg.pos;
  attr->nla_type = typ;
  nlmsg.pos += sizeof(*attr);
  nlmsg.nested[nlmsg.nesting++] = attr;
}

// Closes the innermost open nested attribute by recording its final length.
static void netlink_done(void)
{
  struct nlattr* attr = nlmsg.nested[--nlmsg.nesting];
  attr->nla_len = nlmsg.pos - (char*)attr;
}

// Sends the request built in nlmsg over `sock` and reads the reply into the
// same buffer. Exits the process if the message overflowed the buffer, a
// nested attribute is still open, the transfer failed or was short, or the
// reply is not an NLMSG_ERROR message. Returns the negated `error` field of
// the reply (0 for a plain acknowledgement).
static int netlink_send(int sock)
{
  if (nlmsg.pos > nlmsg.buf + sizeof(nlmsg.buf) || nlmsg.nesting)
    exit(1);
  struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg.buf;
  hdr->nlmsg_len = nlmsg.pos - nlmsg.buf;
  struct sockaddr_nl addr;
  memset(&addr, 0, sizeof(addr));
  addr.nl_family = AF_NETLINK;
  // Keep the results signed: stored in an unsigned, a -1 failure from recv()
  // would compare as a huge length and slip past the minimum-size check.
  ssize_t n = sendto(sock, nlmsg.buf, hdr->nlmsg_len, 0,
                     (struct sockaddr*)&addr, sizeof(addr));
  if (n < 0 || (size_t)n != hdr->nlmsg_len)
    exit(1);
  n = recv(sock, nlmsg.buf, sizeof(nlmsg.buf), 0);
  if (n < 0 || (size_t)n < sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr))
    exit(1);
  if (hdr->nlmsg_type != NLMSG_ERROR)
    exit(1);
  return -((struct nlmsgerr*)(hdr + 1))->error;
}

// Begins an RTM_NEWLINK create request for a device of kind `type`, with an
// optional name. Leaves the IFLA_LINKINFO nest open for the caller to close.
static void netlink_add_device_impl(const char* type, const char* name)
{
  struct ifinfomsg hdr;
  memset(&hdr, 0, sizeof(hdr));
  netlink_init(RTM_NEWLINK, NLM_F_EXCL | NLM_F_CREATE, &hdr, sizeof(hdr));
  if (name)
    netlink_attr(IFLA_IFNAME, name, strlen(name));
  netlink_nest(IFLA_LINKINFO);
  netlink_attr(IFLA_INFO_KIND, type, strlen(type));
}

// Creates a device of kind `type`; the kernel's error code is ignored.
static void netlink_add_device(int sock, const char* type, const char* name)
{
  netlink_add_device_impl(type, name);
  netlink_done();
  int err = netlink_send(sock);
  (void)err;
}

// Creates a veth pair `name` <-> `peer`; the kernel's error code is ignored.
static void netlink_add_veth(int sock, const char* name, const char* peer)
{
  netlink_add_device_impl("veth", name);
  netlink_nest(IFLA_INFO_DATA);
  netlink_nest(VETH_INFO_PEER);
  // Reserve a zeroed ifinfomsg for the peer (the buffer was cleared by
  // netlink_init).
  nlmsg.pos += sizeof(struct ifinfomsg);
  netlink_attr(IFLA_IFNAME, peer, strlen(peer));
  netlink_done();
  netlink_done();
  netlink_done();
  int err = netlink_send(sock);
  (void)err;
}

// Creates an hsr device `name` on top of the two named slave interfaces; the
// kernel's error code is ignored.
static void netlink_add_hsr(int sock, const char* name, const char* slave1,
                            const char* slave2)
{
  netlink_add_device_impl("hsr", name);
  netlink_nest(IFLA_INFO_DATA);
  int ifindex1 = if_nametoindex(slave1);
  netlink_attr(IFLA_HSR_SLAVE1, &ifindex1, sizeof(ifindex1));
  int ifindex2 = if_nametoindex(slave2);
  netlink_attr(IFLA_HSR_SLAVE2, &ifindex2, sizeof(ifindex2));
  netlink_done();
  netlink_done();
  int err = netlink_send(sock);
  (void)err;
}

// Modifies device `name`: optionally brings it up, attaches it to `master`,
// and sets a hardware address of `macsize` bytes. The kernel's error code is
// ignored.
static void netlink_device_change(int sock, const char* name, bool up,
                                  const char* master, const void* mac,
                                  int macsize)
{
  struct ifinfomsg hdr;
  memset(&hdr, 0, sizeof(hdr));
  if (up)
    hdr.ifi_flags = hdr.ifi_change = IFF_UP;
  netlink_init(RTM_NEWLINK, 0, &hdr, sizeof(hdr));
  netlink_attr(IFLA_IFNAME, name, strlen(name));
  if (master) {
    int ifindex = if_nametoindex(master);
    netlink_attr(IFLA_MASTER, &ifindex, sizeof(ifindex));
  }
  if (macsize)
    netlink_attr(IFLA_ADDRESS, mac, macsize);
  int err = netlink_send(sock);
  (void)err;
}

// Adds an address to `dev`. An `addrsize` of 4 selects AF_INET with a /24
// prefix; any other size selects AF_INET6 with a /120 prefix. Returns the
// result of netlink_send().
static int netlink_add_addr(int sock, const char* dev, const void* addr,
                            int addrsize)
{
  struct ifaddrmsg hdr;
  memset(&hdr, 0, sizeof(hdr));
  hdr.ifa_family = addrsize == 4 ? AF_INET : AF_INET6;
  hdr.ifa_prefixlen = addrsize == 4 ? 24 : 120;
  hdr.ifa_scope = RT_SCOPE_UNIVERSE;
  hdr.ifa_index = if_nametoindex(dev);
  netlink_init(RTM_NEWADDR, NLM_F_CREATE | NLM_F_REPLACE, &hdr, sizeof(hdr));
  netlink_attr(IFA_LOCAL, addr, addrsize);
  netlink_attr(IFA_ADDRESS, addr, addrsize);
  return netlink_send(sock);
}

// Adds the textual IPv4 address `addr` to `dev`; errors are ignored.
static void netlink_add_addr4(int sock, const char* dev, const char* addr)
{
  struct in_addr in_addr;
  inet_pton(AF_INET, addr, &in_addr);
  int err = netlink_add_addr(sock, dev, &in_addr, sizeof(in_addr));
  (void)err;
}

// Adds the textual IPv6 address `addr` to `dev`; errors are ignored.
static void netlink_add_addr6(int sock, const char* dev, const char* addr)
{
  struct in6_addr in6_addr;
  inet_pton(AF_INET6, addr, &in6_addr);
  int err = netlink_add_addr(sock, dev, &in6_addr, sizeof(in6_addr));
  (void)err;
}

#define DEV_IPV4 "172.20.20.%d"
#define DEV_IPV6 "fe80::%02hx"
#define DEV_MAC 0x00aaaaaaaaaa

// Creates the fixed set of virtual network devices, enslaves veth pairs to
// the bridge/bond/team masters, builds an hsr device, and then assigns each
// listed device an IPv4 address, an IPv6 address (unless noipv6), a MAC and
// brings it up. Individual netlink failures are ignored; failure to open the
// netlink socket exits the process.
static void initialize_netdevices(void)
{
  char netdevsim[16];
  // Bounded: "netdevsim" plus a full-width int would not fit in 16 bytes.
  snprintf(netdevsim, sizeof(netdevsim), "netdevsim%d", (int)procid);
  struct {
    const char* type;
    const char* dev;
  } devtypes[] = {
      {"ip6gretap", "ip6gretap0"},
      {"bridge", "bridge0"},
      {"vcan", "vcan0"},
      {"bond", "bond0"},
      {"team", "team0"},
      {"dummy", "dummy0"},
      {"nlmon", "nlmon0"},
      {"caif", "caif0"},
      {"batadv", "batadv0"},
      {"vxcan", "vxcan1"},
      {"netdevsim", netdevsim},
      {"veth", 0},
  };
  const char* devmasters[] = {"bridge", "bond", "team"};
  struct {
    const char* name;
    int macsize;
    bool noipv6;
  } devices[] = {
      {"lo", ETH_ALEN},
      {"sit0", 0},
      {"bridge0", ETH_ALEN},
      {"vcan0", 0, true},
      {"tunl0", 0},
      {"gre0", 0},
      {"gretap0", ETH_ALEN},
      {"ip_vti0", 0},
      {"ip6_vti0", 0},
      {"ip6tnl0", 0},
      {"ip6gre0", 0},
      {"ip6gretap0", ETH_ALEN},
      {"erspan0", ETH_ALEN},
      {"bond0", ETH_ALEN},
      {"veth0", ETH_ALEN},
      {"veth1", ETH_ALEN},
      {"team0", ETH_ALEN},
      {"veth0_to_bridge", ETH_ALEN},
      {"veth1_to_bridge", ETH_ALEN},
      {"veth0_to_bond", ETH_ALEN},
      {"veth1_to_bond", ETH_ALEN},
      {"veth0_to_team", ETH_ALEN},
      {"veth1_to_team", ETH_ALEN},
      {"veth0_to_hsr", ETH_ALEN},
      {"veth1_to_hsr", ETH_ALEN},
      {"hsr0", 0},
      {"dummy0", ETH_ALEN},
      {"nlmon0", 0},
      {"vxcan1", 0, true},
      {"caif0", ETH_ALEN},
      {"batadv0", ETH_ALEN},
      {netdevsim, ETH_ALEN},
  };
  int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  if (sock == -1)
    exit(1);
  unsigned i;
  for (i = 0; i < sizeof(devtypes) / sizeof(devtypes[0]); i++)
    netlink_add_device(sock, devtypes[i].type, devtypes[i].dev);
  for (i = 0; i < sizeof(devmasters) / (sizeof(devmasters[0])); i++) {
    char master[32], slave0[32], veth0[32], slave1[32], veth1[32];
    sprintf(slave0, "%s_slave_0", devmasters[i]);
    sprintf(veth0, "veth0_to_%s", devmasters[i]);
    netlink_add_veth(sock, slave0, veth0);
    sprintf(slave1, "%s_slave_1", devmasters[i]);
    sprintf(veth1, "veth1_to_%s", devmasters[i]);
    netlink_add_veth(sock, slave1, veth1);
    sprintf(master, "%s0", devmasters[i]);
    netlink_device_change(sock, slave0, false, master, 0, 0);
    netlink_device_change(sock, slave1, false, master, 0, 0);
  }
  netlink_device_change(sock, "bridge_slave_0", true, 0, 0, 0);
  netlink_device_change(sock, "bridge_slave_1", true, 0, 0, 0);
  netlink_add_veth(sock, "hsr_slave_0", "veth0_to_hsr");
  netlink_add_veth(sock, "hsr_slave_1", "veth1_to_hsr");
  netlink_add_hsr(sock, "hsr0", "hsr_slave_0", "hsr_slave_1");
  netlink_device_change(sock, "hsr_slave_0", true, 0, 0, 0);
  netlink_device_change(sock, "hsr_slave_1", true, 0, 0, 0);
  for (i = 0; i < sizeof(devices) / (sizeof(devices[0])); i++) {
    char addr[32];
    sprintf(addr, DEV_IPV4, i + 10);
    netlink_add_addr4(sock, devices[i].name, addr);
    if (!devices[i].noipv6) {
      sprintf(addr, DEV_IPV6, i + 10);
      netlink_add_addr6(sock, devices[i].name, addr);
    }
    uint64_t macaddr = DEV_MAC + ((i + 10ull) << 40);
    netlink_device_change(sock, devices[i].name, true, 0, &macaddr,
                          devices[i].macsize);
  }
  close(sock);
}

// Configures the per-process "nr<procid>" and "rose<procid>" devices with
// addresses and a MAC derived from procid, bringing each up unless `noup`.
// Failure to open the netlink socket exits the process.
static void initialize_netdevices_init(void)
{
  int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  if (sock == -1)
    exit(1);
  struct {
    const char* type;
    int macsize;
    bool noipv6;
    bool noup;
  } devtypes[] = {
      {"nr", 7, true},
      {"rose", 5, true, true},
  };
  unsigned i;
  for (i = 0; i < sizeof(devtypes) / sizeof(devtypes[0]); i++) {
    char dev[32], addr[32];
    sprintf(dev, "%s%d", devtypes[i].type, (int)procid);
    sprintf(addr, "172.30.%d.%d", i, (int)procid + 1);
    netlink_add_addr4(sock, dev, addr);
    if (!devtypes[i].noipv6) {
      sprintf(addr, "fe88::%02hx:%02hx", i, (int)procid + 1);
      netlink_add_addr6(sock, dev, addr);
    }
    int macsize = devtypes[i].macsize;
    uint64_t macaddr = 0xbbbbbb +
                       ((unsigned long long)i << (8 * (macsize - 2))) +
                       (procid << (8 * (macsize - 1)));
    netlink_device_change(sock, dev, !devtypes[i].noup, 0, &macaddr, macsize);
  }
  close(sock);
}

// Local copies of the xtables sockopt structures, with the entry table
// capped at XT_TABLE_SIZE bytes and XT_MAX_ENTRIES counters.
#define XT_TABLE_SIZE 1536
#define XT_MAX_ENTRIES 10

struct xt_counters {
  uint64_t pcnt, bcnt;
};

struct ipt_getinfo {
  char name[32];
  unsigned int valid_hooks;
  unsigned int hook_entry[5];
  unsigned int underflow[5];
  unsigned int num_entries;
  unsigned int size;
};

struct ipt_get_entries {
  char name[32];
  unsigned int size;
  void* entrytable[XT_TABLE_SIZE / sizeof(void*)];
};

struct ipt_replace {
  char name[32];
  unsigned int valid_hooks;
  unsigned int num_entries;
  unsigned int size;
  unsigned int hook_entry[5];
  unsigned int underflow[5];
  unsigned int num_counters;
  struct xt_counters* counters;
  char entrytable[XT_TABLE_SIZE];
};

// Snapshot of one iptables table: `info` as read at checkpoint time and a
// ready-to-submit `replace` request that restores it.
struct ipt_table_desc {
  const char* name;
  struct ipt_getinfo info;
  struct ipt_replace replace;
};

static struct ipt_table_desc ipv4_tables[] = {
    {.name = "filter"},
    {.name = "nat"},
    {.name = "mangle"},
    {.name = "raw"},
    {.name = "security"},
};

static struct ipt_table_desc ipv6_tables[] = {
    {.name = "filter"},
    {.name = "nat"},
    {.name = "mangle"},
    {.name = "raw"},
    {.name = "security"},
};

#define IPT_BASE_CTL 64
#define IPT_SO_SET_REPLACE (IPT_BASE_CTL)
#define IPT_SO_GET_INFO (IPT_BASE_CTL)
#define IPT_SO_GET_ENTRIES (IPT_BASE_CTL + 1)

struct arpt_getinfo {
  char name[32];
  unsigned int valid_hooks;
  unsigned int hook_entry[3];
  unsigned int underflow[3];
  unsigned int num_entries;
  unsigned int size;
};

struct arpt_get_entries {
  char name[32];
  unsigned int size;
  void* entrytable[XT_TABLE_SIZE / sizeof(void*)];
};

struct arpt_replace {
  char name[32];
  unsigned int valid_hooks;
  unsigned int num_entries;
  unsigned int size;
  unsigned int hook_entry[3];
  unsigned int underflow[3];
  unsigned int num_counters;
  struct xt_counters* counters;
  char entrytable[XT_TABLE_SIZE];
};

// Snapshot of one arptables table, laid out like ipt_table_desc.
struct arpt_table_desc {
  const char* name;
  struct arpt_getinfo info;
  struct arpt_replace replace;
};

static struct arpt_table_desc arpt_tables[] = {
    {.name = "filter"},
};

#define ARPT_BASE_CTL 96
#define ARPT_SO_SET_REPLACE (ARPT_BASE_CTL)
#define ARPT_SO_GET_INFO (ARPT_BASE_CTL)
#define ARPT_SO_GET_ENTRIES (ARPT_BASE_CTL + 1)

// Records the current state of each table in `tables` (info plus entries)
// into its `replace` request. Returns silently when the socket family or
// protocol is unsupported; tables whose info query fails with EPERM, ENOENT
// or ENOPROTOOPT are skipped. Any other failure, or a table larger than
// XT_TABLE_SIZE bytes / XT_MAX_ENTRIES entries, exits the process.
static void checkpoint_iptables(struct ipt_table_desc* tables, int num_tables,
                                int family, int level)
{
  struct ipt_get_entries entries;
  socklen_t optlen;
  int fd, i;
  fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
  if (fd == -1) {
    switch (errno) {
    case EAFNOSUPPORT:
    case ENOPROTOOPT:
      return;
    }
    exit(1);
  }
  for (i = 0; i < num_tables; i++) {
    struct ipt_table_desc* table = &tables[i];
    strcpy(table->info.name, table->name);
    strcpy(table->replace.name, table->name);
    optlen = sizeof(table->info);
    if (getsockopt(fd, level, IPT_SO_GET_INFO, &table->info, &optlen)) {
      switch (errno) {
      case EPERM:
      case ENOENT:
      case ENOPROTOOPT:
        continue;
      }
      exit(1);
    }
    if (table->info.size > sizeof(table->replace.entrytable))
      exit(1);
    if (table->info.num_entries > XT_MAX_ENTRIES)
      exit(1);
    memset(&entries, 0, sizeof(entries));
    strcpy(entries.name, table->name);
    entries.size = table->info.size;
    optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size;
    if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen))
      exit(1);
    table->replace.valid_hooks = table->info.valid_hooks;
    table->replace.num_entries = table->info.num_entries;
    table->replace.size = table->info.size;
    memcpy(table->replace.hook_entry, table->info.hook_entry,
           sizeof(table->replace.hook_entry));
    memcpy(table->replace.underflow, table->info.underflow,
           sizeof(table->replace.underflow));
    memcpy(table->replace.entrytable, entries.entrytable, table->info.size);
  }
  close(fd);
}
static void reset_iptables(struct ipt_table_desc* tables, int num_tables, int family, int level) { struct xt_counters counters[XT_MAX_ENTRIES]; struct ipt_get_entries entries; struct ipt_getinfo info; socklen_t optlen; int fd, i; fd = socket(family, SOCK_STREAM, IPPROTO_TCP); if (fd == -1) { switch (errno) { case EAFNOSUPPORT: case ENOPROTOOPT: return; } exit(1); } for (i = 0; i < num_tables; i++) { struct ipt_table_desc* table = &tables[i]; if (table->info.valid_hooks == 0) continue; memset(&info, 0, sizeof(info)); strcpy(info.name, table->name); optlen = sizeof(info); if (getsockopt(fd, level, IPT_SO_GET_INFO, &info, &optlen)) exit(1); if (memcmp(&table->info, &info, sizeof(table->info)) == 0) { memset(&entries, 0, sizeof(entries)); strcpy(entries.name, table->name); entries.size = table->info.size; optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size; if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen)) exit(1); if (memcmp(table->replace.entrytable, entries.entrytable, table->info.size) == 0) continue; } table->replace.num_counters = info.num_entries; table->replace.counters = counters; optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + table->replace.size; if (setsockopt(fd, level, IPT_SO_SET_REPLACE, &table->replace, optlen)) exit(1); } close(fd); } static void checkpoint_arptables(void) { struct arpt_get_entries entries; socklen_t optlen; unsigned i; int fd; fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); if (fd == -1) { switch (errno) { case EAFNOSUPPORT: case ENOPROTOOPT: return; } exit(1); } for (i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) { struct arpt_table_desc* table = &arpt_tables[i]; strcpy(table->info.name, table->name); strcpy(table->replace.name, table->name); optlen = sizeof(table->info); if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &table->info, &optlen)) { switch (errno) { case EPERM: case ENOENT: case ENOPROTOOPT: continue; } exit(1); } if (table->info.size > 
sizeof(table->replace.entrytable)) exit(1); if (table->info.num_entries > XT_MAX_ENTRIES) exit(1); memset(&entries, 0, sizeof(entries)); strcpy(entries.name, table->name); entries.size = table->info.size; optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size; if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen)) exit(1); table->replace.valid_hooks = table->info.valid_hooks; table->replace.num_entries = table->info.num_entries; table->replace.size = table->info.size; memcpy(table->replace.hook_entry, table->info.hook_entry, sizeof(table->replace.hook_entry)); memcpy(table->replace.underflow, table->info.underflow, sizeof(table->replace.underflow)); memcpy(table->replace.entrytable, entries.entrytable, table->info.size); } close(fd); } static void reset_arptables() { struct xt_counters counters[XT_MAX_ENTRIES]; struct arpt_get_entries entries; struct arpt_getinfo info; socklen_t optlen; unsigned i; int fd; fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); if (fd == -1) { switch (errno) { case EAFNOSUPPORT: case ENOPROTOOPT: return; } exit(1); } for (i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) { struct arpt_table_desc* table = &arpt_tables[i]; if (table->info.valid_hooks == 0) continue; memset(&info, 0, sizeof(info)); strcpy(info.name, table->name); optlen = sizeof(info); if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &info, &optlen)) exit(1); if (memcmp(&table->info, &info, sizeof(table->info)) == 0) { memset(&entries, 0, sizeof(entries)); strcpy(entries.name, table->name); entries.size = table->info.size; optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size; if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen)) exit(1); if (memcmp(table->replace.entrytable, entries.entrytable, table->info.size) == 0) continue; } else { } table->replace.num_counters = info.num_entries; table->replace.counters = counters; optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + 
table->replace.size; if (setsockopt(fd, SOL_IP, ARPT_SO_SET_REPLACE, &table->replace, optlen)) exit(1); } close(fd); } #define NF_BR_NUMHOOKS 6 #define EBT_TABLE_MAXNAMELEN 32 #define EBT_CHAIN_MAXNAMELEN 32 #define EBT_BASE_CTL 128 #define EBT_SO_SET_ENTRIES (EBT_BASE_CTL) #define EBT_SO_GET_INFO (EBT_BASE_CTL) #define EBT_SO_GET_ENTRIES (EBT_SO_GET_INFO + 1) #define EBT_SO_GET_INIT_INFO (EBT_SO_GET_ENTRIES + 1) #define EBT_SO_GET_INIT_ENTRIES (EBT_SO_GET_INIT_INFO + 1) struct ebt_replace { char name[EBT_TABLE_MAXNAMELEN]; unsigned int valid_hooks; unsigned int nentries; unsigned int entries_size; struct ebt_entries* hook_entry[NF_BR_NUMHOOKS]; unsigned int num_counters; struct ebt_counter* counters; char* entries; }; struct ebt_entries { unsigned int distinguisher; char name[EBT_CHAIN_MAXNAMELEN]; unsigned int counter_offset; int policy; unsigned int nentries; char data[0] __attribute__((aligned(__alignof__(struct ebt_replace)))); }; struct ebt_table_desc { const char* name; struct ebt_replace replace; char entrytable[XT_TABLE_SIZE]; }; static struct ebt_table_desc ebt_tables[] = { {.name = "filter"}, {.name = "nat"}, {.name = "broute"}, }; static void checkpoint_ebtables(void) { socklen_t optlen; unsigned i; int fd; fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); if (fd == -1) { switch (errno) { case EAFNOSUPPORT: case ENOPROTOOPT: return; } exit(1); } for (i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) { struct ebt_table_desc* table = &ebt_tables[i]; strcpy(table->replace.name, table->name); optlen = sizeof(table->replace); if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_INFO, &table->replace, &optlen)) { switch (errno) { case EPERM: case ENOENT: case ENOPROTOOPT: continue; } exit(1); } if (table->replace.entries_size > sizeof(table->entrytable)) exit(1); table->replace.num_counters = 0; table->replace.entries = table->entrytable; optlen = sizeof(table->replace) + table->replace.entries_size; if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_ENTRIES, 
&table->replace, &optlen)) exit(1); } close(fd); } static void reset_ebtables() { struct ebt_replace replace; char entrytable[XT_TABLE_SIZE]; socklen_t optlen; unsigned i, j, h; int fd; fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); if (fd == -1) { switch (errno) { case EAFNOSUPPORT: case ENOPROTOOPT: return; } exit(1); } for (i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) { struct ebt_table_desc* table = &ebt_tables[i]; if (table->replace.valid_hooks == 0) continue; memset(&replace, 0, sizeof(replace)); strcpy(replace.name, table->name); optlen = sizeof(replace); if (getsockopt(fd, SOL_IP, EBT_SO_GET_INFO, &replace, &optlen)) exit(1); replace.num_counters = 0; table->replace.entries = 0; for (h = 0; h < NF_BR_NUMHOOKS; h++) table->replace.hook_entry[h] = 0; if (memcmp(&table->replace, &replace, sizeof(table->replace)) == 0) { memset(&entrytable, 0, sizeof(entrytable)); replace.entries = entrytable; optlen = sizeof(replace) + replace.entries_size; if (getsockopt(fd, SOL_IP, EBT_SO_GET_ENTRIES, &replace, &optlen)) exit(1); if (memcmp(table->entrytable, entrytable, replace.entries_size) == 0) continue; } for (j = 0, h = 0; h < NF_BR_NUMHOOKS; h++) { if (table->replace.valid_hooks & (1 << h)) { table->replace.hook_entry[h] = (struct ebt_entries*)table->entrytable + j; j++; } } table->replace.entries = table->entrytable; optlen = sizeof(table->replace) + table->replace.entries_size; if (setsockopt(fd, SOL_IP, EBT_SO_SET_ENTRIES, &table->replace, optlen)) exit(1); } close(fd); } static void checkpoint_net_namespace(void) { checkpoint_ebtables(); checkpoint_arptables(); checkpoint_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP); checkpoint_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6); } static void reset_net_namespace(void) { reset_ebtables(); reset_arptables(); reset_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP); 
reset_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6); } static void setup_cgroups() { if (mkdir("/syzcgroup", 0777)) { } if (mkdir("/syzcgroup/unified", 0777)) { } if (mount("none", "/syzcgroup/unified", "cgroup2", 0, NULL)) { } if (chmod("/syzcgroup/unified", 0777)) { } write_file("/syzcgroup/unified/cgroup.subtree_control", "+cpu +memory +io +pids +rdma"); if (mkdir("/syzcgroup/cpu", 0777)) { } if (mount("none", "/syzcgroup/cpu", "cgroup", 0, "cpuset,cpuacct,perf_event,hugetlb")) { } write_file("/syzcgroup/cpu/cgroup.clone_children", "1"); if (chmod("/syzcgroup/cpu", 0777)) { } if (mkdir("/syzcgroup/net", 0777)) { } if (mount("none", "/syzcgroup/net", "cgroup", 0, "net_cls,net_prio,devices,freezer")) { } if (chmod("/syzcgroup/net", 0777)) { } } static void setup_binfmt_misc() { if (mount(0, "/proc/sys/fs/binfmt_misc", "binfmt_misc", 0, 0)) { } write_file("/proc/sys/fs/binfmt_misc/register", ":syz0:M:0:\x01::./file0:"); write_file("/proc/sys/fs/binfmt_misc/register", ":syz1:M:1:\x02::./file0:POC"); } static void setup_common() { if (mount(0, "/sys/fs/fuse/connections", "fusectl", 0, 0)) { } setup_cgroups(); setup_binfmt_misc(); } static void loop(); static void sandbox_common() { prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); setpgrp(); setsid(); struct rlimit rlim; rlim.rlim_cur = rlim.rlim_max = 200 << 20; setrlimit(RLIMIT_AS, &rlim); rlim.rlim_cur = rlim.rlim_max = 32 << 20; setrlimit(RLIMIT_MEMLOCK, &rlim); rlim.rlim_cur = rlim.rlim_max = 136 << 20; setrlimit(RLIMIT_FSIZE, &rlim); rlim.rlim_cur = rlim.rlim_max = 1 << 20; setrlimit(RLIMIT_STACK, &rlim); rlim.rlim_cur = rlim.rlim_max = 0; setrlimit(RLIMIT_CORE, &rlim); rlim.rlim_cur = rlim.rlim_max = 256; setrlimit(RLIMIT_NOFILE, &rlim); if (unshare(CLONE_NEWNS)) { } if (unshare(CLONE_NEWIPC)) { } if (unshare(0x02000000)) { } if (unshare(CLONE_NEWUTS)) { } if (unshare(CLONE_SYSVSEM)) { } typedef struct { const char* name; const char* value; } sysctl_t; static const sysctl_t 
sysctls[] = { {"/proc/sys/kernel/shmmax", "16777216"}, {"/proc/sys/kernel/shmall", "536870912"}, {"/proc/sys/kernel/shmmni", "1024"}, {"/proc/sys/kernel/msgmax", "8192"}, {"/proc/sys/kernel/msgmni", "1024"}, {"/proc/sys/kernel/msgmnb", "1024"}, {"/proc/sys/kernel/sem", "1024 1048576 500 1024"}, }; unsigned i; for (i = 0; i < sizeof(sysctls) / sizeof(sysctls[0]); i++) write_file(sysctls[i].name, sysctls[i].value); } int wait_for_loop(int pid) { if (pid < 0) exit(1); int status = 0; while (waitpid(-1, &status, __WALL) != pid) { } return WEXITSTATUS(status); } static int real_uid; static int real_gid; __attribute__((aligned(64 << 10))) static char sandbox_stack[1 << 20]; static int namespace_sandbox_proc(void* arg) { sandbox_common(); write_file("/proc/self/setgroups", "deny"); if (!write_file("/proc/self/uid_map", "0 %d 1\n", real_uid)) exit(1); if (!write_file("/proc/self/gid_map", "0 %d 1\n", real_gid)) exit(1); initialize_netdevices_init(); if (unshare(CLONE_NEWNET)) exit(1); initialize_netdevices(); if (mkdir("./syz-tmp", 0777)) exit(1); if (mount("", "./syz-tmp", "tmpfs", 0, NULL)) exit(1); if (mkdir("./syz-tmp/newroot", 0777)) exit(1); if (mkdir("./syz-tmp/newroot/dev", 0700)) exit(1); unsigned bind_mount_flags = MS_BIND | MS_REC | MS_PRIVATE; if (mount("/dev", "./syz-tmp/newroot/dev", NULL, bind_mount_flags, NULL)) exit(1); if (mkdir("./syz-tmp/newroot/proc", 0700)) exit(1); if (mount(NULL, "./syz-tmp/newroot/proc", "proc", 0, NULL)) exit(1); if (mkdir("./syz-tmp/newroot/selinux", 0700)) exit(1); const char* selinux_path = "./syz-tmp/newroot/selinux"; if (mount("/selinux", selinux_path, NULL, bind_mount_flags, NULL)) { if (errno != ENOENT) exit(1); if (mount("/sys/fs/selinux", selinux_path, NULL, bind_mount_flags, NULL) && errno != ENOENT) exit(1); } if (mkdir("./syz-tmp/newroot/sys", 0700)) exit(1); if (mount("/sys", "./syz-tmp/newroot/sys", 0, bind_mount_flags, NULL)) exit(1); if (mkdir("./syz-tmp/newroot/syzcgroup", 0700)) exit(1); if 
(mkdir("./syz-tmp/newroot/syzcgroup/unified", 0700)) exit(1); if (mkdir("./syz-tmp/newroot/syzcgroup/cpu", 0700)) exit(1); if (mkdir("./syz-tmp/newroot/syzcgroup/net", 0700)) exit(1); if (mount("/syzcgroup/unified", "./syz-tmp/newroot/syzcgroup/unified", NULL, bind_mount_flags, NULL)) { } if (mount("/syzcgroup/cpu", "./syz-tmp/newroot/syzcgroup/cpu", NULL, bind_mount_flags, NULL)) { } if (mount("/syzcgroup/net", "./syz-tmp/newroot/syzcgroup/net", NULL, bind_mount_flags, NULL)) { } if (mkdir("./syz-tmp/pivot", 0777)) exit(1); if (syscall(SYS_pivot_root, "./syz-tmp", "./syz-tmp/pivot")) { if (chdir("./syz-tmp")) exit(1); } else { if (chdir("/")) exit(1); if (umount2("./pivot", MNT_DETACH)) exit(1); } if (chroot("./newroot")) exit(1); if (chdir("/")) exit(1); struct __user_cap_header_struct cap_hdr = {}; struct __user_cap_data_struct cap_data[2] = {}; cap_hdr.version = _LINUX_CAPABILITY_VERSION_3; cap_hdr.pid = getpid(); if (syscall(SYS_capget, &cap_hdr, &cap_data)) exit(1); cap_data[0].effective &= ~(1 << CAP_SYS_PTRACE); cap_data[0].permitted &= ~(1 << CAP_SYS_PTRACE); cap_data[0].inheritable &= ~(1 << CAP_SYS_PTRACE); if (syscall(SYS_capset, &cap_hdr, &cap_data)) exit(1); loop(); exit(1); } #define SYZ_HAVE_SANDBOX_NAMESPACE 1 static int do_sandbox_namespace(void) { int pid; setup_common(); real_uid = getuid(); real_gid = getgid(); mprotect(sandbox_stack, 4096, PROT_NONE); pid = clone(namespace_sandbox_proc, &sandbox_stack[sizeof(sandbox_stack) - 64], CLONE_NEWUSER | CLONE_NEWPID, 0); return wait_for_loop(pid); } #define FS_IOC_SETFLAGS _IOW('f', 2, long) static void remove_dir(const char* dir) { DIR* dp; struct dirent* ep; int iter = 0; retry: while (umount2(dir, MNT_DETACH) == 0) { } dp = opendir(dir); if (dp == NULL) { if (errno == EMFILE) { exit(1); } exit(1); } while ((ep = readdir(dp))) { if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0) continue; char filename[FILENAME_MAX]; snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name); 
while (umount2(filename, MNT_DETACH) == 0) { } struct stat st; if (lstat(filename, &st)) exit(1); if (S_ISDIR(st.st_mode)) { remove_dir(filename); continue; } int i; for (i = 0;; i++) { if (unlink(filename) == 0) break; if (errno == EPERM) { int fd = open(filename, O_RDONLY); if (fd != -1) { long flags = 0; if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) close(fd); continue; } } if (errno == EROFS) { break; } if (errno != EBUSY || i > 100) exit(1); if (umount2(filename, MNT_DETACH)) exit(1); } } closedir(dp); int i; for (i = 0;; i++) { if (rmdir(dir) == 0) break; if (i < 100) { if (errno == EPERM) { int fd = open(dir, O_RDONLY); if (fd != -1) { long flags = 0; if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) close(fd); continue; } } if (errno == EROFS) { break; } if (errno == EBUSY) { if (umount2(dir, MNT_DETACH)) exit(1); continue; } if (errno == ENOTEMPTY) { if (iter < 100) { iter++; goto retry; } } } exit(1); } } static void kill_and_wait(int pid, int* status) { kill(-pid, SIGKILL); kill(pid, SIGKILL); int i; for (i = 0; i < 100; i++) { if (waitpid(-1, status, WNOHANG | __WALL) == pid) return; usleep(1000); } DIR* dir = opendir("/sys/fs/fuse/connections"); if (dir) { for (;;) { struct dirent* ent = readdir(dir); if (!ent) break; if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue; char abort[300]; snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort", ent->d_name); int fd = open(abort, O_WRONLY); if (fd == -1) { continue; } if (write(fd, abort, 1) < 0) { } close(fd); } closedir(dir); } else { } while (waitpid(-1, status, __WALL) != pid) { } } #define SYZ_HAVE_SETUP_LOOP 1 static void setup_loop() { int pid = getpid(); char cgroupdir[64]; char file[128]; snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid); if (mkdir(cgroupdir, 0777)) { } snprintf(file, sizeof(file), "%s/pids.max", cgroupdir); write_file(file, "32"); snprintf(file, sizeof(file), "%s/memory.low", cgroupdir); write_file(file, "%d", 298 << 20); 
snprintf(file, sizeof(file), "%s/memory.high", cgroupdir); write_file(file, "%d", 299 << 20); snprintf(file, sizeof(file), "%s/memory.max", cgroupdir); write_file(file, "%d", 300 << 20); snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir); write_file(file, "%d", pid); snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid); if (mkdir(cgroupdir, 0777)) { } snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir); write_file(file, "%d", pid); snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid); if (mkdir(cgroupdir, 0777)) { } snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir); write_file(file, "%d", pid); checkpoint_net_namespace(); } #define SYZ_HAVE_RESET_LOOP 1 static void reset_loop() { reset_net_namespace(); } #define SYZ_HAVE_SETUP_TEST 1 static void setup_test() { prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); setpgrp(); char cgroupdir[64]; snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid); if (symlink(cgroupdir, "./cgroup")) { } snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid); if (symlink(cgroupdir, "./cgroup.cpu")) { } snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid); if (symlink(cgroupdir, "./cgroup.net")) { } write_file("/proc/self/oom_score_adj", "1000"); } #define SYZ_HAVE_RESET_TEST 1 static void reset_test() { int fd; for (fd = 3; fd < 30; fd++) close(fd); } struct thread_t { int created, call; event_t ready, done; }; static struct thread_t threads[16]; static void execute_call(int call); static int running; static void* thr(void* arg) { struct thread_t* th = (struct thread_t*)arg; for (;;) { event_wait(&th->ready); event_reset(&th->ready); execute_call(th->call); __atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED); event_set(&th->done); } return 0; } static void execute_one(void) { int i, call, thread; for (call = 0; call < 6; call++) { for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0])); thread++) { struct 
thread_t* th = &threads[thread]; if (!th->created) { th->created = 1; event_init(&th->ready); event_init(&th->done); event_set(&th->done); thread_start(thr, th); } if (!event_isset(&th->done)) continue; event_reset(&th->done); th->call = call; __atomic_fetch_add(&running, 1, __ATOMIC_RELAXED); event_set(&th->ready); event_timedwait(&th->done, 45); break; } } for (i = 0; i < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED); i++) sleep_ms(1); } static void execute_one(void); #define WAIT_FLAGS __WALL static void loop(void) { setup_loop(); int iter; for (iter = 0;; iter++) { char cwdbuf[32]; sprintf(cwdbuf, "./%d", iter); if (mkdir(cwdbuf, 0777)) exit(1); reset_loop(); int pid = fork(); if (pid < 0) exit(1); if (pid == 0) { if (chdir(cwdbuf)) exit(1); setup_test(); execute_one(); reset_test(); exit(0); } int status = 0; uint64_t start = current_time_ms(); for (;;) { if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid) break; sleep_ms(1); if (current_time_ms() - start < 5 * 1000) continue; kill_and_wait(pid, &status); break; } remove_dir(cwdbuf); } } uint64_t r[2] = {0xffffffffffffffff, 0xffffffffffffffff}; void execute_call(int call) { long res; switch (call) { case 0: res = syscall(__NR_epoll_create1, 0); if (res != -1) r[0] = res; break; case 1: NONFAILING(*(uint16_t*)0x20000080 = 0); NONFAILING(*(uint16_t*)0x20000082 = 0); NONFAILING(*(uint64_t*)0x20000088 = 0); NONFAILING(*(uint64_t*)0x20000090 = 0); NONFAILING(*(uint32_t*)0x20000098 = 0); syscall(__NR_fcntl, r[0], 7, 0x20000080); break; case 2: NONFAILING(*(uint16_t*)0x20000140 = 1); NONFAILING(*(uint16_t*)0x20000142 = 0); NONFAILING(*(uint64_t*)0x20000148 = 0x1000000); NONFAILING(*(uint64_t*)0x20000150 = 0); NONFAILING(*(uint32_t*)0x20000158 = 0); syscall(__NR_fcntl, r[0], 7, 0x20000140); break; case 3: syscall(__NR_bind, -1, 0, 0); break; case 4: res = syscall(__NR_socket, 0xa, 1, 0x84); if (res != -1) r[1] = res; break; case 5: syscall(__NR_sendto, r[1], 0, 0, 0, 0, 0); break; } } int main(void) { 
syscall(__NR_mmap, 0x20000000, 0x1000000, 3, 0x32, -1, 0); install_segv_handler(); for (procid = 0; procid < 6; procid++) { if (fork() == 0) { use_temporary_dir(); do_sandbox_namespace(); } } sleep(1000000); return 0; }