race condition kernel BUG in arch/x86/kvm/x86.c

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hello,

This program will trigger kvm_spurious_fault cause a kernel bug when
cpus are executing vmx_vcpu_load and __loaded_vmcs_clear function. The
root cause should be hidden some race condition.

reproducer + panic report is provided, tested on master available at
the time of writing and on 4.20-rc3 as well as 4.20-rc2, 4.18 and 4.15
is not affected.

Thank you!

ww9210



-----
// autogenerated by syzkaller (https://github.com/google/syzkaller)

#define _GNU_SOURCE

#include <arpa/inet.h>
#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <net/if_arp.h>
#include <netinet/in.h>
#include <pthread.h>
#include <sched.h>
#include <setjmp.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

#include <linux/futex.h>
#include <linux/if.h>
#include <linux/if_ether.h>
#include <linux/if_tun.h>
#include <linux/ip.h>
#include <linux/kvm.h>
#include <linux/net.h>
#include <linux/tcp.h>

unsigned long long procid;

static __thread int skip_segv;
static __thread jmp_buf segv_env;

static void segv_handler(int sig, siginfo_t* info, void* ctx)
{
  uintptr_t addr = (uintptr_t)info->si_addr;
  const uintptr_t prog_start = 1 << 20;
  const uintptr_t prog_end = 100 << 20;
  if (__atomic_load_n(&skip_segv, __ATOMIC_RELAXED) &&
      (addr < prog_start || addr > prog_end)) {
    _longjmp(segv_env, 1);
  }
  exit(sig);
}

static void install_segv_handler(void)
{
  struct sigaction sa;
  memset(&sa, 0, sizeof(sa));
  sa.sa_handler = SIG_IGN;
  syscall(SYS_rt_sigaction, 0x20, &sa, NULL, 8);
  syscall(SYS_rt_sigaction, 0x21, &sa, NULL, 8);
  memset(&sa, 0, sizeof(sa));
  sa.sa_sigaction = segv_handler;
  sa.sa_flags = SA_NODEFER | SA_SIGINFO;
  sigaction(SIGSEGV, &sa, NULL);
  sigaction(SIGBUS, &sa, NULL);
}

#define NONFAILING(...)                                                        \
  {                                                                            \
    __atomic_fetch_add(&skip_segv, 1, __ATOMIC_SEQ_CST);                       \
    if (_setjmp(segv_env) == 0) {                                              \
      __VA_ARGS__;                                                             \
    }                                                                          \
    __atomic_fetch_sub(&skip_segv, 1, __ATOMIC_SEQ_CST);                       \
  }

static void sleep_ms(uint64_t ms)
{
  usleep(ms * 1000);
}

static uint64_t current_time_ms(void)
{
  struct timespec ts;
  if (clock_gettime(CLOCK_MONOTONIC, &ts))
    exit(1);
  return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
}

static void use_temporary_dir(void)
{
  char tmpdir_template[] = "./syzkaller.XXXXXX";
  char* tmpdir = mkdtemp(tmpdir_template);
  if (!tmpdir)
    exit(1);
  if (chmod(tmpdir, 0777))
    exit(1);
  if (chdir(tmpdir))
    exit(1);
}

static void thread_start(void* (*fn)(void*), void* arg)
{
  pthread_t th;
  pthread_attr_t attr;
  pthread_attr_init(&attr);
  pthread_attr_setstacksize(&attr, 128 << 10);
  if (pthread_create(&th, &attr, fn, arg))
    exit(1);
  pthread_attr_destroy(&attr);
}

typedef struct {
  int state;
} event_t;

static void event_init(event_t* ev)
{
  ev->state = 0;
}

static void event_reset(event_t* ev)
{
  ev->state = 0;
}

static void event_set(event_t* ev)
{
  if (ev->state)
    exit(1);
  __atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE);
  syscall(SYS_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG);
}

static void event_wait(event_t* ev)
{
  while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
    syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0);
}

static int event_isset(event_t* ev)
{
  return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
}

static int event_timedwait(event_t* ev, uint64_t timeout)
{
  uint64_t start = current_time_ms();
  uint64_t now = start;
  for (;;) {
    uint64_t remain = timeout - (now - start);
    struct timespec ts;
    ts.tv_sec = remain / 1000;
    ts.tv_nsec = (remain % 1000) * 1000 * 1000;
    syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts);
    if (__atomic_load_n(&ev->state, __ATOMIC_RELAXED))
      return 1;
    now = current_time_ms();
    if (now - start > timeout)
      return 0;
  }
}

static void vsnprintf_check(char* str, size_t size, const char* format,
                            va_list args)
{
  int rv;
  rv = vsnprintf(str, size, format, args);
  if (rv < 0)
    exit(1);
  if ((size_t)rv >= size)
    exit(1);
}

#define COMMAND_MAX_LEN 128
#define PATH_PREFIX                                                            \
  "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin "
#define PATH_PREFIX_LEN (sizeof(PATH_PREFIX) - 1)

static void execute_command(bool panic, const char* format, ...)
{
  va_list args;
  char command[PATH_PREFIX_LEN + COMMAND_MAX_LEN];
  int rv;
  va_start(args, format);
  memcpy(command, PATH_PREFIX, PATH_PREFIX_LEN);
  vsnprintf_check(command + PATH_PREFIX_LEN, COMMAND_MAX_LEN, format, args);
  va_end(args);
  rv = system(command);
  if (rv) {
    if (panic)
      exit(1);
  }
}

static int tunfd = -1;
static int tun_frags_enabled;
#define SYZ_TUN_MAX_PACKET_SIZE 1000

#define TUN_IFACE "syz_tun"

#define LOCAL_MAC "aa:aa:aa:aa:aa:aa"
#define REMOTE_MAC "aa:aa:aa:aa:aa:bb"

#define LOCAL_IPV4 "172.20.20.170"
#define REMOTE_IPV4 "172.20.20.187"

#define LOCAL_IPV6 "fe80::aa"
#define REMOTE_IPV6 "fe80::bb"

#define IFF_NAPI 0x0010
#define IFF_NAPI_FRAGS 0x0020

static void initialize_tun(void)
{
  tunfd = open("/dev/net/tun", O_RDWR | O_NONBLOCK);
  if (tunfd == -1) {
    printf("tun: can't open /dev/net/tun: please enable CONFIG_TUN=y\n");
    printf("otherwise fuzzing or reproducing might not work as intended\n");
    return;
  }
  const int kTunFd = 240;
  if (dup2(tunfd, kTunFd) < 0)
    exit(1);
  close(tunfd);
  tunfd = kTunFd;
  struct ifreq ifr;
  memset(&ifr, 0, sizeof(ifr));
  strncpy(ifr.ifr_name, TUN_IFACE, IFNAMSIZ);
  ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_NAPI | IFF_NAPI_FRAGS;
  if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0) {
    ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
    if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0)
      exit(1);
  }
  if (ioctl(tunfd, TUNGETIFF, (void*)&ifr) < 0)
    exit(1);
  tun_frags_enabled = (ifr.ifr_flags & IFF_NAPI_FRAGS) != 0;
  execute_command(0, "sysctl -w net.ipv6.conf.%s.accept_dad=0", TUN_IFACE);
  execute_command(0, "sysctl -w net.ipv6.conf.%s.router_solicitations=0",
                  TUN_IFACE);
  execute_command(1, "ip link set dev %s address %s", TUN_IFACE, LOCAL_MAC);
  execute_command(1, "ip addr add %s/24 dev %s", LOCAL_IPV4, TUN_IFACE);
  execute_command(1, "ip neigh add %s lladdr %s dev %s nud permanent",
                  REMOTE_IPV4, REMOTE_MAC, TUN_IFACE);
  execute_command(0, "ip -6 addr add %s/120 dev %s", LOCAL_IPV6, TUN_IFACE);
  execute_command(0, "ip -6 neigh add %s lladdr %s dev %s nud permanent",
                  REMOTE_IPV6, REMOTE_MAC, TUN_IFACE);
  execute_command(1, "ip link set dev %s up", TUN_IFACE);
}

#define DEV_IPV4 "172.20.20.%d"
#define DEV_IPV6 "fe80::%02hx"
#define DEV_MAC "aa:aa:aa:aa:aa:%02hx"

static void snprintf_check(char* str, size_t size, const char* format, ...)
{
  va_list args;
  va_start(args, format);
  vsnprintf_check(str, size, format, args);
  va_end(args);
}
static void initialize_netdevices(void)
{
  unsigned i;
  const char* devtypes[] = {"ip6gretap", "bridge", "vcan", "bond", "team"};
  const char* devnames[] = {"lo",
                            "sit0",
                            "bridge0",
                            "vcan0",
                            "tunl0",
                            "gre0",
                            "gretap0",
                            "ip_vti0",
                            "ip6_vti0",
                            "ip6tnl0",
                            "ip6gre0",
                            "ip6gretap0",
                            "erspan0",
                            "bond0",
                            "veth0",
                            "veth1",
                            "team0",
                            "veth0_to_bridge",
                            "veth1_to_bridge",
                            "veth0_to_bond",
                            "veth1_to_bond",
                            "veth0_to_team",
                            "veth1_to_team"};
  const char* devmasters[] = {"bridge", "bond", "team"};
  for (i = 0; i < sizeof(devtypes) / (sizeof(devtypes[0])); i++)
    execute_command(0, "ip link add dev %s0 type %s", devtypes[i], devtypes[i]);
  execute_command(0, "ip link add type veth");
  for (i = 0; i < sizeof(devmasters) / (sizeof(devmasters[0])); i++) {
    execute_command(
        0, "ip link add name %s_slave_0 type veth peer name veth0_to_%s",
        devmasters[i], devmasters[i]);
    execute_command(
        0, "ip link add name %s_slave_1 type veth peer name veth1_to_%s",
        devmasters[i], devmasters[i]);
    execute_command(0, "ip link set %s_slave_0 master %s0", devmasters[i],
                    devmasters[i]);
    execute_command(0, "ip link set %s_slave_1 master %s0", devmasters[i],
                    devmasters[i]);
    execute_command(0, "ip link set veth0_to_%s up", devmasters[i]);
    execute_command(0, "ip link set veth1_to_%s up", devmasters[i]);
  }
  execute_command(0, "ip link set bridge_slave_0 up");
  execute_command(0, "ip link set bridge_slave_1 up");
  for (i = 0; i < sizeof(devnames) / (sizeof(devnames[0])); i++) {
    char addr[32];
    snprintf_check(addr, sizeof(addr), DEV_IPV4, i + 10);
    execute_command(0, "ip -4 addr add %s/24 dev %s", addr, devnames[i]);
    snprintf_check(addr, sizeof(addr), DEV_IPV6, i + 10);
    execute_command(0, "ip -6 addr add %s/120 dev %s", addr, devnames[i]);
    snprintf_check(addr, sizeof(addr), DEV_MAC, i + 10);
    execute_command(0, "ip link set dev %s address %s", devnames[i], addr);
    execute_command(0, "ip link set dev %s up", devnames[i]);
  }
}

static int read_tun(char* data, int size)
{
  if (tunfd < 0)
    return -1;
  int rv = read(tunfd, data, size);
  if (rv < 0) {
    if (errno == EAGAIN)
      return -1;
    if (errno == EBADFD)
      return -1;
    exit(1);
  }
  return rv;
}

static void flush_tun()
{
  char data[SYZ_TUN_MAX_PACKET_SIZE];
  while (read_tun(&data[0], sizeof(data)) != -1) {
  }
}

const char kvm_asm16_cpl3[] =
    "\x0f\x20\xc0\x66\x83\xc8\x01\x0f\x22\xc0\xb8\xa0\x00\x0f\x00\xd8\xb8\x2b"
    "\x00\x8e\xd8\x8e\xc0\x8e\xe0\x8e\xe8\xbc\x00\x01\xc7\x06\x00\x01\x1d\xba"
    "\xc7\x06\x02\x01\x23\x00\xc7\x06\x04\x01\x00\x01\xc7\x06\x06\x01\x2b\x00"
    "\xcb";
const char kvm_asm32_paged[] = "\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0";
const char kvm_asm32_vm86[] =
    "\x66\xb8\xb8\x00\x0f\x00\xd8\xea\x00\x00\x00\x00\xd0\x00";
const char kvm_asm32_paged_vm86[] =
    "\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0\x66\xb8\xb8\x00\x0f\x00\xd8"
    "\xea\x00\x00\x00\x00\xd0\x00";
const char kvm_asm64_vm86[] =
    "\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0\x66\xb8\xb8\x00\x0f\x00\xd8"
    "\xea\x00\x00\x00\x00\xd0\x00";
const char kvm_asm64_enable_long[] =
    "\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0\xea\xde\xc0\xad\x0b\x50\x00"
    "\x48\xc7\xc0\xd8\x00\x00\x00\x0f\x00\xd8";
const char kvm_asm64_init_vm[] =
    "\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0\xea\xde\xc0\xad\x0b\x50\x00"
    "\x48\xc7\xc0\xd8\x00\x00\x00\x0f\x00\xd8\x48\xc7\xc1\x3a\x00\x00\x00\x0f"
    "\x32\x48\x83\xc8\x05\x0f\x30\x0f\x20\xe0\x48\x0d\x00\x20\x00\x00\x0f\x22"
    "\xe0\x48\xc7\xc1\x80\x04\x00\x00\x0f\x32\x48\xc7\xc2\x00\x60\x00\x00\x89"
    "\x02\x48\xc7\xc2\x00\x70\x00\x00\x89\x02\x48\xc7\xc0\x00\x5f\x00\x00\xf3"
    "\x0f\xc7\x30\x48\xc7\xc0\x08\x5f\x00\x00\x66\x0f\xc7\x30\x0f\xc7\x30\x48"
    "\xc7\xc1\x81\x04\x00\x00\x0f\x32\x48\x83\xc8\x3f\x48\x21\xd0\x48\xc7\xc2"
    "\x00\x40\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x02\x40\x00\x00\x48\xb8\x84\x9e"
    "\x99\xf3\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x1e\x40\x00\x00\x48\xc7"
    "\xc0\x81\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc1\x83\x04\x00\x00\x0f\x32\x48"
    "\x0d\xff\x6f\x03\x00\x48\x21\xd0\x48\xc7\xc2\x0c\x40\x00\x00\x0f\x79\xd0"
    "\x48\xc7\xc1\x84\x04\x00\x00\x0f\x32\x48\x0d\xff\x17\x00\x00\x48\x21\xd0"
    "\x48\xc7\xc2\x12\x40\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x04\x2c\x00\x00\x48"
    "\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x00\x28\x00\x00\x48\xc7"
    "\xc0\xff\xff\xff\xff\x0f\x79\xd0\x48\xc7\xc2\x02\x0c\x00\x00\x48\xc7\xc0"
    "\x50\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc0\x58\x00\x00\x00\x48\xc7\xc2\x00"
    "\x0c\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x04\x0c\x00\x00\x0f\x79\xd0\x48\xc7"
    "\xc2\x06\x0c\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x08\x0c\x00\x00\x0f\x79\xd0"
    "\x48\xc7\xc2\x0a\x0c\x00\x00\x0f\x79\xd0\x48\xc7\xc0\xd8\x00\x00\x00\x48"
    "\xc7\xc2\x0c\x0c\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x02\x2c\x00\x00\x48\xc7"
    "\xc0\x00\x05\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x00\x4c\x00\x00\x48\xc7\xc0"
    "\x50\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x10\x6c\x00\x00\x48\xc7\xc0\x00"
    "\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x12\x6c\x00\x00\x48\xc7\xc0\x00\x00"
    "\x00\x00\x0f\x79\xd0\x0f\x20\xc0\x48\xc7\xc2\x00\x6c\x00\x00\x48\x89\xc0"
    "\x0f\x79\xd0\x0f\x20\xd8\x48\xc7\xc2\x02\x6c\x00\x00\x48\x89\xc0\x0f\x79"
    "\xd0\x0f\x20\xe0\x48\xc7\xc2\x04\x6c\x00\x00\x48\x89\xc0\x0f\x79\xd0\x48"
    "\xc7\xc2\x06\x6c\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7"
    "\xc2\x08\x6c\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2"
    "\x0a\x6c\x00\x00\x48\xc7\xc0\x00\x3a\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0c"
    "\x6c\x00\x00\x48\xc7\xc0\x00\x10\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0e\x6c"
    "\x00\x00\x48\xc7\xc0\x00\x38\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x14\x6c\x00"
    "\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x16\x6c\x00\x00"
    "\x48\x8b\x04\x25\x10\x5f\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x00\x00\x00\x00"
    "\x48\xc7\xc0\x01\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x02\x00\x00\x00\x48"
    "\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x00\x20\x00\x00\x48\xc7"
    "\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x02\x20\x00\x00\x48\xc7\xc0"
    "\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x04\x20\x00\x00\x48\xc7\xc0\x00"
    "\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x20\x00\x00\x48\xc7\xc0\x00\x00"
    "\x00\x00\x0f\x79\xd0\x48\xc7\xc1\x77\x02\x00\x00\x0f\x32\x48\xc1\xe2\x20"
    "\x48\x09\xd0\x48\xc7\xc2\x00\x2c\x00\x00\x48\x89\xc0\x0f\x79\xd0\x48\xc7"
    "\xc2\x04\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2"
    "\x0a\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0e"
    "\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x10\x40"
    "\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x16\x40\x00"
    "\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x14\x40\x00\x00"
    "\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x00\x60\x00\x00\x48"
    "\xc7\xc0\xff\xff\xff\xff\x0f\x79\xd0\x48\xc7\xc2\x02\x60\x00\x00\x48\xc7"
    "\xc0\xff\xff\xff\xff\x0f\x79\xd0\x48\xc7\xc2\x1c\x20\x00\x00\x48\xc7\xc0"
    "\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x1e\x20\x00\x00\x48\xc7\xc0\x00"
    "\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x20\x20\x00\x00\x48\xc7\xc0\x00\x00"
    "\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x22\x20\x00\x00\x48\xc7\xc0\x00\x00\x00"
    "\x00\x0f\x79\xd0\x48\xc7\xc2\x00\x08\x00\x00\x48\xc7\xc0\x58\x00\x00\x00"
    "\x0f\x79\xd0\x48\xc7\xc2\x02\x08\x00\x00\x48\xc7\xc0\x50\x00\x00\x00\x0f"
    "\x79\xd0\x48\xc7\xc2\x04\x08\x00\x00\x48\xc7\xc0\x58\x00\x00\x00\x0f\x79"
    "\xd0\x48\xc7\xc2\x06\x08\x00\x00\x48\xc7\xc0\x58\x00\x00\x00\x0f\x79\xd0"
    "\x48\xc7\xc2\x08\x08\x00\x00\x48\xc7\xc0\x58\x00\x00\x00\x0f\x79\xd0\x48"
    "\xc7\xc2\x0a\x08\x00\x00\x48\xc7\xc0\x58\x00\x00\x00\x0f\x79\xd0\x48\xc7"
    "\xc2\x0c\x08\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2"
    "\x0e\x08\x00\x00\x48\xc7\xc0\xd8\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x12"
    "\x68\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x14\x68"
    "\x00\x00\x48\xc7\xc0\x00\x3a\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x16\x68\x00"
    "\x00\x48\xc7\xc0\x00\x10\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x18\x68\x00\x00"
    "\x48\xc7\xc0\x00\x38\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x00\x48\x00\x00\x48"
    "\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x02\x48\x00\x00\x48\xc7"
    "\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x04\x48\x00\x00\x48\xc7\xc0"
    "\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x48\x00\x00\x48\xc7\xc0\xff"
    "\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x08\x48\x00\x00\x48\xc7\xc0\xff\xff"
    "\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x0a\x48\x00\x00\x48\xc7\xc0\xff\xff\x0f"
    "\x00\x0f\x79\xd0\x48\xc7\xc2\x0c\x48\x00\x00\x48\xc7\xc0\x00\x00\x00\x00"
    "\x0f\x79\xd0\x48\xc7\xc2\x0e\x48\x00\x00\x48\xc7\xc0\xff\x1f\x00\x00\x0f"
    "\x79\xd0\x48\xc7\xc2\x10\x48\x00\x00\x48\xc7\xc0\xff\x1f\x00\x00\x0f\x79"
    "\xd0\x48\xc7\xc2\x12\x48\x00\x00\x48\xc7\xc0\xff\x1f\x00\x00\x0f\x79\xd0"
    "\x48\xc7\xc2\x14\x48\x00\x00\x48\xc7\xc0\x93\x40\x00\x00\x0f\x79\xd0\x48"
    "\xc7\xc2\x16\x48\x00\x00\x48\xc7\xc0\x9b\x20\x00\x00\x0f\x79\xd0\x48\xc7"
    "\xc2\x18\x48\x00\x00\x48\xc7\xc0\x93\x40\x00\x00\x0f\x79\xd0\x48\xc7\xc2"
    "\x1a\x48\x00\x00\x48\xc7\xc0\x93\x40\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x1c"
    "\x48\x00\x00\x48\xc7\xc0\x93\x40\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x1e\x48"
    "\x00\x00\x48\xc7\xc0\x93\x40\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x20\x48\x00"
    "\x00\x48\xc7\xc0\x82\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x22\x48\x00\x00"
    "\x48\xc7\xc0\x8b\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x1c\x68\x00\x00\x48"
    "\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x1e\x68\x00\x00\x48\xc7"
    "\xc0\x00\x91\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x20\x68\x00\x00\x48\xc7\xc0"
    "\x02\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x28\x00\x00\x48\xc7\xc0\x00"
    "\x05\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0a\x28\x00\x00\x48\xc7\xc0\x00\x00"
    "\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0c\x28\x00\x00\x48\xc7\xc0\x00\x00\x00"
    "\x00\x0f\x79\xd0\x48\xc7\xc2\x0e\x28\x00\x00\x48\xc7\xc0\x00\x00\x00\x00"
    "\x0f\x79\xd0\x48\xc7\xc2\x10\x28\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f"
    "\x79\xd0\x0f\x20\xc0\x48\xc7\xc2\x00\x68\x00\x00\x48\x89\xc0\x0f\x79\xd0"
    "\x0f\x20\xd8\x48\xc7\xc2\x02\x68\x00\x00\x48\x89\xc0\x0f\x79\xd0\x0f\x20"
    "\xe0\x48\xc7\xc2\x04\x68\x00\x00\x48\x89\xc0\x0f\x79\xd0\x48\xc7\xc0\x18"
    "\x5f\x00\x00\x48\x8b\x10\x48\xc7\xc0\x20\x5f\x00\x00\x48\x8b\x08\x48\x31"
    "\xc0\x0f\x78\xd0\x48\x31\xc8\x0f\x79\xd0\x0f\x01\xc2\x48\xc7\xc2\x00\x44"
    "\x00\x00\x0f\x78\xd0\xf4";
const char kvm_asm64_vm_exit[] =
    "\x48\xc7\xc3\x00\x44\x00\x00\x0f\x78\xda\x48\xc7\xc3\x02\x44\x00\x00\x0f"
    "\x78\xd9\x48\xc7\xc0\x00\x64\x00\x00\x0f\x78\xc0\x48\xc7\xc3\x1e\x68\x00"
    "\x00\x0f\x78\xdb\xf4";
const char kvm_asm64_cpl3[] =
    "\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0\xea\xde\xc0\xad\x0b\x50\x00"
    "\x48\xc7\xc0\xd8\x00\x00\x00\x0f\x00\xd8\x48\xc7\xc0\x6b\x00\x00\x00\x8e"
    "\xd8\x8e\xc0\x8e\xe0\x8e\xe8\x48\xc7\xc4\x80\x0f\x00\x00\x48\xc7\x04\x24"
    "\x1d\xba\x00\x00\x48\xc7\x44\x24\x04\x63\x00\x00\x00\x48\xc7\x44\x24\x08"
    "\x80\x0f\x00\x00\x48\xc7\x44\x24\x0c\x6b\x00\x00\x00\xcb";

#define ADDR_TEXT 0x0000
#define ADDR_GDT 0x1000
#define ADDR_LDT 0x1800
#define ADDR_PML4 0x2000
#define ADDR_PDP 0x3000
#define ADDR_PD 0x4000
#define ADDR_STACK0 0x0f80
#define ADDR_VAR_HLT 0x2800
#define ADDR_VAR_SYSRET 0x2808
#define ADDR_VAR_SYSEXIT 0x2810
#define ADDR_VAR_IDT 0x3800
#define ADDR_VAR_TSS64 0x3a00
#define ADDR_VAR_TSS64_CPL3 0x3c00
#define ADDR_VAR_TSS16 0x3d00
#define ADDR_VAR_TSS16_2 0x3e00
#define ADDR_VAR_TSS16_CPL3 0x3f00
#define ADDR_VAR_TSS32 0x4800
#define ADDR_VAR_TSS32_2 0x4a00
#define ADDR_VAR_TSS32_CPL3 0x4c00
#define ADDR_VAR_TSS32_VM86 0x4e00
#define ADDR_VAR_VMXON_PTR 0x5f00
#define ADDR_VAR_VMCS_PTR 0x5f08
#define ADDR_VAR_VMEXIT_PTR 0x5f10
#define ADDR_VAR_VMWRITE_FLD 0x5f18
#define ADDR_VAR_VMWRITE_VAL 0x5f20
#define ADDR_VAR_VMXON 0x6000
#define ADDR_VAR_VMCS 0x7000
#define ADDR_VAR_VMEXIT_CODE 0x9000
#define ADDR_VAR_USER_CODE 0x9100
#define ADDR_VAR_USER_CODE2 0x9120

#define SEL_LDT (1 << 3)
#define SEL_CS16 (2 << 3)
#define SEL_DS16 (3 << 3)
#define SEL_CS16_CPL3 ((4 << 3) + 3)
#define SEL_DS16_CPL3 ((5 << 3) + 3)
#define SEL_CS32 (6 << 3)
#define SEL_DS32 (7 << 3)
#define SEL_CS32_CPL3 ((8 << 3) + 3)
#define SEL_DS32_CPL3 ((9 << 3) + 3)
#define SEL_CS64 (10 << 3)
#define SEL_DS64 (11 << 3)
#define SEL_CS64_CPL3 ((12 << 3) + 3)
#define SEL_DS64_CPL3 ((13 << 3) + 3)
#define SEL_CGATE16 (14 << 3)
#define SEL_TGATE16 (15 << 3)
#define SEL_CGATE32 (16 << 3)
#define SEL_TGATE32 (17 << 3)
#define SEL_CGATE64 (18 << 3)
#define SEL_CGATE64_HI (19 << 3)
#define SEL_TSS16 (20 << 3)
#define SEL_TSS16_2 (21 << 3)
#define SEL_TSS16_CPL3 ((22 << 3) + 3)
#define SEL_TSS32 (23 << 3)
#define SEL_TSS32_2 (24 << 3)
#define SEL_TSS32_CPL3 ((25 << 3) + 3)
#define SEL_TSS32_VM86 (26 << 3)
#define SEL_TSS64 (27 << 3)
#define SEL_TSS64_HI (28 << 3)
#define SEL_TSS64_CPL3 ((29 << 3) + 3)
#define SEL_TSS64_CPL3_HI (30 << 3)

#define MSR_IA32_FEATURE_CONTROL 0x3a
#define MSR_IA32_VMX_BASIC 0x480
#define MSR_IA32_SMBASE 0x9e
#define MSR_IA32_SYSENTER_CS 0x174
#define MSR_IA32_SYSENTER_ESP 0x175
#define MSR_IA32_SYSENTER_EIP 0x176
#define MSR_IA32_STAR 0xC0000081
#define MSR_IA32_LSTAR 0xC0000082
#define MSR_IA32_VMX_PROCBASED_CTLS2 0x48B

#define NEXT_INSN $0xbadc0de
#define PREFIX_SIZE 0xba1d

#define KVM_SMI _IO(KVMIO, 0xb7)

#define CR0_PE 1
#define CR0_MP (1 << 1)
#define CR0_EM (1 << 2)
#define CR0_TS (1 << 3)
#define CR0_ET (1 << 4)
#define CR0_NE (1 << 5)
#define CR0_WP (1 << 16)
#define CR0_AM (1 << 18)
#define CR0_NW (1 << 29)
#define CR0_CD (1 << 30)
#define CR0_PG (1 << 31)

#define CR4_VME 1
#define CR4_PVI (1 << 1)
#define CR4_TSD (1 << 2)
#define CR4_DE (1 << 3)
#define CR4_PSE (1 << 4)
#define CR4_PAE (1 << 5)
#define CR4_MCE (1 << 6)
#define CR4_PGE (1 << 7)
#define CR4_PCE (1 << 8)
#define CR4_OSFXSR (1 << 8)
#define CR4_OSXMMEXCPT (1 << 10)
#define CR4_UMIP (1 << 11)
#define CR4_VMXE (1 << 13)
#define CR4_SMXE (1 << 14)
#define CR4_FSGSBASE (1 << 16)
#define CR4_PCIDE (1 << 17)
#define CR4_OSXSAVE (1 << 18)
#define CR4_SMEP (1 << 20)
#define CR4_SMAP (1 << 21)
#define CR4_PKE (1 << 22)

#define EFER_SCE 1
#define EFER_LME (1 << 8)
#define EFER_LMA (1 << 10)
#define EFER_NXE (1 << 11)
#define EFER_SVME (1 << 12)
#define EFER_LMSLE (1 << 13)
#define EFER_FFXSR (1 << 14)
#define EFER_TCE (1 << 15)
#define PDE32_PRESENT 1
#define PDE32_RW (1 << 1)
#define PDE32_USER (1 << 2)
#define PDE32_PS (1 << 7)
#define PDE64_PRESENT 1
#define PDE64_RW (1 << 1)
#define PDE64_USER (1 << 2)
#define PDE64_ACCESSED (1 << 5)
#define PDE64_DIRTY (1 << 6)
#define PDE64_PS (1 << 7)
#define PDE64_G (1 << 8)

struct tss16 {
  uint16_t prev;
  uint16_t sp0;
  uint16_t ss0;
  uint16_t sp1;
  uint16_t ss1;
  uint16_t sp2;
  uint16_t ss2;
  uint16_t ip;
  uint16_t flags;
  uint16_t ax;
  uint16_t cx;
  uint16_t dx;
  uint16_t bx;
  uint16_t sp;
  uint16_t bp;
  uint16_t si;
  uint16_t di;
  uint16_t es;
  uint16_t cs;
  uint16_t ss;
  uint16_t ds;
  uint16_t ldt;
} __attribute__((packed));

struct tss32 {
  uint16_t prev, prevh;
  uint32_t sp0;
  uint16_t ss0, ss0h;
  uint32_t sp1;
  uint16_t ss1, ss1h;
  uint32_t sp2;
  uint16_t ss2, ss2h;
  uint32_t cr3;
  uint32_t ip;
  uint32_t flags;
  uint32_t ax;
  uint32_t cx;
  uint32_t dx;
  uint32_t bx;
  uint32_t sp;
  uint32_t bp;
  uint32_t si;
  uint32_t di;
  uint16_t es, esh;
  uint16_t cs, csh;
  uint16_t ss, ssh;
  uint16_t ds, dsh;
  uint16_t fs, fsh;
  uint16_t gs, gsh;
  uint16_t ldt, ldth;
  uint16_t trace;
  uint16_t io_bitmap;
} __attribute__((packed));

struct tss64 {
  uint32_t reserved0;
  uint64_t rsp[3];
  uint64_t reserved1;
  uint64_t ist[7];
  uint64_t reserved2;
  uint32_t reserved3;
  uint32_t io_bitmap;
} __attribute__((packed));

static void fill_segment_descriptor(uint64_t* dt, uint64_t* lt,
                                    struct kvm_segment* seg)
{
  uint16_t index = seg->selector >> 3;
  uint64_t limit = seg->g ? seg->limit >> 12 : seg->limit;
  uint64_t sd = (limit & 0xffff) | (seg->base & 0xffffff) << 16 |
                (uint64_t)seg->type << 40 | (uint64_t)seg->s << 44 |
                (uint64_t)seg->dpl << 45 | (uint64_t)seg->present << 47 |
                (limit & 0xf0000ULL) << 48 | (uint64_t)seg->avl << 52 |
                (uint64_t)seg->l << 53 | (uint64_t)seg->db << 54 |
                (uint64_t)seg->g << 55 | (seg->base & 0xff000000ULL) << 56;
  NONFAILING(dt[index] = sd);
  NONFAILING(lt[index] = sd);
}

static void fill_segment_descriptor_dword(uint64_t* dt, uint64_t* lt,
                                          struct kvm_segment* seg)
{
  fill_segment_descriptor(dt, lt, seg);
  uint16_t index = seg->selector >> 3;
  NONFAILING(dt[index + 1] = 0);
  NONFAILING(lt[index + 1] = 0);
}

static void setup_syscall_msrs(int cpufd, uint16_t sel_cs, uint16_t sel_cs_cpl3)
{
  char buf[sizeof(struct kvm_msrs) + 5 * sizeof(struct kvm_msr_entry)];
  memset(buf, 0, sizeof(buf));
  struct kvm_msrs* msrs = (struct kvm_msrs*)buf;
  struct kvm_msr_entry* entries = msrs->entries;
  msrs->nmsrs = 5;
  entries[0].index = MSR_IA32_SYSENTER_CS;
  entries[0].data = sel_cs;
  entries[1].index = MSR_IA32_SYSENTER_ESP;
  entries[1].data = ADDR_STACK0;
  entries[2].index = MSR_IA32_SYSENTER_EIP;
  entries[2].data = ADDR_VAR_SYSEXIT;
  entries[3].index = MSR_IA32_STAR;
  entries[3].data = ((uint64_t)sel_cs << 32) | ((uint64_t)sel_cs_cpl3 << 48);
  entries[4].index = MSR_IA32_LSTAR;
  entries[4].data = ADDR_VAR_SYSRET;
  ioctl(cpufd, KVM_SET_MSRS, msrs);
}

static void setup_32bit_idt(struct kvm_sregs* sregs, char* host_mem,
                            uintptr_t guest_mem)
{
  sregs->idt.base = guest_mem + ADDR_VAR_IDT;
  sregs->idt.limit = 0x1ff;
  uint64_t* idt = (uint64_t*)(host_mem + sregs->idt.base);
  int i;
  for (i = 0; i < 32; i++) {
    struct kvm_segment gate;
    gate.selector = i << 3;
    switch (i % 6) {
    case 0:
      gate.type = 6;
      gate.base = SEL_CS16;
      break;
    case 1:
      gate.type = 7;
      gate.base = SEL_CS16;
      break;
    case 2:
      gate.type = 3;
      gate.base = SEL_TGATE16;
      break;
    case 3:
      gate.type = 14;
      gate.base = SEL_CS32;
      break;
    case 4:
      gate.type = 15;
      gate.base = SEL_CS32;
      break;
    case 6:
      gate.type = 11;
      gate.base = SEL_TGATE32;
      break;
    }
    gate.limit = guest_mem + ADDR_VAR_USER_CODE2;
    gate.present = 1;
    gate.dpl = 0;
    gate.s = 0;
    gate.g = 0;
    gate.db = 0;
    gate.l = 0;
    gate.avl = 0;
    fill_segment_descriptor(idt, idt, &gate);
  }
}

static void setup_64bit_idt(struct kvm_sregs* sregs, char* host_mem,
                            uintptr_t guest_mem)
{
  sregs->idt.base = guest_mem + ADDR_VAR_IDT;
  sregs->idt.limit = 0x1ff;
  uint64_t* idt = (uint64_t*)(host_mem + sregs->idt.base);
  int i;
  for (i = 0; i < 32; i++) {
    struct kvm_segment gate;
    gate.selector = (i * 2) << 3;
    gate.type = (i & 1) ? 14 : 15;
    gate.base = SEL_CS64;
    gate.limit = guest_mem + ADDR_VAR_USER_CODE2;
    gate.present = 1;
    gate.dpl = 0;
    gate.s = 0;
    gate.g = 0;
    gate.db = 0;
    gate.l = 0;
    gate.avl = 0;
    fill_segment_descriptor_dword(idt, idt, &gate);
  }
}

struct kvm_text {
  uintptr_t typ;
  const void* text;
  uintptr_t size;
};

struct kvm_opt {
  uint64_t typ;
  uint64_t val;
};

#define KVM_SETUP_PAGING (1 << 0)
#define KVM_SETUP_PAE (1 << 1)
#define KVM_SETUP_PROTECTED (1 << 2)
#define KVM_SETUP_CPL3 (1 << 3)
#define KVM_SETUP_VIRT86 (1 << 4)
#define KVM_SETUP_SMM (1 << 5)
#define KVM_SETUP_VM (1 << 6)
static uintptr_t syz_kvm_setup_cpu(uintptr_t a0, uintptr_t a1, uintptr_t a2,
                                   uintptr_t a3, uintptr_t a4, uintptr_t a5,
                                   uintptr_t a6, uintptr_t a7)
{
  const int vmfd = a0;
  const int cpufd = a1;
  char* const host_mem = (char*)a2;
  const struct kvm_text* const text_array_ptr = (struct kvm_text*)a3;
  const uintptr_t text_count = a4;
  const uintptr_t flags = a5;
  const struct kvm_opt* const opt_array_ptr = (struct kvm_opt*)a6;
  uintptr_t opt_count = a7;
  const uintptr_t page_size = 4 << 10;
  const uintptr_t ioapic_page = 10;
  const uintptr_t guest_mem_size = 24 * page_size;
  const uintptr_t guest_mem = 0;
  (void)text_count;
  int text_type = 0;
  const void* text = 0;
  uintptr_t text_size = 0;
  NONFAILING(text_type = text_array_ptr[0].typ);
  NONFAILING(text = text_array_ptr[0].text);
  NONFAILING(text_size = text_array_ptr[0].size);
  uintptr_t i;
  for (i = 0; i < guest_mem_size / page_size; i++) {
    struct kvm_userspace_memory_region memreg;
    memreg.slot = i;
    memreg.flags = 0;
    memreg.guest_phys_addr = guest_mem + i * page_size;
    if (i == ioapic_page)
      memreg.guest_phys_addr = 0xfec00000;
    memreg.memory_size = page_size;
    memreg.userspace_addr = (uintptr_t)host_mem + i * page_size;
    ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &memreg);
  }
  struct kvm_userspace_memory_region memreg;
  memreg.slot = 1 + (1 << 16);
  memreg.flags = 0;
  memreg.guest_phys_addr = 0x30000;
  memreg.memory_size = 64 << 10;
  memreg.userspace_addr = (uintptr_t)host_mem;
  ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &memreg);
  struct kvm_sregs sregs;
  if (ioctl(cpufd, KVM_GET_SREGS, &sregs))
    return -1;
  struct kvm_regs regs;
  memset(&regs, 0, sizeof(regs));
  regs.rip = guest_mem + ADDR_TEXT;
  regs.rsp = ADDR_STACK0;
  sregs.gdt.base = guest_mem + ADDR_GDT;
  sregs.gdt.limit = 256 * sizeof(uint64_t) - 1;
  uint64_t* gdt = (uint64_t*)(host_mem + sregs.gdt.base);
  struct kvm_segment seg_ldt;
  seg_ldt.selector = SEL_LDT;
  seg_ldt.type = 2;
  seg_ldt.base = guest_mem + ADDR_LDT;
  seg_ldt.limit = 256 * sizeof(uint64_t) - 1;
  seg_ldt.present = 1;
  seg_ldt.dpl = 0;
  seg_ldt.s = 0;
  seg_ldt.g = 0;
  seg_ldt.db = 1;
  seg_ldt.l = 0;
  sregs.ldt = seg_ldt;
  uint64_t* ldt = (uint64_t*)(host_mem + sregs.ldt.base);
  struct kvm_segment seg_cs16;
  seg_cs16.selector = SEL_CS16;
  seg_cs16.type = 11;
  seg_cs16.base = 0;
  seg_cs16.limit = 0xfffff;
  seg_cs16.present = 1;
  seg_cs16.dpl = 0;
  seg_cs16.s = 1;
  seg_cs16.g = 0;
  seg_cs16.db = 0;
  seg_cs16.l = 0;
  struct kvm_segment seg_ds16 = seg_cs16;
  seg_ds16.selector = SEL_DS16;
  seg_ds16.type = 3;
  struct kvm_segment seg_cs16_cpl3 = seg_cs16;
  seg_cs16_cpl3.selector = SEL_CS16_CPL3;
  seg_cs16_cpl3.dpl = 3;
  struct kvm_segment seg_ds16_cpl3 = seg_ds16;
  seg_ds16_cpl3.selector = SEL_DS16_CPL3;
  seg_ds16_cpl3.dpl = 3;
  struct kvm_segment seg_cs32 = seg_cs16;
  seg_cs32.selector = SEL_CS32;
  seg_cs32.db = 1;
  struct kvm_segment seg_ds32 = seg_ds16;
  seg_ds32.selector = SEL_DS32;
  seg_ds32.db = 1;
  struct kvm_segment seg_cs32_cpl3 = seg_cs32;
  seg_cs32_cpl3.selector = SEL_CS32_CPL3;
  seg_cs32_cpl3.dpl = 3;
  struct kvm_segment seg_ds32_cpl3 = seg_ds32;
  seg_ds32_cpl3.selector = SEL_DS32_CPL3;
  seg_ds32_cpl3.dpl = 3;
  struct kvm_segment seg_cs64 = seg_cs16;
  seg_cs64.selector = SEL_CS64;
  seg_cs64.l = 1;
  struct kvm_segment seg_ds64 = seg_ds32;
  seg_ds64.selector = SEL_DS64;
  struct kvm_segment seg_cs64_cpl3 = seg_cs64;
  seg_cs64_cpl3.selector = SEL_CS64_CPL3;
  seg_cs64_cpl3.dpl = 3;
  struct kvm_segment seg_ds64_cpl3 = seg_ds64;
  seg_ds64_cpl3.selector = SEL_DS64_CPL3;
  seg_ds64_cpl3.dpl = 3;
  struct kvm_segment seg_tss32;
  seg_tss32.selector = SEL_TSS32;
  seg_tss32.type = 9;
  seg_tss32.base = ADDR_VAR_TSS32;
  seg_tss32.limit = 0x1ff;
  seg_tss32.present = 1;
  seg_tss32.dpl = 0;
  seg_tss32.s = 0;
  seg_tss32.g = 0;
  seg_tss32.db = 0;
  seg_tss32.l = 0;
  struct kvm_segment seg_tss32_2 = seg_tss32;
  seg_tss32_2.selector = SEL_TSS32_2;
  seg_tss32_2.base = ADDR_VAR_TSS32_2;
  struct kvm_segment seg_tss32_cpl3 = seg_tss32;
  seg_tss32_cpl3.selector = SEL_TSS32_CPL3;
  seg_tss32_cpl3.base = ADDR_VAR_TSS32_CPL3;
  struct kvm_segment seg_tss32_vm86 = seg_tss32;
  seg_tss32_vm86.selector = SEL_TSS32_VM86;
  seg_tss32_vm86.base = ADDR_VAR_TSS32_VM86;
  struct kvm_segment seg_tss16 = seg_tss32;
  seg_tss16.selector = SEL_TSS16;
  seg_tss16.base = ADDR_VAR_TSS16;
  seg_tss16.limit = 0xff;
  seg_tss16.type = 1;
  struct kvm_segment seg_tss16_2 = seg_tss16;
  seg_tss16_2.selector = SEL_TSS16_2;
  seg_tss16_2.base = ADDR_VAR_TSS16_2;
  seg_tss16_2.dpl = 0;
  struct kvm_segment seg_tss16_cpl3 = seg_tss16;
  seg_tss16_cpl3.selector = SEL_TSS16_CPL3;
  seg_tss16_cpl3.base = ADDR_VAR_TSS16_CPL3;
  seg_tss16_cpl3.dpl = 3;
  struct kvm_segment seg_tss64 = seg_tss32;
  seg_tss64.selector = SEL_TSS64;
  seg_tss64.base = ADDR_VAR_TSS64;
  seg_tss64.limit = 0x1ff;
  struct kvm_segment seg_tss64_cpl3 = seg_tss64;
  seg_tss64_cpl3.selector = SEL_TSS64_CPL3;
  seg_tss64_cpl3.base = ADDR_VAR_TSS64_CPL3;
  seg_tss64_cpl3.dpl = 3;
  struct kvm_segment seg_cgate16;
  seg_cgate16.selector = SEL_CGATE16;
  seg_cgate16.type = 4;
  seg_cgate16.base = SEL_CS16 | (2 << 16);
  seg_cgate16.limit = ADDR_VAR_USER_CODE2;
  seg_cgate16.present = 1;
  seg_cgate16.dpl = 0;
  seg_cgate16.s = 0;
  seg_cgate16.g = 0;
  seg_cgate16.db = 0;
  seg_cgate16.l = 0;
  seg_cgate16.avl = 0;
  struct kvm_segment seg_tgate16 = seg_cgate16;
  seg_tgate16.selector = SEL_TGATE16;
  seg_tgate16.type = 3;
  seg_cgate16.base = SEL_TSS16_2;
  seg_tgate16.limit = 0;
  struct kvm_segment seg_cgate32 = seg_cgate16;
  seg_cgate32.selector = SEL_CGATE32;
  seg_cgate32.type = 12;
  seg_cgate32.base = SEL_CS32 | (2 << 16);
  struct kvm_segment seg_tgate32 = seg_cgate32;
  seg_tgate32.selector = SEL_TGATE32;
  seg_tgate32.type = 11;
  seg_tgate32.base = SEL_TSS32_2;
  seg_tgate32.limit = 0;
  struct kvm_segment seg_cgate64 = seg_cgate16;
  seg_cgate64.selector = SEL_CGATE64;
  seg_cgate64.type = 12;
  seg_cgate64.base = SEL_CS64;
  int kvmfd = open("/dev/kvm", O_RDWR);
  char buf[sizeof(struct kvm_cpuid2) + 128 * sizeof(struct kvm_cpuid_entry2)];
  memset(buf, 0, sizeof(buf));
  struct kvm_cpuid2* cpuid = (struct kvm_cpuid2*)buf;
  cpuid->nent = 128;
  ioctl(kvmfd, KVM_GET_SUPPORTED_CPUID, cpuid);
  ioctl(cpufd, KVM_SET_CPUID2, cpuid);
  close(kvmfd);
  const char* text_prefix = 0;
  int text_prefix_size = 0;
  char* host_text = host_mem + ADDR_TEXT;
  if (text_type == 8) {
    if (flags & KVM_SETUP_SMM) {
      if (flags & KVM_SETUP_PROTECTED) {
        sregs.cs = seg_cs16;
        sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds16;
        sregs.cr0 |= CR0_PE;
      } else {
        sregs.cs.selector = 0;
        sregs.cs.base = 0;
      }
      NONFAILING(*(host_mem + ADDR_TEXT) = 0xf4);
      host_text = host_mem + 0x8000;
      ioctl(cpufd, KVM_SMI, 0);
    } else if (flags & KVM_SETUP_VIRT86) {
      sregs.cs = seg_cs32;
      sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32;
      sregs.cr0 |= CR0_PE;
      sregs.efer |= EFER_SCE;
      setup_syscall_msrs(cpufd, SEL_CS32, SEL_CS32_CPL3);
      setup_32bit_idt(&sregs, host_mem, guest_mem);
      if (flags & KVM_SETUP_PAGING) {
        uint64_t pd_addr = guest_mem + ADDR_PD;
        uint64_t* pd = (uint64_t*)(host_mem + ADDR_PD);
        NONFAILING(pd[0] = PDE32_PRESENT | PDE32_RW | PDE32_USER | PDE32_PS);
        sregs.cr3 = pd_addr;
        sregs.cr4 |= CR4_PSE;
        text_prefix = kvm_asm32_paged_vm86;
        text_prefix_size = sizeof(kvm_asm32_paged_vm86) - 1;
      } else {
        text_prefix = kvm_asm32_vm86;
        text_prefix_size = sizeof(kvm_asm32_vm86) - 1;
      }
    } else {
      sregs.cs.selector = 0;
      sregs.cs.base = 0;
    }
  } else if (text_type == 16) {
    if (flags & KVM_SETUP_CPL3) {
      sregs.cs = seg_cs16;
      sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds16;
      text_prefix = kvm_asm16_cpl3;
      text_prefix_size = sizeof(kvm_asm16_cpl3) - 1;
    } else {
      sregs.cr0 |= CR0_PE;
      sregs.cs = seg_cs16;
      sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds16;
    }
  } else if (text_type == 32) {
    sregs.cr0 |= CR0_PE;
    sregs.efer |= EFER_SCE;
    setup_syscall_msrs(cpufd, SEL_CS32, SEL_CS32_CPL3);
    setup_32bit_idt(&sregs, host_mem, guest_mem);
    if (flags & KVM_SETUP_SMM) {
      sregs.cs = seg_cs32;
      sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32;
      NONFAILING(*(host_mem + ADDR_TEXT) = 0xf4);
      host_text = host_mem + 0x8000;
      ioctl(cpufd, KVM_SMI, 0);
    } else if (flags & KVM_SETUP_PAGING) {
      sregs.cs = seg_cs32;
      sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32;
      uint64_t pd_addr = guest_mem + ADDR_PD;
      uint64_t* pd = (uint64_t*)(host_mem + ADDR_PD);
      NONFAILING(pd[0] = PDE32_PRESENT | PDE32_RW | PDE32_USER | PDE32_PS);
      sregs.cr3 = pd_addr;
      sregs.cr4 |= CR4_PSE;
      text_prefix = kvm_asm32_paged;
      text_prefix_size = sizeof(kvm_asm32_paged) - 1;
    } else if (flags & KVM_SETUP_CPL3) {
      sregs.cs = seg_cs32_cpl3;
      sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32_cpl3;
    } else {
      sregs.cs = seg_cs32;
      sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32;
    }
  } else {
    sregs.efer |= EFER_LME | EFER_SCE;
    sregs.cr0 |= CR0_PE;
    setup_syscall_msrs(cpufd, SEL_CS64, SEL_CS64_CPL3);
    setup_64bit_idt(&sregs, host_mem, guest_mem);
    sregs.cs = seg_cs32;
    sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32;
    uint64_t pml4_addr = guest_mem + ADDR_PML4;
    uint64_t* pml4 = (uint64_t*)(host_mem + ADDR_PML4);
    uint64_t pdpt_addr = guest_mem + ADDR_PDP;
    uint64_t* pdpt = (uint64_t*)(host_mem + ADDR_PDP);
    uint64_t pd_addr = guest_mem + ADDR_PD;
    uint64_t* pd = (uint64_t*)(host_mem + ADDR_PD);
    NONFAILING(pml4[0] = PDE64_PRESENT | PDE64_RW | PDE64_USER | pdpt_addr);
    NONFAILING(pdpt[0] = PDE64_PRESENT | PDE64_RW | PDE64_USER | pd_addr);
    NONFAILING(pd[0] = PDE64_PRESENT | PDE64_RW | PDE64_USER | PDE64_PS);
    sregs.cr3 = pml4_addr;
    sregs.cr4 |= CR4_PAE;
    if (flags & KVM_SETUP_VM) {
      sregs.cr0 |= CR0_NE;
      NONFAILING(*((uint64_t*)(host_mem + ADDR_VAR_VMXON_PTR)) =
                     ADDR_VAR_VMXON);
      NONFAILING(*((uint64_t*)(host_mem + ADDR_VAR_VMCS_PTR)) = ADDR_VAR_VMCS);
      NONFAILING(memcpy(host_mem + ADDR_VAR_VMEXIT_CODE, kvm_asm64_vm_exit,
                        sizeof(kvm_asm64_vm_exit) - 1));
      NONFAILING(*((uint64_t*)(host_mem + ADDR_VAR_VMEXIT_PTR)) =
                     ADDR_VAR_VMEXIT_CODE);
      text_prefix = kvm_asm64_init_vm;
      text_prefix_size = sizeof(kvm_asm64_init_vm) - 1;
    } else if (flags & KVM_SETUP_CPL3) {
      text_prefix = kvm_asm64_cpl3;
      text_prefix_size = sizeof(kvm_asm64_cpl3) - 1;
    } else {
      text_prefix = kvm_asm64_enable_long;
      text_prefix_size = sizeof(kvm_asm64_enable_long) - 1;
    }
  }
  struct tss16 tss16;
  memset(&tss16, 0, sizeof(tss16));
  tss16.ss0 = tss16.ss1 = tss16.ss2 = SEL_DS16;
  tss16.sp0 = tss16.sp1 = tss16.sp2 = ADDR_STACK0;
  tss16.ip = ADDR_VAR_USER_CODE2;
  tss16.flags = (1 << 1);
  tss16.cs = SEL_CS16;
  tss16.es = tss16.ds = tss16.ss = SEL_DS16;
  tss16.ldt = SEL_LDT;
  struct tss16* tss16_addr = (struct tss16*)(host_mem + seg_tss16_2.base);
  NONFAILING(memcpy(tss16_addr, &tss16, sizeof(tss16)));
  memset(&tss16, 0, sizeof(tss16));
  tss16.ss0 = tss16.ss1 = tss16.ss2 = SEL_DS16;
  tss16.sp0 = tss16.sp1 = tss16.sp2 = ADDR_STACK0;
  tss16.ip = ADDR_VAR_USER_CODE2;
  tss16.flags = (1 << 1);
  tss16.cs = SEL_CS16_CPL3;
  tss16.es = tss16.ds = tss16.ss = SEL_DS16_CPL3;
  tss16.ldt = SEL_LDT;
  struct tss16* tss16_cpl3_addr =
      (struct tss16*)(host_mem + seg_tss16_cpl3.base);
  NONFAILING(memcpy(tss16_cpl3_addr, &tss16, sizeof(tss16)));
  struct tss32 tss32;
  memset(&tss32, 0, sizeof(tss32));
  tss32.ss0 = tss32.ss1 = tss32.ss2 = SEL_DS32;
  tss32.sp0 = tss32.sp1 = tss32.sp2 = ADDR_STACK0;
  tss32.ip = ADDR_VAR_USER_CODE;
  tss32.flags = (1 << 1) | (1 << 17);
  tss32.ldt = SEL_LDT;
  tss32.cr3 = sregs.cr3;
  tss32.io_bitmap = offsetof(struct tss32, io_bitmap);
  struct tss32* tss32_addr = (struct tss32*)(host_mem + seg_tss32_vm86.base);
  NONFAILING(memcpy(tss32_addr, &tss32, sizeof(tss32)));
  memset(&tss32, 0, sizeof(tss32));
  tss32.ss0 = tss32.ss1 = tss32.ss2 = SEL_DS32;
  tss32.sp0 = tss32.sp1 = tss32.sp2 = ADDR_STACK0;
  tss32.ip = ADDR_VAR_USER_CODE;
  tss32.flags = (1 << 1);
  tss32.cr3 = sregs.cr3;
  tss32.es = tss32.ds = tss32.ss = tss32.gs = tss32.fs = SEL_DS32;
  tss32.cs = SEL_CS32;
  tss32.ldt = SEL_LDT;
  tss32.cr3 = sregs.cr3;
  tss32.io_bitmap = offsetof(struct tss32, io_bitmap);
  struct tss32* tss32_cpl3_addr = (struct tss32*)(host_mem + seg_tss32_2.base);
  NONFAILING(memcpy(tss32_cpl3_addr, &tss32, sizeof(tss32)));
  struct tss64 tss64;
  memset(&tss64, 0, sizeof(tss64));
  tss64.rsp[0] = ADDR_STACK0;
  tss64.rsp[1] = ADDR_STACK0;
  tss64.rsp[2] = ADDR_STACK0;
  tss64.io_bitmap = offsetof(struct tss64, io_bitmap);
  struct tss64* tss64_addr = (struct tss64*)(host_mem + seg_tss64.base);
  NONFAILING(memcpy(tss64_addr, &tss64, sizeof(tss64)));
  memset(&tss64, 0, sizeof(tss64));
  tss64.rsp[0] = ADDR_STACK0;
  tss64.rsp[1] = ADDR_STACK0;
  tss64.rsp[2] = ADDR_STACK0;
  tss64.io_bitmap = offsetof(struct tss64, io_bitmap);
  struct tss64* tss64_cpl3_addr =
      (struct tss64*)(host_mem + seg_tss64_cpl3.base);
  NONFAILING(memcpy(tss64_cpl3_addr, &tss64, sizeof(tss64)));
  if (text_size > 1000)
    text_size = 1000;
  if (text_prefix) {
    NONFAILING(memcpy(host_text, text_prefix, text_prefix_size));
    void* patch = 0;
    NONFAILING(patch =
                   memmem(host_text, text_prefix_size, "\xde\xc0\xad\x0b", 4));
    if (patch)
      NONFAILING(*((uint32_t*)patch) =
                     guest_mem + ADDR_TEXT + ((char*)patch - host_text) + 6);
    uint16_t magic = PREFIX_SIZE;
    patch = 0;
    NONFAILING(patch =
                   memmem(host_text, text_prefix_size, &magic, sizeof(magic)));
    if (patch)
      NONFAILING(*((uint16_t*)patch) =
                     guest_mem + ADDR_TEXT + text_prefix_size);
  }
  NONFAILING(memcpy((void*)(host_text + text_prefix_size), text, text_size));
  NONFAILING(*(host_text + text_prefix_size + text_size) = 0xf4);
  NONFAILING(memcpy(host_mem + ADDR_VAR_USER_CODE, text, text_size));
  NONFAILING(*(host_mem + ADDR_VAR_USER_CODE + text_size) = 0xf4);
  NONFAILING(*(host_mem + ADDR_VAR_HLT) = 0xf4);
  NONFAILING(memcpy(host_mem + ADDR_VAR_SYSRET, "\x0f\x07\xf4", 3));
  NONFAILING(memcpy(host_mem + ADDR_VAR_SYSEXIT, "\x0f\x35\xf4", 3));
  NONFAILING(*(uint64_t*)(host_mem + ADDR_VAR_VMWRITE_FLD) = 0);
  NONFAILING(*(uint64_t*)(host_mem + ADDR_VAR_VMWRITE_VAL) = 0);
  if (opt_count > 2)
    opt_count = 2;
  for (i = 0; i < opt_count; i++) {
    uint64_t typ = 0;
    uint64_t val = 0;
    NONFAILING(typ = opt_array_ptr[i].typ);
    NONFAILING(val = opt_array_ptr[i].val);
    switch (typ % 9) {
    case 0:
      sregs.cr0 ^= val & (CR0_MP | CR0_EM | CR0_ET | CR0_NE | CR0_WP | CR0_AM |
                          CR0_NW | CR0_CD);
      break;
    case 1:
      sregs.cr4 ^=
          val & (CR4_VME | CR4_PVI | CR4_TSD | CR4_DE | CR4_MCE | CR4_PGE |
                 CR4_PCE | CR4_OSFXSR | CR4_OSXMMEXCPT | CR4_UMIP | CR4_VMXE |
                 CR4_SMXE | CR4_FSGSBASE | CR4_PCIDE | CR4_OSXSAVE | CR4_SMEP |
                 CR4_SMAP | CR4_PKE);
      break;
    case 2:
      sregs.efer ^= val & (EFER_SCE | EFER_NXE | EFER_SVME | EFER_LMSLE |
                           EFER_FFXSR | EFER_TCE);
      break;
    case 3:
      val &=
          ((1 << 8) | (1 << 9) | (1 << 10) | (1 << 12) | (1 << 13) | (1 << 14) |
           (1 << 15) | (1 << 18) | (1 << 19) | (1 << 20) | (1 << 21));
      regs.rflags ^= val;
      NONFAILING(tss16_addr->flags ^= val);
      NONFAILING(tss16_cpl3_addr->flags ^= val);
      NONFAILING(tss32_addr->flags ^= val);
      NONFAILING(tss32_cpl3_addr->flags ^= val);
      break;
    case 4:
      seg_cs16.type = val & 0xf;
      seg_cs32.type = val & 0xf;
      seg_cs64.type = val & 0xf;
      break;
    case 5:
      seg_cs16_cpl3.type = val & 0xf;
      seg_cs32_cpl3.type = val & 0xf;
      seg_cs64_cpl3.type = val & 0xf;
      break;
    case 6:
      seg_ds16.type = val & 0xf;
      seg_ds32.type = val & 0xf;
      seg_ds64.type = val & 0xf;
      break;
    case 7:
      seg_ds16_cpl3.type = val & 0xf;
      seg_ds32_cpl3.type = val & 0xf;
      seg_ds64_cpl3.type = val & 0xf;
      break;
    case 8:
      NONFAILING(*(uint64_t*)(host_mem + ADDR_VAR_VMWRITE_FLD) =
                     (val & 0xffff));
      NONFAILING(*(uint64_t*)(host_mem + ADDR_VAR_VMWRITE_VAL) = (val >> 16));
      break;
    default:
      exit(1);
    }
  }
  regs.rflags |= 2;
  fill_segment_descriptor(gdt, ldt, &seg_ldt);
  fill_segment_descriptor(gdt, ldt, &seg_cs16);
  fill_segment_descriptor(gdt, ldt, &seg_ds16);
  fill_segment_descriptor(gdt, ldt, &seg_cs16_cpl3);
  fill_segment_descriptor(gdt, ldt, &seg_ds16_cpl3);
  fill_segment_descriptor(gdt, ldt, &seg_cs32);
  fill_segment_descriptor(gdt, ldt, &seg_ds32);
  fill_segment_descriptor(gdt, ldt, &seg_cs32_cpl3);
  fill_segment_descriptor(gdt, ldt, &seg_ds32_cpl3);
  fill_segment_descriptor(gdt, ldt, &seg_cs64);
  fill_segment_descriptor(gdt, ldt, &seg_ds64);
  fill_segment_descriptor(gdt, ldt, &seg_cs64_cpl3);
  fill_segment_descriptor(gdt, ldt, &seg_ds64_cpl3);
  fill_segment_descriptor(gdt, ldt, &seg_tss32);
  fill_segment_descriptor(gdt, ldt, &seg_tss32_2);
  fill_segment_descriptor(gdt, ldt, &seg_tss32_cpl3);
  fill_segment_descriptor(gdt, ldt, &seg_tss32_vm86);
  fill_segment_descriptor(gdt, ldt, &seg_tss16);
  fill_segment_descriptor(gdt, ldt, &seg_tss16_2);
  fill_segment_descriptor(gdt, ldt, &seg_tss16_cpl3);
  fill_segment_descriptor_dword(gdt, ldt, &seg_tss64);
  fill_segment_descriptor_dword(gdt, ldt, &seg_tss64_cpl3);
  fill_segment_descriptor(gdt, ldt, &seg_cgate16);
  fill_segment_descriptor(gdt, ldt, &seg_tgate16);
  fill_segment_descriptor(gdt, ldt, &seg_cgate32);
  fill_segment_descriptor(gdt, ldt, &seg_tgate32);
  fill_segment_descriptor_dword(gdt, ldt, &seg_cgate64);
  if (ioctl(cpufd, KVM_SET_SREGS, &sregs))
    return -1;
  if (ioctl(cpufd, KVM_SET_REGS, &regs))
    return -1;
  return 0;
}

static bool write_file(const char* file, const char* what, ...)
{
  char buf[1024];
  va_list args;
  va_start(args, what);
  vsnprintf(buf, sizeof(buf), what, args);
  va_end(args);
  buf[sizeof(buf) - 1] = 0;
  int len = strlen(buf);
  int fd = open(file, O_WRONLY | O_CLOEXEC);
  if (fd == -1)
    return false;
  if (write(fd, buf, len) != len) {
    int err = errno;
    close(fd);
    errno = err;
    return false;
  }
  close(fd);
  return true;
}

#define XT_TABLE_SIZE 1536
#define XT_MAX_ENTRIES 10

struct xt_counters {
  uint64_t pcnt, bcnt;
};

struct ipt_getinfo {
  char name[32];
  unsigned int valid_hooks;
  unsigned int hook_entry[5];
  unsigned int underflow[5];
  unsigned int num_entries;
  unsigned int size;
};

struct ipt_get_entries {
  char name[32];
  unsigned int size;
  void* entrytable[XT_TABLE_SIZE / sizeof(void*)];
};

struct ipt_replace {
  char name[32];
  unsigned int valid_hooks;
  unsigned int num_entries;
  unsigned int size;
  unsigned int hook_entry[5];
  unsigned int underflow[5];
  unsigned int num_counters;
  struct xt_counters* counters;
  char entrytable[XT_TABLE_SIZE];
};

struct ipt_table_desc {
  const char* name;
  struct ipt_getinfo info;
  struct ipt_replace replace;
};

static struct ipt_table_desc ipv4_tables[] = {
    {.name = "filter"}, {.name = "nat"},      {.name = "mangle"},
    {.name = "raw"},    {.name = "security"},
};

static struct ipt_table_desc ipv6_tables[] = {
    {.name = "filter"}, {.name = "nat"},      {.name = "mangle"},
    {.name = "raw"},    {.name = "security"},
};

#define IPT_BASE_CTL 64
#define IPT_SO_SET_REPLACE (IPT_BASE_CTL)
#define IPT_SO_GET_INFO (IPT_BASE_CTL)
#define IPT_SO_GET_ENTRIES (IPT_BASE_CTL + 1)

struct arpt_getinfo {
  char name[32];
  unsigned int valid_hooks;
  unsigned int hook_entry[3];
  unsigned int underflow[3];
  unsigned int num_entries;
  unsigned int size;
};

struct arpt_get_entries {
  char name[32];
  unsigned int size;
  void* entrytable[XT_TABLE_SIZE / sizeof(void*)];
};

struct arpt_replace {
  char name[32];
  unsigned int valid_hooks;
  unsigned int num_entries;
  unsigned int size;
  unsigned int hook_entry[3];
  unsigned int underflow[3];
  unsigned int num_counters;
  struct xt_counters* counters;
  char entrytable[XT_TABLE_SIZE];
};

struct arpt_table_desc {
  const char* name;
  struct arpt_getinfo info;
  struct arpt_replace replace;
};

static struct arpt_table_desc arpt_tables[] = {
    {.name = "filter"},
};

#define ARPT_BASE_CTL 96
#define ARPT_SO_SET_REPLACE (ARPT_BASE_CTL)
#define ARPT_SO_GET_INFO (ARPT_BASE_CTL)
#define ARPT_SO_GET_ENTRIES (ARPT_BASE_CTL + 1)

static void checkpoint_iptables(struct ipt_table_desc* tables, int num_tables,
                                int family, int level)
{
  struct ipt_get_entries entries;
  socklen_t optlen;
  int fd, i;
  fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
  if (fd == -1) {
    switch (errno) {
    case EAFNOSUPPORT:
    case ENOPROTOOPT:
      return;
    }
    exit(1);
  }
  for (i = 0; i < num_tables; i++) {
    struct ipt_table_desc* table = &tables[i];
    strcpy(table->info.name, table->name);
    strcpy(table->replace.name, table->name);
    optlen = sizeof(table->info);
    if (getsockopt(fd, level, IPT_SO_GET_INFO, &table->info, &optlen)) {
      switch (errno) {
      case EPERM:
      case ENOENT:
      case ENOPROTOOPT:
        continue;
      }
      exit(1);
    }
    if (table->info.size > sizeof(table->replace.entrytable))
      exit(1);
    if (table->info.num_entries > XT_MAX_ENTRIES)
      exit(1);
    memset(&entries, 0, sizeof(entries));
    strcpy(entries.name, table->name);
    entries.size = table->info.size;
    optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size;
    if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen))
      exit(1);
    table->replace.valid_hooks = table->info.valid_hooks;
    table->replace.num_entries = table->info.num_entries;
    table->replace.size = table->info.size;
    memcpy(table->replace.hook_entry, table->info.hook_entry,
           sizeof(table->replace.hook_entry));
    memcpy(table->replace.underflow, table->info.underflow,
           sizeof(table->replace.underflow));
    memcpy(table->replace.entrytable, entries.entrytable, table->info.size);
  }
  close(fd);
}

static void reset_iptables(struct ipt_table_desc* tables, int num_tables,
                           int family, int level)
{
  struct xt_counters counters[XT_MAX_ENTRIES];
  struct ipt_get_entries entries;
  struct ipt_getinfo info;
  socklen_t optlen;
  int fd, i;
  fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
  if (fd == -1) {
    switch (errno) {
    case EAFNOSUPPORT:
    case ENOPROTOOPT:
      return;
    }
    exit(1);
  }
  for (i = 0; i < num_tables; i++) {
    struct ipt_table_desc* table = &tables[i];
    if (table->info.valid_hooks == 0)
      continue;
    memset(&info, 0, sizeof(info));
    strcpy(info.name, table->name);
    optlen = sizeof(info);
    if (getsockopt(fd, level, IPT_SO_GET_INFO, &info, &optlen))
      exit(1);
    if (memcmp(&table->info, &info, sizeof(table->info)) == 0) {
      memset(&entries, 0, sizeof(entries));
      strcpy(entries.name, table->name);
      entries.size = table->info.size;
      optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size;
      if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen))
        exit(1);
      if (memcmp(table->replace.entrytable, entries.entrytable,
                 table->info.size) == 0)
        continue;
    }
    table->replace.num_counters = info.num_entries;
    table->replace.counters = counters;
    optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) +
             table->replace.size;
    if (setsockopt(fd, level, IPT_SO_SET_REPLACE, &table->replace, optlen))
      exit(1);
  }
  close(fd);
}

static void checkpoint_arptables(void)
{
  struct arpt_get_entries entries;
  socklen_t optlen;
  unsigned i;
  int fd;
  fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
  if (fd == -1) {
    switch (errno) {
    case EAFNOSUPPORT:
    case ENOPROTOOPT:
      return;
    }
    exit(1);
  }
  for (i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) {
    struct arpt_table_desc* table = &arpt_tables[i];
    strcpy(table->info.name, table->name);
    strcpy(table->replace.name, table->name);
    optlen = sizeof(table->info);
    if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &table->info, &optlen)) {
      switch (errno) {
      case EPERM:
      case ENOENT:
      case ENOPROTOOPT:
        continue;
      }
      exit(1);
    }
    if (table->info.size > sizeof(table->replace.entrytable))
      exit(1);
    if (table->info.num_entries > XT_MAX_ENTRIES)
      exit(1);
    memset(&entries, 0, sizeof(entries));
    strcpy(entries.name, table->name);
    entries.size = table->info.size;
    optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size;
    if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen))
      exit(1);
    table->replace.valid_hooks = table->info.valid_hooks;
    table->replace.num_entries = table->info.num_entries;
    table->replace.size = table->info.size;
    memcpy(table->replace.hook_entry, table->info.hook_entry,
           sizeof(table->replace.hook_entry));
    memcpy(table->replace.underflow, table->info.underflow,
           sizeof(table->replace.underflow));
    memcpy(table->replace.entrytable, entries.entrytable, table->info.size);
  }
  close(fd);
}

static void reset_arptables()
{
  struct xt_counters counters[XT_MAX_ENTRIES];
  struct arpt_get_entries entries;
  struct arpt_getinfo info;
  socklen_t optlen;
  unsigned i;
  int fd;
  fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
  if (fd == -1) {
    switch (errno) {
    case EAFNOSUPPORT:
    case ENOPROTOOPT:
      return;
    }
    exit(1);
  }
  for (i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) {
    struct arpt_table_desc* table = &arpt_tables[i];
    if (table->info.valid_hooks == 0)
      continue;
    memset(&info, 0, sizeof(info));
    strcpy(info.name, table->name);
    optlen = sizeof(info);
    if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &info, &optlen))
      exit(1);
    if (memcmp(&table->info, &info, sizeof(table->info)) == 0) {
      memset(&entries, 0, sizeof(entries));
      strcpy(entries.name, table->name);
      entries.size = table->info.size;
      optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size;
      if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen))
        exit(1);
      if (memcmp(table->replace.entrytable, entries.entrytable,
                 table->info.size) == 0)
        continue;
    } else {
    }
    table->replace.num_counters = info.num_entries;
    table->replace.counters = counters;
    optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) +
             table->replace.size;
    if (setsockopt(fd, SOL_IP, ARPT_SO_SET_REPLACE, &table->replace, optlen))
      exit(1);
  }
  close(fd);
}

#define NF_BR_NUMHOOKS 6
#define EBT_TABLE_MAXNAMELEN 32
#define EBT_CHAIN_MAXNAMELEN 32
#define EBT_BASE_CTL 128
#define EBT_SO_SET_ENTRIES (EBT_BASE_CTL)
#define EBT_SO_GET_INFO (EBT_BASE_CTL)
#define EBT_SO_GET_ENTRIES (EBT_SO_GET_INFO + 1)
#define EBT_SO_GET_INIT_INFO (EBT_SO_GET_ENTRIES + 1)
#define EBT_SO_GET_INIT_ENTRIES (EBT_SO_GET_INIT_INFO + 1)

struct ebt_replace {
  char name[EBT_TABLE_MAXNAMELEN];
  unsigned int valid_hooks;
  unsigned int nentries;
  unsigned int entries_size;
  struct ebt_entries* hook_entry[NF_BR_NUMHOOKS];
  unsigned int num_counters;
  struct ebt_counter* counters;
  char* entries;
};

struct ebt_entries {
  unsigned int distinguisher;
  char name[EBT_CHAIN_MAXNAMELEN];
  unsigned int counter_offset;
  int policy;
  unsigned int nentries;
  char data[0] __attribute__((aligned(__alignof__(struct ebt_replace))));
};

struct ebt_table_desc {
  const char* name;
  struct ebt_replace replace;
  char entrytable[XT_TABLE_SIZE];
};

static struct ebt_table_desc ebt_tables[] = {
    {.name = "filter"},
    {.name = "nat"},
    {.name = "broute"},
};

static void checkpoint_ebtables(void)
{
  socklen_t optlen;
  unsigned i;
  int fd;
  fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
  if (fd == -1) {
    switch (errno) {
    case EAFNOSUPPORT:
    case ENOPROTOOPT:
      return;
    }
    exit(1);
  }
  for (i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) {
    struct ebt_table_desc* table = &ebt_tables[i];
    strcpy(table->replace.name, table->name);
    optlen = sizeof(table->replace);
    if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_INFO, &table->replace,
                   &optlen)) {
      switch (errno) {
      case EPERM:
      case ENOENT:
      case ENOPROTOOPT:
        continue;
      }
      exit(1);
    }
    if (table->replace.entries_size > sizeof(table->entrytable))
      exit(1);
    table->replace.num_counters = 0;
    table->replace.entries = table->entrytable;
    optlen = sizeof(table->replace) + table->replace.entries_size;
    if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_ENTRIES, &table->replace,
                   &optlen))
      exit(1);
  }
  close(fd);
}

static void reset_ebtables()
{
  struct ebt_replace replace;
  char entrytable[XT_TABLE_SIZE];
  socklen_t optlen;
  unsigned i, j, h;
  int fd;
  fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
  if (fd == -1) {
    switch (errno) {
    case EAFNOSUPPORT:
    case ENOPROTOOPT:
      return;
    }
    exit(1);
  }
  for (i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) {
    struct ebt_table_desc* table = &ebt_tables[i];
    if (table->replace.valid_hooks == 0)
      continue;
    memset(&replace, 0, sizeof(replace));
    strcpy(replace.name, table->name);
    optlen = sizeof(replace);
    if (getsockopt(fd, SOL_IP, EBT_SO_GET_INFO, &replace, &optlen))
      exit(1);
    replace.num_counters = 0;
    table->replace.entries = 0;
    for (h = 0; h < NF_BR_NUMHOOKS; h++)
      table->replace.hook_entry[h] = 0;
    if (memcmp(&table->replace, &replace, sizeof(table->replace)) == 0) {
      memset(&entrytable, 0, sizeof(entrytable));
      replace.entries = entrytable;
      optlen = sizeof(replace) + replace.entries_size;
      if (getsockopt(fd, SOL_IP, EBT_SO_GET_ENTRIES, &replace, &optlen))
        exit(1);
      if (memcmp(table->entrytable, entrytable, replace.entries_size) == 0)
        continue;
    }
    for (j = 0, h = 0; h < NF_BR_NUMHOOKS; h++) {
      if (table->replace.valid_hooks & (1 << h)) {
        table->replace.hook_entry[h] =
            (struct ebt_entries*)table->entrytable + j;
        j++;
      }
    }
    table->replace.entries = table->entrytable;
    optlen = sizeof(table->replace) + table->replace.entries_size;
    if (setsockopt(fd, SOL_IP, EBT_SO_SET_ENTRIES, &table->replace, optlen))
      exit(1);
  }
  close(fd);
}

static void checkpoint_net_namespace(void)
{
  checkpoint_ebtables();
  checkpoint_arptables();
  checkpoint_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]),
                      AF_INET, SOL_IP);
  checkpoint_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]),
                      AF_INET6, SOL_IPV6);
}

static void reset_net_namespace(void)
{
  reset_ebtables();
  reset_arptables();
  reset_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]),
                 AF_INET, SOL_IP);
  reset_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]),
                 AF_INET6, SOL_IPV6);
}

static void setup_cgroups()
{
  if (mkdir("/syzcgroup", 0777)) {
  }
  if (mkdir("/syzcgroup/unified", 0777)) {
  }
  if (mount("none", "/syzcgroup/unified", "cgroup2", 0, NULL)) {
  }
  if (chmod("/syzcgroup/unified", 0777)) {
  }
  if (!write_file("/syzcgroup/unified/cgroup.subtree_control",
                  "+cpu +memory +io +pids +rdma")) {
  }
  if (mkdir("/syzcgroup/cpu", 0777)) {
  }
  if (mount("none", "/syzcgroup/cpu", "cgroup", 0,
            "cpuset,cpuacct,perf_event,hugetlb")) {
  }
  if (!write_file("/syzcgroup/cpu/cgroup.clone_children", "1")) {
  }
  if (chmod("/syzcgroup/cpu", 0777)) {
  }
  if (mkdir("/syzcgroup/net", 0777)) {
  }
  if (mount("none", "/syzcgroup/net", "cgroup", 0,
            "net_cls,net_prio,devices,freezer")) {
  }
  if (chmod("/syzcgroup/net", 0777)) {
  }
}
static void setup_binfmt_misc()
{
  if (mount(0, "/proc/sys/fs/binfmt_misc", "binfmt_misc", 0, 0)) {
  }
  if (!write_file("/proc/sys/fs/binfmt_misc/register",
                  ":syz0:M:0:\x01::./file0:")) {
  }
  if (!write_file("/proc/sys/fs/binfmt_misc/register",
                  ":syz1:M:1:\x02::./file0:POC")) {
  }
}

static void setup_common()
{
  if (mount(0, "/sys/fs/fuse/connections", "fusectl", 0, 0)) {
  }
  setup_cgroups();
  setup_binfmt_misc();
}

static void loop();

static void sandbox_common()
{
  prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
  setpgrp();
  setsid();
  struct rlimit rlim;
  rlim.rlim_cur = rlim.rlim_max = 200 << 20;
  setrlimit(RLIMIT_AS, &rlim);
  rlim.rlim_cur = rlim.rlim_max = 32 << 20;
  setrlimit(RLIMIT_MEMLOCK, &rlim);
  rlim.rlim_cur = rlim.rlim_max = 136 << 20;
  setrlimit(RLIMIT_FSIZE, &rlim);
  rlim.rlim_cur = rlim.rlim_max = 1 << 20;
  setrlimit(RLIMIT_STACK, &rlim);
  rlim.rlim_cur = rlim.rlim_max = 0;
  setrlimit(RLIMIT_CORE, &rlim);
  rlim.rlim_cur = rlim.rlim_max = 256;
  setrlimit(RLIMIT_NOFILE, &rlim);
  if (unshare(CLONE_NEWNS)) {
  }
  if (unshare(CLONE_NEWIPC)) {
  }
  if (unshare(0x02000000)) {
  }
  if (unshare(CLONE_NEWUTS)) {
  }
  if (unshare(CLONE_SYSVSEM)) {
  }
}

int wait_for_loop(int pid)
{
  if (pid < 0)
    exit(1);
  int status = 0;
  while (waitpid(-1, &status, __WALL) != pid) {
  }
  return WEXITSTATUS(status);
}

static int do_sandbox_none(void)
{
  if (unshare(CLONE_NEWPID)) {
  }
  int pid = fork();
  if (pid != 0)
    return wait_for_loop(pid);
  setup_common();
  sandbox_common();
  if (unshare(CLONE_NEWNET)) {
  }
  initialize_tun();
  initialize_netdevices();
  loop();
  exit(1);
}

#define FS_IOC_SETFLAGS _IOW('f', 2, long)
static void remove_dir(const char* dir)
{
  DIR* dp;
  struct dirent* ep;
  int iter = 0;
retry:
  while (umount2(dir, MNT_DETACH) == 0) {
  }
  dp = opendir(dir);
  if (dp == NULL) {
    if (errno == EMFILE) {
      exit(1);
    }
    exit(1);
  }
  while ((ep = readdir(dp))) {
    if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0)
      continue;
    char filename[FILENAME_MAX];
    snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name);
    while (umount2(filename, MNT_DETACH) == 0) {
    }
    struct stat st;
    if (lstat(filename, &st))
      exit(1);
    if (S_ISDIR(st.st_mode)) {
      remove_dir(filename);
      continue;
    }
    int i;
    for (i = 0;; i++) {
      if (unlink(filename) == 0)
        break;
      if (errno == EPERM) {
        int fd = open(filename, O_RDONLY);
        if (fd != -1) {
          long flags = 0;
          if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0)
            close(fd);
          continue;
        }
      }
      if (errno == EROFS) {
        break;
      }
      if (errno != EBUSY || i > 100)
        exit(1);
      if (umount2(filename, MNT_DETACH))
        exit(1);
    }
  }
  closedir(dp);
  int i;
  for (i = 0;; i++) {
    if (rmdir(dir) == 0)
      break;
    if (i < 100) {
      if (errno == EPERM) {
        int fd = open(dir, O_RDONLY);
        if (fd != -1) {
          long flags = 0;
          if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0)
            close(fd);
          continue;
        }
      }
      if (errno == EROFS) {
        break;
      }
      if (errno == EBUSY) {
        if (umount2(dir, MNT_DETACH))
          exit(1);
        continue;
      }
      if (errno == ENOTEMPTY) {
        if (iter < 100) {
          iter++;
          goto retry;
        }
      }
    }
    exit(1);
  }
}

static void kill_and_wait(int pid, int* status)
{
  kill(-pid, SIGKILL);
  kill(pid, SIGKILL);
  int i;
  for (i = 0; i < 100; i++) {
    if (waitpid(-1, status, WNOHANG | __WALL) == pid)
      return;
    usleep(1000);
  }
  DIR* dir = opendir("/sys/fs/fuse/connections");
  if (dir) {
    for (;;) {
      struct dirent* ent = readdir(dir);
      if (!ent)
        break;
      if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
        continue;
      char abort[300];
      snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort",
               ent->d_name);
      int fd = open(abort, O_WRONLY);
      if (fd == -1) {
        continue;
      }
      if (write(fd, abort, 1) < 0) {
      }
      close(fd);
    }
    closedir(dir);
  } else {
  }
  while (waitpid(-1, status, __WALL) != pid) {
  }
}

#define SYZ_HAVE_SETUP_LOOP 1
static void setup_loop()
{
  int pid = getpid();
  char cgroupdir[64];
  char file[128];
  snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid);
  if (mkdir(cgroupdir, 0777)) {
  }
  snprintf(file, sizeof(file), "%s/pids.max", cgroupdir);
  if (!write_file(file, "32")) {
  }
  snprintf(file, sizeof(file), "%s/memory.low", cgroupdir);
  if (!write_file(file, "%d", 298 << 20)) {
  }
  snprintf(file, sizeof(file), "%s/memory.high", cgroupdir);
  if (!write_file(file, "%d", 299 << 20)) {
  }
  snprintf(file, sizeof(file), "%s/memory.max", cgroupdir);
  if (!write_file(file, "%d", 300 << 20)) {
  }
  snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
  if (!write_file(file, "%d", pid)) {
  }
  snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid);
  if (mkdir(cgroupdir, 0777)) {
  }
  snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
  if (!write_file(file, "%d", pid)) {
  }
  snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid);
  if (mkdir(cgroupdir, 0777)) {
  }
  snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
  if (!write_file(file, "%d", pid)) {
  }
  checkpoint_net_namespace();
}

#define SYZ_HAVE_RESET_LOOP 1
static void reset_loop()
{
  reset_net_namespace();
}

#define SYZ_HAVE_SETUP_TEST 1
static void setup_test()
{
  prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
  setpgrp();
  char cgroupdir[64];
  snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid);
  if (symlink(cgroupdir, "./cgroup")) {
  }
  snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid);
  if (symlink(cgroupdir, "./cgroup.cpu")) {
  }
  snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid);
  if (symlink(cgroupdir, "./cgroup.net")) {
  }
  if (!write_file("/proc/self/oom_score_adj", "1000")) {
  }
  flush_tun();
}

#define SYZ_HAVE_RESET_TEST 1
static void reset_test()
{
  int fd;
  for (fd = 3; fd < 30; fd++)
    close(fd);
}

struct thread_t {
  int created, call;
  event_t ready, done;
};

static struct thread_t threads[16];
static void execute_call(int call);
static int running;

static void* thr(void* arg)
{
  struct thread_t* th = (struct thread_t*)arg;
  for (;;) {
    event_wait(&th->ready);
    event_reset(&th->ready);
    execute_call(th->call);
    __atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED);
    event_set(&th->done);
  }
  return 0;
}

static void execute_one(void)
{
  int i, call, thread;
  int collide = 0;
again:
  for (call = 0; call < 6; call++) {
    for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0]));
         thread++) {
      struct thread_t* th = &threads[thread];
      if (!th->created) {
        th->created = 1;
        event_init(&th->ready);
        event_init(&th->done);
        event_set(&th->done);
        thread_start(thr, th);
      }
      if (!event_isset(&th->done))
        continue;
      event_reset(&th->done);
      th->call = call;
      __atomic_fetch_add(&running, 1, __ATOMIC_RELAXED);
      event_set(&th->ready);
      if (collide && (call % 2) == 0)
        break;
      event_timedwait(&th->done, 45);
      break;
    }
  }
  for (i = 0; i < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED); i++)
    sleep_ms(1);
  if (!collide) {
    collide = 1;
    goto again;
  }
}

static void execute_one(void);

#define WAIT_FLAGS __WALL

static void loop(void)
{
  setup_loop();
  int iter;
  for (iter = 0;; iter++) {
    char cwdbuf[32];
    sprintf(cwdbuf, "./%d", iter);
    if (mkdir(cwdbuf, 0777))
      exit(1);
    reset_loop();
    int pid = fork();
    if (pid < 0)
      exit(1);
    if (pid == 0) {
      if (chdir(cwdbuf))
        exit(1);
      setup_test();
      execute_one();
      reset_test();
      exit(0);
    }
    int status = 0;
    uint64_t start = current_time_ms();
    for (;;) {
      if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid)
        break;
      sleep_ms(1);
      if (current_time_ms() - start < 5 * 1000)
        continue;
      kill_and_wait(pid, &status);
      break;
    }
    remove_dir(cwdbuf);
  }
}

uint64_t r[3] = {0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff};

void execute_call(int call)
{
  long res;
  switch (call) {
  case 0:
    NONFAILING(memcpy((void*)0x20000200, "/dev/kvm", 9));
    res = syscall(__NR_openat, 0xffffffffffffff9c, 0x20000200, 0, 0);
    if (res != -1)
      r[0] = res;
    break;
  case 1:
    res = syscall(__NR_ioctl, r[0], 0xae01, 0);
    if (res != -1)
      r[1] = res;
    break;
  case 2:
    res = syscall(__NR_ioctl, r[1], 0xae41, 0);
    if (res != -1)
      r[2] = res;
    break;
  case 3:
    NONFAILING(*(uint64_t*)0x20000080 = 0x40);
    NONFAILING(*(uint64_t*)0x20000088 = 0x20000000);
    NONFAILING(memcpy(
        (void*)0x20000000,
        "\x48\xb8\x00\x00\x00\x00\x00\x00\x00\x80\x0f\x23\xd0\x0f\x21\xf8\x35"
        "\x00\x00\x00\x04\x0f\x23\xf8\x44\x0f\x20\xc0\x35\x04\x00\x00\x00\x44"
        "\x0f\x22\xc0\xc4\xa2\x79\x34\x33\xc7\x44\x24\x00\x00\x50\x00\x00\xc7"
        "\x44\x24\x02\xdc\xa8\xe3\x94\xc7\x44\x24\x06\x00\x00\x00\x00\x0f\x01"
        "\x1c\x24\x66\xba\xf8\x0c\xb8\x18\x08\xae\x8f\xef\x66\xba\xfc\x0c\xed"
        "\xf3\x46\x0f\x1a\xd3\xc4\xe2\x2a\xf5\xa5\x00\x98\x00\x00\x0f\x20\xc0"
        "\x35\x00\x00\x00\x80\x0f\x22\xc0\x45\x0f\x07\x66\xf2\x41\xf4",
        117));
    NONFAILING(*(uint64_t*)0x20000090 = 0x75);
    syz_kvm_setup_cpu(r[1], r[2], 0x20fe8000, 0x20000080, 1, 0, 0x20000100, 0);
    break;
  case 4:
    syscall(__NR_ioctl, r[2], 0xae80, 0);
    break;
  case 5:
    NONFAILING(*(uint8_t*)0x20000100 = 1);
    NONFAILING(*(uint8_t*)0x20000101 = 3);
    NONFAILING(*(uint8_t*)0x20000102 = 2);
    NONFAILING(*(uint8_t*)0x20000103 = 0);
    NONFAILING(*(uint32_t*)0x20000104 = 0x66c2);
    NONFAILING(*(uint8_t*)0x20000108 = 0);
    NONFAILING(*(uint8_t*)0x20000109 = 0xfa);
    NONFAILING(*(uint8_t*)0x2000010a = 0);
    NONFAILING(*(uint8_t*)0x2000010b = 0x80);
    NONFAILING(*(uint8_t*)0x2000010c = 9);
    NONFAILING(*(uint8_t*)0x2000010d = -1);
    NONFAILING(*(uint8_t*)0x2000010e = 1);
    NONFAILING(*(uint8_t*)0x2000010f = 0);
    NONFAILING(*(uint32_t*)0x20000110 = 9);
    NONFAILING(*(uint32_t*)0x20000114 = 0);
    NONFAILING(*(uint8_t*)0x20000118 = 9);
    NONFAILING(*(uint8_t*)0x20000119 = 8);
    NONFAILING(*(uint8_t*)0x2000011a = 5);
    NONFAILING(*(uint8_t*)0x2000011b = 0x3b);
    syscall(__NR_ioctl, r[2], 0x4040aea0, 0x20000100);
    break;
  }
}
int main(void)
{
  syscall(__NR_mmap, 0x20000000, 0x1000000, 3, 0x32, -1, 0);
  install_segv_handler();
  for (procid = 0; procid < 8; procid++) {
    if (fork() == 0) {
      use_temporary_dir();
      do_sandbox_none();
    }
  }
  sleep(1000000);
  return 0;
}

-----
[   36.572752] ------------[ cut here ]------------
[   36.573580] kernel BUG at
/home/ww9210/kernel/linux-v4.20-rc2/arch/x86/kvm/x86.c:353!
[   36.574931] invalid opcode: 0000 [#1] SMP PTI
[   36.575685] CPU: 1 PID: 6654 Comm: poc Not tainted 4.20.0-rc2 #4
[   36.576711] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 1.10.2-1ubuntu1 04/01/2014
[   36.578234] RIP: 0010:kvm_spurious_fault+0x5/0x10
[   36.579042] Code: 70 19 00 89 1d 7c e7 70 01 5b 5d e9 b5 70 19 00
e8 b0 70 19 00 0f 0b 0f 1f 40 00 66 2e 0f 1f 84 00 00 00 00 00 e8 9b
70 19 00 <0f> 0b 66 0f 1f 84 00 00 00 00 00 41 57 41 56 49 89 fe 41 55
41 54
[   36.582164] RSP: 0018:ffffc90003617c40 EFLAGS: 00010293
[   36.583050] RAX: ffff888079952880 RBX: 0000000075e5e000 RCX: ffffffff810379c5
[   36.584249] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff888075e5e000
[   36.585455] RBP: ffff8880f5e5e000 R08: 0000000000000000 R09: 0000000000001b37
[   36.586652] R10: ffffc90003617dc0 R11: 0000000000000001 R12: ffff888075e5e000
[   36.587846] R13: 0000000000000001 R14: ffff888075fad828 R15: 0000000000014110
[   36.589048] FS:  00000000024aa940(0000) GS:ffff88807db00000(0000)
knlGS:0000000000000000
[   36.590411] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   36.591382] CR2: ffffc90003617c50 CR3: 0000000075f3e003 CR4: 00000000001626e0
[   36.592584] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 8000000000000000
[   36.593792] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[   36.594990] Call Trace:
[   36.595441]  kvm_fastop_exception+0x264/0x549c
[   36.596204]  ? vmcs_load+0x56/0xb0
[   36.596799]  vmx_vcpu_load+0x10b/0x3f0
[   36.597446]  ? schedule_timeout+0x25b/0x480
[   36.598162]  ? __queue_delayed_work+0xb7/0x120
[   36.598924]  ? kvm_arch_vcpu_load+0x8f/0x320
[   36.599653]  ? __x86_set_memory_region+0x150/0x220
[   36.600471]  ? __synchronize_srcu.part.17+0x8c/0xc0
[   36.601307]  ? trace_event_raw_event_rcu_torture_read+0xf0/0xf0
[   36.602310]  ? handle_invpcid+0x450/0x450
[   36.602999]  kvm_arch_vcpu_load+0x8f/0x320
[   36.603704]  ? vcpu_load+0x24/0x30
[   36.604292]  vcpu_load+0x24/0x30
[   36.604856]  kvm_arch_destroy_vm+0xa5/0x1e0
[   36.605570]  kvm_destroy_vm+0x178/0x2e0
[   36.606228]  ? kvm_vm_compat_ioctl+0x100/0x100
[   36.606985]  kvm_vcpu_release+0x3b/0x50
[   36.607646]  __fput+0x108/0x260
[   36.608201]  task_work_run+0xaa/0xd0
[   36.608823]  exit_to_usermode_loop+0x110/0x120
[   36.609580]  do_syscall_64+0x122/0x150
[   36.610225]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   36.611083] RIP: 0033:0x40c4b7
[   36.611613] Code: 00 00 0f 05 48 3d 00 f0 ff ff 77 3f f3 c3 0f 1f
44 00 00 53 89 fb 48 83 ec 10 e8 f4 fb ff ff 89 df 89 c2 b8 03 00 00
00 0f 05 <48> 3d 00 f0 ff ff 77 2b 89 d7 89 44 24 0c e8 36 fc ff ff 8b
44 24
[   36.614761] RSP: 002b:00007ffef2bfffd0 EFLAGS: 00000293 ORIG_RAX:
0000000000000003
[   36.616035] RAX: 0000000000000000 RBX: 0000000000000005 RCX: 000000000040c4b7
[   36.617239] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000005
[   36.618437] RBP: 00007ffef2c00000 R08: 0000000000000000 R09: 000000000000002d
[   36.619636] R10: 00007ffef2bfffb0 R11: 0000000000000293 R12: 000000000040e6f0
[   36.620839] R13: 0000000000000000 R14: 00000000006e0018 R15: 0000000000000000
[   36.622039] Modules linked in:
[   36.622570] Dumping ftrace buffer:
[   36.623156]    (ftrace buffer empty)
[   36.623800] ------------[ cut here ]------------
[   36.624586] kernel BUG at
/home/ww9210/kernel/linux-v4.20-rc2/arch/x86/kvm/x86.c:353!
[   36.625912] invalid opcode: 0000 [#2] SMP PTI
[   36.626655] CPU: 1 PID: 6654 Comm: poc Tainted: G      D
4.20.0-rc2 #4
[   36.627894] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 1.10.2-1ubuntu1 04/01/2014
[   36.629403] RIP: 0010:kvm_spurious_fault+0x5/0x10
[   36.630205] Code: 70 19 00 89 1d 7c e7 70 01 5b 5d e9 b5 70 19 00
e8 b0 70 19 00 0f 0b 0f 1f 40 00 66 2e 0f 1f 84 00 00 00 00 00 e8 9b
70 19 00 <0f> 0b 66 0f 1f 84 00 00 00 00 00 41 57 41 56 49 89 fe 41 55
41 54
[   36.633325] RSP: 0018:ffff88807db03f50 EFLAGS: 00010006
[   36.634213] RAX: ffff888079952880 RBX: 000000007549c000 RCX: ffffffff810379c5
[   36.635409] RDX: 0000000000010000 RSI: 0000000000000000 RDI: ffff88807549c000
[   36.636607] RBP: ffff8880f549c000 R08: 0000000000000000 R09: 0000000000000000
[   36.637809] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88807549c000
[   36.639010] R13: ffff88807db00000 R14: ffff8880754cd828 R15: 0000000000000000
[   36.640211] FS:  00000000024aa940(0000) GS:ffff88807db00000(0000)
knlGS:0000000000000000
[   36.641574] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   36.642545] CR2: ffff88807db03f60 CR3: 0000000075f3e003 CR4: 00000000001626e0
[   36.643744] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 8000000000000000
[   36.644949] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[   36.646149] Call Trace:
[   36.646578]  <IRQ>
[   36.646940]  kvm_fastop_exception+0x24c/0x549c
[   36.647700]  ? vmcs_clear+0x4c/0xa0
[   36.648303]  __loaded_vmcs_clear+0x8b/0xf0
[   36.649009]  ? crash_vmclear_local_loaded_vmcss+0x80/0x80
[   36.649929]  flush_smp_call_function_queue+0x66/0x160
[   36.650788]  smp_call_function_single_interrupt+0x35/0xd0
[   36.651706]  call_function_single_interrupt+0xf/0x20
[   36.652548]  </IRQ>
[   36.652927] RIP: 0010:oops_end+0x56/0xc0
[   36.653602] Code: 00 00 00 bf 07 00 00 00 c7 05 22 03 58 01 ff ff
ff ff e8 dd e0 04 00 83 2d 7e 2c d6 01 01 75 07 c6 05 79 2c d6 01 00
41 54 9d <e8> 05 e6 04 00 be 02 00 00 00 48 c7 c7 80 41 e1 82 e8 74 b2
ff ff
[   36.656733] RSP: 0018:ffffc90003617b10 EFLAGS: 00000293 ORIG_RAX:
ffffffffffffff04
[   36.658010] RAX: ffff888079952880 RBX: 000000000000000b RCX: ffffffff810ff5d5
[   36.659213] RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000007
[   36.660415] RBP: ffffc90003617b98 R08: 0000000c090c7fca R09: 0000000000000018
[   36.661625] R10: 0000000000000003 R11: 000000000000002b R12: 0000000000000293
[   36.662828] R13: 0000000000000004 R14: 0000000000000002 R15: ffffffff82402acb
[   36.664039]  ? add_taint+0x15/0x50
[   36.664637]  do_trap+0x81/0x100
[   36.665187]  ? kvm_spurious_fault+0x5/0x10
[   36.665887]  do_invalid_op+0x31/0x40
[   36.666502]  ? kvm_spurious_fault+0x5/0x10
[   36.667204]  invalid_op+0x14/0x20
[   36.667776] RIP: 0010:kvm_spurious_fault+0x5/0x10
[   36.668580] Code: 70 19 00 89 1d 7c e7 70 01 5b 5d e9 b5 70 19 00
e8 b0 70 19 00 0f 0b 0f 1f 40 00 66 2e 0f 1f 84 00 00 00 00 00 e8 9b
70 19 00 <0f> 0b 66 0f 1f 84 00 00 00 00 00 41 57 41 56 49 89 fe 41 55
41 54
[   36.671710] RSP: 0018:ffffc90003617c40 EFLAGS: 00010293
[   36.672598] RAX: ffff888079952880 RBX: 0000000075e5e000 RCX: ffffffff810379c5
[   36.673809] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff888075e5e000
[   36.675011] RBP: ffff8880f5e5e000 R08: 0000000000000000 R09: 0000000000001b37
[   36.676207] R10: ffffc90003617dc0 R11: 0000000000000001 R12: ffff888075e5e000
[   36.677411] R13: 0000000000000001 R14: ffff888075fad828 R15: 0000000000014110
[   36.678618]  ? kvm_spurious_fault+0x5/0x10
[   36.679323]  kvm_fastop_exception+0x264/0x549c
[   36.680084]  ? vmcs_load+0x56/0xb0
[   36.680678]  vmx_vcpu_load+0x10b/0x3f0
[   36.681324]  ? schedule_timeout+0x25b/0x480
[   36.682042]  ? __queue_delayed_work+0xb7/0x120
[   36.682804]  ? kvm_arch_vcpu_load+0x8f/0x320
[   36.683534]  ? __x86_set_memory_region+0x150/0x220
[   36.684354]  ? __synchronize_srcu.part.17+0x8c/0xc0
[   36.685194]  ? trace_event_raw_event_rcu_torture_read+0xf0/0xf0
[   36.686196]  ? handle_invpcid+0x450/0x450
[   36.686884]  kvm_arch_vcpu_load+0x8f/0x320
[   36.687588]  ? vcpu_load+0x24/0x30
[   36.688175]  vcpu_load+0x24/0x30
[   36.688740]  kvm_arch_destroy_vm+0xa5/0x1e0
[   36.689454]  kvm_destroy_vm+0x178/0x2e0
[   36.690112]  ? kvm_vm_compat_ioctl+0x100/0x100
[   36.690870]  kvm_vcpu_release+0x3b/0x50
[   36.691533]  __fput+0x108/0x260
[   36.692078]  task_work_run+0xaa/0xd0
[   36.692701]  exit_to_usermode_loop+0x110/0x120
[   36.693481]  do_syscall_64+0x122/0x150
[   36.694128]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   36.694986] RIP: 0033:0x40c4b7
[   36.695516] Code: 00 00 0f 05 48 3d 00 f0 ff ff 77 3f f3 c3 0f 1f
44 00 00 53 89 fb 48 83 ec 10 e8 f4 fb ff ff 89 df 89 c2 b8 03 00 00
00 0f 05 <48> 3d 00 f0 ff ff 77 2b 89 d7 89 44 24 0c e8 36 fc ff ff 8b
44 24
[   36.698645] RSP: 002b:00007ffef2bfffd0 EFLAGS: 00000293 ORIG_RAX:
0000000000000003
[   36.699919] RAX: 0000000000000000 RBX: 0000000000000005 RCX: 000000000040c4b7
[   36.701126] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000005
[   36.702329] RBP: 00007ffef2c00000 R08: 0000000000000000 R09: 000000000000002d
[   36.703532] R10: 00007ffef2bfffb0 R11: 0000000000000293 R12: 000000000040e6f0
[   36.704739] R13: 0000000000000000 R14: 00000000006e0018 R15: 0000000000000000
[   36.705942] Modules linked in:
[   36.706473] Dumping ftrace buffer:
[   36.707061]    (ftrace buffer empty)
[   36.707679] ---[ end trace 8155a549cb1669e1 ]---
[   36.708470] RIP: 0010:kvm_spurious_fault+0x5/0x10
[   36.709277] Code: 70 19 00 89 1d 7c e7 70 01 5b 5d e9 b5 70 19 00
e8 b0 70 19 00 0f 0b 0f 1f 40 00 66 2e 0f 1f 84 00 00 00 00 00 e8 9b
70 19 00 <0f> 0b 66 0f 1f 84 00 00 00 00 00 41 57 41 56 49 89 fe 41 55
41 54
[   36.712392] RSP: 0018:ffffc90003617c40 EFLAGS: 00010293
[   36.713287] RAX: ffff888079952880 RBX: 0000000075e5e000 RCX: ffffffff810379c5
[   36.714485] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff888075e5e000
[   36.715686] RBP: ffff8880f5e5e000 R08: 0000000000000000 R09: 0000000000001b37
[   36.716891] R10: ffffc90003617dc0 R11: 0000000000000001 R12: ffff888075e5e000
[   36.718093] R13: 0000000000000001 R14: ffff888075fad828 R15: 0000000000014110
[   36.719296] FS:  00000000024aa940(0000) GS:ffff88807db00000(0000)
knlGS:0000000000000000
[   36.720661] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   36.721632] CR2: ffff88807db03f60 CR3: 0000000075f3e003 CR4: 00000000001626e0
[   36.722835] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 8000000000000000
[   36.724035] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[   36.725241] Kernel panic - not syncing: Fatal exception in interrupt
[   36.726356] Dumping ftrace buffer:
[   36.726939]    (ftrace buffer empty)
[   36.727572] Kernel Offset: disabled
[   36.728168] ---[ end Kernel panic - not syncing: Fatal exception in
interrupt ]---
[   36.729444] ------------[ cut here ]------------
[   36.730220] sched: Unexpected reschedule of offline CPU#0!
[   36.731153] WARNING: CPU: 1 PID: 6654 at
/home/ww9210/kernel/linux-v4.20-rc2/arch/x86/kernel/smp.c:128
native_smp_send_reschedule+0x2f/0x40
[   36.733220] Kernel panic - not syncing: panic_on_warn set ...
[   36.734199] Dumping ftrace buffer:
[   36.734776]    (ftrace buffer empty)
[   36.735381] Kernel Offset: disabled
[   36.735972] ---[ end Kernel panic - not syncing: panic_on_warn set ... ]---




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux