[io-uring] use-after-free in io_cqring_wait

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Syzkaller hit 'KASAN: use-after-free Read in io_cqring_wait' bug.

==================================================================
BUG: KASAN: use-after-free in io_cqring_wait+0x16bc/0x1780 io_uring/io_uring.c:2630
Read of size 4 at addr ffff88807d128008 by task syz-executor994/8389

CPU: 3 UID: 0 PID: 8389 Comm: syz-executor994 Not tainted 6.12.0-rc4-00089-g7eb75ce75271-dirty #7
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:94 [inline]
 dump_stack_lvl+0x82/0xd0 lib/dump_stack.c:120
 print_address_description mm/kasan/report.c:377 [inline]
 print_report+0xc0/0x5e0 mm/kasan/report.c:488
 kasan_report+0xbd/0xf0 mm/kasan/report.c:601
 io_cqring_wait+0x16bc/0x1780 io_uring/io_uring.c:2630
 __do_sys_io_uring_enter+0xf37/0x15d0 io_uring/io_uring.c:3434
 do_syscall_x64 arch/x86/entry/common.c:52 [inline]
 do_syscall_64+0xcb/0x250 arch/x86/entry/common.c:83
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f1112ee1eed
Code: c3 e8 d7 1e 00 00 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 89 f8 48
89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d
01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f1112e87198 EFLAGS: 00000246 ORIG_RAX: 00000000000001aa
RAX: ffffffffffffffda RBX: 00007f1112f7c208 RCX: 00007f1112ee1eed
RDX: 0000000000001737 RSI: 0000000000002751 RDI: 0000000000000003
RBP: 00007f1112f7c200 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000005 R11: 0000000000000246 R12: 000000000000481a
R13: 0000000000000003 R14: 00007f1112eab0e0 R15: 00007f1112e67000
 </TASK>

The buggy address belongs to the physical page:
page: refcount:0 mapcount:0 mapping:0000000000000000 index:0xffff88807d12e000 pfn:0x7d128
flags: 0xfff00000000000(node=0|zone=1|lastcpupid=0x7ff)
raw: 00fff00000000000 ffffea0000b56708 ffffea0001f44b08 0000000000000000
raw: ffff88807d12e000 0000000000000000 00000000ffffffff 0000000000000000
page dumped because: kasan: bad access detected
page_owner tracks the page as freed
page last allocated via order 2, migratetype Unmovable, gfp_mask 0x442dc0(GFP_KERNEL_ACCOUNT|__GFP_NOWARN|__GFP_COMP|__GFP_ZERO), pid 8389, tgid 8388 (syz-executor994), ts 35530280269, free_ts 35581689370
 set_page_owner include/linux/page_owner.h:32 [inline]
 post_alloc_hook+0x2e7/0x350 mm/page_alloc.c:1537
 prep_new_page mm/page_alloc.c:1545 [inline]
 get_page_from_freelist+0xdf6/0x2800 mm/page_alloc.c:3457
 __alloc_pages_noprof+0x219/0x21e0 mm/page_alloc.c:4733
 alloc_pages_mpol_noprof+0x1cc/0x510 mm/mempolicy.c:2265
 io_mem_alloc_compound io_uring/memmap.c:29 [inline]
 io_pages_map+0xe5/0x500 io_uring/memmap.c:73
 io_register_resize_rings+0x377/0x14b0 io_uring/register.c:442
 __io_uring_register+0x1821/0x2290 io_uring/register.c:810
 __do_sys_io_uring_register io_uring/register.c:907 [inline]
 __se_sys_io_uring_register io_uring/register.c:884 [inline]
 __x64_sys_io_uring_register+0x178/0x2b0 io_uring/register.c:884
 do_syscall_x64 arch/x86/entry/common.c:52 [inline]
 do_syscall_64+0xcb/0x250 arch/x86/entry/common.c:83
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
page last free pid 8391 tgid 8388 stack trace:
 reset_page_owner include/linux/page_owner.h:25 [inline]
 free_pages_prepare mm/page_alloc.c:1108 [inline]
 free_unref_page+0x63f/0xdd0 mm/page_alloc.c:2638
 __folio_put+0x23f/0x2f0 mm/swap.c:126
 folio_put include/linux/mm.h:1478 [inline]
 put_page+0x21b/0x280 include/linux/mm.h:1550
 io_pages_unmap+0x1aa/0x3c0 io_uring/memmap.c:114
 io_register_free_rings.isra.0+0x67/0x1b0 io_uring/register.c:382
 io_register_resize_rings+0x101c/0x14b0 io_uring/register.c:565
 __io_uring_register+0x1821/0x2290 io_uring/register.c:810
 __do_sys_io_uring_register io_uring/register.c:907 [inline]
 __se_sys_io_uring_register io_uring/register.c:884 [inline]
 __x64_sys_io_uring_register+0x178/0x2b0 io_uring/register.c:884
 do_syscall_x64 arch/x86/entry/common.c:52 [inline]
 do_syscall_64+0xcb/0x250 arch/x86/entry/common.c:83
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

Memory state around the buggy address:
 ffff88807d127f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff88807d127f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>ffff88807d128000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
                      ^
 ffff88807d128080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
 ffff88807d128100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
==================================================================


Syzkaller reproducer:
# {Threaded:true Repeat:false RepeatTimes:0 Procs:1 Slowdown:1 Sandbox: SandboxArg:0 Leak:false NetInjection:false NetDevices:false NetReset:false Cgroups:false BinfmtMisc:false CloseFDs:false KCSAN:false DevlinkPCI:false NicVF:false USB:false VhciInjection:false Wifi:false IEEE802154:false Sysctl:false Swap:false UseTmpDir:false HandleSegv:false Repro:false Trace:false LegacyOptions:{Collide:false Fault:false FaultCall:0 FaultNth:0}}
r0 = syz_io_uring_setup(0x481a, &(0x7f0000003ac0)={0x0, 0x0, 0x2}, &(0x7f0000003b40), &(0x7f0000003b80))
io_uring_register$IORING_REGISTER_IOWQ_AFF(r0, 0x21, &(0x7f00000025c0)="fc", 0x1)
io_uring_enter(r0, 0x2751, 0x1737, 0x5, 0x0, 0x0)
io_uring_register$IORING_REGISTER_IOWQ_AFF(r0, 0x21, &(0x7f00000025c0)="fc", 0x1)


C reproducer:
// autogenerated by syzkaller (https://github.com/google/syzkaller)

#define _GNU_SOURCE

#include <endian.h>
#include <errno.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

#include <linux/futex.h>

#ifndef __NR_io_uring_enter
#define __NR_io_uring_enter 426
#endif
#ifndef __NR_io_uring_register
#define __NR_io_uring_register 427
#endif
#ifndef __NR_io_uring_setup
#define __NR_io_uring_setup 425
#endif

// Suspend the calling thread for `ms` milliseconds using usleep().
static void sleep_ms(uint64_t ms)
{
  const uint64_t usec_per_ms = 1000;
  usleep(ms * usec_per_ms);
}

// Read CLOCK_MONOTONIC and return it as whole milliseconds.
// Terminates the process with status 1 if the clock cannot be read.
static uint64_t current_time_ms(void)
{
  struct timespec now;
  if (clock_gettime(CLOCK_MONOTONIC, &now) != 0)
    exit(1);
  uint64_t ms_from_sec = (uint64_t)now.tv_sec * 1000;
  uint64_t ms_from_nsec = (uint64_t)now.tv_nsec / 1000000;
  return ms_from_sec + ms_from_nsec;
}

// Start a thread running fn(arg) with a 128 KiB stack.
//
// pthread_create() reports failure through its return value rather than
// errno, so the retry decision is made on that return code: EAGAIN is retried
// up to 100 times with a 50 microsecond pause in between, any other error (or
// running out of attempts) terminates the process with status 1.
// The created thread is neither joined nor detached here.
static void thread_start(void* (*fn)(void*), void* arg)
{
  pthread_t th;
  pthread_attr_t attr;
  pthread_attr_init(&attr);
  pthread_attr_setstacksize(&attr, 128 << 10);
  for (int attempt = 0; attempt < 100; attempt++) {
    int rc = pthread_create(&th, &attr, fn, arg);
    if (rc == 0) {
      pthread_attr_destroy(&attr);
      return;
    }
    if (rc != EAGAIN)
      break;
    usleep(50);
  }
  exit(1);
}

// Minimal event flag built on a single int.
// state is 0 while the event is clear and 1 once event_set() has stored to it;
// the address of state is also what the event_* helpers pass to SYS_futex.
typedef struct {
  int state;
} event_t;

// Put an event into its cleared condition (state == 0).
static void event_init(event_t* ev)
{
  int* flag = &ev->state;
  *flag = 0;
}

// Clear a previously set event with a plain (non-atomic) store of 0.
static void event_reset(event_t* ev)
{
  (*ev).state = 0;
}

// Mark the event as set and wake any futex waiters on it.
// Setting an event that is already set is treated as fatal: the process
// exits with status 1.
static void event_set(event_t* ev)
{
  int* flag = &ev->state;
  if (*flag != 0)
    exit(1);
  // Release store pairs with the acquire loads in event_wait/event_isset.
  __atomic_store_n(flag, 1, __ATOMIC_RELEASE);
  syscall(SYS_futex, flag, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1000000);
}

// Block until the event is observed as set.
// The futex wait is only entered while state still reads 0; the flag is
// re-checked after each return from the syscall.
static void event_wait(event_t* ev)
{
  for (;;) {
    if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
      return;
    syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0);
  }
}

// Non-blocking check: returns the current value of the event flag
// (non-zero when set), read with acquire ordering.
static int event_isset(event_t* ev)
{
  int snapshot = __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
  return snapshot;
}

// Wait for the event to become set, giving up after `timeout` milliseconds.
// Returns 1 if the event was observed set, 0 if the deadline passed first.
static int event_timedwait(event_t* ev, uint64_t timeout)
{
  const uint64_t begin = current_time_ms();
  uint64_t elapsed = 0;
  do {
    // Sleep on the futex for whatever is left of the budget.
    uint64_t left = timeout - elapsed;
    struct timespec budget = {
        .tv_sec = left / 1000,
        .tv_nsec = (left % 1000) * 1000 * 1000,
    };
    syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &budget);
    if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
      return 1;
    elapsed = current_time_ms() - begin;
  } while (elapsed <= timeout);
  return 0;
}

// Sizes and offsets describing the io_uring rings.
// Only SIZEOF_IO_URING_SQE and SIZEOF_IO_URING_CQE are referenced by the code
// in this reproducer (to size the mmap regions in syz_io_uring_setup).
// NOTE(review): the *_OFFSET values are presumably byte offsets of fields
// inside the mapped ring header -- confirm against the kernel's ring layout.
#define SIZEOF_IO_URING_SQE 64
#define SIZEOF_IO_URING_CQE 16
#define SQ_HEAD_OFFSET 0
#define SQ_TAIL_OFFSET 64
#define SQ_RING_MASK_OFFSET 256
#define SQ_RING_ENTRIES_OFFSET 264
#define SQ_FLAGS_OFFSET 276
#define SQ_DROPPED_OFFSET 272
#define CQ_HEAD_OFFSET 128
#define CQ_TAIL_OFFSET 192
#define CQ_RING_MASK_OFFSET 260
#define CQ_RING_ENTRIES_OFFSET 268
#define CQ_RING_OVERFLOW_OFFSET 284
#define CQ_FLAGS_OFFSET 280
#define CQ_CQES_OFFSET 320

// Submission-queue offsets block, embedded in struct io_uring_params as sq_off.
// Of these fields this reproducer only reads `array`, to locate the SQ index
// array inside the ring mapping and to size that mapping.
// NOTE(review): the layout presumably has to match the kernel's struct of the
// same name, since the enclosing params struct is passed to io_uring_setup --
// confirm before reordering or resizing any field.
struct io_sqring_offsets {
  uint32_t head;
  uint32_t tail;
  uint32_t ring_mask;
  uint32_t ring_entries;
  uint32_t flags;
  uint32_t dropped;
  uint32_t array;
  uint32_t resv1;
  uint64_t resv2;
};

// Completion-queue offsets block, embedded in struct io_uring_params as cq_off.
// Of these fields this reproducer only reads `cqes`, when computing how many
// bytes of the ring to map.
// NOTE(review): the layout presumably has to match the kernel's struct of the
// same name -- confirm before reordering or resizing any field.
struct io_cqring_offsets {
  uint32_t head;
  uint32_t tail;
  uint32_t ring_mask;
  uint32_t ring_entries;
  uint32_t overflow;
  uint32_t cqes;
  uint64_t resv[2];
};

// Parameter block whose address is passed to the io_uring_setup syscall.
// syz_io_uring_setup() clears two bits in `flags` before the call and reads
// sq_entries, cq_entries, sq_off.array and cq_off.cqes after it to size the
// ring mappings.
// NOTE(review): this local definition presumably stands in for the kernel's
// uapi struct; its layout must stay in sync with what the kernel expects --
// confirm against the target kernel headers.
struct io_uring_params {
  uint32_t sq_entries;
  uint32_t cq_entries;
  uint32_t flags;
  uint32_t sq_thread_cpu;
  uint32_t sq_thread_idle;
  uint32_t features;
  uint32_t resv[4];
  struct io_sqring_offsets sq_off;
  struct io_cqring_offsets cq_off;
};

// mmap offsets on the io_uring fd: syz_io_uring_setup() maps the ring at
// IORING_OFF_SQ_RING and the SQE array at IORING_OFF_SQES.
#define IORING_OFF_SQ_RING 0
#define IORING_OFF_SQES 0x10000000ULL
// Setup flags that syz_io_uring_setup() strips from the params before calling
// io_uring_setup.
#define IORING_SETUP_SQE128 (1U << 10)
#define IORING_SETUP_CQE32 (1U << 11)

// Create an io_uring instance and map its rings into this process.
//
//   a0: requested number of SQ entries (truncated to uint32_t)
//   a1: address of a struct io_uring_params; CQE32/SQE128 are cleared from its
//       flags before the syscall because the mapping sizes below are computed
//       from the fixed SIZEOF_IO_URING_SQE / SIZEOF_IO_URING_CQE
//   a2: address of a void* that receives the ring mapping
//   a3: address of a void* that receives the SQE array mapping
//
// Returns the ring fd, or -1 if io_uring_setup failed (in which case *a2 and
// *a3 are left untouched). If a mapping fails, MAP_FAILED is stored in the
// corresponding output; when the ring mapping failed the SQ index array is
// not initialised, since there is nothing valid to write through.
static long syz_io_uring_setup(volatile long a0, volatile long a1,
                               volatile long a2, volatile long a3)
{
  uint32_t entries = (uint32_t)a0;
  struct io_uring_params* setup_params = (struct io_uring_params*)a1;
  void** ring_ptr_out = (void**)a2;
  void** sqes_ptr_out = (void**)a3;
  setup_params->flags &= ~(IORING_SETUP_CQE32 | IORING_SETUP_SQE128);
  // Keep the signed syscall result: storing it in a uint32_t would turn the
  // -1 failure value into 4294967295 and defeat the caller's `!= -1` check.
  long fd_io_uring = syscall(__NR_io_uring_setup, entries, setup_params);
  if (fd_io_uring < 0)
    return -1;
  uint32_t sq_ring_sz =
      setup_params->sq_off.array + setup_params->sq_entries * sizeof(uint32_t);
  uint32_t cq_ring_sz = setup_params->cq_off.cqes +
                        setup_params->cq_entries * SIZEOF_IO_URING_CQE;
  uint32_t ring_sz = sq_ring_sz > cq_ring_sz ? sq_ring_sz : cq_ring_sz;
  void* ring =
      mmap(0, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
           (int)fd_io_uring, IORING_OFF_SQ_RING);
  *ring_ptr_out = ring;
  uint32_t sqes_sz = setup_params->sq_entries * SIZEOF_IO_URING_SQE;
  *sqes_ptr_out = mmap(0, sqes_sz, PROT_READ | PROT_WRITE,
                       MAP_SHARED | MAP_POPULATE, (int)fd_io_uring,
                       IORING_OFF_SQES);
  if (ring == MAP_FAILED)
    return fd_io_uring;
  // The index array region is sized as sq_entries slots (see sq_ring_sz), so
  // never write more identity entries than that even if more were requested.
  uint32_t slots = entries < setup_params->sq_entries
                       ? entries
                       : setup_params->sq_entries;
  uint32_t* array =
      (uint32_t*)((uintptr_t)ring + setup_params->sq_off.array);
  for (uint32_t index = 0; index < slots; index++)
    array[index] = index;
  return fd_io_uring;
}

// Per-worker bookkeeping shared between the dispatcher loop() and the worker
// body thr().
struct thread_t {
  // created: set to 1 by loop() once the worker thread has been started.
  // call: index of the call the worker passes to execute_call().
  int created, call;
  // ready: set by loop() to hand the worker a call.
  // done: set by the worker after execute_call() returns.
  event_t ready, done;
};

// Pool of worker slots; loop() starts a thread for a slot the first time it
// visits that slot.
static struct thread_t threads[16];
// Forward declaration: thr() calls execute_call() before its definition.
static void execute_call(int call);
// Count of dispatched calls whose worker has not finished yet
// (incremented in loop(), decremented in thr()).
static int running;

// Worker thread body: repeatedly waits for its slot's `ready` event, runs the
// call stored in the slot, then drops the `running` count and signals `done`.
// The loop never terminates; the trailing return is unreachable.
static void* thr(void* arg)
{
  struct thread_t* self = (struct thread_t*)arg;
  while (1) {
    event_wait(&self->ready);
    event_reset(&self->ready);
    execute_call(self->call);
    __atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED);
    event_set(&self->done);
  }
  return 0;
}

static void loop(void)
{
  int i, call, thread;
  for (call = 0; call < 4; call++) {
    for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0]));
         thread++) {
      struct thread_t* th = &threads[thread];
      if (!th->created) {
        th->created = 1;
        event_init(&th->ready);
        event_init(&th->done);
        event_set(&th->done);
        thread_start(thr, th);
      }
      if (!event_isset(&th->done))
        continue;
      event_reset(&th->done);
      th->call = call;
      __atomic_fetch_add(&running, 1, __ATOMIC_RELAXED);
      event_set(&th->ready);
      event_timedwait(&th->done, 50);
      break;
    }
  }
  for (i = 0; i < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED); i++)
    sleep_ms(1);
}

// r[0] carries the io_uring fd from call 0 to calls 1-3 in execute_call();
// it keeps the initial 0xffffffffffffffff until call 0 stores a result.
uint64_t r[1] = {0xffffffffffffffff};

// Run one step of the reproducer, selected by `call`:
//   0    - fill the params at 0x20003ac0 and create the ring, saving the fd
//          in r[0]
//   1, 3 - io_uring_register with opcode 0x21 and a one-byte argument (0xfc)
//          at 0x200025c0; in the accompanying KASAN report the page
//          alloc/free stacks show __io_uring_register reaching
//          io_register_resize_rings
//   2    - io_uring_enter on the ring
// Any other value does nothing. All addresses are inside the fixed mapping
// set up by main().
void execute_call(int call)
{
  if (call == 0) {
    *(uint32_t*)0x20003ac4 = 0;
    *(uint32_t*)0x20003ac8 = 2;
    *(uint32_t*)0x20003acc = 0;
    *(uint32_t*)0x20003ad0 = 0;
    *(uint32_t*)0x20003ad8 = -1;
    memset((void*)0x20003adc, 0, 12);
    intptr_t fd = syz_io_uring_setup(/*entries=*/0x481a, /*params=*/0x20003ac0,
                                     /*ring_ptr=*/0x20003b40,
                                     /*sqes_ptr=*/0x20003b80);
    if (fd != -1)
      r[0] = fd;
    return;
  }

  if (call == 1 || call == 3) {
    memset((void*)0x200025c0, 252, 1);
    syscall(__NR_io_uring_register, /*fd=*/r[0], /*opcode=*/0x21ul,
            /*arg=*/0x200025c0ul, /*size=*/1ul);
    return;
  }

  if (call == 2) {
    syscall(__NR_io_uring_enter, /*fd=*/r[0], /*to_submit=*/0x2751,
            /*min_complete=*/0x1737,
            /*flags=IORING_ENTER_SQ_WAIT|IORING_ENTER_GETEVENTS*/ 5ul,
            /*sigmask=*/0ul, /*size=*/0ul);
  }
}
// Set up the fixed address-space layout the reproducer relies on, then run it.
//
// Layout:
//   0x1ffff000..0x20000000  inaccessible page below the data area
//   0x20000000..0x21000000  read/write/exec data area that execute_call()
//                           addresses with hard-coded pointers
//   0x21000000..0x21001000  inaccessible page above the data area
//
// The two inaccessible pages are best-effort. The data area is required:
// without it every hard-coded store in execute_call() would fault, so a
// failure to map it is reported and the process exits with status 1.
int main(void)
{
  const int fixed_anon = MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE;

  mmap((void*)0x1ffff000ul, 0x1000ul, PROT_NONE, fixed_anon, -1, 0);
  void* data = mmap((void*)0x20000000ul, 0x1000000ul,
                    PROT_WRITE | PROT_READ | PROT_EXEC, fixed_anon, -1, 0);
  if (data == MAP_FAILED) {
    perror("mmap");
    return 1;
  }
  mmap((void*)0x21000000ul, 0x1000ul, PROT_NONE, fixed_anon, -1, 0);
  loop();
  return 0;
}




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux