I originally reported this on 3.4.76, but I see the same issue on 3.4.77. I've also tested with a 3.13 rc and could not reproduce the issue there. All of these tests were done on 12-core amd64 machines.

I wrote this simple program (attached) to play around with kernel AIO. It simply does kernel AIO with O_DIRECT on a small temp file stored on an ext4 filesystem. When I run it with "LD_PRELOAD=libhugetlbfs.so", it triggers a "Bad page state" BUG on exit every time. Removing LD_PRELOAD from the command line fixes the problem.

Note that my kernel does not use THP: it is NOT compiled with CONFIG_TRANSPARENT_HUGEPAGE.

kernel: BUG: Bad page state in process aio_test pfn:1b7201
kernel: page:ffffea0006dc8040 count:0 mapcount:1 mapping: (null) index:0x91e
kernel: page flags: 0x20000000008000(tail)
kernel: Modules linked in: nfsd exportfs nfs nfs_acl auth_rpcgss fscache lockd sunrpc rdma_ucm rdma_cm ib_addr iw_cm ib_uverbs ib_cm ib_sa ib_mad ib_core ipmi_si ipmi_devintf ioatdma coretemp microcode i2c_i801 serio_raw pcspkr i2c_core dca dm_mod sg sr_mod cdrom crc32c_intel ahci libahci [last unloaded: scsi_wait_scan]
kernel: Pid: 5170, comm: aio_test Tainted: G O 3.4.77bug #1
kernel: Call Trace:
kernel: [<ffffffff810f3300>] ? is_free_buddy_page+0xa0/0xd0
kernel: [<ffffffff814c0861>] bad_page+0xe6/0xfc
kernel: [<ffffffff810f3dbc>] free_pages_prepare+0xfc/0x110
kernel: [<ffffffff811afe20>] ? noalloc_get_block_write+0x30/0x30
kernel: [<ffffffff810f3dff>] __free_pages_ok+0x2f/0xd0
kernel: [<ffffffff810f4080>] __free_pages+0x20/0x40
kernel: [<ffffffff81124737>] update_and_free_page+0x77/0x80
kernel: [<ffffffff8112633e>] free_huge_page+0x16e/0x180
kernel: [<ffffffff810f8030>] __put_compound_page+0x20/0x50
kernel: [<ffffffff810f8108>] put_compound_page+0x78/0x140
kernel: [<ffffffff810f8546>] put_page+0x36/0x40
kernel: [<ffffffff81126ede>] __unmap_hugepage_range+0x1ce/0x230
kernel: [<ffffffff81127331>] unmap_hugepage_range+0x51/0x90
kernel: [<ffffffff8110e880>] unmap_single_vma+0x730/0x740
kernel: [<ffffffff8110f05f>] unmap_vmas+0x5f/0x80
kernel: [<ffffffff8111672c>] exit_mmap+0xbc/0x130
kernel: [<ffffffff8112e223>] ? kmem_cache_free+0xd3/0xe0
kernel: [<ffffffff81035155>] mmput+0x35/0xf0
kernel: [<ffffffff8103a58d>] exit_mm+0xfd/0x120
kernel: [<ffffffff8103bb6c>] do_exit+0x16c/0x8b0
kernel: [<ffffffff811540c4>] ? mntput+0x24/0x40
kernel: [<ffffffff81138962>] ? fput+0x192/0x250
kernel: [<ffffffff8103c5ff>] do_group_exit+0x3f/0xa0
kernel: [<ffffffff8103c677>] sys_exit_group+0x17/0x20
kernel: [<ffffffff814d0492>] system_call_fastpath+0x16/0x1b

When I revert the following patch, I cannot reproduce the problem:

commit b07ef016454ff46f98e633b5a6247ca7e343fb67
Author: Khalid Aziz <khalid.aziz@xxxxxxxxxx>
Date: Wed Sep 11 14:22:20 2013 -0700

This patch was added to the 3.4 branch for 3.4.69. Commit 27c73ae759774e63313c1fbfeb17ba076cea64c5 might have fixed the issue in the dev branch, but I have not tried to backport it.

--
Guillaume Morin <guillaume@xxxxxxxxxxx>
#define _GNU_SOURCE #include <libaio.h> #include <errno.h> #include <unistd.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> #include <sys/eventfd.h> #include <sys/epoll.h> #include <sys/mman.h> #include <stdio.h> #include <stdlib.h> #define FILE_SIZE 4096 int main(void) { io_context_t ctx; int fd,fd_odirect,i,event_fd,epoll_fd; struct epoll_event ev; void *buf; size_t offset = 0; struct iocb cb; struct iocb * cbs[1] = { &cb }; fd = open("/tmp/foo",O_RDWR|O_CREAT); if (fd == -1) { perror("open"); return 1; } for (i = 0; i < FILE_SIZE; ++i) { char c = rand() % 255; write(fd, &c, 1); } close(fd); fd_odirect = open("/tmp/foo",O_RDONLY|O_DIRECT); if (fd_odirect == -1) { perror("open"); return 1; } memset(&ctx, 0, sizeof(ctx)); if (0 != io_queue_init(1, &ctx)) { perror("ctx"); return 1; } event_fd = eventfd(0, EFD_CLOEXEC); if (event_fd == -1) { perror("eventfd"); return -1; } epoll_fd = epoll_create(1); if (epoll_fd == -1) { perror("epoll_fd"); return 1; } ev.events = EPOLLIN; if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, event_fd, &ev) == -1) { perror("epoll_ctl"); return 1; } posix_memalign(&buf, 512, 32768); while (1) { struct timespec ts = { 0, 0 }; struct io_event ioev; int ret; long v; io_prep_pread(&cb, fd_odirect, buf + offset, 512, offset); io_set_eventfd(&cb, event_fd); if (1 != io_submit(ctx, 1, cbs)) { perror("io_submit"); return 1; } ret = epoll_wait(epoll_fd, &ev, 1, -1); if (ret != 1) { perror("epoll_wait"); } read(event_fd, &v, 8); printf("event_fd returned %ld\n", v); if (io_getevents(ctx, 1, 1, &ioev, &ts) != 1) { perror("io_getevents"); return 1; } printf("Read 1 res %ld res2 %ld\n", ioev.res, ioev.res2); offset += ioev.res; if (ioev.res == 0) { break; } if ((offset + 512) > 32768) { puts("ERROR - reading past buffer"); return 1; } } free(buf); io_destroy(ctx); close(event_fd); close(epoll_fd); close(fd_odirect); return 0; }