On Tue, Oct 9, 2018 at 3:40 AM Jan Kara <jack@xxxxxxx> wrote: > On Fri 05-10-18 17:06:22, Paul Moore wrote: > > On Tue, Sep 4, 2018 at 12:06 PM Jan Kara <jack@xxxxxxx> wrote: > > > Add stress test for stressing audit tree watches by adding and deleting > > > rules while events are generated and watched filesystems are mounted and > > > unmounted in parallel. > > > > > > Signed-off-by: Jan Kara <jack@xxxxxxx> > > > --- > > > tests/stress_tree/Makefile | 8 +++ > > > tests/stress_tree/test | 171 +++++++++++++++++++++++++++++++++++++++++++++ > > > 2 files changed, 179 insertions(+) > > > create mode 100644 tests/stress_tree/Makefile > > > create mode 100755 tests/stress_tree/test > > > > No commentary on the test itself, other than perhaps it should live > > under test_manual/, but in running the tests in a loop today I am > > reliably able to panic my test kernel after ~30m or so. > > Interesting. How do you run the test? Nothing fancy, just a simple bash loop: # cd tests/stress_tree # while ./test; do /bin/true; done > > I'm using the kernel linked below which is Fedora Rawhide + > > selinux/next + audit/next + audit/working-fsnotify_fixes; a link to > > the patches added to the Rawhide kernel can be found in the list > > archive linked below. > > > > * https://groups.google.com/forum/#!topic/kernel-secnext/SFv0d-ij3z8 > > * https://copr.fedorainfracloud.org/coprs/pcmoore/kernel-secnext/build/805949 > > > > The initial panic dump is below: > > I can try to reproduce this but could you perhaps find out which of the > list manipulations in audit_remove_tree_rule() hit the corrution? A quick dump of the audit_remove_tree_rule() function makes it look like it is the inner most list_del_init() call. Dump of assembler code for function audit_remove_tree_rule: 646 { 0xffffffff811ad900 <+0>: callq 0xffffffff81c01850 <__fentry__> 647 struct audit_tree *tree; 648 tree = rule->tree; 0xffffffff811ad905 <+5>: push %r14 0xffffffff811ad907 <+7>: xor %eax,%eax 0xffffffff811ad909 <+9>: push %r13 0xffffffff811ad90b <+11>: push %r12 0xffffffff811ad90d <+13>: push %rbp 0xffffffff811ad90e <+14>: push %rbx 0xffffffff811ad90f <+15>: mov 0x140(%rdi),%rbp 649 if (tree) { 0xffffffff811ad916 <+22>: test %rbp,%rbp 0xffffffff811ad919 <+25>: je 0xffffffff811ad990 <audit_remove_tree_rule+144> 0xffffffff811ad91b <+27>: mov %rdi,%rbx 650 spin_lock(&hash_lock); 651 list_del_init(&rule->rlist); 0xffffffff811ad92a <+42>: lea 0x150(%rbx),%r12 652 if (list_empty(&tree->rules) && !tree->goner) { 653 tree->root = NULL; 0xffffffff811ad999 <+153>: movq $0x0,0x8(%rbp) 654 list_del_init(&tree->same_root); 0xffffffff811ad9a1 <+161>: lea 0x40(%rbp),%r12 655 tree->goner = 1; 0xffffffff811ad9c8 <+200>: lea 0x30(%rbp),%r12 0xffffffff811ad9cc <+204>: movl $0x1,0x4(%rbp) 656 list_move(&tree->list, &prune_list); 657 rule->tree = NULL; 0xffffffff811ada21 <+289>: movq $0x0,0x140(%rbx) 658 spin_unlock(&hash_lock); 659 audit_schedule_prune(); 660 return 1; 0xffffffff811ada44 <+324>: mov $0x1,%eax 0xffffffff811ada49 <+329>: pop %rbx 0xffffffff811ada4a <+330>: pop %rbp 0xffffffff811ada4b <+331>: pop %r12 0xffffffff811ada4d <+333>: pop %r13 0xffffffff811ada4f <+335>: pop %r14 0xffffffff811ada51 <+337>: retq 0xffffffff811ada52: data16 nopw %cs:0x0(%rax,%rax,1) 0xffffffff811ada5d: nopl (%rax) 661 } 662 rule->tree = NULL; 0xffffffff811ad974 <+116>: movq $0x0,0x140(%rbx) 663 spin_unlock(&hash_lock); 664 return 1; 0xffffffff811ad98b <+139>: mov $0x1,%eax 0xffffffff811ad990 <+144>: pop %rbx 0xffffffff811ad991 <+145>: pop %rbp 0xffffffff811ad992 <+146>: pop %r12 0xffffffff811ad994 <+148>: pop %r13 0xffffffff811ad996 <+150>: pop %r14 0xffffffff811ad998 <+152>: retq 665 } 666 return 0; 667 } > > [ 139.619065] list_del corruption. prev->next should be > > ffff985fa98d4100, but was ffff985fae91e370 > > [ 139.622504] ------------[ cut here ]------------ > > [ 139.623402] kernel BUG at lib/list_debug.c:53! > > [ 139.624294] invalid opcode: 0000 [#1] SMP PTI > > [ 139.625439] CPU: 1 PID: 3248 Comm: auditctl Not tainted > > 4.19.0-0.rc6.git2.1.1.secnext.fc30.x86_64 #1 > > [ 139.630761] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 > > [ 139.631812] RIP: 0010:__list_del_entry_valid.cold.1+0x34/0x4c > > [ 139.632853] Code: ce 34 ac e8 b8 f7 c0 ff 0f 0b 48 c7 c7 a8 cf 34 > > ac e8 aa f7 c0 ff 0f 0b 48 89 f2 48 89 fe 48 c7 c7 68 cf 34 ac e8 96 > > f7 c0 ff <0f> 0b 48 89 fe 48 c7 c7 30 cf 34 ac e8 85 f7 c0 ff 0f 0b 90 > > 90 90 > > [ 139.636347] RSP: 0018:ffffb4890293fbb8 EFLAGS: 00010246 > > [ 139.637295] RAX: 0000000000000054 RBX: ffff985fae91e620 RCX: 0000000000000000 > > [ 139.638573] RDX: 0000000000000000 RSI: ffff985fb77d6be8 RDI: ffff985fb77d6be8 > > [ 139.639855] RBP: ffff985fa98d40c0 R08: 00046c1ac1fe8d00 R09: 0000000000000000 > > [ 139.641136] R10: 0000000000000000 R11: 0000000000000000 R12: ffff985fa98d4100 > > [ 139.642416] R13: 0000000000000000 R14: ffff985faf00fe20 R15: 00000000000003f4 > > [ 139.643699] FS: 00007f9898252b80(0000) GS:ffff985fb7600000(0000) > > knlGS:0000000000000000 > > [ 139.645199] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > > [ 139.646244] CR2: 0000561d333b8218 CR3: 000000023110a000 CR4: 00000000001406e0 > > [ 139.647533] Call Trace: > > [ 139.647997] audit_remove_tree_rule+0xad/0x160 > > [ 139.648819] audit_del_rule+0x90/0x190 > > [ 139.649507] audit_rule_change+0x98e/0xbf0 > > [ 139.650259] audit_receive_msg+0x142/0x1160 > > [ 139.651030] ? netlink_deliver_tap+0x99/0x410 > > [ 139.651832] audit_receive+0x54/0xb0 > > [ 139.652495] netlink_unicast+0x181/0x210 > > [ 139.653211] netlink_sendmsg+0x218/0x3e0 > > [ 139.653936] sock_sendmsg+0x36/0x40 > > [ 139.654583] __sys_sendto+0xf1/0x160 > > [ 139.655244] ? syscall_trace_enter+0x1d3/0x330 > > [ 139.656055] ? trace_hardirqs_off_thunk+0x1a/0x1c > > [ 139.656912] __x64_sys_sendto+0x24/0x30 > > [ 139.657615] do_syscall_64+0x60/0x1f0 > > [ 139.658285] entry_SYSCALL_64_after_hwframe+0x49/0xbe > > [ 139.659192] RIP: 0033:0x7f989835a7fb > > [ 139.659847] Code: 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 f3 > > 0f 1e fa 48 8d 05 25 6f 0c 00 41 89 ca 8b 00 85 c0 75 14 b8 2c 00 00 > > 00 0f 05 <48> 3d 00 f0 ff ff 77 75 c3 0f 1f 40 00 41 57 4d 89 c7 41 56 > > 41 89 > > [ 139.663170] RSP: 002b:00007ffc95ae6b48 EFLAGS: 00000246 ORIG_RAX: > > 000000000000002c > > [ 139.664531] RAX: ffffffffffffffda RBX: 0000000000000460 RCX: 00007f989835a7fb > > [ 139.665809] RDX: 0000000000000460 RSI: 00007ffc95ae6b80 RDI: 0000000000000003 > > [ 139.667087] RBP: 0000000000000003 R08: 00007ffc95ae6b6c R09: 000000000000000c > > [ 139.668370] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffc95ae6b80 > > [ 139.669648] R13: 00007ffc95ae6b6c R14: 0000000000000002 R15: 000000000000044f > > [ 139.670935] Modules linked in: ib_isert iscsi_target_mod ib_srpt > > target_core_mod ib_srp scsi_transport_srp rpcrdma ib_umad rdma_ucm > > ib_iser ib_ipoib rdma_cm iw_cm libiscsi scsi_transport_iscsi ib_cm > > mlx5_ib ib_uverbs ib_core crct10dif_pclmul crc32_pclmul > > ghash_clmulni_intel joydev virtio_balloon i2c_piix4 sunrpc > > drm_kms_helper ttm crc32c_intel mlx5_core drm virtio_console mlxfw > > serio_raw devlink virtio_blk virtio_net net_failover failover > > ata_generic pata_acpi qemu_fw_cfg > > [ 139.678676] ---[ end trace be98f2acb1e536e4 ]--- > -- > Jan Kara <jack@xxxxxxxx> > SUSE Labs, CR -- paul moore www.paul-moore.com