Following up with the tests on hardware Adaptec RAID with XFS, I got this bug when running mount on a corrupted XFS volume: ------------[ cut here ]------------ kernel BUG at arch/x86/mm/pageattr.c:216! invalid opcode: 0000 [#2] SMP Modules linked in: nfsv3 nfsv4 ib_iser rdma_cm iw_cm ib_cm ib_sa ib_mad ib_core ib_addr iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi nfs bonding md_mod dm_mod nfsd lockd nfs_acl auth_rpcgss oid_registry sunrpc ipv6 fuse af_packet snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm_oss snd_mixer_oss snd_pcm snd_seq_dummy snd_seq_midi snd_rawmidi snd_seq_oss snd_seq_midi_event snd_seq snd_timer snd_seq_device snd virtio_net virtio_balloon soundcore loop virtio_blk virtio_pci virtio_ring virtio ata_piix xhci_hcd uhci_hcd usb_storage joydev usbhid kvm_amd kvm crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel aes_x86_64 lrw gf128mul glue_helper ablk_helper mgag200 evdev ttm cryptd drm_kms_helper e1000e drm microcode pcspkr sp5100_tco i2c_algo_bit psmouse k10temp ptp fam15h_power pps_core ohci_pci i2c_piix4 ohci_hcd ehci_pci ehci_hcd i2c_core ses usbcore enclosure usb_common sg myri10ge acpi_cpufreq dca processor thermal_sys button ata_generic aacraid pata _atiixp ahci libahci libata CPU: 5 PID: 18084 Comm: mount Not tainted 3.17.7-storiq64-opteron #1 Hardware name: Supermicro H8SGL/H8SGL, BIOS 3.0a 05/07/2013 task: ffff88040e1ad7f0 ti: ffff880037ca8000 task.ti: ffff880037ca8000 RIP: 0010:[<ffffffff8104e96d>] [<ffffffff8104e96d>] change_page_attr_set_clr+0x41d/0x420 RSP: 0018:ffff880037caa9a8 EFLAGS: 00010046 RAX: 0000000000000046 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff880037caa9d8 RBP: 0000000000000000 R08: 8000000037e10163 R09: 0000000000037e10 R10: ffff8800db14e958 R11: ffff880037caa830 R12: 0000000000000200 R13: 0000000000000010 R14: 0000000000000000 R15: 0000000000000005 FS: 00007ffee71207e0(0000) GS:ffff88041eca0000(0000) knlGS:0000000000000000 CS: 0010 
DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000000 CR3: 00000000db274000 CR4: 00000000000407e0 Stack: 0000000000000004 ffffffff00000000 0000000000000000 0000000000000008 0000000000000000 0000000037e10000 0000000000000000 0000000000000000 0000000000000010 0000000000000000 0000000500000001 0000000000037e10 Call Trace: [<ffffffff8104eaed>] ? _set_pages_array+0xed/0x140 [<ffffffffa06539f7>] ? ttm_set_pages_caching+0x47/0x80 [ttm] [<ffffffffa0653add>] ? ttm_alloc_new_pages.isra.5+0xad/0x170 [ttm] [<ffffffffa06544e6>] ? ttm_pool_populate+0x3c6/0x4c0 [ttm] [<ffffffffa0650caa>] ? ttm_bo_move_memcpy+0x45a/0x4f0 [ttm] [<ffffffff81123024>] ? __vmalloc_node_range+0x164/0x260 [<ffffffffa064d3c0>] ? ttm_tt_init+0x60/0xa0 [ttm] [<ffffffffa064ebcf>] ? ttm_bo_handle_move_mem+0x25f/0x400 [ttm] [<ffffffffa064f5d8>] ? ttm_bo_mem_space+0xd8/0x350 [ttm] [<ffffffff8105c100>] ? walk_system_ram_range+0x70/0xc0 [<ffffffffa064fa3d>] ? ttm_bo_validate+0x1ed/0x200 [ttm] [<ffffffff81121548>] ? free_vmap_area_noflush+0x28/0x60 [<ffffffffa067190e>] ? mgag200_bo_push_sysram+0x6e/0xc0 [mgag200] [<ffffffffa066c8fc>] ? mga_crtc_do_set_base.isra.8.constprop.20+0x7c/0x400 [mgag200] [<ffffffff812dd730>] ? pci_bus_write_config_dword+0x70/0x90 [<ffffffffa066e0d0>] ? mga_crtc_mode_set+0x1450/0x2560 [mgag200] [<ffffffff812589e0>] ? xfs_inode_item_committed+0x70/0x70 [<ffffffff812589e0>] ? xfs_inode_item_committed+0x70/0x70 [<ffffffff812589e0>] ? xfs_inode_item_committed+0x70/0x70 [<ffffffff812b7196>] ? idr_mark_full+0x56/0x60 [<ffffffff812b7aaf>] ? idr_alloc+0x7f/0xf0 [<ffffffff812c1353>] ? delay_tsc+0x43/0x70 [<ffffffffa0603941>] ? drm_crtc_helper_set_mode+0x2d1/0x500 [drm_kms_helper] [<ffffffffa06044b1>] ? drm_crtc_helper_set_config+0x801/0xa20 [drm_kms_helper] [<ffffffffa055b92f>] ? drm_mode_set_config_internal+0x5f/0xe0 [drm] [<ffffffffa060b802>] ? drm_fb_helper_pan_display+0x82/0xe0 [drm_kms_helper] [<ffffffff81310989>] ? fb_pan_display+0xb9/0x180 [<ffffffff8130e881>] ? 
bit_update_start+0x21/0x50 [<ffffffff8130e259>] ? fbcon_switch+0x389/0x530 [<ffffffff8137587f>] ? redraw_screen+0x15f/0x230 [<ffffffff8130ca02>] ? fbcon_blank+0x232/0x2e0 [<ffffffff8109160a>] ? console_unlock+0x1da/0x440 [<ffffffff81091d20>] ? vprintk_emit+0x2b0/0x4e0 [<ffffffff8109fb03>] ? lock_timer_base.isra.36+0x33/0x70 [<ffffffff8109f300>] ? internal_add_timer+0x30/0x70 [<ffffffff810a1284>] ? mod_timer+0x114/0x1b0 [<ffffffff81376396>] ? do_unblank_screen+0xa6/0x1c0 [<ffffffff812c38c9>] ? bust_spinlocks+0x19/0x40 [<ffffffff810164c2>] ? oops_end+0x12/0xa0 [<ffffffff814cf5f6>] ? no_context+0x2e5/0x323 [<ffffffff8104c06e>] ? __do_page_fault+0x3fe/0x490 [<ffffffff814d75da>] ? schedule_timeout+0x14a/0x1c0 [<ffffffff814d4b3f>] ? wait_for_completion+0xaf/0x120 [<ffffffff81236a25>] ? xfs_buf_delwri_submit+0x25/0x80 [<ffffffff8107ce60>] ? try_to_wake_up+0x2f0/0x2f0 [<ffffffff8108b9dd>] ? up+0xd/0x40 [<ffffffff814da3a8>] ? page_fault+0x28/0x30 [<ffffffff81259ef6>] ? xlog_recover_free_trans+0x16/0xb0 [<ffffffff8125e448>] ? xlog_recover_process_data+0x108/0x2a0 [<ffffffff8125e741>] ? xlog_do_recovery_pass+0x161/0x5c0 [<ffffffff8124e3d0>] ? xfs_parseargs+0xb80/0xb80 [<ffffffff8124e3d0>] ? xfs_parseargs+0xb80/0xb80 [<ffffffff8125ec18>] ? xlog_do_log_recovery+0x78/0xa0 [<ffffffff8125ec5a>] ? xlog_do_recover+0x1a/0x100 [<ffffffff8125f00b>] ? xlog_recover+0x7b/0xb0 [<ffffffff81253486>] ? xfs_log_mount+0xe6/0x2b0 [<ffffffff8124b642>] ? xfs_mountfs+0x442/0x780 [<ffffffff8123a9e0>] ? xfs_filestream_get_ag+0x20/0x20 [<ffffffff8124e697>] ? xfs_fs_fill_super+0x2c7/0x340 [<ffffffff8113b996>] ? mount_bdev+0x1c6/0x210 [<ffffffff8113c55a>] ? mount_fs+0x1a/0xd0 [<ffffffff811553b4>] ? vfs_kern_mount+0x64/0x110 [<ffffffff81157513>] ? do_mount+0x213/0xa80 [<ffffffff810ef799>] ? __get_free_pages+0x9/0x50 [<ffffffff81158078>] ? SyS_mount+0x98/0xf0 [<ffffffff814d8569>] ? 
system_call_fastpath+0x16/0x1b Code: 0c 24 44 8b 44 24 08 48 8b 4c 24 10 e9 9f fc ff ff 0f 0b 0f 0b be ba 00 00 00 48 c7 c7 13 b0 5d 81 e8 d8 8a 00 00 e9 21 ff ff ff <0f> 0b 90 41 56 45 31 c0 31 d2 41 b9 04 00 00 00 b9 18 00 00 00 RIP [<ffffffff8104e96d>] change_page_attr_set_clr+0x41d/0x420 RSP <ffff880037caa9a8> ---[ end trace b6bd5ad538480248 ]--- Configuration: kernel 3.17.7, amd64 on Debian 7.7. The hardware is unchanged from my previous tests ( Adaptec 71685, dual Opteron 6212). The filesystem got corrupted while rebuilding the RAID and doing read/write IO, as previously. There is first this error: XFS (sda5): Metadata corruption detected at xfs_buf_iodone_work+0x8d/0xb0, block 0xe003be218 XFS (sda5): Unmount and run xfs_repair XFS (sda5): First 64 bytes of corrupted metadata buffer: ffff88040e339000: 8b 98 af af 25 4a 84 7a 51 6c 38 41 f7 2d 78 b5 ....%J.zQl8A.-x. ffff88040e339010: 35 f3 af 8e 64 32 81 a4 6b 1d a4 0b 3c 8c d8 c3 5...d2..k...<... ffff88040e339020: 23 ba 20 f7 c9 3a a1 fa d1 ea e3 27 03 46 dd 83 #. ..:.....'.F.. ffff88040e339030: cb f8 75 d3 a2 82 a7 b1 9f 7d 14 bb c8 2a 94 8d ..u......}...*.. XFS (sda5): metadata I/O error: block 0xe003be218 ("xfs_trans_read_buf_map") error 117 numblks 8 XFS (sda5): Metadata corruption detected at xfs_inode_buf_verify+0x6c/0xb0, block 0x1001c26d50 XFS (sda5): Unmount and run xfs_repair XFS (sda5): First 64 bytes of corrupted metadata buffer: ffff880037d82000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ ffff880037d82010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ ffff880037d82020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ ffff880037d82030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
XFS (sda5): Metadata corruption detected at xfs_inode_buf_verify+0x6c/0xb0, block 0x1001c26d50 XFS (sda5): Unmount and run xfs_repair XFS (sda5): First 64 bytes of corrupted metadata buffer: ffff880037d82000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ ffff880037d82010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ ffff880037d82020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ ffff880037d82030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ XFS (sda5): Metadata corruption detected at xfs_inode_buf_verify+0x6c/0xb0, block 0x1001c26d50 XFS (sda5): Unmount and run xfs_repair XFS (sda5): First 64 bytes of corrupted metadata buffer: ffff880037d82000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ ffff880037d82010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ ffff880037d82020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ ffff880037d82030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ XFS (sda5): Metadata corruption detected at xfs_inode_buf_verify+0x6c/0xb0, block 0x1001c26d50 XFS (sda5): Unmount and run xfs_repair XFS (sda5): First 64 bytes of corrupted metadata buffer: ffff880037d82000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ ffff880037d82010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ ffff880037d82020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ ffff880037d82030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ XFS (sda5): Metadata corruption detected at xfs_inode_buf_verify+0x6c/0xb0, block 0x1001c26d50 XFS (sda5): Unmount and run xfs_repair XFS (sda5): First 64 bytes of corrupted metadata buffer: ffff880037d82000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ ffff880037d82010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
ffff880037d82020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ ffff880037d82030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ It goes on a bit more, then it becomes this: XFS (sda5): metadata I/O error: block 0x1001c26d50 ("xfs_trans_read_buf_map") error 117 numblks 16 XFS (sda5): xfs_do_force_shutdown(0x1) called from line 382 of file fs/xfs/xfs_trans_buf.c. Return address = 0xffffffff81260803 XFS (sda5): I/O Error Detected. Shutting down filesystem XFS (sda5): Please umount the filesystem and rectify the problem(s) XFS (sda5): xfs_imap_to_bp: xfs_trans_read_buf() returned error -117. XFS (sda5): xfs_log_force: error -5 returned. XFS (sda5): xfs_log_force: error -5 returned. XFS (sda5): xfs_log_force: error -5 returned. XFS (sda5): xfs_log_force: error -5 returned. XFS (sda5): xfs_log_force: error -5 returned. XFS (sda5): xfs_log_force: error -5 returned. XFS (sda5): xfs_log_force: error -5 returned. XFS (sda5): xfs_log_force: error -5 returned. It goes on like this for a long time. Then, when I unmount and try to remount, I get "structure needs cleaning" and dmesg says: XFS (sda5): Mounting V4 Filesystem XFS (sda5): Starting recovery (logdev: internal) XFS (sda5): Metadata corruption detected at xfs_inode_buf_verify+0x6c/0xb0, block 0x1001c26d40 XFS (sda5): Unmount and run xfs_repair XFS (sda5): First 64 bytes of corrupted metadata buffer: ffffc90013c62000: 49 4e 81 ff 02 02 00 00 00 00 03 e8 00 00 03 e8 IN.............. ffffc90013c62010: 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 01 ................ ffffc90013c62020: 50 98 e3 98 1e 97 8a 07 4c 06 1c e5 00 00 00 00 P.......L....... ffffc90013c62030: 54 ae c5 83 18 8f d6 8c 00 00 00 00 00 00 14 82 T............... 
XFS (sda5): Metadata corruption detected at xfs_inode_buf_verify+0x6c/0xb0, block 0x1001c26d40 XFS (sda5): Unmount and run xfs_repair XFS (sda5): First 64 bytes of corrupted metadata buffer: ffffc90013c62000: 49 4e 81 ff 02 02 00 00 00 00 03 e8 00 00 03 e8 IN.............. ffffc90013c62010: 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 01 ................ ffffc90013c62020: 50 98 e3 98 1e 97 8a 07 4c 06 1c e5 00 00 00 00 P.......L....... ffffc90013c62030: 54 ae c5 83 18 8f d6 8c 00 00 00 00 00 00 14 82 T............... XFS (sda5): Metadata corruption detected at xfs_inode_buf_verify+0x6c/0xb0, block 0x1001c26d40 XFS (sda5): Unmount and run xfs_repair XFS (sda5): First 64 bytes of corrupted metadata buffer: ffffc90013c62000: 49 4e 81 ff 02 02 00 00 00 00 03 e8 00 00 03 e8 IN.............. ffffc90013c62010: 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 01 ................ ffffc90013c62020: 50 98 e3 98 1e 97 8a 07 4c 06 1c e5 00 00 00 00 P.......L....... ffffc90013c62030: 54 ae c5 83 18 8f d6 8c 00 00 00 00 00 00 14 82 T............... XFS (sda5): Metadata corruption detected at xfs_inode_buf_verify+0x6c/0xb0, block 0x1001c26d40 XFS (sda5): Unmount and run xfs_repair XFS (sda5): First 64 bytes of corrupted metadata buffer: ffffc90013c62000: 49 4e 81 ff 02 02 00 00 00 00 03 e8 00 00 03 e8 IN.............. ffffc90013c62010: 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 01 ................ ffffc90013c62020: 50 98 e3 98 1e 97 8a 07 4c 06 1c e5 00 00 00 00 P.......L....... ffffc90013c62030: 54 ae c5 83 18 8f d6 8c 00 00 00 00 00 00 14 82 T............... 
XFS (sda5): Metadata corruption detected at xfs_inode_buf_verify+0x6c/0xb0, block 0x1001c26d40 And finally ends with: XFS (sda5): metadata I/O error: block 0x1001c26d40 ("xlog_recover_do..(read#2)") error 117 numblks 16 BUG: unable to handle kernel NULL pointer dereference at (null) IP: [<ffffffff81259ef6>] xlog_recover_free_trans+0x16/0xb0 PGD 37da7067 PUD 3752c067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: nfsv3 nfsv4 ib_iser rdma_cm iw_cm ib_cm ib_sa ib_mad ib_core ib_addr iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi nfs bonding md_mod dm_mod nfsd lockd nfs_acl auth_rpcgss oid_registry sunrpc ipv6 fuse af_packet snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm_oss snd_mixer_oss snd_pcm snd_seq_dummy snd_seq_midi snd_rawmidi snd_seq_oss snd_seq_midi_event snd_seq snd_timer snd_seq_device snd virtio_net virtio_balloon soundcore loop virtio_blk virtio_pci virtio_ring virtio ata_piix xhci_hcd uhci_hcd usb_storage joydev usbhid kvm_amd kvm crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel aes_x86_64 lrw gf128mul glue_helper ablk_helper mgag200 evdev ttm cryptd drm_kms_helper e1000e drm microcode pcspkr sp5100_tco i2c_algo_bit psmouse k10temp ptp fam15h_power pps_core ohci_pci i2c_piix4 ohci_hcd ehci_pci ehci_hcd i2c_core ses usbcore enclosure usb_common sg myri10ge acpi_cpufreq dca processor thermal_sys button ata_generic aacraid pata _atiixp ahci libahci libata CPU: 5 PID: 18084 Comm: mount Not tainted 3.17.7-storiq64-opteron #1 Hardware name: Supermicro H8SGL/H8SGL, BIOS 3.0a 05/07/2013 task: ffff88040e1ad7f0 ti: ffff880037ca8000 task.ti: ffff880037ca8000 RIP: 0010:[<ffffffff81259ef6>] [<ffffffff81259ef6>] xlog_recover_free_trans+0x16/0xb0 RSP: 0018:ffff880037cabb08 EFLAGS: 00010207 RAX: 00000000ffffff8b RBX: 0000000000000001 RCX: 0000000000000002 RDX: 00000000ffffff8b RSI: ffff88040c9105a0 RDI: ffff8800377b7f40 RBP: 0000000000000000 R08: ffff880037ca8000 R09: 0000000000000000 R10: ffffffff81723480 R11: 0000000000000001 R12: 
ffff880037cabc28 R13: ffff8800377b7f70 R14: ffff8800377b7f40 R15: ffff8800377b7f40 FS: 00007ffee71207e0(0000) GS:ffff88041eca0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000000 CR3: 00000000db274000 CR4: 00000000000407e0 Stack: 0000000000000001 ffffc90015c0bf1c ffff880037cabc28 00000000930b92d9 ffffc90015c0bf10 ffffffff8125e448 ffff8804ffffff8b ffffc90015c0c000 ffff880037cabbf8 ffff88020176cc00 ffff880403115000 0000000281259fbe Call Trace: [<ffffffff8125e448>] ? xlog_recover_process_data+0x108/0x2a0 [<ffffffff8125e741>] ? xlog_do_recovery_pass+0x161/0x5c0 [<ffffffff8124e3d0>] ? xfs_parseargs+0xb80/0xb80 [<ffffffff8124e3d0>] ? xfs_parseargs+0xb80/0xb80 [<ffffffff8125ec18>] ? xlog_do_log_recovery+0x78/0xa0 [<ffffffff8125ec5a>] ? xlog_do_recover+0x1a/0x100 [<ffffffff8125f00b>] ? xlog_recover+0x7b/0xb0 [<ffffffff81253486>] ? xfs_log_mount+0xe6/0x2b0 [<ffffffff8124b642>] ? xfs_mountfs+0x442/0x780 [<ffffffff8123a9e0>] ? xfs_filestream_get_ag+0x20/0x20 [<ffffffff8124e697>] ? xfs_fs_fill_super+0x2c7/0x340 [<ffffffff8113b996>] ? mount_bdev+0x1c6/0x210 [<ffffffff8113c55a>] ? mount_fs+0x1a/0xd0 [<ffffffff811553b4>] ? vfs_kern_mount+0x64/0x110 [<ffffffff81157513>] ? do_mount+0x213/0xa80 [<ffffffff810ef799>] ? __get_free_pages+0x9/0x50 [<ffffffff81158078>] ? SyS_mount+0x98/0xf0 [<ffffffff814d8569>] ? 
system_call_fastpath+0x16/0x1b Code: 00 00 00 00 00 e9 bb a8 fd ff 66 66 2e 0f 1f 84 00 00 00 00 00 41 56 49 89 fe 41 55 4c 8d 6f 30 41 54 55 53 48 8b 6f 30 4c 39 ed <4c> 8b 65 00 74 76 0f 1f 40 00 48 8b 45 08 48 ba 00 01 10 00 00 RIP [<ffffffff81259ef6>] xlog_recover_free_trans+0x16/0xb0 RSP <ffff880037cabb08> CR2: 0000000000000000 -- ------------------------------------------------------------------------ Emmanuel Florac | Direction technique | Intellique | <eflorac@xxxxxxxxxxxxxx> | +33 1 78 94 84 02 ------------------------------------------------------------------------ _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs