Hello Song, That code does not compile, but I guess that what you meant was something like this: diff --git drivers/md/md.c drivers/md/md.c index 04384452a..cbc97a96b 100644 --- drivers/md/md.c +++ drivers/md/md.c @@ -78,6 +78,7 @@ static DEFINE_SPINLOCK(pers_lock); static struct kobj_type md_ktype; +struct kmem_cache *md_io_cache; struct md_cluster_operations *md_cluster_ops; EXPORT_SYMBOL(md_cluster_ops); static struct module *md_cluster_mod; @@ -5701,8 +5702,8 @@ static int md_alloc(dev_t dev, char *name) */ mddev->hold_active = UNTIL_STOP; - error = mempool_init_kmalloc_pool(&mddev->md_io_pool, BIO_POOL_SIZE, - sizeof(struct md_io)); + error = mempool_init_slab_pool(&mddev->md_io_pool, BIO_POOL_SIZE, + md_io_cache); if (error) goto abort; @@ -9542,6 +9543,10 @@ static int __init md_init(void) { int ret = -ENOMEM; + md_io_cache = KMEM_CACHE(md_io, 0); + if (!md_io_cache) + goto err_md_io_cache; + md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0); if (!md_wq) goto err_wq; @@ -9578,6 +9583,8 @@ static int __init md_init(void) err_misc_wq: destroy_workqueue(md_wq); err_wq: + kmem_cache_destroy(md_io_cache); +err_md_io_cache: return ret; } @@ -9863,6 +9870,7 @@ static __exit void md_exit(void) destroy_workqueue(md_rdev_misc_wq); destroy_workqueue(md_misc_wq); destroy_workqueue(md_wq); + kmem_cache_destroy(md_io_cache); } subsys_initcall(md_init); Unfortunately, now it crashes with: <0>[ 625.159576] traps: PANIC: double fault, error_code: 0x0 <4>[ 625.159578] double fault: 0000 [#1] SMP NOPTI <4>[ 625.159579] CPU: 32 PID: 0 Comm: swapper/32 Tainted: P OE 5.11.8-pw-fix1 #1 <4>[ 625.159579] Hardware name: empty S8030GM2NE/S8030GM2NE, BIOS V4.00 03/11/2021 <4>[ 625.159580] RIP: 0010:__slab_free+0x26/0x380 <4>[ 625.159580] Code: 1f 44 00 00 0f 1f 44 00 00 55 49 89 ca 45 89 c3 48 89 e5 41 57 41 56 49 89 fe 41 55 41 54 49 89 f4 53 48 83 e4 f0 48 83 ec 70 <48> 89 54 24 28 0f 1f 44 00 00 41 8b 46 28 4d 8b 6c 24 20 49 8b 5c <4>[ 625.159581] RSP: 0018:ffffb11c4d028fc0 EFLAGS: 00010086 <4>[ 625.159582] RAX: ffff9a38cfc34fd0 RBX: ffff9a1a1360b870 RCX: ffff9a1a1360b870 <4>[ 625.159583] RDX: ffff9a1a1360b870 RSI: ffffe328044d82c0 RDI: ffff9a39536e5e00 <4>[ 625.159583] RBP: ffffb11c4d029060 R08: 0000000000000001 R09: ffffffff9f850a57 <4>[ 625.159583] R10: ffff9a1a1360b870 R11: 0000000000000001 R12: ffffe328044d82c0 <4>[ 625.159584] R13: ffffe328044d82c0 R14: ffff9a39536e5e00 R15: ffff9a39536e5e00 <4>[ 625.159584] FS: 0000000000000000(0000) GS:ffff9a38cfc00000(0000) knlGS:0000000000000000 <4>[ 625.159585] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 625.159585] CR2: ffffb11c4d028fb8 CR3: 000000407aece000 CR4: 0000000000350ee0 <4>[ 625.159585] Call Trace: <4>[ 625.159586] <IRQ> <4>[ 625.159586] ? kmem_cache_free+0x3bf/0x410 <4>[ 625.159586] ? mempool_free_slab+0x17/0x20 <4>[ 625.159587] ? mempool_free_slab+0x17/0x20 <4>[ 625.159587] ? mempool_free+0x2f/0x80 <4>[ 625.159587] ? md_end_io+0x4a/0x70 <4>[ 625.159587] ? bio_endio+0xe0/0x120 <4>[ 625.159588] ? bio_chain_endio+0x2d/0x40 <4>[ 625.159588] ? md_end_io+0x5c/0x70 <4>[ 625.159588] ? bio_endio+0xe0/0x120 <4>[ 625.159589] ? bio_chain_endio+0x2d/0x40 <4>[ 625.159589] ? md_end_io+0x5c/0x70 <4>[ 625.159589] ? bio_endio+0xe0/0x120 <4>[ 625.159589] ? bio_chain_endio+0x2d/0x40 <4>[ 625.159590] ? md_end_io+0x5c/0x70 <4>[ 625.159590] ? bio_endio+0xe0/0x120 ... <4>[ 625.159663] Lost 344 message(s)! Some info from a moment "just" (~0.2 s) before the crash: $ cd /sys/dev/block/9:12 ; watch -n0.2 'for f in `find . -maxdepth 1 -type f` `ls queue/* |grep -v usec` ; do echo $f `cat $f`; done | tee -a ~/md' ./uevent MAJOR=9 MINOR=12 DEVNAME=md12 DEVTYPE=disk ./ext_range 256 ./range 1 ./alignment_offset 0 ./dev 9:12 ./ro 0 ./stat 577 0 10446 84 46128254 0 11584858627 400313072 660 1289080 400313156 0 0 0 0 0 0 ./events_poll_msecs -1 ./events_async ./size 41986624512 ./discard_alignment 0 ./capability 50 ./hidden 0 ./removable 0 ./events ./inflight 0 666 queue/add_random 0 queue/chunk_sectors 0 queue/dax 0 queue/discard_granularity 512 queue/discard_max_bytes 2199023255040 queue/discard_max_hw_bytes 2199023255040 queue/discard_zeroes_data 0 queue/fua 1 queue/hw_sector_size 512 queue/io_poll 0 queue/io_poll_delay 0 queue/iostats 0 queue/logical_block_size 512 queue/max_discard_segments 256 queue/max_hw_sectors_kb 128 queue/max_integrity_segments 0 queue/max_sectors_kb 128 queue/max_segments 33 queue/max_segment_size 4294967295 queue/minimum_io_size 524288 queue/nomerges 0 queue/nr_requests 128 queue/nr_zones 0 queue/optimal_io_size 1572864 queue/physical_block_size 512 queue/read_ahead_kb 3072 queue/rotational 0 queue/rq_affinity 0 queue/scheduler none queue/stable_writes 0 queue/write_cache write back queue/write_same_max_bytes 0 queue/write_zeroes_max_bytes 0 queue/zone_append_max_bytes 0 queue/zoned none $ cd /sys/kernel/slab; watch -n0.2 'for f in `ls md_io/* | grep -v calls`; do echo $f `cat $f`; done | tee -a ~/slab' md_io/aliases 4 md_io/align 8 md_io/cache_dma 0 md_io/cpu_partial 30 md_io/cpu_slabs 625 N0=250 N1=84 N2=23 N3=268 md_io/ctor md_io/destroy_by_rcu 0 md_io/hwcache_align 0 md_io/min_partial 5 md_io/objects 63852 N0=23256 N1=12138 N2=3264 N3=25194 md_io/object_size 40 md_io/objects_partial 0 md_io/objs_per_slab 102 md_io/order 0 md_io/partial 8 N0=3 N1=3 N3=2 md_io/poison 0 md_io/reclaim_account 0 md_io/red_zone 0 md_io/remote_node_defrag_ratio 100 md_io/sanity_checks 0 md_io/shrink md_io/slabs 634 N0=231 N1=122 N2=32 N3=249 md_io/slabs_cpu_partial 559(559) C0=21(21) C1=16(16) C2=12(12) C3=21(21) C4=7(7) C5=23(23) C6=19(19) C7=19(19) C8=5(5) C9=4(4) C10=3(3) C12=5(5) C13=6(6) C14=6(6) C18=1(1) C19=2(2) C24=19(19) C25=20(20) C26=15(15 ) C27=11(11) C28=17(17) C29=14(14) C30=23(23) C31=14(14) C32=21(21) C33=8(8) C34=19(19) C35=12(12) C36=19(19) C37=17(17) C38=15(15) C39=7(7) C40=3(3) C41=3(3) C42=1(1) C45=3(3) C46=5(5) C47=1(1) C48=1(1) C49=1(1) C53=1(1) C56=3(3) C57=5(5) C58=21(21) C59=25(25) C60=5(5) C61=25(25) C62=17(17) C63=18(18) md_io/slab_size 40 md_io/store_user 0 md_io/total_objects 64668 N0=23562 N1=12444 N2=3264 N3=25398 md_io/trace 0 md_io/usersize 0 md_io/validate $ watch -n0.2 'cat /proc/meminfo | paste - - | tee -a ~/meminfo' MemTotal: 528235648 kB MemFree: 20002732 kB MemAvailable: 483890268 kB Buffers: 7356 kB Cached: 495416180 kB SwapCached: 0 kB Active: 96396800 kB Inactive: 399891308 kB Active(anon): 10976 kB Inactive(anon): 890908 kB Active(file): 96385824 kB Inactive(file): 399000400 kB Unevictable: 78768 kB Mlocked: 78768 kB SwapTotal: 0 kB SwapFree: 0 kB Dirty: 88422072 kB Writeback: 948756 kB AnonPages: 945772 kB Mapped: 57300 kB Shmem: 26300 kB KReclaimable: 7248160 kB Slab: 7962748 kB SReclaimable: 7248160 kB SUnreclaim: 714588 kB KernelStack: 18288 kB PageTables: 10796 kB NFS_Unstable: 0 kB Bounce: 0 kB WritebackTmp: 0 kB CommitLimit: 264117824 kB Committed_AS: 21816824 kB VmallocTotal: 34359738367 kB VmallocUsed: 561588 kB VmallocChunk: 0 kB Percpu: 65792 kB HardwareCorrupted: 0 kB AnonHugePages: 0 kB ShmemHugePages: 0 kB ShmemPmdMapped: 0 kB FileHugePages: 0 kB FilePmdMapped: 0 kB HugePages_Total: 0 HugePages_Free: 0 HugePages_Rsvd: 0 HugePages_Surp: 0 Hugepagesize: 2048 kB Hugetlb: 0 kB DirectMap4k: 541000 kB DirectMap2M: 11907072 kB DirectMap1G: 525336576 kB Regards, Paweł Wiejacha. On Mon, 12 Apr 2021 at 08:49, Song Liu <song@xxxxxxxxxx> wrote: > > On Fri, Apr 9, 2021 at 2:41 PM Paweł Wiejacha > <pawel.wiejacha@xxxxxxxxxxxx> wrote: > > > > Hello, > > > > Two of my machines constantly crash with a double fault like this: > > > > 1146 <0>[33685.629591] traps: PANIC: double fault, error_code: 0x0 > > 1147 <4>[33685.629593] double fault: 0000 [#1] SMP NOPTI > > 1148 <4>[33685.629594] CPU: 10 PID: 2118287 Comm: kworker/10:0 > > Tainted: P OE 5.11.8-051108-generic #202103200636 > > 1149 <4>[33685.629595] Hardware name: ASUSTeK COMPUTER INC. KRPG-U8 > > Series/KRPG-U8 Series, BIOS 4201 09/25/2020 > > 1150 <4>[33685.629595] Workqueue: xfs-conv/md12 xfs_end_io [xfs] > > 1151 <4>[33685.629596] RIP: 0010:__slab_free+0x23/0x340 > > 1152 <4>[33685.629597] Code: 4c fe ff ff 0f 1f 00 0f 1f 44 00 00 55 > > 48 89 e5 41 57 49 89 cf 41 56 49 89 fe 41 55 41 54 49 89 f4 53 48 83 > > e4 f0 48 83 ec 70 <48> 89 54 24 28 0f 1f 44 00 00 41 8b 46 28 4d 8b 6c > > 24 20 49 8b 5c > > 1153 <4>[33685.629598] RSP: 0018:ffffa9bc00848fa0 EFLAGS: 00010086 > > 1154 <4>[33685.629599] RAX: ffff94c04d8b10a0 RBX: ffff94437a34a880 > > RCX: ffff94437a34a880 > > 1155 <4>[33685.629599] RDX: ffff94437a34a880 RSI: ffffcec745e8d280 > > RDI: ffff944300043b00 > > 1156 <4>[33685.629599] RBP: ffffa9bc00849040 R08: 0000000000000001 > > R09: ffffffff82a5d6de > > 1157 <4>[33685.629600] R10: 0000000000000001 R11: 000000009c109000 > > R12: ffffcec745e8d280 > > 1158 <4>[33685.629600] R13: ffff944300043b00 R14: ffff944300043b00 > > R15: ffff94437a34a880 > > 1159 <4>[33685.629601] FS: 0000000000000000(0000) > > GS:ffff94c04d880000(0000) knlGS:0000000000000000 > > 1160 <4>[33685.629601] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > > 1161 <4>[33685.629602] CR2: ffffa9bc00848f98 CR3: 000000014d04e000 > > CR4: 0000000000350ee0 > > 1162 <4>[33685.629602] Call Trace: > > 1163 <4>[33685.629603] <IRQ> > > 1164 <4>[33685.629603] ? kfree+0x3bc/0x3e0 > > 1165 <4>[33685.629603] ? mempool_kfree+0xe/0x10 > > 1166 <4>[33685.629603] ? mempool_kfree+0xe/0x10 > > 1167 <4>[33685.629604] ? mempool_free+0x2f/0x80 > > 1168 <4>[33685.629604] ? md_end_io+0x4a/0x70 > > 1169 <4>[33685.629604] ? bio_endio+0xdc/0x130 > > 1170 <4>[33685.629605] ? bio_chain_endio+0x2d/0x40 > > 1171 <4>[33685.629605] ? md_end_io+0x5c/0x70 > > 1172 <4>[33685.629605] ? bio_endio+0xdc/0x130 > > 1173 <4>[33685.629605] ? bio_chain_endio+0x2d/0x40 > > 1174 <4>[33685.629606] ? md_end_io+0x5c/0x70 > > 1175 <4>[33685.629606] ? bio_endio+0xdc/0x130 > > 1176 <4>[33685.629606] ? bio_chain_endio+0x2d/0x40 > > 1177 <4>[33685.629607] ? md_end_io+0x5c/0x70 > > ... repeated ... > > 1436 <4>[33685.629677] ? bio_endio+0xdc/0x130 > > 1437 <4>[33685.629677] ? bio_chain_endio+0x2d/0x40 > > 1438 <4>[33685.629677] ? md_end_io+0x5c/0x70 > > 1439 <4>[33685.629677] ? bio_endio+0xdc/0x130 > > 1440 <4>[33685.629678] ? bio_chain_endio+0x2d/0x40 > > 1441 <4>[33685.629678] ? md_ > > 1442 <4>[33685.629679] Lost 357 message(s)! > > > > This happens on: > > 5.11.8-051108-generic #202103200636 SMP Sat Mar 20 11:17:32 UTC 2021 > > and on 5.8.0-44-generic #50~20.04.1-Ubuntu > > (https://changelogs.ubuntu.com/changelogs/pool/main/l/linux/linux_5.8.0-44.50/changelog) > > which contains backported > > https://github.com/torvalds/linux/commit/41d2d848e5c09209bdb57ff9c0ca34075e22783d > > ("md: improve io stats accounting"). > > The 5.8.18-050818-generic #202011011237 SMP Sun Nov 1 12:40:15 UTC > > 2020 which does not contain above suspected change does not crash. > > > > If there's a better way/place to report this bug just let me know. If > > not, here are steps to reproduce: > > > > 1. Create a RAID 0 device using three Micron_9300_MTFDHAL7T6TDP disks. > > mdadm --create --verbose /dev/md12 --level=stripe --raid-devices=3 > > /dev/nvme0n1p1 /dev/nvme1n1p1 /dev/nvme2n1p1 > > > > 2. Setup xfs on it: > > mkfs.xfs /dev/md12 and mount it > > > > 3. Write to a file on this filesystem: > > while true; do rm -rf /mnt/md12/crash* ; for i in `seq 8`; do dd > > if=/dev/zero of=/mnt/md12/crash$i bs=32K count=50000000 & done; wait; > > done > > Wait for a crash (usually less than 20 min). > > > > I couldn't reproduce it with a single dd process (maybe I have to wait > > a little longer), but a single cat > > /very/large/file/on/cephfs/over100GbE > /mnt/md12/crash is enough for > > this double fault to occur. > > > > More info: > > This long mempool_kfree - md_end_io - * -md_end_io stack trace looks > > always the same, but the panic occurs in different places: > > > > pstore/6948115143318/dmesg.txt-<4>[545649.087998] CPU: 88 PID: 0 Comm: > > swapper/88 Tainted: P OE 5.11.8-051108-generic > > #202103200636 > > pstore/6948377398316/dmesg.txt-<4>[11275.914909] CPU: 14 PID: 0 Comm: > > swapper/14 Tainted: P OE 5.11.8-051108-generic > > #202103200636 > > pstore/6948532816002/dmesg.txt-<4>[33685.629594] CPU: 10 PID: 2118287 > > Comm: kworker/10:0 Tainted: P OE 5.11.8-051108-generic > > #202103200636 > > pstore/6948532816002/dmesg.txt-<4>[33685.629595] Workqueue: > > xfs-conv/md12 xfs_end_io [xfs] > > pstore/6948855849083/dmesg.txt-<4>[42934.321129] CPU: 85 PID: 0 Comm: > > swapper/85 Tainted: P OE 5.11.8-051108-generic > > #202103200636 > > pstore/6948876331782/dmesg.txt-<4>[ 3475.020672] CPU: 86 PID: 0 Comm: > > swapper/86 Tainted: P OE 5.11.8-051108-generic > > #202103200636 > > pstore/6949083860307/dmesg.txt-<4>[43048.254375] CPU: 45 PID: 0 Comm: > > swapper/45 Tainted: P OE 5.11.8-051108-generic > > #202103200636 > > pstore/6949091775931/dmesg.txt-<4>[ 1150.790240] CPU: 64 PID: 0 Comm: > > swapper/64 Tainted: P OE 5.11.8-051108-generic > > #202103200636 > > pstore/6949123356826/dmesg.txt-<4>[ 6963.858253] CPU: 6 PID: 51 Comm: > > kworker/6:0 Tainted: P OE 5.11.8-051108-generic > > #202103200636 > > pstore/6949123356826/dmesg.txt-<4>[ 6963.858255] Workqueue: ceph-msgr > > ceph_con_workfn [libceph] > > pstore/6949123356826/dmesg.txt-<4>[ 6963.858253] CPU: 6 PID: 51 Comm: > > kworker/6:0 Tainted: P OE 5.11.8-051108-generic > > #202103200636 > > pstore/6949123356826/dmesg.txt-<4>[ 6963.858255] Workqueue: ceph-msgr > > ceph_con_workfn [libceph] > > pstore/6949152322085/dmesg.txt-<4>[ 6437.077874] CPU: 59 PID: 0 Comm: > > swapper/59 Tainted: P OE 5.11.8-051108-generic > > #202103200636 > > > > cat /proc/cmdline > > BOOT_IMAGE=/vmlinuz-5.11.8-051108-generic > > root=/dev/mapper/ubuntu--vg-ubuntu--lv ro net.ifnames=0 biosdevname=0 > > strict-devmem=0 mitigations=off iommu=pt > > > > cat /proc/cpuinfo > > model name : AMD EPYC 7552 48-Core Processor > > > > cat /proc/mounts > > /dev/md12 /mnt/ssd1 xfs > > rw,noatime,attr2,inode64,logbufs=8,logbsize=32k,sunit=1024,swidth=3072,prjquota > > 0 0 > > > > Let me know if you need more information. > > Hi Pawel, > > Thanks for the report. Could you please try whether the following > change fixes the issue? > > Song > > diff --git i/drivers/md/md.c w/drivers/md/md.c > index bb09070a63bcf..889e9440a9f38 100644 > --- i/drivers/md/md.c > +++ w/drivers/md/md.c > @@ -5648,7 +5648,7 @@ static int md_alloc(dev_t dev, char *name) > */ > mddev->hold_active = UNTIL_STOP; > > - error = mempool_init_kmalloc_pool(&mddev->md_io_pool, BIO_POOL_SIZE, > + error = mempool_init_slab_pool(&mddev->md_io_pool, BIO_POOL_SIZE, > sizeof(struct md_io)); > if (error) > goto abort;