Re: 5.19.0: dnf install hangs when system is under load

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 2022/8/11 00:19, Chris Murphy wrote:
Downstream bug - 5.19.0: dnf install hangs when system is under load
https://bugzilla.redhat.com/show_bug.cgi?id=2117326

5.19.0-65.fc37.x86_64

Setup
btrfs raid10 on 8x plain partitions

Command
sudo dnf install pciutils

Reproducible:
About 1 in 3, correlates with the system being under heavy load, otherwise it's not happening

Get stuck at
Running scriptlet: sg3_utils-1.46-3.fc36.x86_64   2/2

ps aux status for dnf is D+, kill -9 does nothing, strace shows nothing. The hang last at least 10 minutes, didn't test beyond that.

Full dmesg with sysrq+w is attached to the bug report.

snippet

[ 2268.057017] sysrq: Show Blocked State
[ 2268.057866] task:kworker/u97:11  state:D stack:    0 pid:  340 ppid:     2 flags:0x00004000
[ 2268.058361] Workqueue: writeback wb_workfn (flush-btrfs-1)
[ 2268.058825] Call Trace:
[ 2268.059261]  <TASK>
[ 2268.059692]  __schedule+0x335/0x1240
[ 2268.060145]  ? __blk_mq_sched_dispatch_requests+0xe0/0x130
[ 2268.060611]  schedule+0x4e/0xb0
[ 2268.061059]  io_schedule+0x42/0x70
[ 2268.061473]  blk_mq_get_tag+0x10c/0x290

All the hanging processes are waiting at blk_mq_get_tag(), thus I'm not
sure if it's really btrfs, or something in the block layer.

Adding block layer guys into the thread.

Thanks,
Qu

[ 2268.061910]  ? dequeue_task_stop+0x70/0x70
[ 2268.062359]  __blk_mq_alloc_requests+0x16e/0x2a0
[ 2268.062797]  blk_mq_submit_bio+0x2a2/0x590
[ 2268.063226]  __submit_bio+0xf5/0x180
[ 2268.063660]  submit_bio_noacct_nocheck+0x1f9/0x2b0
[ 2268.064055]  btrfs_map_bio+0x170/0x410
[ 2268.064451]  btrfs_submit_data_bio+0x134/0x220
[ 2268.064859]  ? __mod_memcg_lruvec_state+0x93/0x110
[ 2268.065246]  submit_extent_page+0x17a/0x4b0
[ 2268.065637]  ? page_vma_mkclean_one.constprop.0+0x1b0/0x1b0
[ 2268.066018]  __extent_writepage_io.constprop.0+0x271/0x550
[ 2268.066363]  ? end_extent_writepage+0x100/0x100
[ 2268.066720]  ? writepage_delalloc+0x8a/0x180
[ 2268.067094]  __extent_writepage+0x115/0x490
[ 2268.067472]  extent_write_cache_pages+0x178/0x500
[ 2268.067889]  extent_writepages+0x60/0x140
[ 2268.068274]  do_writepages+0xac/0x1a0
[ 2268.068643]  __writeback_single_inode+0x3d/0x350
[ 2268.069022]  ? _raw_spin_lock+0x13/0x40
[ 2268.069419]  writeback_sb_inodes+0x1c5/0x460
[ 2268.069824]  __writeback_inodes_wb+0x4c/0xe0
[ 2268.070230]  wb_writeback+0x1c9/0x2a0
[ 2268.070622]  wb_workfn+0x298/0x490
[ 2268.070988]  process_one_work+0x1c7/0x380
[ 2268.071366]  worker_thread+0x4d/0x380
[ 2268.071775]  ? process_one_work+0x380/0x380
[ 2268.072179]  kthread+0xe9/0x110
[ 2268.072588]  ? kthread_complete_and_exit+0x20/0x20
[ 2268.073002]  ret_from_fork+0x22/0x30
[ 2268.073408]  </TASK>
[ 2268.073912] task:systemd-journal state:D stack:    0 pid: 1208 ppid:     1 flags:0x00000002
[ 2268.074334] Call Trace:
[ 2268.074756]  <TASK>
[ 2268.075165]  __schedule+0x335/0x1240
[ 2268.075586]  ? __blk_flush_plug+0xf2/0x130
[ 2268.075950]  schedule+0x4e/0xb0
[ 2268.076305]  io_schedule+0x42/0x70
[ 2268.076692]  folio_wait_bit_common+0x116/0x390
[ 2268.077075]  ? filemap_alloc_folio+0xc0/0xc0
[ 2268.077447]  filemap_fault+0xcf/0x980
[ 2268.077821]  __do_fault+0x36/0x130
[ 2268.078153]  do_fault+0x1da/0x440
[ 2268.078483]  __handle_mm_fault+0x9cf/0xed0
[ 2268.078837]  handle_mm_fault+0xae/0x280
[ 2268.079202]  do_user_addr_fault+0x1c5/0x670
[ 2268.079567]  exc_page_fault+0x70/0x170
[ 2268.079929]  asm_exc_page_fault+0x22/0x30
[ 2268.080282] RIP: 0033:0x7f3d8efd6519
[ 2268.080657] RSP: 002b:00007ffd4a073ac0 EFLAGS: 00010202
[ 2268.081017] RAX: 00007f3d8c6586d8 RBX: 00000000016bb6d8 RCX: 00007f3d8efe1309
[ 2268.081374] RDX: 00007ffd4a073ac0 RSI: 000055d6da7a52e8 RDI: 000055d6da7a52a0
[ 2268.081756] RBP: 00007ffd4a073b48 R08: 000055d6da7b7a00 R09: 000055d6da7aaea8
[ 2268.082147] R10: 0000000000000004 R11: 00007ffd4a073e00 R12: 000055d6da7aae70
[ 2268.082511] R13: 0000000000000001 R14: 00007ffd4a073ac0 R15: 00000000016bb6d8
[ 2268.082883]  </TASK>
[ 2268.083757] task:kworker/u97:0   state:D stack:    0 pid: 7664 ppid:     2 flags:0x00004000
[ 2268.084149] Workqueue: writeback wb_workfn (flush-btrfs-1)
[ 2268.084537] Call Trace:
[ 2268.084918]  <TASK>
[ 2268.085289]  __schedule+0x335/0x1240
[ 2268.085675]  ? __blk_flush_plug+0xf2/0x130
[ 2268.086056]  schedule+0x4e/0xb0
[ 2268.086406]  io_schedule+0x42/0x70
[ 2268.086786]  blk_mq_get_tag+0x10c/0x290
[ 2268.087160]  ? dequeue_task_stop+0x70/0x70
[ 2268.087539]  __blk_mq_alloc_requests+0x16e/0x2a0
[ 2268.087912]  blk_mq_submit_bio+0x2a2/0x590
[ 2268.088287]  __submit_bio+0xf5/0x180
[ 2268.088676]  submit_bio_noacct_nocheck+0x1f9/0x2b0
[ 2268.089052]  btrfs_map_bio+0x170/0x410
[ 2268.089426]  btrfs_submit_data_bio+0x134/0x220
[ 2268.089813]  ? __mod_memcg_lruvec_state+0x93/0x110
[ 2268.090192]  submit_extent_page+0x17a/0x4b0
[ 2268.090587]  ? page_vma_mkclean_one.constprop.0+0x1b0/0x1b0
[ 2268.090975]  __extent_writepage_io.constprop.0+0x271/0x550
[ 2268.091365]  ? end_extent_writepage+0x100/0x100
[ 2268.091757]  ? writepage_delalloc+0x8a/0x180
[ 2268.092141]  __extent_writepage+0x115/0x490
[ 2268.092534]  extent_write_cache_pages+0x178/0x500
[ 2268.092890]  extent_writepages+0x60/0x140
[ 2268.093247]  do_writepages+0xac/0x1a0
[ 2268.093624]  ? __blk_mq_sched_dispatch_requests+0xe0/0x130
[ 2268.094004]  __writeback_single_inode+0x3d/0x350
[ 2268.094376]  ? _raw_spin_lock+0x13/0x40
[ 2268.094765]  writeback_sb_inodes+0x1c5/0x460
[ 2268.095151]  __writeback_inodes_wb+0x4c/0xe0
[ 2268.095531]  wb_writeback+0x1c9/0x2a0
[ 2268.095913]  wb_workfn+0x306/0x490
[ 2268.096291]  process_one_work+0x1c7/0x380
[ 2268.096686]  worker_thread+0x4d/0x380
[ 2268.097070]  ? process_one_work+0x380/0x380
[ 2268.097448]  kthread+0xe9/0x110
[ 2268.097842]  ? kthread_complete_and_exit+0x20/0x20
[ 2268.098224]  ret_from_fork+0x22/0x30
[ 2268.098623]  </TASK>
[ 2268.099008] task:kworker/u97:7   state:D stack:    0 pid: 7665 ppid:     2 flags:0x00004000
[ 2268.099410] Workqueue: blkcg_punt_bio blkg_async_bio_workfn
[ 2268.099815] Call Trace:
[ 2268.100198]  <TASK>
[ 2268.100589]  __schedule+0x335/0x1240
[ 2268.100982]  ? __blk_mq_sched_dispatch_requests+0xe0/0x130
[ 2268.101377]  schedule+0x4e/0xb0
[ 2268.101772]  io_schedule+0x42/0x70
[ 2268.102153]  blk_mq_get_tag+0x10c/0x290
[ 2268.102537]  ? dequeue_task_stop+0x70/0x70
[ 2268.102902]  __blk_mq_alloc_requests+0x16e/0x2a0
[ 2268.103275]  blk_mq_submit_bio+0x2a2/0x590
[ 2268.103659]  __submit_bio+0xf5/0x180
[ 2268.104032]  submit_bio_noacct_nocheck+0x1f9/0x2b0
[ 2268.104408]  blkg_async_bio_workfn+0x66/0x90
[ 2268.104797]  process_one_work+0x1c7/0x380
[ 2268.105181]  worker_thread+0x1d6/0x380
[ 2268.105572]  ? process_one_work+0x380/0x380
[ 2268.105949]  kthread+0xe9/0x110
[ 2268.106319]  ? kthread_complete_and_exit+0x20/0x20
[ 2268.106667]  ret_from_fork+0x22/0x30
[ 2268.106882]  </TASK>



--
Chris Murphy




[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux