This test exposed a race condition between shutting down a request_queue
with active IO against it and blkg association for that IO [1]. The issue
ended up being that while the request_queue itself just starts failing
requests, blkg destruction sets q->root_blkg to %NULL, which led to a NULL
pointer dereference. This was fixed in [2]. To help prevent this from
happening again, integrate Ming's test into blktests so that it can more
easily be run. Here I've ported it to fit better into the blktests
framework.

[1] https://lore.kernel.org/lkml/20181205171039.73066-1-dennis@xxxxxxxxxx/
[2] https://lore.kernel.org/lkml/20181211230308.66276-1-dennis@xxxxxxxxxx/

Signed-off-by: Dennis Zhou <dennis@xxxxxxxxxx>
Cc: Ming Lei <ming.lei@xxxxxxxxxx>
---
v2:
- Used helpers in common/scsi_debug per Omar's suggestion.

 tests/block/027     | 73 +++++++++++++++++++++++++++++++++++++++++++++
 tests/block/027.out |  2 ++
 2 files changed, 75 insertions(+)
 create mode 100755 tests/block/027
 create mode 100644 tests/block/027.out

diff --git a/tests/block/027 b/tests/block/027
new file mode 100755
index 0000000..d5e1463
--- /dev/null
+++ b/tests/block/027
@@ -0,0 +1,73 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0+
+# Copyright (C) 2018 Ming Lei
+#
+# Regression test for commit 0273ac349f08 ("blkcg: handle dying request_queue
+# when associating a blkg")
+#
+# This tries to expose the race condition between blkg association and
+# request_queue shutdown. When a request_queue is shut down, the corresponding
+# blkgs are destroyed. Any further associations should fail gracefully and not
+# cause a kernel panic.
+
+. tests/block/rc
+. common/scsi_debug
+. common/cgroup
+
+DESCRIPTION="test graceful shutdown of scsi_debug devices with running fio jobs"
+QUICK=1
+
+requires() {
+	_have_cgroup2_controller io && _have_scsi_debug && _have_fio
+}
+
+scsi_debug_stress_remove() {
+	if ! _init_scsi_debug "$@"; then
+		return
+	fi
+
+	# set higher aio limit
+	echo 524288 > /proc/sys/fs/aio-max-nr
+
+	local dev fio_jobs scheds queue_path sched_idx
+	local cnt=1
+	for dev in "${SCSI_DEBUG_DEVICES[@]}"; do
+		# create fio job
+		fio_jobs=$fio_jobs" --name=job1 --filename=/dev/$dev "
+
+		# set queue scheduler and queue_depth
+		queue_path=/sys/block/$dev/queue
+		scheds=($(sed 's/[][]//g' "$queue_path/scheduler"))
+		sched_idx=$((cnt % ${#scheds[@]}))
+		echo "${scheds[$sched_idx]}" > "$queue_path/scheduler"
+		echo $cnt > "$queue_path/../device/queue_depth"
+		cnt=$((cnt+1))
+	done
+
+	local num_jobs=4 runtime=21
+	fio --rw=randread --size=128G --direct=1 --ioengine=libaio \
+		--iodepth=2048 --numjobs=$num_jobs --bs=4k \
+		--group_reporting=1 --runtime=$runtime \
+		--loops=10000 $fio_jobs > "$FULL" 2>&1 &
+
+	sleep 7
+	local device_path
+	for dev in "${SCSI_DEBUG_DEVICES[@]}"; do
+		# shutdown devices in progress
+		device_path=/sys/block/$dev/device
+		[ -f "$device_path/delete" ] && echo 1 > "$device_path/delete"
+	done
+
+	wait
+
+	_exit_scsi_debug
+}
+
+test() {
+	echo "Running ${TEST_NAME}"
+
+	scsi_debug_stress_remove virtual_gb=128 max_luns=21 ndelay=10000 \
+		max_queue=110
+
+	echo "Test complete"
+}
diff --git a/tests/block/027.out b/tests/block/027.out
new file mode 100644
index 0000000..b03498f
--- /dev/null
+++ b/tests/block/027.out
@@ -0,0 +1,2 @@
+Running block/027
+Test complete
--
2.17.1
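
Usage note (not part of the patch): assuming the standard blktests runner, the
new test would be invoked from the top of a blktests checkout roughly as below.
The results path shown is the usual blktests layout for tests without a
TEST_DEV and is an assumption here, since this test sets up its own scsi_debug
devices.

	# run only the new test, as root (scsi_debug setup and the sysfs
	# writes in the test require it)
	sudo ./check block/027

	# on failure, the captured fio output typically lands under
	# results/nodev/block/027.full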