Add a test that repeatedly rescans nvme controllers while doing IO on an
nvme namespace connected to these controllers. The purpose of the test is
to make sure that no I/O errors or data corruption occur because of the
rescan operations.

The test uses sub-second sleeps, which can't be easily accomplished in
bash because of missing floating-point arithmetic (and because usleep(1)
isn't portable). Therefore an awk program is used to trigger the device
rescans. The program needs the gawk "time" extension, so it is run with
gawk.

Link: https://lore.kernel.org/linux-nvme/20240822201413.112268-1-mwilck@xxxxxxxx/
Signed-off-by: Martin Wilck <mwilck@xxxxxxxx>
Reviewed-by: Nilay Shroff (nilay@xxxxxxxxxxxxx)
---
v3: (all changes suggested by Shinichiro Kawasaki)
 - add "Link:" tag
 - add comment with patch description
 - declare variable "st" local
 - use "mapfile -t" for array assignment
v2:
 - don't use usleep (Nilay Shroff). Use an awk program to do floating point
   arithmetic and achieve more accurate sub-second sleep times.
 - add 053.out (Nilay Shroff).
---
 tests/nvme/053     | 94 ++++++++++++++++++++++++++++++++++++++++++++++
 tests/nvme/053.out |  2 +
 tests/nvme/rc      | 24 ++++++++++++
 3 files changed, 120 insertions(+)
 create mode 100755 tests/nvme/053
 create mode 100644 tests/nvme/053.out

diff --git a/tests/nvme/053 b/tests/nvme/053
new file mode 100755
index 0000000..df643ec
--- /dev/null
+++ b/tests/nvme/053
@@ -0,0 +1,94 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0+
+# Copyright (C) 2024 Martin Wilck, SUSE LLC
+#
+# Repeatedly rescans nvme controllers while doing IO on an nvme namespace
+# connected to these controllers, and makes sure that no I/O errors or data
+# corruption occur.
+
+. tests/nvme/rc
+
+DESCRIPTION="test controller rescan under I/O load"
+TIMED=1
+: "${TIMEOUT:=60}"
+
+# Start a background gawk process that runs $TMPDIR/rescan.awk against the
+# "rescan_controller" file below directory $1, using $2 as random seed.
+# Prints a message and returns 1 if that file does not exist.
+rescan_controller() {
+	local path
+	path="$1/rescan_controller"
+
+	[[ -f "$path" ]] || {
+		echo "cannot rescan $1"
+		return 1
+	}
+
+	# The script uses '@load "time"', which is a gawk-only directive, so
+	# call gawk explicitly instead of whatever "awk" happens to be.
+	gawk -f "$TMPDIR/rescan.awk" \
+	    -v path="$path" -v timeout="$TIMEOUT" -v seed="$2" &
+}
+
+# Write the rescan loop to $TMPDIR/rescan.awk. Until "timeout" seconds have
+# passed, the loop sleeps for 0.1 to 5.1 seconds and then writes "1" to
+# "path". The here-document delimiter is quoted to keep the shell from
+# interpreting anything in the awk source.
+create_rescan_script() {
+	cat >"$TMPDIR/rescan.awk" <<'EOF'
+@load "time"
+
+BEGIN {
+	srand(seed);
+	finish = gettimeofday() + strtonum(timeout);
+	while (gettimeofday() < finish) {
+		sleep(0.1 + 5 * rand());
+		printf("1\n") > path;
+		close(path);
+	}
+}
+EOF
+}
+
+# Run _run_fio_verify_io on $TEST_DEV in the background while one rescan
+# process per controller is active, then reap all children, printing a
+# message for each one that exits with a non-zero status.
+test_device() {
+	local -a ctrls
+	local i st
+
+	echo "Running ${TEST_NAME}"
+	create_rescan_script
+
+	mapfile -t ctrls < <(_nvme_get_ctrl_list)
+	# Without controllers nothing would be rescanned and the test would
+	# pass without having tested anything.
+	if ((${#ctrls[@]} == 0)); then
+		echo "no controller found for $TEST_DEV"
+		return 1
+	fi
+
+	_run_fio_verify_io --filename="$TEST_DEV" --time_based &> "$FULL" &
+
+	for i in "${!ctrls[@]}"; do
+		rescan_controller "${ctrls[$i]}" "$i"
+	done
+
+	# "wait -n" returns 127 once there are no child processes left.
+	while true; do
+		wait -n &>/dev/null
+		st=$?
+		case $st in
+		127)
+			break
+			;;
+		0)
+			;;
+		*)
+			echo "child process exited with $st!"
+			;;
+		esac
+	done
+
+	echo "Test complete"
+}
diff --git a/tests/nvme/053.out b/tests/nvme/053.out
new file mode 100644
index 0000000..e8086ce
--- /dev/null
+++ b/tests/nvme/053.out
@@ -0,0 +1,2 @@
+Running nvme/053
+Test complete
diff --git a/tests/nvme/rc b/tests/nvme/rc
index b702a57..a877de3 100644
--- a/tests/nvme/rc
+++ b/tests/nvme/rc
@@ -192,6 +192,30 @@ _test_dev_nvme_nsid() {
 	cat "${TEST_DEV_SYSFS}/nsid"
 }
 
+# Print, one per line, the resolved sysfs paths that are used as the
+# controllers of the test device.
+_nvme_get_ctrl_list() {
+	local subsys c
+
+	subsys=$(readlink "${TEST_DEV_SYSFS}/device/subsystem")
+	case $subsys in
+	*/nvme)
+		# The parent device itself is reported.
+		readlink -f "${TEST_DEV_SYSFS}/device"
+		;;
+	*/nvme-subsystem)
+		# Report every nvme* symlink below the parent device that
+		# has a "dev" file.
+		for c in "${TEST_DEV_SYSFS}"/device/nvme*; do
+			[[ -L "$c" ]] || continue
+			if [[ -f "$c/dev" ]]; then
+				readlink -f "$c"
+			fi
+		done
+		;;
+	esac
+}
+
 _nvme_calc_rand_io_size() {
 	local img_size_mb
 	local io_size_kb
-- 
2.46.0