[PATCH 2/2] block: add add_disk() error handling tests

Luis Chamberlain <mcgrof@xxxxxxxxxx> · Wed, 12 May 2021 06:14:20 +0000

If your kernel has the new CONFIG_FAIL_ADD_DISK option, which adds
error injection to the add_disk() paths, this test exercises each
possible failure path and ensures that the kernel doesn't
crash / break / leak memory.

Signed-off-by: Luis Chamberlain <mcgrof@xxxxxxxxxx>
---
 tests/block/013     | 156 ++++++++++++++++++++++++++++++++++++++++++++
 tests/block/013.out |   4 +-
 2 files changed, 157 insertions(+), 3 deletions(-)
 create mode 100755 tests/block/013

diff --git a/tests/block/013 b/tests/block/013
new file mode 100755
index 0000000..a039048
--- /dev/null
+++ b/tests/block/013
@@ -0,0 +1,156 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0+
+# Copyright (C) 2021 Luis Chamberlain <mcgrof@xxxxxxxxxx>
+
+# Trigger the block add_disk() error paths. We used to not have
+# any error path handling for add_disk(). Only recently have we
+# added support for this. There are many places in which the
+# function call for add_disk() can fail; this test allows us to
+# force each path which could fail and test it.
+#
+# Correctness is verified by running the tests many times, looking
+# for kernel errors, crashes or memory leaks.
+
+. tests/block/rc
+. common/null_blk
+
+DESCRIPTION="trigger the add_disk() error paths"
+QUICK=1
+
+if [ -z "$SYS_DEBUGFS" ]; then
+	SYS_DEBUGFS="/sys/kernel/debug"
+fi
+
+if [ -z "$SYS_DEBUGFS_BLOCK_DIR" ]; then
+	SYS_DEBUGFS_BLOCK_DIR="$SYS_DEBUGFS/block"
+fi
+
+if [ -z "$BLOCK_ADD_DISK_FAIL_DEBUGFS_DIR" ]; then
+	BLOCK_ADD_DISK_FAIL_DEBUGFS_DIR="$SYS_DEBUGFS_BLOCK_DIR/config_fail_add_disk"
+fi
+
+if [ -z "$BLOCK_ADD_DISK_CONFIG_DIR" ]; then
+	BLOCK_ADD_DISK_CONFIG_DIR="$SYS_DEBUGFS_BLOCK_DIR/fail_add_disk"
+fi
+
+if [ -z "$KMEMLEAK_DEBUG_FILE" ]; then
+	KMEMLEAK_DEBUG_FILE="$SYS_DEBUGFS/kmemleak"
+fi
+
+requires() {	# prerequisites for this test
+	_have_kernel_option FAIL_ADD_DISK		# add_disk() error injection
+	_have_kernel_option FAULT_INJECTION_DEBUG_FS	# presumably exposes the debugfs knobs - confirm
+	_have_kernel_option DEBUG_KMEMLEAK		# the leak checks read the kmemleak file
+	_have_null_blk					# null_blk is the module loaded by the test
+}
+
+kmemleak_clear()
+{
+	echo clear > $KMEMLEAK_DEBUG_FILE
+}
+
+kmemleak_verify()
+{
+	echo scan > $KMEMLEAK_DEBUG_FILE
+	cat $KMEMLEAK_DEBUG_FILE
+}
+
+enable_add_disk_failures()
+{
+	echo 1 > $BLOCK_ADD_DISK_CONFIG_DIR/interval
+	echo 100 > $BLOCK_ADD_DISK_CONFIG_DIR/probability
+}
+
+disable_add_disk_failures()
+{
+	echo 0 > $BLOCK_ADD_DISK_CONFIG_DIR/interval
+	echo 0 > $BLOCK_ADD_DISK_CONFIG_DIR/probability
+}
+
+disable_add_disk_failure_paths()
+{
+	local fail_files_paths
+	local fail_path_bool
+
+	fail_files_paths=$(ls -1 $BLOCK_ADD_DISK_FAIL_DEBUGFS_DIR)
+
+	for fail_path_bool in ${fail_files_paths}; do
+		echo N > $BLOCK_ADD_DISK_FAIL_DEBUGFS_DIR/$fail_path_bool
+		if [[ $? -ne 0 ]]; then
+			echo "Could not disable $fail_path_bool"
+		fi
+	done
+}
+
+test_add_disk_failures()
+{
+	local fail_files
+	local fail_path
+	local leak_wc
+	local test_add_disk_cnt=$1
+
+	fail_files=$(ls -1 $BLOCK_ADD_DISK_FAIL_DEBUGFS_DIR)
+
+	# We enable only one failure path at a time. The order does not matter
+	# and so whatever order we get alphabetically is fine. We test for
+	# correctness by checking for crashes, and memory leaks.
+	for fail_path in ${fail_files}; do
+		disable_add_disk_failure_paths
+		echo Y > $BLOCK_ADD_DISK_FAIL_DEBUGFS_DIR/$fail_path
+		if [[ $? -ne 0 ]]; then
+			echo "Could not enable failure injection for $fail_path"
+			echo "on loop $test_add_disk_cnt"
+		fi
+
+		NULL_BLK_QUIET_MODPROBE="y"
+		if _init_null_blk queue_mode=2 ; then
+			echo "Loading of null_blk should have failed while testing"
+			echo "add_disk() failure on $fail_path loop test"
+			echo "number $test_add_disk_cnt"
+			return 1
+		fi
+		unset NULL_BLK_QUIET_MODPROBE
+
+		leak_wc=$(kmemleak_verify | wc -l)
+		if [[ $leak_wc -ne 0 ]]; then
+			echo "Memory leak detected while testing add_disk()"
+			echo "failure path on $fail_path loop test"
+			echo "number $test_add_disk_cnt"
+			kmemleak_verify
+		fi
+	done
+}
+
+test() {
+	local test_loop_cnt
+	local test_loop_max=5
+	local final_leak_w
+
+	enable_add_disk_failures
+	kmemleak_clear
+
+	for test_loop_cnt in $(seq 1 $test_loop_max); do
+		test_add_disk_failures $test_loop_cnt
+	done
+
+	disable_add_disk_failure_paths
+	disable_add_disk_failures
+
+	# Last check is just a sanity check to ensure we can still load
+	# the null block module correctly/
+	if ! _init_null_blk queue_mode=2; then
+		echo "Loading null_blk failed after disabling error injection"
+		return 1
+	fi
+
+	leak_wc=$(kmemleak_verify | wc -l)
+	if [[ $leak_wc -ne 0 ]]; then
+		echo "Memleak after disabling add_disk() error injection"
+		kmemleak_verify
+	fi
+	kmemleak_clear
+
+	_exit_null_blk
+
+	echo Passed
+}
diff --git a/tests/block/013.out b/tests/block/013.out
index 1a97562..863339f 100644
--- a/tests/block/013.out
+++ b/tests/block/013.out
@@ -1,3 +1 @@
-Running block/013
-Device or resource busy
-Test complete
+Passed
-- 
2.30.2