A bug in md-bitmap has been discovered by setting up a md raid on top of nvme-tcp devices that has optimal io size larger than the allocated bitmap. The following test reproduce the bug by setting up a md-raid on top of nvme-tcp device over ram device that sets the optimal io size by using dm-stripe. Signed-off-by: Ofir Gal <ofir.gal@xxxxxxxxxxx> --- common/brd | 28 ++++++++++++++++ tests/md/001 | 85 ++++++++++++++++++++++++++++++++++++++++++++++++ tests/md/001.out | 3 ++ tests/md/rc | 12 +++++++ 4 files changed, 128 insertions(+) create mode 100644 common/brd create mode 100755 tests/md/001 create mode 100644 tests/md/001.out create mode 100644 tests/md/rc diff --git a/common/brd b/common/brd new file mode 100644 index 0000000..31e964f --- /dev/null +++ b/common/brd @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-3.0+ +# Copyright (C) 2024 Ofir Gal +# +# brd helper functions + +. common/shellcheck + +_have_brd() { + _have_module brd +} + +_init_brd() { + # _have_brd loads brd, we need to wait a bit for brd to be not in use in + # order to reload it + sleep 0.2 + + if ! modprobe -r brd || ! modprobe brd "$@" ; then + echo "failed to reload brd with args: $*" + return 1 + fi + + return 0 +} + +_cleanup_brd() { + modprobe -r brd +} diff --git a/tests/md/001 b/tests/md/001 new file mode 100755 index 0000000..e9578e8 --- /dev/null +++ b/tests/md/001 @@ -0,0 +1,85 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-3.0+ +# Copyright (C) 2024 Ofir Gal +# +# The bug is "visible" only when the underlying device of the raid is a network +# block device that utilize MSG_SPLICE_PAGES. nvme-tcp is used as the network device. +# +# Regression test for patch "md/md-bitmap: fix writing non bitmap pages" and +# for patch "nvme-tcp: use sendpages_ok() instead of sendpage_ok()" + +. tests/md/rc +. common/brd +. common/nvme + +DESCRIPTION="Raid with bitmap on tcp nvmet with opt-io-size over bitmap size" +QUICK=1 + +#restrict test to nvme-tcp only +nvme_trtype=tcp +nvmet_blkdev_type="device" + +requires() { + # Require dm-stripe + _have_program dmsetup + _have_driver dm-mod + + _require_nvme_trtype tcp + _have_brd +} + +# Sets up a brd device of 1G with optimal-io-size of 256K +setup_underlying_device() { + if ! _init_brd rd_size=1048576 rd_nr=1; then + return 1 + fi + + dmsetup create ram0_big_optio --table \ + "0 $(blockdev --getsz /dev/ram0) striped 1 512 /dev/ram0 0" +} + +cleanup_underlying_device() { + dmsetup remove ram0_big_optio + _cleanup_brd +} + +# Sets up a local host nvme over tcp +setup_nvme_over_tcp() { + _setup_nvmet + + local port + port="$(_create_nvmet_port "${nvme_trtype}")" + + _create_nvmet_subsystem "${def_subsysnqn}" "/dev/mapper/ram0_big_optio" "${def_subsys_uuid}" + _add_nvmet_subsys_to_port "${port}" "${def_subsysnqn}" + + _create_nvmet_host "${def_subsysnqn}" "${def_hostnqn}" + + _nvme_connect_subsys +} + +cleanup_nvme_over_tcp() { + _nvme_disconnect_subsys + _nvmet_target_cleanup --subsysnqn "${def_subsysnqn}" +} + +test() { + echo "Running ${TEST_NAME}" + + setup_underlying_device + setup_nvme_over_tcp + + local ns + ns=$(_find_nvme_ns "${def_subsys_uuid}") + + # Hangs here without the fix + mdadm --quiet --create /dev/md/blktests_md --level=1 --bitmap=internal \ + --bitmap-chunk=1024K --assume-clean --run --raid-devices=2 \ + /dev/"${ns}" missing + + mdadm --quiet --stop /dev/md/blktests_md + cleanup_nvme_over_tcp + cleanup_underlying_device + + echo "Test complete" +} diff --git a/tests/md/001.out b/tests/md/001.out new file mode 100644 index 0000000..23071ec --- /dev/null +++ b/tests/md/001.out @@ -0,0 +1,3 @@ +Running md/001 +disconnected 1 controller(s) +Test complete diff --git a/tests/md/rc b/tests/md/rc new file mode 100644 index 0000000..d492579 --- /dev/null +++ b/tests/md/rc @@ -0,0 +1,12 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-3.0+ +# Copyright (C) 2024 Ofir Gal +# +# Tests for md raid + +. common/rc + +group_requires() { + _have_root + _have_program mdadm +} -- 2.45.1