On 7/19/24 06:28, Yu Kuai wrote: > Hi, > > 在 2024/07/16 19:50, Ofir Gal 写道: >> A bug in md-bitmap has been discovered by setting up a md raid on top of >> nvme-tcp devices that has optimal io size larger than the allocated >> bitmap. >> >> The following test reproduce the bug by setting up a md-raid on top of >> nvme-tcp device over ram device that sets the optimal io size by using >> dm-stripe. >> >> Signed-off-by: Ofir Gal <ofir.gal@xxxxxxxxxxx> >> --- >> common/brd | 28 ++++++++++++++++ >> tests/md/001 | 85 ++++++++++++++++++++++++++++++++++++++++++++++++ >> tests/md/001.out | 3 ++ >> tests/md/rc | 12 +++++++ >> 4 files changed, 128 insertions(+) >> create mode 100644 common/brd >> create mode 100755 tests/md/001 >> create mode 100644 tests/md/001.out >> create mode 100644 tests/md/rc >> >> diff --git a/common/brd b/common/brd >> new file mode 100644 >> index 0000000..31e964f >> --- /dev/null >> +++ b/common/brd >> @@ -0,0 +1,28 @@ >> +#!/bin/bash >> +# SPDX-License-Identifier: GPL-3.0+ >> +# Copyright (C) 2024 Ofir Gal >> +# >> +# brd helper functions >> + >> +. common/shellcheck >> + >> +_have_brd() { >> + _have_module brd >> +} >> + >> +_init_brd() { >> + # _have_brd loads brd, we need to wait a bit for brd to be not in use in >> + # order to reload it >> + sleep 0.2 >> + >> + if ! modprobe -r brd || ! modprobe brd "$@" ; then >> + echo "failed to reload brd with args: $*" >> + return 1 >> + fi >> + >> + return 0 >> +} >> + >> +_cleanup_brd() { >> + modprobe -r brd >> +} >> diff --git a/tests/md/001 b/tests/md/001 >> new file mode 100755 >> index 0000000..e9578e8 >> --- /dev/null >> +++ b/tests/md/001 >> @@ -0,0 +1,85 @@ >> +#!/bin/bash >> +# SPDX-License-Identifier: GPL-3.0+ >> +# Copyright (C) 2024 Ofir Gal >> +# >> +# The bug is "visible" only when the underlying device of the raid is a network >> +# block device that utilize MSG_SPLICE_PAGES. nvme-tcp is used as the network device. >> +# >> +# Regression test for patch "md/md-bitmap: fix writing non bitmap pages" and >> +# for patch "nvme-tcp: use sendpages_ok() instead of sendpage_ok()" >> + >> +. tests/md/rc >> +. common/brd >> +. common/nvme >> + >> +DESCRIPTION="Raid with bitmap on tcp nvmet with opt-io-size over bitmap size" >> +QUICK=1 >> + >> +#restrict test to nvme-tcp only >> +nvme_trtype=tcp >> +nvmet_blkdev_type="device" >> + >> +requires() { >> + # Require dm-stripe >> + _have_program dmsetup >> + _have_driver dm-mod >> + >> + _require_nvme_trtype tcp >> + _have_brd >> +} >> + >> +# Sets up a brd device of 1G with optimal-io-size of 256K >> +setup_underlying_device() { >> + if ! _init_brd rd_size=1048576 rd_nr=1; then >> + return 1 >> + fi >> + >> + dmsetup create ram0_big_optio --table \ >> + "0 $(blockdev --getsz /dev/ram0) striped 1 512 /dev/ram0 0" >> +} >> + >> +cleanup_underlying_device() { >> + dmsetup remove ram0_big_optio >> + _cleanup_brd >> +} > > This is okay for now, however, it'll will be greate to add a common > helper in rc, so that other test can reuse this in fulture. >> + >> +# Sets up a local host nvme over tcp >> +setup_nvme_over_tcp() { >> + _setup_nvmet >> + >> + local port >> + port="$(_create_nvmet_port "${nvme_trtype}")" >> + >> + _create_nvmet_subsystem "${def_subsysnqn}" "/dev/mapper/ram0_big_optio" "${def_subsys_uuid}" >> + _add_nvmet_subsys_to_port "${port}" "${def_subsysnqn}" >> + >> + _create_nvmet_host "${def_subsysnqn}" "${def_hostnqn}" >> + >> + _nvme_connect_subsys >> +} >> + >> +cleanup_nvme_over_tcp() { >> + _nvme_disconnect_subsys >> + _nvmet_target_cleanup --subsysnqn "${def_subsysnqn}" >> +} >> + >> +test() { >> + echo "Running ${TEST_NAME}" >> + >> + setup_underlying_device >> + setup_nvme_over_tcp >> + >> + local ns >> + ns=$(_find_nvme_ns "${def_subsys_uuid}") >> + >> + # Hangs here without the fix >> + mdadm --quiet --create /dev/md/blktests_md --level=1 --bitmap=internal \ >> + --bitmap-chunk=1024K --assume-clean --run --raid-devices=2 \ >> + /dev/"${ns}" missing > > Perhaps add raid1 to requires()? Applied to v4 >> + >> + mdadm --quiet --stop /dev/md/blktests_md >> + cleanup_nvme_over_tcp >> + cleanup_underlying_device >> + >> + echo "Test complete" >> +} >> diff --git a/tests/md/001.out b/tests/md/001.out >> new file mode 100644 >> index 0000000..23071ec >> --- /dev/null >> +++ b/tests/md/001.out >> @@ -0,0 +1,3 @@ >> +Running md/001 >> +disconnected 1 controller(s) >> +Test complete >> diff --git a/tests/md/rc b/tests/md/rc >> new file mode 100644 >> index 0000000..d492579 >> --- /dev/null >> +++ b/tests/md/rc >> @@ -0,0 +1,12 @@ >> +#!/bin/bash >> +# SPDX-License-Identifier: GPL-3.0+ >> +# Copyright (C) 2024 Ofir Gal >> +# >> +# Tests for md raid >> + >> +. common/rc >> + >> +group_requires() { >> + _have_root >> + _have_program mdadm >> +} > > And md-mod here. Applied to v4. > > It's nice to start adding md tests here. > > Thanks, > Kuai >> >