Stress test folio splits by using the debugfs interface to target a new smaller folio order while running compaction at the same time. This is dangerous at the moment as it's using a debugfs API which requires two out-of-tree fixes [0] [1] which have already been posted but not yet merged. With these debugfs patches applied this test can now be used to reproduce an issue which was previously only possible to reproduce by running generic/447 twice with min order: https://gist.github.com/mcgrof/d12f586ec6ebe32b2472b5d634c397df This is designed to try to exacerbate races with folio splits incurred by truncation and race that with compaction and writeback. This only creates a crash with min order enabled, so for example with a 16k block sized XFS test profile. This also raises the question of whether something like MADV_NOHUGEPAGE might be desirable from userspace, so as to enable userspace to request splits when possible. If inspecting more closely, you'll want to enable on your kernel boot: dyndbg='file mm/huge_memory.c +p' Since we want to race large folio splits we also augment the full test output log $seqres.full with the test-specific number of successful and failed splits from vmstat thp_split_page and thp_split_page_failed. [0] https://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux.git/commit/?h=20240424-lbs&id=80f6df5037fd0ad560526af45bd7f4d779fe03f6 [1] https://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux.git/commit/?h=20240424-lbs&id=38f6fac5b4283ea48b1876fc56728f062168f8c3 Signed-off-by: Luis Chamberlain <mcgrof@xxxxxxxxxx> --- For now at least, to allow people to more easily reproduce the crash we're discussing here: https://lkml.kernel.org/r/Zi8aYA92pvjDY7d5@xxxxxxxxxxxxxxxxxxxxxx I can clean this up based on Zorro's feedback after this. Posting this RFCv2 so as to enable folks to more easily reproduce the issue and also the debugfs issue that this uses. 
common/rc             |  38 +++++++++++++
tests/generic/745     | 121 ++++++++++++++++++++++++++++++++++++++++++
tests/generic/745.out |   2 +
3 files changed, 161 insertions(+)
create mode 100755 tests/generic/745
create mode 100644 tests/generic/745.out

diff --git a/common/rc b/common/rc
index d4432f5ce259..1eefb53aa84b 100644
--- a/common/rc
+++ b/common/rc
@@ -127,6 +127,44 @@ _require_compaction()
 		_notrun "Need compaction enabled CONFIG_COMPACTION=y"
 	fi
 }
+
+# Path of the debugfs file used to request huge page / large folio splits.
+# It is only present when debugfs is mounted and the kernel exposes it.
+SPLIT_DEBUGFS="/sys/kernel/debug/split_huge_pages"
+
+# Skip the test unless the split_huge_pages debugfs file exists.
+_require_split_debugfs()
+{
+	if [ ! -f "$SPLIT_DEBUGFS" ]; then
+		_notrun "Needs CONFIG_DEBUGFS and split_huge_pages"
+	fi
+}
+
+# Ask the kernel to split every large folio of a file down to order 0.
+#
+# $1: path of the file to split
+#
+# Writes "<file>,0x0,0x<file size in hex>,0" to $SPLIT_DEBUGFS. Returns
+# non-zero without writing anything if the file cannot be stat'ed, which is
+# expected when racing with a workload that removes files.
+# NOTE(review): the debugfs interface may treat the range as page offsets
+# rather than bytes; confirm against mm/huge_memory.c.
+_split_huge_pages_file_full()
+{
+	local file=$1
+	local offset="0x0"
+	local order="0"
+	local size
+	local len
+
+	# Keep the assignment separate from "local" so a stat failure is not
+	# masked by the exit status of "local" itself.
+	size=$(stat --format='%s' "$file") || return 1
+	len=$(printf "%x" "$size")
+
+	echo "$file,$offset,0x${len},$order" > "$SPLIT_DEBUGFS"
+}
+
 # Get hugepagesize in bytes
 _get_hugepagesize()
 {
diff --git a/tests/generic/745 b/tests/generic/745
new file mode 100755
index 000000000000..0c67bd990a2f
--- /dev/null
+++ b/tests/generic/745
@@ -0,0 +1,121 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2024 Luis Chamberlain. All Rights Reserved.
+#
+# FS QA Test No. 745
+#
+# stress truncation + writeback + compaction
+#
+# This aims at trying to reproduce a difficult to reproduce bug found with
+# min order. The root cause lies in compaction racing with truncation on
+# min order:
+#
+# https://gist.github.com/mcgrof/d12f586ec6ebe32b2472b5d634c397df
+#
+# If you're enabling this and want to check underneath the hood you may want to
+# enable:
+#
+#	dyndbg='file mm/huge_memory.c +p'
+#
+# We want to increase the rate of successful truncations + compaction racing,
+# so we want to increase the value of thp_split_page in $seqres.full.
+#
+# Our general goal here is to race with folio truncation + writeback and
+# compaction.
+
+. ./common/preamble
+
+# This is dangerous_fuzzers for now until we get the debugfs interface
+# this uses fixed. Patches for that have been posted but still under
+# review.
+_begin_fstest long_rw stress soak smoketest dangerous_fuzzers
+
+# Override the default cleanup function.
+_cleanup()
+{
+	cd /
+	# Removing $tmp.* also removes $runfile, which stops the background loops.
+	rm -f $tmp.*
+	$KILLALL_PROG -9 fsstress > /dev/null 2>&1
+	# Reap the background loops so they do not outlive the test.
+	wait > /dev/null 2>&1
+}
+
+# Import common functions.
+. ./common/filter
+
+# real QA test starts here
+_supported_fs generic
+_require_test
+_require_scratch
+_require_split_debugfs
+_require_compaction
+_require_command "$KILLALL_PROG" "killall"
+
+echo "Silence is golden"
+
+_scratch_mkfs >>$seqres.full 2>&1
+_scratch_mount >> $seqres.full 2>&1
+
+nr_cpus=$((LOAD_FACTOR * 4))
+nr_ops=$((25000 * nr_cpus * TIME_FACTOR))
+
+fsstress_args=(-w -d $SCRATCH_MNT/test -n $nr_ops -p $nr_cpus)
+
+# used to let our loops know when to stop
+runfile="$tmp.keep.running.loop"
+touch $runfile
+
+# The background ops are out of bounds, the goal is to race with fsstress.
+
+# Force folio split if possible, this seems to be screaming for MADV_NOHUGEPAGE
+# for large folios.
+while [ -e $runfile ]; do
+	for i in $(find $SCRATCH_MNT/test \( -type f \) 2>/dev/null); do
+		_split_huge_pages_file_full $i >/dev/null 2>&1
+	done
+	sleep 2
+done &
+split_huge_pages_files_pid=$!
+
+# Trigger a full memory compaction pass periodically.
+while [ -e $runfile ]; do
+	echo 1 > /proc/sys/vm/compact_memory
+	sleep 10
+done &
+compaction_pid=$!
+
+blocksize=$(_get_file_block_size $SCRATCH_MNT)
+export XFS_DIO_MIN=$((blocksize * 2))
+
+test -n "$SOAK_DURATION" && fsstress_args+=(--duration="$SOAK_DURATION")
+
+# Snapshot the split counters so only splits from this run are reported.
+split_count_before=0
+split_count_failed_before=0
+
+if grep -q thp_split_page /proc/vmstat; then
+	split_count_before=$(awk '$1 == "thp_split_page" {print $2}' /proc/vmstat)
+	split_count_failed_before=$(awk '$1 == "thp_split_page_failed" {print $2}' /proc/vmstat)
+else
+	echo "no thp_split_page in /proc/vmstat" >> $seqres.full
+fi
+
+$FSSTRESS_PROG $FSSTRESS_AVOID "${fsstress_args[@]}" >> $seqres.full
+
+# Stop the background loops and wait for them to finish.
+rm -f $runfile
+wait > /dev/null 2>&1
+
+if grep -q thp_split_page /proc/vmstat; then
+	split_count_after=$(awk '$1 == "thp_split_page" {print $2}' /proc/vmstat)
+	split_count_failed_after=$(awk '$1 == "thp_split_page_failed" {print $2}' /proc/vmstat)
+	thp_split_page=$((split_count_after - split_count_before))
+	thp_split_page_failed=$((split_count_failed_after - split_count_failed_before))
+
+	echo "vmstat thp_split_page: $thp_split_page" >> $seqres.full
+	echo "vmstat thp_split_page_failed: $thp_split_page_failed" >> $seqres.full
+fi
+
+status=0
+exit
diff --git a/tests/generic/745.out b/tests/generic/745.out
new file mode 100644
index 000000000000..fce6b7f5489d
--- /dev/null
+++ b/tests/generic/745.out
@@ -0,0 +1,2 @@
+QA output created by 745
+Silence is golden
-- 
2.43.0