Re: [PATCH 25/40] fstests: scale some tests for high CPU count sanity

[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]



On Wed, Nov 27, 2024 at 03:51:55PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@xxxxxxxxxx>
> 
> Several tests use lots of processes to stress the filesystem. Many
> of them haven't really considered what this means for running the
> test on high CPU machines (e.g. >32p) and the potential contention
> and performance issues this might trigger.
> 
> Some of these tests simply need to increase the size of the journal.
> Some need to run on filesystems with high inherent concurrency (e.g.
> larger AG count). Some need more efficient/faster file creation. And
> so on.
> 
> This commit is a collection of those sorts of changes to improve
> runtimes on high CPU count machines.
> 
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> ---
>  src/aio-dio-regress/aio-last-ref-held-by-io.c | 5 ++++-
>  tests/generic/251                             | 5 ++++-
>  tests/generic/323                             | 7 +++++--
>  tests/generic/530                             | 2 +-
>  tests/generic/531                             | 8 +++++++-
>  tests/xfs/013                                 | 4 ++--
>  tests/xfs/076                                 | 6 +++---
>  tests/xfs/176                                 | 6 +++---
>  tests/xfs/297                                 | 4 +++-
>  tests/xfs/501                                 | 2 +-
>  tests/xfs/502                                 | 2 +-
>  11 files changed, 34 insertions(+), 17 deletions(-)
> 
> diff --git a/src/aio-dio-regress/aio-last-ref-held-by-io.c b/src/aio-dio-regress/aio-last-ref-held-by-io.c
> index a70f2a9b7..7106e30a9 100644
> --- a/src/aio-dio-regress/aio-last-ref-held-by-io.c
> +++ b/src/aio-dio-regress/aio-last-ref-held-by-io.c
> @@ -85,11 +85,14 @@ aio_test_thread(void *data)
>  	/*
>  	 * Problems have been easier to trigger when spreading the
>  	 * workload over the available CPUs.
> +	 *
> +	 * If CPU hotplug is active, this can randomly fail so dump the error
> +	 * to stderr so it can be filtered out easily by the caller.
>  	 */
>  	CPU_ZERO(&cpuset);
>  	CPU_SET(mycpu, &cpuset);
>  	if (sched_setaffinity(mytid, sizeof(cpuset), &cpuset)) {
> -		printf("FAILED to set thread %d to run on cpu %ld\n",
> +		fprintf(stderr, "FAILED to set thread %d to run on cpu %ld\n",
>  		       mytid, mycpu);
>  	}
>  
> diff --git a/tests/generic/251 b/tests/generic/251
> index b432fb119..98986469e 100755
> --- a/tests/generic/251
> +++ b/tests/generic/251
> @@ -175,9 +175,12 @@ nproc=20
>  # Copy $here to the scratch fs and make coipes of the replica.  The fstests
>  # output (and hence $seqres.full) could be in $here, so we need to snapshot
>  # $here before computing file checksums.
> +#
> +# Use $here/* as the files to copy so we avoid any .git directory that might be
> +# much, much larger than the rest of the fstests source tree we are copying.
>  content=$SCRATCH_MNT/orig
>  mkdir -p $content
> -cp -axT $here/ $content/
> +cp -ax $here/* $content/

Hi Dave,

Darrick has sent a patch for review that touches this same line:
https://lore.kernel.org/fstests/173258395238.4031902.16373799205312238046.stgit@frogsfrogsfrogs/T/#u

Please help review it. If you no longer need this change, I'll resolve the
conflict (by dropping your change to g/251 above) when I merge this patch.

Thanks,
Zorro

>  
>  mkdir -p $tmp
>  
> diff --git a/tests/generic/323 b/tests/generic/323
> index 457253fee..2dde04d06 100755
> --- a/tests/generic/323
> +++ b/tests/generic/323
> @@ -23,12 +23,15 @@ _require_aiodio aio-last-ref-held-by-io
>  testfile=$TEST_DIR/aio-testfile
>  $XFS_IO_PROG -ftc "pwrite 0 10m" $testfile | _filter_xfs_io
>  
> -$AIO_TEST 0 100 $testfile
> +# This can emit cpu affinity setting failures that aren't considered test
> +# failures but cause golden image failures. Redirect stderr to $seqres.full
> +# so that it is captured but doesn't directly cause test failures.
> +$AIO_TEST 0 100 $testfile 2>> $seqres.full
>  if [ $? -ne 0 ]; then
>  	exit $status
>  fi
>  
> -$AIO_TEST 1 100 $testfile
> +$AIO_TEST 1 100 $testfile 2>> $seqres.full
>  if [ $? -ne 0 ]; then
>  	exit $status
>  fi
> diff --git a/tests/generic/530 b/tests/generic/530
> index 2e47c3e0c..18256b870 100755
> --- a/tests/generic/530
> +++ b/tests/generic/530
> @@ -22,7 +22,7 @@ _require_scratch_shutdown
>  _require_metadata_journaling
>  _require_test_program "t_open_tmpfiles"
>  
> -_scratch_mkfs >> $seqres.full 2>&1
> +_scratch_mkfs "-l size=256m" >> $seqres.full 2>&1
>  _scratch_mount
>  
>  # Set ULIMIT_NOFILE to min(file-max / 2, 50000 files per LOAD_FACTOR)
> diff --git a/tests/generic/531 b/tests/generic/531
> index 0e3564fd4..ed6c3f911 100755
> --- a/tests/generic/531
> +++ b/tests/generic/531
> @@ -21,7 +21,13 @@ _require_scratch
>  _require_xfs_io_command "-T"
>  _require_test_program "t_open_tmpfiles"
>  
> -_scratch_mkfs >> $seqres.full 2>&1
> +# On high CPU count machines, this runs a -lot- of create and unlink
> +# concurrency. Set the filesystem up to handle this.
> +if [ $FSTYP = "xfs" ]; then
> +	_scratch_mkfs "-d agcount=32" >> $seqres.full 2>&1
> +else
> +	_scratch_mkfs >> $seqres.full 2>&1
> +fi
>  _scratch_mount
>  
>  # Try to load up all the CPUs, two threads per CPU.
> diff --git a/tests/xfs/013 b/tests/xfs/013
> index fd3d8c64c..5a92ef084 100755
> --- a/tests/xfs/013
> +++ b/tests/xfs/013
> @@ -28,7 +28,7 @@ _create()
>  	mkdir -p $dir
>  	for i in $(seq 0 $count)
>  	do
> -		touch $dir/$i 2>&1 | filter_enospc
> +		echo -n > $dir/$i 2>&1 | filter_enospc
>  	done
>  }
>  
> @@ -42,7 +42,7 @@ _rand_replace()
>  	do
>  		file=$((RANDOM % count))
>  		rm -f $dir/$file
> -		touch $dir/$file 2>&1 | filter_enospc
> +		echo -n > $dir/$file 2>&1 | filter_enospc
>  	done
>  }
>  
> diff --git a/tests/xfs/076 b/tests/xfs/076
> index 840617ccb..e315a067c 100755
> --- a/tests/xfs/076
> +++ b/tests/xfs/076
> @@ -47,10 +47,10 @@ _alloc_inodes()
>  	dir=$1
>  
>  	i=0
> -	while [ true ]; do
> -		touch $dir/$i 2>> $seqres.full || break
> +	( while [ true ]; do
> +		echo -n > $dir/$i || break
>  		i=$((i + 1))
> -	done
> +	done ) >> $seqres.full 2>&1
>  }
>  
>  
> diff --git a/tests/xfs/176 b/tests/xfs/176
> index 8e5951ec1..1aa8cde38 100755
> --- a/tests/xfs/176
> +++ b/tests/xfs/176
> @@ -68,10 +68,10 @@ _alloc_inodes()
>  	dir=$1
>  
>  	i=0
> -	while [ true ]; do
> -		echo -n > $dir/$i >> $seqres.full 2>&1 || break
> +	( while [ true ]; do
> +		echo -n > $dir/$i || break
>  		i=$((i + 1))
> -	done
> +	done ) >> $seqres.full 2>&1
>  }
>  
>  # Find a sparse inode cluster after logend_agno/logend_agino.
> diff --git a/tests/xfs/297 b/tests/xfs/297
> index f9cd2ff12..af6af601a 100755
> --- a/tests/xfs/297
> +++ b/tests/xfs/297
> @@ -34,7 +34,9 @@ _scratch_mount
>  STRESS_DIR="$SCRATCH_MNT/testdir"
>  mkdir -p $STRESS_DIR
>  
> -_run_fsstress_bg -d $STRESS_DIR -n 1000 -p 1000 $FSSTRESS_AVOID
> +# turn off sync as this can lead to near deadlock conditions due to every
> +# fsstress process lockstepping against freeze on large CPU count machines
> +_run_fsstress_bg -d $STRESS_DIR -f sync=0 -n 1000 -p 1000 $FSSTRESS_AVOID
>  
>  # Freeze/unfreeze file system randomly
>  echo "Start freeze/unfreeze randomly" | tee -a $seqres.full
> diff --git a/tests/xfs/501 b/tests/xfs/501
> index 1da4cbf92..678c51b52 100755
> --- a/tests/xfs/501
> +++ b/tests/xfs/501
> @@ -33,7 +33,7 @@ _require_xfs_sysfs debug/log_recovery_delay
>  _require_scratch
>  _require_test_program "t_open_tmpfiles"
>  
> -_scratch_mkfs >> $seqres.full 2>&1
> +_scratch_mkfs "-l size=256m" >> $seqres.full 2>&1
>  _scratch_mount
>  
>  # Set ULIMIT_NOFILE to min(file-max / 2, 30000 files per LOAD_FACTOR)
> diff --git a/tests/xfs/502 b/tests/xfs/502
> index 52b8e95a2..10b0017f6 100755
> --- a/tests/xfs/502
> +++ b/tests/xfs/502
> @@ -23,7 +23,7 @@ _require_xfs_io_error_injection "iunlink_fallback"
>  _require_scratch
>  _require_test_program "t_open_tmpfiles"
>  
> -_scratch_mkfs | _filter_mkfs 2> $tmp.mkfs > /dev/null
> +_scratch_mkfs "-l size=256m" | _filter_mkfs 2> $tmp.mkfs > /dev/null
>  cat $tmp.mkfs >> $seqres.full
>  . $tmp.mkfs
>  
> -- 
> 2.45.2
> 
> 





[Index of Archives]     [Linux Filesystems Development]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux