From: Dave Chinner <dchinner@xxxxxxxxxx> Now that we've factored the code that runs individual tests out of check, we can start using it in check-parallel instead of calling check to do so. I originally thought that: We can do this by implementing a _run_seq() callback that runs the individual test in it's own private namespace, using the generic test execution and status tracking to record what happens with each test that is run by a given runner. This allows the runner to destage a test at a time from the global list and execute it in a batched setup, all without requiring us to run check for each test. But, no, no we can't do that. We have to mount the test filesystem inside the new private mount namespace context, otherwise all the other runner contexts see it and we reintroduce shared mount namespace problems... Hence we need a helper script that sets up the test device and does all that admin stuff before it starts grabbing tests from the test list. This means that multiple tests will run inside the runner's private namespace; the helper is really now a very minimal version of check.... This requires that check-parallel also sets up the required environment for all tests to run in. We can do this now simply by sourcing common/config as this now sets up and exports the entire environment that tests rely on. Note: this change is mainly for RFC purposes right now - it still needs more cleanup and probably some splitting up before it is ready to go. I'm including it more as a informational "this is why various factoring and rearrangement has been done" patch than a full review request... Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> --- check | 29 ++---------- check-parallel | 75 ++++++++++++------------------ common/config | 6 +++ common/rc | 2 + common/test_exec | 29 +++++++++++- common/test_list | 6 +++ tests/xfs/271 | 2 - tools/run_test.sh | 116 ++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 191 insertions(+), 74 deletions(-) create mode 100755 tools/run_test.sh diff --git a/check b/check index 106de0ee6..f032f9c48 100755 --- a/check +++ b/check @@ -25,15 +25,6 @@ _err_msg="" # start the initialisation work now iam=check.$$ -# mkfs.xfs uses the presence of both of these variables to enable formerly -# supported tiny filesystem configurations that fstests use for fuzz testing -# in a controlled environment -export MSGVERB="text:action" -export QA_CHECK_FS=${QA_CHECK_FS:=true} - -# number of diff lines from a failed test, 0 for whole output -export DIFF_LENGTH=${DIFF_LENGTH:=10} - rm -f $tmp.list $tmp.tmp $tmp.grep $here/$iam.out $tmp.report.* $tmp.arglist . ./common/exit @@ -188,6 +179,10 @@ fi # by default don't output timestamps _te_emit_timestamps=${TIMESTAMP:=} +# number of diff lines from a failed test, 0 for whole output +_te_diff_length=${DIFF_LENGTH:=10} + + # If the test config specified a soak test duration, see if there are any # unit suffixes that need converting to an integer seconds count. if [ -n "$SOAK_DURATION" ]; then @@ -245,21 +240,7 @@ _wrapup() fi needwrap=false elif $needwrap; then - if [ -f $check.time -a -f $tmp.time ]; then - cat $check.time $tmp.time \ - | $AWK_PROG ' - { t[$1] = $2 } - END { - if (NR > 0) { - for (i in t) print i " " t[i] - } - }' \ - | sort -n >$tmp.out - mv $tmp.out $check.time - if $OPTIONS_HAVE_SECTIONS; then - cp $check.time ${REPORT_DIR}/check.time - fi - fi + _te_time_report set +x _global_log "" diff --git a/check-parallel b/check-parallel index 1b67709a2..be3dfd346 100755 --- a/check-parallel +++ b/check-parallel @@ -18,13 +18,13 @@ run_section="" exclude_section="" iam="check-parallel" -tmp=/tmp/check-parallel.$$ -test_list="$tmp.test_list" +export here=`pwd` . ./common/exit . ./common/test_names . ./common/test_list . ./common/config-sections +. ./common/config usage() { @@ -155,6 +155,18 @@ elif [ -d "$basedir/runner-0/" ]; then prev_results=`ls -tr $basedir/runner-0/ | grep results | tail -1` fi +# We keep the test list in the base directory because it needs to be common +# across all test runners and their private namespaces. Because runners execute +# in private /tmp/ instances, we can't keep it there, so use the shared +# $basedir that hosts all the runners as the location of the test list. +test_list="$basedir/test_list" + +# Similarly, set our tmp dir to be under the shared basedir so that all the +# temp files that stuff like test execution use for expunge lists work +# appropriately. +export tmp="$basedir/tmp/check-parallel.$$" +mkdir -p $basedir/tmp + # grab all previously run tests and order them from highest runtime to lowest # We are going to try to run the longer tests first, hopefully so we can avoid # massive thundering herds trying to run lots of really short tests in parallel @@ -219,22 +231,6 @@ setup_test_list() echo $_tl_tests |sed -e 's/ /\n/g' | tac > $test_list } -# Grab the next test to be run from the tail of the file. -# Returns an empty string if there is no tests remaining to run. -# File operations are run under flock so concurrent gets are serialised against -# each other. -get_next_test() -{ - local test_file="$test_list.$1" - local test= - - flock 99 - test=$(tail -1 $test_file) - sed -i "\,$test,d" $test_file - flock -u 99 - echo $test -} - _create_loop_device() { @@ -260,32 +256,6 @@ _destroy_loop_device() losetup -d $dev || _fail "Cannot destroy loop device $dev" } -run_tests() -{ - local section="$1" - local logfile="$2" - - exec 99<>$tmp.test_list_lock - - local test_to_run=$(get_next_test $section) - - # Run the tests in it's own mount namespace, as per the comment below - # that precedes making the basedir a private mount. - # - # Similarly, we need to run check in it's own PID namespace so that - # operations like pkill only affect the runner instance, not globally - # kill processes from other check instances. - while [ -n "$test_to_run" ]; do - echo -n " $test_to_run " - unset FSTESTS_ISOL - if ! _tl_expunge_test $test_to_run; then - tools/run_privatens ./check -s $section $test_to_run >> $logfile 2>&1 - fi - - test_to_run=$(get_next_test $section) - done -} - runner_go() { @@ -301,6 +271,8 @@ runner_go() local _results=$me/results-$2 local section=$3 + export tmp=/tmp/check-parallel-runner-$id + mkdir -p $me xfs_io -f -c "truncate $TEST_DEV_SIZE" $_test @@ -323,6 +295,7 @@ runner_go() export LOGWRITES_DEV=$(_create_loop_device $_logwrites) export RESULT_BASE=$_results + export REPORT_DIR="$RESULT_BASE/$section" mkdir -p $TEST_DIR mkdir -p $SCRATCH_MNT @@ -338,7 +311,7 @@ runner_go() # export DUMP_CORRUPT_FS=1 - run_tests $section $_results/log + tools/run_test.sh $basedir $test_list.$section >> $_results/log 2>&1 wait sleep 1 @@ -351,6 +324,7 @@ runner_go() _destroy_loop_device $SCRATCH_RTDEV _destroy_loop_device $SCRATCH_LOGDEV _destroy_loop_device $LOGWRITES_DEV + rm -f $tmp.* grep -q Failures: $_results/log if [ $? -eq 0 ]; then @@ -366,6 +340,7 @@ run_section() local now="$2" local results="$basedir/*/results-$now" local i + local threads=$runners echo $run_section |grep -qw $section || return echo $exclude_section |grep -qw $section && return @@ -391,7 +366,15 @@ run_section() fi cp $test_list $test_list.$section - for ((i = 0; i < $runners; i++)); do + # only run as many runners are there are tests to run + i=$(cat $test_list.$section | wc -l) + if [ "$i" -lt "$runners" ]; then + threads=$i + fi + + echo Test list $test_list.$section contains $i tests + echo Running $threads tests concurrently + for ((i = 0; i < $threads; i++)); do runner_go $i $now $section & done wait diff --git a/common/config b/common/config index b93a6c0d3..149ef99c7 100644 --- a/common/config +++ b/common/config @@ -70,6 +70,12 @@ export LOAD_FACTOR=${LOAD_FACTOR:=1} export SOAK_DURATION=${SOAK_DURATION:=} export DEBUGFS_MNT=${DEBUGFS_MNT:="/sys/kernel/debug"} +# mkfs.xfs uses the presence of both of these variables to enable formerly +# supported tiny filesystem configurations that fstests use for fuzz testing +# in a controlled environment +export MSGVERB="text:action" +export QA_CHECK_FS=${QA_CHECK_FS:=true} + # some constants for overlayfs setup export OVL_UPPER="ovl-upper" export OVL_LOWER="ovl-lower" diff --git a/common/rc b/common/rc index be6cd92c4..5b7d2faf8 100644 --- a/common/rc +++ b/common/rc @@ -2936,6 +2936,8 @@ _require_xfs_io_command() _notrun "O_TMPFILE is not supported" ;; "fsmap") + df -h >> $seqres.full 2>&1 + df -i >> $seqres.full 2>&1 testio=`$XFS_IO_PROG -f -c "fsmap" $testfile 2>&1` echo $testio | grep -q "Inappropriate ioctl" && \ _notrun "xfs_io $command support is missing" diff --git a/common/test_exec b/common/test_exec index 63efa3d19..e9c80bd59 100644 --- a/common/test_exec +++ b/common/test_exec @@ -21,6 +21,12 @@ _te_notrun=() _te_loop_status=() _te_emit_timestamps="" _te_dry_run="" +_te_diff_length=10 + +# This is needed because callers don't always source common/report and +# if $do_report is undefined or true we'll try to call _make_testcase_report() +# without it being defined. +do_report=false _te_wipe_counters() { @@ -43,6 +49,25 @@ _te_timestamp() fi } +_te_time_report() +{ + if [ -f $check.time -a -f $tmp.time ]; then + cat $check.time $tmp.time \ + | $AWK_PROG ' + { t[$1] = $2 } + END { + if (NR > 0) { + for (i in t) print i " " t[i] + } + }' \ + | sort -n >$tmp.out + mv $tmp.out $check.time + if $OPTIONS_HAVE_SECTIONS; then + cp $check.time ${REPORT_DIR}/check.time + fi + fi +} + _te_check_filesystems() { local ret=0 @@ -330,10 +355,10 @@ _te_run_test() _dump_err "- output mismatch (see $seqres.out.bad)" mv $tmp.out $seqres.out.bad $diff $seq.out $seqres.out.bad | { - if test "$DIFF_LENGTH" -le 0; then + if test "$_te_diff_length" -le 0; then cat else - head -n "$DIFF_LENGTH" + head -n "$_te_diff_length" echo "..." echo "(Run '$diff $here/$seq.out $seqres.out.bad'" \ " to see the entire diff)" diff --git a/common/test_list b/common/test_list index 092b3ed17..4beb08b22 100644 --- a/common/test_list +++ b/common/test_list @@ -160,6 +160,11 @@ _tl_prepare_test_list() trim_test_list $list done + # Remove expunged tests + for f in "${_tl_exclude_tests[@]}"; do + trim_test_list tests/$f + done + # sort the list of tests into numeric order unless we're running tests # in the exact order specified if ! $_tl_exact_order; then @@ -194,6 +199,7 @@ _tl_expunge_test() return 1 } + _tl_setup_exclude_tests() { local list="$1" diff --git a/tests/xfs/271 b/tests/xfs/271 index 8a71746d6..ae4599282 100755 --- a/tests/xfs/271 +++ b/tests/xfs/271 @@ -22,8 +22,6 @@ _cleanup() _require_xfs_scratch_rmapbt _require_xfs_io_command "fsmap" -rm -f "$seqres.full" - echo "Format and mount" _scratch_mkfs > "$seqres.full" 2>&1 _scratch_mount diff --git a/tools/run_test.sh b/tools/run_test.sh new file mode 100755 index 000000000..75f7a7dcd --- /dev/null +++ b/tools/run_test.sh @@ -0,0 +1,116 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2025 Red Hat, Inc. All Rights Reserved. +# +# Test run helper for check-parallel +# +# check-parallel sets up all the devices and environments needs to run tests, +# but we can't set up for a test until we are executing in a private pid/mount +# namespace. +# +# This means we cannot simply run the test itself in a private name space; they +# require things like the test device to already be mounted, and we require a +# private mount namespace before we start mounting devices. +# +# Hence we need a helper that first creates a private namespace, then +# does all the setup work for tests to run, then iterates tests until there +# are no more tests to run. +# +# The tests to run are held in a shared file in $basedir so that we set up +# a private /tmp for the namespace and no lose access to the test list. + +# re-execute in a private namespace as a first step +if [ -z "${FSTESTS_ISOL}" ]; then + if [ -z "$1" ] || [ "$1" = "--help" ]; then + echo "Usage: $0 basedir test_list " + exit 1 + fi + + if [ ! -d "$1" ]; then + echo "invalid basedir ($1) specified" + exit 1 + fi + + FSTESTS_ISOL=privatens exec "$(dirname "$0")/../src/nsexec" -z -m -p "$0" "$@" + exit $? +fi + +# Everything past this point runs in a private namespace. +# +# We set up private mounts for /proc and /tmp so they aren't visible outside +# this mount namespace and it's children. +for path in /proc /tmp; do + mountpoint "$path" >/dev/null && \ + mount --make-private "$path" +done +mount -t proc proc /proc +mount -t tmpfs tmpfs /tmp + +echo $PWD $FSTYP + +. ./common/exit +. ./common/test_names +. ./common/test_list +. ./common/test_exec +. ./common/rc + +basedir="$1" +test_file="$2" +_te_diff_length=${DIFF_LENGTH:=10} +export tmp=/tmp/run-helper.$$ + +# XXX: should be a _te_diff variable +diff='diff -u' + +# Grab the next test to be run from the tail of the file. +# Returns an empty string if there is no tests remaining to run. +# File operations are run under flock so concurrent gets are serialised against +# each other. +get_next_test() +{ + local test= + + flock 99 + test=$(tail -1 $test_file) + sed -i "\,$test,d" $test_file + flock -u 99 + echo $test +} + +_run_seq() +{ + ./$seq + return $? +} + +exec 99<>$test_file.lock + +# XXX - refactor this back to a test_exec variable +check="$RESULT_BASE/check" +touch $check.time + +init_rc + +test_to_run=$(get_next_test) +while [ -n "$test_to_run" ]; do + _te_run_test "tests/$test_to_run" + + test_to_run=$(get_next_test) +done + +_te_time_report + +if ((${#_te_try[*]} > 0)); then + echo "Ran: ${_te_try[*]}" +fi + +if ((${#_te_notrun[*]} > 0)); then + echo "Not run: ${_te_notrun[*]}" +fi + +if ((${#_te_bad[*]} > 0)); then + echo "Failures: ${_te_bad[*]}" + echo "Failed ${#_te_bad[*]} of ${#_te_try[*]} tests" +else + echo "Passed all ${#_te_try[*]} tests" +fi -- 2.45.2