From 4ec4d4dad0ebefe1d9ca5f2967e382ae062a541c Mon Sep 17 00:00:00 2001
From: Jim Garlick
Date: Tue, 4 Jul 2023 09:08:33 -0700
Subject: [PATCH] testsuite: add sched-simple alloc check test

Problem: there is no test script specifically for checking
that sched-simple does not double-book resources.

Add t2304-sched-simple-alloc-check.t which uses the alloc-check.so
jobtap plugin.  Currently this just validates the alloc-check
plugin and checks that sched-simple doesn't suffer from the same
bug as flux-framework/flux-sched#1043 but other tests could be
added as needed.
---
Review notes (below the three-dash line, so not part of the commit message):
 * the sharness.sh path and the $id expansion are now quoted
 * the submit loop now fails on the first failed "flux submit" instead of
   reporting only the status of the last iteration
 * NOTE(review): the index line of the new file still names the blob of
   the originally posted script; regenerate the patch if that matters.

 t/Makefile.am                      |  1 +
 t/t2304-sched-simple-alloc-check.t | 79 ++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+)
 create mode 100755 t/t2304-sched-simple-alloc-check.t

diff --git a/t/Makefile.am b/t/Makefile.am
index ae216db81f1f..a0b273ab8a11 100644
--- a/t/Makefile.am
+++ b/t/Makefile.am
@@ -159,6 +159,7 @@ TESTSCRIPTS = \
 	t2300-sched-simple.t \
 	t2302-sched-simple-up-down.t \
 	t2303-sched-hello.t \
+	t2304-sched-simple-alloc-check.t \
 	t2310-resource-module.t \
 	t2311-resource-drain.t \
 	t2312-resource-exclude.t \
diff --git a/t/t2304-sched-simple-alloc-check.t b/t/t2304-sched-simple-alloc-check.t
new file mode 100755
index 000000000000..893d68182078
--- /dev/null
+++ b/t/t2304-sched-simple-alloc-check.t
@@ -0,0 +1,79 @@
+#!/bin/sh
+
+test_description='check that sched-simple never double books resources'
+
+# Append --logfile option if FLUX_TESTS_LOGFILE is set in environment:
+test -n "$FLUX_TESTS_LOGFILE" && set -- "$@" --logfile
+. "$(dirname "$0")/sharness.sh"
+
+test_under_flux 1
+
+# Verify alloc-check works: an alloc-bypass job holds rank 0, so a regular job must fail
+test_expect_success 'load alloc-bypass and alloc-check plugins' '
+	flux jobtap load alloc-bypass.so &&
+	flux jobtap load alloc-check.so
+'
+test_expect_success 'run an alloc-bypass sleep job' '
+	flux submit \
+		-vvv \
+		--wait-event=start \
+		--setattr=alloc-bypass.R="$(flux R encode -r 0)" \
+		-n 1 \
+		sleep inf
+'
+test_expect_success 'a regular job fails with an alloc-check exception' '
+	run_timeout 30 flux submit --flags=waitable -vvv \
+		--wait-event=exception \
+		-N1 /bin/true >bypass.jobid
+'
+test_expect_success 'flux job wait says the job failed' '
+	test_must_fail flux job wait -v $(cat bypass.jobid)
+'
+test_expect_success 'clean up jobs' '
+	flux cancel --all &&
+	flux queue drain
+'
+test_expect_success 'unload plugins' '
+	flux jobtap remove alloc-check.so &&
+	flux jobtap remove alloc-bypass.so
+'
+
+# Check that sched-simple doesn't suffer from a time limit issue like
+# flux-framework/flux-sched#1043: 1s jobs expire while a 2s epilog runs,
+# and alloc-check must not flag any of the consecutive allocations.
+test_expect_success 'configure epilog with 2s delay' '
+	flux config load <<-EOT
+	[job-manager]
+	plugins = [
+	  { load = "perilog.so" },
+	]
+	epilog.command = [ "flux", "perilog-run", "epilog", "-e", "sleep,2" ]
+	EOT
+'
+test_expect_success 'load alloc-check plugin' '
+	flux jobtap load alloc-check.so
+'
+test_expect_success 'submit consecutive jobs that exceed their time limit' '
+	(for i in $(seq 5); \
+	    do flux submit -N1 -x -t1s sleep inf || exit 1; \
+	done) >jobids
+'
+test_expect_success 'wait for jobs to complete and capture their stderr' '
+	(for id in $(cat jobids); do \
+	    flux job attach "$id" || true; \
+	done) 2>joberr
+'
+test_expect_success 'some jobs received timeout exception' '
+	grep "job.exception type=timeout" joberr
+'
+test_expect_success 'no jobs received alloc-check exception' '
+	test_must_fail grep "job.exception type=alloc-check" joberr
+'
+test_expect_success 'remove alloc-check plugin' '
+	flux jobtap remove alloc-check.so
+'
+test_expect_success 'undo epilog config' '
+	flux config load