Skip to content

Commit

Permalink
testsuite: add sched-simple alloc check test
Browse files Browse the repository at this point in the history
Problem: there is no test script specifically for checking that
sched-simple does not double-book resources.

Add t2304-sched-simple-alloc-check.t which uses the
alloc-check.so jobtap plugin.

Currently this just validates the alloc-check plugin and checks
that sched-simple doesn't suffer from the same bug as
flux-framework/flux-sched#1043 but other tests could be added as
needed.
  • Loading branch information
garlick committed Jul 5, 2023
1 parent e99ee12 commit 4ec4d4d
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 0 deletions.
1 change: 1 addition & 0 deletions t/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ TESTSCRIPTS = \
t2300-sched-simple.t \
t2302-sched-simple-up-down.t \
t2303-sched-hello.t \
t2304-sched-simple-alloc-check.t \
t2310-resource-module.t \
t2311-resource-drain.t \
t2312-resource-exclude.t \
Expand Down
79 changes: 79 additions & 0 deletions t/t2304-sched-simple-alloc-check.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/bin/sh
#
# Sharness test script: checks that sched-simple never double books
# resources (see test_description below).

test_description='check that sched-simple never double books resources'

# Append --logfile option if FLUX_TESTS_LOGFILE is set in environment:
test -n "$FLUX_TESTS_LOGFILE" && set -- "$@" --logfile

# Source the sharness test library that sits next to this script.
# Both $0 and the command substitution are quoted so the path survives
# whitespace or glob characters in the test directory name.
. "$(dirname "$0")/sharness.sh"

# NOTE(review): test_under_flux comes from sharness.sh; presumably it runs
# the rest of this script under a Flux test instance of size 1 -- confirm.
test_under_flux 1

# Part 1: verify that the alloc-check plugin works, using alloc-bypass.
#
# A sleep job is started with alloc-bypass.R set to the R produced by
# "flux R encode -r 0".  A regular -N1 job is then submitted and, per the
# test titles, is expected to fail with an alloc-check exception.
test_expect_success 'load alloc-bypass and alloc-check plugins' '
	flux jobtap load alloc-bypass.so &&
	flux jobtap load alloc-check.so
'

# The job sleeps forever; it is cancelled in the "clean up jobs" test.
test_expect_success 'run an alloc-bypass sleep job' '
	flux submit -vvv --wait-event=start \
		--setattr=alloc-bypass.R="$(flux R encode -r 0)" \
		-n 1 sleep inf
'

# Submission blocks until the job posts an exception event (at most 30s);
# stdout of flux submit is saved in bypass.jobid for the next test.
test_expect_success 'a regular job fails with an alloc-check exception' '
	run_timeout 30 \
		flux submit --flags=waitable -vvv --wait-event=exception \
		-N1 /bin/true >bypass.jobid
'

test_expect_success 'flux job wait says the job failed' '
	test_must_fail flux job wait -v $(cat bypass.jobid)
'

# Cancel everything still active and wait for the queue to drain so the
# second half of the script starts from a clean state.
test_expect_success 'clean up jobs' '
	flux cancel --all &&
	flux queue drain
'

test_expect_success 'unload plugins' '
	flux jobtap remove alloc-check.so &&
	flux jobtap remove alloc-bypass.so
'

# Check that sched-simple doesn't suffer from time limit issue like
# flux-framework/flux-sched#1043
#
# Five jobs are submitted with a 1s time limit (-t1s) but run "sleep inf",
# and an epilog that sleeps for 2s is configured.  With alloc-check loaded,
# the final assertions require that at least one timeout exception was
# reported and that no alloc-check exception was.
#
# The here-document lines are kept at column 0 so that the EOT terminator
# is recognized no matter how the surrounding code is indented.
test_expect_success 'configure epilog with 2s delay' '
	flux config load <<-EOT
[job-manager]
plugins = [
{ load = "perilog.so" },
]
epilog.command = [ "flux", "perilog-run", "epilog", "-e", "sleep,2" ]
EOT
'

test_expect_success 'load alloc-check plugin' '
	flux jobtap load alloc-check.so
'

# The exit status of a for loop is that of the last command it ran, so a
# failed submission in an earlier iteration would otherwise go unnoticed.
# "exit 1" only leaves the subshell, which makes this test fail right away.
test_expect_success 'submit consecutive jobs that exceed their time limit' '
	(for i in $(seq 5); do
		flux submit -N1 -x -t1s sleep inf || exit 1
	done) >jobids
'

# A non-zero exit from attach is deliberately tolerated (|| true): the jobs
# are expected to end with an exception, and only the collected stderr is
# examined by the tests that follow.
test_expect_success 'wait for jobs to complete and capture their stderr' '
	(for id in $(cat jobids); do
		flux job attach $id || true
	done) 2>joberr
'

test_expect_success 'some jobs received timeout exception' '
	grep "job.exception type=timeout" joberr
'

test_expect_success 'no jobs received alloc-check exception' '
	test_must_fail grep "job.exception type=alloc-check" joberr
'

test_expect_success 'remove alloc-check plugin' '
	flux jobtap remove alloc-check.so
'

# Loading an empty configuration drops the epilog settings made above.
test_expect_success 'undo epilog config' '
	flux config load </dev/null
'

test_done

0 comments on commit 4ec4d4d

Please sign in to comment.