Skip to content

Commit

Permalink
automatically find unused tcp port when mgpu train or test (open-mmla…
Browse files Browse the repository at this point in the history
…b#224)

* automatically find unused tcp port when mgpu train or test

* fixbug: set tcp_port before set python args
  • Loading branch information
jihanyang authored Aug 9, 2020
1 parent 0741976 commit aecc89c
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 3 deletions.
12 changes: 10 additions & 2 deletions tools/scripts/slurm_test_mgpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,15 @@ PY_ARGS=${@:3}
JOB_NAME=eval
SRUN_ARGS=${SRUN_ARGS:-""}

PORT=$(( ( RANDOM % 10000 ) + 10000 ))
# Choose the TCP port handed to the launcher via --tcp_port.
# Two RANDOM draws (15 bits each) are combined into a 30-bit value and mapped
# into the range 10000..59151. A candidate is accepted as soon as `nc -z`
# cannot connect to it on 127.0.0.1 (any non-zero exit status); otherwise a
# new candidate is drawn. The selected port is printed to stdout.
while :; do
  PORT=$(( ( (RANDOM << 15) | RANDOM ) % 49152 + 10000 ))
  # Non-zero exit from nc => no successful connection to this port => keep it.
  nc -z 127.0.0.1 "${PORT}" < /dev/null > /dev/null 2>&1 || break
done
echo "${PORT}"

srun -p ${PARTITION} \
--job-name=${JOB_NAME} \
Expand All @@ -18,5 +26,5 @@ srun -p ${PARTITION} \
--ntasks-per-node=${GPUS_PER_NODE} \
--kill-on-bad-exit=1 \
${SRUN_ARGS} \
python -u test.py --launcher slurm ${PY_ARGS} --tcp_port $PORT
python -u test.py --launcher slurm --tcp_port $PORT ${PY_ARGS}

10 changes: 9 additions & 1 deletion tools/scripts/slurm_train.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,15 @@ GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
SRUN_ARGS=${SRUN_ARGS:-""}

PORT=$(( ( RANDOM % 10000 ) + 10000 ))
# Choose the TCP port used for the multi-GPU training launch.
# Two RANDOM draws (15 bits each) are combined into a 30-bit value and mapped
# into the range 10000..59151. A candidate is accepted as soon as `nc -z`
# cannot connect to it on 127.0.0.1 (any non-zero exit status); otherwise a
# new candidate is drawn. The selected port is printed to stdout.
while :; do
  PORT=$(( ( (RANDOM << 15) | RANDOM ) % 49152 + 10000 ))
  # Non-zero exit from nc => no successful connection to this port => keep it.
  nc -z 127.0.0.1 "${PORT}" < /dev/null > /dev/null 2>&1 || break
done
echo "${PORT}"

srun -p ${PARTITION} \
--job-name=${JOB_NAME} \
Expand Down

0 comments on commit aecc89c

Please sign in to comment.