From ab36598a6ef51f10efc4474b99e12216431dbd1c Mon Sep 17 00:00:00 2001
From: KimJeongSun <47556641+KimJeongSun@users.noreply.github.com>
Date: Fri, 15 Nov 2019 11:48:00 +0900
Subject: [PATCH] [scripts] Improve how combine_ali_dirs.sh gets job-specific filenames (#3720)

---
 egs/wsj/s5/steps/combine_ali_dirs.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/egs/wsj/s5/steps/combine_ali_dirs.sh b/egs/wsj/s5/steps/combine_ali_dirs.sh
index b74b004cac6..39f2ff2b33b 100755
--- a/egs/wsj/s5/steps/combine_ali_dirs.sh
+++ b/egs/wsj/s5/steps/combine_ali_dirs.sh
@@ -166,10 +166,13 @@ do_combine() {
   # Merge (presumed already sorted) scp's into a single script.
   sort -m $temp_dir/$ark.*.scp > $temp_dir/$ark.scp || exit 1
 
+  inputs=$(for n in `seq $nj`; do echo $temp_dir/$ark.$n.scp; done)
+  utils/split_scp.pl --utt2spk=$data/utt2spk $temp_dir/$ark.scp $inputs
+
   echo "$0: Splitting combined $entities into $nj archives on speaker boundary."
   $cmd JOB=1:$nj $dest/log/chop_combined_$entities.JOB.log \
     $copy_program \
-      "scp:utils/split_scp.pl --utt2spk=$data/utt2spk --one-based -j $nj JOB $temp_dir/$ark.scp |" \
+      "scp:$temp_dir/$ark.JOB.scp" \
       "ark:| gzip -c > $dest/$ark.JOB.gz" || exit 1
 
   # Get some interesting stats, and signal an error if error threshold exceeded.