Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP Fix bugs in Dihard 2018 #2897

Merged
merged 15 commits into from
Dec 3, 2018
Prev Previous commit
Next Next commit
change --channel to --rttm-channel, add --apply-deltas options
  • Loading branch information
hzili1 committed Nov 12, 2018
commit b54abb10fddaaef2c9420bf1c589093f54369076
7 changes: 4 additions & 3 deletions egs/callhome_diarization/v1/diarization/cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ stage=0
nj=10
cleanup=true
threshold=0.5
channel=0
rttm_channel=0
read_costs=false
reco2num_spk=
# End configuration section.
Expand All @@ -36,7 +36,8 @@ if [ $# != 2 ]; then
echo " --threshold <threshold|0> # Cluster stopping criterion. Clusters with scores greater"
echo " # than this value will be merged until all clusters"
echo " # exceed this value."
echo " --channel <channel|0> # Channel information on the rttm file"
echo " --rttm-channel <rttm-channel|0> # The value passed into the RTTM channel field. Only affects"
echo " # the format of the RTTM file."
echo " --read-costs <read-costs|false> # If true, interpret input scores as costs, i.e. similarity"
echo " # is indicated by smaller values. If enabled, clusters will"
echo " # be merged until all cluster scores are less than the"
Expand Down Expand Up @@ -88,7 +89,7 @@ fi

if [ $stage -le 2 ]; then
echo "$0: computing RTTM"
diarization/make_rttm.py --channel $channel $srcdir/segments $dir/labels $dir/rttm || exit 1;
diarization/make_rttm.py --rttm-channel $rttm_channel $srcdir/segments $dir/labels $dir/rttm || exit 1;
fi

if $cleanup ; then
Expand Down
16 changes: 15 additions & 1 deletion egs/callhome_diarization/v1/diarization/extract_ivectors.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ min_post=0.025 # Minimum posterior to use (posteriors below this are pruned out)
posterior_scale=1.0 # This scale helps to control for successive features being highly
# correlated. E.g. try 0.1 or 0.3.
apply_cmn=true # If true, apply sliding window cepstral mean normalization
apply_deltas=true # If true, copy the delta options from the i-vector extractor directory.
# If false, we won't add deltas in this step. For speaker diarization,
# we sometimes need to write features to disk that already have various
# post-processing applied so adding deltas is no longer needed in this stage.
# End configuration section.

echo "$0 $@" # Print the command line for logging
Expand Down Expand Up @@ -57,6 +61,12 @@ if [ $# != 3 ]; then
echo " --min-post <min-post|0.025> # Pruning threshold for posteriors"
echo " --apply-cmn <true,false|true> # if true, apply sliding window cepstral mean"
echo " # normalization to features"
echo " --apply-deltas <true,false|true> # If true, copy the delta options from the i-vector"
echo " # extractor directory. If false, we won't add deltas"
echo " # in this step. For speaker diarization, we sometimes"
echo " # need to write features to disk that already have"
echo " # various post-processing applied so adding deltas is"
echo " # no longer needed in this stage."
exit 1;
fi

Expand Down Expand Up @@ -95,7 +105,11 @@ mkdir -p $dir/log
sub_sdata=$sub_data/split$nj;
utils/split_data.sh $sub_data $nj || exit 1;

delta_opts=`cat $srcdir/delta_opts 2>/dev/null`
if $apply_deltas; then
delta_opts=`cat $srcdir/delta_opts 2>/dev/null`
else
delta_opts="--delta-order=0"
fi

## Set up features.
if $apply_cmn; then
Expand Down
7 changes: 4 additions & 3 deletions egs/callhome_diarization/v1/diarization/make_rttm.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,9 @@ def get_args():
help="Input labels file")
parser.add_argument("rttm_file", type=str,
help="Output RTTM file")
parser.add_argument("--channel", type=int, default=0,
help='Channel information')
parser.add_argument("--rttm-channel", type=int, default=0,
help="The value passed into the RTTM channel field. \
Only affects the format of the RTTM file.")

args = parser.parse_args()
return args
Expand Down Expand Up @@ -123,7 +124,7 @@ def main():
for i in range(1, len(segs)):
start, end, label = segs[i].strip().split(',')
print("SPEAKER {0} {1} {2:7.3f} {3:7.3f} <NA> <NA> {4} <NA> <NA>".format(
reco, args.channel, float(start), float(end)-float(start), label), file=rttm_writer)
reco, args.rttm_channel, float(start), float(end)-float(start), label), file=rttm_writer)

if __name__ == '__main__':
main()
22 changes: 11 additions & 11 deletions egs/dihard_2018/v1/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -116,15 +116,15 @@ fi

if [ $stage -le 4 ]; then
# Extract i-vectors for DIHARD 2018 development and evaluation set.
# We set apply-cmn false and delta-order 0 because we already add
# We set apply-cmn false and apply-deltas false because we already add
# deltas and apply cmn in stage 1.
local/extract_ivectors.sh --cmd "$train_cmd --mem 20G" \
--nj 40 --window 1.5 --period 0.75 --apply-cmn false --delta-order 0 \
diarization/extract_ivectors.sh --cmd "$train_cmd --mem 20G" \
--nj 40 --window 1.5 --period 0.75 --apply-cmn false --apply-deltas false \
--min-segment 0.5 $ivec_dir \
data/dihard_2018_dev_cmn $ivec_dir/ivectors_dihard_2018_dev

local/extract_ivectors.sh --cmd "$train_cmd --mem 20G" \
--nj 40 --window 1.5 --period 0.75 --apply-cmn false --delta-order 0 \
diarization/extract_ivectors.sh --cmd "$train_cmd --mem 20G" \
--nj 40 --window 1.5 --period 0.75 --apply-cmn false --apply-deltas false \
--min-segment 0.5 $ivec_dir \
data/dihard_2018_eval_cmn $ivec_dir/ivectors_dihard_2018_eval

Expand All @@ -133,8 +133,8 @@ if [ $stage -le 4 ]; then
# Extract i-vectors for VoxCeleb, which is our PLDA training
# data. A long period is used here so that we don't compute too
# many i-vectors for each recording.
local/extract_ivectors.sh --cmd "$train_cmd --mem 25G" \
--nj 40 --window 3.0 --period 10.0 --min-segment 1.5 --apply-cmn false --delta-order 0 \
diarization/extract_ivectors.sh --cmd "$train_cmd --mem 25G" \
--nj 40 --window 3.0 --period 10.0 --min-segment 1.5 --apply-cmn false --apply-deltas false \
--hard-min true $ivec_dir \
data/train_cmn_segmented_128k $ivec_dir/ivectors_train_segmented_128k
fi
Expand Down Expand Up @@ -176,7 +176,7 @@ if [ $stage -le 7 ]; then
# set using some reasonable thresholds for a well-calibrated system.
for threshold in -0.5 -0.4 -0.3 -0.2 -0.1 -0.05 0 0.05 0.1 0.2 0.3 0.4 0.5; do
diarization/cluster.sh --cmd "$train_cmd --mem 4G" --nj 20 \
--threshold $threshold --channel 1 $ivec_dir/ivectors_dihard_2018_dev/plda_scores \
--threshold $threshold --rttm-channel 1 $ivec_dir/ivectors_dihard_2018_dev/plda_scores \
$ivec_dir/ivectors_dihard_2018_dev/plda_scores_t$threshold

md-eval.pl -r data/dihard_2018_dev/rttm \
Expand All @@ -194,14 +194,14 @@ if [ $stage -le 7 ]; then
echo "$best_threshold" > $ivec_dir/tuning/dihard_2018_dev_best

diarization/cluster.sh --cmd "$train_cmd --mem 4G" --nj 20 \
--threshold $(cat $ivec_dir/tuning/dihard_2018_dev_best) --channel 1 \
--threshold $(cat $ivec_dir/tuning/dihard_2018_dev_best) --rttm-channel 1 \
$ivec_dir/ivectors_dihard_2018_dev/plda_scores $ivec_dir/ivectors_dihard_2018_dev/plda_scores

# Cluster DIHARD 2018 evaluation set using the best threshold found for the DIHARD
# 2018 development set. The DIHARD 2018 development set is used as the validation
# set to tune the parameters.
diarization/cluster.sh --cmd "$train_cmd --mem 4G" --nj 20 \
--threshold $(cat $ivec_dir/tuning/dihard_2018_dev_best) --channel 1 \
--threshold $(cat $ivec_dir/tuning/dihard_2018_dev_best) --rttm-channel 1 \
$ivec_dir/ivectors_dihard_2018_eval/plda_scores $ivec_dir/ivectors_dihard_2018_eval/plda_scores

mkdir -p $ivec_dir/results
Expand All @@ -222,7 +222,7 @@ if [ $stage -le 8 ]; then
# In this section, we show how to do the clustering if the number of speakers
# (and therefore, the number of clusters) per recording is known in advance.
diarization/cluster.sh --cmd "$train_cmd --mem 4G" --nj 20 \
--reco2num-spk data/dihard_2018_eval/reco2num_spk --channel 1 \
--reco2num-spk data/dihard_2018_eval/reco2num_spk --rttm-channel 1 \
$ivec_dir/ivectors_dihard_2018_eval/plda_scores $ivec_dir/ivectors_dihard_2018_eval/plda_scores_num_spk

md-eval.pl -r data/dihard_2018_eval/rttm \
Expand Down
1 change: 1 addition & 0 deletions egs/dihard_2018/v2/local/make_dihard_2018_dev.py
1 change: 1 addition & 0 deletions egs/dihard_2018/v2/local/make_dihard_2018_dev.sh
1 change: 1 addition & 0 deletions egs/dihard_2018/v2/local/make_dihard_2018_eval.py
1 change: 1 addition & 0 deletions egs/dihard_2018/v2/local/make_dihard_2018_eval.sh
1 change: 1 addition & 0 deletions egs/dihard_2018/v2/local/make_voxceleb1.pl
1 change: 1 addition & 0 deletions egs/dihard_2018/v2/local/make_voxceleb2.pl
8 changes: 4 additions & 4 deletions egs/dihard_2018/v2/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ if [ $stage -le 12 ]; then
# set using some reasonable thresholds for a well-calibrated system.
for threshold in -0.5 -0.4 -0.3 -0.2 -0.1 -0.05 0 0.05 0.1 0.2 0.3 0.4 0.5; do
diarization/cluster.sh --cmd "$train_cmd --mem 4G" --nj 20 \
--threshold $threshold --channel 1 $nnet_dir/xvectors_dihard_2018_dev/plda_scores \
--threshold $threshold --rttm-channel 1 $nnet_dir/xvectors_dihard_2018_dev/plda_scores \
$nnet_dir/xvectors_dihard_2018_dev/plda_scores_t$threshold

md-eval.pl -r data/dihard_2018_dev/rttm \
Expand All @@ -268,14 +268,14 @@ if [ $stage -le 12 ]; then
echo "$best_threshold" > $nnet_dir/tuning/dihard_2018_dev_best

diarization/cluster.sh --cmd "$train_cmd --mem 4G" --nj 20 \
--threshold $(cat $nnet_dir/tuning/dihard_2018_dev_best) --channel 1 \
--threshold $(cat $nnet_dir/tuning/dihard_2018_dev_best) --rttm-channel 1 \
$nnet_dir/xvectors_dihard_2018_dev/plda_scores $nnet_dir/xvectors_dihard_2018_dev/plda_scores

# Cluster DIHARD 2018 evaluation set using the best threshold found for the DIHARD
# 2018 development set. The DIHARD 2018 development set is used as the validation
# set to tune the parameters.
diarization/cluster.sh --cmd "$train_cmd --mem 4G" --nj 20 \
--threshold $(cat $nnet_dir/tuning/dihard_2018_dev_best) --channel 1 \
--threshold $(cat $nnet_dir/tuning/dihard_2018_dev_best) --rttm-channel 1 \
$nnet_dir/xvectors_dihard_2018_eval/plda_scores $nnet_dir/xvectors_dihard_2018_eval/plda_scores

mkdir -p $nnet_dir/results
Expand All @@ -296,7 +296,7 @@ if [ $stage -le 13 ]; then
# In this section, we show how to do the clustering if the number of speakers
# (and therefore, the number of clusters) per recording is known in advance.
diarization/cluster.sh --cmd "$train_cmd --mem 4G" --nj 20 \
--reco2num-spk data/dihard_2018_eval/reco2num_spk --channel 1 \
--reco2num-spk data/dihard_2018_eval/reco2num_spk --rttm-channel 1 \
$nnet_dir/xvectors_dihard_2018_eval/plda_scores $nnet_dir/xvectors_dihard_2018_eval/plda_scores_num_spk

md-eval.pl -r data/dihard_2018_eval/rttm \
Expand Down