change --channel to --rttm-channel, add --apply-deltas options

kaldi-asr · danpovey · Dec 3, 2018 · Nov 5, 2018 · Nov 6, 2018 · Nov 6, 2018
commit b54abb10fddaaef2c9420bf1c589093f54369076
diff --git a/egs/callhome_diarization/v1/diarization/cluster.sh b/egs/callhome_diarization/v1/diarization/cluster.sh
@@ -14,7 +14,7 @@ stage=0
 nj=10
 cleanup=true
 threshold=0.5
-channel=0
+rttm_channel=0
 read_costs=false
 reco2num_spk=
 # End configuration section.
@@ -36,7 +36,8 @@ if [ $# != 2 ]; then
   echo "  --threshold <threshold|0>                        # Cluster stopping criterion. Clusters with scores greater"
   echo "                                                   # than this value will be merged until all clusters"
   echo "                                                   # exceed this value."
-  echo "  --channel <channel|0>                            # Channel information on the rttm file"
+  echo "  --rttm-channel <rttm-channel|0>                  # The value passed into the RTTM channel field. Only affects"
+  echo "                                                   # the format of the RTTM file."
   echo "  --read-costs <read-costs|false>                  # If true, interpret input scores as costs, i.e. similarity"
   echo "                                                   # is indicated by smaller values. If enabled, clusters will"
   echo "                                                   # be merged until all cluster scores are less than the"
@@ -88,7 +89,7 @@ fi
 
 if [ $stage -le 2 ]; then
   echo "$0: computing RTTM"
-  diarization/make_rttm.py --channel $channel $srcdir/segments $dir/labels $dir/rttm || exit 1;
+  diarization/make_rttm.py --rttm-channel $rttm_channel $srcdir/segments $dir/labels $dir/rttm || exit 1;
 fi
 
 if $cleanup ; then

diff --git a/egs/callhome_diarization/v1/diarization/extract_ivectors.sh b/egs/callhome_diarization/v1/diarization/extract_ivectors.sh
@@ -29,6 +29,10 @@ min_post=0.025 # Minimum posterior to use (posteriors below this are pruned out)
 posterior_scale=1.0 # This scale helps to control for successve features being highly
                     # correlated.  E.g. try 0.1 or 0.3.
 apply_cmn=true # If true, apply sliding window cepstral mean normalization
+apply_deltas=true # If true, copy the delta options from the i-vector extractor directory.
+                  # If false, we won't add deltas in this step. For speaker diarization,
+                  # we sometimes need to write features to disk that already have various
+                  # post-processing applied so adding deltas is no longer needed in this stage.
 # End configuration section.
 
 echo "$0 $@"  # Print the command line for logging
@@ -57,6 +61,12 @@ if [ $# != 3 ]; then
   echo "  --min-post <min-post|0.025>                      # Pruning threshold for posteriors"
   echo "  --apply-cmn <true,false|true>                    # if true, apply sliding window cepstral mean"
   echo "                                                   # normalization to features"
+  echo "  --apply-deltas <true,false|true>                 # If true, copy the delta options from the i-vector"
+  echo "                                                   # extractor directory. If false, we won't add deltas"
+  echo "                                                   # in this step. For speaker diarization, we sometimes"
+  echo "                                                   # need to write features to disk that already have"
+  echo "                                                   # various post-processing applied so adding deltas is"
+  echo "                                                   # no longer needed in this stage."
   exit 1;
 fi
 
@@ -95,7 +105,11 @@ mkdir -p $dir/log
 sub_sdata=$sub_data/split$nj;
 utils/split_data.sh $sub_data $nj || exit 1;
 
-delta_opts=`cat $srcdir/delta_opts 2>/dev/null`
+if $apply_deltas; then
+  delta_opts=`cat $srcdir/delta_opts 2>/dev/null`
+else
+  delta_opts="--delta-order=0"
+fi
 
 ## Set up features.
 if $apply_cmn; then

diff --git a/egs/callhome_diarization/v1/diarization/make_rttm.py b/egs/callhome_diarization/v1/diarization/make_rttm.py
@@ -51,8 +51,9 @@ def get_args():
                       help="Input labels file")
   parser.add_argument("rttm_file", type=str,
                       help="Output RTTM file")
-  parser.add_argument("--channel", type=int, default=0,
-                      help='Channel information')
+  parser.add_argument("--rttm-channel", type=int, default=0,
+                      help="The value passed into the RTTM channel field. \
+                      Only affects the format of the RTTM file.")
 
   args = parser.parse_args()
   return args
@@ -123,7 +124,7 @@ def main():
       for i in range(1, len(segs)):
         start, end, label = segs[i].strip().split(',')
         print("SPEAKER {0} {1} {2:7.3f} {3:7.3f} <NA> <NA> {4} <NA> <NA>".format(
-          reco, args.channel, float(start), float(end)-float(start), label), file=rttm_writer)
+          reco, args.rttm_channel, float(start), float(end)-float(start), label), file=rttm_writer)
 
 if __name__ == '__main__':
   main()
diff --git a/egs/dihard_2018/v1/run.sh b/egs/dihard_2018/v1/run.sh
@@ -116,15 +116,15 @@ fi
 
 if [ $stage -le 4 ]; then
   # Extract i-vectors for DIHARD 2018 development and evaluation set. 
-  # We set apply-cmn false and delta-order 0 because we already add 
+  # We set apply-cmn false and apply-deltas false because we already add
   # deltas and apply cmn in stage 1.
-  local/extract_ivectors.sh --cmd "$train_cmd --mem 20G" \
-    --nj 40 --window 1.5 --period 0.75 --apply-cmn false --delta-order 0 \
+  diarization/extract_ivectors.sh --cmd "$train_cmd --mem 20G" \
+    --nj 40 --window 1.5 --period 0.75 --apply-cmn false --apply-deltas false \
     --min-segment 0.5 $ivec_dir \
     data/dihard_2018_dev_cmn $ivec_dir/ivectors_dihard_2018_dev
 
-  local/extract_ivectors.sh --cmd "$train_cmd --mem 20G" \
-    --nj 40 --window 1.5 --period 0.75 --apply-cmn false --delta-order 0 \
+  diarization/extract_ivectors.sh --cmd "$train_cmd --mem 20G" \
+    --nj 40 --window 1.5 --period 0.75 --apply-cmn false --apply-deltas false \
     --min-segment 0.5 $ivec_dir \
     data/dihard_2018_eval_cmn $ivec_dir/ivectors_dihard_2018_eval
 
@@ -133,8 +133,8 @@ if [ $stage -le 4 ]; then
   # Extract i-vectors for the VoxCeleb, which is our PLDA training
   # data.  A long period is used here so that we don't compute too
   # many i-vectors for each recording.
-  local/extract_ivectors.sh --cmd "$train_cmd --mem 25G" \
-    --nj 40 --window 3.0 --period 10.0 --min-segment 1.5 --apply-cmn false --delta-order 0 \
+  diarization/extract_ivectors.sh --cmd "$train_cmd --mem 25G" \
+    --nj 40 --window 3.0 --period 10.0 --min-segment 1.5 --apply-cmn false --apply-deltas false \
     --hard-min true $ivec_dir \
     data/train_cmn_segmented_128k $ivec_dir/ivectors_train_segmented_128k
 fi
@@ -176,7 +176,7 @@ if [ $stage -le 7 ]; then
   # set using some reasonable thresholds for a well-calibrated system.
   for threshold in -0.5 -0.4 -0.3 -0.2 -0.1 -0.05 0 0.05 0.1 0.2 0.3 0.4 0.5; do
     diarization/cluster.sh --cmd "$train_cmd --mem 4G" --nj 20 \
-      --threshold $threshold --channel 1 $ivec_dir/ivectors_dihard_2018_dev/plda_scores \
+      --threshold $threshold --rttm-channel 1 $ivec_dir/ivectors_dihard_2018_dev/plda_scores \
       $ivec_dir/ivectors_dihard_2018_dev/plda_scores_t$threshold
 
     md-eval.pl -r data/dihard_2018_dev/rttm \
@@ -194,14 +194,14 @@ if [ $stage -le 7 ]; then
   echo "$best_threshold" > $ivec_dir/tuning/dihard_2018_dev_best
 
   diarization/cluster.sh --cmd "$train_cmd --mem 4G" --nj 20 \
-    --threshold $(cat $ivec_dir/tuning/dihard_2018_dev_best) --channel 1 \
+    --threshold $(cat $ivec_dir/tuning/dihard_2018_dev_best) --rttm-channel 1 \
     $ivec_dir/ivectors_dihard_2018_dev/plda_scores $ivec_dir/ivectors_dihard_2018_dev/plda_scores
 
   # Cluster DIHARD 2018 evaluation set using the best threshold found for the DIHARD 
   # 2018 development set. The DIHARD 2018 development set is used as the validation 
   # set to tune the parameters. 
   diarization/cluster.sh --cmd "$train_cmd --mem 4G" --nj 20 \
-    --threshold $(cat $ivec_dir/tuning/dihard_2018_dev_best) --channel 1 \
+    --threshold $(cat $ivec_dir/tuning/dihard_2018_dev_best) --rttm-channel 1 \
     $ivec_dir/ivectors_dihard_2018_eval/plda_scores $ivec_dir/ivectors_dihard_2018_eval/plda_scores
 
   mkdir -p $ivec_dir/results
@@ -222,7 +222,7 @@ if [ $stage -le 8 ]; then
   # In this section, we show how to do the clustering if the number of speakers
   # (and therefore, the number of clusters) per recording is known in advance.
   diarization/cluster.sh --cmd "$train_cmd --mem 4G" --nj 20 \
-    --reco2num-spk data/dihard_2018_eval/reco2num_spk --channel 1 \
+    --reco2num-spk data/dihard_2018_eval/reco2num_spk --rttm-channel 1 \
     $ivec_dir/ivectors_dihard_2018_eval/plda_scores $ivec_dir/ivectors_dihard_2018_eval/plda_scores_num_spk
 
   md-eval.pl -r data/dihard_2018_eval/rttm \

diff --git a/egs/dihard_2018/v2/local/make_dihard_2018_dev.py b/egs/dihard_2018/v2/local/make_dihard_2018_dev.py
@@ -0,0 +1 @@
+../../v1/local/make_dihard_2018_dev.py
diff --git a/egs/dihard_2018/v2/local/make_dihard_2018_dev.sh b/egs/dihard_2018/v2/local/make_dihard_2018_dev.sh
@@ -0,0 +1 @@
+../../v1/local/make_dihard_2018_dev.sh
diff --git a/egs/dihard_2018/v2/local/make_dihard_2018_eval.py b/egs/dihard_2018/v2/local/make_dihard_2018_eval.py
@@ -0,0 +1 @@
+../../v1/local/make_dihard_2018_eval.py
diff --git a/egs/dihard_2018/v2/local/make_dihard_2018_eval.sh b/egs/dihard_2018/v2/local/make_dihard_2018_eval.sh
@@ -0,0 +1 @@
+../../v1/local/make_dihard_2018_eval.sh
diff --git a/egs/dihard_2018/v2/local/make_voxceleb1.pl b/egs/dihard_2018/v2/local/make_voxceleb1.pl
@@ -0,0 +1 @@
+../../v1/local/make_voxceleb1.pl
diff --git a/egs/dihard_2018/v2/local/make_voxceleb2.pl b/egs/dihard_2018/v2/local/make_voxceleb2.pl
@@ -0,0 +1 @@
+../../v1/local/make_voxceleb2.pl
diff --git a/egs/dihard_2018/v2/run.sh b/egs/dihard_2018/v2/run.sh
@@ -250,7 +250,7 @@ if [ $stage -le 12 ]; then
   # set using some reasonable thresholds for a well-calibrated system.
   for threshold in -0.5 -0.4 -0.3 -0.2 -0.1 -0.05 0 0.05 0.1 0.2 0.3 0.4 0.5; do
     diarization/cluster.sh --cmd "$train_cmd --mem 4G" --nj 20 \
-      --threshold $threshold --channel 1 $nnet_dir/xvectors_dihard_2018_dev/plda_scores \
+      --threshold $threshold --rttm-channel 1 $nnet_dir/xvectors_dihard_2018_dev/plda_scores \
       $nnet_dir/xvectors_dihard_2018_dev/plda_scores_t$threshold
 
     md-eval.pl -r data/dihard_2018_dev/rttm \
@@ -268,14 +268,14 @@ if [ $stage -le 12 ]; then
   echo "$best_threshold" > $nnet_dir/tuning/dihard_2018_dev_best
 
   diarization/cluster.sh --cmd "$train_cmd --mem 4G" --nj 20 \
-    --threshold $(cat $nnet_dir/tuning/dihard_2018_dev_best) --channel 1 \
+    --threshold $(cat $nnet_dir/tuning/dihard_2018_dev_best) --rttm-channel 1 \
     $nnet_dir/xvectors_dihard_2018_dev/plda_scores $nnet_dir/xvectors_dihard_2018_dev/plda_scores
 
   # Cluster DIHARD 2018 evaluation set using the best threshold found for the DIHARD 
   # 2018 development set. The DIHARD 2018 development set is used as the validation 
   # set to tune the parameters. 
   diarization/cluster.sh --cmd "$train_cmd --mem 4G" --nj 20 \
-    --threshold $(cat $nnet_dir/tuning/dihard_2018_dev_best) --channel 1 \
+    --threshold $(cat $nnet_dir/tuning/dihard_2018_dev_best) --rttm-channel 1 \
     $nnet_dir/xvectors_dihard_2018_eval/plda_scores $nnet_dir/xvectors_dihard_2018_eval/plda_scores
 
   mkdir -p $nnet_dir/results
@@ -296,7 +296,7 @@ if [ $stage -le 13 ]; then
   # In this section, we show how to do the clustering if the number of speakers
   # (and therefore, the number of clusters) per recording is known in advance.
   diarization/cluster.sh --cmd "$train_cmd --mem 4G" --nj 20 \
-    --reco2num-spk data/dihard_2018_eval/reco2num_spk --channel 1 \
+    --reco2num-spk data/dihard_2018_eval/reco2num_spk --rttm-channel 1 \
     $nnet_dir/xvectors_dihard_2018_eval/plda_scores $nnet_dir/xvectors_dihard_2018_eval/plda_scores_num_spk
 
   md-eval.pl -r data/dihard_2018_eval/rttm \