Skip to content

Commit

Permalink
[egs] Add recipes for Speakers in the Wild (SITW) (#2422)
Browse files Browse the repository at this point in the history
  • Loading branch information
david-ryan-snyder authored and danpovey committed May 24, 2018
1 parent 7051384 commit 447e964
Show file tree
Hide file tree
Showing 29 changed files with 1,277 additions and 1 deletion.
16 changes: 16 additions & 0 deletions egs/sitw/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@

This directory (sitw) contains example scripts for the Speakers in the
Wild (SITW) Speaker Recognition Challenge. The SITW corpus is required,
and can be obtained by following the directions at the url
http://www.speech.sri.com/projects/sitw/

Additional data sources (e.g., VoxCeleb and MUSAN) are required to train
the systems in the subdirectories. See the corresponding README.txt files
in the subdirectories for more details.

Note: This recipe requires ffmpeg to be installed and its location included
in $PATH.

The subdirectories "v1" and so on are different speaker recognition
recipes. The recipe in v1 is a traditional i-vector system while the v2
recipe uses DNN embeddings called x-vectors.
14 changes: 14 additions & 0 deletions egs/sitw/v1/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

This is a traditional i-vector recipe for Speakers in the Wild. The
following datasets are used:

Evaluation

Speakers in the Wild http://www.speech.sri.com/projects/sitw

System Development

VoxCeleb 1 http://www.robots.ox.ac.uk/~vgg/data/voxceleb
VoxCeleb 2 http://www.robots.ox.ac.uk/~vgg/data/voxceleb2
MUSAN http://www.openslr.org/17
RIR_NOISES http://www.openslr.org/28
15 changes: 15 additions & 0 deletions egs/sitw/v1/cmd.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances of 'queue.pl' to run.pl (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub). slurm.pl works
# with slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

# Exported so that child processes see $train_cmd: queue.pl invoked with the
# option "--mem 4G". Presumably the recipe scripts prepend it to the jobs they
# launch -- confirm against run.sh.
export train_cmd="queue.pl --mem 4G"


7 changes: 7 additions & 0 deletions egs/sitw/v1/conf/mfcc.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
--sample-frequency=16000
--frame-length=25 # the default is 25
--low-freq=20 # the default.
--high-freq=7600 # the default is zero meaning use the Nyquist (8k in this case).
--num-mel-bins=30
--num-ceps=24
--snip-edges=false
2 changes: 2 additions & 0 deletions egs/sitw/v1/conf/vad.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
--vad-energy-threshold=5.5
--vad-energy-mean-scale=0.5
123 changes: 123 additions & 0 deletions egs/sitw/v1/local/make_musan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#!/usr/bin/env python3
# Copyright 2015 David Snyder
# 2018 Ewald Enzinger
# Apache 2.0.
#
# Modified version of egs/sre16/v1/local/make_musan.py (commit e3fb7c4a0da4167f8c94b80f4d3cc5ab4d0e22e8).
# This version uses the raw MUSAN audio files (16 kHz) and does not use sox to resample at 8 kHz.
#
# This file is meant to be invoked by make_musan.sh.

import os, sys

def process_music_annotations(path):
    """Parse one music ANNOTATIONS file.

    Every non-blank line is split on whitespace and its first four fields
    are read as: utterance ID, genres, vocals flag, musician. Any further
    fields on the line are ignored.

    Args:
        path: Path of the ANNOTATIONS file to read.

    Returns:
        A tuple (utt2spk, utt2vocals). utt2spk maps each utterance ID to
        itself; utt2vocals maps each utterance ID to True when its vocals
        field is exactly "Y" and to False otherwise.

    Raises:
        ValueError: If a non-blank line has fewer than four fields.
    """
    utt2spk = {}
    utt2vocals = {}
    # The context manager closes the file deterministically instead of
    # leaving the handle to the garbage collector.
    with open(path, 'r') as annotations:
        for line in annotations:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing empty line) rather
                # than failing on the four-way unpacking below.
                continue
            utt, _genres, vocals, _musician = fields[:4]
            # For this application, the musician ID isn't important, so each
            # utterance is used as its own speaker label.
            utt2spk[utt] = utt
            utt2vocals[utt] = vocals == "Y"
    return utt2spk, utt2vocals

def prepare_music(root_dir, use_vocals):
    """Collect utt2spk and wav.scp entries for the "music" subdirectory.

    Walks <root_dir>/music recursively. Every "*.wav" file is registered
    under its file name without the ".wav" suffix, and every file named
    "ANNOTATIONS" is parsed with process_music_annotations(). Only
    utterances listed in an ANNOTATIONS file are emitted; an annotated
    utterance without a wav file is reported on stdout and counted as
    missing.

    Args:
        root_dir: Directory that contains the "music" subdirectory.
        use_vocals: When false, utterances whose annotation marks them as
            containing vocals are left out.

    Returns:
        A tuple (utt2spk_str, utt2wav_str) of newline-terminated
        "<utt> <spk>" and "<utt> <wav-path>" lines.
    """
    wav_suffix = ".wav"
    utt2vocals = {}
    utt2spk = {}
    utt2wav = {}
    num_good_files = 0
    num_bad_files = 0
    music_dir = os.path.join(root_dir, "music")
    for root, _dirs, files in os.walk(music_dir):
        for file in files:
            file_path = os.path.join(root, file)
            if file.endswith(wav_suffix):
                # Strip only the trailing extension; str.replace() would also
                # remove any ".wav" occurring earlier in the file name.
                utt = file[:-len(wav_suffix)]
                utt2wav[utt] = file_path
            elif file == "ANNOTATIONS":
                utt2spk_part, utt2vocals_part = process_music_annotations(file_path)
                utt2spk.update(utt2spk_part)
                utt2vocals.update(utt2vocals_part)
    # Accumulate lines in lists and join once, instead of growing a string
    # inside the loop.
    utt2spk_lines = []
    utt2wav_lines = []
    for utt in utt2vocals:
        if utt in utt2wav:
            if use_vocals or not utt2vocals[utt]:
                utt2spk_lines.append(utt + " " + utt2spk[utt] + "\n")
                utt2wav_lines.append(utt + " " + utt2wav[utt] + "\n")
                num_good_files += 1
        else:
            print("Missing file", utt)
            num_bad_files += 1
    print("In music directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data")
    return "".join(utt2spk_lines), "".join(utt2wav_lines)

def prepare_speech(root_dir):
    """Collect utt2spk and wav.scp entries for the "speech" subdirectory.

    Walks <root_dir>/speech recursively and registers every "*.wav" file
    under its file name without the ".wav" suffix. Each utterance is used
    as its own speaker label. If the same file name occurs in several
    directories, the path found last is the one kept.

    Args:
        root_dir: Directory that contains the "speech" subdirectory.

    Returns:
        A tuple (utt2spk_str, utt2wav_str) of newline-terminated
        "<utt> <utt>" and "<utt> <wav-path>" lines.
    """
    wav_suffix = ".wav"
    utt2wav = {}
    speech_dir = os.path.join(root_dir, "speech")
    for root, _dirs, files in os.walk(speech_dir):
        for file in files:
            if file.endswith(wav_suffix):
                # Strip only the trailing extension; str.replace() would also
                # remove any ".wav" occurring earlier in the file name.
                utt2wav[file[:-len(wav_suffix)]] = os.path.join(root, file)
    utt2spk_str = "".join(utt + " " + utt + "\n" for utt in utt2wav)
    utt2wav_str = "".join(utt + " " + path + "\n" for utt, path in utt2wav.items())
    num_good_files = len(utt2wav)
    # An utterance exists only because its wav file was found, so none can be
    # missing here; the count is kept so the log line keeps its format.
    num_bad_files = 0
    print("In speech directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data")
    return utt2spk_str, utt2wav_str

def prepare_noise(root_dir):
    """Collect utt2spk and wav.scp entries for the "noise" subdirectory.

    Walks <root_dir>/noise recursively and registers every "*.wav" file
    under its file name without the ".wav" suffix. Each utterance is used
    as its own speaker label. If the same file name occurs in several
    directories, the path found last is the one kept.

    Args:
        root_dir: Directory that contains the "noise" subdirectory.

    Returns:
        A tuple (utt2spk_str, utt2wav_str) of newline-terminated
        "<utt> <utt>" and "<utt> <wav-path>" lines.
    """
    wav_suffix = ".wav"
    utt2wav = {}
    noise_dir = os.path.join(root_dir, "noise")
    for root, _dirs, files in os.walk(noise_dir):
        for file in files:
            if file.endswith(wav_suffix):
                # Strip only the trailing extension; str.replace() would also
                # remove any ".wav" occurring earlier in the file name.
                utt2wav[file[:-len(wav_suffix)]] = os.path.join(root, file)
    utt2spk_str = "".join(utt + " " + utt + "\n" for utt in utt2wav)
    utt2wav_str = "".join(utt + " " + path + "\n" for utt, path in utt2wav.items())
    num_good_files = len(utt2wav)
    # An utterance exists only because its wav file was found, so none can be
    # missing here; the count is kept so the log line keeps its format.
    num_bad_files = 0
    print("In noise directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data")
    return utt2spk_str, utt2wav_str

def main():
    """Write wav.scp and utt2spk for the speech, music and noise subsets.

    Command-line arguments:
        1: input root directory, searched for "music", "speech" and
           "noise" subdirectories.
        2: existing output directory that receives "wav.scp" and "utt2spk".
        3: "Y" to keep music with vocals; any other value drops it.
    """
    if len(sys.argv) < 4:
        # Fail with a readable message instead of an IndexError traceback.
        sys.stderr.write("Usage: " + sys.argv[0] + " <in-dir> <out-dir> <use-vocals: Y|N>\n")
        sys.exit(1)
    in_dir = sys.argv[1]
    out_dir = sys.argv[2]
    use_vocals = sys.argv[3] == "Y"
    utt2spk_music, utt2wav_music = prepare_music(in_dir, use_vocals)
    utt2spk_speech, utt2wav_speech = prepare_speech(in_dir)
    utt2spk_noise, utt2wav_noise = prepare_noise(in_dir)
    utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise
    utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise
    # Context managers flush and close both outputs even if a write fails.
    with open(os.path.join(out_dir, "wav.scp"), 'w') as wav_fi:
        wav_fi.write(utt2wav)
    with open(os.path.join(out_dir, "utt2spk"), 'w') as utt2spk_fi:
        utt2spk_fi.write(utt2spk)


if __name__=="__main__":
    main()
39 changes: 39 additions & 0 deletions egs/sitw/v1/local/make_musan.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/bin/bash
# Copyright 2015 David Snyder
# Apache 2.0.
#
# Copy of egs/sre16/v1/local/make_musan.sh (commit e3fb7c4a0da4167f8c94b80f4d3cc5ab4d0e22e8).
#
# This script, called by ../run.sh, creates the MUSAN
# data directory. The required dataset is freely available at
# http://www.openslr.org/17/

set -e

# Both positional arguments are required; with either one missing the paths
# built below would be wrong (e.g. "/musan").
if [ $# -lt 2 ]; then
  echo "Usage: $0 <musan-dir> <data-dir>" >&2
  echo "E.g.: $0 /path/to/musan data" >&2
  exit 1
fi

in_dir=$1
data_dir=$2
use_vocals='Y'
tmp_dir=local/musan.tmp

# Remove the scratch directory on every exit path, including an abort
# triggered by "set -e".
trap 'rm -rf "$tmp_dir"' EXIT

mkdir -p "$tmp_dir"

echo "Preparing ${data_dir}/musan..."
mkdir -p "${data_dir}/musan"
local/make_musan.py "${in_dir}" "${data_dir}/musan" "${use_vocals}"

utils/fix_data_dir.sh "${data_dir}/musan"

# Split the combined directory into one data directory per category. The
# utterance list of a category is every utt2spk line containing its name.
for category in music speech noise; do
  grep "$category" "${data_dir}/musan/utt2spk" > "$tmp_dir/utt2spk_$category"
  utils/subset_data_dir.sh --utt-list "$tmp_dir/utt2spk_$category" \
    "${data_dir}/musan" "${data_dir}/musan_$category"
done

for category in music speech noise; do
  utils/fix_data_dir.sh "${data_dir}/musan_$category"
done

86 changes: 86 additions & 0 deletions egs/sitw/v1/local/make_sitw.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/bin/bash
# Copyright 2017 Ignacio Viñals
# 2017-2018 David Snyder
#
# This script prepares the SITW data. It creates separate directories
# for dev enroll, eval enroll, dev test, and eval test. It also prepares
# multiple trials files, in the test directories, but we usually only use the
# core-core.lst list.

if [ $# != 2 ]; then
  echo "Usage: make_sitw.sh <SITW_PATH> <this_out_dir>"
  echo "E.g.: make_sitw.sh /export/corpora/SRI/sitw data"
  exit 1
fi

in_dir=$1
out_dir=$2

# Convert the key file given as $1 into a trials file on stdout: strip the
# "audio/" prefix and the ".flac" extension, then map the third field to
# "target" when it is "tgt" and to "nontarget" otherwise.
format_trials() {
  sed -e 's@audio/@@g' -e 's@\.flac@@g' "$1" |\
    awk '{ print $1, $2, ($3 == "tgt" ? "target" : "nontarget") }'
}

# Prepare the enrollment data
for mode in dev eval; do
  this_out_dir=${out_dir}/sitw_${mode}_enroll
  mkdir -p "$this_out_dir" 2>/dev/null
  WAVFILE=$this_out_dir/wav.scp
  SPKFILE=$this_out_dir/utt2spk
  MODFILE=$this_out_dir/utt2cond
  # The loops below append, so start from a clean slate. -f ignores files
  # that do not exist yet but still reports real failures.
  rm -f "$WAVFILE" "$SPKFILE" "$MODFILE"
  this_in_dir=${in_dir}/$mode

  for enroll in core assist; do
    # Each list line is "<speaker-id> <relative-audio-path>".
    while read -r line; do
      # The utterance name is the path component just before the extension.
      wav_id=`echo "$line" | awk '{print $2}' |\
        awk 'BEGIN{FS="[./]"}{print $(NF-1)}'`
      spkr_id=`echo "$line" | awk '{print $1}'`
      WAV=`echo "$line" | awk '{print this_in_dir"/"$2}' this_in_dir="$this_in_dir"`
      echo "${spkr_id}_${wav_id} sox -t flac $WAV -t wav -r 16k -b 16 - channels 1 |" >> "$WAVFILE"
      echo "${spkr_id}_${wav_id} ${spkr_id}" >> "$SPKFILE"
      echo "${spkr_id}_${wav_id} $enroll $mode" >> "$MODFILE"
    done < "$this_in_dir/lists/enroll-${enroll}.lst"
  done
  utils/fix_data_dir.sh "$this_out_dir"
done

# Prepare the test data
for mode in dev eval; do
  this_out_dir=${out_dir}/sitw_${mode}_test
  mkdir -p "$this_out_dir" 2>/dev/null
  WAVFILE=$this_out_dir/wav.scp
  SPKFILE=$this_out_dir/utt2spk
  MODFILE=$this_out_dir/utt2cond
  rm -f "$WAVFILE" "$SPKFILE" "$MODFILE"
  mkdir -p "$this_out_dir/trials" 2>/dev/null
  mkdir -p "$this_out_dir/trials/aux" 2>/dev/null
  this_in_dir=${in_dir}/$mode

  for trial in core multi; do
    awk '{print $1,$2}' "$this_in_dir/lists/test-${trial}.lst" |\
      while read -r line; do
        wav_id=`echo "$line" | awk 'BEGIN{FS="[./]"} {print $(NF-1)}'`
        WAV=`echo "$line" | awk '{print this_in_dir"/"$1}' this_in_dir="$this_in_dir"`
        echo "${wav_id} sox -t flac $WAV -t wav -r 16k -b 16 - channels 1 |" >> "$WAVFILE"
        # Test segments have no speaker label here; each utterance is its own speaker.
        echo "${wav_id} ${wav_id}" >> "$SPKFILE"
        echo "${wav_id} $trial $mode" >> "$MODFILE"
      done
  done

  for trial in core-core core-multi assist-core assist-multi; do
    format_trials "$this_in_dir/keys/$trial.lst" > "$this_out_dir/trials/${trial}.lst"
  done

  for trial in "$this_in_dir"/keys/aux/* ; do
    trial_name=`basename "$trial"`
    format_trials "$trial" > "$this_out_dir/trials/aux/${trial_name}"
  done
  utils/fix_data_dir.sh "$this_out_dir"
done
84 changes: 84 additions & 0 deletions egs/sitw/v1/local/make_voxceleb1.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/usr/bin/perl
#
# Copyright 2018 Ewald Enzinger
# 2018 David Snyder
#
# Usage: make_voxceleb1.pl /export/voxceleb1 data/
# Note that this script also downloads a list of speakers that overlap
# with our evaluation set, SITW. These speakers are removed from VoxCeleb1
# prior to preparing the dataset.

use strict;
use warnings;

# Builds the data directory <path-to-data-dir>/voxceleb1 (wav.scp, utt2spk,
# spk2utt) from the speaker directories under <path-to-voxceleb1>/voxceleb1_wav,
# leaving out every speaker named in the SITW overlap list.

if (@ARGV != 2) {
  print STDERR "Usage: $0 <path-to-voxceleb1> <path-to-data-dir>\n";
  print STDERR "e.g. $0 /export/voxceleb1 data/\n";
  exit(1);
}

my ($data_base, $data_dir) = @ARGV;
my $out_dir = "$data_dir/voxceleb1";
my $wav_root = "$data_base/voxceleb1_wav";
my $overlap_file = "$out_dir/voxceleb1_sitw_overlap.txt";
my $overlap_url = "http://www.openslr.org/resources/49/voxceleb1_sitw_overlap.txt";

if (system("mkdir -p $out_dir") != 0) {
  die "Error making directory $out_dir";
}

# This file provides the list of speakers that overlap between SITW and VoxCeleb1.
if (! -e $overlap_file) {
  if (system("wget -O $overlap_file $overlap_url") != 0) {
    # "wget -O" creates the output file even when the download fails. Remove
    # it so that no run mistakes an empty file for a valid overlap list and
    # keeps the overlapping speakers.
    unlink $overlap_file;
    die "Error downloading $overlap_url to $overlap_file";
  }
}

# sitw_overlap contains the list of speakers that also exist in our evaluation set, SITW.
my %sitw_overlap = ();
open(my $overlap_fh, "<", $overlap_file) or die "Could not open the overlap file $overlap_file";
while (my $spkr_id = <$overlap_fh>) {
  chomp $spkr_id;
  $sitw_overlap{$spkr_id} = 1;
}
close($overlap_fh);

# Every subdirectory of the wav root (other than "." and "..") is a speaker.
opendir(my $root_dh, $wav_root) or die "Cannot open directory: $!";
my @spkr_dirs = grep { -d "$wav_root/$_" && ! /^\.{1,2}$/ } readdir($root_dh);
closedir $root_dh;

open(my $spkr_fh, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
open(my $wav_fh, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";

foreach my $spkr_id (@spkr_dirs) {
  # Only keep the speaker if it isn't in the overlap list.
  next if exists $sitw_overlap{$spkr_id};

  my $spkr_dir = "$wav_root/$spkr_id";
  opendir(my $spkr_dh, $spkr_dir) or die "Cannot open directory: $!";
  my @wav_files = grep { /\.wav$/ } readdir($spkr_dh);
  closedir $spkr_dh;

  foreach my $wav_file (@wav_files) {
    # Work on a copy so the directory listing itself is not modified.
    (my $filename = $wav_file) =~ s/\.[^.]+$//;
    # The first 11 characters are taken as the recording ID and the 7
    # characters starting at offset 12 as the segment number.
    my $rec_id = substr($filename, 0, 11);
    my $segment = substr($filename, 12, 7);
    my $utt_id = "$spkr_id-$rec_id-$segment";
    my $wav = "$spkr_dir/$filename.wav";
    print $wav_fh "$utt_id $wav\n";
    print $spkr_fh "$utt_id $spkr_id\n";
  }
}

# Buffered write errors only surface at close, so check it.
close($spkr_fh) or die "Could not close $out_dir/utt2spk: $!";
close($wav_fh) or die "Could not close $out_dir/wav.scp: $!";

# Generate spk2utt, then fix and validate the directory. These steps ran
# twice back to back in the original; once is enough.
if (system(
  "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
  die "Error creating spk2utt file in directory $out_dir";
}
# The exit status of fix_data_dir.sh is deliberately not checked; the
# validation that follows decides whether the directory is usable.
system("env LC_COLLATE=C utils/fix_data_dir.sh $out_dir");
if (system("env LC_COLLATE=C utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
  die "Error validating directory $out_dir";
}
Loading

0 comments on commit 447e964

Please sign in to comment.