forked from kaldi-asr/kaldi
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[egs] Minor fixes to x-vector based recipes (kaldi-asr#2426)
Add missing data prep scripts for MUSAN for callhome_diarization; Copy vad.scp and segments to *_cmn data folders after prepare_feats; Fix check before create_split_dir
- Loading branch information
Showing
10 changed files
with
168 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
#!/usr/bin/env python3 | ||
# Copyright 2015 David Snyder | ||
# Apache 2.0. | ||
# | ||
# This file is meant to be invoked by make_musan.sh. | ||
|
||
import os, sys | ||
|
||
def process_music_annotations(path):
    """Parse one music ANNOTATIONS file.

    Every non-blank line is split on whitespace and its first four fields
    are read as: utterance ID, genres, vocals flag and musician.  Any
    further fields on the line are ignored.

    Args:
        path: Path of the ANNOTATIONS file to read.

    Returns:
        A ``(utt2spk, utt2vocals)`` tuple of dicts keyed by utterance ID.
        ``utt2spk`` maps each utterance to itself; ``utt2vocals`` maps it
        to ``True`` only when the vocals field is exactly ``"Y"``.

    Raises:
        ValueError: If a non-blank line has fewer than four fields.
    """
    utt2spk = {}
    utt2vocals = {}
    # The context manager closes the handle even if a malformed line raises.
    with open(path, 'r') as annotations:
        for line in annotations:
            fields = line.split()
            if not fields:
                # A blank line carries no annotation; skip it instead of
                # failing the four-way unpack below.
                continue
            utt, _genres, vocals, _musician = fields[:4]
            # For this application, the musician ID isn't important, so
            # each utterance is used as its own speaker.
            utt2spk[utt] = utt
            utt2vocals[utt] = vocals == "Y"
    return utt2spk, utt2vocals
|
||
def prepare_music(root_dir, use_vocals):
    """Collect the music recordings found under ``<root_dir>/music``.

    The directory tree is walked once.  Every ``*.wav`` file is recorded
    under its file name minus the ``.wav`` suffix, and every file named
    ``ANNOTATIONS`` is parsed with ``process_music_annotations``.  Only
    annotated utterances are considered for output.

    Args:
        root_dir: Directory that contains the ``music`` sub-directory.
        use_vocals: When false, utterances annotated as containing vocals
            are left out of the returned text.

    Returns:
        A ``(utt2spk_str, utt2wav_str)`` tuple of newline-terminated text.
        ``utt2spk_str`` holds ``"<utt> <spk>"`` lines and ``utt2wav_str``
        holds ``"<utt> sox -t wav <path> -r 8k -t wav - |"`` lines.
    """
    wav_suffix = ".wav"
    utt2vocals = {}
    utt2spk = {}
    utt2wav = {}
    num_good_files = 0
    num_bad_files = 0
    music_dir = os.path.join(root_dir, "music")
    for root, _dirs, files in os.walk(music_dir):
        for name in files:
            file_path = os.path.join(root, name)
            if name.endswith(wav_suffix):
                # Strip only the trailing suffix; str.replace would also
                # remove any ".wav" that appears inside the name.
                utt2wav[name[:-len(wav_suffix)]] = file_path
            elif name == "ANNOTATIONS":
                utt2spk_part, utt2vocals_part = process_music_annotations(file_path)
                utt2spk.update(utt2spk_part)
                utt2vocals.update(utt2vocals_part)

    # Collect lines in lists and join once rather than growing a string.
    utt2spk_lines = []
    utt2wav_lines = []
    for utt in utt2vocals:
        if utt in utt2wav:
            if use_vocals or not utt2vocals[utt]:
                utt2spk_lines.append(utt + " " + utt2spk[utt] + "\n")
                utt2wav_lines.append(
                    utt + " sox -t wav " + utt2wav[utt] + " -r 8k -t wav - |\n")
            # NOTE(review): every annotated utterance that has a wav file is
            # counted here, whether or not the vocals filter kept it --
            # confirm this nesting against the upstream script.
            num_good_files += 1
        else:
            # Annotated, but no matching wav file was found on disk.
            print("Missing file", utt)
            num_bad_files += 1
    print("In music directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data")
    return "".join(utt2spk_lines), "".join(utt2wav_lines)
|
||
def prepare_speech(root_dir):
    """Collect the speech recordings found under ``<root_dir>/speech``.

    The directory tree is walked once and every ``*.wav`` file becomes an
    utterance named after the file minus its ``.wav`` suffix.  Each
    utterance is used as its own speaker.  If the same file name occurs in
    several sub-directories, the path seen last is the one kept.

    Args:
        root_dir: Directory that contains the ``speech`` sub-directory.

    Returns:
        A ``(utt2spk_str, utt2wav_str)`` tuple of newline-terminated text.
        ``utt2spk_str`` holds ``"<utt> <utt>"`` lines and ``utt2wav_str``
        holds ``"<utt> sox -t wav <path> -r 8k -t wav - |"`` lines.
    """
    wav_suffix = ".wav"
    utt2wav = {}
    speech_dir = os.path.join(root_dir, "speech")
    for root, _dirs, files in os.walk(speech_dir):
        for name in files:
            if name.endswith(wav_suffix):
                # Strip only the trailing suffix; str.replace would also
                # remove any ".wav" that appears inside the name.
                utt2wav[name[:-len(wav_suffix)]] = os.path.join(root, name)

    # Every utterance comes from a wav file that was just found, so there
    # is no "missing wav" case to handle; the count is reported as 0 to
    # keep the summary line in its usual format.
    num_good_files = len(utt2wav)
    num_bad_files = 0
    utt2spk_str = "".join(utt + " " + utt + "\n" for utt in utt2wav)
    utt2wav_str = "".join(
        utt + " sox -t wav " + wav_path + " -r 8k -t wav - |\n"
        for utt, wav_path in utt2wav.items())
    print("In speech directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data")
    return utt2spk_str, utt2wav_str
|
||
def prepare_noise(root_dir):
    """Collect the noise recordings found under ``<root_dir>/noise``.

    The directory tree is walked once and every ``*.wav`` file becomes an
    utterance named after the file minus its ``.wav`` suffix.  Each
    utterance is used as its own speaker.  If the same file name occurs in
    several sub-directories, the path seen last is the one kept.

    Args:
        root_dir: Directory that contains the ``noise`` sub-directory.

    Returns:
        A ``(utt2spk_str, utt2wav_str)`` tuple of newline-terminated text.
        ``utt2spk_str`` holds ``"<utt> <utt>"`` lines and ``utt2wav_str``
        holds ``"<utt> sox -t wav <path> -r 8k -t wav - |"`` lines.
    """
    wav_suffix = ".wav"
    utt2wav = {}
    noise_dir = os.path.join(root_dir, "noise")
    for root, _dirs, files in os.walk(noise_dir):
        for name in files:
            if name.endswith(wav_suffix):
                # Strip only the trailing suffix; str.replace would also
                # remove any ".wav" that appears inside the name.
                utt2wav[name[:-len(wav_suffix)]] = os.path.join(root, name)

    # Every utterance comes from a wav file that was just found, so there
    # is no "missing wav" case to handle; the count is reported as 0 to
    # keep the summary line in its usual format.
    num_good_files = len(utt2wav)
    num_bad_files = 0
    utt2spk_str = "".join(utt + " " + utt + "\n" for utt in utt2wav)
    utt2wav_str = "".join(
        utt + " sox -t wav " + wav_path + " -r 8k -t wav - |\n"
        for utt, wav_path in utt2wav.items())
    print("In noise directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data")
    return utt2spk_str, utt2wav_str
|
||
def main():
    """Write ``wav.scp`` and ``utt2spk`` for the speech, music and noise data.

    Command-line arguments:
        1. input root directory handed to the three ``prepare_*`` functions
        2. output directory; it must already exist
        3. vocals flag; music with vocals is kept only when this is ``"Y"``

    Exits with an error message when fewer than three arguments are given.
    """
    if len(sys.argv) < 4:
        sys.exit("Usage: " + sys.argv[0] + " <in-dir> <out-dir> <use-vocals: Y|N>")
    in_dir = sys.argv[1]
    out_dir = sys.argv[2]
    use_vocals = sys.argv[3] == "Y"

    utt2spk_music, utt2wav_music = prepare_music(in_dir, use_vocals)
    utt2spk_speech, utt2wav_speech = prepare_speech(in_dir)
    utt2spk_noise, utt2wav_noise = prepare_noise(in_dir)
    utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise
    utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise

    # Context managers make sure both files are flushed and closed before
    # the process returns control to the calling script.
    with open(os.path.join(out_dir, "wav.scp"), 'w') as wav_fi:
        wav_fi.write(utt2wav)
    with open(os.path.join(out_dir, "utt2spk"), 'w') as utt2spk_fi:
        utt2spk_fi.write(utt2spk)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#!/bin/bash
# Copyright 2015   David Snyder
# Apache 2.0.
#
# This script, called by ../run.sh, creates the MUSAN
# data directory.  The required dataset is freely available at
#   http://www.openslr.org/17/
#
# Arguments:
#   $1  root directory of the downloaded dataset
#   $2  data directory; <data-dir>/musan and the musan_music, musan_speech
#       and musan_noise subsets are created inside it

set -e

# Refuse to run with missing arguments: empty variables would otherwise
# expand to paths such as "/musan".
if [ $# -lt 2 ]; then
  echo "Usage: $0 <in-dir> <data-dir>" >&2
  exit 1
fi

in_dir=$1
data_dir=$2
use_vocals='Y'

# Scratch space for the per-category utterance lists; removed at the end.
mkdir -p local/musan.tmp

echo "Preparing ${data_dir}/musan..."
mkdir -p "${data_dir}/musan"
local/make_musan.py "${in_dir}" "${data_dir}/musan" "${use_vocals}"

utils/fix_data_dir.sh "${data_dir}/musan"

# Split the combined utt2spk into one list per category by matching the
# category name anywhere on the line.
grep "music" "${data_dir}/musan/utt2spk" > local/musan.tmp/utt2spk_music
grep "speech" "${data_dir}/musan/utt2spk" > local/musan.tmp/utt2spk_speech
grep "noise" "${data_dir}/musan/utt2spk" > local/musan.tmp/utt2spk_noise
utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_music \
  "${data_dir}/musan" "${data_dir}/musan_music"
utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_speech \
  "${data_dir}/musan" "${data_dir}/musan_speech"
utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_noise \
  "${data_dir}/musan" "${data_dir}/musan_noise"

utils/fix_data_dir.sh "${data_dir}/musan_music"
utils/fix_data_dir.sh "${data_dir}/musan_speech"
utils/fix_data_dir.sh "${data_dir}/musan_noise"

rm -rf local/musan.tmp
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters