Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Export refactor changes #335

Merged
merged 8 commits into from
Oct 1, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Added more test coverage
  • Loading branch information
mmcauliffe committed Oct 1, 2021
commit 62005f81961f53ed95cb2b249d37488d4f2e02fc
3 changes: 2 additions & 1 deletion montreal_forced_aligner/command_line/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def transcribe_corpus(args, unknown_args):
else:
transcribe_config = load_basic_transcribe()
transcribe_config.use_mp = not args.disable_mp
transcribe_config.overwrite = args.overwrite
if unknown_args:
transcribe_config.update_from_args(unknown_args)
data_directory = os.path.join(temp_dir, corpus_name)
Expand Down Expand Up @@ -155,7 +156,7 @@ def transcribe_corpus(args, unknown_args):
handler.close()
logger.removeHandler(handler)
if os.path.exists(data_directory):
with open(conf_path, 'w') as f:
with open(conf_path, 'w', encoding='utf8') as f:
yaml.dump(conf, f)


Expand Down
1 change: 1 addition & 0 deletions montreal_forced_aligner/config/transcribe_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def __init__(self, feature_config):
self.compound_markers = DEFAULT_COMPOUND_MARKERS
self.strip_diacritics = DEFAULT_STRIP_DIACRITICS
self.digraphs = DEFAULT_DIGRAPHS
self.overwrite = False

def params(self):
return {
Expand Down
4 changes: 1 addition & 3 deletions montreal_forced_aligner/corpus/align_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,12 +168,10 @@ def _load_from_temp(self):
self.file_utt_mapping[file] = [utts]
self.text_mapping = load_scp(text_path)
for utt, text in self.text_mapping.items():
if not isinstance(text, list):
text = [text]
text = text.split()
for w in text:
new_w = re.split(r"[-']", w)
self.word_counts.update(new_w + [w])
self.text_mapping[utt] = ' '.join(text)
self.utt_wav_mapping = load_scp(wav_path)
self.sox_strings = load_scp(sox_strings_path)
self.wav_info = load_scp(wav_info_path, float)
Expand Down
15 changes: 12 additions & 3 deletions montreal_forced_aligner/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,22 @@ def make_safe(element):
return ' '.join(map(make_safe, element))
return str(element)

def make_scp_safe(string):
    """
    Escape spaces so a value can be stored as a single scp field.

    scp lines are split on whitespace when they are read back, so any
    literal space inside a key or value is replaced with the
    ``_MFASPACE_`` placeholder.

    Parameters
    ----------
    string
        Value to escape. Non-string values (e.g. numbers) are converted
        with ``str()`` first instead of raising ``AttributeError``.

    Returns
    -------
    str
        The text form of ``string`` with every space replaced by
        ``_MFASPACE_``.
    """
    return str(string).replace(' ', '_MFASPACE_')

def load_scp_safe(string):
    """
    Undo the space escaping applied when an scp field was written.

    Parameters
    ----------
    string : str
        A single field read from an scp file.

    Returns
    -------
    str
        ``string`` with every ``_MFASPACE_`` placeholder turned back into
        a literal space.
    """
    return string.replace('_MFASPACE_', ' ')

def output_mapping(mapping, path):
    """
    Write ``mapping`` to ``path`` as one ``key value`` line per entry.

    Entries are written in sorted key order. Keys always have their spaces
    escaped with ``make_scp_safe`` so that each key stays a single
    whitespace-delimited field. List, set and tuple values are written as
    space-separated items; any other value is stringified and escaped.

    Parameters
    ----------
    mapping : dict
        Keys and values to serialize.
    path : str
        Destination file; it is overwritten and encoded as UTF-8.
    """
    with open(path, 'w', encoding='utf8') as f:
        for k in sorted(mapping):
            v = mapping[k]
            if isinstance(v, (list, set, tuple)):
                # Collections become multiple fields on the line
                v = ' '.join(map(str, v))
            else:
                # str() first so numeric values do not fail on .replace
                v = make_scp_safe(str(v))
            f.write(f'{make_scp_safe(str(k))} {v}\n')


def save_scp(scp, path, sort=True, multiline=False):
Expand Down Expand Up @@ -100,9 +108,11 @@ def load_scp(path, data_type=str):
if line == '':
continue
line_list = line.split()
key = line_list.pop(0)
key = load_scp_safe(line_list.pop(0))
if len(line_list) == 1:
value = data_type(line_list[0])
if isinstance(value, str):
value = load_scp_safe(value)
else:
value = [ data_type(x) for x in line_list if x not in ['[', ']']]
scp[key] = value
Expand Down Expand Up @@ -178,7 +188,6 @@ def setup_logger(identifier, output_directory, console_level='info'):
log_path = os.path.join(output_directory, identifier + '.log')
if os.path.exists(log_path):
os.remove(log_path)
print(log_path)
logger = logging.getLogger(identifier)
logger.setLevel(logging.DEBUG)

Expand Down
18 changes: 8 additions & 10 deletions montreal_forced_aligner/multiprocessing/transcription.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ def final_fmllr_est_func(model_directory, split_directory, sil_phones, job_name,
stderr=log_file, stdout=subprocess.PIPE)
latt_post_proc = subprocess.Popen([thirdparty_binary('lattice-to-post'),
'--acoustic-scale={}'.format(config.acoustic_scale),
'ark:' + lat_path, 'ark:-'],
'ark:-', 'ark:-'],
stdin=determinize_proc.stdout, stdout=subprocess.PIPE, stderr=log_file)
weight_silence_proc = subprocess.Popen([thirdparty_binary('weight-silence-post'),
str(config.silence_weight),
Expand All @@ -471,14 +471,13 @@ def final_fmllr_est_func(model_directory, split_directory, sil_phones, job_name,
fmllr_proc = subprocess.Popen([thirdparty_binary('gmm-est-fmllr'),
'--fmllr-update-type={}'.format(config.fmllr_update_type),
'--spk2utt=ark:' + spk2utt_path, mdl, feat_string,
'ark,s,cs:-', 'ark:' + trans_tmp_path],
'ark,s,cs:-', 'ark:-'],
stdin=weight_silence_proc.stdout, stdout=subprocess.PIPE, stderr=log_file)
fmllr_proc.communicate()

compose_proc = subprocess.Popen([thirdparty_binary('compose-transforms'),
'--b-is-affine=true', 'ark:' + trans_tmp_path,
'--b-is-affine=true', 'ark:-',
'ark:' + pre_trans_path, 'ark:' + trans_path],
stderr=log_file)
stderr=log_file, stdin=fmllr_proc.stdout)
compose_proc.communicate()
else:
for name in dictionary_names:
Expand Down Expand Up @@ -510,7 +509,7 @@ def final_fmllr_est_func(model_directory, split_directory, sil_phones, job_name,
stderr=log_file, stdout=subprocess.PIPE)
latt_post_proc = subprocess.Popen([thirdparty_binary('lattice-to-post'),
'--acoustic-scale={}'.format(config.acoustic_scale),
'ark:' + lat_path, 'ark:-'],
'ark:-', 'ark:-'],
stdin=determinize_proc.stdout, stdout=subprocess.PIPE,
stderr=log_file)
weight_silence_proc = subprocess.Popen([thirdparty_binary('weight-silence-post'),
Expand All @@ -521,14 +520,13 @@ def final_fmllr_est_func(model_directory, split_directory, sil_phones, job_name,
fmllr_proc = subprocess.Popen([thirdparty_binary('gmm-est-fmllr'),
'--fmllr-update-type={}'.format(config.fmllr_update_type),
'--spk2utt=ark:' + spk2utt_path, mdl, dictionary_feat_string,
'ark,s,cs:-', 'ark:' + trans_tmp_path],
'ark,s,cs:-', 'ark:-'],
stdin=weight_silence_proc.stdout, stdout=subprocess.PIPE, stderr=log_file)
fmllr_proc.communicate()

compose_proc = subprocess.Popen([thirdparty_binary('compose-transforms'),
'--b-is-affine=true', 'ark:' + trans_tmp_path,
'--b-is-affine=true', 'ark:-',
'ark:' + pre_trans_path, 'ark:' + trans_path],
stderr=log_file)
stderr=log_file, stdin=fmllr_proc.stdout)
compose_proc.communicate()


Expand Down
11 changes: 9 additions & 2 deletions montreal_forced_aligner/textgrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,11 @@ def ctms_to_textgrids_non_mp(align_config, output_directory, model_directory, di
words = dictionary.words
speaker_mapping = None

backup_output_directory = None
if not align_config.overwrite:
backup_output_directory = os.path.join(model_directory, 'textgrids')
os.makedirs(backup_output_directory, exist_ok=True)

def process_current_word_labels(utterance_id):
if utterance_id in corpus.segments:
seg = corpus.segments[utterance_id]
Expand Down Expand Up @@ -223,8 +228,8 @@ def process_current_phone_labels(utterance_id):
phone_data[file_name][speaker].extend(actual_labels)

export_errors = {}
wav_durations = corpus.file_durations
for i in range(num_jobs):
wav_durations = {}
word_data = {}
phone_data = {}
corpus.logger.debug(f'Parsing ctms for job {i}...')
Expand Down Expand Up @@ -313,9 +318,11 @@ def process_current_phone_labels(utterance_id):
ctm_to_textgrid(file_name, word_ctm, phone_ctm, output_directory, dictionary.silences, wav_durations, dictionary.multilingual_ipa,
frame_shift, words_mapping, speaker_mapping,
dictionary.punctuation, dictionary.clitic_set, dictionary.clitic_markers, dictionary.compound_markers, dictionary.oov_code, words,
dictionary.strip_diacritics, corpus.file_directory_mapping, corpus.file_name_mapping, corpus.speaker_ordering, overwrite)
dictionary.strip_diacritics, corpus.file_directory_mapping, corpus.file_name_mapping, corpus.speaker_ordering, overwrite, backup_output_directory)
processed_files.add(file_name)
except Exception as e:
if align_config.debug:
raise
exc_type, exc_value, exc_traceback = sys.exc_info()
export_errors[file_name] = '\n'.join(
traceback.format_exception(exc_type, exc_value, exc_traceback))
Expand Down
42 changes: 21 additions & 21 deletions montreal_forced_aligner/transcriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .helper import thirdparty_binary
from .multiprocessing import transcribe, transcribe_fmllr
from .corpus import AlignableCorpus
from .textgrid import construct_output_path
from .dictionary import MultispeakerDictionary
from .helper import score, log_kaldi_errors, parse_logs
from .exceptions import KaldiProcessingError
Expand Down Expand Up @@ -467,7 +468,7 @@ def _load_transcripts(self, input_directory=None):
lookup = self.dictionary.reversed_word_mapping
if input_directory is None:
input_directory = self.transcribe_directory
if self.transcribe_config.fmllr and not self.transcribe_config.no_speakers:
if self.acoustic_model.feature_config.fmllr and self.transcribe_config.fmllr and not self.transcribe_config.no_speakers:
input_directory = os.path.join(input_directory, 'fmllr')
for j in range(self.corpus.num_jobs):
tra_path = os.path.join(input_directory, 'tra.{}'.format(j))
Expand Down Expand Up @@ -504,40 +505,39 @@ def _load_transcripts(self, input_directory=None):
return transcripts

def export_transcriptions(self, output_directory, source=None):
backup_output_directory = None
if not self.transcribe_config.overwrite:
backup_output_directory = os.path.join(self.transcribe_directory, 'transcriptions')
os.makedirs(backup_output_directory, exist_ok=True)
transcripts = self._load_transcripts(source)
wav_durations = self.corpus.file_durations
if not self.corpus.segments:
for utt, t in transcripts.items():
relative = self.corpus.file_directory_mapping[utt]
if relative:
speaker_directory = os.path.join(output_directory, relative)
else:
speaker_directory = output_directory
os.makedirs(speaker_directory, exist_ok=True)
outpath = os.path.join(speaker_directory, utt + '.lab')
with open(outpath, 'w', encoding='utf8') as f:
speaker = self.corpus.utt_speak_mapping[utt]
output_name, output_path = construct_output_path(utt, output_directory, self.corpus.file_directory_mapping,
self.corpus.file_name_mapping,
speaker, backup_output_directory)
output_path = output_path.replace('.TextGrid', '.lab')
with open(output_path, 'w', encoding='utf8') as f:
f.write(t)

else:

for filename in self.corpus.file_directory_mapping.keys():
maxtime = self.corpus.get_wav_duration(filename)
speaker_directory = output_directory
try:
if self.corpus.file_directory_mapping[filename]:
speaker_directory = os.path.join(output_directory, self.corpus.file_directory_mapping[filename])
except KeyError:
pass
os.makedirs(speaker_directory, exist_ok=True)
output_name, output_path = construct_output_path(filename, output_directory, self.corpus.file_directory_mapping,
self.corpus.file_name_mapping,
backup_output_directory=backup_output_directory)
max_time = round(wav_durations[output_name], 4)
tiers = {}
if self.transcribe_config.no_speakers:
speaker = 'speech'
tiers[speaker] = textgrid.IntervalTier(speaker, [], minT=0, maxT=maxtime)
tiers[speaker] = textgrid.IntervalTier(speaker, [], minT=0, maxT=max_time)
else:
for speaker in self.corpus.speaker_ordering[filename]:
tiers[speaker] = textgrid.IntervalTier(speaker, [], minT=0, maxT=maxtime)
tiers[speaker] = textgrid.IntervalTier(speaker, [], minT=0, maxT=max_time)

tg = textgrid.Textgrid()
tg.maxTimestamp = maxtime
tg.maxTimestamp = max_time
for utt_name, text in transcripts.items():
seg = self.corpus.segments[utt_name]
utt_filename, begin, end = seg['file_name'], seg['begin'], seg['end']
Expand All @@ -552,5 +552,5 @@ def export_transcriptions(self, output_directory, source=None):
tiers[speaker].entryList.append(Interval(start=begin, end=end, label=text))
for t in tiers.values():
tg.addTier(t)
tg.save(os.path.join(speaker_directory, filename + '.TextGrid'),
tg.save(output_path,
includeBlankSpaces=True, format='long_textgrid')
7 changes: 5 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,10 @@ def basic_split_dir(corpus_root_dir, wav_dir, lab_dir, textgrid_dir):
def multilingual_ipa_corpus_dir(corpus_root_dir, wav_dir, lab_dir):
path = os.path.join(corpus_root_dir, 'multilingual')
os.makedirs(path, exist_ok=True)
names = [('speaker', ['multilingual_ipa']), ('speaker_two', ['multilingual_ipa_us']) ]
names = [('speaker', ['multilingual_ipa','multilingual_ipa_2','multilingual_ipa_3',
'multilingual_ipa_4','multilingual_ipa_5',]),
('speaker_two', ['multilingual_ipa_us','multilingual_ipa_us_2','multilingual_ipa_us_3',
'multilingual_ipa_us_4','multilingual_ipa_us_5']) ]
for s, files in names:
s_dir = os.path.join(path, s)
os.makedirs(s_dir, exist_ok=True)
Expand All @@ -205,7 +208,7 @@ def multilingual_ipa_corpus_dir(corpus_root_dir, wav_dir, lab_dir):

@pytest.fixture(scope='session')
def multilingual_ipa_tg_corpus_dir(corpus_root_dir, wav_dir, textgrid_dir):
path = os.path.join(corpus_root_dir, 'multilingual')
path = os.path.join(corpus_root_dir, 'multilingual_tg')
os.makedirs(path, exist_ok=True)
names = [('speaker', ['multilingual_ipa']), ('speaker_two', ['multilingual_ipa_us']) ]
for s, files in names:
Expand Down
2 changes: 1 addition & 1 deletion tests/data/configs/transcribe.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use_mp: false
fmllr: false
fmllr: true


1 change: 1 addition & 0 deletions tests/data/lab/multilingual_ipa_2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
welcome to a series of platchat videos where we're gonna tackle every single team in the overwatch league twenty twenty
1 change: 1 addition & 0 deletions tests/data/lab/multilingual_ipa_3.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
and run you through
1 change: 1 addition & 0 deletions tests/data/lab/multilingual_ipa_4.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
kinda our fears and also predictions for them
1 change: 1 addition & 0 deletions tests/data/lab/multilingual_ipa_5.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
i'm sideshow joined by custa and reinforce we've got a special edition of platchat
1 change: 1 addition & 0 deletions tests/data/lab/multilingual_ipa_us_2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hey josh could have finished it he just decided to fail it instead
1 change: 1 addition & 0 deletions tests/data/lab/multilingual_ipa_us_3.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
really good performances against top teams that have ended up going their way
1 change: 1 addition & 0 deletions tests/data/lab/multilingual_ipa_us_4.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
uh i i still think it's a very good team though in na i think this is uh
1 change: 1 addition & 0 deletions tests/data/lab/multilingual_ipa_us_5.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
uh and this was the first time i think the justice really looked like an elite team
31 changes: 31 additions & 0 deletions tests/data/textgrid/multilingual_ipa_2.TextGrid
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
File type = "ooTextFile"
Object class = "TextGrid"

0
6.2271
<exists>
3
"IntervalTier"
"custa"
0
6.2271
1
0
6.2271
""
"IntervalTier"
"reinforce"
0
6.2271
1
0
6.2271
""
"IntervalTier"
"sideshow"
0
6.2271
1
0
6.2271
"welcome to a series of platchat videos where we're gonna tackle every single team in the overwatch league twenty twenty"
31 changes: 31 additions & 0 deletions tests/data/textgrid/multilingual_ipa_3.TextGrid
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
File type = "ooTextFile"
Object class = "TextGrid"

0
1.3062999999999994
<exists>
3
"IntervalTier"
"custa"
0
1.3062999999999994
1
0
1.3062999999999994
""
"IntervalTier"
"reinforce"
0
1.3062999999999994
1
0
1.3062999999999994
""
"IntervalTier"
"sideshow"
0
1.3062999999999994
1
0
1.3062999999999994
"and run you through"
Loading