From c68ac0a7fd5e1965e839193f71855a7140258e7f Mon Sep 17 00:00:00 2001 From: Michael McAuliffe Date: Fri, 13 Aug 2021 16:13:11 -0700 Subject: [PATCH] Clean up and improve testing --- montreal_forced_aligner/command_line/adapt.py | 5 +- montreal_forced_aligner/command_line/align.py | 5 +- montreal_forced_aligner/command_line/mfa.py | 6 + .../command_line/train_and_align.py | 5 +- .../command_line/transcribe.py | 4 +- .../features/processing.py | 199 +----------------- .../multiprocessing/alignment.py | 183 +++++----------- .../multiprocessing/transcription.py | 23 +- montreal_forced_aligner/transcriber.py | 2 +- tests/conftest.py | 18 ++ tests/test_commandline_align.py | 10 +- tests/test_commandline_lm.py | 11 + tests/test_commandline_transcribe.py | 16 +- 13 files changed, 133 insertions(+), 354 deletions(-) diff --git a/montreal_forced_aligner/command_line/adapt.py b/montreal_forced_aligner/command_line/adapt.py index 7148e4da..80c3d2d6 100644 --- a/montreal_forced_aligner/command_line/adapt.py +++ b/montreal_forced_aligner/command_line/adapt.py @@ -83,12 +83,15 @@ def adapt_model(args, unknown_args=None): training_config = acoustic_model.adaptation_config() logger.debug('ADAPT TRAINING CONFIG:') log_config(logger, training_config) + audio_dir = None + if args.audio_directory: + audio_dir = args.audio_directory try: corpus = AlignableCorpus(args.corpus_directory, data_directory, speaker_characters=args.speaker_characters, num_jobs=args.num_jobs, sample_rate=align_config.feature_config.sample_frequency, logger=logger, use_mp=align_config.use_mp, punctuation=align_config.punctuation, - clitic_markers=align_config.clitic_markers) + clitic_markers=align_config.clitic_markers, audio_directory=audio_dir) if corpus.issues_check: logger.warning('Some issues parsing the corpus were detected. ' 'Please run the validator to get more information.') diff --git a/montreal_forced_aligner/command_line/align.py b/montreal_forced_aligner/command_line/align.py index fdc72ee8..546828df 100644 --- a/montreal_forced_aligner/command_line/align.py +++ b/montreal_forced_aligner/command_line/align.py @@ -81,12 +81,15 @@ def align_corpus(args, unknown_args=None): os.makedirs(args.output_directory, exist_ok=True) acoustic_model = AcousticModel(args.acoustic_model_path, root_directory=model_directory) acoustic_model.log_details(logger) + audio_dir = None + if args.audio_directory: + audio_dir = args.audio_directory try: corpus = AlignableCorpus(args.corpus_directory, data_directory, speaker_characters=args.speaker_characters, num_jobs=args.num_jobs, sample_rate=align_config.feature_config.sample_frequency, logger=logger, use_mp=align_config.use_mp, punctuation=align_config.punctuation, - clitic_markers=align_config.clitic_markers) + clitic_markers=align_config.clitic_markers, audio_directory=audio_dir) if corpus.issues_check: logger.warning('Some issues parsing the corpus were detected. 
' 'Please run the validator to get more information.') diff --git a/montreal_forced_aligner/command_line/mfa.py b/montreal_forced_aligner/command_line/mfa.py index 7ec4c199..ae71164e 100644 --- a/montreal_forced_aligner/command_line/mfa.py +++ b/montreal_forced_aligner/command_line/mfa.py @@ -73,6 +73,8 @@ def unfix_path(): 'default is to use directory names') align_parser.add_argument('-t', '--temp_directory', type=str, default='', help='Temporary directory root to use for aligning, default is ~/Documents/MFA') +align_parser.add_argument('-a', '--audio_directory', type=str, default='', + help='Audio directory root to use for finding audio files') align_parser.add_argument('-j', '--num_jobs', type=int, default=3, help='Number of cores to use while aligning') align_parser.add_argument('-v', '--verbose', help="Print more information during alignment", action='store_true') @@ -94,6 +96,8 @@ def unfix_path(): 'default is to use directory names') adapt_parser.add_argument('-t', '--temp_directory', type=str, default='', help='Temporary directory root to use for aligning, default is ~/Documents/MFA') +adapt_parser.add_argument('-a', '--audio_directory', type=str, default='', + help='Audio directory root to use for finding audio files') adapt_parser.add_argument('-j', '--num_jobs', type=int, default=3, help='Number of cores to use while aligning') adapt_parser.add_argument('-v', '--verbose', help="Print more information during alignment", action='store_true') @@ -115,6 +119,8 @@ def unfix_path(): 'default is to use directory names') train_parser.add_argument('-t', '--temp_directory', type=str, default='', help='Temporary directory root to use for aligning, default is ~/Documents/MFA') +train_parser.add_argument('-a', '--audio_directory', type=str, default='', + help='Audio directory root to use for finding audio files') train_parser.add_argument('-j', '--num_jobs', type=int, default=3, help='Number of cores to use while aligning') train_parser.add_argument('-v', '--verbose', help="Output debug messages about alignment", action='store_true') diff --git a/montreal_forced_aligner/command_line/train_and_align.py b/montreal_forced_aligner/command_line/train_and_align.py index 208e529c..cf4a098c 100644 --- a/montreal_forced_aligner/command_line/train_and_align.py +++ b/montreal_forced_aligner/command_line/train_and_align.py @@ -78,12 +78,15 @@ def align_corpus(args, unknown_args=None): os.makedirs(data_directory, exist_ok=True) model_directory = os.path.join(data_directory, 'acoustic_models') os.makedirs(args.output_directory, exist_ok=True) + audio_dir = None + if args.audio_directory: + audio_dir = args.audio_directory try: corpus = AlignableCorpus(args.corpus_directory, data_directory, speaker_characters=args.speaker_characters, num_jobs=getattr(args, 'num_jobs', 3), sample_rate=align_config.feature_config.sample_frequency, debug=getattr(args, 'debug', False), logger=logger, use_mp=align_config.use_mp, - punctuation=align_config.punctuation, clitic_markers=align_config.clitic_markers) + punctuation=align_config.punctuation, clitic_markers=align_config.clitic_markers, audio_directory=audio_dir) if corpus.issues_check: logger.warning('Some issues parsing the corpus were detected. 
' 'Please run the validator to get more information.') diff --git a/montreal_forced_aligner/command_line/transcribe.py b/montreal_forced_aligner/command_line/transcribe.py index d09e210e..37324501 100644 --- a/montreal_forced_aligner/command_line/transcribe.py +++ b/montreal_forced_aligner/command_line/transcribe.py @@ -109,13 +109,13 @@ def transcribe_corpus(args, unknown_args): punctuation=transcribe_config.punctuation, clitic_markers=transcribe_config.clitic_markers, compound_markers=transcribe_config.compound_markers, - multilingual_ipa=transcribe_config.multilingual_ipa) + multilingual_ipa=acoustic_model.meta['multilingual_ipa']) else: dictionary = Dictionary(args.dictionary_path, data_directory, logger=logger, punctuation=transcribe_config.punctuation, clitic_markers=transcribe_config.clitic_markers, compound_markers=transcribe_config.compound_markers, - multilingual_ipa=transcribe_config.multilingual_ipa) + multilingual_ipa=acoustic_model.meta['multilingual_ipa']) acoustic_model.validate(dictionary) begin = time.time() t = Transcriber(corpus, dictionary, acoustic_model, language_model, transcribe_config, diff --git a/montreal_forced_aligner/features/processing.py b/montreal_forced_aligner/features/processing.py index 83938ffb..6e9d0a1e 100644 --- a/montreal_forced_aligner/features/processing.py +++ b/montreal_forced_aligner/features/processing.py @@ -39,8 +39,8 @@ def mfcc_func(directory, job_name, mfcc_options): copy_proc.communicate() utt_lengths_proc = subprocess.Popen([thirdparty_binary('feat-to-len'), - 'scp:' + raw_scp_path, 'ark,t:'+ lengths_path], - stderr=log_file) + 'scp:' + raw_scp_path, 'ark,t:' + lengths_path], + stderr=log_file) utt_lengths_proc.communicate() @@ -127,198 +127,3 @@ def compute_vad(directory, num_jobs, use_mp, vad_config=None): run_mp(compute_vad_func, jobs, log_directory) else: run_non_mp(compute_vad_func, jobs, log_directory) - - -def apply_cmvn_func(directory, job_name, config): - normed_scp_path = os.path.join(directory, config.raw_feature_id + '.{}.scp'.format(job_name)) - normed_ark_path = os.path.join(directory, config.raw_feature_id + '.{}.ark'.format(job_name)) - with open(os.path.join(directory, 'log', 'norm.{}.log'.format(job_name)), 'w') as log_file: - utt2spk_path = os.path.join(directory, 'utt2spk.{}'.format(job_name)) - cmvn_path = os.path.join(directory, 'cmvn.{}.scp'.format(job_name)) - feats_path = os.path.join(directory, 'feats.{}.scp'.format(job_name)) - if not os.path.exists(normed_scp_path): - cmvn_proc = subprocess.Popen([thirdparty_binary('apply-cmvn'), - '--utt2spk=ark:' + utt2spk_path, - 'scp:' + cmvn_path, - 'scp:' + feats_path, - 'ark,scp:{},{}'.format(normed_ark_path, normed_scp_path)], - stderr=log_file - ) - cmvn_proc.communicate() - - -def apply_cmvn(directory, num_jobs, config): - log_directory = os.path.join(directory, 'log') - os.makedirs(log_directory, exist_ok=True) - jobs = [(directory, x, config) - for x in range(num_jobs)] - if config.use_mp: - run_mp(apply_cmvn_func, jobs, log_directory) - else: - run_non_mp(apply_cmvn_func, jobs, log_directory) - - -def select_voiced_func(directory, job_name, apply_cmn): - feats_path = os.path.join(directory, 'feats.{}.scp'.format(job_name)) - vad_scp_path = os.path.join(directory, 'vad.{}.scp'.format(job_name)) - voiced_scp_path = os.path.join(directory, 'feats_voiced.{}.scp'.format(job_name)) - voiced_ark_path = os.path.join(directory, 'feats_voiced.{}.ark'.format(job_name)) - with open(os.path.join(directory, 'log', 'select-voiced.{}.log'.format(job_name)), 'w') as log_file: - 
deltas_proc = subprocess.Popen([thirdparty_binary('add-deltas'), - 'scp:' + feats_path, - 'ark:-' - ], stdout=subprocess.PIPE, stderr=log_file) - if apply_cmn: - cmvn_proc = subprocess.Popen([thirdparty_binary('apply-cmvn-sliding'), - '--norm-vars=false', - '--center=true', - '--cmn-window=300', - 'ark:-', 'ark:-'], - stdin=deltas_proc.stdout, stdout=subprocess.PIPE, stderr=log_file) - select_proc = subprocess.Popen([thirdparty_binary('select-voiced-frames'), - 'ark:-', - 'scp,s,cs:' + vad_scp_path, - 'ark,scp:{},{}'.format(voiced_ark_path, voiced_scp_path)], - stdin=cmvn_proc.stdout, stderr=log_file) - else: - select_proc = subprocess.Popen([thirdparty_binary('select-voiced-frames'), - 'ark:-', - 'scp,s,cs:' + vad_scp_path, - 'ark,scp:{},{}'.format(voiced_ark_path, voiced_scp_path)], - stdin=deltas_proc.stdout, stderr=log_file) - select_proc.communicate() - - -def select_voiced(directory, num_jobs, config, apply_cmn=False): - log_directory = os.path.join(directory, 'log') - os.makedirs(log_directory, exist_ok=True) - jobs = [(directory, x, apply_cmn) - for x in range(num_jobs)] - if config.use_mp: - run_mp(select_voiced_func, jobs, log_directory) - else: - run_non_mp(select_voiced_func, jobs, log_directory) - - -def compute_ivector_features_func(directory, job_name, apply_cmn): - feats_path = os.path.join(directory, 'feats.{}.scp'.format(job_name)) - out_feats_scp_path = os.path.join(directory, 'feats_for_ivector.{}.scp'.format(job_name)) - out_feats_ark_path = os.path.join(directory, 'feats_for_ivector.{}.ark'.format(job_name)) - - with open(os.path.join(directory, 'log', 'cmvn_sliding.{}.log'.format(job_name)), 'w') as log_file: - if apply_cmn: - deltas_proc = subprocess.Popen([thirdparty_binary('add-deltas'), - 'scp:' + feats_path, - 'ark:-' - ], stdout=subprocess.PIPE, stderr=log_file) - - cmvn_proc = subprocess.Popen([thirdparty_binary('apply-cmvn-sliding'), - '--norm-vars=false', - '--center=true', - '--cmn-window=300', - 'ark:-', 'ark,scp:{},{}'.format(out_feats_ark_path, out_feats_scp_path)], - stdin=deltas_proc.stdout, stdout=subprocess.PIPE, stderr=log_file) - cmvn_proc.communicate() - else: - deltas_proc = subprocess.Popen([thirdparty_binary('add-deltas'), - 'scp:' + feats_path, - 'ark,scp:{},{}'.format(out_feats_ark_path, out_feats_scp_path) - ], stderr=log_file) - deltas_proc.communicate() - - -def compute_ivector_features(directory, num_jobs, config, apply_cmn=False): - log_directory = os.path.join(directory, 'log') - os.makedirs(log_directory, exist_ok=True) - jobs = [(directory, x, apply_cmn) - for x in range(num_jobs)] - if config.use_mp: - run_mp(compute_ivector_features_func, jobs, log_directory) - else: - run_non_mp(compute_ivector_features_func, jobs, log_directory) - - -def generate_spliced_features_func(directory, raw_feature_id, config, job_name): - normed_scp_path = os.path.join(directory, raw_feature_id + '.{}.scp'.format(job_name)) - spliced_feature_id = raw_feature_id + '_spliced' - ark_path = os.path.join(directory, spliced_feature_id + '.{}.ark'.format(job_name)) - scp_path = os.path.join(directory, spliced_feature_id + '.{}.scp'.format(job_name)) - log_path = os.path.join(directory, 'log', 'lda.{}.log'.format(job_name)) - with open(log_path, 'a') as log_file: - splice_feats_proc = subprocess.Popen([thirdparty_binary('splice-feats'), - '--left-context={}'.format(config['splice_left_context']), - '--right-context={}'.format(config['splice_right_context']), - 'scp:' + normed_scp_path, - 'ark,scp:{},{}'.format(ark_path, scp_path)], - stderr=log_file) - 
splice_feats_proc.communicate() - - -def generate_spliced_features(directory, num_jobs, config): - log_directory = os.path.join(directory, 'log') - os.makedirs(log_directory, exist_ok=True) - jobs = [(directory, config.raw_feature_id, config.splice_options, x) - for x in range(num_jobs)] - if config.use_mp: - run_mp(generate_spliced_features_func, jobs, log_directory) - else: - run_non_mp(generate_spliced_features_func, jobs, log_directory) - - -def add_deltas_func(directory, job_name, config): - normed_scp_path = os.path.join(directory, config.raw_feature_id + '.{}.scp'.format(job_name)) - ark_path = os.path.join(directory, config.feature_id + '.{}.ark'.format(job_name)) - scp_path = os.path.join(directory, config.feature_id + '.{}.scp'.format(job_name)) - with open(os.path.join(directory, 'log', 'add_deltas.{}.log'.format(job_name)), 'w') as log_file: - if config.fmllr_path is not None and os.path.exists(config.fmllr_path): - deltas_proc = subprocess.Popen([thirdparty_binary('add-deltas'), - 'scp:' + normed_scp_path, 'ark:-'], - stderr=log_file, - stdout=subprocess.PIPE) - trans_proc = subprocess.Popen([thirdparty_binary('transform-feats'), - 'ark:' + config.fmllr_path, 'ark:-', - 'ark,scp:{},{}'.format(ark_path, scp_path)], - stdin=deltas_proc.stdout, - stderr=log_file) - trans_proc.communicate() - else: - deltas_proc = subprocess.Popen([thirdparty_binary('add-deltas'), - 'scp:' + normed_scp_path, 'ark,scp:{},{}'.format(ark_path, scp_path)], - stderr=log_file) - deltas_proc.communicate() - - -def add_deltas(directory, num_jobs, config): - log_directory = os.path.join(directory, 'log') - os.makedirs(log_directory, exist_ok=True) - jobs = [(directory, x, config) - for x in range(num_jobs)] - if config.use_mp: - run_mp(add_deltas_func, jobs, log_directory) - else: - run_non_mp(add_deltas_func, jobs, log_directory) - - -def apply_lda_func(directory, spliced_feature_id, feature_id, lda_path, job_name): - normed_scp_path = os.path.join(directory, spliced_feature_id + '.{}.scp'.format(job_name)) - ark_path = os.path.join(directory, feature_id + '.{}.ark'.format(job_name)) - scp_path = os.path.join(directory, feature_id + '.{}.scp'.format(job_name)) - log_path = os.path.join(directory, 'log', 'lda.{}.log'.format(job_name)) - with open(log_path, 'a') as log_file: - transform_feats_proc = subprocess.Popen([thirdparty_binary("transform-feats"), - lda_path, - 'scp:'+ normed_scp_path, - 'ark,scp:{},{}'.format(ark_path, scp_path)], - stderr=log_file) - transform_feats_proc.communicate() - - -def apply_lda(directory, num_jobs, config): - log_directory = os.path.join(directory, 'log') - os.makedirs(log_directory, exist_ok=True) - jobs = [(directory, config.spliced_feature_id, config.feature_id, config.lda_path, x) - for x in range(num_jobs)] - if config.use_mp and False: # Looks to be threaded - run_mp(apply_lda_func, jobs, log_directory) - else: - run_non_mp(apply_lda_func, jobs, log_directory) diff --git a/montreal_forced_aligner/multiprocessing/alignment.py b/montreal_forced_aligner/multiprocessing/alignment.py index 39a5df0f..35273a79 100644 --- a/montreal_forced_aligner/multiprocessing/alignment.py +++ b/montreal_forced_aligner/multiprocessing/alignment.py @@ -1,6 +1,5 @@ import subprocess import os -import shutil import re from decimal import Decimal import statistics @@ -12,28 +11,6 @@ from ..exceptions import AlignmentError -def parse_transitions(path, phones_path): - state_extract_pattern = re.compile(r'Transition-state (\d+): phone = (\w+)') - id_extract_pattern = re.compile(r'Transition-id = 
(\d+)') - cur_phone = None - current = 0 - with open(path, encoding='utf8') as f, open(phones_path, 'w', encoding='utf8') as outf: - outf.write('{} {}\n'.format('', 0)) - for line in f: - line = line.strip() - if line.startswith('Transition-state'): - m = state_extract_pattern.match(line) - _, phone = m.groups() - if phone != cur_phone: - current = 0 - cur_phone = phone - else: - m = id_extract_pattern.match(line) - id = m.groups()[0] - outf.write('{}_{} {}\n'.format(phone, current, id)) - current += 1 - - def acc_stats_func(directory, iteration, job_name, feature_string): log_path = os.path.join(directory, 'log', 'acc.{}.{}.log'.format(iteration, job_name)) model_path = os.path.join(directory, '{}.mdl'.format(iteration)) @@ -69,7 +46,7 @@ def acc_stats(iteration, directory, split_directory, num_jobs, config): The number of processes to use in calculation """ jobs = [(directory, iteration, x, - config.feature_config.construct_feature_proc_string(split_directory, directory, x) + config.feature_config.construct_feature_proc_string(split_directory, directory, x) ) for x in range(num_jobs)] if config.use_mp: @@ -84,23 +61,9 @@ def compile_train_graphs_func(directory, lang_directory, split_directory, job_na if not os.path.exists(mdl_path): mdl_path = os.path.join(directory, 'final.mdl') - log_path = os.path.join(directory, 'log', 'show_transition.log') - transition_path = os.path.join(directory, 'transitions.txt') - triphones_file_path = os.path.join(directory, 'triphones.txt') - if dictionary_names is None: - phones_file_path = os.path.join(lang_directory, 'phones.txt') - if debug: - with open(log_path, 'w', encoding='utf8') as log_file: - with open(transition_path, 'w', encoding='utf8') as f: - subprocess.call([thirdparty_binary('show-transitions'), phones_file_path, mdl_path], - stdout=f, stderr=log_file) - parse_transitions(transition_path, triphones_file_path) log_path = os.path.join(directory, 'log', 'compile-graphs.{}.log'.format(job_name)) - if os.path.exists(triphones_file_path): - phones_file_path = triphones_file_path - words_file_path = os.path.join(lang_directory, 'words.txt') fst_scp_path = os.path.join(directory, 'fsts.{}.scp'.format(job_name)) fst_ark_path = os.path.join(directory, 'fsts.{}.ark'.format(job_name)) text_path = os.path.join(split_directory, 'text.{}.int'.format(job_name)) @@ -111,23 +74,13 @@ def compile_train_graphs_func(directory, lang_directory, split_directory, job_na os.path.join(lang_directory, 'phones', 'disambig.int')), tree_path, mdl_path, os.path.join(lang_directory, 'L.fst'), - "ark:"+ text_path, "ark,scp:{},{}".format(fst_ark_path, fst_scp_path)], + "ark:" + text_path, "ark,scp:{},{}".format(fst_ark_path, fst_scp_path)], stderr=log_file) proc.communicate() else: for name in dictionary_names: - phones_file_path = os.path.join(lang_directory, 'phones.txt') - if debug: - with open(log_path, 'w', encoding='utf8') as log_file: - with open(transition_path, 'w', encoding='utf8') as f: - subprocess.call([thirdparty_binary('show-transitions'), phones_file_path, mdl_path], - stdout=f, stderr=log_file) - parse_transitions(transition_path, triphones_file_path) log_path = os.path.join(directory, 'log', 'compile-graphs.{}.{}.log'.format(job_name, name)) - if os.path.exists(triphones_file_path): - phones_file_path = triphones_file_path - words_file_path = os.path.join(lang_directory, 'words.txt') fst_scp_path = os.path.join(directory, 'fsts.{}.{}.scp'.format(job_name, name)) fst_ark_path = os.path.join(directory, 'fsts.{}.{}.ark'.format(job_name, name)) text_path = 
os.path.join(split_directory, 'text.{}.{}.int'.format(job_name, name)) @@ -138,7 +91,7 @@ def compile_train_graphs_func(directory, lang_directory, split_directory, job_na os.path.join(lang_directory, 'phones', 'disambig.int')), tree_path, mdl_path, os.path.join(lang_directory, name, 'dictionary', 'L.fst'), - "ark:"+text_path, "ark,scp:{},{}".format(fst_ark_path, fst_scp_path)], + "ark:" + text_path, "ark,scp:{},{}".format(fst_ark_path, fst_scp_path)], stderr=log_file) proc.communicate() @@ -153,45 +106,6 @@ def compile_train_graphs_func(directory, lang_directory, split_directory, job_na outf.write(line) - if debug: - utterances = [] - with open(os.path.join(split_directory, 'utt2spk.{}'.format(job_name)), 'r', encoding='utf8') as f: - for line in f: - utt = line.split()[0].strip() - if not utt: - continue - utterances.append(utt) - - with open(log_path, 'a', encoding='utf8') as log_file: - - temp_fst_path = os.path.join(directory, 'temp.fst.{}'.format(job_name)) - - with open(fst_scp_path, 'r', encoding='utf8') as f: - for line in f: - line = line.strip() - utt = line.split()[0] - - dot_path = os.path.join(directory, '{}.dot'.format(utt)) - fst_proc = subprocess.Popen([thirdparty_binary('fstcopy'), - 'scp:-', - 'scp:echo {} {}|'.format(utt, temp_fst_path)], - stdin=subprocess.PIPE, stderr=log_file) - fst_proc.communicate(input=line.encode()) - - draw_proc = subprocess.Popen([thirdparty_binary('fstdraw'), '--portrait=true', - '--isymbols={}'.format(phones_file_path), - '--osymbols={}'.format(words_file_path), temp_fst_path, - dot_path], - stderr=log_file) - draw_proc.communicate() - try: - dot_proc = subprocess.Popen([thirdparty_binary('dot'), '-Tpdf', '-O', dot_path], - stderr=log_file) - dot_proc.communicate() - except FileNotFoundError: - pass - - def compile_train_graphs(directory, lang_directory, split_directory, num_jobs, aligner, debug=False): """ Multiprocessing function that compiles training graphs for utterances @@ -282,33 +196,34 @@ def align_func(directory, iteration, job_name, mdl, config, feature_string, outp if debug: loglike_path = os.path.join(output_directory, 'ali.{}.loglikes'.format(job_name)) com = [thirdparty_binary('gmm-align-compiled'), - '--transition-scale={}'.format(config['transition_scale']), - '--acoustic-scale={}'.format(config['acoustic_scale']), - '--self-loop-scale={}'.format(config['self_loop_scale']), - '--beam={}'.format(config['beam']), - '--retry-beam={}'.format(config['retry_beam']), - '--careful=false', - '--write-per-frame-acoustic-loglikes=ark,t:{}'.format(loglike_path), - mdl, - "scp:" + fst_path, '{}'.format(feature_string), "ark,t:" + ali_path, - "ark,t:" + score_path] + '--transition-scale={}'.format(config['transition_scale']), + '--acoustic-scale={}'.format(config['acoustic_scale']), + '--self-loop-scale={}'.format(config['self_loop_scale']), + '--beam={}'.format(config['beam']), + '--retry-beam={}'.format(config['retry_beam']), + '--careful=false', + '--write-per-frame-acoustic-loglikes=ark,t:{}'.format(loglike_path), + mdl, + "scp:" + fst_path, '{}'.format(feature_string), "ark,t:" + ali_path, + "ark,t:" + score_path] else: com = [thirdparty_binary('gmm-align-compiled'), - '--transition-scale={}'.format(config['transition_scale']), - '--acoustic-scale={}'.format(config['acoustic_scale']), - '--self-loop-scale={}'.format(config['self_loop_scale']), - '--beam={}'.format(config['beam']), - '--retry-beam={}'.format(config['retry_beam']), - '--careful=false', - mdl, - "scp:" + fst_path, '{}'.format(feature_string), "ark,t:" + ali_path, - 
"ark,t:" + score_path] + '--transition-scale={}'.format(config['transition_scale']), + '--acoustic-scale={}'.format(config['acoustic_scale']), + '--self-loop-scale={}'.format(config['self_loop_scale']), + '--beam={}'.format(config['beam']), + '--retry-beam={}'.format(config['retry_beam']), + '--careful=false', + mdl, + "scp:" + fst_path, '{}'.format(feature_string), "ark,t:" + ali_path, + "ark,t:" + score_path] align_proc = subprocess.Popen(com, stderr=log_file) align_proc.communicate() -def align(iteration, directory, split_directory, optional_silence, num_jobs, config, output_directory=None, debug=False): +def align(iteration, directory, split_directory, optional_silence, num_jobs, config, output_directory=None, + debug=False): """ Multiprocessing function that aligns based on the current model @@ -344,7 +259,7 @@ def align(iteration, directory, split_directory, optional_silence, num_jobs, con jobs = [(directory, iteration, x, mdl, config.align_options, config.feature_config.construct_feature_proc_string(split_directory, directory, x), - output_directory) for x in range(num_jobs)] + output_directory) for x in range(num_jobs)] if config.use_mp: run_mp(align_func, jobs, log_directory) @@ -422,9 +337,9 @@ def compute_alignment_improvement_func(iteration, config, model_directory, job_n '', '', 'ark:-'], stdout=subprocess.PIPE, stderr=log_file) det_proc = subprocess.Popen([thirdparty_binary('lattice-determinize-pruned'), - 'ark:-', 'ark:-'], - stdin=lin_proc.stdout, stderr=log_file, - stdout=subprocess.PIPE) + 'ark:-', 'ark:-'], + stdin=lin_proc.stdout, stderr=log_file, + stdout=subprocess.PIPE) align_proc = subprocess.Popen([thirdparty_binary('lattice-align-words'), os.path.join(config.dictionary.phones_dir, 'word_boundary.int'), model_path, 'ark:-', 'ark:-'], @@ -588,9 +503,9 @@ def ali_to_textgrid_func(model_directory, word_path, split_directory, job_name, '', '', 'ark:-'], stdout=subprocess.PIPE, stderr=log_file) det_proc = subprocess.Popen([thirdparty_binary('lattice-determinize-pruned'), - 'ark:-', 'ark:-'], - stdin=lin_proc.stdout, stderr=log_file, - stdout=subprocess.PIPE) + 'ark:-', 'ark:-'], + stdin=lin_proc.stdout, stderr=log_file, + stdout=subprocess.PIPE) align_proc = subprocess.Popen([thirdparty_binary('lattice-align-words'), word_path, model_path, 'ark:-', 'ark,t:' + aligned_path], @@ -662,7 +577,7 @@ def convert_ali_to_textgrids(align_config, output_directory, model_directory, di else: run_non_mp(ali_to_textgrid_func, jobs, log_directory) - if not corpus.segments: # Hack for better memory management for .lab files + if not corpus.segments: # Hack for better memory management for .lab files for i in range(num_jobs): word_ctm = {} phone_ctm = {} @@ -804,7 +719,6 @@ def convert_alignments(directory, align_directory, num_jobs, config): def calc_fmllr_func(directory, split_directory, sil_phones, job_name, feature_string, config, initial, model_name='final'): - log_path = os.path.join(directory, 'log', 'fmllr.{}.{}.log'.format(model_name, job_name)) ali_path = os.path.join(directory, 'ali.{}'.format(job_name)) mdl_path = os.path.join(directory, '{}.mdl'.format(model_name)) @@ -892,10 +806,9 @@ def calc_fmllr(directory, split_directory, sil_phones, num_jobs, config, model_name = iteration log_directory = os.path.join(directory, 'log') - jobs = [(directory, split_directory, sil_phones, x, - config.feature_config.construct_feature_proc_string(split_directory, directory, x), - config, initial, model_name) for x in range(num_jobs)] + 
config.feature_config.construct_feature_proc_string(split_directory, directory, x), + config, initial, model_name) for x in range(num_jobs)] if config.use_fmllr_mp: run_mp(calc_fmllr_func, jobs, log_directory) else: @@ -960,10 +873,9 @@ def lda_acc_stats(directory, split_directory, align_directory, config, ci_phones """ jobs = [(directory, - config.feature_config.construct_feature_proc_string(split_directory, directory, x, splice=True), + config.feature_config.construct_feature_proc_string(split_directory, directory, x, splice=True), align_directory, config.lda_options, ci_phones, x) for x in range(num_jobs)] - if config.use_mp: run_mp(lda_acc_stats_func, jobs, config.log_directory) else: @@ -996,20 +908,20 @@ def calc_lda_mllt_func(directory, feature_string, sil_phones, job_name, config, # Estimating MLLT with open(log_path, 'a', encoding='utf8') as log_file: post_proc = subprocess.Popen([thirdparty_binary('ali-to-post'), - "ark:" + ali_path, 'ark:-'], + "ark:" + ali_path, 'ark:-'], stdout=subprocess.PIPE, stderr=log_file) weight_proc = subprocess.Popen([thirdparty_binary('weight-silence-post'), '0.0', - sil_phones, mdl_path, 'ark:-', - 'ark:-'], - stdin=post_proc.stdout, stdout=subprocess.PIPE, stderr=log_file) + sil_phones, mdl_path, 'ark:-', + 'ark:-'], + stdin=post_proc.stdout, stdout=subprocess.PIPE, stderr=log_file) acc_proc = subprocess.Popen([thirdparty_binary('gmm-acc-mllt'), - '--rand-prune=' + str(config['random_prune']), - mdl_path, - '{}'.format(feature_string), - 'ark:-', - os.path.join(directory, '{}.{}.macc'.format(model_name, job_name))], - stdin=weight_proc.stdout, stderr=log_file) + '--rand-prune=' + str(config['random_prune']), + mdl_path, + '{}'.format(feature_string), + 'ark:-', + os.path.join(directory, '{}.{}.macc'.format(model_name, job_name))], + stdin=weight_proc.stdout, stderr=log_file) acc_proc.communicate() @@ -1056,8 +968,8 @@ def calc_lda_mllt(directory, data_directory, sil_phones, num_jobs, config, else: model_name = iteration jobs = [(directory, - config.feature_config.construct_feature_proc_string(data_directory, directory, x), - sil_phones, x, config.lda_options, initial, model_name) for x in range(num_jobs)] + config.feature_config.construct_feature_proc_string(data_directory, directory, x), + sil_phones, x, config.lda_options, initial, model_name) for x in range(num_jobs)] if config.use_mp: run_mp(calc_lda_mllt_func, jobs, config.log_directory) @@ -1092,4 +1004,3 @@ def calc_lda_mllt(directory, data_directory, sil_phones, num_jobs, config, os.rename(composed_path, previous_mat_path) else: os.rename(new_mat_path, previous_mat_path) - diff --git a/montreal_forced_aligner/multiprocessing/transcription.py b/montreal_forced_aligner/multiprocessing/transcription.py index 54e832c2..06e69721 100644 --- a/montreal_forced_aligner/multiprocessing/transcription.py +++ b/montreal_forced_aligner/multiprocessing/transcription.py @@ -94,16 +94,16 @@ def decode_func(model_directory, job_name, config, feat_string, output_directory decode_proc.communicate() -def score_func(model_directory, job_name, config, output_directory, language_model_weight=None, +def score_func(model_directory, transcribe_directory, job_name, config, output_directory, language_model_weight=None, word_insertion_penalty=None, dictionary_names=None): if language_model_weight is None: language_model_weight = config.language_model_weight if word_insertion_penalty is None: word_insertion_penalty = config.word_insertion_penalty if dictionary_names is None: - lat_path = os.path.join(output_directory, 
'lat.{}'.format(job_name)) - rescored_lat_path = os.path.join(output_directory, 'lat.{}.rescored'.format(job_name)) - carpa_rescored_lat_path = os.path.join(output_directory, 'lat.{}.carparescored'.format(job_name)) + lat_path = os.path.join(transcribe_directory, 'lat.{}'.format(job_name)) + rescored_lat_path = os.path.join(transcribe_directory, 'lat.{}.rescored'.format(job_name)) + carpa_rescored_lat_path = os.path.join(transcribe_directory, 'lat.{}.carparescored'.format(job_name)) if os.path.exists(carpa_rescored_lat_path): lat_path = carpa_rescored_lat_path elif os.path.exists(rescored_lat_path): @@ -126,9 +126,9 @@ def score_func(model_directory, job_name, config, output_directory, language_mod best_path_proc.communicate() else: for name in dictionary_names: - lat_path = os.path.join(output_directory, 'lat.{}.{}'.format(job_name, name)) - rescored_lat_path = os.path.join(output_directory, 'lat.{}.{}.rescored'.format(job_name, name)) - carpa_rescored_lat_path = os.path.join(output_directory, 'lat.{}.{}.carparescored'.format(job_name, name)) + lat_path = os.path.join(transcribe_directory, 'lat.{}.{}'.format(job_name, name)) + rescored_lat_path = os.path.join(transcribe_directory, 'lat.{}.{}.rescored'.format(job_name, name)) + carpa_rescored_lat_path = os.path.join(transcribe_directory, 'lat.{}.{}.carparescored'.format(job_name, name)) if os.path.exists(carpa_rescored_lat_path): lat_path = carpa_rescored_lat_path elif os.path.exists(rescored_lat_path): @@ -262,7 +262,8 @@ def transcribe(transcriber): out_dir = os.path.join(decode_directory, 'eval_{}_{}'.format(lmwt, wip)) log_dir = os.path.join(out_dir, 'log') os.makedirs(log_dir, exist_ok=True) - jobs = [(model_directory, x, config, out_dir, lmwt, wip, transcriber.dictionaries_for_job(x)) + + jobs = [(model_directory, decode_directory, x, config, out_dir, lmwt, wip, transcriber.dictionaries_for_job(x)) for x in range(num_jobs)] if config.use_mp: run_mp(score_func, jobs, log_dir) @@ -274,7 +275,7 @@ def transcribe(transcriber): transcriber.transcribe_config.language_model_weight = best[0] transcriber.transcribe_config.word_insertion_penalty = best[1] else: - jobs = [(model_directory, x, config, decode_directory, None, None, transcriber.dictionaries_for_job(x)) + jobs = [(model_directory, decode_directory, x, config, decode_directory, None, None, transcriber.dictionaries_for_job(x)) for x in range(num_jobs)] if config.use_mp: run_mp(score_func, jobs, log_directory) @@ -642,7 +643,7 @@ def transcribe_fmllr(transcriber): out_dir = os.path.join(fmllr_directory, 'eval_{}_{}'.format(lmwt, wip)) log_dir = os.path.join(out_dir, 'log') os.makedirs(log_dir, exist_ok=True) - jobs = [(model_directory, x, config, out_dir, lmwt, wip, transcriber.dictionaries_for_job(x)) + jobs = [(model_directory, fmllr_directory, x, config, out_dir, lmwt, wip, transcriber.dictionaries_for_job(x)) for x in range(num_jobs)] if config.use_mp: run_mp(score_func, jobs, log_dir) @@ -659,7 +660,7 @@ def transcribe_fmllr(transcriber): saved_tra_path = os.path.join(fmllr_directory, 'tra.{}'.format(j)) shutil.copyfile(tra_path, saved_tra_path) else: - jobs = [(model_directory, x, config, fmllr_directory, None, None, transcriber.dictionaries_for_job(x)) + jobs = [(model_directory, fmllr_directory, x, config, fmllr_directory, None, None, transcriber.dictionaries_for_job(x)) for x in range(num_jobs)] if config.use_mp: run_mp(score_func, jobs, log_dir) diff --git a/montreal_forced_aligner/transcriber.py b/montreal_forced_aligner/transcriber.py index ed7a8f3e..71fe24ab 100644 --- 
a/montreal_forced_aligner/transcriber.py +++ b/montreal_forced_aligner/transcriber.py @@ -386,7 +386,7 @@ def evaluate(self, output_directory, input_directory=None): if g != h: issues.append((utt, g, h)) to_comp.append((g, h)) - gen = pool.map(score, to_comp) + gen = pool.starmap(score, to_comp) for (edits, length) in gen: if edits == 0: correct += 1 diff --git a/tests/conftest.py b/tests/conftest.py index 9b2febea..fc224500 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -158,6 +158,24 @@ def basic_corpus_dir(corpus_root_dir, wav_dir, lab_dir): return path +@pytest.fixture(scope='session') +def basic_split_dir(corpus_root_dir, wav_dir, lab_dir): + path = os.path.join(corpus_root_dir, 'split') + audio_path = os.path.join(path, 'audio') + text_path = os.path.join(path, 'text') + os.makedirs(path, exist_ok=True) + names = [('michael', ['acoustic_corpus']), ('sickmichael', ['cold_corpus', 'cold_corpus3'])] + for s, files in names: + s_text_dir = os.path.join(text_path, s) + s_audio_dir = os.path.join(audio_path, s) + os.makedirs(s_text_dir, exist_ok=True) + os.makedirs(s_audio_dir, exist_ok=True) + for name in files: + shutil.copyfile(os.path.join(wav_dir, name + '.wav'), os.path.join(s_audio_dir, name + '.wav')) + shutil.copyfile(os.path.join(lab_dir, name + '.lab'), os.path.join(s_text_dir, name + '.lab')) + return audio_path, text_path + + @pytest.fixture(scope='session') def multilingual_ipa_corpus_dir(corpus_root_dir, wav_dir, lab_dir): path = os.path.join(corpus_root_dir, 'multilingual') diff --git a/tests/test_commandline_align.py b/tests/test_commandline_align.py index 8c710f53..53c15299 100644 --- a/tests/test_commandline_align.py +++ b/tests/test_commandline_align.py @@ -55,13 +55,21 @@ def test_align_multilingual(multilingual_ipa_corpus_dir, english_uk_ipa_dictiona run_align_corpus(args, unknown) def test_align_multilingual_speaker_dict(multilingual_ipa_corpus_dir, ipa_speaker_dict_path, generated_dir, temp_dir, - basic_align_config, english_acoustic_model, english_ipa_acoustic_model): + basic_align_config, english_ipa_acoustic_model): command = ['align', multilingual_ipa_corpus_dir, ipa_speaker_dict_path, english_ipa_acoustic_model, os.path.join(generated_dir, 'multilingual_speaker_dict'), '-t', temp_dir, '-c', basic_align_config, '-q', '--clean', '-d'] args, unknown = parser.parse_known_args(command) run_align_corpus(args, unknown) +def test_align_split(basic_corpus_dir, english_us_ipa_dictionary, generated_dir, temp_dir, + basic_align_config, english_acoustic_model, english_ipa_acoustic_model): + + command = ['align', basic_corpus_dir, english_us_ipa_dictionary, english_ipa_acoustic_model, os.path.join(generated_dir, 'multilingual'), + '-t', temp_dir, '-c', basic_align_config, '-q', '--clean', '-d'] + args, unknown = parser.parse_known_args(command) + run_align_corpus(args, unknown) + def test_align_stereo(stereo_corpus_dir, sick_dict_path, generated_dir, large_dataset_dictionary, temp_dir, basic_align_config, english_acoustic_model): diff --git a/tests/test_commandline_lm.py b/tests/test_commandline_lm.py index f1a7ca16..3f1555de 100644 --- a/tests/test_commandline_lm.py +++ b/tests/test_commandline_lm.py @@ -14,3 +14,14 @@ def test_train_lm(basic_corpus_dir, temp_dir, generated_dir, basic_train_lm_conf args, unknown = parser.parse_known_args(command) run_train_lm(args) assert os.path.exists(args.output_model_path) + + +def test_train_lm_text(basic_split_dir, temp_dir, generated_dir, basic_train_lm_config): + if sys.platform == 'win32': + pytest.skip('LM training not 
supported on Windows.') + text_dir = basic_split_dir[1] + command = ['train_lm', text_dir, os.path.join(generated_dir, 'test_basic_lm_split.zip'), + '-t', temp_dir, '-c', basic_train_lm_config, '-q', '--clean'] + args, unknown = parser.parse_known_args(command) + run_train_lm(args) + assert os.path.exists(args.output_model_path) diff --git a/tests/test_commandline_transcribe.py b/tests/test_commandline_transcribe.py index 684c4241..a1e8fdde 100644 --- a/tests/test_commandline_transcribe.py +++ b/tests/test_commandline_transcribe.py @@ -15,11 +15,21 @@ def test_transcribe(basic_corpus_dir, sick_dict_path, english_acoustic_model, ge run_transcribe_corpus(args) -def test_transcribe_speaker_dictionaries(basic_corpus_dir, speaker_dictionary_path, english_acoustic_model, generated_dir, - transcription_acoustic_model, transcription_language_model, temp_dir, transcribe_config): +def test_transcribe_speaker_dictionaries(multilingual_ipa_corpus_dir, ipa_speaker_dict_path, english_ipa_acoustic_model, generated_dir, + transcription_language_model, temp_dir, transcribe_config): output_path = os.path.join(generated_dir, 'transcribe_test') - command = ['transcribe', basic_corpus_dir, speaker_dictionary_path, transcription_acoustic_model, + command = ['transcribe', multilingual_ipa_corpus_dir, ipa_speaker_dict_path, english_ipa_acoustic_model, transcription_language_model, output_path, '-t', temp_dir, '-q', '--clean', '-d', '--config', transcribe_config] args, unknown = parser.parse_known_args(command) + run_transcribe_corpus(args) + + +def test_transcribe_speaker_dictionaries_evaluate(multilingual_ipa_corpus_dir, ipa_speaker_dict_path, english_ipa_acoustic_model, generated_dir, + transcription_language_model, temp_dir, transcribe_config): + output_path = os.path.join(generated_dir, 'transcribe_test') + command = ['transcribe', multilingual_ipa_corpus_dir, ipa_speaker_dict_path, english_ipa_acoustic_model, + transcription_language_model, output_path, + '-t', temp_dir, '-q', '--clean', '-d', '--config', transcribe_config, '--evaluate'] + args, unknown = parser.parse_known_args(command) run_transcribe_corpus(args) \ No newline at end of file
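
For reference, a minimal sketch of how the new -a/--audio_directory option introduced in this patch can be exercised through the command-line parser, mirroring the invocation pattern of the tests above. The file paths below are hypothetical placeholders, and the imports are assumed to match what the test suite uses (the top-level parser in montreal_forced_aligner.command_line.mfa and run_align_corpus in command_line.align); this is illustrative only, not part of the patch.

# Illustrative sketch -- paths are placeholders, not files shipped with MFA.
# Assumes the same entry points the tests above rely on.
from montreal_forced_aligner.command_line.mfa import parser
from montreal_forced_aligner.command_line.align import run_align_corpus

command = [
    'align',
    '/data/corpus/text',          # corpus directory with transcripts (.lab/.TextGrid)
    '/data/english_us_ipa.dict',  # pronunciation dictionary
    '/data/english_ipa.zip',      # acoustic model
    '/data/aligned_output',       # output directory for TextGrids
    '-a', '/data/corpus/audio',   # new flag from this patch: root to search for audio files
    '-t', '/tmp/mfa',             # temporary directory
    '-q', '--clean',
]
args, unknown = parser.parse_known_args(command)
run_align_corpus(args, unknown)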