Skip to content

Commit

Permalink
Model export fixes (MontrealCorpusTools#338)
Browse files Browse the repository at this point in the history
Update model export and better support for xsampa orthography
  • Loading branch information
mmcauliffe committed Oct 8, 2021
1 parent 29c5d1a commit 8abae27
Show file tree
Hide file tree
Showing 21 changed files with 654 additions and 46 deletions.
8 changes: 8 additions & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@
Changelog
=========

2.0.0b3
-------

- Fixed a bug involving non-escaped orthographic characters
- Improved SAT alignment with speaker-independent alignment model
- Fixed a bug where models would not function properly if they were renamed
- Added a history subcommand to list previous commands

2.0.0b1
-------

Expand Down
1 change: 1 addition & 0 deletions docs/source/commands.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ Other utilities
"download", "Download a model trained by MFA developers", :ref:`pretrained_models`
"thirdparty", "Download and validate new third party binaries", :ref:`installation`
"configure", "Configure MFA to use customized defaults for command line arguments", :ref:`configuration`
"history", "List previous MFA commands run locally",


Grapheme-to-phoneme
Expand Down
7 changes: 1 addition & 6 deletions docs/source/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,8 @@ Options available:

Display help message for the command



Configuration of commands
=========================

.. toctree::
:maxdepth: 1
:maxdepth: 2

configuration_align.rst
configuration_transcription.rst
Expand Down
2 changes: 1 addition & 1 deletion montreal_forced_aligner/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
__ver_major__ = 2
__ver_minor__ = 0
__ver_patch__ = '0b2'
__ver_patch__ = '0b3'
__version__ = "{}.{}.{}".format(__ver_major__, __ver_minor__, __ver_patch__)

__all__ = ['aligner', 'command_line', 'models', 'corpus', 'config', 'dictionary', 'exceptions',
Expand Down
5 changes: 4 additions & 1 deletion montreal_forced_aligner/aligner/pretrained.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,18 @@ def align(self, subset=None):
log_dir = os.path.join(self.align_directory, 'log')
os.makedirs(log_dir, exist_ok=True)

self.logger.info('Performing first-pass alignment...')
align('final', self.align_directory, self.align_config.data_directory,
self.dictionary.optional_silence_csl,
self.corpus.num_jobs, self.align_config)
self.corpus.num_jobs, self.align_config, speaker_independent=True)
unaligned, average_log_like = compile_information(self.align_directory, self.corpus, self.corpus.num_jobs, self)
self.logger.debug(f'Prior to SAT, average per frame likelihood (this might not actually mean anything): {average_log_like}')
if not self.align_config.disable_sat and self.acoustic_model.feature_config.fmllr \
and not os.path.exists(os.path.join(self.align_directory, 'trans.0')):
calc_fmllr(self.align_directory, self.align_config.data_directory,
self.dictionary.optional_silence_csl, self.corpus.num_jobs, self.align_config, initial=True, iteration='final')

self.logger.info('Performing second-pass alignment...')
align('final', self.align_directory, self.align_config.data_directory,
self.dictionary.optional_silence_csl,
self.corpus.num_jobs, self.align_config)
Expand Down
68 changes: 67 additions & 1 deletion montreal_forced_aligner/command_line/mfa.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import atexit
import sys
import os
import time
import argparse
from datetime import datetime
import multiprocessing as mp

from montreal_forced_aligner import __version__
Expand All @@ -23,9 +25,53 @@
from montreal_forced_aligner.command_line.train_dictionary import run_train_dictionary
from montreal_forced_aligner.command_line.create_segments import run_create_segments
from montreal_forced_aligner.exceptions import MFAError
from montreal_forced_aligner.config import update_global_config, load_global_config, update_command_history
from montreal_forced_aligner.config import update_global_config, load_global_config, update_command_history, load_command_history


class ExitHooks(object):
    """Record how the process terminates.

    After :meth:`hook` is called, ``sys.exit`` and ``sys.excepthook`` are
    wrapped so that the exit code passed to ``sys.exit`` and any uncaught
    exception are remembered on this object before the previously installed
    handlers run.
    """

    def __init__(self):
        self.exit_code = None  # code passed to sys.exit, if it was called
        self.exception = None  # uncaught exception seen by the except hook

    def hook(self):
        """Install the wrappers, remembering the handlers they replace."""
        self._orig_exit = sys.exit
        self._orig_excepthook = sys.excepthook
        sys.exit = self.exit
        sys.excepthook = self.exc_handler

    def exit(self, code=0):
        """Remember ``code`` and delegate to the original ``sys.exit``."""
        self.exit_code = code
        self._orig_exit(code)

    def exc_handler(self, exc_type, exc, *args):
        """Remember the uncaught exception, then report it as before.

        Delegating to the previously installed hook keeps the traceback
        output; without it an uncaught exception would end the program
        silently.
        """
        self.exception = exc
        self._orig_excepthook(exc_type, exc, *args)

# Install the exit/exception wrappers as soon as this module is loaded so the
# outcome of the run can be saved to the command history.
hooks = ExitHooks()
hooks.hook()

# Start time (used to compute execution time) and start date of this invocation.
BEGIN = time.time()
BEGIN_DATE = datetime.now()


def history_save_handler():
    """Append a record of this invocation to the command history.

    The record holds the command line, the elapsed time since ``BEGIN``,
    the start date, the MFA version, the exit code and the text of any
    uncaught exception (empty string when there was none).
    """
    history_data = {
        'command': ' '.join(sys.argv),
        'execution_time': time.time() - BEGIN,
        'date': BEGIN_DATE,
        'version': __version__
    }

    if hooks.exit_code is not None:
        # sys.exit was called explicitly; trust the code it was given.
        history_data['exit_code'] = hooks.exit_code
        history_data['exception'] = ''
    elif hooks.exception is not None:
        history_data['exit_code'] = 1
        # Store the message text: the history is written with
        # yaml.safe_dump, which cannot represent exception objects.
        history_data['exception'] = str(hooks.exception)
    else:
        history_data['exception'] = ''
        history_data['exit_code'] = 0
    update_command_history(history_data)

# Save the history record when the interpreter shuts down.
atexit.register(history_save_handler)

def fix_path():
from montreal_forced_aligner.config import TEMP_DIR
Expand Down Expand Up @@ -295,6 +341,11 @@ def add_global_options(subparser, textgrid_output=False):
"silences and recombines compound words and clitics",
action='store_true')

# Subcommand that lists previously run MFA commands.
history_parser = subparsers.add_parser('history')

# type=int so that a depth given on the command line is an integer like the
# default; the value is negated when slicing the history list.
history_parser.add_argument('depth', help='Number of commands to list', nargs='?', default=10, type=int)
history_parser.add_argument('--verbose', help="Flag for whether to output additional information", action='store_true')

annotator_parser = subparsers.add_parser('annotator')
anchor_parser = subparsers.add_parser('anchor')

Expand Down Expand Up @@ -391,6 +442,21 @@ def main():
update_global_config(args)
global GLOBAL_CONFIG
GLOBAL_CONFIG = load_global_config()
elif args.subcommand == 'history':
depth = args.depth
history = load_command_history()[-depth:]
for h in history:
if args.verbose:
print('command\tDate\tExecution time\tVersion\tExit code\tException')
for h in history:
execution_time = time.strftime('%H:%M:%S', time.gmtime(h['execution_time']))
d = h['date'].isoformat()
print(f"{h['command']}\t{d}\t{execution_time}\t{h['version']}\t{h['exit_code']}\t{h['exception']}")
pass
else:
for h in history:
print(h['command'])

elif args.subcommand == 'version':
print(__version__)
except MFAError as e:
Expand Down
29 changes: 25 additions & 4 deletions montreal_forced_aligner/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,32 @@ def generate_config_path():
return os.path.join(TEMP_DIR, 'global_config.yaml')

def generate_command_history_path():
return os.path.join(TEMP_DIR, 'command_history')
return os.path.join(TEMP_DIR, 'command_history.yaml')

def update_command_history(command, duration, exit_code, exception):
with open(generate_command_history_path(), 'a', encoding='utf8') as f:
f.write(f'{command}\t{duration}\t{exit_code}\t{exception}\n')
def load_command_history():
    """Return the saved command history as a list.

    An empty list is returned when the history file does not exist or
    when it holds no entries.
    """
    history_path = generate_command_history_path()
    if not os.path.exists(history_path):
        return []
    with open(history_path, 'r', encoding='utf8') as handle:
        entries = yaml.safe_load(handle)
    return entries if entries else []


def update_command_history(command_data):
    """Append ``command_data`` to the history file, keeping the last 50 entries.

    Nothing is written when the second word of the command is ``history``,
    or when that word cannot be read from ``command_data``.
    """
    try:
        is_history_call = command_data['command'].split(' ')[1] == 'history'
    except Exception:
        # Best effort: a record without a readable subcommand is skipped.
        return
    if is_history_call:
        return

    entries = load_command_history()
    entries.append(command_data)
    with open(generate_command_history_path(), 'w', encoding='utf8') as handle:
        yaml.safe_dump(entries[-50:], handle)

def update_global_config(args):
global_configuration_file = generate_config_path()
Expand Down
5 changes: 3 additions & 2 deletions montreal_forced_aligner/config/train_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ def __init__(self, training_configs):
self.compound_markers = DEFAULT_COMPOUND_MARKERS

def update_from_align(self, align_config):
self.training_configs[-1].overwrite = align_config.overwrite
self.training_configs[-1].cleanup_textgrids = align_config.cleanup_textgrids
for tc in self.training_configs:
tc.overwrite = align_config.overwrite
tc.cleanup_textgrids = align_config.cleanup_textgrids

def update(self, data):
for k, v in data.items():
Expand Down
12 changes: 3 additions & 9 deletions montreal_forced_aligner/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,9 @@


def compile_graphemes(graphemes):
if '-' in graphemes:
base = r'^\W*([-{}]+)\W*'
else:
base = r'^\W*([{}]+)\W*'
graphemes = list(graphemes)
for i in range(len(graphemes)):
if graphemes[i] == ']':
graphemes[i] = r'\]'
string = ''.join(x for x in graphemes if x != '-')

base = r'^\W*([{}]+)\W*'
string = re.escape(''.join(graphemes))
try:
return re.compile(base.format(string))
except Exception:
Expand Down
5 changes: 3 additions & 2 deletions montreal_forced_aligner/features/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ def generate_base_features(self, corpus, logger=None, compute_cmvn=True):
log_func('Calculating CMVN...')
calc_cmvn(corpus)

def construct_feature_proc_string(self, data_directory, model_directory, job_name, splice=False, voiced=False, cmvn=True):
def construct_feature_proc_string(self, data_directory, model_directory, job_name, splice=False, voiced=False,
cmvn=True, speaker_independent=False):
if self.directory is None:
self.directory = data_directory
lda_mat_path = None
Expand Down Expand Up @@ -190,7 +191,7 @@ def construct_feature_proc_string(self, data_directory, model_directory, job_nam
elif self.deltas:
feats += " add-deltas ark:- ark:- |"

if fmllr_trans_path is not None:
if fmllr_trans_path is not None and not speaker_independent:
if not os.path.exists(fmllr_trans_path):
raise Exception('Could not find {}'.format(fmllr_trans_path))
feats += " transform-feats --utt2spk=ark:{} ark:{} ark:- ark:- |".format(utt2spk_path, fmllr_trans_path)
Expand Down
16 changes: 10 additions & 6 deletions montreal_forced_aligner/models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
import yaml

from shutil import copy, copyfile, rmtree, make_archive, unpack_archive
from shutil import copy, copyfile, rmtree, make_archive, unpack_archive, move

from . import __version__
from .exceptions import PronunciationAcousticMismatchError
Expand Down Expand Up @@ -31,11 +31,15 @@ def __init__(self, source, root_directory=None):
if os.path.isdir(source):
self.dirname = os.path.abspath(source)
else:
base = root_directory
self.dirname = os.path.join(root_directory, self.name)
if not os.path.exists(self.dirname):
os.makedirs(root_directory, exist_ok=True)
unpack_archive(source, base)
unpack_archive(source, self.dirname)
files = os.listdir(self.dirname)
old_dir_path = os.path.join(self.dirname, files[0])
if len(files) == 1 and os.path.isdir(old_dir_path): # Backwards compatibility
for f in os.listdir(old_dir_path):
move(os.path.join(old_dir_path, f), os.path.join(self.dirname, f))

@property
def meta(self):
Expand Down Expand Up @@ -76,16 +80,16 @@ def __repr__(self):
def clean_up(self):
rmtree(self.dirname)

def dump(self, sink, archive_fmt=FORMAT):
def dump(self, path, archive_fmt=FORMAT):
"""
Write archive to disk, and return the name of final archive
"""
return make_archive(sink, archive_fmt,
return make_archive(os.path.splitext(path)[0], archive_fmt,
*os.path.split(self.dirname))


class AcousticModel(Archive):
files = ['final.mdl', 'final.occs', 'lda.mat', 'tree']
files = ['final.mdl', 'final.alimdl', 'final.occs', 'lda.mat', 'tree']
def add_meta_file(self, aligner):
with open(os.path.join(self.dirname, 'meta.yaml'), 'w', encoding='utf8') as f:
yaml.dump(aligner.meta, f)
Expand Down
4 changes: 3 additions & 1 deletion montreal_forced_aligner/multiprocessing/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from .helper import run_mp, run_non_mp, Stopped, Counter
from .alignment import align, compute_alignment_improvement, convert_ali_to_textgrids, compile_information, acc_stats, \
lda_acc_stats, mono_align_equal, compile_train_graphs, tree_stats, convert_alignments, calc_lda_mllt, calc_fmllr
lda_acc_stats, mono_align_equal, compile_train_graphs, tree_stats, convert_alignments, calc_lda_mllt, calc_fmllr, \
create_align_model

from .transcription import transcribe, transcribe_fmllr
from .ivector import gmm_gselect, acc_global_stats, acc_ivector_stats, extract_ivectors, gauss_to_post, segment_vad, \
classify_speakers
Expand Down
Loading

0 comments on commit 8abae27

Please sign in to comment.