Skip to content

Commit

Permalink
Improvements to output textgrid, add "adapt" functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
mmcauliffe committed Aug 2, 2021
1 parent f990df3 commit 587ad65
Show file tree
Hide file tree
Showing 28 changed files with 844 additions and 322 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/publish-to-test-pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ jobs:
--wheel
--outdir dist/
.
- name: Publish distribution 📦 to PyPI
if: startsWith(github.ref, 'refs/tags')
uses: pypa/gh-action-pypi-publish@master
with:
password: ${{ secrets.PYPI_API_TOKEN }}
- name: Publish distribution 📦 to Test PyPI
uses: pypa/gh-action-pypi-publish@master
with:
password: ${{ secrets.TEST_PYPI_API_TOKEN }}
repository_url: https://test.pypi.org/legacy/
- name: Publish distribution 📦 to PyPI
if: startsWith(github.ref, 'refs/tags')
uses: pypa/gh-action-pypi-publish@master
with:
password: ${{ secrets.PYPI_API_TOKEN }}
10 changes: 10 additions & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@
Changelog
=========

2.0.0a23
--------

- Fixed bugs in transcription and alignment when using multiple dictionaries
- Fixed an issue where filenames were output with ``-`` rather than ``_`` if they originally had them
- Changed how output text differs from input text when there is a compound marker (i.e., ``-``); these should now
have a single interval for the whole compound rather than two intervals for each subword
- Changed how OOV items are output, so they will be present in the output rather than ``<unk>``

2.0.0a22
--------

Expand All @@ -20,6 +29,7 @@ Changelog
the user's machine
- Update annotator utility to have autosave on exit
- Fixed cases where not all phones in a dictionary were present in phone_mapping
- Changed TextGrid export to not put "sp" or "sil" in the phone tier

2.0.0a21
--------
Expand Down
15 changes: 0 additions & 15 deletions montreal_forced_aligner/aligner/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
from ..multiprocessing import compile_information
from ..config import TEMP_DIR

from ..helper import log_kaldi_errors, load_scp
from ..exceptions import KaldiProcessingError
from ..dictionary import MultispeakerDictionary


Expand Down Expand Up @@ -98,19 +96,6 @@ def compile_information(self, model_directory, output_directory):
self.logger.warning('There were {} segments/files not aligned. Please see {} for more details on why '
'alignment failed for these files.'.format(len(issues), issue_path))

log_like = 0
tot_frames = 0
for j in range(self.corpus.num_jobs):
score_path = os.path.join(model_directory, 'ali.{}.scores'.format(j))
scores = load_scp(score_path, data_type=float)
for k, v in scores.items():
log_like += v
tot_frames += self.corpus.utterance_lengths[k]
if tot_frames:
self.logger.debug('Average per frame likelihood (this might not actually mean anything): {}'.format(log_like/tot_frames))
else:
self.logger.debug('No files were aligned, this likely indicates serious problems with the aligner.')

def export_textgrids(self, output_directory):
"""
Export a TextGrid file for every sound file in the dataset
Expand Down
30 changes: 28 additions & 2 deletions montreal_forced_aligner/aligner/pretrained.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from ..multiprocessing import (align, convert_ali_to_textgrids, compile_train_graphs,
calc_fmllr, generate_pronunciations)
from ..exceptions import KaldiProcessingError
from ..helper import log_kaldi_errors
from ..helper import log_kaldi_errors, load_scp


def parse_transitions(path, phones_path):
Expand Down Expand Up @@ -77,7 +77,7 @@ def setup(self):
self.dictionary.nonsil_phones = self.acoustic_model.meta['phones']
super(PretrainedAligner, self).setup()

def align(self):
def align(self, subset=None):
done_path = os.path.join(self.align_directory, 'done')
dirty_path = os.path.join(self.align_directory, 'dirty')
if os.path.exists(done_path):
Expand All @@ -93,6 +93,19 @@ def align(self):
align('final', self.align_directory, self.align_config.data_directory,
self.dictionary.optional_silence_csl,
self.corpus.num_jobs, self.align_config)

log_like = 0
tot_frames = 0
for j in range(self.corpus.num_jobs):
score_path = os.path.join(self.align_directory, 'ali.{}.scores'.format(j))
scores = load_scp(score_path, data_type=float)
for k, v in scores.items():
log_like += v
tot_frames += self.corpus.utterance_lengths[k]
if tot_frames:
self.logger.debug('Prior to SAT, average per frame likelihood (this might not actually mean anything): {}'.format(log_like/tot_frames))
else:
self.logger.debug('No files were aligned, this likely indicates serious problems with the aligner.')
if not self.align_config.disable_sat and self.acoustic_model.feature_config.fmllr \
and not os.path.exists(os.path.join(self.align_directory, 'trans.0')):
self.logger.info('Calculating fMLLR for speaker adaptation...')
Expand All @@ -102,6 +115,19 @@ def align(self):
align('final', self.align_directory, self.align_config.data_directory,
self.dictionary.optional_silence_csl,
self.corpus.num_jobs, self.align_config)

log_like = 0
tot_frames = 0
for j in range(self.corpus.num_jobs):
score_path = os.path.join(self.align_directory, 'ali.{}.scores'.format(j))
scores = load_scp(score_path, data_type=float)
for k, v in scores.items():
log_like += v
tot_frames += self.corpus.utterance_lengths[k]
if tot_frames:
self.logger.debug('Following SAT, average per frame likelihood (this might not actually mean anything): {}'.format(log_like/tot_frames))
else:
self.logger.debug('No files were aligned, this likely indicates serious problems with the aligner.')
except Exception as e:
with open(dirty_path, 'w'):
pass
Expand Down
10 changes: 4 additions & 6 deletions montreal_forced_aligner/aligner/trainable.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
from ..multiprocessing import (convert_ali_to_textgrids)
from ..multiprocessing import convert_ali_to_textgrids
from .base import BaseAligner

from ..helper import log_kaldi_errors
from ..exceptions import KaldiProcessingError


class TrainableAligner(BaseAligner):
"""
Expand All @@ -27,8 +24,9 @@ class TrainableAligner(BaseAligner):
"""

def __init__(self, corpus, dictionary, training_config, align_config, temp_directory=None,
call_back=None, debug=False, verbose=False, logger=None):
call_back=None, debug=False, verbose=False, logger=None, pretrained_aligner=None):
self.training_config = training_config
self.pretrained_aligner = pretrained_aligner
super(TrainableAligner, self).__init__(corpus, dictionary, align_config, temp_directory,
call_back, debug, verbose, logger)

Expand Down Expand Up @@ -65,7 +63,7 @@ def meta(self):
return data

def train(self):
previous = None
previous = self.pretrained_aligner
for identifier, trainer in self.training_config.items():
trainer.debug = self.debug
trainer.logger = self.logger
Expand Down
188 changes: 188 additions & 0 deletions montreal_forced_aligner/command_line/adapt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
import shutil
import os
import time
import multiprocessing as mp
import yaml

from montreal_forced_aligner import __version__
from montreal_forced_aligner.corpus.align_corpus import AlignableCorpus
from montreal_forced_aligner.dictionary import Dictionary, MultispeakerDictionary
from montreal_forced_aligner.aligner import TrainableAligner, PretrainedAligner
from montreal_forced_aligner.models import AcousticModel
from montreal_forced_aligner.config import TEMP_DIR, align_yaml_to_config, load_basic_align
from montreal_forced_aligner.utils import get_available_acoustic_languages, get_pretrained_acoustic_path, \
get_available_dict_languages, validate_dictionary_arg
from montreal_forced_aligner.helper import setup_logger, log_config
from montreal_forced_aligner.exceptions import ArgumentError


def adapt_model(args, unknown_args=None):
    """
    Adapt a pretrained acoustic model to a corpus and save the adapted model

    Parameters
    ----------
    args : Namespace
        Parsed command line arguments; this function reads ``temp_directory``,
        ``corpus_directory``, ``config_path``, ``dictionary_path``,
        ``acoustic_model_path``, ``output_model_path``, ``speaker_characters``,
        ``num_jobs`` and ``verbose``, plus the optional ``clean`` and ``debug`` flags
    unknown_args : list, optional
        Extra command line arguments used to override values in the align configuration

    Raises
    ------
    Exception
        Any error raised while setting up or running adaptation is re-raised
        after the run has been marked as dirty in the cached ``config.yml``
    """
    # NOTE(review): the adapt subcommand identifies itself as 'align' for the log
    # name and the cached run type -- confirm that sharing this with align is intended
    command = 'align'
    all_begin = time.time()
    if not args.temp_directory:
        temp_dir = TEMP_DIR
    else:
        temp_dir = os.path.expanduser(args.temp_directory)
    corpus_name = os.path.basename(args.corpus_directory)
    if corpus_name == '':
        # A trailing separator leaves an empty basename, so step up one level
        args.corpus_directory = os.path.dirname(args.corpus_directory)
        corpus_name = os.path.basename(args.corpus_directory)
    data_directory = os.path.join(temp_dir, corpus_name)
    if args.config_path:
        align_config = align_yaml_to_config(args.config_path)
    else:
        align_config = load_basic_align()
    if unknown_args:
        align_config.update_from_args(unknown_args)
    conf_path = os.path.join(data_directory, 'config.yml')
    if getattr(args, 'clean', False) and os.path.exists(data_directory):
        print('Cleaning old directory!')
        shutil.rmtree(data_directory, ignore_errors=True)
    logger = setup_logger(command, data_directory)
    logger.debug('ALIGN CONFIG:')
    log_config(logger, align_config)
    if os.path.exists(conf_path):
        with open(conf_path, 'r') as f:
            conf = yaml.load(f, Loader=yaml.SafeLoader)
    else:
        conf = {'dirty': False,
                'begin': all_begin,
                'version': __version__,
                'type': command,
                'corpus_directory': args.corpus_directory,
                'dictionary_path': args.dictionary_path,
                'acoustic_model_path': args.acoustic_model_path}
    # A cached config may lack the acoustic model key, so look it up without
    # raising KeyError
    previous_acoustic_model_path = conf.get('acoustic_model_path')
    if conf['dirty'] or conf['type'] != command \
            or conf['corpus_directory'] != args.corpus_directory \
            or conf['version'] != __version__ \
            or conf['dictionary_path'] != args.dictionary_path \
            or previous_acoustic_model_path != args.acoustic_model_path:
        logger.warning(
            'WARNING: Using old temp directory, this might not be ideal for you, use the --clean flag to ensure no '
            'weird behavior for previous versions of the temporary directory.')
        if conf['dirty']:
            logger.debug('Previous run ended in an error (maybe ctrl-c?)')
        if conf['type'] != command:
            logger.debug('Previous run was a different subcommand than {} (was {})'.format(command, conf['type']))
        if conf['corpus_directory'] != args.corpus_directory:
            logger.debug('Previous run used source directory '
                         'path {} (new run: {})'.format(conf['corpus_directory'], args.corpus_directory))
        if conf['version'] != __version__:
            logger.debug('Previous run was on {} version (new run: {})'.format(conf['version'], __version__))
        if conf['dictionary_path'] != args.dictionary_path:
            logger.debug('Previous run used dictionary path {} '
                         '(new run: {})'.format(conf['dictionary_path'], args.dictionary_path))
        if previous_acoustic_model_path != args.acoustic_model_path:
            logger.debug('Previous run used acoustic model path {} '
                         '(new run: {})'.format(previous_acoustic_model_path, args.acoustic_model_path))

    os.makedirs(data_directory, exist_ok=True)
    model_directory = os.path.join(data_directory, 'acoustic_models')
    os.makedirs(model_directory, exist_ok=True)
    acoustic_model = AcousticModel(args.acoustic_model_path, root_directory=model_directory)
    acoustic_model.log_details(logger)
    training_config = acoustic_model.adaptation_config()
    logger.debug('ADAPT TRAINING CONFIG:')
    log_config(logger, training_config)
    try:
        corpus = AlignableCorpus(args.corpus_directory, data_directory,
                                 speaker_characters=args.speaker_characters,
                                 num_jobs=args.num_jobs, sample_rate=align_config.feature_config.sample_frequency,
                                 logger=logger, use_mp=align_config.use_mp, punctuation=align_config.punctuation,
                                 clitic_markers=align_config.clitic_markers)
        if corpus.issues_check:
            logger.warning('Some issues parsing the corpus were detected. '
                           'Please run the validator to get more information.')
        logger.info(corpus.speaker_utterance_info())
        # A .yaml dictionary path selects the multispeaker dictionary class
        if args.dictionary_path.lower().endswith('.yaml'):
            dictionary = MultispeakerDictionary(args.dictionary_path, data_directory, logger=logger,
                                                punctuation=align_config.punctuation,
                                                clitic_markers=align_config.clitic_markers,
                                                compound_markers=align_config.compound_markers,
                                                multilingual_ipa=acoustic_model.meta['multilingual_ipa'],
                                                strip_diacritics=acoustic_model.meta.get('strip_diacritics', None),
                                                digraphs=acoustic_model.meta.get('digraphs', None))
        else:
            dictionary = Dictionary(args.dictionary_path, data_directory, logger=logger,
                                    punctuation=align_config.punctuation,
                                    clitic_markers=align_config.clitic_markers,
                                    compound_markers=align_config.compound_markers,
                                    multilingual_ipa=acoustic_model.meta['multilingual_ipa'],
                                    strip_diacritics=acoustic_model.meta.get('strip_diacritics', None),
                                    digraphs=acoustic_model.meta.get('digraphs', None))
        acoustic_model.validate(dictionary)

        begin = time.time()
        # The pretrained aligner is handed to the trainable aligner as its starting point
        previous = PretrainedAligner(corpus, dictionary, acoustic_model, align_config,
                                     temp_directory=data_directory,
                                     debug=getattr(args, 'debug', False), logger=logger)
        a = TrainableAligner(corpus, dictionary, training_config, align_config,
                             temp_directory=data_directory,
                             debug=getattr(args, 'debug', False), logger=logger, pretrained_aligner=previous)
        logger.debug('Setup adapter in {} seconds'.format(time.time() - begin))
        a.verbose = args.verbose

        begin = time.time()
        a.train()
        logger.debug('Performed adaptation in {} seconds'.format(time.time() - begin))

        begin = time.time()
        a.save(args.output_model_path, root_directory=model_directory)
        # This step saves the adapted model; no TextGrids are exported here
        logger.debug('Saved adapted model in {} seconds'.format(time.time() - begin))
        logger.info('All done!')
        logger.debug('Done! Everything took {} seconds'.format(time.time() - all_begin))
    except Exception:
        # Record the failure so the next run warns about the stale temp directory
        conf['dirty'] = True
        raise
    finally:
        # Iterate over a copy because removeHandler mutates logger.handlers
        handlers = logger.handlers[:]
        for handler in handlers:
            handler.close()
            logger.removeHandler(handler)
        with open(conf_path, 'w') as f:
            yaml.dump(conf, f)


def validate_args(args, downloaded_acoustic_models, download_dictionaries):
    """
    Check the command line arguments for adaptation and resolve named resources to paths

    Parameters
    ----------
    args : Namespace
        Parsed command line arguments; ``dictionary_path`` and ``acoustic_model_path``
        may be rewritten in place
    downloaded_acoustic_models : list
        Names of the acoustic models that can be referred to by language name
    download_dictionaries : list
        Names of the dictionaries passed on to ``validate_dictionary_arg``

    Raises
    ------
    ArgumentError
        If the corpus directory is missing or is not a directory, if an acoustic
        model file path does not exist, or if the acoustic model is neither a known
        name nor a path with the acoustic model extension
    """
    corpus_directory = args.corpus_directory
    if not os.path.exists(corpus_directory):
        raise ArgumentError('Could not find the corpus directory {}.'.format(corpus_directory))
    if not os.path.isdir(corpus_directory):
        raise ArgumentError('The specified corpus directory ({}) is not a directory.'.format(corpus_directory))

    args.dictionary_path = validate_dictionary_arg(args.dictionary_path, download_dictionaries)

    model_name = args.acoustic_model_path.lower()
    if model_name in downloaded_acoustic_models:
        # A known model name is swapped for the path of the pretrained model
        args.acoustic_model_path = get_pretrained_acoustic_path(model_name)
        return
    if model_name.endswith(AcousticModel.extension):
        if os.path.exists(args.acoustic_model_path):
            return
        raise ArgumentError('The specified model path does not exist: {}'.format(args.acoustic_model_path))
    available = ', '.join(downloaded_acoustic_models)
    raise ArgumentError(
        'The language \'{}\' is not currently included in the distribution, '
        'please align via training or specify one of the following language names: {}.'.format(
            model_name, available))


def run_adapt_model(args, unknown_args=None, downloaded_acoustic_models=None, download_dictionaries=None):
    """
    Normalize and validate the command line arguments, then run model adaptation

    Parameters
    ----------
    args : Namespace
        Parsed command line arguments
    unknown_args : list, optional
        Extra command line arguments passed through to ``adapt_model``
    downloaded_acoustic_models : list, optional
        Available acoustic model names; looked up when not supplied
    download_dictionaries : list, optional
        Available dictionary names; looked up when not supplied
    """
    acoustic_models = downloaded_acoustic_models
    if acoustic_models is None:
        acoustic_models = get_available_acoustic_languages()
    dictionaries = download_dictionaries
    if dictionaries is None:
        dictionaries = get_available_dict_languages()

    # Keep the original value when it cannot be parsed as an integer
    try:
        character_count = int(args.speaker_characters)
    except ValueError:
        pass
    else:
        args.speaker_characters = character_count

    # Strip trailing forward slashes first, then trailing backslashes
    without_slashes = args.corpus_directory.rstrip('/')
    args.corpus_directory = without_slashes.rstrip('\\')

    validate_args(args, acoustic_models, dictionaries)
    adapt_model(args, unknown_args)


if __name__ == '__main__':  # pragma: no cover
    # Script entry point: parse the adapt arguments and run adaptation between
    # the fix_path() and unfix_path() calls
    mp.freeze_support()
    from montreal_forced_aligner.command_line.mfa import adapt_parser, fix_path, unfix_path, acoustic_languages, \
        dict_languages

    parsed_args, extra_args = adapt_parser.parse_known_args()
    fix_path()
    run_adapt_model(parsed_args, extra_args, acoustic_languages, dict_languages)
    unfix_path()
8 changes: 3 additions & 5 deletions montreal_forced_aligner/command_line/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,15 +93,15 @@ def align_corpus(args, unknown_args=None):
logger.info(corpus.speaker_utterance_info())
if args.dictionary_path.lower().endswith('.yaml'):
dictionary = MultispeakerDictionary(args.dictionary_path, data_directory, logger=logger,
punctuation=align_config.punctuation,
punctuation=align_config.punctuation, word_set=corpus.word_set,
clitic_markers=align_config.clitic_markers,
compound_markers=align_config.compound_markers,
multilingual_ipa=acoustic_model.meta['multilingual_ipa'],
strip_diacritics=acoustic_model.meta.get('strip_diacritics', None),
digraphs=acoustic_model.meta.get('digraphs', None))
else:
dictionary = Dictionary(args.dictionary_path, data_directory, logger=logger,
punctuation=align_config.punctuation,
punctuation=align_config.punctuation, word_set=corpus.word_set,
clitic_markers=align_config.clitic_markers,
compound_markers=align_config.compound_markers,
multilingual_ipa=acoustic_model.meta['multilingual_ipa'],
Expand Down Expand Up @@ -146,7 +146,7 @@ def validate_args(args, downloaded_acoustic_models, download_dictionaries):
if args.corpus_directory == args.output_directory:
raise ArgumentError('Corpus directory and output directory cannot be the same folder.')

validate_dictionary_arg(args.dictionary_path, download_dictionaries)
args.dictionary_path = validate_dictionary_arg(args.dictionary_path, download_dictionaries)

if args.acoustic_model_path.lower() in downloaded_acoustic_models:
args.acoustic_model_path = get_pretrained_acoustic_path(args.acoustic_model_path.lower())
Expand Down Expand Up @@ -182,8 +182,6 @@ def run_align_corpus(args, unknown_args=None, downloaded_acoustic_models=None, d
dict_languages

align_args, unknown = align_parser.parse_known_args()
print(align_args)
print(unknown)
fix_path()
run_align_corpus(align_args, unknown, acoustic_languages, dict_languages)
unfix_path()
Loading

0 comments on commit 587ad65

Please sign in to comment.