diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 23f3789f..649b9476 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -20,7 +20,7 @@ jobs: - uses: "actions/checkout@v2" - uses: "actions/setup-python@v2" with: - python-version: "3.9" + python-version: "3.8" - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/ci/mfa_publish.yml b/ci/mfa_publish.yml index 3de124c9..adbc7975 100644 --- a/ci/mfa_publish.yml +++ b/ci/mfa_publish.yml @@ -3,6 +3,7 @@ channels: - conda-forge - defaults dependencies: + - python>=3.8 - numpy - librosa - tqdm diff --git a/docs/source/_static/interrogate_badge.svg b/docs/source/_static/interrogate_badge.svg index 58711083..d001eb10 100644 --- a/docs/source/_static/interrogate_badge.svg +++ b/docs/source/_static/interrogate_badge.svg @@ -1,5 +1,5 @@ - interrogate: 98.8% + interrogate: 97.7% @@ -12,8 +12,8 @@ interrogate interrogate - 98.8% - 98.8% + 97.7% + 97.7% diff --git a/docs/source/_templates/sidebar-nav-bs.html b/docs/source/_templates/sidebar-nav-bs.html index 9ec8c015..6737425d 100644 --- a/docs/source/_templates/sidebar-nav-bs.html +++ b/docs/source/_templates/sidebar-nav-bs.html @@ -1,7 +1,6 @@
{{ generate_nav_html("sidebar", - show_nav_level=theme_show_nav_level|int, maxdepth=theme_navigation_depth|int, collapse=theme_collapse_navigation|tobool, includehidden=True, diff --git a/docs/source/changelog/changelog_2.0.rst b/docs/source/changelog/changelog_2.0.rst index 171a6dd3..a147e2c4 100644 --- a/docs/source/changelog/changelog_2.0.rst +++ b/docs/source/changelog/changelog_2.0.rst @@ -10,6 +10,21 @@ Beta releases ============= +2.0.0b9 +------- + +- Fixed a bug where unknown word phones were showing up as blank +- Fixed a bug where TextGrid export would hang +- Fixed compatibility issues with Python 3.8 +- Added logging for when configuration parameters are ignored +- Added some functionality from the LibriSpeech recipe for triphone training with Arpabet + + - Not sure if it'll improve anything, but I'll run some tests and maybe extend it to other phone sets + +- Added better logging to TextGrid export +- Added new classes for managing collections of utterances, speakers, and files +- Fixed a bug where OOVs were not being properly reported by the validation tool + 2.0.0b8 ------- diff --git a/docs/source/external_links.py b/docs/source/external_links.py index e7f56217..ea140452 100644 --- a/docs/source/external_links.py +++ b/docs/source/external_links.py @@ -17,7 +17,7 @@ :license: BSD, see LICENSE for details. """ -from typing import Any +from typing import Any, Dict, List, Tuple import sphinx from docutils import nodes, utils @@ -42,8 +42,8 @@ def model_role( lineno: int, inliner: Inliner, options: dict = None, - content: list[str] = None, -) -> tuple[list[Node], list[system_message]]: + content: List[str] = None, +) -> Tuple[List[Node], List[system_message]]: text = utils.unescape(text) model_type, model_name = text.split("/") full_url = f"https://github.com/MontrealCorpusTools/mfa-models/raw/main/{model_type}/{model_name.lower()}.zip" @@ -59,8 +59,8 @@ def kaldi_steps_role( lineno: int, inliner: Inliner, options: dict = None, - content: list[str] = None, -) -> tuple[list[Node], list[system_message]]: + content: List[str] = None, +) -> Tuple[List[Node], List[system_message]]: text = utils.unescape(text) full_url = f"https://github.com/kaldi-asr/kaldi/tree/master/egs/wsj/s5/steps/{text}.sh" title = f"{text}.sh" @@ -75,8 +75,8 @@ def kaldi_utils_role( lineno: int, inliner: Inliner, options: dict = None, - content: list[str] = None, -) -> tuple[list[Node], list[system_message]]: + content: List[str] = None, +) -> Tuple[List[Node], List[system_message]]: filename = utils.unescape(text) full_url = f"https://github.com/kaldi-asr/kaldi/tree/master/egs/wsj/s5/utils/{filename}" title = f"{text}" @@ -91,8 +91,8 @@ def kaldi_steps_sid_role( lineno: int, inliner: Inliner, options: dict = None, - content: list[str] = None, -) -> tuple[list[Node], list[system_message]]: + content: List[str] = None, +) -> Tuple[List[Node], List[system_message]]: text = utils.unescape(text) full_url = f"https://github.com/kaldi-asr/kaldi/tree/cbed4ff688a172a7f765493d24771c1bd57dcd20/egs/sre08/v1/sid/{text}.sh" title = f"sid/{text}.sh" @@ -107,8 +107,8 @@ def kaldi_docs_role( lineno: int, inliner: Inliner, options: dict = None, - content: list[str] = None, -) -> tuple[list[Node], list[system_message]]: + content: List[str] = None, +) -> Tuple[List[Node], List[system_message]]: text = utils.unescape(text) t = text.split("#") text = t[0] @@ -130,8 +130,8 @@ def openfst_src_role( lineno: int, inliner: Inliner, options: dict = None, - content: list[str] = None, -) -> tuple[list[Node], list[system_message]]: + content: List[str] = None, +) -> Tuple[List[Node], List[system_message]]: text = utils.unescape(text) full_url = f"https://www.openfst.org/doxygen/fst/html/{text}-main_8cc_source.html" title = f"OpenFst {text} source" @@ -146,8 +146,8 @@ def kaldi_src_role( lineno: int, inliner: Inliner, options: dict = None, - content: list[str] = None, -) -> tuple[list[Node], list[system_message]]: + content: List[str] = None, +) -> Tuple[List[Node], List[system_message]]: text = utils.unescape(text) mapping = { "bin": set( @@ -379,8 +379,8 @@ def xref( lineno: int, inliner: Inliner, options: dict = None, - content: list[str] = None, -) -> tuple[list[Node], list[system_message]]: + content: List[str] = None, +) -> Tuple[List[Node], List[system_message]]: title = target = text # look if explicit title and target are given with `foo ` syntax @@ -409,7 +409,7 @@ def get_refs(app): xref.links = app.config.xref_links -def setup(app: Sphinx) -> dict[str, Any]: +def setup(app: Sphinx) -> Dict[str, Any]: app.add_config_value("xref_links", {}, "env") app.add_role("mfa_model", model_role) app.add_role("kaldi_steps", kaldi_steps_role) diff --git a/docs/source/reference/corpus/index.rst b/docs/source/reference/corpus/index.rst index bad9ed3a..d30c8c92 100644 --- a/docs/source/reference/corpus/index.rst +++ b/docs/source/reference/corpus/index.rst @@ -30,6 +30,19 @@ Corpora Helper classes and functions ============================ +Collections +----------- + +.. currentmodule:: montreal_forced_aligner.corpus.classes + +.. autosummary:: + :toctree: generated/ + + Collection + SpeakerCollection + FileCollection + UtteranceCollection + Multiprocessing --------------- diff --git a/docs/source/user_guide/configuration/global.rst b/docs/source/user_guide/configuration/global.rst index de24432e..4760601d 100644 --- a/docs/source/user_guide/configuration/global.rst +++ b/docs/source/user_guide/configuration/global.rst @@ -68,8 +68,7 @@ The :code:`multilingual_ipa`, :code:`strip_diacritics`, and :code:`digraphs` are "oov_word", "", "Internal word symbol to use for out of vocabulary items" "oov_phone", "spn", "Internal phone symbol to use for out of vocabulary items" "silence_word", "!sil", "Internal word symbol to use initial silence" - "nonoptional_silence_phone", "sil", "Internal phone symbol to use initial silence" - "optional_silence_phone", "sp", "Internal phone symbol to use optional silence in the middle of utterances" + "optional_silence_phone", "sp", "Internal phone symbol to use for optional silence in or around utterances" "position_dependent_phones", "True", "Flag for whether phones should mark their position in the word as part of the phone symbol internally" "num_silence_states", "5", "Number of states to use for silence phones" "num_non_silence_states", "3", "Number of states to use for non-silence phones" diff --git a/environment.yml b/environment.yml index 03fb37d5..756d24da 100644 --- a/environment.yml +++ b/environment.yml @@ -2,7 +2,7 @@ name: mfa channels: - conda-forge dependencies: - - python>=3.9 + - python>=3.8 - numpy - librosa - tqdm @@ -16,3 +16,4 @@ dependencies: - ngram - pynini - praatio + - biopython diff --git a/environment_win.yml b/environment_win.yml index 1e827581..76e0c6e4 100644 --- a/environment_win.yml +++ b/environment_win.yml @@ -2,7 +2,7 @@ name: montreal-forced-aligner channels: - conda-forge dependencies: - - python>=3.9 + - python>=3.8 - numpy - librosa - tqdm @@ -12,3 +12,4 @@ dependencies: - kaldi - sox - praatio + - biopython diff --git a/montreal_forced_aligner/abc.py 
b/montreal_forced_aligner/abc.py index db3474a6..776b6e83 100644 --- a/montreal_forced_aligner/abc.py +++ b/montreal_forced_aligner/abc.py @@ -11,10 +11,23 @@ import sys import time from abc import ABC, ABCMeta, abstractmethod -from typing import TYPE_CHECKING, Any, Optional, Type, Union, get_type_hints +from typing import ( + TYPE_CHECKING, + Any, + Dict, + List, + Optional, + Set, + Tuple, + Type, + Union, + get_type_hints, +) import yaml +from montreal_forced_aligner.helper import comma_join + if TYPE_CHECKING: from argparse import Namespace @@ -41,22 +54,22 @@ ] # Configuration types -MetaDict = dict[str, Any] -Labels: list[Any] -CtmErrorDict: dict[tuple[str, int], str] +MetaDict = Dict[str, Any] +Labels: List[Any] +CtmErrorDict: Dict[Tuple[str, int], str] # Dictionary types -DictionaryEntryType: list[dict[str, Union[tuple[str], float, None, int]]] -ReversedMappingType: dict[int, str] -WordsType: dict[str, DictionaryEntryType] -MappingType: dict[str, int] +DictionaryEntryType: List[Dict[str, Union[Tuple[str], float, None, int]]] +ReversedMappingType: Dict[int, str] +WordsType: Dict[str, DictionaryEntryType] +MappingType: Dict[str, int] # Corpus types -OneToOneMappingType: dict[str, str] -OneToManyMappingType: dict[str, list[str]] +OneToOneMappingType: Dict[str, str] +OneToManyMappingType: Dict[str, List[str]] CorpusMappingType: Union[OneToOneMappingType, OneToManyMappingType] -ScpType: Union[list[tuple[str, str]], list[tuple[str, list[Any]]]] +ScpType: Union[List[Tuple[str, str]], List[Tuple[str, List[Any]]]] class TemporaryDirectoryMixin(metaclass=ABCMeta): @@ -163,7 +176,7 @@ def log_error(self, message: str) -> None: ... @classmethod - def extract_relevant_parameters(cls, config: MetaDict) -> MetaDict: + def extract_relevant_parameters(cls, config: MetaDict) -> Tuple[MetaDict, List[str]]: """ Filter a configuration dictionary to just the relevant parameters for the current worker @@ -176,11 +189,20 @@ def extract_relevant_parameters(cls, config: MetaDict) -> MetaDict: ------- dict[str, Any] Filtered configuration dictionary + list[str] + Skipped keys """ - return {k: v for k, v in config.items() if k in cls.get_configuration_parameters()} + skipped = [] + new_config = {} + for k, v in config.items(): + if k in cls.get_configuration_parameters(): + new_config[k] = v + else: + skipped.append(k) + return new_config, skipped @classmethod - def get_configuration_parameters(cls) -> dict[str, Type]: + def get_configuration_parameters(cls) -> Dict[str, Type]: """ Get the types of parameters available to be configured @@ -189,6 +211,7 @@ def get_configuration_parameters(cls) -> dict[str, Type]: dict[str, Type] Dictionary of parameter names and their types """ + mapping = {Dict: dict, Tuple: tuple, List: list, Set: set} configuration_params = {} for t, ty in get_type_hints(cls.__init__).items(): configuration_params[t] = ty @@ -209,6 +232,14 @@ def get_configuration_parameters(cls) -> dict[str, Type]: pass except AttributeError: pass + for t, ty in configuration_params.items(): + for v in mapping.values(): + try: + if ty.__origin__ == v: + configuration_params[t] = v + break + except AttributeError: + break return configuration_params @property @@ -257,12 +288,15 @@ def __init__( clean: bool = False, **kwargs, ): + kwargs, skipped = type(self).extract_relevant_parameters(kwargs) super().__init__(**kwargs) self.num_jobs = num_jobs self.clean = clean self.initialized = False self.start_time = time.time() self.setup_logger() + if skipped: + self.logger.warning(f"Skipped the following 
configuration keys: {comma_join(skipped)}") def __del__(self): """Ensure that loggers are cleaned up on delete""" @@ -282,7 +316,7 @@ def working_directory(self) -> str: return self.workflow_directory @classmethod - def parse_args(cls, args: Optional[Namespace], unknown_args: Optional[list[str]]) -> MetaDict: + def parse_args(cls, args: Optional[Namespace], unknown_args: Optional[List[str]]) -> MetaDict: """ Class method for parsing configuration parameters from command line arguments @@ -323,6 +357,10 @@ def parse_args(cls, args: Optional[Namespace], unknown_args: Optional[list[str]] if param_type == bool: if unknown_dict[name].lower() == "false": params[name] = False + if getattr(args, "disable_mp", False): + params["use_mp"] = False + if getattr(args, "disable_textgrid_cleanup", False): + params["cleanup_textgrids"] = False return params @classmethod @@ -330,7 +368,7 @@ def parse_parameters( cls, config_path: Optional[str] = None, args: Optional[Namespace] = None, - unknown_args: Optional[list[str]] = None, + unknown_args: Optional[List[str]] = None, ) -> MetaDict: """ Parse configuration parameters from a config file and command line arguments @@ -683,7 +721,7 @@ def adapt(self) -> None: class MfaModel(ABC): """Abstract class for MFA models""" - extensions: list[str] + extensions: List[str] model_type = "base_model" @classmethod @@ -693,7 +731,7 @@ def pretrained_directory(cls) -> str: return os.path.join(get_temporary_directory(), "pretrained_models", cls.model_type) @classmethod - def get_available_models(cls) -> list[str]: + def get_available_models(cls) -> List[str]: """ Get a list of available models for a given model type diff --git a/montreal_forced_aligner/acoustic_modeling/base.py b/montreal_forced_aligner/acoustic_modeling/base.py index ca7bf9ad..b8db8ba5 100644 --- a/montreal_forced_aligner/acoustic_modeling/base.py +++ b/montreal_forced_aligner/acoustic_modeling/base.py @@ -9,7 +9,7 @@ import subprocess import time from abc import abstractmethod -from typing import TYPE_CHECKING, NamedTuple, Optional +from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Set, Tuple from tqdm import tqdm @@ -42,15 +42,15 @@ class AlignmentImprovementArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.acoustic_modeling.base.compute_alignment_improvement_func`""" log_path: str - dictionaries: list[str] + dictionaries: List[str] model_path: str - text_int_paths: dict[str, str] - word_boundary_paths: dict[str, str] - ali_paths: dict[str, str] + text_int_paths: Dict[str, str] + word_boundary_paths: Dict[str, str] + ali_paths: Dict[str, str] frame_shift: int - reversed_phone_mappings: dict[str, dict[int, str]] - positions: dict[str, list[str]] - phone_ctm_paths: dict[str, str] + reversed_phone_mappings: Dict[str, Dict[int, str]] + positions: Dict[str, List[str]] + phone_ctm_paths: Dict[str, str] class AccStatsArguments(NamedTuple): @@ -59,19 +59,19 @@ """ log_path: str - dictionaries: list[str] - feature_strings: dict[str, str] - ali_paths: dict[str, str] - acc_paths: dict[str, str] + dictionaries: List[str] + feature_strings: Dict[str, str] + ali_paths: Dict[str, str] + acc_paths: Dict[str, str] model_path: str def acc_stats_func( log_path: str, - dictionaries: list[str], - feature_strings: dict[str, str], - ali_paths: dict[str, str], - acc_paths: dict[str, str], + dictionaries: List[str], + feature_strings: Dict[str, str], + ali_paths: Dict[str, str], + acc_paths: Dict[str, str], model_path: str, ) -> None: """ @@ -120,15 
+120,15 @@ def acc_stats_func( def compute_alignment_improvement_func( log_path: str, - dictionaries: list[str], + dictionaries: List[str], model_path: str, - text_int_paths: dict[str, str], - word_boundary_paths: dict[str, str], - ali_paths: dict[str, str], + text_int_paths: Dict[str, str], + word_boundary_paths: Dict[str, str], + ali_paths: Dict[str, str], frame_shift: int, - reversed_phone_mappings: dict[str, dict[int, str]], - positions: dict[str, list[str]], - phone_ctm_paths: dict[str, str], + reversed_phone_mappings: Dict[str, Dict[int, str]], + positions: Dict[str, List[str]], + phone_ctm_paths: Dict[str, str], ) -> None: """ Multiprocessing function for computing alignment improvement over training @@ -266,10 +266,10 @@ def compute_alignment_improvement_func( def compare_alignments( - alignments_one: dict[str, list[CtmInterval]], - alignments_two: dict[str, list[CtmInterval]], - silence_phones: set[str], -) -> tuple[Optional[int], Optional[float]]: + alignments_one: Dict[str, List[CtmInterval]], + alignments_two: Dict[str, List[CtmInterval]], + silence_phones: Set[str], +) -> Tuple[Optional[int], Optional[float]]: """ Compares two sets of alignments for difference @@ -386,7 +386,7 @@ def __init__( self.training_complete = False self.realignment_iterations = [] # Gets set later - def acc_stats_arguments(self) -> list[AccStatsArguments]: + def acc_stats_arguments(self) -> List[AccStatsArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.acoustic_modeling.base.acc_stats_func` @@ -408,7 +408,7 @@ def acc_stats_arguments(self) -> list[AccStatsArguments]: for j in self.jobs ] - def alignment_improvement_arguments(self) -> list[AlignmentImprovementArguments]: + def alignment_improvement_arguments(self) -> List[AlignmentImprovementArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.acoustic_modeling.base.compute_alignment_improvement_func` @@ -490,7 +490,7 @@ def logger(self) -> logging.Logger: return self.worker.logger @property - def jobs(self) -> list[Job]: + def jobs(self) -> List[Job]: """Top-level worker's job objects""" return self.worker.jobs @@ -501,7 +501,7 @@ def disambiguation_symbols_int_path(self) -> str: def construct_feature_proc_strings( self, speaker_independent: bool = False - ) -> list[dict[str, str]]: + ) -> List[Dict[str, str]]: """Top-level worker's feature strings""" return self.worker.construct_feature_proc_strings(speaker_independent) @@ -706,7 +706,7 @@ def acc_stats(self): def parse_iteration_alignments( self, iteration: Optional[int] = None - ) -> dict[str, list[CtmInterval]]: + ) -> Dict[str, List[CtmInterval]]: """ Function to parse phone CTMs in a given iteration @@ -898,12 +898,17 @@ def meta(self) -> MetaDict: from ..utils import get_mfa_version + phone_regex = None + if self.base_phone_regex is not None: + phone_regex = self.base_phone_regex.pattern data = { "phones": sorted(self.non_silence_phones), "version": get_mfa_version(), "architecture": self.architecture, "train_date": str(datetime.now()), "features": self.feature_options, + "phone_set_type": self.phone_set_type, + "base_phone_regex": phone_regex, "multilingual_ipa": self.multilingual_ipa, } if self.multilingual_ipa: diff --git a/montreal_forced_aligner/acoustic_modeling/lda.py b/montreal_forced_aligner/acoustic_modeling/lda.py index 51400c09..d05a1b5c 100644 --- a/montreal_forced_aligner/acoustic_modeling/lda.py +++ b/montreal_forced_aligner/acoustic_modeling/lda.py @@ -4,7 +4,7 @@ import os import shutil import subprocess -from typing import 
TYPE_CHECKING, NamedTuple +from typing import TYPE_CHECKING, Dict, List, NamedTuple from montreal_forced_aligner.acoustic_modeling.triphone import TriphoneTrainer from montreal_forced_aligner.utils import parse_logs, run_mp, run_non_mp, thirdparty_binary @@ -20,12 +20,12 @@ class LdaAccStatsArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.acoustic_modeling.lda.lda_acc_stats_func`""" log_path: str - dictionaries: list[str] - feature_strings: dict[str, str] - ali_paths: dict[str, str] + dictionaries: List[str] + feature_strings: Dict[str, str] + ali_paths: Dict[str, str] model_path: str lda_options: MetaDict - acc_paths: dict[str, str] + acc_paths: Dict[str, str] class CalcLdaMlltArguments(NamedTuple): @@ -33,22 +33,22 @@ class CalcLdaMlltArguments(NamedTuple): log_path: str """Log file to save stderr""" - dictionaries: list[str] - feature_strings: dict[str, str] - ali_paths: dict[str, str] + dictionaries: List[str] + feature_strings: Dict[str, str] + ali_paths: Dict[str, str] model_path: str lda_options: MetaDict - macc_paths: dict[str, str] + macc_paths: Dict[str, str] def lda_acc_stats_func( log_path: str, - dictionaries: list[str], - feature_strings: dict[str, str], - ali_paths: dict[str, str], + dictionaries: List[str], + feature_strings: Dict[str, str], + ali_paths: Dict[str, str], model_path: str, lda_options: MetaDict, - acc_paths: dict[str, str], + acc_paths: Dict[str, str], ) -> None: """ Multiprocessing function to accumulate LDA stats @@ -126,12 +126,12 @@ def lda_acc_stats_func( def calc_lda_mllt_func( log_path: str, - dictionaries: list[str], - feature_strings: dict[str, str], - ali_paths: dict[str, str], + dictionaries: List[str], + feature_strings: Dict[str, str], + ali_paths: Dict[str, str], model_path: str, lda_options: MetaDict, - macc_paths: dict[str, str], + macc_paths: Dict[str, str], ) -> None: """ Multiprocessing function for estimating LDA with MLLT. 
@@ -266,7 +266,7 @@ def __init__( self.splice_left_context = splice_left_context self.splice_right_context = splice_right_context - def lda_acc_stats_arguments(self) -> list[LdaAccStatsArguments]: + def lda_acc_stats_arguments(self) -> List[LdaAccStatsArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.acoustic_modeling.lda.lda_acc_stats_func` @@ -289,7 +289,7 @@ def lda_acc_stats_arguments(self) -> list[LdaAccStatsArguments]: for j in self.jobs ] - def calc_lda_mllt_arguments(self) -> list[CalcLdaMlltArguments]: + def calc_lda_mllt_arguments(self) -> List[CalcLdaMlltArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.acoustic_modeling.lda.calc_lda_mllt_func` @@ -355,6 +355,10 @@ def lda_acc_stats(self) -> None: Reference Kaldi script """ + worker_lda_path = os.path.join(self.worker.working_directory, "lda.mat") + lda_path = os.path.join(self.working_directory, "lda.mat") + if os.path.exists(worker_lda_path): + os.remove(worker_lda_path) arguments = self.lda_acc_stats_arguments() if self.use_mp: @@ -371,7 +375,7 @@ def lda_acc_stats(self) -> None: [ thirdparty_binary("est-lda"), f"--dim={self.lda_dimension}", - os.path.join(self.working_directory, "lda.mat"), + lda_path, ] + acc_list, stderr=log_file, @@ -379,8 +383,8 @@ def lda_acc_stats(self) -> None: ) est_lda_proc.communicate() shutil.copyfile( - os.path.join(self.working_directory, "lda.mat"), - os.path.join(self.worker.working_directory, "lda.mat"), + lda_path, + worker_lda_path, ) def _trainer_initialization(self) -> None: diff --git a/montreal_forced_aligner/acoustic_modeling/monophone.py b/montreal_forced_aligner/acoustic_modeling/monophone.py index 53827ad8..18a033b8 100644 --- a/montreal_forced_aligner/acoustic_modeling/monophone.py +++ b/montreal_forced_aligner/acoustic_modeling/monophone.py @@ -4,7 +4,7 @@ import os import re import subprocess -from typing import NamedTuple +from typing import Dict, List, NamedTuple from montreal_forced_aligner.acoustic_modeling.base import AcousticModelTrainingMixin from montreal_forced_aligner.utils import run_mp, run_non_mp, thirdparty_binary @@ -14,21 +14,21 @@ class MonoAlignEqualArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.acoustic_modeling.monophone.mono_align_equal_func`""" log_path: str - dictionaries: list[str] - feature_strings: dict[str, str] - fst_scp_paths: dict[str, str] - ali_ark_paths: dict[str, str] - acc_paths: dict[str, str] + dictionaries: List[str] + feature_strings: Dict[str, str] + fst_scp_paths: Dict[str, str] + ali_ark_paths: Dict[str, str] + acc_paths: Dict[str, str] model_path: str def mono_align_equal_func( log_path: str, - dictionaries: list[str], - feature_strings: dict[str, str], - fst_scp_paths: dict[str, str], - ali_ark_paths: dict[str, str], - acc_paths: dict[str, str], + dictionaries: List[str], + feature_strings: Dict[str, str], + fst_scp_paths: Dict[str, str], + ali_ark_paths: Dict[str, str], + acc_paths: Dict[str, str], model_path: str, ): """ @@ -132,7 +132,7 @@ def __init__( self.max_gaussians = max_gaussians self.power = power - def mono_align_equal_arguments(self) -> list[MonoAlignEqualArguments]: + def mono_align_equal_arguments(self) -> List[MonoAlignEqualArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.acoustic_modeling.monophone.mono_align_equal_func` diff --git a/montreal_forced_aligner/acoustic_modeling/sat.py b/montreal_forced_aligner/acoustic_modeling/sat.py index df6aab04..1f8d0a77 100644 --- a/montreal_forced_aligner/acoustic_modeling/sat.py 
+++ b/montreal_forced_aligner/acoustic_modeling/sat.py @@ -5,7 +5,7 @@ import shutil import subprocess import time -from typing import NamedTuple +from typing import Dict, List, NamedTuple from montreal_forced_aligner.acoustic_modeling.triphone import TriphoneTrainer from montreal_forced_aligner.exceptions import KaldiProcessingError @@ -24,22 +24,22 @@ class AccStatsTwoFeatsArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.acoustic_modeling.sat.acc_stats_two_feats_func`""" log_path: str - dictionaries: list[str] - ali_paths: dict[str, str] - acc_paths: dict[str, str] + dictionaries: List[str] + ali_paths: Dict[str, str] + acc_paths: Dict[str, str] model_path: str - feature_strings: dict[str, str] - si_feature_strings: dict[str, str] + feature_strings: Dict[str, str] + si_feature_strings: Dict[str, str] def acc_stats_two_feats_func( log_path: str, - dictionaries: list[str], - ali_paths: dict[str, str], - acc_paths: dict[str, str], + dictionaries: List[str], + ali_paths: Dict[str, str], + acc_paths: Dict[str, str], model_path: str, - feature_strings: dict[str, str], - si_feature_strings: dict[str, str], + feature_strings: Dict[str, str], + si_feature_strings: Dict[str, str], ) -> None: """ Multiprocessing function for accumulating stats across speaker-independent and @@ -142,7 +142,7 @@ def __init__( self.power = power self.fmllr_iterations = [] - def acc_stats_two_feats_arguments(self) -> list[AccStatsTwoFeatsArguments]: + def acc_stats_two_feats_arguments(self) -> List[AccStatsTwoFeatsArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.acoustic_modeling.sat.acc_stats_two_feats_func` diff --git a/montreal_forced_aligner/acoustic_modeling/trainer.py b/montreal_forced_aligner/acoustic_modeling/trainer.py index 261a06cd..b021a70d 100644 --- a/montreal_forced_aligner/acoustic_modeling/trainer.py +++ b/montreal_forced_aligner/acoustic_modeling/trainer.py @@ -3,7 +3,7 @@ import os import time -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple import yaml @@ -59,7 +59,7 @@ class TrainableAligner(CorpusAligner, TopLevelMfaWorker, ModelExporterMixin): Training blocks """ - def __init__(self, training_configuration: list[tuple[str, dict[str, Any]]] = None, **kwargs): + def __init__(self, training_configuration: List[Tuple[str, Dict[str, Any]]] = None, **kwargs): self.param_dict = { k: v for k, v in kwargs.items() @@ -74,7 +74,7 @@ def __init__(self, training_configuration: list[tuple[str, dict[str, Any]]] = No self.current_acoustic_model: Optional[AcousticModel] = None super().__init__(**kwargs) os.makedirs(self.output_directory, exist_ok=True) - self.training_configs: dict[str, AcousticModelTrainingMixin] = {} + self.training_configs: Dict[str, AcousticModelTrainingMixin] = {} if training_configuration is None: training_configuration = [ ("monophone", {}), @@ -91,7 +91,7 @@ def parse_parameters( cls, config_path: Optional[str] = None, args: Optional[Namespace] = None, - unknown_args: Optional[list[str]] = None, + unknown_args: Optional[List[str]] = None, ) -> MetaDict: """ Parse configuration parameters from a config file and command line arguments diff --git a/montreal_forced_aligner/acoustic_modeling/triphone.py b/montreal_forced_aligner/acoustic_modeling/triphone.py index dfa4233b..617e3d2c 100644 --- a/montreal_forced_aligner/acoustic_modeling/triphone.py +++ b/montreal_forced_aligner/acoustic_modeling/triphone.py @@ -3,7 +3,7 @@ import os import subprocess -from typing import 
TYPE_CHECKING, NamedTuple +from typing import TYPE_CHECKING, Dict, List, NamedTuple from montreal_forced_aligner.acoustic_modeling.base import AcousticModelTrainingMixin from montreal_forced_aligner.utils import parse_logs, run_mp, run_non_mp, thirdparty_binary @@ -19,34 +19,34 @@ class TreeStatsArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.acoustic_modeling.triphone.tree_stats_func`""" log_path: str - dictionaries: list[str] + dictionaries: List[str] ci_phones: str model_path: str - feature_strings: dict[str, str] - ali_paths: dict[str, str] - treeacc_paths: dict[str, str] + feature_strings: Dict[str, str] + ali_paths: Dict[str, str] + treeacc_paths: Dict[str, str] class ConvertAlignmentsArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.acoustic_modeling.triphone.convert_alignments_func`""" log_path: str - dictionaries: list[str] + dictionaries: List[str] model_path: str tree_path: str align_model_path: str - ali_paths: dict[str, str] - new_ali_paths: dict[str, str] + ali_paths: Dict[str, str] + new_ali_paths: Dict[str, str] def convert_alignments_func( log_path: str, - dictionaries: list[str], + dictionaries: List[str], model_path: str, tree_path: str, align_model_path: str, - ali_paths: dict[str, str], - new_ali_paths: dict[str, str], + ali_paths: Dict[str, str], + new_ali_paths: Dict[str, str], ) -> None: """ Multiprocessing function for converting alignments from a previous trainer @@ -96,12 +96,12 @@ def convert_alignments_func( def tree_stats_func( log_path: str, - dictionaries: list[str], + dictionaries: List[str], ci_phones: str, model_path: str, - feature_strings: dict[str, str], - ali_paths: dict[str, str], - treeacc_paths: dict[str, str], + feature_strings: Dict[str, str], + ali_paths: Dict[str, str], + treeacc_paths: Dict[str, str], ) -> None: """ Multiprocessing function for calculating tree stats for training @@ -189,7 +189,7 @@ def __init__( self.max_gaussians = max_gaussians self.cluster_threshold = cluster_threshold - def tree_stats_arguments(self) -> list[TreeStatsArguments]: + def tree_stats_arguments(self) -> List[TreeStatsArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.acoustic_modeling.triphone.tree_stats_func` @@ -205,7 +205,7 @@ def tree_stats_arguments(self) -> list[TreeStatsArguments]: TreeStatsArguments( os.path.join(self.working_log_directory, f"acc_tree.{j.name}.log"), j.current_dictionary_names, - self.worker.silence_csl, + self.worker.context_independent_csl, alignment_model_path, feat_strings[j.name], j.construct_path_dictionary(self.previous_aligner.working_directory, "ali", "ark"), @@ -214,7 +214,7 @@ def tree_stats_arguments(self) -> list[TreeStatsArguments]: for j in self.jobs ] - def convert_alignments_arguments(self) -> list[ConvertAlignmentsArguments]: + def convert_alignments_arguments(self) -> List[ConvertAlignmentsArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.acoustic_modeling.triphone.convert_alignments_func` diff --git a/montreal_forced_aligner/alignment/adapting.py b/montreal_forced_aligner/alignment/adapting.py index 6f9fa26a..3b0a0bda 100644 --- a/montreal_forced_aligner/alignment/adapting.py +++ b/montreal_forced_aligner/alignment/adapting.py @@ -5,7 +5,7 @@ import shutil import subprocess import time -from typing import TYPE_CHECKING, NamedTuple +from typing import TYPE_CHECKING, Dict, List, NamedTuple from montreal_forced_aligner.abc import AdapterMixin from montreal_forced_aligner.alignment.pretrained import PretrainedAligner @@ -24,20 
+24,20 @@ class MapAccStatsArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.alignment.adapting.map_acc_stats_func`""" log_path: str - dictionaries: list[str] - feature_strings: dict[str, str] + dictionaries: List[str] + feature_strings: Dict[str, str] model_path: str - ali_paths: dict[str, str] - acc_paths: dict[str, str] + ali_paths: Dict[str, str] + acc_paths: Dict[str, str] def map_acc_stats_func( log_path: str, - dictionaries: list[str], - feature_strings: dict[str, str], + dictionaries: List[str], + feature_strings: Dict[str, str], model_path: str, - ali_paths: dict[str, str], - acc_paths: dict[str, str], + ali_paths: Dict[str, str], + acc_paths: Dict[str, str], ) -> None: """ Multiprocessing function for accumulating mapped stats for adapting acoustic models to new @@ -116,7 +116,7 @@ def __init__(self, mapping_tau: int = 20, **kwargs): self.initialized = False self.adaptation_done = False - def map_acc_stats_arguments(self, alignment=False) -> list[MapAccStatsArguments]: + def map_acc_stats_arguments(self, alignment=False) -> List[MapAccStatsArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.alignment.adapting.map_acc_stats_func` diff --git a/montreal_forced_aligner/alignment/base.py b/montreal_forced_aligner/alignment/base.py index 55b0b6e1..beae3959 100644 --- a/montreal_forced_aligner/alignment/base.py +++ b/montreal_forced_aligner/alignment/base.py @@ -7,7 +7,7 @@ import sys import time import traceback -from typing import Optional +from typing import List, Optional from montreal_forced_aligner.abc import FileExporterMixin from montreal_forced_aligner.alignment.mixins import AlignMixin @@ -58,7 +58,7 @@ class CorpusAligner(AcousticCorpusPronunciationMixin, AlignMixin, FileExporterMi def __init__(self, **kwargs): super().__init__(**kwargs) - def cleanup_word_ctm_arguments(self) -> list[CleanupWordCtmArguments]: + def cleanup_word_ctm_arguments(self) -> List[CleanupWordCtmArguments]: """ Generate Job arguments for :class:`~montreal_forced_aligner.alignment.multiprocessing.CleanupWordCtmProcessWorker` @@ -69,6 +69,7 @@ def cleanup_word_ctm_arguments(self) -> list[CleanupWordCtmArguments]: """ return [ CleanupWordCtmArguments( + os.path.join(self.working_log_directory, f"parse_word_ctm.{j.name}.log"), j.construct_path_dictionary(self.working_directory, "word", "ctm"), j.current_dictionary_names, j.job_utts(), @@ -77,7 +78,7 @@ def cleanup_word_ctm_arguments(self) -> list[CleanupWordCtmArguments]: for j in self.jobs ] - def no_cleanup_word_ctm_arguments(self) -> list[NoCleanupWordCtmArguments]: + def no_cleanup_word_ctm_arguments(self) -> List[NoCleanupWordCtmArguments]: """ Generate Job arguments for :class:`~montreal_forced_aligner.alignment.multiprocessing.NoCleanupWordCtmProcessWorker` @@ -88,6 +89,7 @@ def no_cleanup_word_ctm_arguments(self) -> list[NoCleanupWordCtmArguments]: """ return [ NoCleanupWordCtmArguments( + os.path.join(self.working_log_directory, f"parse_word_ctm.{j.name}.log"), j.construct_path_dictionary(self.working_directory, "word", "ctm"), j.current_dictionary_names, j.job_utts(), @@ -96,7 +98,7 @@ def no_cleanup_word_ctm_arguments(self) -> list[NoCleanupWordCtmArguments]: for j in self.jobs ] - def phone_ctm_arguments(self) -> list[PhoneCtmArguments]: + def phone_ctm_arguments(self) -> List[PhoneCtmArguments]: """ Generate Job arguments for :class:`~montreal_forced_aligner.alignment.multiprocessing.PhoneCtmProcessWorker` @@ -107,6 +109,7 @@ def phone_ctm_arguments(self) -> list[PhoneCtmArguments]: """ return [ 
PhoneCtmArguments( + os.path.join(self.working_log_directory, f"parse_phone_ctm.{j.name}.log"), j.construct_path_dictionary(self.working_directory, "phone", "ctm"), j.current_dictionary_names, j.job_utts(), @@ -116,7 +119,7 @@ def phone_ctm_arguments(self) -> list[PhoneCtmArguments]: for j in self.jobs ] - def combine_ctm_arguments(self) -> list[CombineCtmArguments]: + def combine_ctm_arguments(self) -> List[CombineCtmArguments]: """ Generate Job arguments for :class:`~montreal_forced_aligner.alignment.multiprocessing.CombineProcessWorker` @@ -127,6 +130,7 @@ def combine_ctm_arguments(self) -> list[CombineCtmArguments]: """ return [ CombineCtmArguments( + os.path.join(self.working_log_directory, f"combine_ctms.{j.name}.log"), j.current_dictionary_names, j.job_files(), j.job_speakers(), @@ -136,7 +140,7 @@ def combine_ctm_arguments(self) -> list[CombineCtmArguments]: for j in self.jobs ] - def export_textgrid_arguments(self) -> list[ExportTextGridArguments]: + def export_textgrid_arguments(self) -> List[ExportTextGridArguments]: """ Generate Job arguments for :class:`~montreal_forced_aligner.alignment.multiprocessing.ExportTextGridProcessWorker` @@ -147,12 +151,13 @@ def export_textgrid_arguments(self) -> list[ExportTextGridArguments]: """ return [ ExportTextGridArguments( + os.path.join(self.working_log_directory, f"export_textgrids.{j.name}.log"), self.files, self.frame_shift, self.textgrid_output, self.backup_output_directory, ) - for _ in self.jobs + for j in self.jobs ] @property @@ -448,8 +453,9 @@ def process_current_phone_labels(): self.logger.debug(f"Generating TextGrids for job {j.name}...") processed_files = set() - for file in j.job_files().values(): + for file in j.job_files(): first_file_write = True + file.aligned = True if file.name in processed_files: first_file_write = False try: @@ -488,7 +494,7 @@ def export_files(self, output_directory: str) -> None: self.convert_ali_to_textgrids() self.logger.debug(f"Exported TextGrids in a total of {time.time() - begin} seconds") - def ali_to_word_ctm_arguments(self) -> list[AliToCtmArguments]: + def ali_to_word_ctm_arguments(self) -> List[AliToCtmArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.alignment.multiprocessing.ali_to_ctm_func` @@ -512,7 +518,7 @@ def ali_to_word_ctm_arguments(self) -> list[AliToCtmArguments]: for j in self.jobs ] - def ali_to_phone_ctm_arguments(self) -> list[AliToCtmArguments]: + def ali_to_phone_ctm_arguments(self) -> List[AliToCtmArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.alignment.multiprocessing.ali_to_ctm_func` diff --git a/montreal_forced_aligner/alignment/mixins.py b/montreal_forced_aligner/alignment/mixins.py index 758555bd..eee029fd 100644 --- a/montreal_forced_aligner/alignment/mixins.py +++ b/montreal_forced_aligner/alignment/mixins.py @@ -5,7 +5,7 @@ import os import time from abc import abstractmethod -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Dict, List from montreal_forced_aligner.alignment.multiprocessing import ( AlignArguments, @@ -60,7 +60,7 @@ class AlignMixin(DictionaryMixin): """ logger: logging.Logger - jobs: list[Job] + jobs: List[Job] use_mp: bool def __init__( @@ -95,11 +95,11 @@ def data_directory(self): ... @abstractmethod - def construct_feature_proc_strings(self) -> list[dict[str, str]]: + def construct_feature_proc_strings(self) -> List[Dict[str, str]]: """Generate feature strings""" ... 
- def compile_train_graphs_arguments(self) -> list[CompileTrainGraphsArguments]: + def compile_train_graphs_arguments(self) -> List[CompileTrainGraphsArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.alignment.multiprocessing.compile_train_graphs_func` @@ -131,7 +131,7 @@ def compile_train_graphs_arguments(self) -> list[CompileTrainGraphsArguments]: ) return args - def align_arguments(self) -> list[AlignArguments]: + def align_arguments(self) -> List[AlignArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.alignment.multiprocessing.align_func` @@ -163,7 +163,7 @@ def align_arguments(self) -> list[AlignArguments]: ) return args - def compile_information_arguments(self) -> list[CompileInformationArguments]: + def compile_information_arguments(self) -> List[CompileInformationArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.alignment.multiprocessing.compile_information_func` diff --git a/montreal_forced_aligner/alignment/multiprocessing.py b/montreal_forced_aligner/alignment/multiprocessing.py index 78932935..a1b7e44a 100644 --- a/montreal_forced_aligner/alignment/multiprocessing.py +++ b/montreal_forced_aligner/alignment/multiprocessing.py @@ -12,7 +12,7 @@ import sys import traceback from queue import Empty -from typing import TYPE_CHECKING, NamedTuple, Union +from typing import TYPE_CHECKING, Dict, List, NamedTuple, Union from montreal_forced_aligner.textgrid import ( CtmInterval, @@ -27,7 +27,13 @@ if TYPE_CHECKING: from montreal_forced_aligner.abc import CtmErrorDict, MetaDict, ReversedMappingType - from montreal_forced_aligner.corpus.classes import File, Speaker, Utterance + from montreal_forced_aligner.corpus.classes import ( + File, + FileCollection, + SpeakerCollection, + Utterance, + UtteranceCollection, + ) from montreal_forced_aligner.dictionary import DictionaryData @@ -51,58 +57,63 @@ class AliToCtmArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.alignment.multiprocessing.ali_to_ctm_func`""" log_path: str - dictionaries: list[str] - ali_paths: dict[str, str] - text_int_paths: dict[str, str] - word_boundary_int_paths: dict[str, str] + dictionaries: List[str] + ali_paths: Dict[str, str] + text_int_paths: Dict[str, str] + word_boundary_int_paths: Dict[str, str] frame_shift: float model_path: str - ctm_paths: dict[str, str] + ctm_paths: Dict[str, str] word_mode: bool class CleanupWordCtmArguments(NamedTuple): """Arguments for :class:`~montreal_forced_aligner.alignment.multiprocessing.CleanupWordCtmProcessWorker`""" - ctm_paths: dict[str, str] - dictionaries: list[str] - utterances: dict[str, dict[str, Utterance]] - dictionary_data: dict[str, DictionaryData] + log_path: str + ctm_paths: Dict[str, str] + dictionaries: List[str] + utterances: Dict[str, UtteranceCollection] + dictionary_data: Dict[str, DictionaryData] class NoCleanupWordCtmArguments(NamedTuple): """Arguments for :class:`~montreal_forced_aligner.alignment.multiprocessing.NoCleanupWordCtmProcessWorker`""" - ctm_paths: dict[str, str] - dictionaries: list[str] - utterances: dict[str, dict[str, Utterance]] - dictionary_data: dict[str, DictionaryData] + log_path: str + ctm_paths: Dict[str, str] + dictionaries: List[str] + utterances: Dict[str, UtteranceCollection] + dictionary_data: Dict[str, DictionaryData] class PhoneCtmArguments(NamedTuple): """Arguments for :class:`~montreal_forced_aligner.alignment.multiprocessing.PhoneCtmProcessWorker`""" - ctm_paths: dict[str, str] - dictionaries: list[str] - utterances: dict[str, dict[str, 
Utterance]] - reversed_phone_mappings: dict[str, ReversedMappingType] - positions: dict[str, list[str]] + log_path: str + ctm_paths: Dict[str, str] + dictionaries: List[str] + utterances: Dict[str, UtteranceCollection] + reversed_phone_mappings: Dict[str, ReversedMappingType] + positions: Dict[str, List[str]] class CombineCtmArguments(NamedTuple): """Arguments for :class:`~montreal_forced_aligner.alignment.multiprocessing.CombineProcessWorker`""" - dictionaries: list[str] - files: dict[str, File] - speakers: dict[str, Speaker] - dictionary_data: dict[str, DictionaryData] + log_path: str + dictionaries: List[str] + files: FileCollection + speakers: SpeakerCollection + dictionary_data: Dict[str, DictionaryData] cleanup_textgrids: bool class ExportTextGridArguments(NamedTuple): """Arguments for :class:`~montreal_forced_aligner.alignment.multiprocessing.ExportTextGridProcessWorker`""" - files: dict[str, File] + log_path: str + files: Dict[str, File] frame_shift: int output_directory: str backup_output_directory: str @@ -118,36 +129,36 @@ class CompileTrainGraphsArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.alignment.multiprocessing.compile_train_graphs_func`""" log_path: str - dictionaries: list[str] + dictionaries: List[str] tree_path: str model_path: str - text_int_paths: dict[str, str] - disambig_paths: dict[str, str] - lexicon_fst_paths: dict[str, str] - fst_scp_paths: dict[str, str] + text_int_paths: Dict[str, str] + disambig_paths: Dict[str, str] + lexicon_fst_paths: Dict[str, str] + fst_scp_paths: Dict[str, str] class AlignArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.alignment.multiprocessing.align_func`""" log_path: str - dictionaries: list[str] - fst_scp_paths: dict[str, str] - feature_strings: dict[str, str] + dictionaries: List[str] + fst_scp_paths: Dict[str, str] + feature_strings: Dict[str, str] model_path: str - ali_paths: dict[str, str] + ali_paths: Dict[str, str] align_options: MetaDict def compile_train_graphs_func( log_path: str, - dictionaries: list[str], + dictionaries: List[str], tree_path: str, model_path: str, - text_int_paths: dict[str, str], + text_int_paths: Dict[str, str], disambig_path: str, - lexicon_fst_paths: dict[str, str], - fst_scp_paths: dict[str, str], + lexicon_fst_paths: Dict[str, str], + fst_scp_paths: Dict[str, str], ) -> None: """ Multiprocessing function to compile training graphs @@ -205,11 +216,11 @@ def compile_train_graphs_func( def align_func( log_path: str, - dictionaries: list[str], - fst_scp_paths: dict[str, str], - feature_strings: dict[str, str], + dictionaries: List[str], + fst_scp_paths: Dict[str, str], + feature_strings: Dict[str, str], model_path: str, - ali_paths: dict[str, str], + ali_paths: Dict[str, str], align_options: MetaDict, ): """ @@ -280,7 +291,7 @@ def align_func( align_proc.communicate() -def compile_information_func(align_log_path: str) -> dict[str, Union[list[str], float, int]]: +def compile_information_func(align_log_path: str) -> Dict[str, Union[List[str], float, int]]: """ Multiprocessing function for compiling information about alignment @@ -334,13 +345,13 @@ def compile_information_func(align_log_path: str) -> dict[str, Union[list[str], def ali_to_ctm_func( log_path: str, - dictionaries: list[str], - ali_paths: dict[str, str], - text_int_paths: dict[str, str], - word_boundary_int_paths: dict[str, str], + dictionaries: List[str], + ali_paths: Dict[str, str], + text_int_paths: Dict[str, str], + word_boundary_int_paths: Dict[str, str], frame_shift: float, model_path: 
str, - ctm_paths: dict[str, str], + ctm_paths: Dict[str, str], word_mode: bool, ) -> None: """ @@ -493,6 +504,7 @@ def __init__( self.stopped = stopped self.error_catching = error_catching + self.log_path = arguments.log_path # Corpus information self.utterances = arguments.utterances @@ -503,58 +515,70 @@ def run(self) -> None: """ Run the word processing with no clean up """ - current_file_data = {} + with open(self.log_path, "w", encoding="utf8") as log_file: + current_file_data = {} - def process_current(cur_utt: Utterance, current_labels: list[CtmInterval]): - """Process current stack of intervals""" - actual_labels = parse_from_word_no_cleanup( - current_labels, self.dictionary_data[dict_name].reversed_words_mapping - ) - current_file_data[cur_utt.name] = actual_labels + def process_current(cur_utt: Utterance, current_labels: List[CtmInterval]): + """Process current stack of intervals""" + actual_labels = parse_from_word_no_cleanup( + current_labels, self.dictionary_data[dict_name].reversed_words_mapping + ) + current_file_data[cur_utt.name] = actual_labels + log_file.write( + f"Parsed actual word labels ({len(actual_labels)}) for {cur_utt} (was {len(current_labels)})\n" + ) - def process_current_file(cur_file: str): - """Process current file and add to return queue""" - self.to_process_queue.put(("word", cur_file, current_file_data)) + def process_current_file(cur_file: str): + """Process current file and add to return queue""" + self.to_process_queue.put(("word", cur_file, current_file_data)) + log_file.write(f"Added word records for {cur_file} to queue\n") - cur_utt = None - cur_file = None - utt_begin = 0 - current_labels = [] - try: - for dict_name in self.dictionaries: - with open(self.ctm_paths[dict_name], "r") as word_file: - for line in word_file: - line = line.strip() - if not line: - continue - interval = process_ctm_line(line) - utt = interval.utterance - if cur_utt is None: - cur_utt = self.utterances[dict_name][utt] - utt_begin = cur_utt.begin - cur_file = cur_utt.file_name - - if utt != cur_utt: - process_current(cur_utt, current_labels) - cur_utt = self.utterances[dict_name][utt] - file_name = cur_utt.file_name - if file_name != cur_file: - process_current_file(cur_file) - current_file_data = {} - cur_file = file_name - current_labels = [] - if utt_begin: - interval.shift_times(utt_begin) - current_labels.append(interval) - if current_labels: - process_current(cur_utt, current_labels) - process_current_file(cur_file) - except Exception: - self.stopped.stop() - exc_type, exc_value, exc_traceback = sys.exc_info() - self.error_catching[("word", self.job_name)] = "\n".join( - traceback.format_exception(exc_type, exc_value, exc_traceback) - ) + cur_utt = None + cur_file = "" + utt_begin = 0 + current_labels = [] + try: + for dict_name in self.dictionaries: + ctm_path = self.ctm_paths[dict_name] + log_file.write(f"Processing dictionary {dict_name}: {ctm_path}\n") + with open(ctm_path, "r") as word_file: + for line in word_file: + line = line.strip() + if not line: + continue + interval = process_ctm_line(line) + utt = interval.utterance + if cur_utt is None: + cur_utt = self.utterances[dict_name][utt] + utt_begin = cur_utt.begin + cur_file = cur_utt.file_name + log_file.write( + f"Current utt: {cur_utt}, current file: {cur_file}\n" + ) + + if utt != cur_utt: + process_current(cur_utt, current_labels) + cur_utt = self.utterances[dict_name][utt] + file_name = cur_utt.file_name + log_file.write(f"Processing utterance labels: {cur_utt}\n") + if file_name != cur_file: + 
log_file.write(f"Processing file: {cur_file}\n") + process_current_file(cur_file) + current_file_data = {} + cur_file = file_name + current_labels = [] + if utt_begin: + interval.shift_times(utt_begin) + current_labels.append(interval) + if current_labels: + process_current(cur_utt, current_labels) + process_current_file(cur_file) + except Exception: + self.stopped.stop() + exc_type, exc_value, exc_traceback = sys.exc_info() + self.error_catching[("word", self.job_name)] = "\n".join( + traceback.format_exception(exc_type, exc_value, exc_traceback) + ) class CleanupWordCtmProcessWorker(mp.Process): @@ -596,6 +620,7 @@ def __init__( self.stopped = stopped self.error_catching = error_catching + self.log_path = arguments.log_path # Corpus information self.utterances = arguments.utterances @@ -606,60 +631,73 @@ def run(self) -> None: """ Run the word processing with clean up """ - current_file_data = {} - - def process_current(cur_utt: Utterance, current_labels: list[CtmInterval]) -> None: - """Process current stack of intervals""" - text = cur_utt.text.split() - actual_labels = parse_from_word(current_labels, text, self.dictionary_data[dict_name]) + with open(self.log_path, "w", encoding="utf8") as log_file: + current_file_data = {} + + def process_current(cur_utt: Utterance, current_labels: List[CtmInterval]) -> None: + """Process current stack of intervals""" + text = cur_utt.text.split() + actual_labels = parse_from_word( + current_labels, text, self.dictionary_data[dict_name] + ) - current_file_data[cur_utt.name] = actual_labels + current_file_data[cur_utt.name] = actual_labels + log_file.write( + f"Parsed actual word labels ({len(actual_labels)} for {cur_utt} (was {len(current_labels)})\n" + ) - def process_current_file(cur_file: str) -> None: - """Process current file and add to return queue""" - self.to_process_queue.put(("word", cur_file, current_file_data)) + def process_current_file(cur_file: str) -> None: + """Process current file and add to return queue""" + self.to_process_queue.put(("word", cur_file, current_file_data)) + log_file.write(f"Added word records for {cur_file} to queue\n") - cur_utt = None - cur_file = None - utt_begin = 0 - current_labels = [] - try: - for dict_name in self.dictionaries: - ctm_path = self.ctm_paths[dict_name] - with open(ctm_path, "r") as word_file: - for line in word_file: - line = line.strip() - if not line: - continue - interval = process_ctm_line(line) - utt = interval.utterance - if cur_utt is None: - cur_utt = self.utterances[dict_name][utt] - utt_begin = cur_utt.begin - cur_file = cur_utt.file_name - - if utt != cur_utt: - process_current(cur_utt, current_labels) - cur_utt = self.utterances[dict_name][utt] - utt_begin = cur_utt.begin - file_name = cur_utt.file_name - if file_name != cur_file: - process_current_file(cur_file) - current_file_data = {} - cur_file = file_name - current_labels = [] - if utt_begin: - interval.shift_times(utt_begin) - current_labels.append(interval) - if current_labels: - process_current(cur_utt, current_labels) - process_current_file(cur_file) - except Exception: - self.stopped.stop() - exc_type, exc_value, exc_traceback = sys.exc_info() - self.error_catching[("word", self.job_name)] = "\n".join( - traceback.format_exception(exc_type, exc_value, exc_traceback) - ) + cur_utt = None + cur_file = "" + utt_begin = 0 + current_labels = [] + try: + for dict_name in self.dictionaries: + ctm_path = self.ctm_paths[dict_name] + log_file.write(f"Processing dictionary {dict_name}: {ctm_path}\n") + with open(ctm_path, "r") as 
word_file: + for line in word_file: + line = line.strip() + if not line: + continue + interval = process_ctm_line(line) + utt = interval.utterance + if cur_utt is None: + cur_utt = self.utterances[dict_name][utt] + utt_begin = cur_utt.begin + cur_file = cur_utt.file_name + log_file.write( + f"Current utt: {cur_utt}, current file: {cur_file}\n" + ) + + if utt != cur_utt: + log_file.write(f"Processing utterance labels: {cur_utt}\n") + process_current(cur_utt, current_labels) + cur_utt = self.utterances[dict_name][utt] + utt_begin = cur_utt.begin + file_name = cur_utt.file_name + if file_name != cur_file: + log_file.write(f"Processing file: {cur_file}\n") + process_current_file(cur_file) + current_file_data = {} + cur_file = file_name + current_labels = [] + if utt_begin: + interval.shift_times(utt_begin) + current_labels.append(interval) + if current_labels: + process_current(cur_utt, current_labels) + process_current_file(cur_file) + except Exception: + self.stopped.stop() + exc_type, exc_value, exc_traceback = sys.exc_info() + self.error_catching[("word", self.job_name)] = "\n".join( + traceback.format_exception(exc_type, exc_value, exc_traceback) + ) class PhoneCtmProcessWorker(mp.Process): @@ -701,72 +739,84 @@ def __init__( self.stopped = stopped self.error_catching = error_catching + self.log_path = arguments.log_path self.utterances = arguments.utterances - self.reversed_phone_mappings = arguments.reversed_phone_mappings self.positions = arguments.positions def run(self) -> None: """Run the phone processing""" cur_utt = None - cur_file = None + cur_file = "" utt_begin = 0 - current_labels = [] + with open(self.log_path, "w", encoding="utf8") as log_file: + current_labels = [] - current_file_data = {} + current_file_data = {} - def process_current_utt(cur_utt: Utterance, current_labels: list[CtmInterval]) -> None: - """Process current stack of intervals""" - actual_labels = parse_from_phone( - current_labels, self.reversed_phone_mappings[dict_name], self.positions[dict_name] - ) - current_file_data[cur_utt.name] = actual_labels + def process_current_utt(cur_utt: Utterance, current_labels: List[CtmInterval]) -> None: + """Process current stack of intervals""" + actual_labels = parse_from_phone( + current_labels, + self.reversed_phone_mappings[dict_name], + self.positions[dict_name], + ) + current_file_data[cur_utt.name] = actual_labels + log_file.write(f"Parsed actual phone labels ({len(actual_labels)} for {cur_utt}\n") - def process_current_file(cur_file: str) -> None: - """Process current file and add to return queue""" - self.to_process_queue.put(("phone", cur_file, current_file_data)) + def process_current_file(cur_file: str) -> None: + """Process current file and add to return queue""" + self.to_process_queue.put(("phone", cur_file, current_file_data)) + log_file.write(f"Added phone records for {cur_file} to queue\n") - try: - for dict_name in self.dictionaries: - with open(self.ctm_paths[dict_name], "r") as word_file: - for line in word_file: - line = line.strip() - if not line: - continue - interval = process_ctm_line(line) - utt = interval.utterance - if cur_utt is None: - cur_utt = self.utterances[dict_name][utt] - cur_file = cur_utt.file_name - utt_begin = cur_utt.begin - - if utt != cur_utt: - - process_current_utt(cur_utt, current_labels) - - cur_utt = self.utterances[dict_name][utt] - file_name = cur_utt.file_name - utt_begin = cur_utt.begin - - if file_name != cur_file: - process_current_file(cur_file) - current_file_data = {} - cur_file = file_name - current_labels = [] - 
if utt_begin: - interval.shift_times(utt_begin) - current_labels.append(interval) - if current_labels: - process_current_utt(cur_utt, current_labels) - process_current_file(cur_file) - except Exception: - self.stopped.stop() - exc_type, exc_value, exc_traceback = sys.exc_info() - self.error_catching[("phone", self.job_name)] = ( - "\n".join(traceback.format_exception(exc_type, exc_value, exc_traceback)) - + f"\n\n{len(self.utterances['english'].keys())}\nCould not find: {utt}\n" - + "\n".join(self.utterances["english"].keys()) - ) + try: + for dict_name in self.dictionaries: + ctm_path = self.ctm_paths[dict_name] + log_file.write(f"Processing dictionary {dict_name}: {ctm_path}\n") + with open(ctm_path, "r") as word_file: + for line in word_file: + line = line.strip() + if not line: + continue + interval = process_ctm_line(line) + utt = interval.utterance + if cur_utt is None: + cur_utt = self.utterances[dict_name][utt] + cur_file = cur_utt.file_name + utt_begin = cur_utt.begin + log_file.write( + f"Current utt: {cur_utt}, current file: {cur_file}\n" + ) + + if utt != cur_utt: + + log_file.write(f"Processing utterance labels: {cur_utt}\n") + process_current_utt(cur_utt, current_labels) + + cur_utt = self.utterances[dict_name][utt] + file_name = cur_utt.file_name + utt_begin = cur_utt.begin + + if file_name != cur_file: + log_file.write(f"Processing file: {cur_file}\n") + process_current_file(cur_file) + current_file_data = {} + cur_file = file_name + current_labels = [] + if utt_begin: + interval.shift_times(utt_begin) + current_labels.append(interval) + if current_labels: + process_current_utt(cur_utt, current_labels) + process_current_file(cur_file) + except Exception: + self.stopped.stop() + exc_type, exc_value, exc_traceback = sys.exc_info() + self.error_catching[("phone", self.job_name)] = ( + "\n".join(traceback.format_exception(exc_type, exc_value, exc_traceback)) + + f"\n\n{len(self.utterances['english'])}\nCould not find: {utt}\n" + + "\n".join(self.utterances["english"]) + ) class CombineProcessWorker(mp.Process): @@ -814,12 +864,12 @@ def __init__( self.finished_combining = finished_combining self.error_catching = error_catching + self.log_path = arguments.log_path self.files = arguments.files self.speakers = arguments.speakers self.dictionary_data = arguments.dictionary_data self.cleanup_textgrids = arguments.cleanup_textgrids - - for file in self.files.values(): + for file in self.files: for s in file.speaker_ordering: if s.name not in self.speakers: continue @@ -827,60 +877,74 @@ def __init__( def run(self) -> None: """Run the combination function""" - phone_data = {} word_data = {} - while True: - try: - w_p, file_name, data = self.to_process_queue.get(timeout=queue_polling_timeout) - except Empty: - if self.finished_combining.stop_check(): - break - continue - self.to_process_queue.task_done() - if self.stopped.stop_check(): - continue - if w_p == "phone": - if file_name in word_data: - word_ctm = word_data.pop(file_name) - phone_ctm = data - else: - phone_data[file_name] = data + count = 0 + with open(self.log_path, "w", encoding="utf8") as log_file: + while True: + try: + w_p, file_name, data = self.to_process_queue.get(timeout=queue_polling_timeout) + except Empty: + if self.finished_combining.stop_check(): + break continue - else: - if file_name in phone_data: - phone_ctm = phone_data.pop(file_name) - word_ctm = data - else: - word_data[file_name] = data + log_file.write(f"Got {file_name}, {w_p}\n") + self.to_process_queue.task_done() + if self.stopped.stop_check(): + 
log_file.write("Got stop check, exiting\n") continue - try: - file = self.files[file_name] - for u_name, u in file.utterances.items(): - if u_name not in word_ctm: + if w_p == "phone": + if file_name in word_data: + word_ctm = word_data.pop(file_name) + phone_ctm = data + else: + log_file.write(f"No word data yet for {file_name}, shelving\n") + phone_data[file_name] = data continue - u.speaker.dictionary_data = self.dictionary_data[ - self.speakers[u.speaker_name].dictionary_name - ] - u.word_labels = word_ctm[u_name] - u.phone_labels = phone_ctm[u_name] - processed_check = True - for s in file.speaker_ordering: - if s.name not in self.speakers: + else: + if file_name in phone_data: + phone_ctm = phone_data.pop(file_name) + word_ctm = data + else: + log_file.write(f"No phone data yet for {file_name}, shelving\n") + word_data[file_name] = data continue - if not file.has_fully_aligned_speaker(s): - processed_check = False - break - if not processed_check: - continue - data = generate_tiers(file, cleanup_textgrids=self.cleanup_textgrids) - self.to_export_queue.put((file_name, data)) - except Exception: - self.stopped.stop() - exc_type, exc_value, exc_traceback = sys.exc_info() - self.error_catching[("combining", self.job_name)] = "\n".join( - traceback.format_exception(exc_type, exc_value, exc_traceback) - ) + try: + file = self.files[file_name] + log_file.write(f"Generating tiers for {file}\n") + for utterance in file.utterances: + if utterance.name not in word_ctm: + log_file.write(f"{utterance.name} not in word_ctm, skipping over\n") + continue + utterance.speaker.dictionary_data = self.dictionary_data[ + self.speakers[utterance.speaker_name].dictionary_name + ] + utterance.word_labels = word_ctm[utterance.name] + utterance.phone_labels = phone_ctm[utterance.name] + processed_check = True + for s in file.speaker_ordering: + if s.name not in self.speakers: + continue + if not file.has_fully_aligned_speaker(s): + + log_file.write( + f"{file} is not fully aligned for speaker {s}, shelving\n" + ) + processed_check = False + break + if not processed_check: + continue + log_file.write(f"Generating tiers for file {count} of {len(self.files)}\n") + count += 1 + data = generate_tiers(file, cleanup_textgrids=self.cleanup_textgrids) + self.to_export_queue.put((file_name, data)) + log_file.write(f"{file_name} put in export queue\n") + except Exception: + self.stopped.stop() + exc_type, exc_value, exc_traceback = sys.exc_info() + self.error_catching[("combining", self.job_name)] = "\n".join( + traceback.format_exception(exc_type, exc_value, exc_traceback) + ) class ExportTextGridProcessWorker(mp.Process): @@ -911,7 +975,7 @@ def __init__( for_write_queue: mp.Queue, stopped: Stopped, finished_processing: Stopped, - textgrid_errors: dict[str, str], + textgrid_errors: Dict[str, str], arguments: ExportTextGridArguments, ): mp.Process.__init__(self) @@ -920,6 +984,7 @@ def __init__( self.finished_processing = finished_processing self.textgrid_errors = textgrid_errors + self.log_path = arguments.log_path self.files = arguments.files self.output_directory = arguments.output_directory self.backup_output_directory = arguments.backup_output_directory @@ -928,29 +993,34 @@ def __init__( def run(self) -> None: """Run the exporter function""" - while True: - try: - file_name, data = self.for_write_queue.get(timeout=queue_polling_timeout) - except Empty: - if self.finished_processing.stop_check(): - break - continue - self.for_write_queue.task_done() - if self.stopped.stop_check(): - continue - try: - overwrite = 
True - file = self.files[file_name] - output_path = file.construct_output_path( - self.output_directory, self.backup_output_directory - ) - - export_textgrid(file, output_path, data, self.frame_shift, overwrite) - except Exception: - exc_type, exc_value, exc_traceback = sys.exc_info() - self.textgrid_errors[file_name] = "\n".join( - traceback.format_exception(exc_type, exc_value, exc_traceback) - ) + count = 0 + with open(self.log_path, "w", encoding="utf8") as log_file: + while True: + try: + file_name, data = self.for_write_queue.get(timeout=queue_polling_timeout) + except Empty: + if self.finished_processing.stop_check(): + break + continue + log_file.write(f"Got {file_name}\n") + self.for_write_queue.task_done() + if self.stopped.stop_check(): + log_file.write("Got stop check, exiting\n") + continue + try: + overwrite = True + file = self.files[file_name] + output_path = file.construct_output_path( + self.output_directory, self.backup_output_directory + ) + log_file.write(f"Exporting file {count} of {len(self.files)}\n") + count += 1 + export_textgrid(file, output_path, data, self.frame_shift, overwrite) + except Exception: + exc_type, exc_value, exc_traceback = sys.exc_info() + self.textgrid_errors[file_name] = "\n".join( + traceback.format_exception(exc_type, exc_value, exc_traceback) + ) class ExportPreparationProcessWorker(mp.Process): @@ -982,7 +1052,7 @@ def __init__( for_write_queue: mp.Queue, stopped: Stopped, finished_combining: Stopped, - files: dict[str, File], + files: Dict[str, File], ): mp.Process.__init__(self) self.to_export_queue = to_export_queue diff --git a/montreal_forced_aligner/alignment/pretrained.py b/montreal_forced_aligner/alignment/pretrained.py index 813fcf18..3cd4a886 100644 --- a/montreal_forced_aligner/alignment/pretrained.py +++ b/montreal_forced_aligner/alignment/pretrained.py @@ -5,7 +5,7 @@ import subprocess import time from collections import Counter, defaultdict -from typing import TYPE_CHECKING, NamedTuple, Optional +from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional import yaml @@ -26,12 +26,12 @@ def generate_pronunciations_func( log_path: str, - dictionaries: list[str], - text_int_paths: dict[str, str], - word_boundary_paths: dict[str, str], - ali_paths: dict[str, str], + dictionaries: List[str], + text_int_paths: Dict[str, str], + word_boundary_paths: Dict[str, str], + ali_paths: Dict[str, str], model_path: str, - pron_paths: dict[str, str], + pron_paths: Dict[str, str], ): """ Multiprocessing function for generating pronunciations @@ -109,12 +109,12 @@ class GeneratePronunciationsArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.alignment.pretrained.generate_pronunciations_func`""" log_path: str - dictionaries: list[str] - text_int_paths: dict[str, str] - word_boundary_paths: dict[str, str] - ali_paths: dict[str, str] + dictionaries: List[str] + text_int_paths: Dict[str, str] + word_boundary_paths: Dict[str, str] + ali_paths: Dict[str, str] model_path: str - pron_paths: dict[str, str] + pron_paths: Dict[str, str] class PretrainedAligner(CorpusAligner, TopLevelMfaWorker): @@ -142,6 +142,11 @@ def __init__( self.acoustic_model = AcousticModel(acoustic_model_path) kwargs.update(self.acoustic_model.parameters) super().__init__(**kwargs) + self.phone_set_type = self.acoustic_model.meta["phone_set_type"] + self.base_phone_regex = self.acoustic_model.meta["base_phone_regex"] + for d in self.dictionary_mapping.values(): + d.phone_set_type = self.phone_set_type + d.base_phone_regex = self.base_phone_regex 
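The five added lines above pull the phone-set metadata stored with a pretrained acoustic model and copy it onto every pronunciation dictionary, so lexicon phones are interpreted with the same conventions used at training time. A minimal sketch of that hand-off, with hypothetical stub classes standing in for the real AcousticModel and dictionary objects (the stub names and meta values here are illustrative, not MFA's API):

from typing import Any, Dict


class _AcousticModelStub:
    """Hypothetical stand-in exposing the `meta` mapping read in __init__ above."""

    meta: Dict[str, Any] = {"phone_set_type": "ARPA", "base_phone_regex": "[A-Z]+[0-2]?"}


class _DictionaryStub:
    """Hypothetical stand-in for a pronunciation dictionary."""

    phone_set_type: str = ""
    base_phone_regex: str = ""


def propagate_phone_set(model: _AcousticModelStub, mapping: Dict[str, _DictionaryStub]) -> None:
    # Mirrors the loop above: each dictionary inherits the phone-set
    # configuration saved with the pretrained model, rather than each
    # dictionary having to know where the model stores its metadata.
    for d in mapping.values():
        d.phone_set_type = model.meta["phone_set_type"]
        d.base_phone_regex = model.meta["base_phone_regex"]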
@property def working_directory(self) -> str: @@ -179,7 +184,7 @@ def parse_parameters( cls, config_path: Optional[str] = None, args: Optional[Namespace] = None, - unknown_args: Optional[list[str]] = None, + unknown_args: Optional[List[str]] = None, ) -> MetaDict: """ Parse parameters from a config path or command-line arguments @@ -302,7 +307,7 @@ def __init__( def generate_pronunciations_arguments( self, - ) -> list[GeneratePronunciationsArguments]: + ) -> List[GeneratePronunciationsArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.alignment.pretrained.generate_pronunciations_func` @@ -402,7 +407,7 @@ def export_lexicons(self, output_directory: str) -> None: nonsil_after_counts[w] += 1 dictionary.pronunciation_probabilities = True - for word, prons in dictionary.words.items(): + for word, prons in dictionary.actual_words.items(): if word not in counts: for p in prons: p["probability"] = 1 diff --git a/montreal_forced_aligner/command_line/adapt.py b/montreal_forced_aligner/command_line/adapt.py index befdcb2d..6d96db95 100644 --- a/montreal_forced_aligner/command_line/adapt.py +++ b/montreal_forced_aligner/command_line/adapt.py @@ -3,7 +3,7 @@ import os import time -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, List, Optional from montreal_forced_aligner.alignment import AdaptingAligner from montreal_forced_aligner.command_line.utils import validate_model_arg @@ -15,7 +15,7 @@ __all__ = ["adapt_model", "validate_args", "run_adapt_model"] -def adapt_model(args: Namespace, unknown_args: Optional[list[str]] = None) -> None: +def adapt_model(args: Namespace, unknown_args: Optional[List[str]] = None) -> None: """ Run the acoustic model adaptation @@ -104,7 +104,7 @@ def validate_args(args: Namespace) -> None: args.acoustic_model_path = validate_model_arg(args.acoustic_model_path, "acoustic") -def run_adapt_model(args: Namespace, unknown_args: Optional[list[str]] = None) -> None: +def run_adapt_model(args: Namespace, unknown_args: Optional[List[str]] = None) -> None: """ Wrapper function for running acoustic model adaptation diff --git a/montreal_forced_aligner/command_line/align.py b/montreal_forced_aligner/command_line/align.py index c9d763c6..f471b1c0 100644 --- a/montreal_forced_aligner/command_line/align.py +++ b/montreal_forced_aligner/command_line/align.py @@ -2,7 +2,7 @@ from __future__ import annotations import os -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, List, Optional from montreal_forced_aligner.alignment import PretrainedAligner from montreal_forced_aligner.command_line.utils import validate_model_arg @@ -15,7 +15,7 @@ __all__ = ["align_corpus", "validate_args", "run_align_corpus"] -def align_corpus(args: Namespace, unknown_args: Optional[list[str]] = None) -> None: +def align_corpus(args: Namespace, unknown_args: Optional[List[str]] = None) -> None: """ Run the alignment @@ -78,7 +78,7 @@ def validate_args(args: Namespace) -> None: args.acoustic_model_path = validate_model_arg(args.acoustic_model_path, "acoustic") -def run_align_corpus(args: Namespace, unknown_args: Optional[list[str]] = None) -> None: +def run_align_corpus(args: Namespace, unknown_args: Optional[List[str]] = None) -> None: """ Wrapper function for running alignment diff --git a/montreal_forced_aligner/command_line/classify_speakers.py b/montreal_forced_aligner/command_line/classify_speakers.py index b1f75ab0..444c5fbf 100644 --- a/montreal_forced_aligner/command_line/classify_speakers.py +++ 
b/montreal_forced_aligner/command_line/classify_speakers.py @@ -2,7 +2,7 @@ from __future__ import annotations import os -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, List, Optional from montreal_forced_aligner.command_line.utils import validate_model_arg from montreal_forced_aligner.exceptions import ArgumentError @@ -14,7 +14,7 @@ __all__ = ["classify_speakers", "validate_args", "run_classify_speakers"] -def classify_speakers(args: Namespace, unknown_args: Optional[list[str]] = None) -> None: +def classify_speakers(args: Namespace, unknown_args: Optional[List[str]] = None) -> None: """ Run the speaker classification @@ -74,7 +74,7 @@ def validate_args(args: Namespace) -> None: args.ivector_extractor_path = validate_model_arg(args.ivector_extractor_path, "ivector") -def run_classify_speakers(args: Namespace, unknown: Optional[list[str]] = None) -> None: +def run_classify_speakers(args: Namespace, unknown: Optional[List[str]] = None) -> None: """ Wrapper function for running speaker classification diff --git a/montreal_forced_aligner/command_line/create_segments.py b/montreal_forced_aligner/command_line/create_segments.py index a818bfa2..fa36ea21 100644 --- a/montreal_forced_aligner/command_line/create_segments.py +++ b/montreal_forced_aligner/command_line/create_segments.py @@ -2,7 +2,7 @@ from __future__ import annotations import os -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, List, Optional from montreal_forced_aligner.exceptions import ArgumentError from montreal_forced_aligner.segmenter import Segmenter @@ -14,7 +14,7 @@ __all__ = ["create_segments", "validate_args", "run_create_segments"] -def create_segments(args: Namespace, unknown_args: Optional[list[str]] = None) -> None: +def create_segments(args: Namespace, unknown_args: Optional[List[str]] = None) -> None: """ Run the sound file segmentation @@ -68,7 +68,7 @@ def validate_args(args: Namespace) -> None: raise ArgumentError("Corpus directory and output directory cannot be the same folder.") -def run_create_segments(args: Namespace, unknown: Optional[list[str]] = None) -> None: +def run_create_segments(args: Namespace, unknown: Optional[List[str]] = None) -> None: """ Wrapper function for running sound file segmentation diff --git a/montreal_forced_aligner/command_line/g2p.py b/montreal_forced_aligner/command_line/g2p.py index d465ae60..4cbaca25 100644 --- a/montreal_forced_aligner/command_line/g2p.py +++ b/montreal_forced_aligner/command_line/g2p.py @@ -2,7 +2,7 @@ from __future__ import annotations import os -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, List, Optional from montreal_forced_aligner.command_line.utils import validate_model_arg from montreal_forced_aligner.g2p.generator import ( @@ -19,7 +19,7 @@ __all__ = ["generate_dictionary", "validate_args", "run_g2p"] -def generate_dictionary(args: Namespace, unknown_args: Optional[list[str]] = None) -> None: +def generate_dictionary(args: Namespace, unknown_args: Optional[List[str]] = None) -> None: """ Run the G2P command @@ -95,7 +95,7 @@ def validate_args(args: Namespace) -> None: args.g2p_model_path = validate_model_arg(args.g2p_model_path, "g2p") -def run_g2p(args: Namespace, unknown: Optional[list[str]] = None) -> None: +def run_g2p(args: Namespace, unknown: Optional[List[str]] = None) -> None: """ Wrapper function for running G2P diff --git a/montreal_forced_aligner/command_line/mfa.py b/montreal_forced_aligner/command_line/mfa.py index 704f3469..76983f83 100644 --- 
a/montreal_forced_aligner/command_line/mfa.py +++ b/montreal_forced_aligner/command_line/mfa.py @@ -33,6 +33,7 @@ ) from montreal_forced_aligner.exceptions import MFAError from montreal_forced_aligner.models import MODEL_TYPES +from montreal_forced_aligner.utils import check_third_party if TYPE_CHECKING: from argparse import ArgumentParser @@ -381,6 +382,8 @@ def add_global_options(subparser: argparse.ArgumentParser, textgrid_output: bool ) validate_parser.add_argument( "--ignore_acoustics", + "--skip_acoustics", + dest="ignore_acoustics", help="Skip acoustic feature generation and associated validation", action="store_true", ) @@ -843,6 +846,8 @@ def main() -> None: Main function for the MFA command line interface """ + check_third_party() + hooks = ExitHooks() hooks.hook() atexit.register(hooks.history_save_handler) diff --git a/montreal_forced_aligner/command_line/model.py b/montreal_forced_aligner/command_line/model.py index d3496631..fb5e23a2 100644 --- a/montreal_forced_aligner/command_line/model.py +++ b/montreal_forced_aligner/command_line/model.py @@ -3,7 +3,7 @@ import os import shutil -from typing import TYPE_CHECKING, Optional, Union +from typing import TYPE_CHECKING, List, Optional, Union import requests @@ -34,7 +34,7 @@ ] -def list_downloadable_models(model_type: str) -> list[str]: +def list_downloadable_models(model_type: str) -> List[str]: """ Generate a list of models available for download diff --git a/montreal_forced_aligner/command_line/train_acoustic_model.py b/montreal_forced_aligner/command_line/train_acoustic_model.py index be337eb1..c7c045af 100644 --- a/montreal_forced_aligner/command_line/train_acoustic_model.py +++ b/montreal_forced_aligner/command_line/train_acoustic_model.py @@ -2,7 +2,7 @@ from __future__ import annotations import os -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, List, Optional from montreal_forced_aligner.acoustic_modeling import TrainableAligner from montreal_forced_aligner.command_line.utils import validate_model_arg @@ -15,7 +15,7 @@ __all__ = ["train_acoustic_model", "validate_args", "run_train_acoustic_model"] -def train_acoustic_model(args: Namespace, unknown_args: Optional[list] = None) -> None: +def train_acoustic_model(args: Namespace, unknown_args: Optional[List[str]] = None) -> None: """ Run the acoustic model training @@ -100,7 +100,7 @@ def validate_args(args: Namespace) -> None: args.dictionary_path = validate_model_arg(args.dictionary_path, "dictionary") -def run_train_acoustic_model(args: Namespace, unknown_args: Optional[list] = None) -> None: +def run_train_acoustic_model(args: Namespace, unknown_args: Optional[List[str]] = None) -> None: """ Wrapper function for running acoustic model training diff --git a/montreal_forced_aligner/command_line/train_dictionary.py b/montreal_forced_aligner/command_line/train_dictionary.py index 0d3ad356..2be4a571 100644 --- a/montreal_forced_aligner/command_line/train_dictionary.py +++ b/montreal_forced_aligner/command_line/train_dictionary.py @@ -2,7 +2,7 @@ from __future__ import annotations import os -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, List, Optional from montreal_forced_aligner.alignment.pretrained import DictionaryTrainer from montreal_forced_aligner.command_line.utils import validate_model_arg @@ -15,7 +15,7 @@ __all__ = ["train_dictionary", "validate_args", "run_train_dictionary"] -def train_dictionary(args: Namespace, unknown_args: Optional[list] = None) -> None: +def train_dictionary(args: Namespace, 
unknown_args: Optional[List[str]] = None) -> None: """ Run the pronunciation probability training @@ -75,7 +75,7 @@ def validate_args(args: Namespace) -> None: args.acoustic_model_path = validate_model_arg(args.acoustic_model_path, "acoustic") -def run_train_dictionary(args: Namespace, unknown: Optional[list] = None) -> None: +def run_train_dictionary(args: Namespace, unknown: Optional[List[str]] = None) -> None: """ Wrapper function for running pronunciation probability training diff --git a/montreal_forced_aligner/command_line/train_g2p.py b/montreal_forced_aligner/command_line/train_g2p.py index 92cb9cbb..38e61d40 100644 --- a/montreal_forced_aligner/command_line/train_g2p.py +++ b/montreal_forced_aligner/command_line/train_g2p.py @@ -1,7 +1,7 @@ """Command line functions for training G2P models""" from __future__ import annotations -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, List, Optional from montreal_forced_aligner.command_line.utils import validate_model_arg from montreal_forced_aligner.g2p.trainer import PyniniTrainer @@ -13,7 +13,7 @@ __all__ = ["train_g2p", "validate_args", "run_train_g2p"] -def train_g2p(args: Namespace, unknown_args: Optional[list] = None) -> None: +def train_g2p(args: Namespace, unknown_args: Optional[List[str]] = None) -> None: """ Run the G2P model training @@ -60,7 +60,7 @@ def validate_args(args: Namespace) -> None: args.dictionary_path = validate_model_arg(args.dictionary_path, "dictionary") -def run_train_g2p(args: Namespace, unknown: Optional[list] = None) -> None: +def run_train_g2p(args: Namespace, unknown: Optional[List[str]] = None) -> None: """ Wrapper function for running G2P model training diff --git a/montreal_forced_aligner/command_line/train_ivector_extractor.py b/montreal_forced_aligner/command_line/train_ivector_extractor.py index 252bd443..3a16f1f8 100644 --- a/montreal_forced_aligner/command_line/train_ivector_extractor.py +++ b/montreal_forced_aligner/command_line/train_ivector_extractor.py @@ -2,7 +2,7 @@ from __future__ import annotations import os -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, List, Optional from montreal_forced_aligner.exceptions import ArgumentError from montreal_forced_aligner.ivector.trainer import TrainableIvectorExtractor @@ -13,7 +13,7 @@ __all__ = ["train_ivector", "validate_args", "run_train_ivector_extractor"] -def train_ivector(args: Namespace, unknown_args: Optional[list] = None) -> None: +def train_ivector(args: Namespace, unknown_args: Optional[List[str]] = None) -> None: """ Run the ivector extractor training @@ -75,7 +75,7 @@ def validate_args(args: Namespace) -> None: ) -def run_train_ivector_extractor(args: Namespace, unknown: Optional[list] = None) -> None: +def run_train_ivector_extractor(args: Namespace, unknown: Optional[List[str]] = None) -> None: """ Wrapper function for running ivector extraction training diff --git a/montreal_forced_aligner/command_line/train_lm.py b/montreal_forced_aligner/command_line/train_lm.py index 5393fea6..3ffbaa90 100644 --- a/montreal_forced_aligner/command_line/train_lm.py +++ b/montreal_forced_aligner/command_line/train_lm.py @@ -2,7 +2,7 @@ from __future__ import annotations import os -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, List, Optional from montreal_forced_aligner.command_line.utils import validate_model_arg from montreal_forced_aligner.exceptions import ArgumentError @@ -18,7 +18,7 @@ __all__ = ["train_lm", "validate_args", "run_train_lm"] -def 
train_lm(args: Namespace, unknown_args: Optional[list] = None) -> None: +def train_lm(args: Namespace, unknown_args: Optional[List[str]] = None) -> None: """ Run the language model training @@ -98,7 +98,7 @@ def validate_args(args: Namespace) -> None: raise (ArgumentError(f"Could not find the model file {args.model_path}.")) -def run_train_lm(args: Namespace, unknown: Optional[list] = None) -> None: +def run_train_lm(args: Namespace, unknown: Optional[List[str]] = None) -> None: """ Wrapper function for running language model training diff --git a/montreal_forced_aligner/command_line/transcribe.py b/montreal_forced_aligner/command_line/transcribe.py index 7cd7a4b1..1ea87e2e 100644 --- a/montreal_forced_aligner/command_line/transcribe.py +++ b/montreal_forced_aligner/command_line/transcribe.py @@ -2,7 +2,7 @@ from __future__ import annotations import os -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, List, Optional from montreal_forced_aligner.command_line.utils import validate_model_arg from montreal_forced_aligner.exceptions import ArgumentError @@ -15,7 +15,7 @@ __all__ = ["transcribe_corpus", "validate_args", "run_transcribe_corpus"] -def transcribe_corpus(args: Namespace, unknown_args: Optional[list] = None) -> None: +def transcribe_corpus(args: Namespace, unknown_args: Optional[List[str]] = None) -> None: """ Run the transcription command @@ -82,7 +82,7 @@ def validate_args(args: Namespace) -> None: raise ArgumentError("Corpus directory and output directory cannot be the same folder.") -def run_transcribe_corpus(args: Namespace, unknown: Optional[list] = None) -> None: +def run_transcribe_corpus(args: Namespace, unknown: Optional[List[str]] = None) -> None: """ Wrapper function for running corpus transcription diff --git a/montreal_forced_aligner/command_line/validate.py b/montreal_forced_aligner/command_line/validate.py index 91522bc9..b22c44eb 100644 --- a/montreal_forced_aligner/command_line/validate.py +++ b/montreal_forced_aligner/command_line/validate.py @@ -2,7 +2,7 @@ from __future__ import annotations import os -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, List, Optional from montreal_forced_aligner.command_line.utils import validate_model_arg from montreal_forced_aligner.exceptions import ArgumentError @@ -15,7 +15,7 @@ __all__ = ["validate_corpus", "validate_args", "run_validate_corpus"] -def validate_corpus(args: Namespace, unknown_args: Optional[list[str]] = None) -> None: +def validate_corpus(args: Namespace, unknown_args: Optional[List[str]] = None) -> None: """ Run the validation command @@ -86,7 +86,7 @@ def validate_args(args: Namespace) -> None: args.acoustic_model_path = validate_model_arg(args.acoustic_model_path, "acoustic") -def run_validate_corpus(args: Namespace, unknown: Optional[list[str]] = None) -> None: +def run_validate_corpus(args: Namespace, unknown: Optional[List[str]] = None) -> None: """ Wrapper function for running corpus validation diff --git a/montreal_forced_aligner/config.py b/montreal_forced_aligner/config.py index 9634945d..cfd9bbdc 100644 --- a/montreal_forced_aligner/config.py +++ b/montreal_forced_aligner/config.py @@ -6,7 +6,7 @@ from __future__ import annotations import re -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Dict, List from montreal_forced_aligner.exceptions import RootDirectoryError @@ -76,7 +76,7 @@ def generate_command_history_path() -> str: return os.path.join(get_temporary_directory(), "command_history.yaml") -def 
load_command_history() -> list[dict[str, Any]]: +def load_command_history() -> List[Dict[str, Any]]: """ Load command history for MFA @@ -95,7 +95,7 @@ def load_command_history() -> list[dict[str, Any]]: return history -def update_command_history(command_data: dict[str, Any]) -> None: +def update_command_history(command_data: Dict[str, Any]) -> None: """ Update command history with most recent command @@ -184,7 +184,7 @@ def update_global_config(args: Namespace) -> None: yaml.dump(default_config, f) -def load_global_config() -> dict[str, Any]: +def load_global_config() -> Dict[str, Any]: """ Load the global MFA configuration diff --git a/montreal_forced_aligner/corpus/acoustic_corpus.py b/montreal_forced_aligner/corpus/acoustic_corpus.py index e86f1275..901dc34a 100644 --- a/montreal_forced_aligner/corpus/acoustic_corpus.py +++ b/montreal_forced_aligner/corpus/acoustic_corpus.py @@ -9,7 +9,7 @@ import time from abc import ABCMeta from queue import Empty -from typing import Optional +from typing import Dict, List, Optional from montreal_forced_aligner.abc import MfaWorker, TemporaryDirectoryMixin from montreal_forced_aligner.corpus.base import CorpusMixin @@ -150,7 +150,7 @@ def construct_base_feature_string(self, all_feats: bool = False) -> str: def construct_feature_proc_strings( self, speaker_independent: bool = False, - ) -> list[dict[str, str]]: + ) -> List[Dict[str, str]]: """ Constructs a feature processing string to supply to Kaldi binaries, taking into account corpus features and the current working directory of the aligner (whether fMLLR or LDA transforms should be used, etc). @@ -223,7 +223,7 @@ def construct_feature_proc_strings( strings.append(feat_strings) return strings - def compute_vad_arguments(self) -> list[VadArguments]: + def compute_vad_arguments(self) -> List[VadArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.corpus.features.compute_vad_func` @@ -243,7 +243,7 @@ def compute_vad_arguments(self) -> list[VadArguments]: for j in self.jobs ] - def calc_fmllr_arguments(self) -> list[CalcFmllrArguments]: + def calc_fmllr_arguments(self) -> List[CalcFmllrArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.corpus.features.calc_fmllr_func` @@ -268,7 +268,7 @@ def calc_fmllr_arguments(self) -> list[CalcFmllrArguments]: for j in self.jobs ] - def mfcc_arguments(self) -> list[MfccArguments]: + def mfcc_arguments(self) -> List[MfccArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.corpus.features.mfcc_func` @@ -429,10 +429,10 @@ def combine_feats(self) -> None: if self.utterances[f[0]].ignored: continue self.utterances[f[0]].features = f[1] - for u, utterance in self.utterances.items(): + for utterance in self.utterances: if utterance.features is None: utterance.ignored = True - ignore_check.append(u) + ignore_check.append(utterance.name) if ignore_check: self.log_warning( "There were some utterances ignored due to short duration, see the log file for full " @@ -446,11 +446,11 @@ def combine_feats(self) -> None: def _write_feats(self): """Write feats scp file for Kaldi""" - if any(x.features is not None for x in self.utterances.values()): + if any(x.features is not None for x in self.utterances): with open( os.path.join(self.corpus_output_directory, "feats.scp"), "w", encoding="utf8" ) as f: - for utterance in self.utterances.values(): + for utterance in self.utterances: if not utterance.features: continue f.write(f"{utterance.name} {utterance.features}\n") @@ -668,12 +668,18 @@ def 
_load_corpus_from_source(self) -> None: } all_sound_files.update(other_audio_files) all_sound_files.update(wav_files) - + self.log_debug(f"Walking through {self.corpus_directory}...") for root, _, files in os.walk(self.corpus_directory, followlinks=True): identifiers, wav_files, lab_files, textgrid_files, other_audio_files = find_exts(files) relative_path = root.replace(self.corpus_directory, "").lstrip("/").lstrip("\\") if self.stopped.stop_check(): return + self.log_debug(f"Inside relative root {relative_path}:") + self.log_debug(f" Found {len(identifiers)} identifiers") + self.log_debug(f" Found {len(wav_files)} .wav files") + self.log_debug(f" Found {len(other_audio_files)} other audio files") + self.log_debug(f" Found {len(lab_files)} .lab files") + self.log_debug(f" Found {len(textgrid_files)} .TextGrid files") if not use_audio_directory: all_sound_files = {} wav_files = {k: os.path.join(root, v) for k, v in wav_files.items()} @@ -699,7 +705,6 @@ def _load_corpus_from_source(self) -> None: continue if transcription_path is None: self.no_transcription_files.append(wav_path) - try: if hasattr(self, "construct_sanitize_function"): file = parse_file( @@ -782,14 +787,13 @@ def load_corpus(self) -> None: self.log_debug(f"Wrote lexicon information in {time.time() - begin}") begin = time.time() - for speaker in self.speakers.values(): + for speaker in self.speakers: speaker.set_dictionary(self.get_dictionary(speaker.name)) self.log_debug(f"Set dictionaries for speakers in {time.time() - begin}") begin = time.time() self.initialize_jobs() self.log_debug(f"Initialized jobs in {time.time() - begin}") - begin = time.time() self.write_corpus_information() self.log_debug(f"Wrote corpus information in {time.time() - begin}") @@ -808,7 +812,7 @@ def load_corpus(self) -> None: self.log_debug(f"Setting up corpus took {time.time() - all_begin} seconds") -class AcousticCorpus(AcousticCorpusPronunciationMixin, MfaWorker, TemporaryDirectoryMixin): +class AcousticCorpus(AcousticCorpusMixin, MfaWorker, TemporaryDirectoryMixin): """ Standalone class for working with acoustic corpora and pronunciation dictionaries @@ -891,3 +895,70 @@ def log_warning(self, message: str) -> None: Warning message to log """ print(message) + + +class AcousticCorpusWithPronunciations( + AcousticCorpusPronunciationMixin, MfaWorker, TemporaryDirectoryMixin +): + def __init__(self, num_jobs=3, **kwargs): + super().__init__(**kwargs) + self.num_jobs = num_jobs + + @property + def identifier(self) -> str: + """Identifier for the corpus""" + return self.data_source_identifier + + @property + def output_directory(self) -> str: + """Root temporary directory to store corpus and dictionary files""" + return os.path.join(self.temporary_directory, self.identifier) + + @property + def working_directory(self) -> str: + """Working directory to save temporary corpus and dictionary files""" + return self.output_directory + + def log_debug(self, message: str) -> None: + """ + Print a debug message + + Parameters + ---------- + message: str + Debug message to log + """ + print(message) + + def log_error(self, message: str) -> None: + """ + Print an error message + + Parameters + ---------- + message: str + Error message to log + """ + print(message) + + def log_info(self, message: str) -> None: + """ + Print an info message + + Parameters + ---------- + message: str + Info message to log + """ + print(message) + + def log_warning(self, message: str) -> None: + """ + Print a warning message + + Parameters + ---------- + message: str + Warning 
message to log + """ + print(message) diff --git a/montreal_forced_aligner/corpus/base.py b/montreal_forced_aligner/corpus/base.py index 59fe09c8..e075b4aa 100644 --- a/montreal_forced_aligner/corpus/base.py +++ b/montreal_forced_aligner/corpus/base.py @@ -6,12 +6,19 @@ import time from abc import ABCMeta, abstractmethod from collections import Counter -from typing import Optional, Union +from typing import Dict, List, Optional, Union import yaml from montreal_forced_aligner.abc import MfaWorker, TemporaryDirectoryMixin -from montreal_forced_aligner.corpus.classes import File, Speaker, Utterance +from montreal_forced_aligner.corpus.classes import ( + File, + FileCollection, + Speaker, + SpeakerCollection, + Utterance, + UtteranceCollection, +) from montreal_forced_aligner.corpus.multiprocessing import Job from montreal_forced_aligner.exceptions import CorpusError from montreal_forced_aligner.helper import output_mapping @@ -47,11 +54,11 @@ class CorpusMixin(MfaWorker, TemporaryDirectoryMixin, metaclass=ABCMeta): Attributes ---------- - speakers: dict[str, Speaker] + speakers: :class:`~montreal_forced_aligner.corpus.classes.SpeakerCollection` Dictionary of speakers in the corpus - files: dict[str, File] + files: :class:`~montreal_forced_aligner.corpus.classes.FileCollection` Dictionary of files in the corpus - utterances: dict[str, Utterance] + utterances: :class:`~montreal_forced_aligner.corpus.classes.UtteranceCollection` Dictionary of utterances in the corpus jobs: list[Job] List of jobs for processing the corpus and splitting speakers @@ -78,9 +85,9 @@ def __init__( raise CorpusError( f"The specified path for the corpus ({corpus_directory}) is not a directory." ) - self.speakers: dict[str, Speaker] = {} - self.files: dict[str, File] = {} - self.utterances: dict[str, Utterance] = {} + self.speakers = SpeakerCollection() + self.files = FileCollection() + self.utterances = UtteranceCollection() self.corpus_directory = corpus_directory self.speaker_characters = speaker_characters self.ignore_speakers = ignore_speakers @@ -88,7 +95,7 @@ def __init__( self.stopped = Stopped() self.decode_error_files = [] self.textgrid_read_errors = {} - self.jobs: list[Job] = [] + self.jobs: List[Job] = [] super().__init__(**kwargs) @property @@ -119,19 +126,19 @@ def write_corpus_information(self) -> None: def _write_spk2utt(self): """Write spk2utt scp file for Kaldi""" data = { - speaker.name: sorted(speaker.utterances.keys()) for speaker in self.speakers.values() + speaker.name: sorted(u.name for u in speaker.utterances) for speaker in self.speakers } output_mapping(data, os.path.join(self.corpus_output_directory, "spk2utt.scp")) def write_utt2spk(self): """Write utt2spk scp file for Kaldi""" - data = {u.name: u.speaker.name for u in self.utterances.values()} + data = {u.name: u.speaker.name for u in self.utterances} output_mapping(data, os.path.join(self.corpus_output_directory, "utt2spk.scp")) def _write_speakers(self): """Write speaker information for speeding up future runs""" to_save = [] - for speaker in self.speakers.values(): + for speaker in self.speakers: to_save.append(speaker.meta) with open( os.path.join(self.corpus_output_directory, "speakers.yaml"), "w", encoding="utf8" @@ -141,7 +148,7 @@ def _write_speakers(self): def _write_files(self): """Write file information for speeding up future runs""" to_save = [] - for file in self.files.values(): + for file in self.files: to_save.append(file.meta) with open( os.path.join(self.corpus_output_directory, "files.yaml"), "w", encoding="utf8" @@ -151,7 
+158,7 @@ def _write_files(self): def _write_utterances(self): """Write utterance information for speeding up future runs""" to_save = [] - for utterance in self.utterances.values(): + for utterance in self.utterances: to_save.append(utterance.meta) with open( os.path.join(self.corpus_output_directory, "utterances.yaml"), "w", encoding="utf8" @@ -166,11 +173,11 @@ def create_corpus_split(self) -> None: job.output_to_directory(split_dir) @property - def file_speaker_mapping(self) -> dict[str, list[str]]: + def file_speaker_mapping(self) -> Dict[str, List[str]]: """Speaker ordering for each file""" - return {file_name: file.speaker_ordering for file_name, file in self.files.items()} + return {file.name: file.speaker_ordering for file in self.files} - def get_word_frequency(self) -> dict[str, float]: + def get_word_frequency(self) -> Dict[str, float]: """ Calculate the relative word frequency across all the texts in the corpus @@ -180,7 +187,7 @@ def get_word_frequency(self) -> dict[str, float]: Dictionary of words and their relative frequencies """ word_counts = Counter() - for u in self.utterances.values(): + for u in self.utterances: text = u.text speaker = u.speaker d = speaker.dictionary @@ -196,7 +203,7 @@ def get_word_frequency(self) -> dict[str, float]: return {k: v / sum(word_counts.values()) for k, v in word_counts.items()} @property - def corpus_word_set(self) -> list[str]: + def corpus_word_set(self) -> List[str]: """Set of words used in the corpus""" return sorted(self.word_counts) @@ -209,11 +216,15 @@ def add_utterance(self, utterance: Utterance) -> None: utterance: :class:`~montreal_forced_aligner.corpus.classes.Utterance` Utterance to add """ - self.utterances[utterance.name] = utterance - if utterance.speaker.name not in self.speakers: - self.speakers[utterance.speaker.name] = utterance.speaker - if utterance.file.name not in self.files: - self.files[utterance.file.name] = utterance.file + self.utterances.add_utterance(utterance) + if utterance.speaker not in self.speakers: + self.speakers.add_speaker(utterance.speaker) + speaker = self.speakers[utterance.speaker.name] + speaker.add_utterance(utterance) + if utterance.file not in self.files: + self.files.add_file(utterance.file) + file = self.files[utterance.file.name] + file.add_utterance(utterance) def delete_utterance(self, utterance: Union[str, Utterance]) -> None: """ @@ -226,8 +237,10 @@ def delete_utterance(self, utterance: Union[str, Utterance]) -> None: """ if isinstance(utterance, str): utterance = self.utterances[utterance] - utterance.speaker.delete_utterance(utterance) - utterance.file.delete_utterance(utterance) + speaker = self.speakers[utterance.speaker.name] + file = self.files[utterance.file.name] + speaker.delete_utterance(utterance) + file.delete_utterance(utterance) del self.utterances[utterance.name] def initialize_jobs(self) -> None: @@ -239,7 +252,7 @@ def initialize_jobs(self) -> None: self.num_jobs = len(self.speakers) self.jobs = [Job(i) for i in range(self.num_jobs)] job_ind = 0 - for s in sorted(self.speakers.values()): + for s in sorted(self.speakers): self.jobs[job_ind].add_speaker(s) job_ind += 1 if job_ind == self.num_jobs: @@ -254,14 +267,14 @@ def add_file(self, file: File) -> None: file: :class:`~montreal_forced_aligner.corpus.classes.File` File to be added """ - self.files[file.name] = file + self.files.add_file(file) for speaker in file.speaker_ordering: - if speaker.name not in self.speakers: - self.speakers[speaker.name] = speaker + if speaker not in self.speakers: + 
self.speakers.add_speaker(speaker) else: self.speakers[speaker.name].merge(speaker) - for u in file.utterances.values(): - self.utterances[u.name] = u + for u in file.utterances: + self.add_utterance(u) if u.text: self.word_counts.update(u.text.split()) @@ -285,14 +298,15 @@ def create_subset(self, subset: int) -> None: if larger_subset_num < self.num_utterances: # Get all shorter utterances that are not one word long utts = sorted( - (utt for utt in self.utterances.values() if " " in utt.text), + (utt for utt in self.utterances if " " in utt.text), key=lambda x: x.duration, ) larger_subset = utts[:larger_subset_num] else: - larger_subset = sorted(self.utterances.values()) + larger_subset = sorted(self.utterances) random.seed(1234) # make it deterministic sampling - subset_utts = set(random.sample(larger_subset, subset)) + subset_utts = UtteranceCollection() + subset_utts.update(random.sample(larger_subset, subset)) log_dir = os.path.join(subset_directory, "log") os.makedirs(log_dir, exist_ok=True) @@ -353,7 +367,7 @@ def _load_corpus(self) -> None: "There were no sound files found of the appropriate format. Please double check the corpus path " "and/or run the validation utility (mfa validate)." ) - average_utterances = sum(len(x.utterances) for x in self.speakers.values()) / num_speakers + average_utterances = sum(len(x.utterances) for x in self.speakers) / num_speakers self.log_info( f"Number of speakers in corpus: {num_speakers}, " f"average number of utterances per speaker: {average_utterances}" @@ -400,14 +414,14 @@ def _load_corpus_from_temp(self) -> bool: speaker_data = yaml.safe_load(f) for entry in speaker_data: - self.speakers[entry["name"]] = Speaker(entry["name"]) + self.speakers.add_speaker(Speaker(entry["name"])) self.speakers[entry["name"]].cmvn = entry["cmvn"] with open(files_path, "r", encoding="utf8") as f: files_data = yaml.safe_load(f) for entry in files_data: - self.files[entry["name"]] = File( - entry["wav_path"], entry["text_path"], entry["relative_path"] + self.files.add_file( + File(entry["wav_path"], entry["text_path"], entry["relative_path"]) ) self.files[entry["name"]].speaker_ordering = [ self.speakers[x] for x in entry["speaker_ordering"] @@ -432,6 +446,7 @@ def _load_corpus_from_temp(self) -> bool: self.word_counts.update(u.text.split()) self.utterances[u.name].features = entry["features"] self.utterances[u.name].ignored = entry["ignored"] + self.add_utterance(u) self.log_debug( f"Loaded from corpus_data temp directory in {time.time() - begin_time} seconds" diff --git a/montreal_forced_aligner/corpus/classes.py b/montreal_forced_aligner/corpus/classes.py index 5dc06f38..90b8daf0 100644 --- a/montreal_forced_aligner/corpus/classes.py +++ b/montreal_forced_aligner/corpus/classes.py @@ -1,12 +1,28 @@ """Class definitions for Speakers, Files, Utterances and Jobs""" from __future__ import annotations +import abc import os import sys import traceback from collections import Counter -from typing import TYPE_CHECKING, Any, Callable, Optional, Union - +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + Dict, + Generator, + List, + Optional, + Set, + Tuple, + TypeVar, + Union, +) + +import librosa +import numpy as np from praatio import textgrid from praatio.utilities.constants import Interval @@ -14,7 +30,6 @@ from montreal_forced_aligner.exceptions import CorpusError, TextGridParseError, TextParseError if TYPE_CHECKING: - from montreal_forced_aligner.abc import MetaDict from montreal_forced_aligner.dictionary import DictionaryData from 
montreal_forced_aligner.dictionary.mixins import SanitizeFunction from montreal_forced_aligner.dictionary.pronunciation import PronunciationDictionaryMixin @@ -83,7 +98,14 @@ def parse_file( return file -class Speaker: +class MfaCorpusClass(metaclass=abc.ABCMeta): + @property + @abc.abstractmethod + def name(self) -> str: + ... + + +class Speaker(MfaCorpusClass): """ Class representing information about a speaker @@ -94,7 +116,7 @@ class Speaker: Attributes ---------- - utterances: dict[str, :class:`~montreal_forced_aligner.corpus.classes.Utterance`] + utterances: :class:`~montreal_forced_aligner.corpus.classes.UtteranceCollection` Utterances that the speaker is associated with cmvn: str, optional String pointing to any CMVN that has been calculated for this speaker @@ -105,74 +127,83 @@ class Speaker: """ def __init__(self, name): - self.name = name - self.utterances = {} + self._name = name + self.utterances = UtteranceCollection() self.cmvn = None self.dictionary: Optional[PronunciationDictionaryMixin] = None self.dictionary_data: Optional[DictionaryData] = None self.dictionary_name: Optional[str] = None self.word_counts = Counter() - def __getstate__(self): + @property + def name(self) -> str: + return self._name + + def __getstate__(self) -> Dict[str, str]: """Get dictionary for pickling""" data = {"name": self.name, "cmvn": self.cmvn, "dictionary_name": self.dictionary_name} return data - def __setstate__(self, state): + def __setstate__(self, state) -> None: """Recreate object following pickling""" - self.name = state["name"] + self._name = state["name"] self.cmvn = state["cmvn"] self.dictionary_name = state["dictionary_name"] - def __str__(self): + def __str__(self) -> str: """Return Speaker's name""" return self.name - def __eq__(self, other): + def __eq__(self, other: Union[Speaker, str]) -> bool: """Check if a Speaker is equal to another Speaker""" if isinstance(other, Speaker): return other.name == self.name if isinstance(other, str): return self.name == other - raise NotImplementedError + raise TypeError("Speakers can only be compared to other speakers and strings.") - def __lt__(self, other): + def __lt__(self, other: Union[Speaker, str]) -> bool: """Check if a Speaker is less than another Speaker""" if isinstance(other, Speaker): return other.name < self.name if isinstance(other, str): return self.name < other - raise NotImplementedError + raise TypeError("Speakers can only be compared to other speakers and strings.") - def __lte__(self, other): + def __lte__(self, other: Union[Speaker, str]) -> bool: """Check if a Speaker is less than or equal to another Speaker""" if isinstance(other, Speaker): return other.name <= self.name if isinstance(other, str): return self.name <= other - raise NotImplementedError + raise TypeError("Speakers can only be compared to other speakers and strings.") - def __gt__(self, other): + def __gt__(self, other: Union[Speaker, str]) -> bool: """Check if a Speaker is greater than another Speaker""" if isinstance(other, Speaker): return other.name > self.name if isinstance(other, str): return self.name > other - raise NotImplementedError + raise TypeError("Speakers can only be compared to other speakers and strings.") - def __gte__(self, other): + def __gte__(self, other: Union[Speaker, str]) -> bool: """Check if a Speaker is greater than or equal to another Speaker""" if isinstance(other, Speaker): return other.name >= self.name if isinstance(other, str): return self.name >= other - raise NotImplementedError + raise TypeError("Speakers can only be 
compared to other speakers and strings.") - def __hash__(self): + def __hash__(self) -> hash: """Get the hash of the speaker""" return hash(self.name) - def add_utterance(self, utterance: Utterance): + @property + def num_utterances(self) -> int: + """Get the number of utterances for the speaker""" + return len(self.utterances) + + def add_utterance(self, utterance: Utterance) -> None: """ Associate an utterance with a speaker @@ -181,12 +212,9 @@ def add_utterance(self, utterance: Utterance): utterance: :class:`~montreal_forced_aligner.corpus.classes.Utterance` Utterance to be added """ - utterance.speaker = self - self.utterances[utterance.name] = utterance - if utterance.text: - self.word_counts.update(utterance.text.split()) + self.utterances.add_utterance(utterance) - def delete_utterance(self, utterance: Utterance): + def delete_utterance(self, utterance: Utterance) -> None: """ Delete an utterance associated with a speaker @@ -196,10 +224,9 @@ def delete_utterance(self, utterance: Utterance): Utterance to be deleted """ identifier = utterance.name - utterance.speaker = None del self.utterances[identifier] - def merge(self, speaker: Speaker): + def merge(self, speaker: Speaker) -> None: """ Merge two speakers together @@ -208,11 +235,11 @@ def merge(self, speaker: Speaker): speaker: :class:`~montreal_forced_aligner.corpus.classes.Speaker` Other speaker to take utterances from """ - for u in speaker.utterances.values(): + for u in speaker.utterances: self.add_utterance(u) - speaker.utterances = [] + speaker.utterances = UtteranceCollection() - def word_set(self) -> set[str]: + def word_set(self) -> Set[str]: """ Generate the word set of all the words in a speaker's utterances @@ -222,6 +249,12 @@ def word_set(self) -> set[str]: Speaker's word set """ words = set() + if self.dictionary is not None: + words.update(self.dictionary.specials_set) + words.update(self.dictionary.clitic_set) + self.word_counts = Counter() + for u in self.utterances: + self.word_counts.update(u.text.split()) for word in self.word_counts: if self.dictionary is not None: word = self.dictionary._lookup(word) @@ -244,15 +277,15 @@ def set_dictionary(self, dictionary: PronunciationDictionaryMixin) -> None: self.dictionary_data = dictionary.data(self.word_set()) @property - def files(self) -> set["File"]: + def files(self) -> Set["File"]: """Files that the speaker is associated with""" files = set() - for u in self.utterances.values(): + for u in self.utterances: files.add(u.file) return files @property - def meta(self): + def meta(self) -> Dict[str, str]: """Metadata for the speaker""" data = { "name": self.name, @@ -263,7 +296,7 @@ def meta(self): return data -class File: +class File(MfaCorpusClass): """ File class for representing metadata and associations of Files @@ -276,6 +309,19 @@ class File: relative_path: str, optional Relative path to the corpus root + Attributes + ---------- + utterances: :class:`~montreal_forced_aligner.corpus.classes.UtteranceCollection` + Utterances in the file + speaker_ordering: list[Speaker] + Ordering of speakers in the transcription file + wav_info: dict[str, Any] + Information about sound file + waveform: np.array + Audio samples + aligned: bool + Flag for whether a file has alignments + Raises ------ :class:`~montreal_forced_aligner.exceptions.CorpusError` @@ -291,19 +337,68 @@ def __init__( self.wav_path = wav_path self.text_path = text_path if self.wav_path is not None: - self.name = os.path.splitext(os.path.basename(self.wav_path))[0] + self._name = 
os.path.splitext(os.path.basename(self.wav_path))[0] elif self.text_path is not None: - self.name = os.path.splitext(os.path.basename(self.text_path))[0] + self._name = os.path.splitext(os.path.basename(self.text_path))[0] else: raise CorpusError("File objects must have either a wav_path or text_path") self.relative_path = relative_path self.wav_info = None - self.speaker_ordering: list[Speaker] = [] - self.utterances: dict[str, Utterance] = {} + self.waveform = None + self.speaker_ordering: List[Speaker] = [] + self.utterances = UtteranceCollection() self.aligned = False + def __eq__(self, other: Union[File, str]) -> bool: + """Check if a File is equal to another File""" + if isinstance(other, File): + return other.name == self.name + if isinstance(other, str): + return self.name == other + raise TypeError("Files can only be compared to other files and strings.") + + def __lt__(self, other: Union[File, str]) -> bool: + """Check if a File is less than another File""" + if isinstance(other, File): + return other.name < self.name + if isinstance(other, str): + return self.name < other + raise TypeError("Files can only be compared to other files and strings.") + + def __lte__(self, other: Union[File, str]) -> bool: + """Check if a File is less than or equal to another File""" + if isinstance(other, File): + return other.name <= self.name + if isinstance(other, str): + return self.name <= other + raise TypeError("Files can only be compared to other files and strings.") + + def __gt__(self, other: Union[File, str]) -> bool: + """Check if a File is greater than another File""" + if isinstance(other, File): + return other.name > self.name + if isinstance(other, str): + return self.name > other + raise TypeError("Files can only be compared to other files and strings.") + + def __gte__(self, other: Union[File, str]) -> bool: + """Check if a File is greater than or equal to another File""" + if isinstance(other, File): + return other.name >= self.name + if isinstance(other, str): + return self.name >= other + raise TypeError("Files can only be compared to other files and strings.") + + def __hash__(self) -> hash: + """Get the hash of the file""" + return hash(self.name) + + @property + def name(self) -> str: + return self._name + def has_fully_aligned_speaker(self, speaker: Speaker) -> bool: - for u in self.utterances.values(): + for u in self.utterances: if u.speaker != speaker: continue if u.word_labels is None: @@ -312,11 +407,11 @@ def has_fully_aligned_speaker(self, speaker: Speaker) -> bool: return False return True - def __repr__(self): + def __repr__(self) -> str: """Representation of File objects""" return f'' - def __getstate__(self): + def __getstate__(self) -> Dict[str, Any]: """Create dictionary for pickle""" return { "name": self.name, @@ -325,20 +420,22 @@ def __getstate__(self): "relative_path": self.relative_path, "aligned": self.aligned, "wav_info": self.wav_info, + "waveform": self.waveform, "speaker_ordering": [x.__getstate__() for x in self.speaker_ordering], - "utterances": self.utterances.values(), + "utterances": self.utterances, } - def __setstate__(self, state): + def __setstate__(self, state) -> None: """Update object following pickling""" - self.name = state["name"] + self._name = state["name"] self.wav_path = state["wav_path"] self.text_path = state["text_path"] self.relative_path = state["relative_path"] self.wav_info = state["wav_info"] + self.waveform = state["waveform"] self.aligned = state["aligned"] self.speaker_ordering = state["speaker_ordering"] - 
self.utterances = {} + self.utterances = UtteranceCollection() for i, s in enumerate(self.speaker_ordering): self.speaker_ordering[i] = Speaker("") self.speaker_ordering[i].__setstate__(s) @@ -352,7 +449,7 @@ def __setstate__(self, state): def save( self, output_directory: Optional[str] = None, backup_output_directory: Optional[str] = None - ): + ) -> None: """ Output File to TextGrid or lab @@ -366,7 +463,7 @@ def save( """ utterance_count = len(self.utterances) if utterance_count == 1: - utterance = next(iter(self.utterances.values())) + utterance = next(iter(self.utterances)) if utterance.begin is None and not utterance.phone_labels: output_path = self.construct_output_path( output_directory, backup_output_directory, enforce_lab=True @@ -388,7 +485,7 @@ def save( tg = textgrid.Textgrid() tg.maxTimestamp = max_time - for utterance in self.utterances.values(): + for utterance in self.utterances: if utterance.speaker is None: speaker = "speech" @@ -413,7 +510,7 @@ def save( tg.save(output_path, includeBlankSpaces=True, format="long_textgrid") @property - def meta(self): + def meta(self) -> Dict[str, Any]: """Metadata for the File""" return { "wav_path": self.wav_path, @@ -425,21 +522,21 @@ def meta(self): } @property - def has_sound_file(self): + def has_sound_file(self) -> bool: """Flag for whether the File has a sound file""" if self.wav_path is not None and os.path.exists(self.wav_path): return True return False @property - def has_text_file(self): + def has_text_file(self) -> bool: """Flag for whether the File has a text file""" if self.text_path is not None and os.path.exists(self.text_path): return True return False @property - def text_type(self): + def text_type(self) -> Optional[str]: """Type of text file""" if self.has_text_file: if os.path.splitext(self.text_path)[1].lower() == ".textgrid": @@ -579,8 +676,7 @@ def add_utterance(self, utterance: Utterance) -> None: utterance: :class:`~montreal_forced_aligner.corpus.classes.Utterance` Utterance to add """ - utterance.file = self - self.utterances[utterance.name] = utterance + self.utterances.add_utterance(utterance) self.add_speaker(utterance.speaker) def delete_utterance(self, utterance: Utterance) -> None: @@ -593,7 +689,6 @@ def delete_utterance(self, utterance: Utterance) -> None: Utterance to remove """ identifier = utterance.name - utterance.file = None del self.utterances[identifier] def load_info(self) -> None: @@ -621,6 +716,25 @@ def num_channels(self) -> int: self.load_info() return self.wav_info["num_channels"] + @property + def num_utterances(self) -> int: + """Get the number of utterances for the sound file""" + return len(self.utterances) + + @property + def num_speakers(self) -> int: + """Get the number of speakers in the sound file""" + return len(self.speaker_ordering) + + @property + def sample_rate(self) -> int: + """Get the sample rate of the sound file""" + if self.wav_path is None: + return 0 + if not self.wav_info: + self.load_info() + return self.wav_info["sample_rate"] + @property def format(self) -> str: """Get the sound file format""" @@ -635,6 +749,34 @@ def sox_string(self) -> str: self.load_info() return self.wav_info["sox_string"] + def load_wav_data(self) -> None: + self.waveform, _ = librosa.load(self.wav_path, sr=None, mono=False) + + def normalized_waveform( + self, begin: float = 0, end: Optional[float] = None + ) -> Tuple[np.array, np.array]: + if self.waveform is None: + self.load_wav_data() + if end is None: + end = self.duration + + begin_sample = int(begin * self.sample_rate) + end_sample 
= int(end * self.sample_rate)
+        if len(self.waveform.shape) > 1 and self.waveform.shape[0] == 2:
+            y = self.waveform[:, begin_sample:end_sample] / np.max(
+                np.abs(self.waveform[:, begin_sample:end_sample]), axis=0
+            )
+            y[np.isnan(y)] = 0
+            y[0, :] += 3
+            y[1, :] += 1
+        else:
+            y = (
+                self.waveform[begin_sample:end_sample]
+                / np.max(np.abs(self.waveform[begin_sample:end_sample]), axis=0)
+            ) + 1
+        x = np.arange(start=begin_sample, stop=end_sample) / self.sample_rate
+        return x, y
+
     def for_wav_scp(self) -> str:
         """
         Generate the string to use in feature generation
@@ -649,7 +791,7 @@ def for_wav_scp(self) -> str:
         return self.wav_path


-class Utterance:
+class Utterance(MfaCorpusClass):
     """
     Class for information about specific utterances

@@ -714,13 +856,11 @@ def __init__(
         self.ignored = False
         self.features = None
         self.feature_length = None
-        self.phone_labels: Optional[list[CtmInterval]] = None
-        self.word_labels: Optional[list[CtmInterval]] = None
+        self.phone_labels: Optional[List[CtmInterval]] = None
+        self.word_labels: Optional[List[CtmInterval]] = None
         self.oovs = set()
-        self.speaker.add_utterance(self)
-        self.file.add_utterance(self)

-    def __getstate__(self):
+    def __getstate__(self) -> Dict[str, Any]:
         """Get the state of the object for pickling"""
         return {
             "file_name": self.file_name,
@@ -738,7 +878,7 @@ def __getstate__(self):
             "word_labels": self.word_labels,
         }

-    def __setstate__(self, state):
+    def __setstate__(self, state) -> None:
         """Reconstruct the object following pickling"""
         self.file_name = state["file_name"]
         self.speaker_name = state["speaker_name"]
@@ -754,141 +894,55 @@ def __setstate__(self, state):
         self.phone_labels = state["phone_labels"]
         self.word_labels = state["word_labels"]

-    def delete(self):
-        """Delete this utterance and clean up references in other objects"""
-        pass
-
-    def __str__(self):
+    def __str__(self) -> str:
         """String representation"""
         return self.name

-    def __repr__(self):
+    def __repr__(self) -> str:
         """Object representation"""
         return f'<Utterance {self.name}>'

-    def __eq__(self, other) -> bool:
-        """
-        Check if this utterance is equal to another one
-
-        Parameters
-        ----------
-        other: :class:`~montreal_forced_aligner.corpus.classes.Utterance` or str
-            Utterance to compare against
-
-        Returns
-        -------
-        bool
-            True if same name
-
-        Raises
-        ------
-        NotImplementedError
-            If other is not an Utterance or a str
-        """
+    def __eq__(self, other: Union[Utterance, str]) -> bool:
+        """Check if a Utterance is equal to another Utterance"""
         if isinstance(other, Utterance):
             return other.name == self.name
         if isinstance(other, str):
             return self.name == other
-        raise NotImplementedError
-
-    def __lt__(self, other) -> bool:
-        """
-        Check if this utterance is less than another one
-
-        Parameters
-        ----------
-        other: 
@@ -900,7 +954,7 @@ def duration(self) -> float:
         return self.file.duration
 
     @property
-    def meta(self) -> MetaDict:
+    def meta(self) -> Dict[str, Any]:
         """Metadata dictionary for the utterance"""
         return {
             "speaker": self.speaker.name,
@@ -914,7 +968,7 @@
             "feature_length": self.feature_length,
         }
 
-    def set_speaker(self, speaker: Speaker):
+    def set_speaker(self, speaker: Speaker) -> None:
         """
         Set the speaker of the utterance and updates other objects
 
@@ -928,11 +982,11 @@ def set_speaker(self, speaker: Speaker):
         self.file.add_utterance(self)
 
     @property
-    def is_segment(self):
+    def is_segment(self) -> bool:
        """Check if this utterance is a segment of a longer file"""
         return self.begin is not None and self.end is not None
 
-    def text_for_scp(self) -> list[str]:
+    def text_for_scp(self) -> List[str]:
         """
         Generate the text for exporting to Kaldi's text scp
 
@@ -943,7 +997,7 @@
         """
         return self.text.split()
 
-    def text_int_for_scp(self) -> Optional[list[int]]:
+    def text_int_for_scp(self) -> Optional[List[int]]:
         """
         Generate the text for exporting to Kaldi's text int scp
 
@@ -964,7 +1018,7 @@
             new_text.append(w)
         return new_text
 
-    def segment_for_scp(self) -> list[Any]:
+    def segment_for_scp(self) -> List[Any]:
         """
         Generate data for Kaldi's segments scp file
 
@@ -976,11 +1030,143 @@
         return [self.file.name, self.begin, self.end, self.channel]
 
     @property
-    def name(self):
+    def name(self) -> str:
         """The name of the utterance"""
         base = f"{self.file_name}"
+        base = base.replace(" ", "-space-").replace(".", "-").replace("_", "-")
         if not base.startswith(f"{self.speaker_name}-"):
             base = f"{self.speaker_name}-" + base
         if self.is_segment:
-            base = f"{self.file_name}-{self.begin}-{self.end}"
+            base = f"{base}-{self.begin}-{self.end}"
         return base.replace(" ", "-space-").replace(".", "-").replace("_", "-")
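The `name` property above mangles spaces, periods, and underscores so the identifier is safe to use in Kaldi scp files, and prefixes the speaker so utterance IDs group by speaker. A hypothetical helper mirroring the same logic, for illustration only:

```python
from typing import Optional


def utterance_id(
    file_name: str, speaker_name: str, begin: Optional[float] = None, end: Optional[float] = None
) -> str:
    """Build a Kaldi-safe utterance ID the way Utterance.name does above."""
    base = file_name.replace(" ", "-space-").replace(".", "-").replace("_", "-")
    if not base.startswith(f"{speaker_name}-"):
        base = f"{speaker_name}-" + base
    if begin is not None and end is not None:  # a segment of a longer file
        base = f"{base}-{begin}-{end}"
    return base.replace(" ", "-space-").replace(".", "-").replace("_", "-")


print(utterance_id("session 1", "spk1", 0.5, 2.25))
# spk1-session-space-1-0-5-2-25
```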
+
+
+T = TypeVar("T", Speaker, File, Utterance)
+
+
+class Collection:
+    """
+    Utility class for storing collections of corpus objects, allowing iteration, sorting, and
+    look up via names.
+    """
+
+    CLASS_TYPE: ClassVar[type] = MfaCorpusClass
+
+    def __init__(self):
+        self._data: Dict[str, T] = {}
+
+    def __iter__(self) -> Generator[T, None, None]:
+        """Iterator over the collection"""
+        for v in self._data.values():
+            yield v
+
+    def __getitem__(self, key: str) -> T:
+        """Get an item by identifier"""
+        return self._data[key]
+
+    def __delitem__(self, key: str) -> None:
+        """Delete an item by identifier"""
+        del self._data[key]
+
+    def __setitem__(self, key: str, item: T) -> None:
+        """Set an item by identifier"""
+        self._data[key] = item
+
+    def __len__(self) -> int:
+        """Number of items in the collection"""
+        return len(self._data)
+
+    def __bool__(self) -> bool:
+        """Check for whether the collection contains any items"""
+        return bool(self._data)
+
+    def __contains__(self, item: Union[str, T]) -> bool:
+        """Check for whether the collection contains a specific item"""
+        if not isinstance(item, str):
+            item = item.name
+        return item in self._data
+
+    def update(self, other: Union[Collection, Set[T], List[T]]) -> None:
+        """Update collection from another collection"""
+        if isinstance(other, Collection):
+            self._data.update(other._data)
+        else:
+            for item in other:
+                self._data[item.name] = item
+
+    def __str__(self) -> str:
+        """String representation"""
+        return str(self._data)
+
+    def __repr__(self) -> str:
+        """Object representation"""
+        return f""
+
+
+class SpeakerCollection(Collection):
+    """
+    Utility class for storing collections of speakers
+    """
+
+    CLASS_TYPE = Speaker
+
+    def add_speaker(self, speaker: Speaker) -> None:
+        """
+        Add speaker to the collection
+
+        Parameters
+        ----------
+        speaker: :class:`~montreal_forced_aligner.corpus.classes.Speaker`
+            Speaker to be added
+        """
+        self[speaker.name] = speaker
+
+    def __repr__(self) -> str:
+        """Object representation"""
+        return f""
+
+
+class FileCollection(Collection):
+    """
+    Utility class for storing collections of files
+    """
+
+    CLASS_TYPE = File
+
+    def add_file(self, file: File) -> None:
+        """
+        Add file to the collection
+
+        Parameters
+        ----------
+        file: :class:`~montreal_forced_aligner.corpus.classes.File`
+            File to be added
+        """
+        self[file.name] = file
+
+    def __repr__(self) -> str:
+        """Object representation"""
+        return f""
+
+
+class UtteranceCollection(Collection):
+    """
+    Utility class for storing collections of utterances
+    """
+
+    CLASS_TYPE = Utterance
+
+    def add_utterance(self, utterance: Utterance) -> None:
+        """
+        Add utterance to the collection
+
+        Parameters
+        ----------
+        utterance: :class:`~montreal_forced_aligner.corpus.classes.Utterance`
+            Utterance to be added
+        """
self[utterance.name] = utterance + + def __repr__(self) -> str: + """Object representation""" + return f"" diff --git a/montreal_forced_aligner/corpus/features.py b/montreal_forced_aligner/corpus/features.py index a45b520d..3df4c82f 100644 --- a/montreal_forced_aligner/corpus/features.py +++ b/montreal_forced_aligner/corpus/features.py @@ -4,7 +4,7 @@ import os import subprocess from abc import abstractmethod -from typing import TYPE_CHECKING, Any, NamedTuple, Union +from typing import TYPE_CHECKING, Any, Dict, List, NamedTuple, Union from montreal_forced_aligner.utils import thirdparty_binary @@ -27,9 +27,9 @@ class VadArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.corpus.features.compute_vad_func`""" log_path: str - dictionaries: list[str] - feats_scp_paths: dict[str, str] - vad_scp_paths: dict[str, str] + dictionaries: List[str] + feats_scp_paths: Dict[str, str] + vad_scp_paths: Dict[str, str] vad_options: MetaDict @@ -39,11 +39,11 @@ class MfccArguments(NamedTuple): """ log_path: str - dictionaries: list[str] - feats_scp_paths: dict[str, str] - lengths_paths: dict[str, str] - segment_paths: dict[str, str] - wav_paths: dict[str, str] + dictionaries: List[str] + feats_scp_paths: Dict[str, str] + lengths_paths: Dict[str, str] + segment_paths: Dict[str, str] + wav_paths: Dict[str, str] mfcc_options: MetaDict @@ -51,13 +51,13 @@ class CalcFmllrArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.corpus.features.calc_fmllr_func`""" log_path: str - dictionaries: list[str] - feature_strings: dict[str, str] - ali_paths: dict[str, str] + dictionaries: List[str] + feature_strings: Dict[str, str] + ali_paths: Dict[str, str] ali_model_path: str model_path: str - spk2utt_paths: dict[str, str] - trans_paths: dict[str, str] + spk2utt_paths: Dict[str, str] + trans_paths: Dict[str, str] fmllr_options: MetaDict @@ -82,11 +82,11 @@ def make_safe(value: Any) -> str: def mfcc_func( log_path: str, - dictionaries: list[str], - feats_scp_paths: dict[str, str], - lengths_paths: dict[str, str], - segment_paths: dict[str, str], - wav_paths: dict[str, str], + dictionaries: List[str], + feats_scp_paths: Dict[str, str], + lengths_paths: Dict[str, str], + segment_paths: Dict[str, str], + wav_paths: Dict[str, str], mfcc_options: MetaDict, ) -> None: """ @@ -182,9 +182,9 @@ def mfcc_func( def compute_vad_func( log_path: str, - dictionaries: list[str], - feats_scp_paths: dict[str, str], - vad_scp_paths: dict[str, str], + dictionaries: List[str], + feats_scp_paths: Dict[str, str], + vad_scp_paths: Dict[str, str], vad_options: MetaDict, ) -> None: """ @@ -232,13 +232,13 @@ def compute_vad_func( def calc_fmllr_func( log_path: str, - dictionaries: list[str], - feature_strings: dict[str, str], - ali_paths: dict[str, str], + dictionaries: List[str], + feature_strings: Dict[str, str], + ali_paths: Dict[str, str], ali_model_path: str, model_path: str, - spk2utt_paths: dict[str, str], - trans_paths: dict[str, str], + spk2utt_paths: Dict[str, str], + trans_paths: Dict[str, str], fmllr_options: MetaDict, ) -> None: """ @@ -687,26 +687,26 @@ class ExtractIvectorsArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.corpus.features.extract_ivectors_func`""" log_path: str - dictionaries: list[str] - feature_strings: dict[str, str] + dictionaries: List[str] + feature_strings: Dict[str, str] ivector_options: MetaDict - ali_paths: dict[str, str] + ali_paths: Dict[str, str] ie_path: str - ivector_paths: dict[str, str] - weight_paths: dict[str, str] + ivector_paths: Dict[str, 
str] + weight_paths: Dict[str, str] model_path: str dubm_path: str def extract_ivectors_func( log_path: str, - dictionaries: list[str], - feature_strings: dict[str, str], + dictionaries: List[str], + feature_strings: Dict[str, str], ivector_options: MetaDict, - ali_paths: dict[str, str], + ali_paths: Dict[str, str], ie_path: str, - ivector_paths: dict[str, str], - weight_paths: dict[str, str], + ivector_paths: Dict[str, str], + weight_paths: Dict[str, str], model_path: str, dubm_path: str, ) -> None: diff --git a/montreal_forced_aligner/corpus/helper.py b/montreal_forced_aligner/corpus/helper.py index b9e5fa04..d58eee6b 100644 --- a/montreal_forced_aligner/corpus/helper.py +++ b/montreal_forced_aligner/corpus/helper.py @@ -4,14 +4,14 @@ import os import shutil import subprocess -from typing import Any, Optional, Union +from typing import Any, Dict, List, Optional, Tuple, Union import soundfile from montreal_forced_aligner.dictionary.mixins import SanitizeFunction from montreal_forced_aligner.exceptions import SoxError -SoundFileInfoDict = dict[str, Union[int, float, str]] +SoundFileInfoDict = Dict[str, Union[int, float, str]] supported_audio_extensions = [".flac", ".ogg", ".aiff", ".mp3"] @@ -37,7 +37,7 @@ def load_text(path: str) -> str: return text -def parse_transcription(text: str, sanitize_function=Optional[SanitizeFunction]) -> list[str]: +def parse_transcription(text: str, sanitize_function=Optional[SanitizeFunction]) -> List[str]: """ Parse an orthographic transcription given punctuation and clitic markers @@ -65,8 +65,8 @@ def parse_transcription(text: str, sanitize_function=Optional[SanitizeFunction]) def find_exts( - files: list[str], -) -> tuple[list[str], dict[str, str], dict[str, str], dict[str, str], dict[str, str]]: + files: List[str], +) -> Tuple[List[str], Dict[str, str], Dict[str, str], Dict[str, str], Dict[str, str]]: """ Find and group sound file extensions and transcription file extensions @@ -113,7 +113,7 @@ def find_exts( return identifiers, wav_files, lab_files, textgrid_files, other_audio_files -def get_wav_info(file_path: str) -> dict[str, Any]: +def get_wav_info(file_path: str) -> Dict[str, Any]: """ Get sound file information diff --git a/montreal_forced_aligner/corpus/ivector_corpus.py b/montreal_forced_aligner/corpus/ivector_corpus.py index 687944d3..edeec75c 100644 --- a/montreal_forced_aligner/corpus/ivector_corpus.py +++ b/montreal_forced_aligner/corpus/ivector_corpus.py @@ -1,5 +1,6 @@ """Classes for corpora that use ivectors as features""" import os +from typing import List from montreal_forced_aligner.corpus.acoustic_corpus import AcousticCorpusMixin from montreal_forced_aligner.corpus.features import ( @@ -50,10 +51,10 @@ def _write_utt2spk(self): with open( os.path.join(self.corpus_output_directory, "utt2spk.scp"), "w", encoding="utf8" ) as f: - for utterance in self.utterances.values(): + for utterance in self.utterances: f.write(f"{utterance.name} {utterance.speaker.name}\n") - def extract_ivectors_arguments(self) -> list[ExtractIvectorsArguments]: + def extract_ivectors_arguments(self) -> List[ExtractIvectorsArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.corpus.features.extract_ivectors_func` diff --git a/montreal_forced_aligner/corpus/multiprocessing.py b/montreal_forced_aligner/corpus/multiprocessing.py index 20ccdd6f..44b953d4 100644 --- a/montreal_forced_aligner/corpus/multiprocessing.py +++ b/montreal_forced_aligner/corpus/multiprocessing.py @@ -11,8 +11,13 @@ import sys import traceback from queue import Empty 
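`_write_utt2spk` above now iterates the `UtteranceCollection` directly, writing one `utterance speaker` pair per line. Kaldi's tooling generally wants the inverted `spk2utt` map as well, with both files sorted; a rough sketch of producing the pair (a hypothetical helper, not MFA's API):

```python
from collections import defaultdict
from typing import Dict, List


def write_speaker_maps(utt2spk: Dict[str, str], utt2spk_path: str, spk2utt_path: str) -> None:
    """Write sorted utt2spk and spk2utt files from a single mapping."""
    spk2utt: Dict[str, List[str]] = defaultdict(list)
    with open(utt2spk_path, "w", encoding="utf8") as f:
        for utt in sorted(utt2spk):
            f.write(f"{utt} {utt2spk[utt]}\n")
            spk2utt[utt2spk[utt]].append(utt)
    with open(spk2utt_path, "w", encoding="utf8") as f:
        for spk in sorted(spk2utt):
            f.write(f"{spk} {' '.join(sorted(spk2utt[spk]))}\n")


write_speaker_maps(
    {"spk1-utt1": "spk1", "spk1-utt2": "spk1", "spk2-utt1": "spk2"},
    "utt2spk.scp",
    "spk2utt.scp",
)
```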
-from typing import TYPE_CHECKING, Collection, Optional, Union +from typing import TYPE_CHECKING, Collection, Dict, List, Optional, Set, Union +from montreal_forced_aligner.corpus.classes import ( + FileCollection, + SpeakerCollection, + UtteranceCollection, +) from montreal_forced_aligner.exceptions import TextGridParseError, TextParseError from montreal_forced_aligner.helper import output_mapping @@ -21,11 +26,11 @@ from montreal_forced_aligner.abc import OneToManyMappingType, OneToOneMappingType from montreal_forced_aligner.corpus.helper import SoundFileInfoDict - FileInfoDict = dict[ + FileInfoDict = Dict[ str, Union[str, SoundFileInfoDict, OneToOneMappingType, OneToManyMappingType] ] from montreal_forced_aligner.abc import MappingType, ReversedMappingType, WordsType - from montreal_forced_aligner.corpus.classes import File, Speaker, Utterance + from montreal_forced_aligner.corpus.classes import Speaker from montreal_forced_aligner.dictionary import DictionaryData, PronunciationDictionaryMixin from montreal_forced_aligner.utils import Stopped @@ -126,19 +131,19 @@ class Job: """ name: int - speakers: list[Speaker] - subset_utts: set[Utterance] - subset_speakers: set[Speaker] - dictionaries: set[PronunciationDictionaryMixin] - subset_dictionaries: set[PronunciationDictionaryMixin] + speakers: SpeakerCollection + subset_utts: UtteranceCollection + subset_speakers: SpeakerCollection + dictionaries: Set[PronunciationDictionaryMixin] + subset_dictionaries: Set[PronunciationDictionaryMixin] def __init__(self, name: int): self.name = name - self.speakers = [] + self.speakers = SpeakerCollection() self.dictionaries = set() - self.subset_utts = set() - self.subset_speakers = set() + self.subset_utts = UtteranceCollection() + self.subset_speakers = SpeakerCollection() self.subset_dictionaries = set() def add_speaker(self, speaker: Speaker) -> None: @@ -150,10 +155,10 @@ def add_speaker(self, speaker: Speaker) -> None: speaker: :class:`~montreal_forced_aligner.corpus.classes.Speaker` Speaker to add """ - self.speakers.append(speaker) + self.speakers.add_speaker(speaker) self.dictionaries.add(speaker.dictionary) - def set_subset(self, subset_utts: Optional[Collection[Utterance]]) -> None: + def set_subset(self, subset_utts: Optional[UtteranceCollection]) -> None: """ Set the current subset for the trainer @@ -162,16 +167,18 @@ def set_subset(self, subset_utts: Optional[Collection[Utterance]]) -> None: subset_utts: Collection[:class:`~montreal_forced_aligner.corpus.classes.Utterance`], optional Subset of utterances for this job to use """ - if subset_utts is None: - self.subset_utts = set() - self.subset_speakers = set() - self.subset_dictionaries = set() - else: - self.subset_utts = set(u for u in subset_utts if u.speaker in self.speakers) - self.subset_speakers = {u.speaker for u in subset_utts if u.speaker in self.speakers} + self.subset_utts = UtteranceCollection() + self.subset_speakers = SpeakerCollection() + self.subset_dictionaries = set() + if subset_utts: + for u in subset_utts: + if u.speaker not in self.speakers: + continue + self.subset_utts.add_utterance(u) + self.subset_speakers.add_speaker(u.speaker) self.subset_dictionaries = {s.dictionary for s in self.subset_speakers} - def text_scp_data(self) -> dict[str, dict[str, list[str]]]: + def text_scp_data(self) -> Dict[str, Dict[str, List[str]]]: """ Generate the job's data for Kaldi's text scp files @@ -184,13 +191,13 @@ def text_scp_data(self) -> dict[str, dict[str, list[str]]]: utts = self.job_utts() for dict_name, utt_data in 
utts.items(): data[dict_name] = {} - for utt in utt_data.values(): + for utt in utt_data: if not utt.text: continue data[dict_name][utt.name] = " ".join(map(str, utt.text_for_scp())) return data - def text_int_scp_data(self) -> dict[str, dict[str, str]]: + def text_int_scp_data(self) -> Dict[str, Dict[str, str]]: """ Generate the job's data for Kaldi's text int scp files @@ -203,15 +210,16 @@ def text_int_scp_data(self) -> dict[str, dict[str, str]]: utts = self.job_utts() for dict_name, utt_data in utts.items(): data[dict_name] = {} - for utt in utt_data.values(): + for utt in utt_data: if utt.speaker.dictionary is None: continue if not utt.text: continue data[dict_name][utt.name] = " ".join(map(str, utt.text_int_for_scp())) + utt.speaker.dictionary.oovs_found.update(utt.oovs) return data - def wav_scp_data(self) -> dict[str, dict[str, str]]: + def wav_scp_data(self) -> Dict[str, Dict[str, str]]: """ Generate the job's data for Kaldi's wav scp files @@ -226,7 +234,7 @@ def wav_scp_data(self) -> dict[str, dict[str, str]]: for dict_name, utt_data in utts.items(): data[dict_name] = {} done[dict_name] = set() - for utt in utt_data.values(): + for utt in utt_data: if not utt.is_segment: data[dict_name][utt.name] = utt.file.for_wav_scp() elif utt.file.name not in done: @@ -234,7 +242,7 @@ def wav_scp_data(self) -> dict[str, dict[str, str]]: done[dict_name].add(utt.file.name) return data - def utt2spk_scp_data(self) -> dict[str, dict[str, str]]: + def utt2spk_scp_data(self) -> Dict[str, Dict[str, str]]: """ Generate the job's data for Kaldi's utt2spk scp files @@ -247,11 +255,11 @@ def utt2spk_scp_data(self) -> dict[str, dict[str, str]]: utts = self.job_utts() for dict_name, utt_data in utts.items(): data[dict_name] = {} - for utt in utt_data.values(): + for utt in utt_data: data[dict_name][utt.name] = utt.speaker_name return data - def feat_scp_data(self) -> dict[str, dict[str, str]]: + def feat_scp_data(self) -> Dict[str, Dict[str, str]]: """ Generate the job's data for Kaldi's feature scp files @@ -264,13 +272,13 @@ def feat_scp_data(self) -> dict[str, dict[str, str]]: utts = self.job_utts() for dict_name, utt_data in utts.items(): data[dict_name] = {} - for utt in utt_data.values(): + for utt in utt_data: if not utt.features: continue data[dict_name][utt.name] = utt.features return data - def spk2utt_scp_data(self) -> dict[str, dict[str, list[str]]]: + def spk2utt_scp_data(self) -> Dict[str, Dict[str, List[str]]]: """ Generate the job's data for Kaldi's spk2utt scp files @@ -283,7 +291,7 @@ def spk2utt_scp_data(self) -> dict[str, dict[str, list[str]]]: utts = self.job_utts() for dict_name, utt_data in utts.items(): data[dict_name] = {} - for utt in utt_data.values(): + for utt in utt_data: if utt.speaker.name not in data[dict_name]: data[dict_name][utt.speaker.name] = [] data[dict_name][utt.speaker.name].append(str(utt)) @@ -292,7 +300,7 @@ def spk2utt_scp_data(self) -> dict[str, dict[str, list[str]]]: data[k][s] = sorted(utts) return data - def cmvn_scp_data(self) -> dict[str, dict[str, str]]: + def cmvn_scp_data(self) -> Dict[str, Dict[str, str]]: """ Generate the job's data for Kaldi's CMVN scp files @@ -315,7 +323,7 @@ def cmvn_scp_data(self) -> dict[str, dict[str, str]]: data[key][s.name] = s.cmvn return data - def segments_scp_data(self) -> dict[str, dict[str, str]]: + def segments_scp_data(self) -> Dict[str, Dict[str, str]]: """ Generate the job's data for Kaldi's segments scp files @@ -328,7 +336,7 @@ def segments_scp_data(self) -> dict[str, dict[str, str]]: utts = self.job_utts() for 
dict_name, utt_data in utts.items(): data[dict_name] = {} - for utt in utt_data.values(): + for utt in utt_data: if not utt.is_segment: continue data[dict_name][utt.name] = utt.segment_for_scp() @@ -336,7 +344,7 @@ def segments_scp_data(self) -> dict[str, dict[str, str]]: def construct_path_dictionary( self, directory: str, identifier: str, extension: str - ) -> dict[str, str]: + ) -> Dict[str, str]: """ Helper function for constructing dictionary-dependent paths for the Job @@ -363,7 +371,7 @@ def construct_path_dictionary( def construct_dictionary_dependent_paths( self, directory: str, identifier: str, extension: str - ) -> dict[str, str]: + ) -> Dict[str, str]: """ Helper function for constructing paths that depend only on the dictionaries of the job, and not the job name itself. These paths should be merged with all other jobs to get a full set of dictionary paths. @@ -402,7 +410,7 @@ def current_dictionaries(self) -> Collection[PronunciationDictionaryMixin]: return self.dictionaries @property - def current_dictionary_names(self) -> list[Optional[str]]: + def current_dictionary_names(self) -> List[Optional[str]]: """Current dictionary names depending on whether a subset is being used""" if self.subset_dictionaries: return sorted(x.name for x in self.subset_dictionaries) @@ -410,7 +418,7 @@ def current_dictionary_names(self) -> list[Optional[str]]: return [None] return sorted(x.name for x in self.dictionaries) - def word_boundary_int_files(self) -> dict[str, str]: + def word_boundary_int_files(self) -> Dict[str, str]: """ Generate mapping for dictionaries to word boundary int files @@ -424,7 +432,7 @@ def word_boundary_int_files(self) -> dict[str, str]: data[dictionary.name] = os.path.join(dictionary.phones_dir, "word_boundary.int") return data - def reversed_phone_mappings(self) -> dict[str, ReversedMappingType]: + def reversed_phone_mappings(self) -> Dict[str, ReversedMappingType]: """ Generate mapping for dictionaries to reversed phone mapping @@ -438,7 +446,7 @@ def reversed_phone_mappings(self) -> dict[str, ReversedMappingType]: data[dictionary.name] = dictionary.reversed_phone_mapping return data - def reversed_word_mappings(self) -> dict[str, ReversedMappingType]: + def reversed_word_mappings(self) -> Dict[str, ReversedMappingType]: """ Generate mapping for dictionaries to reversed word mapping @@ -452,7 +460,7 @@ def reversed_word_mappings(self) -> dict[str, ReversedMappingType]: data[dictionary.name] = dictionary.reversed_word_mapping return data - def words_mappings(self) -> dict[str, MappingType]: + def words_mappings(self) -> Dict[str, MappingType]: """ Generate mapping for dictionaries to word mapping @@ -466,7 +474,7 @@ def words_mappings(self) -> dict[str, MappingType]: data[dictionary.name] = dictionary.words_mapping return data - def words(self) -> dict[str, WordsType]: + def words(self) -> Dict[str, WordsType]: """ Generate mapping for dictionaries to words @@ -494,7 +502,7 @@ def punctuation(self): data[dictionary.name] = dictionary.punctuation return data - def clitic_set(self) -> dict[str, set[str]]: + def clitic_set(self) -> Dict[str, Set[str]]: """ Generate mapping for dictionaries to clitic sets @@ -508,7 +516,7 @@ def clitic_set(self) -> dict[str, set[str]]: data[dictionary.name] = dictionary.clitic_set return data - def clitic_markers(self) -> dict[str, list[str]]: + def clitic_markers(self) -> Dict[str, List[str]]: """ Generate mapping for dictionaries to clitic markers @@ -522,7 +530,7 @@ def clitic_markers(self) -> dict[str, list[str]]: data[dictionary.name] 
= dictionary.clitic_markers return data - def compound_markers(self) -> dict[str, list[str]]: + def compound_markers(self) -> Dict[str, List[str]]: """ Generate mapping for dictionaries to compound markers @@ -536,7 +544,7 @@ def compound_markers(self) -> dict[str, list[str]]: data[dictionary.name] = dictionary.compound_markers return data - def strip_diacritics(self) -> dict[str, list[str]]: + def strip_diacritics(self) -> Dict[str, List[str]]: """ Generate mapping for dictionaries to diacritics to strip @@ -550,7 +558,7 @@ def strip_diacritics(self) -> dict[str, list[str]]: data[dictionary.name] = dictionary.strip_diacritics return data - def oov_codes(self) -> dict[str, str]: + def oov_codes(self) -> Dict[str, str]: """ Generate mapping for dictionaries to oov symbols @@ -564,7 +572,7 @@ def oov_codes(self) -> dict[str, str]: data[dictionary.name] = dictionary.oov_word return data - def oov_ints(self) -> dict[str, int]: + def oov_ints(self) -> Dict[str, int]: """ Generate mapping for dictionaries to oov ints @@ -578,7 +586,7 @@ def oov_ints(self) -> dict[str, int]: data[dictionary.name] = dictionary.oov_int return data - def positions(self) -> dict[str, list[str]]: + def positions(self) -> Dict[str, List[str]]: """ Generate mapping for dictionaries to positions @@ -592,7 +600,7 @@ def positions(self) -> dict[str, list[str]]: data[dictionary.name] = dictionary.positions return data - def silences(self) -> dict[str, set[str]]: + def silences(self) -> Dict[str, Set[str]]: """ Generate mapping for dictionaries to silence symbols @@ -606,7 +614,7 @@ def silences(self) -> dict[str, set[str]]: data[dictionary.name] = dictionary.silences return data - def multilingual_ipa(self) -> dict[str, bool]: + def multilingual_ipa(self) -> Dict[str, bool]: """ Generate mapping for dictionaries to multilingual IPA flags @@ -620,76 +628,79 @@ def multilingual_ipa(self) -> dict[str, bool]: data[dictionary.name] = dictionary.multilingual_ipa return data - def job_utts(self) -> dict[str, dict[str, Utterance]]: + def job_utts(self) -> Dict[str, UtteranceCollection]: """ Generate utterances by dictionary name for the Job Returns ------- - dict[str, dict[str, :class:`~montreal_forced_aligner.corpus.classes.Utterance`]] + dict[str, :class:`~montreal_forced_aligner.corpus.classes.UtteranceCollection`] Mapping of dictionary name to Utterance mappings """ data = {} if self.subset_utts: utterances = self.subset_utts else: - utterances = set() + utterances = UtteranceCollection() for s in self.speakers: - utterances.update(s.utterances.values()) + utterances.update(s.utterances) for u in utterances: if u.ignored: continue - if u.speaker.dictionary is None: + dictionary = self.speakers[u.speaker.name].dictionary + if dictionary is None: dict_name = None else: - dict_name = u.speaker.dictionary.name + u.speaker.dictionary = dictionary + u.speaker.dictionary_name = dictionary.name + u.speaker.dictionary_data = self.speakers[u.speaker.name].dictionary_data + dict_name = dictionary.name if dict_name not in data: - data[dict_name] = {} - data[dict_name][u.name] = u - + data[dict_name] = UtteranceCollection() + data[dict_name].add_utterance(u) return data - def job_files(self) -> dict[str, File]: + def job_files(self) -> FileCollection: """ Generate files for the Job Returns ------- - dict[str, :class:`~montreal_forced_aligner.corpus.classes.File`] - Mapping of file name to File objects + :class:`~montreal_forced_aligner.corpus.classes.FileCollection` + Collection of files """ - data = {} + data = FileCollection() if 
self.subset_utts: utterances = self.subset_utts else: utterances = set() for s in self.speakers: - utterances.update(s.utterances.values()) + utterances.update(s.utterances) for u in utterances: if u.ignored: continue - data[u.file_name] = u.file + data.add_file(u.file) return data - def job_speakers(self) -> dict[str, Speaker]: + def job_speakers(self) -> SpeakerCollection: """ - Generate files for the Job + Generate speakers for the Job Returns ------- - dict[str, :class:`~montreal_forced_aligner.corpus.classes.Speaker`] - Mapping of file name to File objects + :class:`~montreal_forced_aligner.corpus.classes.SpeakerCollection` + Collection of speakers """ - data = {} + data = SpeakerCollection() if self.subset_speakers: speakers = self.subset_speakers else: speakers = self.speakers for s in speakers: - data[s.name] = s + data.add_speaker(s) return data - def dictionary_data(self) -> dict[str, DictionaryData]: + def dictionary_data(self) -> Dict[str, DictionaryData]: """ Generate dictionary data for the job @@ -753,7 +764,7 @@ def output_to_directory(self, split_directory: str) -> None: if not scp: continue text_scp_path = os.path.join(split_directory, f"text.{dict_name}.{self.name}.scp") - output_mapping(scp, text_scp_path) + output_mapping(scp, text_scp_path, skip_safe=True) text_int = self.text_int_scp_data() for dict_name, scp in text_int.items(): diff --git a/montreal_forced_aligner/corpus/text_corpus.py b/montreal_forced_aligner/corpus/text_corpus.py index 474f2a33..c5f22132 100644 --- a/montreal_forced_aligner/corpus/text_corpus.py +++ b/montreal_forced_aligner/corpus/text_corpus.py @@ -225,7 +225,7 @@ def load_corpus(self) -> None: self.set_lexicon_word_set(self.corpus_word_set) self.write_lexicon_information() - for speaker in self.speakers.values(): + for speaker in self.speakers: speaker.set_dictionary(self.get_dictionary(speaker.name)) self.initialize_jobs() self.write_corpus_information() diff --git a/montreal_forced_aligner/dictionary/mixins.py b/montreal_forced_aligner/dictionary/mixins.py index 2a535f8f..ff695577 100644 --- a/montreal_forced_aligner/dictionary/mixins.py +++ b/montreal_forced_aligner/dictionary/mixins.py @@ -7,7 +7,7 @@ import re from collections import Counter from dataclasses import dataclass -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple from montreal_forced_aligner.abc import TemporaryDirectoryMixin from montreal_forced_aligner.data import CtmInterval @@ -50,10 +50,10 @@ class SanitizeFunction: def __init__( self, - punctuation: list[str], - clitic_markers: list[str], - compound_markers: list[str], - brackets: list[tuple[str, str]], + punctuation: List[str], + clitic_markers: List[str], + compound_markers: List[str], + brackets: List[Tuple[str, str]], ): self.punctuation = punctuation self.clitic_markers = clitic_markers @@ -101,12 +101,12 @@ class SplitWordsFunction: def __init__( self, - punctuation: list[str], - clitic_markers: list[str], - compound_markers: list[str], - brackets: list[tuple[str, str]], - clitic_set: set[str], - word_set: Optional[set[str]] = None, + punctuation: List[str], + clitic_markers: List[str], + compound_markers: List[str], + brackets: List[Tuple[str, str]], + clitic_set: Set[str], + word_set: Optional[Set[str]] = None, ): self.punctuation = punctuation self.clitic_markers = clitic_markers @@ -142,7 +142,7 @@ def __init__( def split_clitics( self, item: str, - ) -> list[str]: + ) -> List[str]: """ Split a word into subwords based on dictionary information @@ 
-178,7 +178,7 @@ def split_clitics( def __call__( self, item: str, - ) -> list[str]: + ) -> List[str]: """ Return the list of sub words if necessary taking into account clitic and compound markers @@ -251,31 +251,34 @@ class DictionaryMixin: Maximum number of disambiguation symbols required, defaults to 0 """ - positions: list[str] = ["_B", "_E", "_I", "_S"] + positions: List[str] = ["_B", "_E", "_I", "_S"] def __init__( self, oov_word: str = "", - silence_word: str = "!sil", - nonoptional_silence_phone: str = "sil", - optional_silence_phone: str = "sp", + silence_word: str = "", + noise_word: str = "", + optional_silence_phone: str = "sil", oov_phone: str = "spn", - other_noise_phone: str = "spn", + other_noise_phone: str = "noi", position_dependent_phones: bool = True, num_silence_states: int = 5, + num_noise_states: int = 5, num_non_silence_states: int = 3, - shared_silence_phones: bool = True, + shared_silence_phones: bool = False, silence_probability: float = 0.5, - punctuation: list[str] = None, - clitic_markers: list[str] = None, - compound_markers: list[str] = None, + punctuation: List[str] = None, + phone_set_type: Optional[str] = None, + base_phone_regex: Optional[str] = None, + clitic_markers: List[str] = None, + compound_markers: List[str] = None, multilingual_ipa: bool = False, - strip_diacritics: list[str] = None, - digraphs: list[str] = None, - brackets: list[tuple[str, str]] = None, - non_silence_phones: set[str] = None, - disambiguation_symbols: set[str] = None, - clitic_set: set[str] = None, + strip_diacritics: List[str] = None, + digraphs: List[str] = None, + brackets: List[Tuple[str, str]] = None, + non_silence_phones: Set[str] = None, + disambiguation_symbols: Set[str] = None, + clitic_set: Set[str] = None, max_disambiguation_symbol: int = 0, **kwargs, ): @@ -301,14 +304,19 @@ def __init__( self.multilingual_ipa = multilingual_ipa self.num_silence_states = num_silence_states + self.num_noise_states = num_noise_states self.num_non_silence_states = num_non_silence_states self.shared_silence_phones = shared_silence_phones self.silence_probability = silence_probability self.oov_word = oov_word self.silence_word = silence_word + self.noise_word = noise_word + if base_phone_regex is not None: + base_phone_regex = re.compile(base_phone_regex) + self.base_phone_regex: re.Pattern = base_phone_regex + self.phone_set_type = phone_set_type self.position_dependent_phones = position_dependent_phones self.optional_silence_phone = optional_silence_phone - self.nonoptional_silence_phone = nonoptional_silence_phone self.oov_phone = oov_phone self.oovs_found = Counter() self.other_noise_phone = other_noise_phone @@ -323,6 +331,38 @@ def __init__( clitic_set = set() self.clitic_set = clitic_set + @property + def extra_questions_mapping(self) -> Dict[str, List[str]]: + mapping = {} + mapping["silence_question"] = [] + for p in sorted(self.silence_phones): + mapping["silence_question"].append(p) + if self.position_dependent_phones: + mapping["silence_question"].extend([p + x for x in self.positions]) + if self.phone_set_type == "ARPA": + mapping["non_silence_arpa_questions"] = [] + for p in self.kaldi_grouped_phones.keys(): + if self.position_dependent_phones: + mapping["non_silence_arpa_questions"].extend([p + x for x in self.positions]) + else: + mapping["non_silence_arpa_questions"].append(p) + # extra stress questions + for i in range(3): + mapping[f"stress_{i}"] = [] + for p in self.kaldi_non_silence_phones: + if str(i) not in p: + continue + mapping[f"stress_{i}"].append(p) + if 
self.position_dependent_phones: + phones = sorted(self.non_silence_phones) + for pos in self.positions: + mapping[f"non_silence{pos}"] = [x + pos for x in phones] + silence_phones = sorted(self.silence_phones) + for pos in [""] + self.positions: + mapping[f"silence{pos}"] = [x + pos for x in silence_phones] + + return mapping + @property def dictionary_options(self) -> MetaDict: """Dictionary options""" @@ -343,7 +383,6 @@ def dictionary_options(self) -> MetaDict: "silence_word": self.silence_word, "position_dependent_phones": self.position_dependent_phones, "optional_silence_phone": self.optional_silence_phone, - "nonoptional_silence_phone": self.nonoptional_silence_phone, "oov_phone": self.oov_phone, "other_noise_phone": self.other_noise_phone, "non_silence_phones": self.non_silence_phones, @@ -355,37 +394,32 @@ def dictionary_options(self) -> MetaDict: def silence_phones(self): """Silence phones""" return { - self.oov_phone, self.optional_silence_phone, - self.nonoptional_silence_phone, + self.oov_phone, self.other_noise_phone, } + @property + def context_independent_csl(self): + return ":".join(str(self.phone_mapping[x]) for x in self.silence_phones) + @property def specials_set(self): """Special words, like the ``oov_word`` ``silence_word``, ````, ````, and ````""" - return {self.oov_word, self.silence_word, "", "", ""} + return {self.oov_word, self.silence_word, self.noise_word, "", "", ""} @property - def phone_mapping(self) -> dict[str, int]: + def phone_mapping(self) -> Dict[str, int]: """Mapping of phones to integer IDs""" phone_mapping = {} i = 0 phone_mapping[""] = i - if self.position_dependent_phones: - for p in self.positional_silence_phones: - i += 1 - phone_mapping[p] = i - for p in self.positional_non_silence_phones: - i += 1 - phone_mapping[p] = i - else: - for p in sorted(self.silence_phones): - i += 1 - phone_mapping[p] = i - for p in sorted(self.non_silence_phones): - i += 1 - phone_mapping[p] = i + for p in self.kaldi_silence_phones: + i += 1 + phone_mapping[p] = i + for p in self.kaldi_non_silence_phones: + i += 1 + phone_mapping[p] = i i = max(phone_mapping.values()) for x in range(self.max_disambiguation_symbol + 2): p = f"#{x}" @@ -395,7 +429,7 @@ def phone_mapping(self) -> dict[str, int]: return phone_mapping @property - def positional_silence_phones(self) -> list[str]: + def positional_silence_phones(self) -> List[str]: """ List of silence phones with positions """ @@ -407,16 +441,70 @@ def positional_silence_phones(self) -> list[str]: return silence_phones @property - def positional_non_silence_phones(self) -> list[str]: + def positional_non_silence_phones(self) -> List[str]: """ List of non-silence phones with positions """ non_silence_phones = [] for p in sorted(self.non_silence_phones): + if self.phone_set_type == "ARPA": + m = re.match(self.base_phone_regex, p) + if m: + base_phone = m.group(0) + for pos in self.positions: + pos_p = base_phone + pos + if pos_p not in non_silence_phones: + non_silence_phones.append(pos_p) for pos in self.positions: - non_silence_phones.append(p + pos) + pos_p = p + pos + if pos_p not in non_silence_phones: + non_silence_phones.append(pos_p) return non_silence_phones + @property + def kaldi_non_silence_phones(self): + """Non silence phones in Kaldi format""" + if self.position_dependent_phones: + return self.positional_non_silence_phones + base_phones = set() + if self.phone_set_type == "ARPA": + for p in self.non_silence_phones: + m = re.match(self.base_phone_regex, p) + if m: + base_phone = m.group(0) + 
base_phones.add(base_phone) + + return sorted(self.non_silence_phones | base_phones) + + @property + def kaldi_grouped_phones(self) -> Dict[str, List[str]]: + """Non silence phones in Kaldi format""" + groups = {} + for p in sorted(self.non_silence_phones): + if self.phone_set_type == "ARPA": + m = re.match(self.base_phone_regex, p) + if m: + base_phone = m.group(0) + if base_phone not in groups: + groups[base_phone] = [] + if self.position_dependent_phones: + groups[base_phone] = [base_phone + pos for pos in self.positions] + else: + groups[base_phone] = [base_phone] + if base_phone == p: + continue + if self.position_dependent_phones: + groups[base_phone].extend([p + pos for pos in self.positions]) + else: + groups[base_phone].append(p) + else: + if self.position_dependent_phones: + groups[p] = [p + pos for pos in self.positions] + else: + groups[p] = [p] + + return groups + @property def kaldi_silence_phones(self): """Silence phones in Kaldi format""" @@ -440,13 +528,6 @@ def save_oovs_found(self, directory: str) -> None: f.write(oov + "\n") cf.write(f"{oov}\t{self.oovs_found[oov]}\n") - @property - def kaldi_non_silence_phones(self): - """Non silence phones in Kaldi format""" - if self.position_dependent_phones: - return self.positional_non_silence_phones - return sorted(self.non_silence_phones) - @property def optional_silence_csl(self) -> str: """ @@ -461,6 +542,13 @@ def silence_csl(self) -> str: """ return ":".join(map(str, (self.phone_mapping[x] for x in self.kaldi_silence_phones))) + @property + def non_silence_csl(self) -> str: + """ + A colon-separated string of non-silence phone ids + """ + return ":".join(map(str, (self.phone_mapping[x] for x in self.kaldi_non_silence_phones))) + @property def phones(self) -> set: """ @@ -518,7 +606,7 @@ def sanitize(self, item: str) -> str: """ return self.construct_sanitize_function()(item) - def parse_ipa(self, transcription: list[str]) -> tuple[str, ...]: + def parse_ipa(self, transcription: List[str]) -> Tuple[str, ...]: """ Parse a transcription in a multilingual IPA format (strips out diacritics and splits digraphs). 
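`kaldi_grouped_phones` above collapses stress-marked ARPA variants (`AA0`, `AA1`, `AA2`) onto a shared base phone so related phones can share decision-tree roots, optionally expanding each member with word-position suffixes. A self-contained sketch of that grouping, assuming a simplified base-phone regex (the real pattern is supplied by the dictionary model):

```python
import re
from typing import Dict, List

# Assumption: an ARPA base phone is the leading run of letters (AA1 -> AA)
BASE_PHONE_REGEX = re.compile(r"[A-Z]+")
POSITIONS = ["_B", "_E", "_I", "_S"]


def group_arpa_phones(
    non_silence_phones: List[str], position_dependent: bool = True
) -> Dict[str, List[str]]:
    """Group stressed variants under their base phone, as kaldi_grouped_phones does."""
    groups: Dict[str, List[str]] = {}
    for p in sorted(non_silence_phones):
        m = re.match(BASE_PHONE_REGEX, p)
        base = m.group(0) if m else p
        if base not in groups:
            groups[base] = [base + pos for pos in POSITIONS] if position_dependent else [base]
        if base == p:
            continue
        if position_dependent:
            groups[base].extend(p + pos for pos in POSITIONS)
        else:
            groups[base].append(p)
    return groups


print(group_arpa_phones(["AA0", "AA1", "B"]))
# {'AA': ['AA_B', ..., 'AA0_B', ..., 'AA1_S'], 'B': ['B_B', 'B_E', 'B_I', 'B_S']}
```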
@@ -602,50 +690,50 @@ def _write_topo(self) -> None:
             topo_transition_template.format(self.num_silence_states - 1, 0.75),
             topo_transition_template.format(self.num_silence_states, 0.25),
         ]
+        states = []
+        for i in range(self.num_non_silence_states):
+            states.append(topo_template.format(cur_state=i, next_state=i + 1))
+        states.append(f"<State> {self.num_non_silence_states} </State>")
+        non_silence_state_string = "\n".join(states)
+
+        states = []
+        for i in range(self.num_silence_states):
+            if i == 0:
+                transition = " ".join(initial_transition)
+            elif i == self.num_silence_states - 1:
+                transition = " ".join(final_transition)
+            else:
+                transition = " ".join(middle_transition)
+            states.append(topo_sil_template.format(cur_state=i, transitions=transition))
+        states.append(f"<State> {self.num_silence_states} </State>")
+        silence_state_string = "\n".join(states)
+
         with open(self.topo_path, "w") as f:
-            f.write("<Topology>\n")
-            f.write("<TopologyEntry>\n")
-            f.write("<ForPhones>\n")
-            phones = self.kaldi_non_silence_phones
-            f.write(f"{' '.join(str(self.phone_mapping[x]) for x in phones)}\n")
-            f.write("</ForPhones>\n")
-            states = [
-                topo_template.format(cur_state=x, next_state=x + 1)
-                for x in range(self.num_non_silence_states)
-            ]
-            f.write("\n".join(states))
-            f.write(f"\n<State> {self.num_non_silence_states} </State>\n")
-            f.write("</TopologyEntry>\n")
-
-            f.write("<TopologyEntry>\n")
-            f.write("<ForPhones>\n")
-
-            phones = self.kaldi_silence_phones
-            f.write(f"{' '.join(str(self.phone_mapping[x]) for x in phones)}\n")
-            f.write("</ForPhones>\n")
-            states = []
-            for i in range(self.num_silence_states):
-                if i == 0:
-                    transition = " ".join(initial_transition)
-                elif i == self.num_silence_states - 1:
-                    transition = " ".join(final_transition)
-                else:
-                    transition = " ".join(middle_transition)
-                states.append(topo_sil_template.format(cur_state=i, transitions=transition))
-            f.write("\n".join(states))
-            f.write(f"\n<State> {self.num_silence_states} </State>\n")
-            f.write("</TopologyEntry>\n")
-            f.write("</Topology>\n")
+            f.write(
+                f"""<Topology>
+<TopologyEntry>
+<ForPhones>
+{' '.join(str(self.phone_mapping[x]) for x in self.kaldi_silence_phones)}
+</ForPhones>
+{silence_state_string}
+</TopologyEntry>
+<TopologyEntry>
+<ForPhones>
+{' '.join(str(self.phone_mapping[x]) for x in self.kaldi_non_silence_phones)}
+</ForPhones>
+{non_silence_state_string}
+</TopologyEntry>
+</Topology>
+"""
+            )
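The rewritten `_write_topo` above assembles each HMM state list up front and writes the whole topology through a single template. For reference, a sketch of one left-to-right entry in Kaldi's topo format, with made-up phone IDs and transition probabilities:

```python
def topology_entry(phone_ids, num_states: int, self_loop: float = 0.75) -> str:
    """Render one Kaldi topology entry with simple left-to-right states."""
    lines = ["<TopologyEntry>", "<ForPhones>", " ".join(map(str, phone_ids)), "</ForPhones>"]
    for state in range(num_states):
        lines.append(
            f"<State> {state} <PdfClass> {state} "
            f"<Transition> {state} {self_loop} <Transition> {state + 1} {1 - self_loop} </State>"
        )
    # Final non-emitting state
    lines.append(f"<State> {num_states} </State>")
    lines.append("</TopologyEntry>")
    return "\n".join(lines)


print(topology_entry([1, 2, 3], num_states=3))
```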
rootintf.write(f"not-shared not-split {phone_int_string}\n") + else: + for sp in self.silence_phones: + if self.position_dependent_phones: + mapped = [sp + x for x in [""] + self.positions] + else: + mapped = [sp] + phone_string = " ".join(mapped) + phone_int_string = " ".join(str(self.phone_mapping[x]) for x in mapped) + setf.write(f"{phone_string}\n") + setintf.write(f"{phone_int_string}\n") + rootf.write(f"shared split {phone_string}\n") + rootintf.write(f"shared split {phone_int_string}\n") # process nonsilence phones - for nsp in sorted(self.non_silence_phones): - if self.position_dependent_phones: - mapped = [nsp + x for x in self.positions] - else: - mapped = [nsp] - setf.write(" ".join(mapped) + "\n") - setintf.write(" ".join(map(str, (self.phone_mapping[x] for x in mapped))) + "\n") - line = sharesplit + mapped - lineint = sharesplit + [str(self.phone_mapping[x]) for x in mapped] - rootf.write(" ".join(line) + "\n") - rootintf.write(" ".join(lineint) + "\n") + for group in self.kaldi_grouped_phones.values(): + + phone_string = " ".join(group) + phone_int_string = " ".join(str(self.phone_mapping[x]) for x in group) + setf.write(f"{phone_string}\n") + setintf.write(f"{phone_int_string}\n") + rootf.write(f"shared split {phone_string}\n") + rootintf.write(f"shared split {phone_int_string}\n") @property def phone_symbol_table_path(self): @@ -731,22 +822,10 @@ def _write_extra_questions(self) -> None: with open(phone_extra, "w", encoding="utf8") as outf, open( phone_extra_int, "w", encoding="utf8" ) as intf: - silences = self.kaldi_silence_phones - outf.write(" ".join(silences) + "\n") - intf.write(" ".join(str(self.phone_mapping[x]) for x in silences) + "\n") - - non_silences = self.kaldi_non_silence_phones - outf.write(" ".join(non_silences) + "\n") - intf.write(" ".join(str(self.phone_mapping[x]) for x in non_silences) + "\n") - if self.position_dependent_phones: - for p in self.positions: - line = [x + p for x in sorted(self.non_silence_phones)] - outf.write(" ".join(line) + "\n") - intf.write(" ".join(str(self.phone_mapping[x]) for x in line) + "\n") - for p in [""] + self.positions: - line = [x + p for x in sorted(self.silence_phones)] - outf.write(" ".join(line) + "\n") - intf.write(" ".join(str(self.phone_mapping[x]) for x in line) + "\n") + for v in self.extra_questions_mapping.values(): + outf.write(f"{' '.join(v)}\n") + intf.write(f"{' '.join(str(self.phone_mapping[x]) for x in v)}\n") + # error def _write_disambig(self) -> None: """ @@ -761,25 +840,6 @@ def _write_disambig(self) -> None: outf.write(f"{d}\n") intf.write(f"{self.phone_mapping[d]}\n") - def _write_phone_map_file(self) -> None: - """ - Write the phone map to the temporary directory - """ - outfile = os.path.join(self.phones_dir, "phone_map.txt") - with open(outfile, "w", encoding="utf8") as f: - for sp in self.silence_phones: - if self.position_dependent_phones: - new_phones = [sp + x for x in ["", ""] + self.positions] - else: - new_phones = [sp] - f.write(" ".join(new_phones) + "\n") - for nsp in self.non_silence_phones: - if self.position_dependent_phones: - new_phones = [nsp + x for x in [""] + self.positions] - else: - new_phones = [nsp] - f.write(" ".join(new_phones) + "\n") - @dataclass class DictionaryData: @@ -808,40 +868,55 @@ class DictionaryData: words_mapping: MappingType reversed_words_mapping: ReversedMappingType words: WordsType - lookup_cache: dict[str, list[str]] + lookup_cache: Dict[str, List[str]] @property def oov_word(self) -> str: """Out of vocabulary code""" return 
self.dictionary_options["oov_word"] + @property + def oov_phone(self) -> str: + """Out of vocabulary code""" + return self.dictionary_options["oov_phone"] + + @property + def other_noise_phone(self) -> str: + """Out of vocabulary code""" + return self.dictionary_options["other_noise_phone"] + + @property + def optional_silence_phone(self) -> str: + """Out of vocabulary code""" + return self.dictionary_options["optional_silence_phone"] + @property def oov_int(self) -> int: """Out of vocabulary integer ID""" return self.words_mapping[self.oov_word] @property - def compound_markers(self) -> list[str]: + def compound_markers(self) -> List[str]: """Characters that separate compound words""" return self.dictionary_options["compound_markers"] @property - def clitic_markers(self) -> list[str]: + def clitic_markers(self) -> List[str]: """Characters that mark clitics""" return self.dictionary_options["clitic_markers"] @property - def clitic_set(self) -> set[str]: + def clitic_set(self) -> Set[str]: """Set of clitics""" return self.dictionary_options["clitic_set"] @property - def punctuation(self) -> list[str]: + def punctuation(self) -> List[str]: """Characters to treat as punctuation""" return self.dictionary_options["punctuation"] @property - def strip_diacritics(self) -> list[str]: + def strip_diacritics(self) -> List[str]: """IPA diacritics to strip in multilingual IPA mode""" return self.dictionary_options["strip_diacritics"] @@ -851,19 +926,18 @@ def multilingual_ipa(self) -> bool: return self.dictionary_options["multilingual_ipa"] @property - def silence_phones(self) -> set[str]: + def silence_phones(self) -> Set[str]: """Silence phones""" return { - self.dictionary_options["oov_phone"], self.dictionary_options["optional_silence_phone"], - self.dictionary_options["nonoptional_silence_phone"], + self.dictionary_options["oov_phone"], self.dictionary_options["other_noise_phone"], } def lookup( self, item: str, - ) -> list[str]: + ) -> List[str]: """ Look up a word and return the list of sub words if necessary taking into account clitic and compound markers @@ -899,7 +973,7 @@ def lookup( def to_int( self, item: str, - ) -> list[int]: + ) -> List[int]: """ Convert a given word into integer IDs @@ -955,8 +1029,8 @@ def check_word(self, item: str) -> bool: return False def map_to_original_pronunciation( - self, phones: list[CtmInterval], subpronunciations: list[DictionaryEntryType] - ) -> list[CtmInterval]: + self, phones: List[CtmInterval], subpronunciations: List[DictionaryEntryType] + ) -> List[CtmInterval]: """ Convert phone transcriptions from multilingual IPA mode to their original IPA transcription diff --git a/montreal_forced_aligner/dictionary/multispeaker.py b/montreal_forced_aligner/dictionary/multispeaker.py index b14d318b..e7c0108c 100644 --- a/montreal_forced_aligner/dictionary/multispeaker.py +++ b/montreal_forced_aligner/dictionary/multispeaker.py @@ -4,7 +4,7 @@ import abc import os -from typing import TYPE_CHECKING, Collection, Optional, Union +from typing import TYPE_CHECKING, Collection, Dict, Optional, Union from montreal_forced_aligner.dictionary.mixins import TemporaryDictionaryMixin from montreal_forced_aligner.dictionary.pronunciation import PronunciationDictionary @@ -49,8 +49,10 @@ class MultispeakerDictionaryMixin(TemporaryDictionaryMixin, metaclass=abc.ABCMet def __init__(self, dictionary_path: str = None, **kwargs): super().__init__(**kwargs) self.dictionary_model = DictionaryModel(dictionary_path) + self.base_phone_regex = self.dictionary_model.base_phone_regex + 
self.phone_set_type = self.dictionary_model.phone_set_type self.speaker_mapping = {} - self.dictionary_mapping: dict[str, PronunciationDictionary] = {} + self.dictionary_mapping: Dict[str, PronunciationDictionary] = {} def dictionary_setup(self): """Setup the dictionary for processing""" @@ -78,6 +80,7 @@ def calculate_oovs_found(self) -> None: """Sum the counts of oovs found in pronunciation dictionaries""" for dictionary in self.dictionary_mapping.values(): self.oovs_found.update(dictionary.oovs_found) + self.save_oovs_found(self.output_directory) @property def default_dictionary(self) -> PronunciationDictionary: @@ -135,7 +138,6 @@ def write_lexicon_information(self, write_disambiguation: Optional[bool] = False if d.max_disambiguation_symbol > self.max_disambiguation_symbol: self.max_disambiguation_symbol = d.max_disambiguation_symbol self._write_word_boundaries() - self._write_phone_map_file() self._write_phone_sets() self._write_phone_symbol_table() self._write_disambig() @@ -157,7 +159,7 @@ def set_lexicon_word_set(self, word_set: Collection[str]) -> None: d.set_lexicon_word_set(word_set) @property - def output_paths(self) -> dict[str, str]: + def output_paths(self) -> Dict[str, str]: """ Mapping of output directory for child directories """ diff --git a/montreal_forced_aligner/dictionary/pronunciation.py b/montreal_forced_aligner/dictionary/pronunciation.py index 39d320ff..47faaefc 100644 --- a/montreal_forced_aligner/dictionary/pronunciation.py +++ b/montreal_forced_aligner/dictionary/pronunciation.py @@ -7,7 +7,7 @@ import subprocess import sys from collections import Counter, defaultdict -from typing import TYPE_CHECKING, Any, Collection, Optional +from typing import TYPE_CHECKING, Any, Collection, Dict, List, Optional, Set, Tuple if TYPE_CHECKING: from montreal_forced_aligner.dictionary.multispeaker import MultispeakerDictionaryMixin @@ -62,16 +62,24 @@ class PronunciationDictionaryMixin(TemporaryDictionaryMixin): """ def __init__(self, dictionary_path, root_dictionary=None, **kwargs): - super().__init__(**kwargs) self.dictionary_model = DictionaryModel(dictionary_path) + super().__init__(**kwargs) + self.base_phone_regex = self.dictionary_model.base_phone_regex + self.phone_set_type = self.dictionary_model.phone_set_type self.root_dictionary = root_dictionary os.makedirs(self.dictionary_output_directory, exist_ok=True) self.words = {} self.graphemes = set() - self.words[self.silence_word] = [ - {"pronunciation": (self.nonoptional_silence_phone,), "probability": 1} + self.words[""] = [ + { + "pronunciation": (self.optional_silence_phone,), + "probability": 1, + "disambiguation": None, + } + ] + self.words[self.oov_word] = [ + {"pronunciation": (self.oov_phone,), "probability": 1, "disambiguation": None} ] - self.words[self.oov_word] = [{"pronunciation": (self.oov_phone,), "probability": 1}] self.lookup_cache = {} self.int_cache = {} self.check_cache = {} @@ -86,7 +94,7 @@ def __init__(self, dictionary_path, root_dictionary=None, **kwargs): raise DictionaryError( f"Line {i} of {self.dictionary_model.path} does not have a pronunciation." 
) - if word in [self.silence_word, self.oov_word]: + if word in self.specials_set: continue self.graphemes.update(word) prob = 1 @@ -153,12 +161,17 @@ def dictionary_output_directory(self) -> str: return os.path.join(self.temporary_directory, self.name) @property - def silences(self) -> set[str]: + def silences(self) -> Set[str]: """ Set of symbols that correspond to silence """ return self.silence_phones + @property + def actual_words(self): + """Words in the dictionary stripping out Kaldi's internal words""" + return {k: v for k, v in self.words.items() if k not in self.specials_set} + def data(self, word_set: Optional[Collection[str]] = None) -> DictionaryData: """ Generates a dictionary data for use in parsing utilities @@ -173,6 +186,12 @@ def data(self, word_set: Optional[Collection[str]] = None) -> DictionaryData: DictionaryData Data necessary for parsing text """ + if ( + self._dictionary_data is not None + and word_set is None + and self._dictionary_data.words_mapping + ): + return self._dictionary_data def word_check(word): """Check whether a word should be included in the output""" @@ -243,7 +262,7 @@ def set_lexicon_word_set(self, word_set: Collection[str]) -> None: self._dictionary_data = self.data(self.lexicon_word_set) self.generate_mappings() - def split_clitics(self, item: str) -> list[str]: + def split_clitics(self, item: str) -> List[str]: """ Split a word into subwords based on clitic and compound markers @@ -345,7 +364,7 @@ def add_disambiguation(self) -> None: self.max_disambiguation_symbol, max(last_used.values()) ) - def create_utterance_fst(self, text: list[str], frequent_words: list[tuple[str, int]]) -> str: + def create_utterance_fst(self, text: List[str], frequent_words: List[Tuple[str, int]]) -> str: """ Create an FST for an utterance with frequent words as a unigram language model @@ -372,7 +391,7 @@ def create_utterance_fst(self, text: list[str], frequent_words: list[tuple[str, fst_text += f"0 {-1 * math.log(1 / num_words)}\n" return fst_text - def to_int(self, item: str) -> list[int]: + def to_int(self, item: str) -> List[int]: """ Convert a given word into integer IDs @@ -387,10 +406,10 @@ def to_int(self, item: str) -> list[int]: List of integer IDs corresponding to each subword """ if item not in self.int_cache: - self.int_cache[item] = self._dictionary_data.to_int(item) + self.int_cache[item] = self.data().to_int(item) return self.int_cache[item] - def _lookup(self, item: str) -> list[str]: + def _lookup(self, item: str) -> List[str]: """ Look up a word and return the list of sub words if necessary taking into account clitic and compound markers @@ -427,7 +446,7 @@ def check_word(self, item: str) -> bool: True if the look up would not result in an OOV item """ if item not in self.check_cache: - self.check_cache[item] = self._dictionary_data.check_word(item) + self.check_cache[item] = self.data().check_word(item) return self.check_cache[item] @property @@ -440,6 +459,13 @@ def reversed_word_mapping(self) -> ReversedMappingType: mapping[v] = k return mapping + @property + def phone_mapping(self) -> Dict[str, int]: + """Mapping of phones to integer IDs""" + if self.root_dictionary is not None: + return self.root_dictionary.phone_mapping + return super().phone_mapping + @property def reversed_phone_mapping(self) -> ReversedMappingType: """ @@ -464,8 +490,14 @@ def phones_dir(self) -> str: """ if self.root_dictionary is not None: return self.root_dictionary.phones_dir + return super().phones_dir - return os.path.join(self.dictionary_output_directory, 
"phones") + @property + def phone_symbol_table_path(self) -> str: + """Path to file containing phone symbols and their integer IDs""" + if self.root_dictionary is not None: + return self.root_dictionary.phone_symbol_table_path + return super().phone_symbol_table_path @property def words_symbol_path(self) -> str: @@ -507,13 +539,13 @@ def write( self.generate_mappings() os.makedirs(self.phones_dir, exist_ok=True) self._write_word_boundaries() - self._write_phone_map_file() self._write_phone_sets() self._write_phone_symbol_table() self._write_disambig() self._write_topo() self._write_extra_questions() - + if debug: + self.export_lexicon(os.path.join(self.dictionary_output_directory, "lexicon.txt")) self._write_graphemes() self._write_word_file() self._write_align_lexicon() @@ -592,10 +624,6 @@ def _write_align_lexicon(self) -> None: """ Write the alignment lexicon text file to the temporary directory """ - if self.root_dictionary is None: - phone_mapping = self.phone_mapping - else: - phone_mapping = self.root_dictionary.phone_mapping path = os.path.join(self.phones_dir, "align_lexicon.int") if os.path.exists(path): return @@ -623,7 +651,7 @@ def _write_align_lexicon(self) -> None: phones[j] += "_E" else: phones[j] += "_I" - p = " ".join(str(phone_mapping[x]) for x in phones) + p = " ".join(str(self.phone_mapping[x]) for x in phones) f.write(f"{i} {i} {p}\n".format(i=i, p=p)) def _write_fst_binary( @@ -655,12 +683,7 @@ def _write_fst_binary( else: lexicon_fst_path = os.path.join(self.dictionary_output_directory, "lexicon.text.fst") output_fst = os.path.join(self.dictionary_output_directory, "L.fst") - if self.root_dictionary is not None: - phone_mapping = self.root_dictionary.phone_mapping - phones_file_path = self.root_dictionary.phone_symbol_table_path - else: - phone_mapping = self.phone_mapping - phones_file_path = self.phone_symbol_table_path + words_file_path = os.path.join(self.dictionary_output_directory, "words.txt") log_path = os.path.join(self.dictionary_output_directory, "fst.log") @@ -669,7 +692,7 @@ def _write_fst_binary( compile_proc = subprocess.Popen( [ thirdparty_binary("fstcompile"), - f"--isymbols={phones_file_path}", + f"--isymbols={self.phone_symbol_table_path}", f"--osymbols={words_file_path}", "--keep_isymbols=false", "--keep_osymbols=false", @@ -688,7 +711,7 @@ def _write_fst_binary( self.dictionary_output_directory, "phone_disambig0.txt" ) with open(phone_disambig_path, "w") as f: - f.write(str(phone_mapping["#0"])) + f.write(str(self.phone_mapping["#0"])) with open(word_disambig_path, "w") as f: f.write(str(self.words_mapping["#0"])) selfloop_proc = subprocess.Popen( @@ -727,26 +750,21 @@ def _write_basic_fst_text(self) -> None: """ Write the L.fst text file to the temporary directory """ - nonoptional_silence = None - optional_silence_phone = None lexicon_fst_path = os.path.join(self.dictionary_output_directory, "lexicon.text.fst") start_state = 0 silence_state = 0 - silence_cost = 0 no_silence_cost = 0 loop_state = 0 next_state = 1 - if self.silence_probability: - optional_silence_phone = self.optional_silence_phone - nonoptional_silence = self.nonoptional_silence_phone - - silence_cost = -1 * math.log(self.silence_probability) - no_silence_cost = -1 * math.log(1.0 - self.silence_probability) - loop_state = 1 - silence_state = 2 with open(lexicon_fst_path, "w", encoding="utf8") as outf: if self.silence_probability: + optional_silence_phone = self.optional_silence_phone + + silence_cost = -1 * math.log(self.silence_probability) + no_silence_cost = -1 * 
math.log(1.0 - self.silence_probability) + loop_state = 1 + silence_state = 2 outf.write( "\t".join( map(str, [start_state, loop_state, "<eps>", "<eps>", no_silence_cost]) ) + "\n" @@ -758,7 +776,13 @@ def _write_basic_fst_text(self) -> None: "\t".join( map( str, - [start_state, loop_state, nonoptional_silence, "<eps>", silence_cost], + [ start_state, loop_state, optional_silence_phone, "<eps>", silence_cost, ], ) ) + "\n" @@ -848,7 +872,7 @@ def _write_fst_text_disambiguated( silence_state = 2 next_state = 3 - silence_phone = self.nonoptional_silence_phone + silence_phone = self.optional_silence_phone silence_cost = -1 * math.log(self.silence_probability) no_silence_cost = -1 * math.log(1 - self.silence_probability) diff --git a/montreal_forced_aligner/exceptions.py b/montreal_forced_aligner/exceptions.py index 138ef7fb..fbccae87 100644 --- a/montreal_forced_aligner/exceptions.py +++ b/montreal_forced_aligner/exceptions.py @@ -7,7 +7,7 @@ import logging import sys -from typing import TYPE_CHECKING, Collection, Optional +from typing import TYPE_CHECKING, Collection, Dict, List, Optional, Tuple from colorama import Fore, Style @@ -169,7 +169,7 @@ class ThirdpartyError(MFAError): def __init__(self, binary_name, open_fst=False, open_blas=False, libc=False, sox=False): super().__init__() self.message = f"Could not find '{self.error_text(binary_name)}'. " - extra = "Please ensure that you have downloaded the correct binaries." + extra = "Please ensure that you have installed MFA's conda dependencies." if open_fst: extra = ( f"Please ensure that you are in an environment that has the {self.emphasized_text('openfst')} conda package installed, " @@ -362,7 +362,7 @@ class AlignmentError(MFAError): List of Kaldi log files with errors """ - def __init__(self, error_logs: list[str]): + def __init__(self, error_logs: List[str]): super().__init__() output = "\n".join(error_logs) self.message = ( @@ -382,7 +382,7 @@ class AlignmentExportError(AlignmentError): """ - def __init__(self, error_dict: dict[tuple[str, int], str]): + def __init__(self, error_dict: Dict[Tuple[str, int], str]): MFAError.__init__(self) message = "Error was encountered in processing CTMs:\n\n" @@ -498,7 +498,7 @@ class PretrainedModelNotFoundError(ArgumentError): """ def __init__( - self, name: str, model_type: Optional[str] = None, available: Optional[list[str]] = None + self, name: str, model_type: Optional[str] = None, available: Optional[List[str]] = None ): super().__init__() extra = "" @@ -523,7 +523,7 @@ class MultipleModelTypesFoundError(ArgumentError): List of model types that have a model with the given name """ - def __init__(self, name: str, possible_model_types: list[str]): + def __init__(self, name: str, possible_model_types: List[str]): super().__init__() possible_model_types = [f"{self.error_text(x)}" for x in possible_model_types] @@ -547,7 +547,7 @@ class ModelExtensionError(ArgumentError): Extensions that the model supports """ - def __init__(self, name: str, model_type: str, extensions: list[str]): + def __init__(self, name: str, model_type: str, extensions: List[str]): super().__init__() extra = "" if model_type: @@ -649,7 +649,7 @@ class KaldiProcessingError(MFAError): Overall log file to find more information """ - def __init__(self, error_logs: list[str], log_file: Optional[str] = None): + def __init__(self, error_logs: List[str], log_file: Optional[str] = None): super().__init__() self.message = ( f"There were {len(error_logs)} job(s) with errors when running Kaldi binaries." 
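A note on the annotation changes that recur throughout this patch (list[...] to List[...], dict[...] to Dict[...], and so on): PEP 585 built-in generics only became subscriptable in Python 3.9, so any such expression that is actually evaluated at runtime, e.g. a module-level type alias, or a class-body annotation in a module without from __future__ import annotations, raises a TypeError under Python 3.8. A minimal sketch of the failure mode and the fix; SegmentationType is taken from segmenter.py below, while ExampleArguments is illustrative only and not an MFA class:

    # Python 3.8 rejects subscripted builtins wherever they are actually evaluated.
    from typing import Dict, List, NamedTuple

    # SegmentationType = list[dict[str, float]]  # TypeError on 3.8:
    #                                            # 'type' object is not subscriptable
    SegmentationType = List[Dict[str, float]]    # works on 3.8 and later

    class ExampleArguments(NamedTuple):
        # NamedTuple field annotations are evaluated at class creation, so the
        # typing aliases are required here too (absent a __future__ import).
        log_path: str
        lat_paths: Dict[str, str]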
diff --git a/montreal_forced_aligner/g2p/generator.py b/montreal_forced_aligner/g2p/generator.py index aa5039c5..9d23d3d8 100644 --- a/montreal_forced_aligner/g2p/generator.py +++ b/montreal_forced_aligner/g2p/generator.py @@ -8,7 +8,7 @@ import sys import time import traceback -from typing import TYPE_CHECKING, Any, Union +from typing import TYPE_CHECKING, Any, Dict, List, Set, Tuple, Union import tqdm @@ -75,7 +75,7 @@ class RewriterWorker(mp.Process): def __init__( self, job_q: mp.Queue, - return_dict: dict[str, Union[str, Any]], + return_dict: Dict[str, Union[str, Any]], rewriter: Rewriter, counter: Counter, stopped: Stopped, @@ -112,7 +112,7 @@ def run(self) -> None: return -def clean_up_word(word: str, graphemes: set[str]) -> tuple[str, list[str]]: +def clean_up_word(word: str, graphemes: Set[str]) -> Tuple[str, List[str]]: """ Clean up word by removing graphemes not in a specified set @@ -150,7 +150,7 @@ class OrthographyGenerator(G2PTopLevelMixin): For top level G2P generation parameters """ - def generate_pronunciations(self) -> dict[str, list[str]]: + def generate_pronunciations(self) -> Dict[str, List[str]]: """ Generate pronunciations for the word set @@ -190,7 +190,7 @@ def __init__(self, g2p_model_path: str, **kwargs): self.g2p_model = G2PModel(g2p_model_path) super().__init__(**kwargs) - def generate_pronunciations(self) -> dict[str, list[str]]: + def generate_pronunciations(self) -> Dict[str, List[str]]: """ Generate pronunciations @@ -334,7 +334,7 @@ def setup(self) -> None: self.initialized = True @property - def words_to_g2p(self) -> list[str]: + def words_to_g2p(self) -> List[str]: """Words to produce pronunciations""" return self.word_list @@ -365,7 +365,7 @@ def setup(self) -> None: self.initialized = True @property - def words_to_g2p(self) -> list[str]: + def words_to_g2p(self) -> List[str]: """Words to produce pronunciations""" word_list = self.corpus_word_set if not self.include_bracketed: @@ -398,7 +398,7 @@ def setup(self) -> None: self.initialized = True @property - def words_to_g2p(self) -> list[str]: + def words_to_g2p(self) -> List[str]: """Words to produce pronunciations""" word_list = self.corpus_word_set if not self.include_bracketed: @@ -444,6 +444,6 @@ def setup(self) -> None: self.initialized = True @property - def words_to_g2p(self) -> list[str]: + def words_to_g2p(self) -> List[str]: """Words to produce pronunciations""" return self.word_list diff --git a/montreal_forced_aligner/g2p/mixins.py b/montreal_forced_aligner/g2p/mixins.py index fa6212c3..3bc92d31 100644 --- a/montreal_forced_aligner/g2p/mixins.py +++ b/montreal_forced_aligner/g2p/mixins.py @@ -1,4 +1,5 @@ from abc import ABCMeta, abstractmethod +from typing import Dict, List from montreal_forced_aligner.abc import MfaWorker from montreal_forced_aligner.dictionary.mixins import DictionaryMixin @@ -22,7 +23,7 @@ def __init__(self, include_bracketed: bool = False, num_pronunciations: int = 1, self.include_bracketed = include_bracketed @abstractmethod - def generate_pronunciations(self) -> dict[str, list[str]]: + def generate_pronunciations(self) -> Dict[str, List[str]]: """ Generate pronunciations @@ -57,7 +58,7 @@ class G2PTopLevelMixin(MfaWorker, DictionaryMixin, G2PMixin): def __init__(self, **kwargs): super().__init__(**kwargs) - def generate_pronunciations(self) -> dict[str, list[str]]: + def generate_pronunciations(self) -> Dict[str, List[str]]: """ Generate pronunciations diff --git a/montreal_forced_aligner/g2p/trainer.py b/montreal_forced_aligner/g2p/trainer.py index 7ab0317f..a6042c43 
100644 --- a/montreal_forced_aligner/g2p/trainer.py +++ b/montreal_forced_aligner/g2p/trainer.py @@ -14,7 +14,7 @@ import sys import time import traceback -from typing import Any, Callable, NamedTuple, Optional +from typing import Any, Callable, Dict, List, NamedTuple, Optional, Set, Tuple import tqdm @@ -46,7 +46,7 @@ def convert(x): G2P_DISABLED = True -Labels = list[Any] +Labels = List[Any] TOKEN_TYPES = ["byte", "utf8"] INF = float("inf") @@ -64,7 +64,7 @@ class RandomStart(NamedTuple): p_path: str c_path: str tempdir: str - train_opts: list[str] + train_opts: List[str] class RandomStartWorker(mp.Process): @@ -177,7 +177,7 @@ def align( self.logger.info("Success! FAR path: %s; encoder path: %s", far_path, encoder_path) @staticmethod - def _label_union(labels: set[int], epsilon: bool) -> Fst: + def _label_union(labels: Set[int], epsilon: bool) -> Fst: """Creates FSA over a union of the labels.""" side = pynini.Fst() src = side.add_state() @@ -209,8 +209,8 @@ def _lexicon_covering( ) -> None: """Builds covering grammar and lexicon FARs.""" # Sets of labels for the covering grammar. - g_labels: set[int] = set() - p_labels: set[int] = set() + g_labels: Set[int] = set() + p_labels: Set[int] = set() self.logger.info("Constructing grapheme and phoneme FARs") g_writer = pywrapfst.FarWriter.create(self.g_path) p_writer = pywrapfst.FarWriter.create(self.p_path) @@ -241,7 +241,7 @@ def _lexicon_covering( covering.write(self.c_path) @staticmethod - def _random_start(random_start: RandomStart) -> tuple[str, float]: + def _random_start(random_start: RandomStart) -> Tuple[str, float]: """Performs a single random start.""" start = time.time() logger = logging.getLogger("g2p_aligner") @@ -439,17 +439,17 @@ class PyniniValidator(PyniniGenerator): For parameters to generate pronunciations """ - def __init__(self, word_list: list[str], **kwargs): + def __init__(self, word_list: List[str], **kwargs): super().__init__(**kwargs) self.word_list = word_list @property - def words_to_g2p(self) -> list[str]: + def words_to_g2p(self) -> List[str]: """Words to produce pronunciations""" return self.word_list -class G2PTrainer(MfaWorker, TrainerMixin, PronunciationDictionaryMixin): +class G2PTrainer(MfaWorker, TrainerMixin): """ Abstract mixin class for G2P training @@ -497,7 +497,7 @@ def __init__( self.g2p_graphemes = set() -class PyniniTrainer(G2PTrainer, TopLevelMfaWorker): +class PyniniTrainer(G2PTrainer, PronunciationDictionaryMixin, TopLevelMfaWorker): """ Top-level G2P trainer that uses Pynini functionality @@ -836,7 +836,7 @@ def evaluate_g2p_model(self) -> None: def compute_validation_errors( self, - hypothesis_values: dict[str, list[str]], + hypothesis_values: Dict[str, List[str]], ): """ Computes validation errors diff --git a/montreal_forced_aligner/helper.py b/montreal_forced_aligner/helper.py index 14aab486..1cd82d52 100644 --- a/montreal_forced_aligner/helper.py +++ b/montreal_forced_aligner/helper.py @@ -8,7 +8,7 @@ import functools import sys import textwrap -from typing import TYPE_CHECKING, Any, Optional, Type +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Type import numpy from colorama import Fore, Style @@ -217,7 +217,7 @@ def print_information_line( print(wrapper.fill(f"{self.colorize(key, key_color)} {value}")) -def comma_join(sequence: list[Any]) -> str: +def comma_join(sequence: List[Any]) -> str: """ Helper function to combine a list into a human-readable expression with commas and a final "and" separator @@ -451,7 +451,7 @@ def edit_distance(x: Labels, y: 
Labels) -> int: return int(table[-1][-1]) -def score(gold: Labels, hypo: Labels, multiple_hypotheses=False) -> tuple[int, int]: +def score(gold: Labels, hypo: Labels, multiple_hypotheses=False) -> Tuple[int, int]: """ Computes sufficient statistics for LER calculation. @@ -484,7 +484,7 @@ def score(gold: Labels, hypo: Labels, multiple_hypotheses=False) -> tuple[int, i return edits, len(gold) -def compare_labels(ref: str, test: str, mapping: Optional[dict[str, str]] = None) -> int: +def compare_labels(ref: str, test: str, mapping: Optional[Dict[str, str]] = None) -> int: """ Parameters @@ -512,7 +512,7 @@ def compare_labels(ref: str, test: str, mapping: Optional[dict[str, str]] = None def overlap_scoring( first_element: CtmInterval, second_element: CtmInterval, - mapping: Optional[dict[str, str]] = None, + mapping: Optional[Dict[str, str]] = None, ) -> float: r""" Method to calculate overlap scoring @@ -551,11 +551,11 @@ def overlap_scoring( def align_phones( - ref: list[CtmInterval], - test: list[CtmInterval], - silence_phones: set[str], - custom_mapping: Optional[dict[str, str]] = None, -) -> tuple[Optional[float], Optional[int], Optional[int]]: + ref: List[CtmInterval], + test: List[CtmInterval], + silence_phones: Set[str], + custom_mapping: Optional[Dict[str, str]] = None, +) -> Tuple[Optional[float], Optional[int], Optional[int]]: """ Align phones based on how much they overlap and their phone label, with the ability to specify a custom mapping for different phone labels to be scored as if they're the same phone diff --git a/montreal_forced_aligner/ivector/trainer.py b/montreal_forced_aligner/ivector/trainer.py index bbc09c2b..ffa79c4a 100644 --- a/montreal_forced_aligner/ivector/trainer.py +++ b/montreal_forced_aligner/ivector/trainer.py @@ -5,7 +5,7 @@ import shutil import subprocess import time -from typing import TYPE_CHECKING, Any, NamedTuple, Optional +from typing import TYPE_CHECKING, Any, Dict, List, NamedTuple, Optional, Tuple import yaml @@ -63,22 +63,22 @@ class GmmGselectArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.ivector.trainer.gmm_gselect_func`""" log_path: str - dictionaries: list[str] - feature_strings: dict[str, str] + dictionaries: List[str] + feature_strings: Dict[str, str] ivector_options: MetaDict dubm_model: str - gselect_paths: dict[str, str] + gselect_paths: Dict[str, str] class AccGlobalStatsArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.ivector.trainer.acc_global_stats_func`""" log_path: str - dictionaries: list[str] - feature_strings: dict[str, str] + dictionaries: List[str] + feature_strings: Dict[str, str] ivector_options: MetaDict - gselect_paths: dict[str, str] - acc_paths: dict[str, str] + gselect_paths: Dict[str, str] + acc_paths: Dict[str, str] dubm_path: str @@ -86,10 +86,10 @@ class GaussToPostArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.ivector.trainer.gauss_to_post_func`""" log_path: str - dictionaries: list[str] - feature_strings: dict[str, str] + dictionaries: List[str] + feature_strings: Dict[str, str] ivector_options: MetaDict - post_paths: dict[str, str] + post_paths: Dict[str, str] dubm_path: str @@ -97,21 +97,21 @@ class AccIvectorStatsArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.ivector.trainer.acc_ivector_stats_func`""" log_path: str - dictionaries: list[str] - feature_strings: dict[str, str] + dictionaries: List[str] + feature_strings: Dict[str, str] ivector_options: MetaDict ie_path: str - post_paths: dict[str, str] - 
acc_init_paths: dict[str, str] + post_paths: Dict[str, str] + acc_init_paths: Dict[str, str] def gmm_gselect_func( log_path: str, - dictionaries: list[str], - feature_strings: dict[str, str], + dictionaries: List[str], + feature_strings: Dict[str, str], dubm_options: MetaDict, dubm_path: str, - gselect_paths: dict[str, str], + gselect_paths: Dict[str, str], ) -> None: """ Multiprocessing function for selecting GMM indices. @@ -175,10 +175,10 @@ def gmm_gselect_func( def gauss_to_post_func( log_path: str, - dictionaries: list[str], - feature_strings: dict[str, str], + dictionaries: List[str], + feature_strings: Dict[str, str], ivector_options: MetaDict, - post_paths: dict[str, str], + post_paths: Dict[str, str], dubm_path: str, ): """ @@ -258,11 +258,11 @@ def gauss_to_post_func( def acc_global_stats_func( log_path: str, - dictionaries: list[str], - feature_strings: dict[str, str], + dictionaries: List[str], + feature_strings: Dict[str, str], dubm_options: MetaDict, - gselect_paths: dict[str, str], - acc_paths: dict[str, str], + gselect_paths: Dict[str, str], + acc_paths: Dict[str, str], dubm_path: str, ) -> None: """ @@ -329,12 +329,12 @@ def acc_global_stats_func( def acc_ivector_stats_func( log_path: str, - dictionaries: list[str], - feature_strings: dict[str, str], + dictionaries: List[str], + feature_strings: Dict[str, str], ivector_options: MetaDict, ie_path: str, - post_paths: dict[str, str], - acc_init_paths: dict[str, str], + post_paths: Dict[str, str], + acc_init_paths: Dict[str, str], ) -> None: """ Multiprocessing function that accumulates stats for ivector training. @@ -468,7 +468,7 @@ def dubm_options(self): """Options for DUBM training""" return {"subsample": self.subsample, "num_gselect": self.num_gselect} - def gmm_gselect_arguments(self) -> list[GmmGselectArguments]: + def gmm_gselect_arguments(self) -> List[GmmGselectArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.ivector.trainer.gmm_gselect_func` @@ -492,7 +492,7 @@ def gmm_gselect_arguments(self) -> list[GmmGselectArguments]: def acc_global_stats_arguments( self, - ) -> list[AccGlobalStatsArguments]: + ) -> List[AccGlobalStatsArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.ivector.trainer.acc_global_stats_func` @@ -720,7 +720,7 @@ def exported_model_path(self) -> str: """Temporary directory path that trainer will save ivector extractor model""" return os.path.join(self.working_log_directory, "ivector_model.zip") - def acc_ivector_stats_arguments(self) -> list[AccIvectorStatsArguments]: + def acc_ivector_stats_arguments(self) -> List[AccIvectorStatsArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.ivector.trainer.acc_ivector_stats_func` @@ -775,7 +775,7 @@ def _trainer_initialization(self) -> None: self.gauss_to_post() parse_logs(log_directory) - def gauss_to_post_arguments(self) -> list[GaussToPostArguments]: + def gauss_to_post_arguments(self) -> List[GaussToPostArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.ivector.trainer.gauss_to_post_func` @@ -963,7 +963,7 @@ class TrainableIvectorExtractor(IvectorCorpusMixin, TopLevelMfaWorker, ModelExpo For model export parameters """ - def __init__(self, training_configuration: list[tuple[str, dict[str, Any]]] = None, **kwargs): + def __init__(self, training_configuration: List[Tuple[str, Dict[str, Any]]] = None, **kwargs): self.param_dict = { k: v for k, v in kwargs.items() @@ -974,7 +974,7 @@ def __init__(self, training_configuration: list[tuple[str, dict[str, Any]]] = No 
self.final_identifier = None super().__init__(**kwargs) os.makedirs(self.output_directory, exist_ok=True) - self.training_configs: dict[str, AcousticModelTrainingMixin] = {} + self.training_configs: Dict[str, AcousticModelTrainingMixin] = {} self.current_model = None if training_configuration is None: training_configuration = [("dubm", {}), ("ivector", {})] @@ -1073,7 +1073,7 @@ def parse_parameters( cls, config_path: Optional[str] = None, args: Optional[Namespace] = None, - unknown_args: Optional[list[str]] = None, + unknown_args: Optional[List[str]] = None, ) -> MetaDict: """ Parse configuration parameters from a config file and command line arguments diff --git a/montreal_forced_aligner/language_modeling/trainer.py b/montreal_forced_aligner/language_modeling/trainer.py index 8ab481c5..88ed18b8 100644 --- a/montreal_forced_aligner/language_modeling/trainer.py +++ b/montreal_forced_aligner/language_modeling/trainer.py @@ -325,7 +325,7 @@ def normalized_text_iter(self, min_count: int = 1) -> Generator: Normalized text """ unk_words = {k for k, v in self.word_counts.items() if v <= min_count} - for u in self.utterances.values(): + for u in self.utterances: text = u.text.split() new_text = [] for t in text: diff --git a/montreal_forced_aligner/models.py b/montreal_forced_aligner/models.py index a6954e9b..73e6e801 100644 --- a/montreal_forced_aligner/models.py +++ b/montreal_forced_aligner/models.py @@ -6,9 +6,10 @@ from __future__ import annotations import os +import re import shutil from shutil import copy, copyfile, make_archive, move, rmtree, unpack_archive -from typing import TYPE_CHECKING, Collection, Optional, Union +from typing import TYPE_CHECKING, Collection, Dict, Optional, Union import yaml @@ -322,6 +323,9 @@ def parameters(self) -> MetaDict: for key in ["multilingual_ipa"]: params[key] = self.meta[key] params["non_silence_phones"] = {x for x in self.meta["phones"]} + params["oov_phone"] = self.meta["oov_phone"] + params["optional_silence_phone"] = self.meta["optional_silence_phone"] + params["other_noise_phone"] = self.meta["other_noise_phone"] return params @property @@ -366,6 +370,16 @@ def meta(self) -> MetaDict: self._meta["features"] = default_features if "phone_type" not in self._meta: self._meta["phone_type"] = "triphone" + if "optional_silence_phone" not in self._meta: + self._meta["optional_silence_phone"] = "sil" + if "oov_phone" not in self._meta: + self._meta["oov_phone"] = "spn" + if "other_noise_phone" not in self._meta: + self._meta["other_noise_phone"] = "sp" + if "phone_set_type" not in self._meta: + self._meta["phone_set_type"] = "UNKNOWN" + if "base_phone_regex" not in self._meta: + self._meta["base_phone_regex"] = None self._meta["phones"] = set(self._meta.get("phones", [])) self.parse_old_features() return self._meta @@ -761,12 +775,19 @@ def __init__(self, path: str): count = 0 self.pronunciation_probabilities = True self.silence_probabilities = True + self.phone_set_type = "UNKNOWN" + arpa_detect = re.compile(r"^\D{2}\d$") with open(self.path, "r", encoding="utf8") as f: for line in f: line = line.strip() if not line: continue line = line.split() + for phone in line: + m = re.match(arpa_detect, phone) + if m: + self.phone_set_type = "ARPA" + _ = line.pop(0) # word next_item = line.pop(0) if self.pronunciation_probabilities: @@ -791,6 +812,13 @@ def __init__(self, path: str): if count > 10: break + @property + def base_phone_regex(self) -> Optional[str]: + if self.phone_set_type == "UNKNOWN": + return None + if self.phone_set_type == "ARPA": + return 
r"(\D+)" + @property def meta(self) -> MetaDict: """Metadata for the dictionary""" @@ -865,7 +893,7 @@ def name(self) -> str: """Name of the dictionary""" return os.path.splitext(os.path.basename(self.path))[0] - def load_dictionary_paths(self) -> dict[str, DictionaryModel]: + def load_dictionary_paths(self) -> Dict[str, DictionaryModel]: """ Load the pronunciation dictionaries diff --git a/montreal_forced_aligner/segmenter.py b/montreal_forced_aligner/segmenter.py index 91baa2c4..ee42b18d 100644 --- a/montreal_forced_aligner/segmenter.py +++ b/montreal_forced_aligner/segmenter.py @@ -10,7 +10,7 @@ from __future__ import annotations import os -from typing import TYPE_CHECKING, NamedTuple, Optional, Union +from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Union import yaml @@ -25,7 +25,7 @@ if TYPE_CHECKING: from argparse import Namespace -SegmentationType = list[dict[str, float]] +SegmentationType = List[Dict[str, float]] __all__ = ["Segmenter"] @@ -33,12 +33,12 @@ class SegmentVadArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.segmenter.segment_vad_func`""" - dictionaries: list[str] - vad_paths: dict[str, str] + dictionaries: List[str] + vad_paths: Dict[str, str] segmentation_options: MetaDict -def get_initial_segmentation(frames: list[Union[int, str]], frame_shift: int) -> SegmentationType: +def get_initial_segmentation(frames: List[Union[int, str]], frame_shift: int) -> SegmentationType: """ Compute initial segmentation over voice activity @@ -124,10 +124,10 @@ def merge_segments( def segment_vad_func( - dictionaries: list[str], - vad_paths: dict[str, str], + dictionaries: List[str], + vad_paths: Dict[str, str], segmentation_options: MetaDict, -) -> dict[str, Utterance]: +) -> Dict[str, Utterance]: """ Multiprocessing function to generate segments from VAD output. 
@@ -206,7 +206,7 @@ def parse_parameters( cls, config_path: Optional[str] = None, args: Optional[Namespace] = None, - unknown_args: Optional[list[str]] = None, + unknown_args: Optional[List[str]] = None, ) -> MetaDict: """ Parse parameters for segmentation from a config path or command-line arguments @@ -240,7 +240,7 @@ def parse_parameters( global_params.update(cls.parse_args(args, unknown_args)) return global_params - def segment_vad_arguments(self) -> list[SegmentVadArguments]: + def segment_vad_arguments(self) -> List[SegmentVadArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.segmenter.segment_vad_func` @@ -303,7 +303,7 @@ def segment_vad(self) -> None: utterance.file = file utterance.set_speaker(speaker) self.add_utterance(utterance) - utterance_ids = [x.name for x in self.utterances.values() if x.begin is None] + utterance_ids = [x.name for x in self.utterances if x.begin is None] for u in utterance_ids: self.delete_utterance(u) @@ -366,5 +366,5 @@ def export_files(self, output_directory: str) -> None: if not self.overwrite: backup_output_directory = os.path.join(self.working_directory, "transcriptions") os.makedirs(backup_output_directory, exist_ok=True) - for f in self.files.values(): + for f in self.files: f.save(output_directory, backup_output_directory) diff --git a/montreal_forced_aligner/speaker_classifier.py b/montreal_forced_aligner/speaker_classifier.py index 31d8aef0..e2db4273 100644 --- a/montreal_forced_aligner/speaker_classifier.py +++ b/montreal_forced_aligner/speaker_classifier.py @@ -7,7 +7,7 @@ from __future__ import annotations import os -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, List, Optional import numpy as np import yaml @@ -60,7 +60,7 @@ def parse_parameters( cls, config_path: Optional[str] = None, args: Optional[Namespace] = None, - unknown_args: Optional[list[str]] = None, + unknown_args: Optional[List[str]] = None, ) -> MetaDict: """ Parse parameters for speaker classification from a config path or command-line arguments @@ -169,7 +169,7 @@ def cluster_utterances(self) -> None: km.fit(x) y = km.labels_ for i, u in enumerate(self.ivectors.keys()): - speaker_name = y[i] + speaker_name = f"Speaker {y[i]}" utterance = self.utterances[u] if speaker_name not in self.speakers: self.speakers[speaker_name] = Speaker(speaker_name) @@ -189,5 +189,5 @@ def export_files(self, output_directory: str) -> None: backup_output_directory = os.path.join(self.working_directory, "output") os.makedirs(backup_output_directory, exist_ok=True) - for file in self.files.values(): + for file in self.files: file.save(output_directory, backup_output_directory) diff --git a/montreal_forced_aligner/textgrid.py b/montreal_forced_aligner/textgrid.py index ab286607..3a96a0e1 100644 --- a/montreal_forced_aligner/textgrid.py +++ b/montreal_forced_aligner/textgrid.py @@ -6,7 +6,7 @@ from __future__ import annotations import os -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, Dict, List, Optional from praatio import textgrid as tgio @@ -60,8 +60,8 @@ def process_ctm_line(line: str) -> CtmInterval: def parse_from_word( - ctm_labels: list[CtmInterval], text: list[str], dictionary_data: DictionaryData -) -> list[CtmInterval]: + ctm_labels: List[CtmInterval], text: List[str], dictionary_data: DictionaryData +) -> List[CtmInterval]: """ Parse CTM intervals into the corresponding text for an utterance @@ -102,8 +102,8 @@ def parse_from_word( def parse_from_word_no_cleanup( - ctm_labels: list[CtmInterval], 
reversed_word_mapping: ReversedMappingType -) -> list[CtmInterval]: + ctm_labels: List[CtmInterval], reversed_word_mapping: ReversedMappingType +) -> List[CtmInterval]: """ Assume that subwords in the CTM files are desired, so just does a reverse look up to get the sub word text @@ -127,10 +127,10 @@ def parse_from_word_no_cleanup( def parse_from_phone( - ctm_labels: list[CtmInterval], + ctm_labels: List[CtmInterval], reversed_phone_mapping: ReversedMappingType, - positions: list[str], -) -> list[CtmInterval]: + positions: List[str], +) -> List[CtmInterval]: """ Parse CtmIntervals to original phone transcriptions @@ -157,7 +157,7 @@ def parse_from_phone( return ctm_labels -def output_textgrid_writing_errors(output_directory: str, export_errors: dict[str, str]) -> None: +def output_textgrid_writing_errors(output_directory: str, export_errors: Dict[str, str]) -> None: """ Output any errors that were encountered in writing TextGrids @@ -184,7 +184,7 @@ def output_textgrid_writing_errors(output_directory: str, export_errors: dict[st def generate_tiers( file: File, cleanup_textgrids: Optional[bool] = True -) -> dict[Speaker, dict[str, list[CtmInterval]]]: +) -> Dict[Speaker, Dict[str, List[CtmInterval]]]: """ Generate TextGrid tiers for a given File @@ -202,11 +202,11 @@ def generate_tiers( """ output = {} - for u in file.utterances.values(): + for u in file.utterances: if not u.word_labels: continue speaker = u.speaker - dictionary_data = speaker.dictionary_data + dictionary_data: DictionaryData = speaker.dictionary_data words = [] phones = [] @@ -226,7 +226,7 @@ def generate_tiers( cur_phones = [] while u.phone_labels[phone_ind].end <= end: p = u.phone_labels[phone_ind] - if p.label in dictionary_data.silence_phones: + if p.label == dictionary_data.optional_silence_phone: phone_ind += 1 continue cur_phones.append(p) @@ -242,7 +242,7 @@ def generate_tiers( for interval in u.word_labels: words.append(interval) for interval in u.phone_labels: - if interval.label in dictionary_data.silence_phones and cleanup_textgrids: + if interval.label == dictionary_data.optional_silence_phone and cleanup_textgrids: continue phones.append(interval) if speaker not in output: @@ -256,7 +256,7 @@ def generate_tiers( def export_textgrid( file: File, output_path: str, - speaker_data: dict[Speaker, dict[str, list[CtmInterval]]], + speaker_data: Dict[Speaker, Dict[str, List[CtmInterval]]], frame_shift: int, first_file_write: Optional[bool] = True, ) -> None: diff --git a/montreal_forced_aligner/transcription/multiprocessing.py b/montreal_forced_aligner/transcription/multiprocessing.py index 05990dd8..7a2bbcc2 100644 --- a/montreal_forced_aligner/transcription/multiprocessing.py +++ b/montreal_forced_aligner/transcription/multiprocessing.py @@ -9,7 +9,7 @@ import re import subprocess import sys -from typing import TYPE_CHECKING, NamedTuple, TextIO +from typing import TYPE_CHECKING, Dict, List, NamedTuple, TextIO from ..abc import MetaDict from ..utils import thirdparty_binary @@ -62,100 +62,100 @@ class DecodeArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.transcription.multiprocessing.decode_func`""" log_path: str - dictionaries: list[str] - feature_strings: dict[str, str] + dictionaries: List[str] + feature_strings: Dict[str, str] decode_options: MetaDict model_path: str - lat_paths: dict[str, str] - words_paths: dict[str, str] - hclg_paths: dict[str, str] + lat_paths: Dict[str, str] + words_paths: Dict[str, str] + hclg_paths: Dict[str, str] class ScoreArguments(NamedTuple): """Arguments for 
:func:`~montreal_forced_aligner.transcription.multiprocessing.score_func`""" log_path: str - dictionaries: list[str] + dictionaries: List[str] score_options: MetaDict - lat_paths: dict[str, str] - rescored_lat_paths: dict[str, str] - carpa_rescored_lat_paths: dict[str, str] - words_paths: dict[str, str] - tra_paths: dict[str, str] + lat_paths: Dict[str, str] + rescored_lat_paths: Dict[str, str] + carpa_rescored_lat_paths: Dict[str, str] + words_paths: Dict[str, str] + tra_paths: Dict[str, str] class LmRescoreArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.transcription.multiprocessing.lm_rescore_func`""" log_path: str - dictionaries: list[str] + dictionaries: List[str] lm_rescore_options: MetaDict - lat_paths: dict[str, str] - rescored_lat_paths: dict[str, str] - old_g_paths: dict[str, str] - new_g_paths: dict[str, str] + lat_paths: Dict[str, str] + rescored_lat_paths: Dict[str, str] + old_g_paths: Dict[str, str] + new_g_paths: Dict[str, str] class CarpaLmRescoreArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.transcription.multiprocessing.carpa_lm_rescore_func`""" log_path: str - dictionaries: list[str] - lat_paths: dict[str, str] - rescored_lat_paths: dict[str, str] - old_g_paths: dict[str, str] - new_g_paths: dict[str, str] + dictionaries: List[str] + lat_paths: Dict[str, str] + rescored_lat_paths: Dict[str, str] + old_g_paths: Dict[str, str] + new_g_paths: Dict[str, str] class InitialFmllrArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.transcription.multiprocessing.initial_fmllr_func`""" log_path: str - dictionaries: list[str] - feature_strings: dict[str, str] + dictionaries: List[str] + feature_strings: Dict[str, str] model_path: str fmllr_options: MetaDict - pre_trans_paths: dict[str, str] - lat_paths: dict[str, str] - spk2utt_paths: dict[str, str] + pre_trans_paths: Dict[str, str] + lat_paths: Dict[str, str] + spk2utt_paths: Dict[str, str] class LatGenFmllrArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.transcription.multiprocessing.lat_gen_fmllr_func`""" log_path: str - dictionaries: list[str] - feature_strings: dict[str, str] + dictionaries: List[str] + feature_strings: Dict[str, str] model_path: str decode_options: MetaDict - words_paths: dict[str, str] - hclg_paths: dict[str, str] - tmp_lat_paths: dict[str, str] + words_paths: Dict[str, str] + hclg_paths: Dict[str, str] + tmp_lat_paths: Dict[str, str] class FinalFmllrArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.transcription.multiprocessing.final_fmllr_est_func`""" log_path: str - dictionaries: list[str] - feature_strings: dict[str, str] + dictionaries: List[str] + feature_strings: Dict[str, str] model_path: str fmllr_options: MetaDict - trans_paths: dict[str, str] - spk2utt_paths: dict[str, str] - tmp_lat_paths: dict[str, str] + trans_paths: Dict[str, str] + spk2utt_paths: Dict[str, str] + tmp_lat_paths: Dict[str, str] class FmllrRescoreArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.transcription.multiprocessing.fmllr_rescore_func`""" log_path: str - dictionaries: list[str] - feature_strings: dict[str, str] + dictionaries: List[str] + feature_strings: Dict[str, str] model_path: str fmllr_options: MetaDict - tmp_lat_paths: dict[str, str] - final_lat_paths: dict[str, str] + tmp_lat_paths: Dict[str, str] + final_lat_paths: Dict[str, str] def compose_lg(dictionary_path: str, small_g_path: str, lg_path: str, log_file: TextIO) -> None: @@ -659,13 +659,13 @@ def create_hclg_func( def 
decode_func( log_path: str, - dictionaries: list[str], - feature_strings: dict[str, str], + dictionaries: List[str], + feature_strings: Dict[str, str], decode_options: MetaDict, model_path: str, - lat_paths: dict[str, str], - word_symbol_paths: dict[str, str], - hclg_paths: dict[str, str], + lat_paths: Dict[str, str], + word_symbol_paths: Dict[str, str], + hclg_paths: Dict[str, str], ) -> None: """ Multiprocessing function for performing decoding @@ -742,13 +742,13 @@ def decode_func( def score_func( log_path: str, - dictionaries: list[str], + dictionaries: List[str], score_options: MetaDict, - lat_paths: dict[str, str], - rescored_lat_paths: dict[str, str], - carpa_rescored_lat_paths: dict[str, str], - words_paths: dict[str, str], - tra_paths: dict[str, str], + lat_paths: Dict[str, str], + rescored_lat_paths: Dict[str, str], + carpa_rescored_lat_paths: Dict[str, str], + words_paths: Dict[str, str], + tra_paths: Dict[str, str], ) -> None: """ Multiprocessing function for scoring lattices @@ -837,12 +837,12 @@ def score_func( def lm_rescore_func( log_path: str, - dictionaries: list[str], + dictionaries: List[str], lm_rescore_options: MetaDict, - lat_paths: dict[str, str], - rescored_lat_paths: dict[str, str], - old_g_paths: dict[str, str], - new_g_paths: dict[str, str], + lat_paths: Dict[str, str], + rescored_lat_paths: Dict[str, str], + old_g_paths: Dict[str, str], + new_g_paths: Dict[str, str], ) -> None: """ Multiprocessing function rescore lattices by replacing the small G.fst with the medium G.fst @@ -912,11 +912,11 @@ def lm_rescore_func( def carpa_lm_rescore_func( log_path: str, - dictionaries: list[str], - lat_paths: dict[str, str], - rescored_lat_paths: dict[str, str], - old_g_paths: dict[str, str], - new_g_paths: dict[str, str], + dictionaries: List[str], + lat_paths: Dict[str, str], + rescored_lat_paths: Dict[str, str], + old_g_paths: Dict[str, str], + new_g_paths: Dict[str, str], ) -> None: """ Multiprocessing function to rescore lattices by replacing medium G.fst with large G.carpa @@ -997,13 +997,13 @@ def carpa_lm_rescore_func( def initial_fmllr_func( log_path: str, - dictionaries: list[str], - feature_strings: dict[str, str], + dictionaries: List[str], + feature_strings: Dict[str, str], model_path: str, fmllr_options: MetaDict, - trans_paths: dict[str, str], - lat_paths: dict[str, str], - spk2utt_paths: dict[str, str], + trans_paths: Dict[str, str], + lat_paths: Dict[str, str], + spk2utt_paths: Dict[str, str], ) -> None: """ Multiprocessing function for running initial fMLLR calculation @@ -1107,13 +1107,13 @@ def initial_fmllr_func( def lat_gen_fmllr_func( log_path: str, - dictionaries: list[str], - feature_strings: dict[str, str], + dictionaries: List[str], + feature_strings: Dict[str, str], model_path: str, decode_options: MetaDict, - word_symbol_paths: dict[str, str], - hclg_paths: dict[str, str], - tmp_lat_paths: dict[str, str], + word_symbol_paths: Dict[str, str], + hclg_paths: Dict[str, str], + tmp_lat_paths: Dict[str, str], ) -> None: """ Regenerate lattices using initial fMLLR transforms @@ -1176,13 +1176,13 @@ def lat_gen_fmllr_func( def final_fmllr_est_func( log_path: str, - dictionaries: list[str], - feature_strings: dict[str, str], + dictionaries: List[str], + feature_strings: Dict[str, str], model_path: str, fmllr_options: MetaDict, - trans_paths: dict[str, str], - spk2utt_paths: dict[str, str], - tmp_lat_paths: dict[str, str], + trans_paths: Dict[str, str], + spk2utt_paths: Dict[str, str], + tmp_lat_paths: Dict[str, str], ) -> None: """ Multiprocessing 
function for running final fMLLR estimation @@ -1304,12 +1304,12 @@ def final_fmllr_est_func( def fmllr_rescore_func( log_path: str, - dictionaries: list[str], - feature_strings: dict[str, str], + dictionaries: List[str], + feature_strings: Dict[str, str], model_path: str, fmllr_options: MetaDict, - tmp_lat_paths: dict[str, str], - final_lat_paths: dict[str, str], + tmp_lat_paths: Dict[str, str], + final_lat_paths: Dict[str, str], ) -> None: """ Multiprocessing function to rescore lattices following fMLLR estimation diff --git a/montreal_forced_aligner/transcription/transcriber.py b/montreal_forced_aligner/transcription/transcriber.py index 6957fa72..5478580b 100644 --- a/montreal_forced_aligner/transcription/transcriber.py +++ b/montreal_forced_aligner/transcription/transcriber.py @@ -12,7 +12,7 @@ import sys import time from abc import abstractmethod -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, Dict, List, Optional, Tuple import yaml @@ -210,7 +210,7 @@ def __init__( evaluation_mode: bool = False, min_language_model_weight: int = 7, max_language_model_weight: int = 17, - word_insertion_penalties: list[float] = None, + word_insertion_penalties: List[float] = None, **kwargs, ): self.acoustic_model = AcousticModel(acoustic_model_path) @@ -229,7 +229,7 @@ def parse_parameters( cls, config_path: Optional[str] = None, args: Optional[Namespace] = None, - unknown_args: Optional[list[str]] = None, + unknown_args: Optional[List[str]] = None, ) -> MetaDict: """ Parse configuration parameters from a config file and command line arguments @@ -282,7 +282,7 @@ def setup(self) -> None: self.initialized = True self.logger.debug(f"Setup for transcription in {time.time() - begin} seconds") - def create_hclgs_arguments(self) -> dict[str, CreateHclgArguments]: + def create_hclgs_arguments(self) -> Dict[str, CreateHclgArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.transcription.multiprocessing.create_hclg_func` @@ -310,7 +310,7 @@ def create_hclgs_arguments(self) -> dict[str, CreateHclgArguments]: ) return args - def decode_arguments(self) -> list[DecodeArguments]: + def decode_arguments(self) -> List[DecodeArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.transcription.multiprocessing.decode_func` @@ -334,7 +334,7 @@ def decode_arguments(self) -> list[DecodeArguments]: for j in self.jobs ] - def score_arguments(self) -> list[ScoreArguments]: + def score_arguments(self) -> List[ScoreArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.transcription.multiprocessing.score_func` @@ -357,7 +357,7 @@ def score_arguments(self) -> list[ScoreArguments]: for j in self.jobs ] - def lm_rescore_arguments(self) -> list[LmRescoreArguments]: + def lm_rescore_arguments(self) -> List[LmRescoreArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.transcription.multiprocessing.lm_rescore_func` @@ -379,7 +379,7 @@ def lm_rescore_arguments(self) -> list[LmRescoreArguments]: for j in self.jobs ] - def carpa_lm_rescore_arguments(self) -> list[CarpaLmRescoreArguments]: + def carpa_lm_rescore_arguments(self) -> List[CarpaLmRescoreArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.transcription.multiprocessing.carpa_lm_rescore_func` @@ -409,7 +409,7 @@ def fmllr_options(self) -> MetaDict: options["lattice_beam"] = self.lattice_beam return options - def initial_fmllr_arguments(self) -> list[InitialFmllrArguments]: + def initial_fmllr_arguments(self) -> List[InitialFmllrArguments]: """ 
Generate Job arguments for :func:`~montreal_forced_aligner.transcription.multiprocessing.initial_fmllr_func` @@ -433,7 +433,7 @@ def initial_fmllr_arguments(self) -> list[InitialFmllrArguments]: for j in self.jobs ] - def lat_gen_fmllr_arguments(self) -> list[LatGenFmllrArguments]: + def lat_gen_fmllr_arguments(self) -> List[LatGenFmllrArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.transcription.multiprocessing.lat_gen_fmllr_func` @@ -457,7 +457,7 @@ def lat_gen_fmllr_arguments(self) -> list[LatGenFmllrArguments]: for j in self.jobs ] - def final_fmllr_arguments(self) -> list[FinalFmllrArguments]: + def final_fmllr_arguments(self) -> List[FinalFmllrArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.transcription.multiprocessing.final_fmllr_est_func` @@ -481,7 +481,7 @@ def final_fmllr_arguments(self) -> list[FinalFmllrArguments]: for j in self.jobs ] - def fmllr_rescore_arguments(self) -> list[FmllrRescoreArguments]: + def fmllr_rescore_arguments(self) -> List[FmllrRescoreArguments]: """ Generate Job arguments for :func:`~montreal_forced_aligner.transcription.multiprocessing.fmllr_rescore_func` @@ -551,7 +551,7 @@ def hclg_options(self): "transition_scale": self.transition_scale, } - def get_tree_info(self) -> tuple[int, int]: + def get_tree_info(self) -> Tuple[int, int]: """ Get the context width and central position for the acoustic model @@ -931,5 +931,5 @@ def export_files(self, output_directory: str) -> None: backup_output_directory = os.path.join(self.working_directory, "transcriptions") os.makedirs(backup_output_directory, exist_ok=True) self._load_transcripts() - for file in self.files.values(): + for file in self.files: file.save(output_directory, backup_output_directory) diff --git a/montreal_forced_aligner/utils.py b/montreal_forced_aligner/utils.py index 5d6e1296..a61176ff 100644 --- a/montreal_forced_aligner/utils.py +++ b/montreal_forced_aligner/utils.py @@ -13,7 +13,7 @@ import textwrap import traceback from queue import Empty -from typing import Any, Callable, Optional, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Union from colorama import Fore, Style @@ -50,6 +50,25 @@ def get_mfa_version() -> str: return __version__ +def check_third_party(): + """ + Checks whether third party software is available on the path + + Raises + ------- + :class:`~montreal_forced_aligner.exceptions.ThirdpartyError` + """ + bin_path = shutil.which("sox") + if bin_path is None: + raise ThirdpartyError("sox") + bin_path = shutil.which("fstcompile") + if bin_path is None: + raise ThirdpartyError("fstcompile", open_fst=True) + bin_path = shutil.which("compute-mfcc-feats") + if bin_path is None: + raise ThirdpartyError("compute-mfcc-feats") + + def thirdparty_binary(binary_name: str) -> str: """ Generate full path to a given binary name @@ -77,7 +96,7 @@ def thirdparty_binary(binary_name: str) -> str: return bin_path -def log_kaldi_errors(error_logs: list[str], logger: logging.Logger) -> None: +def log_kaldi_errors(error_logs: List[str], logger: logging.Logger) -> None: """ Save details of Kaldi processing errors to a logger @@ -97,7 +116,7 @@ def log_kaldi_errors(error_logs: list[str], logger: logging.Logger) -> None: logger.debug("\t" + line.strip()) -def guess_model_type(path: str) -> list[str]: +def guess_model_type(path: str) -> List[str]: """ Guess a model type given a path @@ -307,7 +326,7 @@ def __init__( function: Callable, return_dict: dict, stopped: Stopped, - return_info: Optional[dict[int, Any]] = None, + 
return_info: Optional[Dict[int, Any]] = None, ): mp.Process.__init__(self) self.job_name = job_name @@ -339,10 +358,10 @@ def run(self) -> None: def run_non_mp( function: Callable, - argument_list: list[tuple[Any, ...]], + argument_list: List[Tuple[Any, ...]], log_directory: str, return_info: bool = False, -) -> Optional[dict[Any, Any]]: +) -> Optional[Dict[Any, Any]]: """ Similar to :func:`run_mp`, but no additional processes are used and the jobs are evaluated in sequential order @@ -376,10 +395,10 @@ def run_non_mp( def run_mp( function: Callable, - argument_list: list[tuple[Any, ...]], + argument_list: List[Tuple[Any, ...]], log_directory: str, return_info: bool = False, -) -> Optional[dict[int, Any]]: +) -> Optional[Dict[int, Any]]: """ Apply a function for each job in parallel diff --git a/montreal_forced_aligner/validator.py b/montreal_forced_aligner/validator.py index 986dd1d4..17562fa1 100644 --- a/montreal_forced_aligner/validator.py +++ b/montreal_forced_aligner/validator.py @@ -9,7 +9,7 @@ import subprocess import time from decimal import Decimal -from typing import TYPE_CHECKING, Any, NamedTuple, Optional +from typing import TYPE_CHECKING, Any, Dict, List, NamedTuple, Optional, Tuple import yaml @@ -33,11 +33,11 @@ class CompileUtteranceTrainGraphsArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.validator.compile_utterance_train_graphs_func`""" log_path: str - dictionaries: list[str] - disambig_int_paths: dict[str, str] - disambig_L_fst_paths: dict[str, str] - fst_paths: dict[str, str] - graphs_paths: dict[str, str] + dictionaries: List[str] + disambig_int_paths: Dict[str, str] + disambig_L_fst_paths: Dict[str, str] + fst_paths: Dict[str, str] + graphs_paths: Dict[str, str] model_path: str tree_path: str @@ -46,25 +46,25 @@ class TestUtterancesArguments(NamedTuple): """Arguments for :func:`~montreal_forced_aligner.validator.test_utterances_func`""" log_path: str - dictionaries: list[str] - feature_strings: dict[str, str] - words_paths: dict[str, str] - graphs_paths: dict[str, str] - text_int_paths: dict[str, str] - edits_paths: dict[str, str] - out_int_paths: dict[str, str] + dictionaries: List[str] + feature_strings: Dict[str, str] + words_paths: Dict[str, str] + graphs_paths: Dict[str, str] + text_int_paths: Dict[str, str] + edits_paths: Dict[str, str] + out_int_paths: Dict[str, str] model_path: str def test_utterances_func( log_path: str, - dictionaries: list[str], - feature_strings: dict[str, str], - words_paths: dict[str, str], - graphs_paths: dict[str, str], - text_int_paths: dict[str, str], - edits_paths: dict[str, str], - out_int_paths: dict[str, str], + dictionaries: List[str], + feature_strings: Dict[str, str], + words_paths: Dict[str, str], + graphs_paths: Dict[str, str], + text_int_paths: Dict[str, str], + edits_paths: Dict[str, str], + out_int_paths: Dict[str, str], model_path: str, ): """ @@ -143,11 +143,11 @@ def test_utterances_func( def compile_utterance_train_graphs_func( log_path: str, - dictionaries: list[str], - disambig_int_paths: dict[str, str], - disambig_L_fst_paths: dict[str, str], - fst_paths: dict[str, str], - graphs_paths: dict[str, str], + dictionaries: List[str], + disambig_int_paths: Dict[str, str], + disambig_L_fst_paths: Dict[str, str], + fst_paths: Dict[str, str], + graphs_paths: Dict[str, str], model_path: str, tree_path: str, ): @@ -240,7 +240,7 @@ def workflow_identifier(self) -> str: def utt2fst_scp_data( self, num_frequent_words: int = 10 - ) -> list[dict[str, list[tuple[str, str]]]]: + ) -> List[Dict[str, 
List[Tuple[str, str]]]]: """ Generate Kaldi style utt2fst scp data @@ -261,7 +261,7 @@ def utt2fst_scp_data( utts = j.job_utts() for dict_name, utt_data in utts.items(): data[dict_name] = [] - for u_name, utterance in utt_data.items(): + for utterance in utt_data: new_text = [] dictionary = utterance.speaker.dictionary if dict_name not in most_frequent: @@ -277,7 +277,7 @@ def utt2fst_scp_data( new_text.extend(x for x in lookup if x != "") data[dict_name].append( ( - u_name, + utterance.name, dictionary.create_utterance_fst( new_text, most_frequent[dictionary.name] ), @@ -305,7 +305,7 @@ def output_utt_fsts(self, num_frequent_words: int = 10) -> None: def compile_utterance_train_graphs_arguments( self, - ) -> list[CompileUtteranceTrainGraphsArguments]: + ) -> List[CompileUtteranceTrainGraphsArguments]: """ Generate Job arguments for :func:`compile_utterance_train_graphs_func` @@ -334,7 +334,7 @@ def compile_utterance_train_graphs_arguments( for j in self.jobs ] - def test_utterances_arguments(self) -> list[TestUtterancesArguments]: + def test_utterances_arguments(self) -> List[TestUtterancesArguments]: """ Generate Job arguments for :func:`test_utterances_func` @@ -383,9 +383,12 @@ def setup(self): self.logger.info("Skipping acoustic feature generation") else: self.generate_features() + self.calculate_oovs_found() - if self.test_transcriptions: + if not self.ignore_acoustics and self.test_transcriptions: self.initialize_utt_fsts() + else: + self.logger.info("Skipping transcription testing") except Exception as e: if isinstance(e, KaldiProcessingError): log_kaldi_errors(e.error_logs, self.logger) @@ -468,15 +471,15 @@ def analyze_setup(self) -> None: """ Analyzes the set up process and outputs info to the console """ - total_duration = sum(x.duration for x in self.files.values()) + total_duration = sum(x.duration for x in self.files) total_duration = Decimal(str(total_duration)).quantize(Decimal("0.001")) ignored_count = len(self.no_transcription_files) ignored_count += len(self.textgrid_read_errors) ignored_count += len(self.decode_error_files) - num_sound_files = sum(1 for x in self.files.values() if x.wav_path is not None) - num_lab_files = sum(1 for x in self.files.values() if x.text_type == "lab") - num_textgrid_files = sum(1 for x in self.files.values() if x.text_type == "textgrid") + num_sound_files = sum(1 for x in self.files if x.wav_path is not None) + num_lab_files = sum(1 for x in self.files if x.text_type == "lab") + num_textgrid_files = sum(1 for x in self.files if x.text_type == "textgrid") self._print_header("Corpus") self._print_green_stat(num_sound_files, "sound files") self._print_green_stat(num_lab_files, "lab files") @@ -519,11 +522,11 @@ def analyze_oovs(self) -> None: if oov_types: total_instances = 0 with open(utterance_oov_path, "w", encoding="utf8") as f: - for utt, utterance in sorted(self.utterances.items()): + for utterance in sorted(self.utterances): if not utterance.oovs: continue total_instances += len(utterance.oovs) - f.write(f"{utt} {', '.join(utterance.oovs)}\n") + f.write(f"{utterance.name} {', '.join(utterance.oovs)}\n") self.save_oovs_found(output_dir) self._print_yellow_stat(len(oov_types), "OOV word types") self._print_yellow_stat(total_instances, "total OOV tokens") @@ -579,7 +582,7 @@ def analyze_missing_features(self) -> None: if self.ignore_acoustics: print("Acoustic feature generation was skipped.") output_dir = self.output_directory - missing_features = [x for x in self.utterances.values() if x.ignored] + missing_features = [x for x in 
self.utterances if x.ignored] if missing_features: path = os.path.join(output_dir, "missing_features.csv") with open(path, "w") as f: @@ -786,6 +789,7 @@ def initialize_utt_fsts(self) -> None: """ Construct utterance FSTs """ + self.logger.info("Initializing for testing transcriptions...") self.output_utt_fsts() def test_utterance_transcriptions(self) -> None: @@ -883,7 +887,7 @@ def parse_parameters( cls, config_path: Optional[str] = None, args: Optional[Namespace] = None, - unknown_args: Optional[list[str]] = None, + unknown_args: Optional[List[str]] = None, ) -> MetaDict: """ @@ -952,7 +956,7 @@ def setup(self): self.set_lexicon_word_set(self.corpus_word_set) self.write_lexicon_information() - for speaker in self.speakers.values(): + for speaker in self.speakers: speaker.set_dictionary(self.get_dictionary(speaker.name)) self.initialize_jobs() self.write_corpus_information() @@ -963,6 +967,7 @@ def setup(self): self.logger.info("Skipping acoustic feature generation") else: self.generate_features() + self.calculate_oovs_found() if self.test_transcriptions: self.initialize_utt_fsts() @@ -1022,17 +1027,13 @@ def setup(self): self.set_lexicon_word_set(self.corpus_word_set) self.write_lexicon_information() - for speaker in self.speakers.values(): + for speaker in self.speakers: speaker.set_dictionary(self.get_dictionary(speaker.name)) self.initialize_jobs() self.write_corpus_information() self.create_corpus_split() if self.test_transcriptions: self.write_lexicon_information(write_disambiguation=True) - if self.ignore_acoustics: - self.logger.info("Skipping acoustic feature generation") - else: - self.generate_features() self.acoustic_model.validate(self) self.acoustic_model.export_model(self.working_directory) self.acoustic_model.log_details(self.logger) @@ -1042,10 +1043,14 @@ def setup(self): self.logger.info("Skipping acoustic feature generation") else: self.generate_features() - - if self.test_transcriptions: + self.calculate_oovs_found() + if not self.ignore_acoustics and self.test_transcriptions: self.initialize_utt_fsts() + else: + self.logger.info("Skipping transcription testing") + self.initialized = True + self.logger.info("Finished initializing!") except Exception as e: if isinstance(e, KaldiProcessingError): log_kaldi_errors(e.error_logs, self.logger) diff --git a/rtd_environment.yml b/rtd_environment.yml index 9803d6dc..bf510635 100644 --- a/rtd_environment.yml +++ b/rtd_environment.yml @@ -2,7 +2,7 @@ name: mfa channels: - conda-forge dependencies: - - python>=3.9 + - python>=3.8 - numpy - librosa - tqdm @@ -14,7 +14,7 @@ dependencies: - pip: - sphinxemoji - sphinxcontrib-autoprogram - - git+https://github.com/pydata/pydata-sphinx-theme.git + - pydata-sphinx-theme - sphinx-panels - interrogate - sphinx diff --git a/tests/test_alignment_pretrained.py b/tests/test_alignment_pretrained.py index 5150ac16..e4eb53c2 100644 --- a/tests/test_alignment_pretrained.py +++ b/tests/test_alignment_pretrained.py @@ -20,5 +20,8 @@ def test_align_sick( export_directory = os.path.join(temp_dir, "test_align_export") shutil.rmtree(export_directory, ignore_errors=True) os.makedirs(export_directory, exist_ok=True) + assert "AY_S" not in a.phone_mapping + assert "AY_S" not in a.default_dictionary.phone_mapping + assert "AY_S" not in a.default_dictionary.reversed_phone_mapping.values() a.export_files(export_directory) assert os.path.exists(os.path.join(export_directory, "michael", "acoustic_corpus.TextGrid")) diff --git a/tests/test_commandline_adapt.py b/tests/test_commandline_adapt.py index 
diff --git a/tests/test_commandline_adapt.py b/tests/test_commandline_adapt.py
index 322f7c43..4584acd6 100644
--- a/tests/test_commandline_adapt.py
+++ b/tests/test_commandline_adapt.py
@@ -58,6 +58,5 @@ def test_adapt_multilingual(
         "--debug",
     ]
     args, unknown = parser.parse_known_args(command)
-    print(args)
     run_adapt_model(args, unknown)
     assert os.path.exists(adapted_model_path)
diff --git a/tests/test_corpus.py b/tests/test_corpus.py
index 69f7219d..c01a5d39 100644
--- a/tests/test_corpus.py
+++ b/tests/test_corpus.py
@@ -3,7 +3,10 @@

 import pytest

-from montreal_forced_aligner.corpus.acoustic_corpus import AcousticCorpus
+from montreal_forced_aligner.corpus.acoustic_corpus import (
+    AcousticCorpus,
+    AcousticCorpusWithPronunciations,
+)
 from montreal_forced_aligner.corpus.classes import File, Speaker, Utterance
 from montreal_forced_aligner.corpus.helper import get_wav_info
 from montreal_forced_aligner.corpus.text_corpus import TextCorpus
@@ -21,7 +24,7 @@ def test_mp3(mp3_test_path):
 def test_speaker_word_set(
     multilingual_ipa_tg_corpus_dir, multispeaker_dictionary_config_path, temp_dir
 ):
-    corpus = AcousticCorpus(
+    corpus = AcousticCorpusWithPronunciations(
         corpus_directory=multilingual_ipa_tg_corpus_dir,
         dictionary_path=multispeaker_dictionary_config_path,
         temporary_directory=temp_dir,
@@ -41,7 +44,6 @@ def test_add(basic_corpus_dir, sick_dict_path, generated_dir):
     shutil.rmtree(output_directory, ignore_errors=True)
     corpus = AcousticCorpus(
         corpus_directory=basic_corpus_dir,
-        dictionary_path=sick_dict_path,
         use_mp=True,
         temporary_directory=output_directory,
     )
@@ -68,14 +70,14 @@ def test_basic(basic_dict_path, basic_corpus_dir, generated_dir):
     output_directory = os.path.join(generated_dir, "corpus_tests")
     if os.path.exists(output_directory):
         shutil.rmtree(output_directory, ignore_errors=True)
-    corpus = AcousticCorpus(
+    corpus = AcousticCorpusWithPronunciations(
         corpus_directory=basic_corpus_dir,
         dictionary_path=basic_dict_path,
         use_mp=False,
         temporary_directory=output_directory,
     )
     corpus.load_corpus()
-    for speaker in corpus.speakers.values():
+    for speaker in corpus.speakers:
         data = speaker.dictionary.data()
         assert speaker.dictionary.silence_phones == data.silence_phones
         assert speaker.dictionary.multilingual_ipa == data.multilingual_ipa
@@ -93,7 +95,6 @@ def test_basic_txt(basic_corpus_txt_dir, basic_dict_path, generated_dir):
         shutil.rmtree(output_directory, ignore_errors=True)
     corpus = AcousticCorpus(
         corpus_directory=basic_corpus_txt_dir,
-        dictionary_path=basic_dict_path,
         use_mp=False,
         temporary_directory=output_directory,
     )
@@ -110,7 +111,6 @@ def test_acoustic_from_temp(basic_corpus_txt_dir, basic_dict_path, generated_dir
     shutil.rmtree(output_directory, ignore_errors=True)
     corpus = AcousticCorpus(
         corpus_directory=basic_corpus_txt_dir,
-        dictionary_path=basic_dict_path,
         use_mp=False,
         temporary_directory=output_directory,
     )
@@ -120,7 +120,6 @@ def test_acoustic_from_temp(basic_corpus_txt_dir, basic_dict_path, generated_dir

     new_corpus = AcousticCorpus(
         corpus_directory=basic_corpus_txt_dir,
-        dictionary_path=basic_dict_path,
         use_mp=False,
         temporary_directory=output_directory,
     )
@@ -148,7 +147,7 @@ def test_extra(sick_dict, extra_corpus_dir, generated_dir):
     if os.path.exists(output_directory):
         shutil.rmtree(output_directory, ignore_errors=True)

-    corpus = AcousticCorpus(
+    corpus = AcousticCorpusWithPronunciations(
         corpus_directory=extra_corpus_dir,
         dictionary_path=sick_dict,
         use_mp=False,
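
These test changes track a split in the corpus API: plain `AcousticCorpus` no longer accepts `dictionary_path`, and dictionary-dependent tests construct `AcousticCorpusWithPronunciations` instead. A sketch of the two patterns, using only constructor arguments visible in this diff (paths are placeholders):

    from montreal_forced_aligner.corpus.acoustic_corpus import (
        AcousticCorpus,
        AcousticCorpusWithPronunciations,
    )

    # Audio and transcripts only; no lexicon involved
    corpus = AcousticCorpus(
        corpus_directory="/path/to/corpus",
        use_mp=False,
        temporary_directory="/path/to/temp",
    )
    corpus.load_corpus()

    # When pronunciations or word sets matter, the dictionary-aware class is used
    corpus_with_dict = AcousticCorpusWithPronunciations(
        corpus_directory="/path/to/corpus",
        dictionary_path="/path/to/dictionary.txt",
        use_mp=False,
        temporary_directory="/path/to/temp",
    )
    corpus_with_dict.load_corpus()
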
@@ -168,7 +167,6 @@ def test_stereo(basic_dict_path, stereo_corpus_dir, generated_dir):

     corpus = AcousticCorpus(
         corpus_directory=stereo_corpus_dir,
-        dictionary_path=basic_dict_path,
         use_mp=False,
         num_jobs=1,
         temporary_directory=output_directory,
@@ -186,7 +184,6 @@ def test_stereo_short_tg(basic_dict_path, stereo_corpus_short_tg_dir, generated_

     corpus = AcousticCorpus(
         corpus_directory=stereo_corpus_short_tg_dir,
-        dictionary_path=basic_dict_path,
         use_mp=False,
         temporary_directory=output_directory,
     )
@@ -203,7 +200,6 @@ def test_flac(basic_dict_path, flac_corpus_dir, generated_dir):

     corpus = AcousticCorpus(
         corpus_directory=flac_corpus_dir,
-        dictionary_path=basic_dict_path,
         use_mp=False,
         temporary_directory=output_directory,
     )
@@ -220,7 +216,6 @@ def test_audio_directory(basic_dict_path, basic_split_dir, generated_dir):

     corpus = AcousticCorpus(
         corpus_directory=text_dir,
-        dictionary_path=basic_dict_path,
         use_mp=False,
         audio_directory=audio_dir,
         temporary_directory=output_directory,
@@ -234,7 +229,6 @@ def test_audio_directory(basic_dict_path, basic_split_dir, generated_dir):
     shutil.rmtree(output_directory, ignore_errors=True)
     corpus = AcousticCorpus(
         corpus_directory=text_dir,
-        dictionary_path=basic_dict_path,
         use_mp=True,
         audio_directory=audio_dir,
         temporary_directory=output_directory,
@@ -252,7 +246,6 @@ def test_flac_mp(basic_dict_path, flac_corpus_dir, generated_dir):

     corpus = AcousticCorpus(
         corpus_directory=flac_corpus_dir,
-        dictionary_path=basic_dict_path,
         use_mp=True,
         temporary_directory=output_directory,
     )
@@ -269,7 +262,6 @@ def test_flac_tg(basic_dict_path, flac_tg_corpus_dir, generated_dir):

     corpus = AcousticCorpus(
         corpus_directory=flac_tg_corpus_dir,
-        dictionary_path=basic_dict_path,
         use_mp=False,
         temporary_directory=output_directory,
     )
@@ -286,7 +278,6 @@ def test_flac_tg_mp(basic_dict_path, flac_tg_corpus_dir, generated_dir):

     corpus = AcousticCorpus(
         corpus_directory=flac_tg_corpus_dir,
-        dictionary_path=basic_dict_path,
         use_mp=True,
         temporary_directory=output_directory,
     )
@@ -303,7 +294,6 @@ def test_24bit_wav(transcribe_corpus_24bit_dir, basic_dict_path, generated_dir):

     corpus = AcousticCorpus(
         corpus_directory=transcribe_corpus_24bit_dir,
-        dictionary_path=basic_dict_path,
         use_mp=False,
         temporary_directory=output_directory,
     )
@@ -313,23 +303,22 @@ def test_24bit_wav(transcribe_corpus_24bit_dir, basic_dict_path, generated_dir):
     assert len(corpus.files) > 0


-def test_short_segments(basic_dict_path, shortsegments_corpus_dir, generated_dir):
+def test_short_segments(shortsegments_corpus_dir, generated_dir):
     output_directory = os.path.join(generated_dir, "corpus_tests")
     if os.path.exists(output_directory):
         shutil.rmtree(output_directory, ignore_errors=True)
     corpus = AcousticCorpus(
         corpus_directory=shortsegments_corpus_dir,
-        dictionary_path=basic_dict_path,
         use_mp=False,
         temporary_directory=output_directory,
     )
     corpus.load_corpus()
     assert len(corpus.utterances) == 3
-    assert len([x for x in corpus.utterances.values() if not x.ignored]) == 1
-    assert len([x for x in corpus.utterances.values() if x.features is not None]) == 1
-    assert len([x for x in corpus.utterances.values() if x.ignored]) == 2
-    assert len([x for x in corpus.utterances.values() if x.features is None]) == 2
+    assert len([x for x in corpus.utterances if not x.ignored]) == 1
+    assert len([x for x in corpus.utterances if x.features is not None]) == 1
+    assert len([x for x in corpus.utterances if x.ignored]) == 2
+    assert len([x for x in corpus.utterances if x.features is None]) == 2


 def test_speaker_groupings(multilingual_ipa_corpus_dir, generated_dir, english_us_ipa_dictionary):
@@ -337,7 +326,7 @@ def test_speaker_groupings(multilingual_ipa_corpus_dir, generated_dir, english_u
     if os.path.exists(output_directory):
         shutil.rmtree(output_directory, ignore_errors=True)

-    corpus = AcousticCorpus(
+    corpus = AcousticCorpusWithPronunciations(
         corpus_directory=multilingual_ipa_corpus_dir,
         dictionary_path=english_us_ipa_dictionary,
         use_mp=True,
@@ -353,7 +342,7 @@ def test_speaker_groupings(multilingual_ipa_corpus_dir, generated_dir, english_u
             assert name in corpus.files

     shutil.rmtree(output_directory, ignore_errors=True)
-    new_corpus = AcousticCorpus(
+    new_corpus = AcousticCorpusWithPronunciations(
         corpus_directory=multilingual_ipa_corpus_dir,
         dictionary_path=english_us_ipa_dictionary,
         num_jobs=1,
@@ -376,7 +365,6 @@ def test_subset(multilingual_ipa_corpus_dir, generated_dir, english_us_ipa_dicti

     corpus = AcousticCorpus(
         corpus_directory=multilingual_ipa_corpus_dir,
-        dictionary_path=english_us_ipa_dictionary,
         use_mp=True,
         temporary_directory=output_directory,
     )
@@ -393,7 +381,7 @@ def test_weird_words(weird_words_dir, generated_dir, sick_dict_path):
     if os.path.exists(output_directory):
         shutil.rmtree(output_directory, ignore_errors=True)

-    corpus = AcousticCorpus(
+    corpus = AcousticCorpusWithPronunciations(
         corpus_directory=weird_words_dir,
         dictionary_path=sick_dict_path,
         use_mp=True,
@@ -424,7 +412,7 @@ def test_punctuated(punctuated_dir, generated_dir, sick_dict_path):
     if os.path.exists(output_directory):
         shutil.rmtree(output_directory, ignore_errors=True)

-    corpus = AcousticCorpus(
+    corpus = AcousticCorpusWithPronunciations(
         corpus_directory=punctuated_dir,
         dictionary_path=sick_dict_path,
         use_mp=True,
@@ -445,11 +433,11 @@ def test_alternate_punctuation(
     output_directory = os.path.join(generated_dir, "corpus_tests")
     if os.path.exists(output_directory):
         shutil.rmtree(output_directory, ignore_errors=True)
-    params = AcousticCorpus.extract_relevant_parameters(
+    params, skipped = AcousticCorpusWithPronunciations.extract_relevant_parameters(
         TrainableAligner.parse_parameters(different_punctuation_config_path)
     )
     params["use_mp"] = True
-    corpus = AcousticCorpus(
+    corpus = AcousticCorpusWithPronunciations(
         corpus_directory=punctuated_dir,
         dictionary_path=sick_dict_path,
         temporary_directory=output_directory,
@@ -470,11 +458,11 @@ def test_xsampa_corpus(
     output_directory = os.path.join(generated_dir, "corpus_tests")
     if os.path.exists(output_directory):
         shutil.rmtree(output_directory, ignore_errors=True)
-    params = AcousticCorpus.extract_relevant_parameters(
+    params, skipped = AcousticCorpusWithPronunciations.extract_relevant_parameters(
         TrainableAligner.parse_parameters(different_punctuation_config_path)
     )
     params["use_mp"] = True
-    corpus = AcousticCorpus(
+    corpus = AcousticCorpusWithPronunciations(
         corpus_directory=xsampa_corpus_dir,
         dictionary_path=xsampa_dict_path,
         temporary_directory=output_directory,
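
In the last two hunks, `extract_relevant_parameters` is now unpacked into two values, so it evidently returns the filtered parameter dict plus the keys it skipped. A hedged usage sketch based only on what the updated tests show (imports as in tests/test_corpus.py; `TrainableAligner`'s module path is not shown in this diff, and paths are placeholders):

    params, skipped = AcousticCorpusWithPronunciations.extract_relevant_parameters(
        TrainableAligner.parse_parameters("/path/to/config.yaml")
    )
    params["use_mp"] = True
    corpus = AcousticCorpusWithPronunciations(
        corpus_directory="/path/to/corpus",
        dictionary_path="/path/to/dictionary.txt",
        temporary_directory="/path/to/temp",
        **params,  # assumed: the hunks end before showing how params is applied
    )
    # `skipped` would then be available for reporting ignored configuration keys
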
{"sil", "sp", "spn", "phonea", "phoneb", "phonec"} + assert set(d.phones) == {"sil", "noi", "spn", "phonea", "phoneb", "phonec"} def test_frclitics(frclitics_dict_path, generated_dir): @@ -120,6 +120,15 @@ def test_english_clitics(english_dictionary, generated_dir): dictionary.dictionary_setup() dictionary.write_lexicon_information() d = dictionary.default_dictionary + assert d.dictionary_model.phone_set_type == "ARPA" + assert d.extra_questions_mapping + for k, v in d.extra_questions_mapping.items(): + print(k) + print(v) + assert len(v) == len(set(v)) + assert all(x.endswith("0") for x in d.extra_questions_mapping["stress_0"]) + assert all(x.endswith("1") for x in d.extra_questions_mapping["stress_1"]) + assert all(x.endswith("2") for x in d.extra_questions_mapping["stress_2"]) assert d.split_clitics("l'orme's") == ["l'", "orme's"] assert d.to_int("l'orme's") == [d.words_mapping["l'"], d.words_mapping["orme's"]] diff --git a/tests/test_gui.py b/tests/test_gui.py index 7180398d..066f036d 100644 --- a/tests/test_gui.py +++ b/tests/test_gui.py @@ -4,14 +4,12 @@ def test_save_text_lab( - basic_dict_path, basic_corpus_dir, generated_dir, ): - output_directory = os.path.join(generated_dir, "corpus_tests") + output_directory = os.path.join(generated_dir, "gui_tests") corpus = AcousticCorpus( corpus_directory=basic_corpus_dir, - dictionary_path=basic_dict_path, use_mp=True, temporary_directory=output_directory, ) @@ -19,11 +17,28 @@ def test_save_text_lab( corpus.files["acoustic_corpus"].save() -def test_flac_tg(basic_dict_path, flac_tg_corpus_dir, generated_dir): - output_directory = os.path.join(generated_dir, "corpus_tests") +def test_file_properties( + stereo_corpus_dir, + generated_dir, +): + output_directory = os.path.join(generated_dir, "gui_tests") + corpus = AcousticCorpus( + corpus_directory=stereo_corpus_dir, + use_mp=True, + temporary_directory=output_directory, + ) + corpus._load_corpus() + assert corpus.files["michaelandsickmichael"].num_channels == 2 + assert corpus.files["michaelandsickmichael"].num_speakers == 2 + assert corpus.files["michaelandsickmichael"].num_utterances == 7 + x, y = corpus.files["michaelandsickmichael"].normalized_waveform() + assert y.shape[0] == 2 + + +def test_flac_tg(flac_tg_corpus_dir, generated_dir): + output_directory = os.path.join(generated_dir, "gui_tests") corpus = AcousticCorpus( corpus_directory=flac_tg_corpus_dir, - dictionary_path=basic_dict_path, use_mp=True, temporary_directory=output_directory, ) diff --git a/tox.ini b/tox.ini index be0c2016..1db35516 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py39-{win,unix},coverage,lint,check-formatting,manifest +envlist = py38-{win,unix},coverage,lint,check-formatting,manifest minversion = 3.18.0 requires = tox-conda isolated_build = true @@ -36,12 +36,12 @@ commands = coverage xml coverage html depends = - py39-{win,unix} + py38-{win,unix} ; This env just runs `black` and fails tox if it's not formatted correctly. ; If this env fails on CI, run `tox -e format` locally in order to apply changes. 
diff --git a/tox.ini b/tox.ini
index be0c2016..1db35516 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py39-{win,unix},coverage,lint,check-formatting,manifest
+envlist = py38-{win,unix},coverage,lint,check-formatting,manifest
 minversion = 3.18.0
 requires = tox-conda
 isolated_build = true
@@ -36,12 +36,12 @@ commands =
     coverage xml
     coverage html
 depends =
-    py39-{win,unix}
+    py38-{win,unix}

 ; This env just runs `black` and fails tox if it's not formatted correctly.
 ; If this env fails on CI, run `tox -e format` locally in order to apply changes.
 [testenv:check-formatting]
-basepython = python3.9
+basepython = python3.8
 deps = black==21.8b0
 skip_install = true
 commands =
@@ -49,7 +49,7 @@ commands =

 [testenv:pkg_meta]
 description = check that the long description is valid
-basepython = python3.9
+basepython = python3.8
 skip_install = true
 deps =
     build>=0.0.4
@@ -65,7 +65,7 @@ ignore =
     E203
     W503
 [testenv:docs]
-basepython = python3.9
+basepython = python3.8
 skip_install=true
 conda_env = rtd_environment.yml
 commands =
@@ -73,13 +73,13 @@ commands =
     sphinx-build -v -E -a -n -T -b html docs/source docs/build

 [testenv:manifest]
-basepython = python3.9
+basepython = python3.8
 deps = check-manifest
 skip_install = true
 commands = check-manifest

 [testenv:format]
-basepython = python3.9
+basepython = python3.8
 deps = black==21.8b0
 skip_install = true
 commands =
@@ -87,7 +87,7 @@ commands =

 [gh-actions]
 python =
-    3.9: py39-unix,coverage
+    3.8: py38-unix,coverage

 [testenv:dev]
 description = dev environment with all deps at {envdir}