diff --git a/docs/source/changelog/changelog_2.0.rst b/docs/source/changelog/changelog_2.0.rst index 1f7046e0..fd00d2d3 100644 --- a/docs/source/changelog/changelog_2.0.rst +++ b/docs/source/changelog/changelog_2.0.rst @@ -10,6 +10,14 @@ Beta releases ============= +2.0.0rc3 +-------- +- Fixed a bug where textgrids weren't being properly generated following training +- Fixed a bug where commands were not always respecting ``--overwrite`` +- Fixed a bug where not all words in multispeaker dictionaries would be parsed +- Improved transcription accuracy calculation to account for compounds and clitics +- Fixed a crash when subsetting corpora that did not all have transcriptions + 2.0.0rc2 -------- - Added configuration parameter (``ignore_case=False``) to allow for disabling the default behavior of making all text and lexicon entries lower case diff --git a/montreal_forced_aligner/abc.py b/montreal_forced_aligner/abc.py index 50560b53..69002c66 100644 --- a/montreal_forced_aligner/abc.py +++ b/montreal_forced_aligner/abc.py @@ -690,9 +690,9 @@ def log_error(self, message: str) -> None: self.logger.error(message) -class ModelExporterMixin(metaclass=abc.ABCMeta): +class ExporterMixin(metaclass=abc.ABCMeta): """ - Abstract mixin class for exporting MFA models + Abstract mixin class for exporting any kind of file Parameters ---------- @@ -704,6 +704,12 @@ def __init__(self, overwrite: bool = False, **kwargs): self.overwrite = overwrite super().__init__(**kwargs) + +class ModelExporterMixin(ExporterMixin, metaclass=abc.ABCMeta): + """ + Abstract mixin class for exporting MFA models + """ + @property @abc.abstractmethod def meta(self) -> MetaDict: @@ -723,19 +729,17 @@ def export_model(self, output_model_path: str) -> None: ... -class FileExporterMixin(metaclass=abc.ABCMeta): +class FileExporterMixin(ExporterMixin, metaclass=abc.ABCMeta): """ Abstract mixin class for exporting TextGrid and text files Parameters ---------- - overwrite: bool - Flag for whether to overwrite files if they already exist - + cleanup_textgrids: bool + Flag for whether to clean up exported TextGrids """ - def __init__(self, overwrite: bool = False, cleanup_textgrids: bool = True, **kwargs): - self.overwrite = overwrite + def __init__(self, cleanup_textgrids: bool = True, **kwargs): self.cleanup_textgrids = cleanup_textgrids super().__init__(**kwargs) diff --git a/montreal_forced_aligner/acoustic_modeling/trainer.py b/montreal_forced_aligner/acoustic_modeling/trainer.py index 28beef15..ce547445 100644 --- a/montreal_forced_aligner/acoustic_modeling/trainer.py +++ b/montreal_forced_aligner/acoustic_modeling/trainer.py @@ -308,6 +308,7 @@ def train(self, generate_final_alignments: bool = True) -> None: previous.exported_model_path, self.working_directory ) self.align() + self.collect_alignments() @property def num_utterances(self) -> int: @@ -347,7 +348,6 @@ def align(self) -> None: f"Analyzing alignment diagnostics for {self.current_aligner.identifier} on the full corpus" ) self.compile_information() - self.collect_alignments() with open(done_path, "w"): pass except Exception as e: diff --git a/montreal_forced_aligner/alignment/base.py b/montreal_forced_aligner/alignment/base.py index ad1c7480..072c3674 100644 --- a/montreal_forced_aligner/alignment/base.py +++ b/montreal_forced_aligner/alignment/base.py @@ -224,11 +224,13 @@ def export_textgrids(self) -> None: Job method for TextGrid export """ begin = time.time() - self.logger.info("Exporting TextGrids...") - os.makedirs(self.export_output_directory, exist_ok=True) - if self.backup_output_directory: - os.makedirs(self.backup_output_directory, exist_ok=True) + export_directory = self.export_output_directory + if os.path.exists(export_directory) and not self.overwrite: + export_directory = self.backup_output_directory + self.log_debug(f"Not overwriting existing directory, exporting to {export_directory}") + self.logger.info(f"Exporting TextGrids to {export_directory}...") + os.makedirs(export_directory, exist_ok=True) export_errors = {} total_files = len(self.files) with tqdm.tqdm(total=total_files) as pbar: @@ -255,9 +257,7 @@ def export_textgrids(self) -> None: try: for file in self.files: tiers = file.aligned_data - output_path = file.construct_output_path( - self.export_output_directory, self.backup_output_directory - ) + output_path = file.construct_output_path(export_directory) duration = file.duration for_write_queue.put((tiers, output_path, duration)) pbar.update(1) @@ -276,24 +276,18 @@ def export_textgrids(self) -> None: for file in self.files: data = file.aligned_data - backup_output_directory = None - if not self.overwrite: - backup_output_directory = self.backup_output_directory - os.makedirs(backup_output_directory, exist_ok=True) - output_path = file.construct_output_path( - self.export_output_directory, backup_output_directory - ) + output_path = file.construct_output_path(export_directory) export_textgrid(data, output_path, file.duration, self.frame_shift) pbar.update(1) if export_errors: self.logger.warning( f"There were {len(export_errors)} errors encountered in generating TextGrids. " - f"Check the output_errors.txt file in {os.path.join(self.export_output_directory)} " + f"Check {os.path.join(export_directory, 'output_errors.txt')} " f"for more details" ) - output_textgrid_writing_errors(self.export_output_directory, export_errors) - self.logger.info("Finished exporting TextGrids!") + output_textgrid_writing_errors(export_directory, export_errors) + self.logger.info(f"Finished exporting TextGrids to {export_directory}!") self.logger.debug(f"Exported TextGrids in a total of {time.time() - begin} seconds") def export_files(self, output_directory: str) -> None: diff --git a/montreal_forced_aligner/command_line/train_acoustic_model.py b/montreal_forced_aligner/command_line/train_acoustic_model.py index 39bf6050..eda78f57 100644 --- a/montreal_forced_aligner/command_line/train_acoustic_model.py +++ b/montreal_forced_aligner/command_line/train_acoustic_model.py @@ -32,7 +32,6 @@ def train_acoustic_model(args: Namespace, unknown_args: Optional[List[str]] = No temporary_directory=args.temporary_directory, **TrainableAligner.parse_parameters(args.config_path, args, unknown_args), ) - try: generate_final_alignments = True if args.output_directory is None: diff --git a/montreal_forced_aligner/corpus/base.py b/montreal_forced_aligner/corpus/base.py index de9c96cd..9fcb2201 100644 --- a/montreal_forced_aligner/corpus/base.py +++ b/montreal_forced_aligner/corpus/base.py @@ -319,7 +319,7 @@ def create_subset(self, subset: int) -> None: if larger_subset_num < len(self.utterances): # Get all shorter utterances that are not one word long utts = sorted( - (utt for utt in self.utterances if " " in utt.text), + (utt for utt in self.utterances if utt.text and " " in utt.text), key=lambda x: x.duration, ) larger_subset = utts[:larger_subset_num] diff --git a/montreal_forced_aligner/corpus/classes.py b/montreal_forced_aligner/corpus/classes.py index 01aea2fa..e83d1074 100644 --- a/montreal_forced_aligner/corpus/classes.py +++ b/montreal_forced_aligner/corpus/classes.py @@ -396,7 +396,6 @@ def __repr__(self) -> str: def save( self, output_directory: Optional[str] = None, - backup_output_directory: Optional[str] = None, text_type: Optional[TextFileType] = None, save_transcription: bool = False, ) -> None: @@ -431,9 +430,7 @@ def save( return elif utterance_count == 0: return - output_path = self.construct_output_path( - output_directory, backup_output_directory, enforce_lab=True - ) + output_path = self.construct_output_path(output_directory, enforce_lab=True) with open(output_path, "w", encoding="utf8") as f: for u in self.utterances: if save_transcription: @@ -442,7 +439,7 @@ def save( f.write(u.text) return elif text_type == TextFileType.TEXTGRID: - output_path = self.construct_output_path(output_directory, backup_output_directory) + output_path = self.construct_output_path(output_directory) max_time = self.duration tiers = {} for speaker in self.speaker_ordering: @@ -556,7 +553,6 @@ def clean_up(self) -> None: def construct_output_path( self, output_directory: Optional[str] = None, - backup_output_directory: Optional[str] = None, enforce_lab: bool = False, ) -> str: """ @@ -566,8 +562,6 @@ def construct_output_path( ---------- output_directory: str, optional Directory to output to, if None, it will overwrite the original file - backup_output_directory: str, optional - Backup directory to write to in order to avoid overwriting an existing file enforce_lab: bool Flag for whether to enforce generating a lab file over a TextGrid @@ -589,8 +583,6 @@ def construct_output_path( else: relative = output_directory tg_path = os.path.join(relative, self._name + extension) - if backup_output_directory is not None and os.path.exists(tg_path): - tg_path = tg_path.replace(output_directory, backup_output_directory) os.makedirs(os.path.dirname(tg_path), exist_ok=True) return tg_path @@ -1037,8 +1029,8 @@ def add_word_intervals(self, intervals: Union[CtmInterval, List[CtmInterval]]) - intervals = [intervals] if self.word_labels is None: self.word_labels = [] - for interval in intervals: - if self.begin is not None: + if self.is_segment: + for interval in intervals: interval.shift_times(self.begin) self.word_labels = intervals @@ -1055,8 +1047,8 @@ def add_phone_intervals(self, intervals: Union[CtmInterval, List[CtmInterval]]) intervals = [intervals] if self.phone_labels is None: self.phone_labels = [] - for interval in intervals: - if self.begin is not None: + if self.is_segment: + for interval in intervals: interval.shift_times(self.begin) self.phone_labels = intervals diff --git a/montreal_forced_aligner/dictionary/mixins.py b/montreal_forced_aligner/dictionary/mixins.py index 3e900290..c45632b3 100644 --- a/montreal_forced_aligner/dictionary/mixins.py +++ b/montreal_forced_aligner/dictionary/mixins.py @@ -338,6 +338,14 @@ def base_phones(self) -> Dict[str, Set[str]]: return base_phones + @property + def split_regex(self) -> re.Pattern: + """Pattern for splitting arbitrary text""" + markers = self.compound_markers + if "-" in markers: + markers = ["-"] + [x for x in self.compound_markers if x != "-"] + return re.compile(rf'[{"".join(markers)} ]') + @property def extra_questions_mapping(self) -> Dict[str, List[str]]: """Mapping of extra questions for the given phone set type""" diff --git a/montreal_forced_aligner/dictionary/multispeaker.py b/montreal_forced_aligner/dictionary/multispeaker.py index b4f82441..4e8fd4c0 100644 --- a/montreal_forced_aligner/dictionary/multispeaker.py +++ b/montreal_forced_aligner/dictionary/multispeaker.py @@ -141,14 +141,22 @@ def dictionary_setup(self): auto_set = {PhoneSetType.AUTO, PhoneSetType.UNKNOWN, "AUTO", "UNKNOWN"} if not isinstance(self.phone_set_type, PhoneSetType): self.phone_set_type = PhoneSetType[self.phone_set_type] + + options = self.dictionary_options + pretrained = False + if self.non_silence_phones: + pretrained = True + for speaker, dictionary in self.dictionary_model.load_dictionary_paths().items(): self.speaker_mapping[speaker] = dictionary.name if dictionary.name not in self.dictionary_mapping: + if not pretrained: + options["non_silence_phones"] = set() self.dictionary_mapping[dictionary.name] = PronunciationDictionary( dictionary_path=dictionary.path, temporary_directory=self.dictionary_output_directory, root_dictionary=self, - **self.dictionary_options, + **options, ) if self.phone_set_type not in auto_set: if ( @@ -161,15 +169,14 @@ def dictionary_setup(self): else: self.phone_set_type = self.dictionary_mapping[dictionary.name].phone_set_type - self.non_silence_phones.update( - self.dictionary_mapping[dictionary.name].non_silence_phones - ) self.excluded_phones.update( self.dictionary_mapping[dictionary.name].excluded_phones ) self.excluded_pronunciation_count += self.dictionary_mapping[ dictionary.name ].excluded_pronunciation_count + for dictionary in self.dictionary_mapping.values(): + self.non_silence_phones.update(dictionary.non_silence_phones) for dictionary in self.dictionary_mapping.values(): dictionary.non_silence_phones = self.non_silence_phones diff --git a/montreal_forced_aligner/language_modeling/trainer.py b/montreal_forced_aligner/language_modeling/trainer.py index eddc399f..e3570a0e 100644 --- a/montreal_forced_aligner/language_modeling/trainer.py +++ b/montreal_forced_aligner/language_modeling/trainer.py @@ -331,7 +331,7 @@ def normalized_text_iter(self, min_count: int = 1) -> Generator: unk_words = {k for k, v in self.word_counts.items() if v <= min_count} for u in self.utterances: normalized = u.normalized_text - if normalized: + if not normalized: normalized = u.text.split() yield " ".join(x if x not in unk_words else self.oov_word for x in normalized) diff --git a/montreal_forced_aligner/segmenter.py b/montreal_forced_aligner/segmenter.py index b6b1bd2f..d4367606 100644 --- a/montreal_forced_aligner/segmenter.py +++ b/montreal_forced_aligner/segmenter.py @@ -425,9 +425,8 @@ def export_files(self, output_directory: str) -> None: output_directory: str Directory to save segmentation TextGrids """ - backup_output_directory = None - if not self.overwrite: - backup_output_directory = os.path.join(self.working_directory, "transcriptions") - os.makedirs(backup_output_directory, exist_ok=True) + if not self.overwrite and os.path.exists(output_directory): + output_directory = os.path.join(self.working_directory, "transcriptions") + os.makedirs(output_directory, exist_ok=True) for f in self.files: - f.save(output_directory, backup_output_directory, text_type=TextFileType.TEXTGRID) + f.save(output_directory, text_type=TextFileType.TEXTGRID) diff --git a/montreal_forced_aligner/speaker_classifier.py b/montreal_forced_aligner/speaker_classifier.py index 5cc61060..29cd7c60 100644 --- a/montreal_forced_aligner/speaker_classifier.py +++ b/montreal_forced_aligner/speaker_classifier.py @@ -178,10 +178,9 @@ def export_files(self, output_directory: str) -> None: output_directory: str Output directory to save files """ - backup_output_directory = None - if not self.overwrite: - backup_output_directory = os.path.join(self.working_directory, "output") - os.makedirs(backup_output_directory, exist_ok=True) + if not self.overwrite and os.path.exists(output_directory): + output_directory = os.path.join(self.working_directory, "transcriptions") + os.makedirs(output_directory, exist_ok=True) for file in self.files: - file.save(output_directory, backup_output_directory) + file.save(output_directory) diff --git a/montreal_forced_aligner/textgrid.py b/montreal_forced_aligner/textgrid.py index 3ee0958e..614ee4b1 100644 --- a/montreal_forced_aligner/textgrid.py +++ b/montreal_forced_aligner/textgrid.py @@ -155,10 +155,12 @@ def export_textgrid( phone_tier = tgio.IntervalTier(phone_tier_name, [], minT=0, maxT=duration) tg.addTier(word_tier) tg.addTier(phone_tier) - + has_data = False for speaker, data in speaker_data.items(): words = data["words"] phones = data["phones"] + if len(words) and len(phones): + has_data = True tg_words = [] tg_phones = [] for w in words: @@ -180,5 +182,7 @@ def export_textgrid( phone_tier = tgio.IntervalTier(phone_tier_name, tg_phones, minT=0, maxT=duration) tg.replaceTier(word_tier_name, word_tier) tg.replaceTier(phone_tier_name, phone_tier) - - tg.save(output_path, includeBlankSpaces=True, format="long_textgrid", reportingMode="error") + if has_data: + tg.save( + output_path, includeBlankSpaces=True, format="long_textgrid", reportingMode="error" + ) diff --git a/montreal_forced_aligner/transcription/transcriber.py b/montreal_forced_aligner/transcription/transcriber.py index 59d6b2f2..c2d61fb1 100644 --- a/montreal_forced_aligner/transcription/transcriber.py +++ b/montreal_forced_aligner/transcription/transcriber.py @@ -9,6 +9,7 @@ import itertools import multiprocessing as mp import os +import re import shutil import subprocess import sys @@ -1347,7 +1348,8 @@ def evaluate(self): utt_name = utterance.name if not utterance.text: continue - g = utterance.text.split() + + g = self.split_regex.split(utterance.text) total_count += 1 total_word_length += len(g) @@ -1416,6 +1418,12 @@ def evaluate(self): def _load_transcripts(self): """Load transcripts from Kaldi temporary files""" + initial_clitics = { + x for x in self.clitic_set if re.match(rf"^.*[{''.join(self.clitic_markers)}]$", x) + } + final_clitics = { + x for x in self.clitic_set if re.match(rf"^[{''.join(self.clitic_markers)}].*$", x) + } for score_args in self.score_arguments(): for tra_path in score_args.tra_paths.values(): @@ -1431,7 +1439,13 @@ def _load_transcripts(self): continue transcription = [] for i in ints: - transcription.append(lookup[int(i)]) + w = lookup[int(i)] + if len(transcription) and ( + w in final_clitics or transcription[-1] in initial_clitics + ): + transcription[-1] += w + continue + transcription.append(w) utterance.transcription_text = " ".join(transcription) def export_files(self, output_directory: str) -> None: @@ -1443,15 +1457,14 @@ def export_files(self, output_directory: str) -> None: output_directory: str Directory to save transcriptions """ - backup_output_directory = None - if not self.overwrite: - backup_output_directory = os.path.join(self.working_directory, "transcriptions") - os.makedirs(backup_output_directory, exist_ok=True) + if not self.overwrite and os.path.exists(output_directory): + output_directory = os.path.join(self.working_directory, "transcriptions") + os.makedirs(output_directory, exist_ok=True) self._load_transcripts() for file in self.files: if len(file.utterances) == 0: self.logger.debug(f"Could not find any utterances for {file.name}") - file.save(output_directory, backup_output_directory, save_transcription=True) + file.save(output_directory, save_transcription=True) if self.evaluation_mode: shutil.copyfile( os.path.join(self.evaluation_directory, "transcription_evaluation.csv"), diff --git a/tests/test_alignment_pretrained.py b/tests/test_alignment_pretrained.py index d24b0d18..c4b4833c 100644 --- a/tests/test_alignment_pretrained.py +++ b/tests/test_alignment_pretrained.py @@ -20,7 +20,6 @@ def test_align_sick( a.align() export_directory = os.path.join(temp_dir, "test_align_export") shutil.rmtree(export_directory, ignore_errors=True) - os.makedirs(export_directory, exist_ok=True) assert "AY_S" not in a.phone_mapping assert "AY_S" not in a.default_dictionary.phone_mapping assert "AY_S" not in a.default_dictionary.reversed_phone_mapping.values() diff --git a/tests/test_commandline_align.py b/tests/test_commandline_align.py index 3a82c3e1..00f3ddc8 100644 --- a/tests/test_commandline_align.py +++ b/tests/test_commandline_align.py @@ -95,7 +95,7 @@ def test_align_basic( assert os.path.exists(align_temp_dir) backup_textgrid_dir = os.path.join(align_temp_dir, "textgrids") - assert not os.listdir(backup_textgrid_dir) + assert not os.path.exists(backup_textgrid_dir) command = [ "align",