Fix LM training bug and multispeaker OOV issue

MontrealCorpusTools · mmcauliffe · Jan 20, 2022 · Jan 16, 2022 · Jan 18, 2022 · Jan 19, 2022
commit 8e95a696ebba7b05e844da5fd46104b6c28584b0
diff --git a/montreal_forced_aligner/dictionary/multispeaker.py b/montreal_forced_aligner/dictionary/multispeaker.py
@@ -141,14 +141,22 @@ def dictionary_setup(self):
         auto_set = {PhoneSetType.AUTO, PhoneSetType.UNKNOWN, "AUTO", "UNKNOWN"}
         if not isinstance(self.phone_set_type, PhoneSetType):
             self.phone_set_type = PhoneSetType[self.phone_set_type]
+
+        options = self.dictionary_options
+        pretrained = False
+        if self.non_silence_phones:
+            pretrained = True
+
         for speaker, dictionary in self.dictionary_model.load_dictionary_paths().items():
             self.speaker_mapping[speaker] = dictionary.name
             if dictionary.name not in self.dictionary_mapping:
+                if not pretrained:
+                    options["non_silence_phones"] = set()
                 self.dictionary_mapping[dictionary.name] = PronunciationDictionary(
                     dictionary_path=dictionary.path,
                     temporary_directory=self.dictionary_output_directory,
                     root_dictionary=self,
-                    **self.dictionary_options,
+                    **options,
                 )
                 if self.phone_set_type not in auto_set:
                     if (
@@ -161,15 +169,14 @@ def dictionary_setup(self):
                 else:
                     self.phone_set_type = self.dictionary_mapping[dictionary.name].phone_set_type
 
-                self.non_silence_phones.update(
-                    self.dictionary_mapping[dictionary.name].non_silence_phones
-                )
                 self.excluded_phones.update(
                     self.dictionary_mapping[dictionary.name].excluded_phones
                 )
                 self.excluded_pronunciation_count += self.dictionary_mapping[
                     dictionary.name
                 ].excluded_pronunciation_count
+        for dictionary in self.dictionary_mapping.values():
+            self.non_silence_phones.update(dictionary.non_silence_phones)
         for dictionary in self.dictionary_mapping.values():
             dictionary.non_silence_phones = self.non_silence_phones
 

diff --git a/montreal_forced_aligner/language_modeling/trainer.py b/montreal_forced_aligner/language_modeling/trainer.py
@@ -331,7 +331,7 @@ def normalized_text_iter(self, min_count: int = 1) -> Generator:
         unk_words = {k for k, v in self.word_counts.items() if v <= min_count}
         for u in self.utterances:
             normalized = u.normalized_text
-            if normalized:
+            if not normalized:
                 normalized = u.text.split()
             yield " ".join(x if x not in unk_words else self.oov_word for x in normalized)
 

diff --git a/montreal_forced_aligner/textgrid.py b/montreal_forced_aligner/textgrid.py
@@ -155,10 +155,12 @@ def export_textgrid(
         phone_tier = tgio.IntervalTier(phone_tier_name, [], minT=0, maxT=duration)
         tg.addTier(word_tier)
         tg.addTier(phone_tier)
-
+    has_data = False
     for speaker, data in speaker_data.items():
         words = data["words"]
         phones = data["phones"]
+        if len(words) and len(phones):
+            has_data = True
         tg_words = []
         tg_phones = []
         for w in words:
@@ -180,5 +182,7 @@ def export_textgrid(
         phone_tier = tgio.IntervalTier(phone_tier_name, tg_phones, minT=0, maxT=duration)
         tg.replaceTier(word_tier_name, word_tier)
         tg.replaceTier(phone_tier_name, phone_tier)
-
-    tg.save(output_path, includeBlankSpaces=True, format="long_textgrid", reportingMode="error")
+    if has_data:
+        tg.save(
+            output_path, includeBlankSpaces=True, format="long_textgrid", reportingMode="error"
+        )