Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
* Better WER calculation for compounds and clitics

* Fix subset bug

* Fix LM training bug and multispeaker OOV issue

* Fixed bug with training textgrid export
  • Loading branch information
mmcauliffe committed Jan 20, 2022
1 parent f87c044 commit d5230fd
Show file tree
Hide file tree
Showing 16 changed files with 95 additions and 69 deletions.
8 changes: 8 additions & 0 deletions docs/source/changelog/changelog_2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@
Beta releases
=============

2.0.0rc3
--------
- Fixed a bug where TextGrids weren't being properly generated following training
- Fixed a bug where commands were not always respecting ``--overwrite``
- Fixed a bug where not all words in multispeaker dictionaries would be parsed
- Improved transcription accuracy calculation to account for compounds and clitics
- Fixed a crash when subsetting corpora that did not all have transcriptions

2.0.0rc2
--------
- Added configuration parameter (``ignore_case=False``) to allow for disabling the default behavior of making all text and lexicon entries lower case
Expand Down
20 changes: 12 additions & 8 deletions montreal_forced_aligner/abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,9 +690,9 @@ def log_error(self, message: str) -> None:
self.logger.error(message)


class ModelExporterMixin(metaclass=abc.ABCMeta):
class ExporterMixin(metaclass=abc.ABCMeta):
"""
Abstract mixin class for exporting MFA models
Abstract mixin class for exporting any kind of file
Parameters
----------
Expand All @@ -704,6 +704,12 @@ def __init__(self, overwrite: bool = False, **kwargs):
self.overwrite = overwrite
super().__init__(**kwargs)


class ModelExporterMixin(ExporterMixin, metaclass=abc.ABCMeta):
"""
Abstract mixin class for exporting MFA models
"""

@property
@abc.abstractmethod
def meta(self) -> MetaDict:
Expand All @@ -723,19 +729,17 @@ def export_model(self, output_model_path: str) -> None:
...


class FileExporterMixin(metaclass=abc.ABCMeta):
class FileExporterMixin(ExporterMixin, metaclass=abc.ABCMeta):
"""
Abstract mixin class for exporting TextGrid and text files
Parameters
----------
overwrite: bool
Flag for whether to overwrite files if they already exist
cleanup_textgrids: bool
Flag for whether to clean up exported TextGrids
"""

def __init__(self, overwrite: bool = False, cleanup_textgrids: bool = True, **kwargs):
self.overwrite = overwrite
def __init__(self, cleanup_textgrids: bool = True, **kwargs):
self.cleanup_textgrids = cleanup_textgrids
super().__init__(**kwargs)

Expand Down
2 changes: 1 addition & 1 deletion montreal_forced_aligner/acoustic_modeling/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ def train(self, generate_final_alignments: bool = True) -> None:
previous.exported_model_path, self.working_directory
)
self.align()
self.collect_alignments()

@property
def num_utterances(self) -> int:
Expand Down Expand Up @@ -347,7 +348,6 @@ def align(self) -> None:
f"Analyzing alignment diagnostics for {self.current_aligner.identifier} on the full corpus"
)
self.compile_information()
self.collect_alignments()
with open(done_path, "w"):
pass
except Exception as e:
Expand Down
28 changes: 11 additions & 17 deletions montreal_forced_aligner/alignment/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,11 +224,13 @@ def export_textgrids(self) -> None:
Job method for TextGrid export
"""
begin = time.time()
self.logger.info("Exporting TextGrids...")
os.makedirs(self.export_output_directory, exist_ok=True)
if self.backup_output_directory:
os.makedirs(self.backup_output_directory, exist_ok=True)
export_directory = self.export_output_directory
if os.path.exists(export_directory) and not self.overwrite:
export_directory = self.backup_output_directory
self.log_debug(f"Not overwriting existing directory, exporting to {export_directory}")

self.logger.info(f"Exporting TextGrids to {export_directory}...")
os.makedirs(export_directory, exist_ok=True)
export_errors = {}
total_files = len(self.files)
with tqdm.tqdm(total=total_files) as pbar:
Expand All @@ -255,9 +257,7 @@ def export_textgrids(self) -> None:
try:
for file in self.files:
tiers = file.aligned_data
output_path = file.construct_output_path(
self.export_output_directory, self.backup_output_directory
)
output_path = file.construct_output_path(export_directory)
duration = file.duration
for_write_queue.put((tiers, output_path, duration))
pbar.update(1)
Expand All @@ -276,24 +276,18 @@ def export_textgrids(self) -> None:
for file in self.files:
data = file.aligned_data

backup_output_directory = None
if not self.overwrite:
backup_output_directory = self.backup_output_directory
os.makedirs(backup_output_directory, exist_ok=True)
output_path = file.construct_output_path(
self.export_output_directory, backup_output_directory
)
output_path = file.construct_output_path(export_directory)
export_textgrid(data, output_path, file.duration, self.frame_shift)
pbar.update(1)

if export_errors:
self.logger.warning(
f"There were {len(export_errors)} errors encountered in generating TextGrids. "
f"Check the output_errors.txt file in {os.path.join(self.export_output_directory)} "
f"Check {os.path.join(export_directory, 'output_errors.txt')} "
f"for more details"
)
output_textgrid_writing_errors(self.export_output_directory, export_errors)
self.logger.info("Finished exporting TextGrids!")
output_textgrid_writing_errors(export_directory, export_errors)
self.logger.info(f"Finished exporting TextGrids to {export_directory}!")
self.logger.debug(f"Exported TextGrids in a total of {time.time() - begin} seconds")

def export_files(self, output_directory: str) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ def train_acoustic_model(args: Namespace, unknown_args: Optional[List[str]] = No
temporary_directory=args.temporary_directory,
**TrainableAligner.parse_parameters(args.config_path, args, unknown_args),
)

try:
generate_final_alignments = True
if args.output_directory is None:
Expand Down
2 changes: 1 addition & 1 deletion montreal_forced_aligner/corpus/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ def create_subset(self, subset: int) -> None:
if larger_subset_num < len(self.utterances):
# Get all shorter utterances that are not one word long
utts = sorted(
(utt for utt in self.utterances if " " in utt.text),
(utt for utt in self.utterances if utt.text and " " in utt.text),
key=lambda x: x.duration,
)
larger_subset = utts[:larger_subset_num]
Expand Down
20 changes: 6 additions & 14 deletions montreal_forced_aligner/corpus/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,6 @@ def __repr__(self) -> str:
def save(
self,
output_directory: Optional[str] = None,
backup_output_directory: Optional[str] = None,
text_type: Optional[TextFileType] = None,
save_transcription: bool = False,
) -> None:
Expand Down Expand Up @@ -431,9 +430,7 @@ def save(
return
elif utterance_count == 0:
return
output_path = self.construct_output_path(
output_directory, backup_output_directory, enforce_lab=True
)
output_path = self.construct_output_path(output_directory, enforce_lab=True)
with open(output_path, "w", encoding="utf8") as f:
for u in self.utterances:
if save_transcription:
Expand All @@ -442,7 +439,7 @@ def save(
f.write(u.text)
return
elif text_type == TextFileType.TEXTGRID:
output_path = self.construct_output_path(output_directory, backup_output_directory)
output_path = self.construct_output_path(output_directory)
max_time = self.duration
tiers = {}
for speaker in self.speaker_ordering:
Expand Down Expand Up @@ -556,7 +553,6 @@ def clean_up(self) -> None:
def construct_output_path(
self,
output_directory: Optional[str] = None,
backup_output_directory: Optional[str] = None,
enforce_lab: bool = False,
) -> str:
"""
Expand All @@ -566,8 +562,6 @@ def construct_output_path(
----------
output_directory: str, optional
Directory to output to, if None, it will overwrite the original file
backup_output_directory: str, optional
Backup directory to write to in order to avoid overwriting an existing file
enforce_lab: bool
Flag for whether to enforce generating a lab file over a TextGrid
Expand All @@ -589,8 +583,6 @@ def construct_output_path(
else:
relative = output_directory
tg_path = os.path.join(relative, self._name + extension)
if backup_output_directory is not None and os.path.exists(tg_path):
tg_path = tg_path.replace(output_directory, backup_output_directory)
os.makedirs(os.path.dirname(tg_path), exist_ok=True)
return tg_path

Expand Down Expand Up @@ -1037,8 +1029,8 @@ def add_word_intervals(self, intervals: Union[CtmInterval, List[CtmInterval]]) -
intervals = [intervals]
if self.word_labels is None:
self.word_labels = []
for interval in intervals:
if self.begin is not None:
if self.is_segment:
for interval in intervals:
interval.shift_times(self.begin)
self.word_labels = intervals

Expand All @@ -1055,8 +1047,8 @@ def add_phone_intervals(self, intervals: Union[CtmInterval, List[CtmInterval]])
intervals = [intervals]
if self.phone_labels is None:
self.phone_labels = []
for interval in intervals:
if self.begin is not None:
if self.is_segment:
for interval in intervals:
interval.shift_times(self.begin)
self.phone_labels = intervals

Expand Down
8 changes: 8 additions & 0 deletions montreal_forced_aligner/dictionary/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,14 @@ def base_phones(self) -> Dict[str, Set[str]]:

return base_phones

@property
def split_regex(self) -> re.Pattern:
    """
    Pattern for splitting arbitrary text

    Returns
    -------
    re.Pattern
        Compiled character class matching a single compound marker or a space
    """
    # Escape every marker so that characters with a special meaning inside a
    # character class (hyphen, caret, closing bracket, backslash) are matched
    # literally; an escaped hyphen no longer has to be moved to the front.
    markers = "".join(re.escape(marker) for marker in self.compound_markers)
    return re.compile(rf"[{markers} ]")

@property
def extra_questions_mapping(self) -> Dict[str, List[str]]:
"""Mapping of extra questions for the given phone set type"""
Expand Down
15 changes: 11 additions & 4 deletions montreal_forced_aligner/dictionary/multispeaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,14 +141,22 @@ def dictionary_setup(self):
auto_set = {PhoneSetType.AUTO, PhoneSetType.UNKNOWN, "AUTO", "UNKNOWN"}
if not isinstance(self.phone_set_type, PhoneSetType):
self.phone_set_type = PhoneSetType[self.phone_set_type]

options = self.dictionary_options
pretrained = False
if self.non_silence_phones:
pretrained = True

for speaker, dictionary in self.dictionary_model.load_dictionary_paths().items():
self.speaker_mapping[speaker] = dictionary.name
if dictionary.name not in self.dictionary_mapping:
if not pretrained:
options["non_silence_phones"] = set()
self.dictionary_mapping[dictionary.name] = PronunciationDictionary(
dictionary_path=dictionary.path,
temporary_directory=self.dictionary_output_directory,
root_dictionary=self,
**self.dictionary_options,
**options,
)
if self.phone_set_type not in auto_set:
if (
Expand All @@ -161,15 +169,14 @@ def dictionary_setup(self):
else:
self.phone_set_type = self.dictionary_mapping[dictionary.name].phone_set_type

self.non_silence_phones.update(
self.dictionary_mapping[dictionary.name].non_silence_phones
)
self.excluded_phones.update(
self.dictionary_mapping[dictionary.name].excluded_phones
)
self.excluded_pronunciation_count += self.dictionary_mapping[
dictionary.name
].excluded_pronunciation_count
for dictionary in self.dictionary_mapping.values():
self.non_silence_phones.update(dictionary.non_silence_phones)
for dictionary in self.dictionary_mapping.values():
dictionary.non_silence_phones = self.non_silence_phones

Expand Down
2 changes: 1 addition & 1 deletion montreal_forced_aligner/language_modeling/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ def normalized_text_iter(self, min_count: int = 1) -> Generator:
unk_words = {k for k, v in self.word_counts.items() if v <= min_count}
for u in self.utterances:
normalized = u.normalized_text
if normalized:
if not normalized:
normalized = u.text.split()
yield " ".join(x if x not in unk_words else self.oov_word for x in normalized)

Expand Down
9 changes: 4 additions & 5 deletions montreal_forced_aligner/segmenter.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,9 +425,8 @@ def export_files(self, output_directory: str) -> None:
output_directory: str
Directory to save segmentation TextGrids
"""
backup_output_directory = None
if not self.overwrite:
backup_output_directory = os.path.join(self.working_directory, "transcriptions")
os.makedirs(backup_output_directory, exist_ok=True)
if not self.overwrite and os.path.exists(output_directory):
output_directory = os.path.join(self.working_directory, "transcriptions")
os.makedirs(output_directory, exist_ok=True)
for f in self.files:
f.save(output_directory, backup_output_directory, text_type=TextFileType.TEXTGRID)
f.save(output_directory, text_type=TextFileType.TEXTGRID)
9 changes: 4 additions & 5 deletions montreal_forced_aligner/speaker_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,9 @@ def export_files(self, output_directory: str) -> None:
output_directory: str
Output directory to save files
"""
backup_output_directory = None
if not self.overwrite:
backup_output_directory = os.path.join(self.working_directory, "output")
os.makedirs(backup_output_directory, exist_ok=True)
if not self.overwrite and os.path.exists(output_directory):
output_directory = os.path.join(self.working_directory, "transcriptions")
os.makedirs(output_directory, exist_ok=True)

for file in self.files:
file.save(output_directory, backup_output_directory)
file.save(output_directory)
10 changes: 7 additions & 3 deletions montreal_forced_aligner/textgrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,10 +155,12 @@ def export_textgrid(
phone_tier = tgio.IntervalTier(phone_tier_name, [], minT=0, maxT=duration)
tg.addTier(word_tier)
tg.addTier(phone_tier)

has_data = False
for speaker, data in speaker_data.items():
words = data["words"]
phones = data["phones"]
if len(words) and len(phones):
has_data = True
tg_words = []
tg_phones = []
for w in words:
Expand All @@ -180,5 +182,7 @@ def export_textgrid(
phone_tier = tgio.IntervalTier(phone_tier_name, tg_phones, minT=0, maxT=duration)
tg.replaceTier(word_tier_name, word_tier)
tg.replaceTier(phone_tier_name, phone_tier)

tg.save(output_path, includeBlankSpaces=True, format="long_textgrid", reportingMode="error")
if has_data:
tg.save(
output_path, includeBlankSpaces=True, format="long_textgrid", reportingMode="error"
)
Loading

0 comments on commit d5230fd

Please sign in to comment.