Skip to content

Commit

Permalink
Fixed bug with training textgrid export
Browse files Browse the repository at this point in the history
  • Loading branch information
mmcauliffe committed Jan 20, 2022
1 parent 8e95a69 commit fa7b68f
Show file tree
Hide file tree
Showing 11 changed files with 51 additions and 58 deletions.
8 changes: 8 additions & 0 deletions docs/source/changelog/changelog_2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@
Beta releases
=============

2.0.0rc3
--------
- Fixed a bug where TextGrids weren't being properly generated following training
- Fixed a bug where commands were not always respecting ``--overwrite``
- Fixed a bug where not all words in multispeaker dictionaries would be parsed
- Improved transcription accuracy calculation to account for compounds and clitics
- Fixed a crash when subsetting corpora that did not all have transcriptions

2.0.0rc2
--------
- Added configuration parameter (``ignore_case=False``) to allow for disabling the default behavior of making all text and lexicon entries lower case
Expand Down
20 changes: 12 additions & 8 deletions montreal_forced_aligner/abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,9 +690,9 @@ def log_error(self, message: str) -> None:
self.logger.error(message)


class ModelExporterMixin(metaclass=abc.ABCMeta):
class ExporterMixin(metaclass=abc.ABCMeta):
"""
Abstract mixin class for exporting MFA models
Abstract mixin class for exporting any kind of file
Parameters
----------
Expand All @@ -704,6 +704,12 @@ def __init__(self, overwrite: bool = False, **kwargs):
self.overwrite = overwrite
super().__init__(**kwargs)


class ModelExporterMixin(ExporterMixin, metaclass=abc.ABCMeta):
"""
Abstract mixin class for exporting MFA models
"""

@property
@abc.abstractmethod
def meta(self) -> MetaDict:
Expand All @@ -723,19 +729,17 @@ def export_model(self, output_model_path: str) -> None:
...


class FileExporterMixin(metaclass=abc.ABCMeta):
class FileExporterMixin(ExporterMixin, metaclass=abc.ABCMeta):
"""
Abstract mixin class for exporting TextGrid and text files
Parameters
----------
overwrite: bool
Flag for whether to overwrite files if they already exist
cleanup_textgrids: bool
Flag for whether to clean up exported TextGrids
"""

def __init__(self, overwrite: bool = False, cleanup_textgrids: bool = True, **kwargs):
self.overwrite = overwrite
def __init__(self, cleanup_textgrids: bool = True, **kwargs):
self.cleanup_textgrids = cleanup_textgrids
super().__init__(**kwargs)

Expand Down
2 changes: 1 addition & 1 deletion montreal_forced_aligner/acoustic_modeling/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ def train(self, generate_final_alignments: bool = True) -> None:
previous.exported_model_path, self.working_directory
)
self.align()
self.collect_alignments()

@property
def num_utterances(self) -> int:
Expand Down Expand Up @@ -347,7 +348,6 @@ def align(self) -> None:
f"Analyzing alignment diagnostics for {self.current_aligner.identifier} on the full corpus"
)
self.compile_information()
self.collect_alignments()
with open(done_path, "w"):
pass
except Exception as e:
Expand Down
28 changes: 11 additions & 17 deletions montreal_forced_aligner/alignment/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,11 +224,13 @@ def export_textgrids(self) -> None:
Job method for TextGrid export
"""
begin = time.time()
self.logger.info("Exporting TextGrids...")
os.makedirs(self.export_output_directory, exist_ok=True)
if self.backup_output_directory:
os.makedirs(self.backup_output_directory, exist_ok=True)
export_directory = self.export_output_directory
if os.path.exists(export_directory) and not self.overwrite:
export_directory = self.backup_output_directory
self.log_debug(f"Not overwriting existing directory, exporting to {export_directory}")

self.logger.info(f"Exporting TextGrids to {export_directory}...")
os.makedirs(export_directory, exist_ok=True)
export_errors = {}
total_files = len(self.files)
with tqdm.tqdm(total=total_files) as pbar:
Expand All @@ -255,9 +257,7 @@ def export_textgrids(self) -> None:
try:
for file in self.files:
tiers = file.aligned_data
output_path = file.construct_output_path(
self.export_output_directory, self.backup_output_directory
)
output_path = file.construct_output_path(export_directory)
duration = file.duration
for_write_queue.put((tiers, output_path, duration))
pbar.update(1)
Expand All @@ -276,24 +276,18 @@ def export_textgrids(self) -> None:
for file in self.files:
data = file.aligned_data

backup_output_directory = None
if not self.overwrite:
backup_output_directory = self.backup_output_directory
os.makedirs(backup_output_directory, exist_ok=True)
output_path = file.construct_output_path(
self.export_output_directory, backup_output_directory
)
output_path = file.construct_output_path(export_directory)
export_textgrid(data, output_path, file.duration, self.frame_shift)
pbar.update(1)

if export_errors:
self.logger.warning(
f"There were {len(export_errors)} errors encountered in generating TextGrids. "
f"Check the output_errors.txt file in {os.path.join(self.export_output_directory)} "
f"Check {os.path.join(export_directory, 'output_errors.txt')} "
f"for more details"
)
output_textgrid_writing_errors(self.export_output_directory, export_errors)
self.logger.info("Finished exporting TextGrids!")
output_textgrid_writing_errors(export_directory, export_errors)
self.logger.info(f"Finished exporting TextGrids to {export_directory}!")
self.logger.debug(f"Exported TextGrids in a total of {time.time() - begin} seconds")

def export_files(self, output_directory: str) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ def train_acoustic_model(args: Namespace, unknown_args: Optional[List[str]] = No
temporary_directory=args.temporary_directory,
**TrainableAligner.parse_parameters(args.config_path, args, unknown_args),
)

try:
generate_final_alignments = True
if args.output_directory is None:
Expand Down
20 changes: 6 additions & 14 deletions montreal_forced_aligner/corpus/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,6 @@ def __repr__(self) -> str:
def save(
self,
output_directory: Optional[str] = None,
backup_output_directory: Optional[str] = None,
text_type: Optional[TextFileType] = None,
save_transcription: bool = False,
) -> None:
Expand Down Expand Up @@ -431,9 +430,7 @@ def save(
return
elif utterance_count == 0:
return
output_path = self.construct_output_path(
output_directory, backup_output_directory, enforce_lab=True
)
output_path = self.construct_output_path(output_directory, enforce_lab=True)
with open(output_path, "w", encoding="utf8") as f:
for u in self.utterances:
if save_transcription:
Expand All @@ -442,7 +439,7 @@ def save(
f.write(u.text)
return
elif text_type == TextFileType.TEXTGRID:
output_path = self.construct_output_path(output_directory, backup_output_directory)
output_path = self.construct_output_path(output_directory)
max_time = self.duration
tiers = {}
for speaker in self.speaker_ordering:
Expand Down Expand Up @@ -556,7 +553,6 @@ def clean_up(self) -> None:
def construct_output_path(
self,
output_directory: Optional[str] = None,
backup_output_directory: Optional[str] = None,
enforce_lab: bool = False,
) -> str:
"""
Expand All @@ -566,8 +562,6 @@ def construct_output_path(
----------
output_directory: str, optional
Directory to output to, if None, it will overwrite the original file
backup_output_directory: str, optional
Backup directory to write to in order to avoid overwriting an existing file
enforce_lab: bool
Flag for whether to enforce generating a lab file over a TextGrid
Expand All @@ -589,8 +583,6 @@ def construct_output_path(
else:
relative = output_directory
tg_path = os.path.join(relative, self._name + extension)
if backup_output_directory is not None and os.path.exists(tg_path):
tg_path = tg_path.replace(output_directory, backup_output_directory)
os.makedirs(os.path.dirname(tg_path), exist_ok=True)
return tg_path

Expand Down Expand Up @@ -1037,8 +1029,8 @@ def add_word_intervals(self, intervals: Union[CtmInterval, List[CtmInterval]]) -
intervals = [intervals]
if self.word_labels is None:
self.word_labels = []
for interval in intervals:
if self.begin is not None:
if self.is_segment:
for interval in intervals:
interval.shift_times(self.begin)
self.word_labels = intervals

Expand All @@ -1055,8 +1047,8 @@ def add_phone_intervals(self, intervals: Union[CtmInterval, List[CtmInterval]])
intervals = [intervals]
if self.phone_labels is None:
self.phone_labels = []
for interval in intervals:
if self.begin is not None:
if self.is_segment:
for interval in intervals:
interval.shift_times(self.begin)
self.phone_labels = intervals

Expand Down
9 changes: 4 additions & 5 deletions montreal_forced_aligner/segmenter.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,9 +425,8 @@ def export_files(self, output_directory: str) -> None:
output_directory: str
Directory to save segmentation TextGrids
"""
backup_output_directory = None
if not self.overwrite:
backup_output_directory = os.path.join(self.working_directory, "transcriptions")
os.makedirs(backup_output_directory, exist_ok=True)
if not self.overwrite and os.path.exists(output_directory):
output_directory = os.path.join(self.working_directory, "transcriptions")
os.makedirs(output_directory, exist_ok=True)
for f in self.files:
f.save(output_directory, backup_output_directory, text_type=TextFileType.TEXTGRID)
f.save(output_directory, text_type=TextFileType.TEXTGRID)
9 changes: 4 additions & 5 deletions montreal_forced_aligner/speaker_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,9 @@ def export_files(self, output_directory: str) -> None:
output_directory: str
Output directory to save files
"""
backup_output_directory = None
if not self.overwrite:
backup_output_directory = os.path.join(self.working_directory, "output")
os.makedirs(backup_output_directory, exist_ok=True)
if not self.overwrite and os.path.exists(output_directory):
output_directory = os.path.join(self.working_directory, "transcriptions")
os.makedirs(output_directory, exist_ok=True)

for file in self.files:
file.save(output_directory, backup_output_directory)
file.save(output_directory)
9 changes: 4 additions & 5 deletions montreal_forced_aligner/transcription/transcriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -1457,15 +1457,14 @@ def export_files(self, output_directory: str) -> None:
output_directory: str
Directory to save transcriptions
"""
backup_output_directory = None
if not self.overwrite:
backup_output_directory = os.path.join(self.working_directory, "transcriptions")
os.makedirs(backup_output_directory, exist_ok=True)
if not self.overwrite and os.path.exists(output_directory):
output_directory = os.path.join(self.working_directory, "transcriptions")
os.makedirs(output_directory, exist_ok=True)
self._load_transcripts()
for file in self.files:
if len(file.utterances) == 0:
self.logger.debug(f"Could not find any utterances for {file.name}")
file.save(output_directory, backup_output_directory, save_transcription=True)
file.save(output_directory, save_transcription=True)
if self.evaluation_mode:
shutil.copyfile(
os.path.join(self.evaluation_directory, "transcription_evaluation.csv"),
Expand Down
1 change: 0 additions & 1 deletion tests/test_alignment_pretrained.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ def test_align_sick(
a.align()
export_directory = os.path.join(temp_dir, "test_align_export")
shutil.rmtree(export_directory, ignore_errors=True)
os.makedirs(export_directory, exist_ok=True)
assert "AY_S" not in a.phone_mapping
assert "AY_S" not in a.default_dictionary.phone_mapping
assert "AY_S" not in a.default_dictionary.reversed_phone_mapping.values()
Expand Down
2 changes: 1 addition & 1 deletion tests/test_commandline_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def test_align_basic(
assert os.path.exists(align_temp_dir)

backup_textgrid_dir = os.path.join(align_temp_dir, "textgrids")
assert not os.listdir(backup_textgrid_dir)
assert not os.path.exists(backup_textgrid_dir)

command = [
"align",
Expand Down

0 comments on commit fa7b68f

Please sign in to comment.