Skip to content

Commit

Permalink
Fix bugs from filtering short utterances
Browse files: browse the repository at this point in the history
  • Loading branch information
mmcauliffe committed Feb 12, 2021
1 parent 16daa2e commit f069159
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 6 deletions.
6 changes: 4 additions & 2 deletions montreal_forced_aligner/corpus/base.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -439,7 +439,7 @@ def combine_feats(self):
self.ignored_utterances.append(utt)
run_filter = True
else:
self.utterance_lengths[line[0]] = int(line[1])
self.utterance_lengths[utt] = length
lengths_out_f.write(line + '\n')
if run_filter:
filtered = filter_scp(self.ignored_utterances, path, exclude=True)
Expand All @@ -456,13 +456,15 @@ def combine_feats(self):
continue
self.feat_mapping[f[0]] = f[1]
outf.write(line + '\n')
for utt in self.utt_speak_mapping.keys():
if utt not in self.feat_mapping and utt not in self.ignored_utterances:
self.ignored_utterances.append(utt)
if self.ignored_utterances:
for k, v in self.speak_utt_mapping.items():
self.speak_utt_mapping[k] = list(filter(lambda x: x in self.feat_mapping, v))
self.logger.warning('There were some utterances ignored due to short duration, see the log file for full '
'details or run `mfa validate` on the corpus.')
self.logger.debug('The following utterances were too short to run alignment: {}'.format(' ,'.join(self.ignored_utterances)))
self.figure_utterance_lengths()

def figure_utterance_lengths(self):
feat_path = os.path.join(self.output_directory, 'feats.scp')
Expand Down
1 change: 0 additions & 1 deletion montreal_forced_aligner/models.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -195,7 +195,6 @@ def add_meta_file(self, dictionary, architecture):
def meta(self):
if not self._meta:
meta_path = os.path.join(self.dirname, 'meta.yaml')
print(meta_path)
if not os.path.exists(meta_path):
self._meta = {'version': '0.9.0',
'architecture': 'phonetisaurus'}
Expand Down
6 changes: 4 additions & 2 deletions tests/test_corpus.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -93,13 +93,15 @@ def test_short_segments(basic_dict_path, shortsegments_corpus_dir, temp_dir, def
corpus = AlignableCorpus(shortsegments_corpus_dir, temp, use_mp=False)
corpus.initialize_corpus(dictionary)
default_feature_config.generate_features(corpus)
assert len(corpus.feat_mapping.keys()) == 2
assert len(corpus.feat_mapping.keys()) == 1
assert len(corpus.utt_speak_mapping.keys()) == 3
assert len(corpus.speak_utt_mapping.keys()) == 1
assert len(corpus.text_mapping.keys()) == 3
assert len(corpus.utt_wav_mapping.keys()) == 1
assert len(corpus.segments.keys()) == 3
assert len(corpus.ignored_utterances) == 1
print(corpus.segments)
print(corpus.ignored_utterances)
assert len(corpus.ignored_utterances) == 2


def test_speaker_groupings(large_prosodylab_format_directory, temp_dir, large_dataset_dictionary, default_feature_config):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_g2p.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -24,7 +24,7 @@ def test_training(sick_dict, sick_g2p_model_path, temp_dir):
trainer.validate()

trainer.train()
model = G2PModel(sick_g2p_model_path)
model = G2PModel(sick_g2p_model_path, root_directory=temp_dir)
assert model.meta['version'] == __version__
assert model.meta['architecture'] == 'pynini'
assert model.meta['phones'] == sick_dict.nonsil_phones
Expand Down

0 comments on commit f069159

Please sign in to comment.