Skip to content

Commit

Permalink
2.1.2 changes (MontrealCorpusTools#549)
Browse files Browse the repository at this point in the history
  • Loading branch information
mmcauliffe authored Feb 7, 2023
1 parent 6c589c5 commit 5e09c64
Show file tree
Hide file tree
Showing 22 changed files with 61 additions and 41 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ jobs:
- name: Set cache date
run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV

- uses: actions/cache@v2
- uses: actions/cache@v3
with:
path: ${{ matrix.prefix }}
key: ${{ matrix.label }}-conda-${{ hashFiles('environment.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }}
Expand All @@ -74,3 +74,9 @@ jobs:
- name: Run tests
shell: bash -l {0}
run: pytest -x ./tests

- name: "Upload coverage to Codecov"
uses: "codecov/codecov-action@v3"
with:
file: ./coverage.xml
fail_ci_if_error: false
2 changes: 1 addition & 1 deletion .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
- name: Set cache date
run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV

- uses: actions/cache@v2
- uses: actions/cache@v3
with:
path: /usr/share/miniconda3/envs/my-env
key: linux-64-conda-${{ hashFiles('environment.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }}
Expand Down
13 changes: 13 additions & 0 deletions docs/source/changelog/changelog_2.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,19 @@
2.1 Changelog
*************

2.1.2
=====

- Fixed a crash in training when the debug flag was not set
- Set the default PostgreSQL port to 5433 to avoid conflicts with any system installations
- Fixed a crash in textgrid export

2.1.1
=====

- Fixed a bug where the ``mfa`` command did not work from the command line
- Updated to be compatible with PraatIO 6.0

2.1.0
=====

Expand Down
4 changes: 2 additions & 2 deletions montreal_forced_aligner/abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,6 @@ def run(self) -> typing.Generator:
exc_type, exc_value, exc_traceback = sys.exc_info()
error_text = "\n".join(traceback.format_exception(exc_type, exc_value, exc_traceback))
raise MultiprocessingError(self.job_name, error_text)
finally:
self.db_engine.dispose()

def _run(self) -> None:
"""Internal logic for running the worker"""
Expand Down Expand Up @@ -649,6 +647,8 @@ def cleanup(self) -> None:

def save_worker_config(self) -> None:
"""Export worker configuration to its working directory"""
if not os.path.exists(self.output_directory):
return
with mfa_open(self.worker_config_path, "w") as f:
yaml.dump(self.configuration, f)

Expand Down
3 changes: 1 addition & 2 deletions montreal_forced_aligner/alignment/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -816,13 +816,12 @@ def collect_alignments(self) -> None:
phone_buf.truncate(0)
phone_buf.seek(0)
conn.commit()
cursor.close()
conn.close()
with self.session() as session:
if new_words:
session.execute(sqlalchemy.insert(Word).values(new_words))
session.commit()

with self.session() as session:
workflow = (
session.query(CorpusWorkflow)
.filter(CorpusWorkflow.current == True) # noqa
Expand Down
4 changes: 0 additions & 4 deletions montreal_forced_aligner/alignment/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,10 +510,6 @@ def align_utterances(self, training=False) -> None:
workflow.time_stamp = datetime.datetime.now()
workflow.score = log_like_sum / log_like_count
session.commit()
if not GLOBAL_CONFIG.debug:
for file in os.listdir(self.working_directory):
if any(file.startswith(x) for x in ["fsts."]):
os.remove(os.path.join(self.working_directory, file))
logger.debug(f"Alignment round took {time.time() - begin:.3f} seconds")

def compile_information(self) -> None:
Expand Down
4 changes: 3 additions & 1 deletion montreal_forced_aligner/alignment/multiprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import numpy as np
import pynini
import pywrapfst
import sqlalchemy
from sqlalchemy.orm import Session, joinedload, selectinload, subqueryload

from montreal_forced_aligner.corpus.features import (
Expand Down Expand Up @@ -2380,7 +2381,8 @@ def __init__(

def run(self) -> None:
"""Run the exporter function"""
with mfa_open(self.log_path, "w") as log_file, Session(self.db_engine) as session:
db_engine = sqlalchemy.create_engine(self.db_string)
with mfa_open(self.log_path, "w") as log_file, Session(db_engine) as session:
workflow: CorpusWorkflow = (
session.query(CorpusWorkflow)
.filter(CorpusWorkflow.current == True) # noqa
Expand Down
18 changes: 12 additions & 6 deletions montreal_forced_aligner/command_line/configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,6 @@
"silences and recombines compound words and clitics.",
default=None,
)
@click.option(
"--enable_detect_phone_set/--disable_detect_phone_set",
"detect_phone_set",
help="Turn on/off automatic detection of phone sets during training.",
default=None,
)
@click.option(
"--enable_terminal_colors/--disable_terminal_colors",
"terminal_colors",
Expand All @@ -108,6 +102,18 @@
help="Port for postgresql database.",
type=int,
)
@click.option(
"--bytes_limit",
default=None,
help="Bytes limit for Joblib Memory caching on disk.",
type=int,
)
@click.option(
"--seed",
default=None,
help="Random seed to set for various pseudorandom processes.",
type=int,
)
@click.help_option("-h", "--help")
def configure_cli(**kwargs) -> None:
"""
Expand Down
1 change: 0 additions & 1 deletion montreal_forced_aligner/command_line/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,6 @@ def check_databases(db_name=None) -> None:
)
conn = engine.connect()
conn.close()
engine.dispose()
return
except Exception:
pass
Expand Down
3 changes: 1 addition & 2 deletions montreal_forced_aligner/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,8 @@ class MfaProfile:
overwrite: bool = False
terminal_colors: bool = True
cleanup_textgrids: bool = True
detect_phone_set: bool = False
database_backend: str = "psycopg2"
database_port: int = 5432
database_port: int = 5433
bytes_limit: int = 100e6
seed: int = 0
num_jobs: int = 3
Expand Down
2 changes: 1 addition & 1 deletion montreal_forced_aligner/corpus/acoustic_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -1002,7 +1002,7 @@ def _load_corpus_from_source_mp(self) -> None:

if "error" in error_dict:
session.rollback()
raise error_dict["error"][1]
raise error_dict["error"]
self._finalize_load(session, import_data)
for k in ["sound_file_errors", "decode_error_files", "textgrid_read_errors"]:
if hasattr(self, k):
Expand Down
12 changes: 6 additions & 6 deletions montreal_forced_aligner/corpus/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -1986,7 +1986,7 @@ def process_ivectors(self, ivectors: np.ndarray, counts: np.ndarray = None) -> n
numpy.ndarray
Transformed ivectors
"""
ivectors = self.preprocess_ivectors(ivectors)
# ivectors = self.preprocess_ivectors(ivectors)
# ivectors = self.compute_pca_transform(ivectors)
ivectors = self.transform_ivectors(ivectors, counts=counts)
return ivectors
Expand All @@ -2005,7 +2005,6 @@ def preprocess_ivectors(self, ivectors: np.ndarray) -> np.ndarray:
numpy.ndarray
Preprocessed ivectors
"""
print(ivectors.shape)
ivectors = ivectors.T # DX N
dim = ivectors.shape[1]
# preprocessing
Expand Down Expand Up @@ -2123,16 +2122,18 @@ def transform_ivectors(self, ivectors: np.ndarray, counts: np.ndarray = None) ->
# Defaults : normalize_length(true), simple_length_norm(false)
X_new_sq = X_new**2

Dim = D.shape[0]
if counts is not None:
dot_prod = np.zeros((X_new.shape[0], 1))
for i in range(dot_prod.shape[0]):
inv_covar = self.psi + (1.0 / counts[i])
inv_covar = 1.0 / inv_covar
dot_prod[i] = np.dot(X_new_sq[i], inv_covar)
normfactor = np.sqrt(Dim / dot_prod)
else:
normfactor = np.sqrt(Dim) / np.sqrt(np.sum(X_new_sq))
inv_covar = (1.0 / (1.0 + self.psi)).reshape(-1, 1)
dot_prod = X_new_sq @ inv_covar # N X 1
Dim = D.shape[0]
normfactor = np.sqrt(Dim / dot_prod)

X_new = X_new * normfactor

return X_new
Expand Down Expand Up @@ -2208,4 +2209,3 @@ def _run(self) -> typing.Generator[typing.Tuple[int, int, int]]:
utt_id, ark_path = line.split(maxsplit=1)
utt_id = int(utt_id.split("-")[1])
yield utt_id, ark_path
engine.dispose()
7 changes: 5 additions & 2 deletions montreal_forced_aligner/corpus/ivector_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,11 @@ def collect_speaker_ivectors(self) -> None:
utt_count = int(utt_count)
utterance_counts[int(speaker)] = utt_count
copy_proc = subprocess.Popen(
[thirdparty_binary("copy-vector"), f"ark:{speaker_ivector_ark_path}", "ark,t:-"],
[
thirdparty_binary("ivector-subtract-global-mean"),
f"ark:{speaker_ivector_ark_path}",
"ark,t:-",
],
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL,
env=os.environ,
Expand All @@ -406,7 +410,6 @@ def collect_speaker_ivectors(self) -> None:
ivectors = np.array(ivectors)
if len(ivectors.shape) < 2:
ivectors = ivectors[np.newaxis, :]
print(ivectors.shape)
speaker_counts = np.array(speaker_counts)
ivectors = self.plda.process_ivectors(ivectors, counts=speaker_counts)
for i, speaker_id in enumerate(speaker_ids):
Expand Down
5 changes: 1 addition & 4 deletions montreal_forced_aligner/diarization/multiprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,7 @@ def _run(self) -> typing.Generator[typing.Tuple[int, int, int]]:
utterance_counts[int(speaker)] = utt_count
input_proc = subprocess.Popen(
[
thirdparty_binary("copy-vector"),
thirdparty_binary("ivector-subtract-global-mean"),
f"ark:{self.train_ivector_path}",
"ark,t:-",
],
Expand Down Expand Up @@ -537,8 +537,6 @@ def _run(self) -> typing.Generator[typing.Tuple[int, int, int]]:
for line in input_proc.stdout:
lines.append(line)
input_proc.wait()
for line in input_proc.stdout:
lines.append(line)
with Session(self.db_engine) as session:

job: Job = (
Expand Down Expand Up @@ -854,5 +852,4 @@ def run(self) -> None:
except Exception as e:
self.return_q.put(e)
finally:
db_engine.dispose()
self.finished_adding.stop()
3 changes: 0 additions & 3 deletions montreal_forced_aligner/g2p/phonetisaurus_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,6 @@ def run(self) -> None:
self.stopped.stop()
self.return_queue.put(e)
finally:
engine.dispose()
self.finished.stop()
del far_writer

Expand Down Expand Up @@ -368,7 +367,6 @@ def run(self) -> None:
)
for symbol, sym_id in query:
symbol_mapper[symbol_table.find(symbol)] = sym_id
engine.dispose()
while not far_reader.done():
if self.stopped.stop_check():
break
Expand Down Expand Up @@ -496,7 +494,6 @@ def run(self) -> None:
self.return_queue.put(e)
raise
finally:
engine.dispose()
if count >= 1:
self.return_queue.put(count)
self.finished.stop()
Expand Down
2 changes: 1 addition & 1 deletion tests/test_commandline_adapt.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def test_adapt_multilingual(
basic_align_config_path,
"-q",
"--clean",
"--debug",
"--no_debug",
]
result = click.testing.CliRunner(mix_stderr=False, echo_stdin=True).invoke(
mfa_cli, command, catch_exceptions=True
Expand Down
2 changes: 1 addition & 1 deletion tests/test_commandline_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def test_align_duplicated(
basic_align_config_path,
"-q",
"--clean",
"--debug",
"--no_debug",
]
result = click.testing.CliRunner(mix_stderr=False, echo_stdin=True).invoke(
mfa_cli, command, catch_exceptions=True
Expand Down
2 changes: 1 addition & 1 deletion tests/test_commandline_create_segments.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def test_create_segments_speechbrain(
os.path.join(temp_dir, "sad_cli_speechbrain"),
"-q",
"--clean",
"--debug",
"--no_debug",
"-v",
"--speechbrain",
"--config_path",
Expand Down
2 changes: 2 additions & 0 deletions tests/test_commandline_diarize_speakers.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ def test_cluster_speechbrain(
"3",
"--clean",
"--no_use_pca",
"--no_debug",
"--evaluate",
]
result = click.testing.CliRunner(mix_stderr=False, echo_stdin=True).invoke(
Expand Down Expand Up @@ -132,6 +133,7 @@ def test_classify_speechbrain(
os.path.join(temp_dir, "diarize_cli"),
"--classify",
"--clean",
"--no_debug",
"--evaluate",
]
result = click.testing.CliRunner(mix_stderr=False, echo_stdin=True).invoke(
Expand Down
2 changes: 1 addition & 1 deletion tests/test_commandline_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_train_and_align_basic_speaker_dict(
basic_train_config_path,
"-q",
"--clean",
"--debug",
"--no_debug",
"--output_directory",
output_directory,
"--single_speaker",
Expand Down
2 changes: 1 addition & 1 deletion tests/test_commandline_transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_transcribe_arpa(
os.path.join(temp_dir, "transcribe_cli"),
"-q",
"--clean",
"--debug",
"--no_debug",
"-v",
"--use_mp",
"false",
Expand Down
1 change: 1 addition & 0 deletions tests/test_commandline_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def test_validate_training_corpus(
os.path.join(temp_dir, "validation"),
"-q",
"--clean",
"--no_debug",
"--config_path",
mono_train_config_path,
"--test_transcriptions",
Expand Down

0 comments on commit 5e09c64

Please sign in to comment.