Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2.2.4 #576

Merged
merged 42 commits into from
Mar 7, 2023
Merged

2.2.4 #576

Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
2780f0f
Fix error in some common voice japanese file paths
mmcauliffe Feb 17, 2023
22ca4ab
Add extra validation to textgrid interval start/end timestamps
mmcauliffe Feb 17, 2023
059daf1
Ensure GITHUB_TOKEN is available for tests
mmcauliffe Feb 17, 2023
e0b7490
Try to fix failing test runner
mmcauliffe Feb 18, 2023
a277966
Fix error in processing opus files
mmcauliffe Feb 18, 2023
f80b0f7
Remove GITHUB_TOKEN env variable
mmcauliffe Feb 18, 2023
bddb49b
Fix for logging disappearing after API runs
mmcauliffe Feb 18, 2023
7074ade
Attempt to fix CI
mmcauliffe Feb 18, 2023
ac241e7
Fix environment yaml
mmcauliffe Feb 18, 2023
55cdd7f
Fix mamba install
mmcauliffe Feb 18, 2023
9f0d517
Add clean up for initial tests?
mmcauliffe Feb 18, 2023
36046fe
Fix failing test
mmcauliffe Feb 18, 2023
ea60694
Add back in github token
mmcauliffe Feb 18, 2023
b178c0d
Attempt to fix test runner
mmcauliffe Feb 19, 2023
09454d9
remove test timing out
mmcauliffe Feb 19, 2023
8eca112
attempt to fix tests
mmcauliffe Feb 20, 2023
69ee10f
Attempt to fix github actions
mmcauliffe Feb 23, 2023
ff66e90
Update connections to use sockets
mmcauliffe Mar 2, 2023
38e3cd9
Switch over to using a dedicated server command
mmcauliffe Mar 2, 2023
8849a9d
Fix initialize bug on linux
mmcauliffe Mar 2, 2023
c9715e7
Add log output for database initialization
mmcauliffe Mar 2, 2023
0216fed
Config debugging on CI
mmcauliffe Mar 2, 2023
36ee223
Remove github token
mmcauliffe Mar 2, 2023
e8e70b8
Fix typo
mmcauliffe Mar 2, 2023
c2c71e6
Attempt to fix CI
mmcauliffe Mar 2, 2023
5d781eb
Another attempt
mmcauliffe Mar 2, 2023
8087aea
Another attempt
mmcauliffe Mar 3, 2023
fdc4f89
Fix wrong error message on start up
mmcauliffe Mar 3, 2023
13f7d6d
Another attempt
mmcauliffe Mar 3, 2023
d16ada2
Ensure sessions closing
mmcauliffe Mar 3, 2023
bb7efcb
Add support for piping to mfa g2p
mmcauliffe Mar 5, 2023
d6bd311
Add more debugging information for CI failures
mmcauliffe Mar 5, 2023
3a96687
Add timeout to tests
mmcauliffe Mar 5, 2023
f5998ec
Increase test timeout
mmcauliffe Mar 5, 2023
9026b40
reduce timeout
mmcauliffe Mar 5, 2023
808b013
reduce timeout
mmcauliffe Mar 6, 2023
a11746d
timeout on the failing test
mmcauliffe Mar 6, 2023
8525d4d
Add timeouts to all alignment tests
mmcauliffe Mar 6, 2023
003443f
Remove sqlalchemy debug output
mmcauliffe Mar 6, 2023
10d023f
add back in github token
mmcauliffe Mar 6, 2023
e929572
Add support for per-utterance G2P
mmcauliffe Mar 6, 2023
fd550ad
Clean up
mmcauliffe Mar 7, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Clean up
  • Loading branch information
mmcauliffe committed Mar 7, 2023
commit fd550ade61f60dba31c4a364d41e62080faef2cd
26 changes: 0 additions & 26 deletions ci/mfa_publish.yml

This file was deleted.

5 changes: 4 additions & 1 deletion montreal_forced_aligner/command_line/g2p.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,10 @@ def g2p_cli(context, **kwargs) -> None:
if per_utterance:
g2p.num_pronunciations = 1
elif use_stdin:
g2p = PyniniConsoleGenerator(g2p_model_path=g2p_model_path)
g2p = PyniniConsoleGenerator(
g2p_model_path=g2p_model_path,
**PyniniWordListGenerator.parse_parameters(config_path, context.params, context.args),
)
else:
g2p = PyniniWordListGenerator(
word_list_path=input_path,
Expand Down
6 changes: 6 additions & 0 deletions montreal_forced_aligner/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@
"update_command_history",
"MfaConfiguration",
"GLOBAL_CONFIG",
"MFA_ROOT_ENVIRONMENT_VARIABLE",
"MFA_PROFILE_VARIABLE",
"IVECTOR_DIMENSION",
"XVECTOR_DIMENSION",
"PLDA_DIMENSION",
"MEMORY",
]

MFA_ROOT_ENVIRONMENT_VARIABLE = "MFA_ROOT_DIR"
Expand Down
142 changes: 90 additions & 52 deletions montreal_forced_aligner/corpus/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
"plda_distance",
"plda_log_likelihood",
"score_plda",
"online_feature_proc",
"compute_transform_process",
]

Expand Down Expand Up @@ -187,8 +188,6 @@ def compute_mfcc_process(
Options for computing MFCC features
min_length: float
Minimum length of segments in seconds
no_logging: bool
Flag for logging progress information to log_file rather than a subprocess pipe

Returns
-------
Expand Down Expand Up @@ -267,14 +266,10 @@ def compute_pitch_process(
Wav scp to use
segments: str
Segments scp to use
mfcc_options: dict[str, Any]
Options for computing MFCC features
pitch_options: dict[str, Any]
Options for computing pitch features
min_length: float
Minimum length of segments in seconds
no_logging: bool
Flag for logging progress information to log_file rather than a subprocess pipe

Returns
-------
Expand Down Expand Up @@ -484,7 +479,7 @@ def __init__(self, args: MfccArguments):
def _run(self) -> typing.Generator[int]:
"""Run the function"""
with Session(self.db_engine()) as session, mfa_open(self.log_path, "w") as log_file:
job: Job = session.get(Job, self.job_name)
job: typing.Optional[Job] = session.get(Job, self.job_name)
feats_scp_path = job.construct_path(self.data_directory, "feats", "scp")
pitch_scp_path = job.construct_path(self.data_directory, "pitch", "scp")
segments_scp_path = job.construct_path(self.data_directory, "segments", "scp")
Expand Down Expand Up @@ -592,7 +587,7 @@ def __init__(self, args: FinalFeatureArguments):
def _run(self) -> typing.Generator[int]:
"""Run the function"""
with Session(self.db_engine()) as session, mfa_open(self.log_path, "w") as log_file:
job: Job = session.get(Job, self.job_name)
job: typing.Optional[Job] = session.get(Job, self.job_name)
feats_scp_path = job.construct_path(self.data_directory, "feats", "scp")
temp_scp_path = job.construct_path(self.data_directory, "final_features", "scp")
utt2spk_path = job.construct_path(self.data_directory, "utt2spk", "scp")
Expand Down Expand Up @@ -737,7 +732,7 @@ def __init__(self, args: PitchArguments):
def _run(self) -> typing.Generator[int]:
"""Run the function"""
with Session(self.db_engine()) as session, mfa_open(self.log_path, "w") as log_file:
job: Job = session.get(Job, self.job_name)
job: typing.Optional[Job] = session.get(Job, self.job_name)

feats_scp_path = job.construct_path(self.data_directory, "pitch", "scp")
raw_ark_path = job.construct_path(self.data_directory, "pitch", "ark")
Expand Down Expand Up @@ -805,7 +800,7 @@ def __init__(self, args: PitchRangeArguments):
def _run(self) -> typing.Generator[int]:
"""Run the function"""
with Session(self.db_engine()) as session, mfa_open(self.log_path, "w") as log_file:
job: Job = session.get(Job, self.job_name)
job: typing.Optional[Job] = session.get(Job, self.job_name)
wav_path = job.construct_path(self.data_directory, "wav", "scp")
segment_path = job.construct_path(self.data_directory, "segments", "scp")
min_length = 0.1
Expand Down Expand Up @@ -1248,22 +1243,19 @@ def model_path(self) -> str: # needed for fmllr
raise NotImplementedError

@property
@abstractmethod
def working_directory(self) -> Path:
"""Abstract method for working directory"""
...
raise NotImplementedError

@property
@abstractmethod
def corpus_output_directory(self) -> str:
"""Abstract method for working directory of corpus"""
...
raise NotImplementedError

@property
@abstractmethod
def data_directory(self) -> str:
"""Abstract method for corpus data directory"""
...
raise NotImplementedError

@property
def feature_options(self) -> MetaDict:
Expand Down Expand Up @@ -1302,10 +1294,9 @@ def feature_options(self) -> MetaDict:
}
return options

@abstractmethod
def calc_fmllr(self) -> None:
"""Abstract method for calculating fMLLR transforms"""
...
raise NotImplementedError

@property
def fmllr_options(self) -> MetaDict:
Expand Down Expand Up @@ -1665,10 +1656,8 @@ def plda_distance_matrix(
Ivectors to compare test ivectors against against 1 X N X D
test_ivectors : numpy.ndarray
Ivectors to compare against training examples 1 X M X D
normalize: bool
Flag for normalizing matrix by the maximum value
distance: bool
Flag for converting PLDA log likelihood ratios into a distance metric
psi: numpy.ndarray
Psi matrix from PLDA model

Returns
-------
Expand All @@ -1694,14 +1683,10 @@ def pairwise_plda_distance_matrix(

Parameters
----------
train_ivectors : numpy.ndarray
Ivectors to compare test ivectors against against 1 X N X D
test_ivectors : numpy.ndarray
Ivectors to compare against training examples 1 X M X D
normalize: bool
Flag for normalizing matrix by the maximum value
distance: bool
Flag for converting PLDA log likelihood ratios into a distance metric
ivectors : numpy.ndarray
Ivectors to compare pairwise
psi: numpy.ndarray
Psi matrix from PLDA model

Returns
-------
Expand Down Expand Up @@ -1773,16 +1758,41 @@ def score_plda(
threshold = np.max(loglikes)
loglikes -= threshold
loglikes *= -1
if normalize:
# loglike_ratio -= np.min(loglike_ratio)
loglikes /= threshold
if normalize:
loglikes /= threshold
return loglikes


@njit
def compute_classification_stats(
speaker_ivectors: np.ndarray, psi: np.ndarray, counts: np.ndarray
):
) -> typing.Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
"""
Precomputes necessary stats for training ivectors to save time on classification in
:func:`~montreal_forced_aligner.corpus.features.classify_plda`.

Parameters
----------
speaker_ivectors: numpy.ndarray
Training speaker ivectors
psi: numpy.ndarray
Psi matrix from PLDA model
counts: numpy.ndarray
Utterance counts for each speaker

Returns
-------
numpy.ndarray
PLDA mean vector
numpy.ndarray
Variance for given class
numpy.ndarray
Logdet for given class
numpy.ndarray
Variance for no class
numpy.ndarray
Logdet for no class
"""
mean = (counts.reshape(-1, 1) * psi.reshape(1, -1)) / (
counts.reshape(-1, 1) * psi.reshape(1, -1) + 1.0
)
Expand All @@ -1802,11 +1812,11 @@ def compute_classification_stats(
@njit(parallel=True)
def classify_plda(
utterance_ivector: np.ndarray,
mean,
variance_given,
logdet_given,
variance_without,
logdet_without,
mean: np.ndarray,
variance_given: np.ndarray,
logdet_given: np.ndarray,
variance_without: np.ndarray,
logdet_without: np.ndarray,
) -> typing.Tuple[int, float]:
"""
Adapted from https://github.com/prachiisc/PLDA_scoring/blob/master/PLDA_scoring.py#L177
Expand All @@ -1816,6 +1826,16 @@ def classify_plda(
----------
utterance_ivector : numpy.ndarray
Utterance ivector to compare against
mean: numpy.ndarray
From :func:`~montreal_forced_aligner.corpus.features.compute_classification_stats`
variance_given: numpy.ndarray
From :func:`~montreal_forced_aligner.corpus.features.compute_classification_stats`
logdet_given: numpy.ndarray
From :func:`~montreal_forced_aligner.corpus.features.compute_classification_stats`
variance_without: numpy.ndarray
From :func:`~montreal_forced_aligner.corpus.features.compute_classification_stats`
logdet_without: numpy.ndarray
From :func:`~montreal_forced_aligner.corpus.features.compute_classification_stats`

Returns
-------
Expand Down Expand Up @@ -1860,10 +1880,10 @@ def score_plda_train_counts(
Ivectors to compare test ivectors against against 1 X N X D
test_ivectors : numpy.ndarray
Ivectors to compare against training examples 1 X M X D
normalize: bool
Flag for normalizing matrix by the maximum value
distance: bool
Flag for converting PLDA log likelihood ratios into a distance metric
psi: numpy.ndarray
Psi matrix from PLDA model
counts: numpy.ndarray
Utterance counts for each speaker

Returns
-------
Expand Down Expand Up @@ -2065,14 +2085,9 @@ def compute_pca_transform(self, ivectors: np.ndarray) -> np.ndarray:
self.apply_transform()
return newX

def apply_transform(self):
def apply_transform(self) -> None:
"""
Adapted from https://github.com/prachiisc/PLDA_scoring/blob/master/PLDA_scoring.py#L101

Parameters
----------
transform_in : numpy.ndarray
PCA transform
"""

mean_plda = self.mean
Expand Down Expand Up @@ -2110,6 +2125,8 @@ def transform_ivectors(self, ivectors: np.ndarray, counts: np.ndarray = None) ->
----------
ivectors : numpy.ndarray
Input ivectors
counts : numpy.ndarray, optional
Utterance counts per speaker

Returns
-------
Expand Down Expand Up @@ -2150,6 +2167,7 @@ def transform_ivectors(self, ivectors: np.ndarray, counts: np.ndarray = None) ->
class ExportIvectorsFunction(KaldiFunction):
"""
Multiprocessing function to compute voice activity detection

See Also
--------
:meth:`.AcousticCorpusMixin.compute_vad`
Expand All @@ -2158,6 +2176,7 @@ class ExportIvectorsFunction(KaldiFunction):
Job method for generating arguments for this function
:kaldi_src:`compute-vad`
Relevant Kaldi binary

Parameters
----------
args: :class:`~montreal_forced_aligner.corpus.features.VadArguments`
Expand Down Expand Up @@ -2232,8 +2251,27 @@ def online_feature_proc(
mfcc_options: MetaDict,
pitch_options: MetaDict,
lda_options: MetaDict,
log_file,
log_file: io.FileIO,
) -> subprocess.Popen:
"""
Generate a subprocess Popen object that processes features for online alignment, decoding, etc.

Parameters
----------
working_directory: :class:`~pathlib.Path`
wav_path: :class:`~pathlib.Path`
segment_path: :class:`~pathlib.Path`
mfcc_options: dict[str, Any]
pitch_options: dict[str, Any]
lda_options: dict[str, Any]
log_file: writable buffer


Returns
-------
subprocess.Popen
Process that
"""
mfcc_ark_path = working_directory.joinpath("mfcc.ark")
pitch_ark_path = working_directory.joinpath("pitch.ark")
feats_ark_path = working_directory.joinpath("feats.ark")
Expand Down Expand Up @@ -2301,14 +2339,14 @@ def online_feature_proc(
mfcc_proc.stdin.write(line)
mfcc_proc.stdin.flush()
if use_pitch:
pitch_proc.stdin.write(line)
pitch_proc.stdin.write(line) # noqa
pitch_proc.stdin.flush()
mfcc_proc.stdin.close()
if use_pitch:
pitch_proc.stdin.close()
cmvn_proc.wait()
if use_pitch:
pitch_copy_proc.wait()
pitch_copy_proc.wait() # noqa
if use_pitch:
paste_proc = subprocess.Popen(
[
Expand Down
Loading