Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2.2.4 #576

Merged
merged 42 commits into from
Mar 7, 2023
Merged

2.2.4 #576

Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
2780f0f
Fix error in some common voice japanese file paths
mmcauliffe Feb 17, 2023
22ca4ab
Add extra validation to textgrid interval start/end timestamps
mmcauliffe Feb 17, 2023
059daf1
Ensure GITHUB_TOKEN is available for tests
mmcauliffe Feb 17, 2023
e0b7490
Try to fix failing test runner
mmcauliffe Feb 18, 2023
a277966
Fix error in processing opus files
mmcauliffe Feb 18, 2023
f80b0f7
Remove GITHUB_TOKEN env variable
mmcauliffe Feb 18, 2023
bddb49b
Fix for logging disappearing after API runs
mmcauliffe Feb 18, 2023
7074ade
Attempt to fix CI
mmcauliffe Feb 18, 2023
ac241e7
Fix environment yaml
mmcauliffe Feb 18, 2023
55cdd7f
Fix mamba install
mmcauliffe Feb 18, 2023
9f0d517
Add clean up for initial tests?
mmcauliffe Feb 18, 2023
36046fe
Fix failing test
mmcauliffe Feb 18, 2023
ea60694
Add back in github token
mmcauliffe Feb 18, 2023
b178c0d
Attempt to fix test runner
mmcauliffe Feb 19, 2023
09454d9
remove test timing out
mmcauliffe Feb 19, 2023
8eca112
attempt to fix tests
mmcauliffe Feb 20, 2023
69ee10f
Attempt to fix github actions
mmcauliffe Feb 23, 2023
ff66e90
Update connections to use sockets
mmcauliffe Mar 2, 2023
38e3cd9
Switch over to using a dedicated server command
mmcauliffe Mar 2, 2023
8849a9d
Fix initialize bug on linux
mmcauliffe Mar 2, 2023
c9715e7
Add log output for database initialization
mmcauliffe Mar 2, 2023
0216fed
Config debugging on CI
mmcauliffe Mar 2, 2023
36ee223
Remove github token
mmcauliffe Mar 2, 2023
e8e70b8
Fix typo
mmcauliffe Mar 2, 2023
c2c71e6
Attempt to fix CI
mmcauliffe Mar 2, 2023
5d781eb
Another attempt
mmcauliffe Mar 2, 2023
8087aea
Another attempt
mmcauliffe Mar 3, 2023
fdc4f89
Fix wrong error message on start up
mmcauliffe Mar 3, 2023
13f7d6d
Another attempt
mmcauliffe Mar 3, 2023
d16ada2
Ensure sessions closing
mmcauliffe Mar 3, 2023
bb7efcb
Add support for piping to mfa g2p
mmcauliffe Mar 5, 2023
d6bd311
Add more debugging information for CI failures
mmcauliffe Mar 5, 2023
3a96687
Add timeout to tests
mmcauliffe Mar 5, 2023
f5998ec
Increase test timeout
mmcauliffe Mar 5, 2023
9026b40
reduce timeout
mmcauliffe Mar 5, 2023
808b013
reduce timeout
mmcauliffe Mar 6, 2023
a11746d
timeout on the failing test
mmcauliffe Mar 6, 2023
8525d4d
Add timeouts to all alignment tests
mmcauliffe Mar 6, 2023
003443f
Remove sqlalchemy debug output
mmcauliffe Mar 6, 2023
10d023f
add back in github token
mmcauliffe Mar 6, 2023
e929572
Add support for per-utterance G2P
mmcauliffe Mar 6, 2023
fd550ad
Clean up
mmcauliffe Mar 7, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Clean up
  • Loading branch information
mmcauliffe committed Mar 7, 2023
commit fd550ade61f60dba31c4a364d41e62080faef2cd
26 changes: 0 additions & 26 deletions ci/mfa_publish.yml

This file was deleted.

5 changes: 4 additions & 1 deletion montreal_forced_aligner/command_line/g2p.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,10 @@ def g2p_cli(context, **kwargs) -> None:
if per_utterance:
g2p.num_pronunciations = 1
elif use_stdin:
g2p = PyniniConsoleGenerator(g2p_model_path=g2p_model_path)
g2p = PyniniConsoleGenerator(
g2p_model_path=g2p_model_path,
**PyniniWordListGenerator.parse_parameters(config_path, context.params, context.args),
)
else:
g2p = PyniniWordListGenerator(
word_list_path=input_path,
Expand Down
6 changes: 6 additions & 0 deletions montreal_forced_aligner/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@
"update_command_history",
"MfaConfiguration",
"GLOBAL_CONFIG",
"MFA_ROOT_ENVIRONMENT_VARIABLE",
"MFA_PROFILE_VARIABLE",
"IVECTOR_DIMENSION",
"XVECTOR_DIMENSION",
"PLDA_DIMENSION",
"MEMORY",
]

MFA_ROOT_ENVIRONMENT_VARIABLE = "MFA_ROOT_DIR"
Expand Down
142 changes: 90 additions & 52 deletions montreal_forced_aligner/corpus/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
"plda_distance",
"plda_log_likelihood",
"score_plda",
"online_feature_proc",
"compute_transform_process",
]

Expand Down Expand Up @@ -187,8 +188,6 @@ def compute_mfcc_process(
Options for computing MFCC features
min_length: float
Minimum length of segments in seconds
no_logging: bool
Flag for logging progress information to log_file rather than a subprocess pipe

Returns
-------
Expand Down Expand Up @@ -267,14 +266,10 @@ def compute_pitch_process(
Wav scp to use
segments: str
Segments scp to use
mfcc_options: dict[str, Any]
Options for computing MFCC features
pitch_options: dict[str, Any]
Options for computing pitch features
min_length: float
Minimum length of segments in seconds
no_logging: bool
Flag for logging progress information to log_file rather than a subprocess pipe

Returns
-------
Expand Down Expand Up @@ -484,7 +479,7 @@ def __init__(self, args: MfccArguments):
def _run(self) -> typing.Generator[int]:
"""Run the function"""
with Session(self.db_engine()) as session, mfa_open(self.log_path, "w") as log_file:
job: Job = session.get(Job, self.job_name)
job: typing.Optional[Job] = session.get(Job, self.job_name)
feats_scp_path = job.construct_path(self.data_directory, "feats", "scp")
pitch_scp_path = job.construct_path(self.data_directory, "pitch", "scp")
segments_scp_path = job.construct_path(self.data_directory, "segments", "scp")
Expand Down Expand Up @@ -592,7 +587,7 @@ def __init__(self, args: FinalFeatureArguments):
def _run(self) -> typing.Generator[int]:
"""Run the function"""
with Session(self.db_engine()) as session, mfa_open(self.log_path, "w") as log_file:
job: Job = session.get(Job, self.job_name)
job: typing.Optional[Job] = session.get(Job, self.job_name)
feats_scp_path = job.construct_path(self.data_directory, "feats", "scp")
temp_scp_path = job.construct_path(self.data_directory, "final_features", "scp")
utt2spk_path = job.construct_path(self.data_directory, "utt2spk", "scp")
Expand Down Expand Up @@ -737,7 +732,7 @@ def __init__(self, args: PitchArguments):
def _run(self) -> typing.Generator[int]:
"""Run the function"""
with Session(self.db_engine()) as session, mfa_open(self.log_path, "w") as log_file:
job: Job = session.get(Job, self.job_name)
job: typing.Optional[Job] = session.get(Job, self.job_name)

feats_scp_path = job.construct_path(self.data_directory, "pitch", "scp")
raw_ark_path = job.construct_path(self.data_directory, "pitch", "ark")
Expand Down Expand Up @@ -805,7 +800,7 @@ def __init__(self, args: PitchRangeArguments):
def _run(self) -> typing.Generator[int]:
"""Run the function"""
with Session(self.db_engine()) as session, mfa_open(self.log_path, "w") as log_file:
job: Job = session.get(Job, self.job_name)
job: typing.Optional[Job] = session.get(Job, self.job_name)
wav_path = job.construct_path(self.data_directory, "wav", "scp")
segment_path = job.construct_path(self.data_directory, "segments", "scp")
min_length = 0.1
Expand Down Expand Up @@ -1248,22 +1243,19 @@ def model_path(self) -> str: # needed for fmllr
raise NotImplementedError

@property
@abstractmethod
def working_directory(self) -> Path:
"""Abstract method for working directory"""
...
raise NotImplementedError

@property
@abstractmethod
def corpus_output_directory(self) -> str:
"""Abstract method for working directory of corpus"""
...
raise NotImplementedError

@property
@abstractmethod
def data_directory(self) -> str:
"""Abstract method for corpus data directory"""
...
raise NotImplementedError

@property
def feature_options(self) -> MetaDict:
Expand Down Expand Up @@ -1302,10 +1294,9 @@ def feature_options(self) -> MetaDict:
}
return options

@abstractmethod
def calc_fmllr(self) -> None:
"""Abstract method for calculating fMLLR transforms"""
...
raise NotImplementedError

@property
def fmllr_options(self) -> MetaDict:
Expand Down Expand Up @@ -1665,10 +1656,8 @@ def plda_distance_matrix(
Ivectors to compare test ivectors against against 1 X N X D
test_ivectors : numpy.ndarray
Ivectors to compare against training examples 1 X M X D
normalize: bool
Flag for normalizing matrix by the maximum value
distance: bool
Flag for converting PLDA log likelihood ratios into a distance metric
psi: numpy.ndarray
Psi matrix from PLDA model

Returns
-------
Expand All @@ -1694,14 +1683,10 @@ def pairwise_plda_distance_matrix(

Parameters
----------
train_ivectors : numpy.ndarray
Ivectors to compare test ivectors against against 1 X N X D
test_ivectors : numpy.ndarray
Ivectors to compare against training examples 1 X M X D
normalize: bool
Flag for normalizing matrix by the maximum value
distance: bool
Flag for converting PLDA log likelihood ratios into a distance metric
ivectors : numpy.ndarray
Ivectors to compare pairwise
psi: numpy.ndarray
Psi matrix from PLDA model

Returns
-------
Expand Down Expand Up @@ -1773,16 +1758,41 @@ def score_plda(
threshold = np.max(loglikes)
loglikes -= threshold
loglikes *= -1
if normalize:
# loglike_ratio -= np.min(loglike_ratio)
loglikes /= threshold
if normalize:
loglikes /= threshold
return loglikes


@njit
def compute_classification_stats(
speaker_ivectors: np.ndarray, psi: np.ndarray, counts: np.ndarray
):
) -> typing.Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
"""
Precomputes necessary stats for training ivectors to save time on classification in
:func:`~montreal_forced_aligner.corpus.features.classify_plda`.

Parameters
----------
speaker_ivectors: numpy.ndarray
Training speaker ivectors
psi: numpy.ndarray
Psi matrix from PLDA model
counts: numpy.ndarray
Utterance counts for each speaker

Returns
-------
numpy.ndarray
PLDA mean vector
numpy.ndarray
Variance for given class
numpy.ndarray
Logdet for given class
numpy.ndarray
Variance for no class
numpy.ndarray
Logdet for no class
"""
mean = (counts.reshape(-1, 1) * psi.reshape(1, -1)) / (
counts.reshape(-1, 1) * psi.reshape(1, -1) + 1.0
)
Expand All @@ -1802,11 +1812,11 @@ def compute_classification_stats(
@njit(parallel=True)
def classify_plda(
utterance_ivector: np.ndarray,
mean,
variance_given,
logdet_given,
variance_without,
logdet_without,
mean: np.ndarray,
variance_given: np.ndarray,
logdet_given: np.ndarray,
variance_without: np.ndarray,
logdet_without: np.ndarray,
) -> typing.Tuple[int, float]:
"""
Adapted from https://github.com/prachiisc/PLDA_scoring/blob/master/PLDA_scoring.py#L177
Expand All @@ -1816,6 +1826,16 @@ def classify_plda(
----------
utterance_ivector : numpy.ndarray
Utterance ivector to compare against
mean: numpy.ndarray
From :func:`~montreal_forced_aligner.corpus.features.compute_classification_stats`
variance_given: numpy.ndarray
From :func:`~montreal_forced_aligner.corpus.features.compute_classification_stats`
logdet_given: numpy.ndarray
From :func:`~montreal_forced_aligner.corpus.features.compute_classification_stats`
variance_without: numpy.ndarray
From :func:`~montreal_forced_aligner.corpus.features.compute_classification_stats`
logdet_without: numpy.ndarray
From :func:`~montreal_forced_aligner.corpus.features.compute_classification_stats`

Returns
-------
Expand Down Expand Up @@ -1860,10 +1880,10 @@ def score_plda_train_counts(
Ivectors to compare test ivectors against against 1 X N X D
test_ivectors : numpy.ndarray
Ivectors to compare against training examples 1 X M X D
normalize: bool
Flag for normalizing matrix by the maximum value
distance: bool
Flag for converting PLDA log likelihood ratios into a distance metric
psi: numpy.ndarray
Psi matrix from PLDA model
counts: numpy.ndarray
Utterance counts for each speaker

Returns
-------
Expand Down Expand Up @@ -2065,14 +2085,9 @@ def compute_pca_transform(self, ivectors: np.ndarray) -> np.ndarray:
self.apply_transform()
return newX

def apply_transform(self):
def apply_transform(self) -> None:
"""
Adapted from https://github.com/prachiisc/PLDA_scoring/blob/master/PLDA_scoring.py#L101

Parameters
----------
transform_in : numpy.ndarray
PCA transform
"""

mean_plda = self.mean
Expand Down Expand Up @@ -2110,6 +2125,8 @@ def transform_ivectors(self, ivectors: np.ndarray, counts: np.ndarray = None) ->
----------
ivectors : numpy.ndarray
Input ivectors
counts : numpy.ndarray, optional
Utterance counts per speaker

Returns
-------
Expand Down Expand Up @@ -2150,6 +2167,7 @@ def transform_ivectors(self, ivectors: np.ndarray, counts: np.ndarray = None) ->
class ExportIvectorsFunction(KaldiFunction):
"""
Multiprocessing function to compute voice activity detection

See Also
--------
:meth:`.AcousticCorpusMixin.compute_vad`
Expand All @@ -2158,6 +2176,7 @@ class ExportIvectorsFunction(KaldiFunction):
Job method for generating arguments for this function
:kaldi_src:`compute-vad`
Relevant Kaldi binary

Parameters
----------
args: :class:`~montreal_forced_aligner.corpus.features.VadArguments`
Expand Down Expand Up @@ -2232,8 +2251,27 @@ def online_feature_proc(
mfcc_options: MetaDict,
pitch_options: MetaDict,
lda_options: MetaDict,
log_file,
log_file: io.FileIO,
) -> subprocess.Popen:
"""
Generate a subprocess Popen object that processes features for online alignment, decoding, etc.

Parameters
----------
working_directory: :class:`~pathlib.Path`
wav_path: :class:`~pathlib.Path`
segment_path: :class:`~pathlib.Path`
mfcc_options: dict[str, Any]
pitch_options: dict[str, Any]
lda_options: dict[str, Any]
log_file: writable buffer


Returns
-------
subprocess.Popen
Process that
"""
mfcc_ark_path = working_directory.joinpath("mfcc.ark")
pitch_ark_path = working_directory.joinpath("pitch.ark")
feats_ark_path = working_directory.joinpath("feats.ark")
Expand Down Expand Up @@ -2301,14 +2339,14 @@ def online_feature_proc(
mfcc_proc.stdin.write(line)
mfcc_proc.stdin.flush()
if use_pitch:
pitch_proc.stdin.write(line)
pitch_proc.stdin.write(line) # noqa
pitch_proc.stdin.flush()
mfcc_proc.stdin.close()
if use_pitch:
pitch_proc.stdin.close()
cmvn_proc.wait()
if use_pitch:
pitch_copy_proc.wait()
pitch_copy_proc.wait() # noqa
if use_pitch:
paste_proc = subprocess.Popen(
[
Expand Down
Loading