Update connections to use sockets

MontrealCorpusTools · mmcauliffe · Mar 7, 2023 · Feb 17, 2023 · Feb 17, 2023 · Feb 17, 2023
commit ff66e90ab404b49faebf78a8f75b1e5b64f24c91
diff --git a/montreal_forced_aligner/abc.py b/montreal_forced_aligner/abc.py
@@ -236,7 +236,7 @@ def initialize_database(self) -> None:
         retcode = subprocess.call(
             [
                 "createdb",
-                f"--port={GLOBAL_CONFIG.current_profile.database_port}",
+                f"--host={GLOBAL_CONFIG.database_socket}",
                 self.identifier,
             ],
             stderr=subprocess.DEVNULL,
@@ -325,7 +325,7 @@ def current_workflow(self) -> CorpusWorkflow:
     @property
     def db_string(self):
         """Connection string for the database"""
-        return f"postgresql+psycopg2://localhost:{GLOBAL_CONFIG.current_profile.database_port}/{self.identifier}"
+        return f"postgresql+psycopg2://@/{self.identifier}?host={GLOBAL_CONFIG.database_socket}"
 
     def construct_engine(self, **kwargs) -> sqlalchemy.engine.Engine:
         """

diff --git a/montreal_forced_aligner/alignment/multiprocessing.py b/montreal_forced_aligner/alignment/multiprocessing.py
@@ -1293,10 +1293,10 @@ def _run(self) -> typing.Generator[typing.Tuple[int, float]]:
                 trans_proc = compute_transform_process(
                     log_file,
                     extract_proc,
-                    utt2spk_path,
                     workflow.lda_mat_path,
-                    fmllr_path,
                     self.lda_options,
+                    fmllr_path=fmllr_path,
+                    utt2spk_path=utt2spk_path,
                 )
                 align_proc = subprocess.Popen(
                     [

diff --git a/montreal_forced_aligner/command_line/utils.py b/montreal_forced_aligner/command_line/utils.py
@@ -232,6 +232,8 @@ def configure_pg(directory):
         "#log_min_duration_statement = -1": "log_min_duration_statement = 5000",
         "#enable_partitionwise_join = off": "enable_partitionwise_join = on",
         "#enable_partitionwise_aggregate = off": "enable_partitionwise_aggregate = on",
+        "#unix_socket_directories = ''": f"unix_socket_directories = '{GLOBAL_CONFIG.database_socket}'",
+        "#listen_addresses = 'localhost'": "listen_addresses = ''",
     }
     if not GLOBAL_CONFIG.current_profile.database_limited_mode:
         configuration_updates.update(
@@ -279,7 +281,7 @@ def check_databases(db_name=None) -> None:
     if not create:
         try:
             engine = sqlalchemy.create_engine(
-                f"postgresql+psycopg2://localhost:{GLOBAL_CONFIG.current_profile.database_port}/{db_name}",
+                f"postgresql+psycopg2://@/{db_name}?host={GLOBAL_CONFIG.database_socket}",
                 poolclass=sqlalchemy.NullPool,
                 pool_reset_on_return=None,
                 logging_name="check_databases_engine",
@@ -308,8 +310,6 @@ def check_databases(db_name=None) -> None:
                         db_directory,
                         "-l",
                         log_path,
-                        "-o",
-                        f"-F -p {GLOBAL_CONFIG.current_profile.database_port}",
                         "start",
                     ],
                     stdout=log_file,
@@ -318,8 +318,8 @@ def check_databases(db_name=None) -> None:
                 subprocess.check_call(
                     [
                         "createuser",
-                        "-p",
-                        str(GLOBAL_CONFIG.current_profile.database_port),
+                        "-h",
+                        GLOBAL_CONFIG.database_socket,
                         "-s",
                         "postgres",
                     ],
@@ -337,8 +337,6 @@ def check_databases(db_name=None) -> None:
                         db_directory,
                         "-l",
                         log_path,
-                        "-o",
-                        f"-F -p {GLOBAL_CONFIG.current_profile.database_port}",
                         "start",
                     ],
                     stdout=log_file,
@@ -357,7 +355,7 @@ def cleanup_databases(force: bool = False) -> None:
         GLOBAL_CONFIG["temporary_directory"], f"pg_mfa_{GLOBAL_CONFIG.current_profile_name}"
     )
     if force:
-        mode = "immediate"
+        mode = "fast"
     else:
         mode = "smart"
     try:

diff --git a/montreal_forced_aligner/config.py b/montreal_forced_aligner/config.py
@@ -203,6 +203,12 @@ def __getitem__(self, item):
         if hasattr(self.current_profile, item):
             return getattr(self.current_profile, item)
 
+    @property
+    def database_socket(self):  # directory path for the current profile's socket, returned as a string
+        p = get_temporary_directory().joinpath(f"pg_mfa_{self.current_profile_name}_socket")
+        p.mkdir(parents=True, exist_ok=True)  # executed on every access; exist_ok makes repeat calls harmless
+        return p.as_posix()  # this string is interpolated into connection URLs and CLI args elsewhere in this patch
+
     @property
     def current_profile(self) -> MfaProfile:
         """Name of the current :class:`~montreal_forced_aligner.config.MfaProfile`"""

diff --git a/montreal_forced_aligner/corpus/acoustic_corpus.py b/montreal_forced_aligner/corpus/acoustic_corpus.py
@@ -712,7 +712,7 @@ def calc_cmvn(self) -> None:
                     .filter(Speaker.cmvn != None, Utterance.job_id == j.id)  # noqa
                     .distinct()
                 )
-                with mfa_open(j.construct_path(self.split_directory, "cmvn", ".scp"), "w") as f:
+                with mfa_open(j.construct_path(self.split_directory, "cmvn", "scp"), "w") as f:
                     for s_id, cmvn in query:
                         f.write(f"{s_id} {cmvn}\n")
 
@@ -775,6 +775,31 @@ def calc_fmllr(self, iteration: Optional[int] = None) -> None:
                         pbar.update(1)
 
         self.uses_speaker_adaptation = True
+        update_mapping = []
+        if not GLOBAL_CONFIG.current_profile.single_speaker:
+            for args in arguments:
+                for p in args.trans_paths.values():
+                    ark_p = self.split_directory.joinpath(p.name)
+                    scp_p = ark_p.with_suffix(".scp")
+                    compose_proc = subprocess.Popen(
+                        [
+                            thirdparty_binary("copy-matrix"),
+                            f"ark:{p}",
+                            f"ark,scp:{ark_p},{scp_p}",
+                        ],
+                        stderr=subprocess.DEVNULL,
+                        env=os.environ,
+                    )
+                    compose_proc.communicate()
+                    with mfa_open(scp_p) as f:
+                        for line in f:
+                            line = line.strip()
+                            speaker, ark = line.split(maxsplit=1)
+                            speaker = int(speaker)
+                            update_mapping.append({"id": speaker, "fmllr": ark})
+            with self.session() as session:
+                bulk_update(session, Speaker, update_mapping)
+                session.commit()
         logger.debug(f"Fmllr calculation took {time.time() - begin:.3f} seconds")
 
     def compute_vad(self) -> None:

diff --git a/montreal_forced_aligner/corpus/features.py b/montreal_forced_aligner/corpus/features.py
@@ -363,10 +363,10 @@ def compute_pitch_process(
 def compute_transform_process(
     log_file: io.FileIO,
     feat_proc: typing.Union[subprocess.Popen, Path],
-    utt2spk_path: Path,
     lda_mat_path: typing.Optional[Path],
-    fmllr_path: typing.Optional[Path],
     lda_options: MetaDict,
+    fmllr_path: Path = None,
+    utt2spk_path: Path = None,
 ) -> subprocess.Popen:
     """
     Construct feature transformation process
@@ -377,21 +377,21 @@ def compute_transform_process(
         File for logging stderr
     feat_proc: subprocess.Popen
         Feature generation process
-    utt2spk_path: :class:`~pathlib.Path`
-        Utterance to speaker SCP file path
     lda_mat_path: :class:`~pathlib.Path`
         LDA matrix file path
-    fmllr_path: :class:`~pathlib.Path`
-        fMLLR transform file path
     lda_options: dict[str, Any]
         Options for LDA
+    fmllr_path: :class:`~pathlib.Path`, optional
+        fMLLR transform file path
+    utt2spk_path: :class:`~pathlib.Path`, optional
+        Utterance to speaker SCP file path
 
     Returns
     -------
     subprocess.Popen
         Processing for transforming features
     """
-    if isinstance(feat_proc, str):
+    if isinstance(feat_proc, (str, Path)):
         feat_input = f"ark,s,cs:{feat_proc}"
         use_stdin = False
     else:
@@ -426,14 +426,17 @@ def compute_transform_process(
             stdout=subprocess.PIPE,
             stderr=log_file,
         )
-    if fmllr_path is None:
+    if fmllr_path is None or not fmllr_path.exists():
         return delta_proc
-
+    if fmllr_path.suffix == ".scp":
+        fmllr_ark = f"scp:{fmllr_path}"
+    else:
+        fmllr_ark = f"ark:{fmllr_path}"
     fmllr_proc = subprocess.Popen(
         [
             "transform-feats",
             f"--utt2spk=ark:{utt2spk_path}",
-            f"ark:{fmllr_path}",
+            fmllr_ark,
             "ark,s,cs:-",
             "ark,t:-",
         ],
@@ -1318,6 +1321,8 @@ def fmllr_options(self) -> MetaDict:
     @property
     def lda_options(self) -> MetaDict:
         """Options for computing LDA"""
+        if getattr(self, "acoustic_model", None) is not None:
+            return self.acoustic_model.lda_options
         return {
             "splice_left_context": self.splice_left_context,
             "splice_right_context": self.splice_right_context,
@@ -1326,6 +1331,8 @@ def lda_options(self) -> MetaDict:
     @property
     def mfcc_options(self) -> MetaDict:
         """Parameters to use in computing MFCC features."""
+        if getattr(self, "acoustic_model", None) is not None:
+            return self.acoustic_model.mfcc_options
         return {
             "use-energy": self.use_energy,
             "dither": self.dither,
@@ -1347,6 +1354,8 @@ def mfcc_options(self) -> MetaDict:
     @property
     def pitch_options(self) -> MetaDict:
         """Parameters to use in computing MFCC features."""
+        if getattr(self, "acoustic_model", None) is not None:
+            return self.acoustic_model.pitch_options
         return {
             "use-pitch": self.use_pitch,
             "use-voicing": self.use_voicing,
@@ -2214,3 +2223,114 @@ def _run(self) -> typing.Generator[typing.Tuple[int, int, int]]:
                     utt_id, ark_path = line.split(maxsplit=1)
                     utt_id = int(utt_id.split("-")[1])
                     yield utt_id, ark_path
+
+
+def online_feature_proc(  # segment audio, compute normalized (optionally pitch-augmented) features, return the transform process
+    working_directory: Path,  # directory that the ark/scp/mat file names below are resolved against
+    wav_path: Path,  # passed to extract-segments as "scp:<wav_path>" and also handed to the mfcc/pitch helpers
+    segment_path: Path,  # passed as-is as the segments argument of extract-segments
+    mfcc_options: MetaDict,  # forwarded unchanged to compute_mfcc_process
+    pitch_options: MetaDict,  # must contain "use-pitch" and "use-voicing"; forwarded to compute_pitch_process
+    lda_options: MetaDict,  # forwarded unchanged to compute_transform_process
+    log_file,  # used as stderr for every subprocess started here
+) -> subprocess.Popen:
+    mfcc_ark_path = working_directory.joinpath("mfcc.ark")  # output target of the CMVN step
+    pitch_ark_path = working_directory.joinpath("pitch.ark")  # output target of copy-feats (pitch branch only)
+    feats_ark_path = working_directory.joinpath("feats.ark")  # output target of paste-feats (pitch branch only)
+    lda_mat_path = working_directory.joinpath("lda.mat")
+    trans_scp_path = working_directory.joinpath("trans.scp")  # handed to compute_transform_process as fmllr_path
+    cmvn_scp_path = working_directory.joinpath("cmvn.scp")  # its existence selects the CMVN variant below
+    utt2spk_scp_path = working_directory.joinpath("utt2spk.scp")
+    seg_proc = subprocess.Popen(  # NOTE(review): seg_proc is never waited on and its return code is not checked
+        [
+            thirdparty_binary("extract-segments"),
+            "--min-segment-length=0.1",
+            f"scp:{wav_path}",
+            segment_path,
+            "ark:-",
+        ],
+        stdout=subprocess.PIPE,
+        stderr=log_file,
+        env=os.environ,
+    )
+    mfcc_proc = compute_mfcc_process(log_file, wav_path, subprocess.PIPE, mfcc_options)  # presumably opens a stdin pipe (written to below) - TODO confirm
+    if cmvn_scp_path.exists():  # stored CMVN stats available: apply them via the utt2spk mapping
+        cmvn_proc = subprocess.Popen(
+            [
+                thirdparty_binary("apply-cmvn"),
+                f"--utt2spk=ark:{utt2spk_scp_path}",
+                f"scp:{cmvn_scp_path}",
+                "ark:-",
+                f"ark:{mfcc_ark_path}",
+            ],
+            stdin=mfcc_proc.stdout,
+            stderr=log_file,
+            env=os.environ,
+        )
+
+    else:  # no cmvn.scp: fall back to apply-cmvn-sliding
+        cmvn_proc = subprocess.Popen(
+            [
+                "apply-cmvn-sliding",  # NOTE(review): bare name, unlike the other binaries wrapped in thirdparty_binary()
+                "--norm-vars=false",
+                "--center=true",
+                "--cmn-window=300",
+                "ark:-",
+                f"ark:{mfcc_ark_path}",
+            ],
+            env=os.environ,
+            stdin=mfcc_proc.stdout,
+            stderr=log_file,
+        )
+
+    use_pitch = pitch_options["use-pitch"] or pitch_options["use-voicing"]  # pitch branch runs when either flag is truthy
+    if use_pitch:
+        pitch_proc = compute_pitch_process(log_file, wav_path, subprocess.PIPE, pitch_options)
+        pitch_copy_proc = subprocess.Popen(
+            [
+                thirdparty_binary("copy-feats"),
+                "--compress=true",
+                "ark:-",
+                f"ark:{pitch_ark_path}",
+            ],
+            stdin=pitch_proc.stdout,
+            stderr=log_file,
+            env=os.environ,
+        )
+    for line in seg_proc.stdout:  # forward the segmenter's output to the mfcc (and pitch) process stdin
+        mfcc_proc.stdin.write(line)
+        mfcc_proc.stdin.flush()
+        if use_pitch:
+            pitch_proc.stdin.write(line)
+            pitch_proc.stdin.flush()
+    mfcc_proc.stdin.close()  # closing stdin lets the downstream processes finish
+    if use_pitch:
+        pitch_proc.stdin.close()
+    cmvn_proc.wait()  # block until the CMVN process has exited
+    if use_pitch:
+        pitch_copy_proc.wait()
+    if use_pitch:  # combine mfcc.ark and pitch.ark into feats.ark
+        paste_proc = subprocess.Popen(
+            [
+                thirdparty_binary("paste-feats"),
+                "--length-tolerance=2",
+                f"ark:{mfcc_ark_path}",
+                f"ark:{pitch_ark_path}",
+                f"ark:{feats_ark_path}",
+            ],
+            stderr=log_file,
+            env=os.environ,
+        )
+        paste_proc.wait()
+    else:
+        feats_ark_path = mfcc_ark_path  # no pitch: the transform step reads mfcc.ark directly
+
+    trans_proc = compute_transform_process(  # receives a file path, not a process, as its feature input
+        log_file,
+        feats_ark_path,
+        lda_mat_path,
+        lda_options,
+        fmllr_path=trans_scp_path,
+        utt2spk_path=utt2spk_scp_path,
+    )
+    return trans_proc  # returned as-is; this function does not wait on it
diff --git a/montreal_forced_aligner/corpus/multiprocessing.py b/montreal_forced_aligner/corpus/multiprocessing.py
@@ -547,6 +547,7 @@ def output_for_features(self, session: Session) -> None:
         )
         wav_scp_path = job.wav_scp_path
         segments_scp_path = job.segments_scp_path
+        utt2spk_scp_path = job.utt2spk_scp_path
         if os.path.exists(segments_scp_path):
             return
         with mfa_open(wav_scp_path, "w") as wav_file:
@@ -564,20 +565,24 @@ def output_for_features(self, session: Session) -> None:
                 wav_file.write(f"{f_id} {sox_string}\n")
                 yield 1
 
-        with mfa_open(segments_scp_path, "w") as segments_file:
+        with mfa_open(segments_scp_path, "w") as segments_file, mfa_open(
+            utt2spk_scp_path, "w"
+        ) as utt2spk_file:
             utterances = (
                 session.query(
                     Utterance.kaldi_id,
                     Utterance.file_id,
+                    Utterance.speaker_id,
                     Utterance.begin,
                     Utterance.end,
                     Utterance.channel,
                 )
                 .filter(Utterance.job_id == job.id)
                 .order_by(Utterance.kaldi_id)
             )
-            for u_id, f_id, begin, end, channel in utterances:
+            for u_id, f_id, s_id, begin, end, channel in utterances:
                 segments_file.write(f"{u_id} {f_id} {begin} {end} {channel}\n")
+                utt2spk_file.write(f"{u_id} {s_id}\n")
                 yield 1
 
     def output_to_directory(self, session) -> None:

diff --git a/montreal_forced_aligner/data.py b/montreal_forced_aligner/data.py
@@ -245,6 +245,7 @@ class WordType(enum.Enum):
     noise = 8  #: Words that represent non-speech noise
     music = 9  #: Words that represent music
     disambiguation = 10  #: Disambiguation symbols internal to Kaldi
+    interjection = 11  #: Set of words that can be added on the fly to transcripts
 
 
 class DistanceMetric(enum.Enum):