Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2.2.4 #576

Merged
merged 42 commits into from
Mar 7, 2023
Merged

2.2.4 #576

Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
2780f0f
Fix error in some common voice japanese file paths
mmcauliffe Feb 17, 2023
22ca4ab
Add extra validation to textgrid interval start/end timestamps
mmcauliffe Feb 17, 2023
059daf1
Ensure GITHUB_TOKEN is available for tests
mmcauliffe Feb 17, 2023
e0b7490
Try to fix failing test runner
mmcauliffe Feb 18, 2023
a277966
Fix error in processing opus files
mmcauliffe Feb 18, 2023
f80b0f7
Remove GITHUB_TOKEN env variable
mmcauliffe Feb 18, 2023
bddb49b
Fix for logging disappearing after API runs
mmcauliffe Feb 18, 2023
7074ade
Attempt to fix CI
mmcauliffe Feb 18, 2023
ac241e7
Fix environment yaml
mmcauliffe Feb 18, 2023
55cdd7f
Fix mamba install
mmcauliffe Feb 18, 2023
9f0d517
Add clean up for initial tests?
mmcauliffe Feb 18, 2023
36046fe
Fix failing test
mmcauliffe Feb 18, 2023
ea60694
Add back in github token
mmcauliffe Feb 18, 2023
b178c0d
Attempt to fix test runner
mmcauliffe Feb 19, 2023
09454d9
remove test timing out
mmcauliffe Feb 19, 2023
8eca112
attempt to fix tests
mmcauliffe Feb 20, 2023
69ee10f
Attempt to fix github actions
mmcauliffe Feb 23, 2023
ff66e90
Update connections to use sockets
mmcauliffe Mar 2, 2023
38e3cd9
Switch over to using a dedicated server command
mmcauliffe Mar 2, 2023
8849a9d
Fix initialize bug on linux
mmcauliffe Mar 2, 2023
c9715e7
Add log output for database initialization
mmcauliffe Mar 2, 2023
0216fed
Config debugging on CI
mmcauliffe Mar 2, 2023
36ee223
Remove github token
mmcauliffe Mar 2, 2023
e8e70b8
Fix typo
mmcauliffe Mar 2, 2023
c2c71e6
Attempt to fix CI
mmcauliffe Mar 2, 2023
5d781eb
Another attempt
mmcauliffe Mar 2, 2023
8087aea
Another attempt
mmcauliffe Mar 3, 2023
fdc4f89
Fix wrong error message on start up
mmcauliffe Mar 3, 2023
13f7d6d
Another attempt
mmcauliffe Mar 3, 2023
d16ada2
Ensure sessions closing
mmcauliffe Mar 3, 2023
bb7efcb
Add support for piping to mfa g2p
mmcauliffe Mar 5, 2023
d6bd311
Add more debugging information for CI failures
mmcauliffe Mar 5, 2023
3a96687
Add timeout to tests
mmcauliffe Mar 5, 2023
f5998ec
Increase test timeout
mmcauliffe Mar 5, 2023
9026b40
reduce timeout
mmcauliffe Mar 5, 2023
808b013
reduce timout
mmcauliffe Mar 6, 2023
a11746d
timeout on the failing test
mmcauliffe Mar 6, 2023
8525d4d
Add timeouts to all alignment tests
mmcauliffe Mar 6, 2023
003443f
Remove sqlalchemy debug output
mmcauliffe Mar 6, 2023
10d023f
add back in github token
mmcauliffe Mar 6, 2023
e929572
Add support for per-utterance G2P
mmcauliffe Mar 6, 2023
fd550ad
Clean up
mmcauliffe Mar 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Update connections to use sockets
  • Loading branch information
mmcauliffe committed Mar 2, 2023
commit ff66e90ab404b49faebf78a8f75b1e5b64f24c91
4 changes: 2 additions & 2 deletions montreal_forced_aligner/abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def initialize_database(self) -> None:
retcode = subprocess.call(
[
"createdb",
f"--port={GLOBAL_CONFIG.current_profile.database_port}",
f"--host={GLOBAL_CONFIG.database_socket}",
self.identifier,
],
stderr=subprocess.DEVNULL,
Expand Down Expand Up @@ -325,7 +325,7 @@ def current_workflow(self) -> CorpusWorkflow:
@property
def db_string(self):
"""Connection string for the database"""
return f"postgresql+psycopg2://localhost:{GLOBAL_CONFIG.current_profile.database_port}/{self.identifier}"
return f"postgresql+psycopg2://@/{self.identifier}?host={GLOBAL_CONFIG.database_socket}"

def construct_engine(self, **kwargs) -> sqlalchemy.engine.Engine:
"""
Expand Down
4 changes: 2 additions & 2 deletions montreal_forced_aligner/alignment/multiprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1293,10 +1293,10 @@ def _run(self) -> typing.Generator[typing.Tuple[int, float]]:
trans_proc = compute_transform_process(
log_file,
extract_proc,
utt2spk_path,
workflow.lda_mat_path,
fmllr_path,
self.lda_options,
fmllr_path=fmllr_path,
utt2spk_path=utt2spk_path,
)
align_proc = subprocess.Popen(
[
Expand Down
14 changes: 6 additions & 8 deletions montreal_forced_aligner/command_line/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,8 @@ def configure_pg(directory):
"#log_min_duration_statement = -1": "log_min_duration_statement = 5000",
"#enable_partitionwise_join = off": "enable_partitionwise_join = on",
"#enable_partitionwise_aggregate = off": "enable_partitionwise_aggregate = on",
"#unix_socket_directories = ''": f"unix_socket_directories = '{GLOBAL_CONFIG.database_socket}'",
"#listen_addresses = 'localhost'": "listen_addresses = ''",
}
if not GLOBAL_CONFIG.current_profile.database_limited_mode:
configuration_updates.update(
Expand Down Expand Up @@ -279,7 +281,7 @@ def check_databases(db_name=None) -> None:
if not create:
try:
engine = sqlalchemy.create_engine(
f"postgresql+psycopg2://localhost:{GLOBAL_CONFIG.current_profile.database_port}/{db_name}",
f"postgresql+psycopg2://@/{db_name}?host={GLOBAL_CONFIG.database_socket}",
poolclass=sqlalchemy.NullPool,
pool_reset_on_return=None,
logging_name="check_databases_engine",
Expand Down Expand Up @@ -308,8 +310,6 @@ def check_databases(db_name=None) -> None:
db_directory,
"-l",
log_path,
"-o",
f"-F -p {GLOBAL_CONFIG.current_profile.database_port}",
"start",
],
stdout=log_file,
Expand All @@ -318,8 +318,8 @@ def check_databases(db_name=None) -> None:
subprocess.check_call(
[
"createuser",
"-p",
str(GLOBAL_CONFIG.current_profile.database_port),
"-h",
GLOBAL_CONFIG.database_socket,
"-s",
"postgres",
],
Expand All @@ -337,8 +337,6 @@ def check_databases(db_name=None) -> None:
db_directory,
"-l",
log_path,
"-o",
f"-F -p {GLOBAL_CONFIG.current_profile.database_port}",
"start",
],
stdout=log_file,
Expand All @@ -357,7 +355,7 @@ def cleanup_databases(force: bool = False) -> None:
GLOBAL_CONFIG["temporary_directory"], f"pg_mfa_{GLOBAL_CONFIG.current_profile_name}"
)
if force:
mode = "immediate"
mode = "fast"
else:
mode = "smart"
try:
Expand Down
6 changes: 6 additions & 0 deletions montreal_forced_aligner/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,12 @@ def __getitem__(self, item):
if hasattr(self.current_profile, item):
return getattr(self.current_profile, item)

@property
def database_socket(self):
    """
    Directory used for the current profile's database socket, as a POSIX-style path string

    The directory is named ``pg_mfa_{current_profile_name}_socket`` and lives under the
    directory returned by ``get_temporary_directory()``; it is created (along with any
    missing parents) every time this property is read, so callers can use the returned
    path immediately.
    """
    socket_directory = get_temporary_directory() / f"pg_mfa_{self.current_profile_name}_socket"
    socket_directory.mkdir(exist_ok=True, parents=True)
    return socket_directory.as_posix()

@property
def current_profile(self) -> MfaProfile:
"""The current :class:`~montreal_forced_aligner.config.MfaProfile`"""
Expand Down
27 changes: 26 additions & 1 deletion montreal_forced_aligner/corpus/acoustic_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -712,7 +712,7 @@ def calc_cmvn(self) -> None:
.filter(Speaker.cmvn != None, Utterance.job_id == j.id) # noqa
.distinct()
)
with mfa_open(j.construct_path(self.split_directory, "cmvn", ".scp"), "w") as f:
with mfa_open(j.construct_path(self.split_directory, "cmvn", "scp"), "w") as f:
for s_id, cmvn in query:
f.write(f"{s_id} {cmvn}\n")

Expand Down Expand Up @@ -775,6 +775,31 @@ def calc_fmllr(self, iteration: Optional[int] = None) -> None:
pbar.update(1)

self.uses_speaker_adaptation = True
update_mapping = []
if not GLOBAL_CONFIG.current_profile.single_speaker:
for args in arguments:
for p in args.trans_paths.values():
ark_p = self.split_directory.joinpath(p.name)
scp_p = ark_p.with_suffix(".scp")
compose_proc = subprocess.Popen(
[
thirdparty_binary("copy-matrix"),
f"ark:{p}",
f"ark,scp:{ark_p},{scp_p}",
],
stderr=subprocess.DEVNULL,
env=os.environ,
)
compose_proc.communicate()
with mfa_open(scp_p) as f:
for line in f:
line = line.strip()
speaker, ark = line.split(maxsplit=1)
speaker = int(speaker)
update_mapping.append({"id": speaker, "fmllr": ark})
with self.session() as session:
bulk_update(session, Speaker, update_mapping)
session.commit()
logger.debug(f"Fmllr calculation took {time.time() - begin:.3f} seconds")

def compute_vad(self) -> None:
Expand Down
140 changes: 130 additions & 10 deletions montreal_forced_aligner/corpus/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,10 +363,10 @@ def compute_pitch_process(
def compute_transform_process(
log_file: io.FileIO,
feat_proc: typing.Union[subprocess.Popen, Path],
utt2spk_path: Path,
lda_mat_path: typing.Optional[Path],
fmllr_path: typing.Optional[Path],
lda_options: MetaDict,
fmllr_path: Path = None,
utt2spk_path: Path = None,
) -> subprocess.Popen:
"""
Construct feature transformation process
Expand All @@ -377,21 +377,21 @@ def compute_transform_process(
File for logging stderr
feat_proc: subprocess.Popen
Feature generation process
utt2spk_path: :class:`~pathlib.Path`
Utterance to speaker SCP file path
lda_mat_path: :class:`~pathlib.Path`
LDA matrix file path
fmllr_path: :class:`~pathlib.Path`
fMLLR transform file path
lda_options: dict[str, Any]
Options for LDA
fmllr_path: :class:`~pathlib.Path`, optional
fMLLR transform file path
utt2spk_path: :class:`~pathlib.Path`, optional
Utterance to speaker SCP file path

Returns
-------
subprocess.Popen
Processing for transforming features
"""
if isinstance(feat_proc, str):
if isinstance(feat_proc, (str, Path)):
feat_input = f"ark,s,cs:{feat_proc}"
use_stdin = False
else:
Expand Down Expand Up @@ -426,14 +426,17 @@ def compute_transform_process(
stdout=subprocess.PIPE,
stderr=log_file,
)
if fmllr_path is None:
if fmllr_path is None or not fmllr_path.exists():
return delta_proc

if fmllr_path.suffix == ".scp":
fmllr_ark = f"scp:{fmllr_path}"
else:
fmllr_ark = f"ark:{fmllr_path}"
fmllr_proc = subprocess.Popen(
[
"transform-feats",
f"--utt2spk=ark:{utt2spk_path}",
f"ark:{fmllr_path}",
fmllr_ark,
"ark,s,cs:-",
"ark,t:-",
],
Expand Down Expand Up @@ -1318,6 +1321,8 @@ def fmllr_options(self) -> MetaDict:
@property
def lda_options(self) -> MetaDict:
"""Options for computing LDA"""
if getattr(self, "acoustic_model", None) is not None:
return self.acoustic_model.lda_options
return {
"splice_left_context": self.splice_left_context,
"splice_right_context": self.splice_right_context,
Expand All @@ -1326,6 +1331,8 @@ def lda_options(self) -> MetaDict:
@property
def mfcc_options(self) -> MetaDict:
"""Parameters to use in computing MFCC features."""
if getattr(self, "acoustic_model", None) is not None:
return self.acoustic_model.mfcc_options
return {
"use-energy": self.use_energy,
"dither": self.dither,
Expand All @@ -1347,6 +1354,8 @@ def mfcc_options(self) -> MetaDict:
@property
def pitch_options(self) -> MetaDict:
"""Parameters to use in computing pitch features."""
if getattr(self, "acoustic_model", None) is not None:
return self.acoustic_model.pitch_options
return {
"use-pitch": self.use_pitch,
"use-voicing": self.use_voicing,
Expand Down Expand Up @@ -2214,3 +2223,114 @@ def _run(self) -> typing.Generator[typing.Tuple[int, int, int]]:
utt_id, ark_path = line.split(maxsplit=1)
utt_id = int(utt_id.split("-")[1])
yield utt_id, ark_path


def online_feature_proc(
    working_directory: Path,
    wav_path: Path,
    segment_path: Path,
    mfcc_options: MetaDict,
    pitch_options: MetaDict,
    lda_options: MetaDict,
    log_file,
) -> subprocess.Popen:
    """
    Generate features for a set of segments and construct the final transform process

    Segments are extracted with ``extract-segments`` and streamed into the MFCC process
    (and the pitch process, when pitch or voicing features are enabled).  The MFCC output
    is passed through ``apply-cmvn`` when ``cmvn.scp`` exists in ``working_directory``
    (``apply-cmvn-sliding`` otherwise) and written to ``mfcc.ark``; pitch features are
    written to ``pitch.ark`` and pasted with the MFCCs into ``feats.ark``.  The resulting
    archive is then handed to :func:`compute_transform_process`.

    Parameters
    ----------
    working_directory: :class:`~pathlib.Path`
        Directory where ``mfcc.ark``, ``pitch.ark`` and ``feats.ark`` are written, and where
        ``lda.mat``, ``trans.scp``, ``cmvn.scp`` and ``utt2spk.scp`` are looked up
    wav_path: :class:`~pathlib.Path`
        Path to the wav SCP file read by ``extract-segments``
    segment_path: :class:`~pathlib.Path`
        Path to the segments file read by ``extract-segments``
    mfcc_options: dict[str, Any]
        Options passed to :func:`compute_mfcc_process`
    pitch_options: dict[str, Any]
        Options passed to :func:`compute_pitch_process`; the ``use-pitch`` and
        ``use-voicing`` keys decide whether pitch features are computed at all
    lda_options: dict[str, Any]
        Options passed to :func:`compute_transform_process`
    log_file
        Open file object that receives the stderr of every spawned process

    Returns
    -------
    subprocess.Popen
        Process returned by :func:`compute_transform_process` for the generated features
    """
    mfcc_ark_path = working_directory.joinpath("mfcc.ark")
    pitch_ark_path = working_directory.joinpath("pitch.ark")
    feats_ark_path = working_directory.joinpath("feats.ark")
    lda_mat_path = working_directory.joinpath("lda.mat")
    trans_scp_path = working_directory.joinpath("trans.scp")
    cmvn_scp_path = working_directory.joinpath("cmvn.scp")
    utt2spk_scp_path = working_directory.joinpath("utt2spk.scp")

    seg_proc = subprocess.Popen(
        [
            thirdparty_binary("extract-segments"),
            "--min-segment-length=0.1",
            f"scp:{wav_path}",
            segment_path,
            "ark:-",
        ],
        stdout=subprocess.PIPE,
        stderr=log_file,
        env=os.environ,
    )
    mfcc_proc = compute_mfcc_process(log_file, wav_path, subprocess.PIPE, mfcc_options)

    # Use the cmvn.scp statistics when they are available, otherwise fall back to
    # sliding-window normalization
    if cmvn_scp_path.exists():
        cmvn_command = [
            thirdparty_binary("apply-cmvn"),
            f"--utt2spk=ark:{utt2spk_scp_path}",
            f"scp:{cmvn_scp_path}",
            "ark:-",
            f"ark:{mfcc_ark_path}",
        ]
    else:
        cmvn_command = [
            # Resolved through thirdparty_binary like every other binary in this function
            thirdparty_binary("apply-cmvn-sliding"),
            "--norm-vars=false",
            "--center=true",
            "--cmn-window=300",
            "ark:-",
            f"ark:{mfcc_ark_path}",
        ]
    cmvn_proc = subprocess.Popen(
        cmvn_command,
        stdin=mfcc_proc.stdout,
        stderr=log_file,
        env=os.environ,
    )

    use_pitch = pitch_options["use-pitch"] or pitch_options["use-voicing"]
    pitch_proc = None
    pitch_copy_proc = None
    if use_pitch:
        pitch_proc = compute_pitch_process(log_file, wav_path, subprocess.PIPE, pitch_options)
        pitch_copy_proc = subprocess.Popen(
            [
                thirdparty_binary("copy-feats"),
                "--compress=true",
                "ark:-",
                f"ark:{pitch_ark_path}",
            ],
            stdin=pitch_proc.stdout,
            stderr=log_file,
            env=os.environ,
        )

    # Fan the extracted segment stream out to every feature process
    sinks = [mfcc_proc.stdin]
    if pitch_proc is not None:
        sinks.append(pitch_proc.stdin)
    for line in seg_proc.stdout:
        for sink in sinks:
            sink.write(line)
            sink.flush()
    # Closing stdin signals end of input so the downstream processes can finish
    for sink in sinks:
        sink.close()
    # Its stdout has reached EOF, so extract-segments is done; reap it rather than
    # leaving an un-waited child behind
    seg_proc.wait()

    cmvn_proc.wait()
    if use_pitch:
        pitch_copy_proc.wait()
        paste_proc = subprocess.Popen(
            [
                thirdparty_binary("paste-feats"),
                "--length-tolerance=2",
                f"ark:{mfcc_ark_path}",
                f"ark:{pitch_ark_path}",
                f"ark:{feats_ark_path}",
            ],
            stderr=log_file,
            env=os.environ,
        )
        paste_proc.wait()
    else:
        # Without pitch, the MFCC archive already is the final feature archive
        feats_ark_path = mfcc_ark_path

    return compute_transform_process(
        log_file,
        feats_ark_path,
        lda_mat_path,
        lda_options,
        fmllr_path=trans_scp_path,
        utt2spk_path=utt2spk_scp_path,
    )
9 changes: 7 additions & 2 deletions montreal_forced_aligner/corpus/multiprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,7 @@ def output_for_features(self, session: Session) -> None:
)
wav_scp_path = job.wav_scp_path
segments_scp_path = job.segments_scp_path
utt2spk_scp_path = job.utt2spk_scp_path
if os.path.exists(segments_scp_path):
return
with mfa_open(wav_scp_path, "w") as wav_file:
Expand All @@ -564,20 +565,24 @@ def output_for_features(self, session: Session) -> None:
wav_file.write(f"{f_id} {sox_string}\n")
yield 1

with mfa_open(segments_scp_path, "w") as segments_file:
with mfa_open(segments_scp_path, "w") as segments_file, mfa_open(
utt2spk_scp_path, "w"
) as utt2spk_file:
utterances = (
session.query(
Utterance.kaldi_id,
Utterance.file_id,
Utterance.speaker_id,
Utterance.begin,
Utterance.end,
Utterance.channel,
)
.filter(Utterance.job_id == job.id)
.order_by(Utterance.kaldi_id)
)
for u_id, f_id, begin, end, channel in utterances:
for u_id, f_id, s_id, begin, end, channel in utterances:
segments_file.write(f"{u_id} {f_id} {begin} {end} {channel}\n")
utt2spk_file.write(f"{u_id} {s_id}\n")
yield 1

def output_to_directory(self, session) -> None:
Expand Down
1 change: 1 addition & 0 deletions montreal_forced_aligner/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ class WordType(enum.Enum):
noise = 8 #: Words that represent non-speech noise
music = 9 #: Words that represent music
disambiguation = 10 #: Disambiguation symbols internal to Kaldi
interjection = 11 #: Set of words that can be added on the fly to transcripts


class DistanceMetric(enum.Enum):
Expand Down
Loading