forked from kenders2000/u_net_speech_enhancement
-
Notifications
You must be signed in to change notification settings - Fork 0
/
resample_audio.py
89 lines (83 loc) · 4.35 KB
/
resample_audio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
"""
This script converts the output of the u-net scene cleaning to the format
required by the hearing aid processor.
"""
import os
from argparse import ArgumentParser
from pathlib import Path

import librosa
import soundfile as sf
import tqdm
# clean_path = Path("/home/paulkendrick/spectrogram_based_models/example_data/clarity_CEC1_data/clarity_data/dev/cleaned_audio")
# data_path = Path("/home/paulkendrick/spectrogram_based_models/example_data/clarity_CEC1_data/clarity_data/dev/scenes")
def main():
    """Upsample the 16 kHz u-net output to 44.1 kHz for the hearing aid processor.

    Command-line options:
        -p  directory holding the cleaned audio written by the u-net
            (files whose names end in ``16k.wav``).
        -d  directory holding the scenes (``{scene}_mixed_CH1.wav``); used
            only to look up how many samples each scene originally had.
        -o  directory the converted files are written to (created if it
            does not exist).

    Every ``*16k.wav`` file found under the cleaned-audio directory is
    resampled to 44.1 kHz, truncated to the number of samples in the matching
    ``{scene}_mixed_CH1.wav`` and written with the ``FLOAT`` subtype as
    ``{scene}_cleaned_signal_441k.wav``.
    """
    target_sr = 44100

    ap = ArgumentParser()
    ap.add_argument(
        "-p",
        type=str,
        dest="clean_path",
        help="Path to the cleaned audio (output of u-net).",
        default="/home/paulkendrick/spectrogram_based_models/example_data/clarity_CEC1_data/clarity_data/dev/cleaned_audio",
    )
    ap.add_argument(
        "-d",
        type=str,
        dest="data_path",
        help="Path to the scenes (where _mixed_CH1.wav are).",
        default="/home/paulkendrick/spectrogram_based_models/example_data/clarity_CEC1_data/clarity_data/dev/scenes",
    )
    ap.add_argument(
        "-o",
        type=str,
        dest="output_path",
        help="Path save the converted files.",
        default="/home/kenders/greenhdd/clarity_challenge/pk_speech_enhancement/spectrogram_models/cleaned_scenes",
    )
    args = ap.parse_args()

    clean_path = Path(args.clean_path)
    # The scenes directory is its own option (-d), distinct from -p.
    data_path = Path(args.data_path)
    output_path = Path(args.output_path)
    output_path.mkdir(parents=True, exist_ok=True)

    # Traverse the whole cleaned-audio directory tree.
    for root, _dirs, files in os.walk(clean_path):
        for file in files:
            if not file.endswith("16k.wav"):
                continue

            # os.walk yields bare file names, so join them with the directory
            # currently being visited rather than with the top-level path.
            reconstructed_audio_full, fs = sf.read(Path(root) / file)
            scene = Path(file).stem.split("_")[0]

            # The audio is transposed so that time is the last axis, which is
            # the axis librosa resamples along by default. The sample rates
            # are passed by keyword because recent librosa releases no longer
            # accept them positionally.
            reconstructed_audio_full_44k = librosa.resample(
                reconstructed_audio_full.T, orig_sr=fs, target_sr=target_sr
            )

            # Load the original channel 1 to find out the length.
            # NOTE(review): assumes the scene files are sampled at 44.1 kHz,
            # so that their sample count applies to the resampled audio.
            original_ch1, _ = sf.read(data_path / f"{scene}_mixed_CH1.wav")
            original_samples = original_ch1.shape[0]

            # The ellipsis keeps the truncation valid for both mono (1-D) and
            # multi-channel (channels, samples) arrays.
            reconstructed_audio_full_44k = reconstructed_audio_full_44k[
                ..., :original_samples
            ]

            output_filename_44k = f"{scene}_cleaned_signal_441k.wav"
            sf.write(
                output_path / output_filename_44k,
                reconstructed_audio_full_44k.T,
                target_sr,
                subtype="FLOAT",
            )
# Legacy conversion:
# The u-net output was originally saved as {scene}_{listener}_HA-output.wav
# (16 kHz, 16 bit wavs). This section re-saves each of those files as a float
# wav named {scene}_cleaned_signal_16k.wav. The cleaning is listener
# independent, so the output name depends only on the scene; files for
# different listeners of the same scene are written to the same output name.
# The step that upsampled to 44.1 kHz ({scene}_cleaned_signal_441k.wav) is
# kept below, commented out.
# NOTE(review): this section runs whenever the module is executed or imported,
# using the hard-coded paths below.
clean_path = Path("/home/kenders/greenhdd/clarity_challenge/pk_speech_enhancement/spectrogram_models/reconstructed_audio/eval")
data_path = Path("/home/kenders/greenhdd/clarity_challenge/data/clarity_CEC1_data/clarity_data/eval/scenes/")
output_path = Path("/home/kenders/greenhdd/clarity_challenge/pk_speech_enhancement/spectrogram_models/cleaned_scenes")

# Traverse the whole directory tree.
for root, dirs, files in os.walk(clean_path):
    for file in tqdm.tqdm(files):
        # Only the u-net output files are converted.
        if not file.endswith('HA-output.wav'):
            continue

        # os.walk yields bare file names, so join them with the directory
        # currently being visited rather than with the top-level path.
        reconstructed_audio_full, fs = sf.read(Path(root) / file)
        scene = Path(file).stem.split("_")[0]
        output_filename = f"{scene}_cleaned_signal_16k.wav"
        sf.write(output_path / output_filename, reconstructed_audio_full, 16000, subtype="FLOAT")

        # reconstructed_audio_full_44k = librosa.resample(reconstructed_audio_full.T, fs, 44100)
        #
        # # load the original channel 1 to find out the length:
        # # this is because I zero-padded all signals to the same length.
        # original_ch1, fs = sf.read(data_path / f"{scene}_mixed_CH1.wav")
        # original_samples = original_ch1.shape[0]
        # reconstructed_audio_full_44k = reconstructed_audio_full_44k[:, 0:original_samples]
        # output_filename_44k = f"{scene}_cleaned_signal_441k.wav"
        #
        # sf.write(output_path / output_filename_44k, reconstructed_audio_full_44k.T, 44100, subtype="FLOAT")