Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Housekeeping and small error fixes #1225

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Housekeeping and small error fixes
I noticed that the only thing holding me back from running this with certain versions of the modules used here was a handful of argument-related issues. I did my best to track all of them down here.

I also added a *.pt pattern to the .gitignore to ensure that no model files are accidentally uploaded.
  • Loading branch information
MarkusHammer committed Jun 12, 2023
commit 60c65be81f8cf81b6b1e714b1fe9333ffd05dd9c
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@
encoder/saved_models/*
synthesizer/saved_models/*
vocoder/saved_models/*
*.pt
4 changes: 2 additions & 2 deletions encoder/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ def wav_to_mel_spectrogram(wav):
Note: this is not a log-mel spectrogram.
"""
frames = librosa.feature.melspectrogram(
wav,
sampling_rate,
y = wav,
sr = sampling_rate,
n_fft=int(sampling_rate * mel_window_length / 1000),
hop_length=int(sampling_rate * mel_window_step / 1000),
n_mels=mel_n_channels
Expand Down
2 changes: 1 addition & 1 deletion synthesizer/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def _mel_to_linear(mel_spectrogram, hparams):

def _build_mel_basis(hparams):
assert hparams.fmax <= hparams.sample_rate // 2
return librosa.filters.mel(hparams.sample_rate, hparams.n_fft, n_mels=hparams.num_mels,
return librosa.filters.mel(sr = hparams.sample_rate, n_fft = hparams.n_fft, n_mels=hparams.num_mels,
fmin=hparams.fmin, fmax=hparams.fmax)

def _amp_to_db(x, hparams):
Expand Down
2 changes: 1 addition & 1 deletion synthesizer/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def load_preprocess_wav(fpath):
Loads and preprocesses an audio file under the same conditions the audio files were used to
train the synthesizer.
"""
wav = librosa.load(str(fpath), hparams.sample_rate)[0]
wav = librosa.load(str(fpath), sr = hparams.sample_rate)[0]
if hparams.rescale:
wav = wav / np.abs(wav).max() * hparams.rescaling_max
return wav
Expand Down
4 changes: 2 additions & 2 deletions synthesizer/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def preprocess_speaker(speaker_dir, out_dir: Path, skip_existing: bool, hparams,

for wav_fpath in wav_fpaths:
# Load the audio waveform
wav, _ = librosa.load(str(wav_fpath), hparams.sample_rate)
wav, _ = librosa.load(str(wav_fpath), sr = hparams.sample_rate)
if hparams.rescale:
wav = wav / np.abs(wav).max() * hparams.rescaling_max

Expand Down Expand Up @@ -111,7 +111,7 @@ def preprocess_speaker(speaker_dir, out_dir: Path, skip_existing: bool, hparams,

def split_on_silences(wav_fpath, words, end_times, hparams):
# Load the audio waveform
wav, _ = librosa.load(str(wav_fpath), hparams.sample_rate)
wav, _ = librosa.load(str(wav_fpath), sr = hparams.sample_rate)
if hparams.rescale:
wav = wav / np.abs(wav).max() * hparams.rescaling_max

Expand Down
2 changes: 1 addition & 1 deletion toolbox/ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ def log(self, line, mode="newline"):
self.app.processEvents()

def set_loading(self, value, maximum=1):
self.loading_bar.setValue(value * 100)
self.loading_bar.setValue(int(value * 100))
self.loading_bar.setMaximum(maximum * 100)
self.loading_bar.setTextVisible(value != 0)
self.app.processEvents()
Expand Down
4 changes: 2 additions & 2 deletions vocoder/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def float_2_label(x, bits) :


def load_wav(path) :
return librosa.load(str(path), sr=hp.sample_rate)[0]
return librosa.load(str(path), sr = hp.sample_rate)[0]


def save_wav(x, path) :
Expand Down Expand Up @@ -50,7 +50,7 @@ def linear_to_mel(spectrogram):


def build_mel_basis():
return librosa.filters.mel(hp.sample_rate, hp.n_fft, n_mels=hp.num_mels, fmin=hp.fmin)
return librosa.filters.mel(sr = hp.sample_rate, n_fft = hp.n_fft, n_mels=hp.num_mels, fmin=hp.fmin)


def normalize(S):
Expand Down