From e1658985f81de73e16da5e69c13504e8d1547515 Mon Sep 17 00:00:00 2001 From: takenori-y Date: Tue, 12 Dec 2023 12:30:04 +0900 Subject: [PATCH 1/6] update dfs --- Makefile | 2 +- diffsptk/core/dfs.py | 135 +++++++++++++++++++++++++-------------- diffsptk/core/lsp2lpc.py | 11 +--- diffsptk/misc/utils.py | 11 ++++ pyproject.toml | 1 + tests/test_dfs.py | 53 ++++++++++++++- tests/test_lsp2lpc.py | 5 -- 7 files changed, 154 insertions(+), 64 deletions(-) diff --git a/Makefile b/Makefile index 9796efd..a458e32 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,7 @@ init: dev: test -d venv || python$(PYTHON_VERSION) -m venv venv; \ . ./venv/bin/activate; python -m pip install pip --upgrade; \ - python -m pip install torch==1.11.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html; \ + python -m pip install torch==1.11.0+cu113 torchaudio==0.11.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html; \ python -m pip install -e .[dev] dist: diff --git a/diffsptk/core/dfs.py b/diffsptk/core/dfs.py index 448dbae..8a26f14 100644 --- a/diffsptk/core/dfs.py +++ b/diffsptk/core/dfs.py @@ -17,14 +17,15 @@ import numpy as np import torch.nn as nn import torch.nn.functional as F +import torchaudio.functional as G from ..misc.utils import numpy_to_torch +from ..misc.utils import to_3d class InfiniteImpulseResponseDigitalFilter(nn.Module): """See `this page `_ - for details. Note that numerator and denominator coefficients are fixed in the - current implementation. + for details. Parameters ---------- @@ -35,52 +36,70 @@ class InfiniteImpulseResponseDigitalFilter(nn.Module): Denominator coefficients. ir_length : int >= 1 [scalar] - Length of impulse response. + Length of impulse response (valid only if **mode** is 'fir'). + + mode : ['fir', 'iir'] + If 'fir', filter is approximated by a finite impulse response. """ - def __init__(self, b=[1], a=[1], ir_length=None): + def __init__(self, b=None, a=None, ir_length=None, mode="fir"): super(InfiniteImpulseResponseDigitalFilter, self).__init__() - if ir_length is None: - ir_length = len(b) - assert 1 <= ir_length - - d = np.zeros(max(len(b), len(a))) - h = np.empty(ir_length) + self.mode = mode - a0 = a[0] - a1 = np.asarray(a[1:]) + if b is None: + b = [1] + if a is None: + a = [1] b = np.asarray(b) - - # Pre-compute impulse response. - for t in range(ir_length): - x = a0 if t == 0 else 0 - y = x - np.sum(d[: len(a1)] * a1) - - d = np.roll(d, 1) - d[0] = y - - y = np.sum(d[: len(b)] * b) - h[t] = y - - h = h.reshape(1, 1, -1) - self.register_buffer("h", numpy_to_torch(h).flip(-1)) - - self.pad = nn.ConstantPad1d((ir_length - 1, 0), 0) - - def forward(self, x): - """Apply an approximated IIR digital filter. + a = np.asarray(a) + + if self.mode == "fir": + # Pre-compute impulse response. + if ir_length is None: + ir_length = len(b) + assert 1 <= ir_length + + d = np.zeros(max(len(b), len(a))) + h = np.empty(ir_length) + a0 = a[0] + a1 = a[1:] + for t in range(ir_length): + x = a0 if t == 0 else 0 + y = x - np.sum(d[: len(a1)] * a1) + + d = np.roll(d, 1) + d[0] = y + + y = np.sum(d[: len(b)] * b) + h[t] = y + h = h.reshape(1, 1, -1) + self.register_buffer("h", numpy_to_torch(h).flip(-1)) + elif self.mode == "iir": + self.register_buffer("b", numpy_to_torch(b)) + self.register_buffer("a", numpy_to_torch(a)) + else: + raise ValueError(f"mode {mode} is not supported") + + def forward(self, x, b=None, a=None): + """Apply an IIR digital filter. Parameters ---------- - x : Tensor [shape=(B, 1, T) or (B, T) or (T,)] + x : Tensor [shape=(..., T)] Input waveform. + b : Tensor [shape=(M+1,)] + Numerator coefficients. + + a : Tensor [shape=(N+1,)] + Denominator coefficients. + Returns ------- - y : Tensor [shape=(B, 1, T) or (B, T) or (T,)] - Filterd waveform. + y : Tensor [shape=(..., T)] + Filtered waveform. Examples -------- @@ -91,17 +110,39 @@ def forward(self, x): tensor([0.0000, 1.0000, 1.0300, 1.0600, 1.0900]) """ - d = x.dim() - if d == 1: - x = x.view(1, 1, -1) - elif d == 2: - x = x.unsqueeze(1) - assert x.dim() == 3, "Input must be 3D tensor" - - y = F.conv1d(self.pad(x), self.h) - - if d == 1: - y = y.view(-1) - elif d == 2: - y = y.squeeze(1) + if self.mode == "fir": + y = self._forward_fir(x, b, a) + elif self.mode == "iir": + y = self._forward_iir(x, b, a) + else: + raise RuntimeError + return y + + def _forward_fir(self, x, b=None, a=None): + if a is None and b is None: + h = self.h + elif a is None and b is not None: + h = b.view(1, 1, -1).flip(-1) + else: + raise ValueError("Denominator coefficients must be set via constructor") + + y = to_3d(x) + y = F.pad(y, (h.size(-1) - 1, 0)) + y = F.conv1d(y, h) + y = y.view_as(x) + return y + + def _forward_iir(self, x, b=None, a=None): + if b is None: + b = self.b + if a is None: + a = self.a + + diff = b.size(-1) - a.size(-1) + if diff > 0: + a = F.pad(a, (0, diff)) + elif diff < 0: + b = F.pad(b, (0, -diff)) + + y = G.lfilter(x, a, b, clamp=False) return y diff --git a/diffsptk/core/lsp2lpc.py b/diffsptk/core/lsp2lpc.py index 78a4f98..816b9c5 100644 --- a/diffsptk/core/lsp2lpc.py +++ b/diffsptk/core/lsp2lpc.py @@ -19,6 +19,7 @@ import torch.nn.functional as F from ..misc.utils import check_size +from ..misc.utils import to_3d from .pol_root import RootsToPolynomial @@ -90,15 +91,7 @@ def forward(self, w): if self.lpc_order == 0: return K - d = w.dim() - if d == 1: - x = w.view(1, 1, -1) - elif d == 2: - x = w.unsqueeze(1) - else: - x = w.view(-1, 1, w.size(-1)) - - z = torch.exp(1j * x) + z = torch.exp(1j * to_3d(w)) p = z[..., 1::2] q = z[..., 0::2] if self.lpc_order == 1: diff --git a/diffsptk/misc/utils.py b/diffsptk/misc/utils.py index 9378e4f..f1dedee 100644 --- a/diffsptk/misc/utils.py +++ b/diffsptk/misc/utils.py @@ -69,6 +69,17 @@ def numpy_to_torch(x): return torch.from_numpy(x.astype(default_dtype())) +def to_3d(x): + d = x.dim() + if d == 1: + y = x.view(1, 1, -1) + elif d == 2: + y = x.unsqueeze(1) + else: + y = x.view(-1, 1, x.size(-1)) + return y + + def get_alpha(sr, mode="hts", n_freq=10, n_alpha=100): """Compute frequency warping factor under given sample rate. diff --git a/pyproject.toml b/pyproject.toml index 7314bc4..c738a13 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ dependencies = [ "soundfile >= 0.10.2", "torch >= 1.11.0", + "torchaudio >= 0.11.0", "torchcrepe >= 0.0.21", "torchlpc >= 0.2.0", "vector-quantize-pytorch >= 0.8.0", diff --git a/tests/test_dfs.py b/tests/test_dfs.py index e9b0b13..99bcf0b 100644 --- a/tests/test_dfs.py +++ b/tests/test_dfs.py @@ -21,8 +21,9 @@ @pytest.mark.parametrize("device", ["cpu", "cuda"]) -def test_compatibility(device, b=[-0.42, 1], a=[1, -0.42], T=100): - dfs = diffsptk.IIR(b, a, 30) +@pytest.mark.parametrize("mode", ["fir", "iir"]) +def test_compatibility(device, mode, b=[-0.42, 1], a=[1, -0.42], T=100): + dfs = diffsptk.IIR(b, a, ir_length=30, mode=mode) bb = " ".join([str(x) for x in b]) aa = " ".join([str(x) for x in a]) @@ -39,6 +40,54 @@ def test_compatibility(device, b=[-0.42, 1], a=[1, -0.42], T=100): U.check_differentiable(device, dfs, [T]) +@pytest.mark.parametrize("device", ["cpu", "cuda"]) +@pytest.mark.parametrize("mode", ["fir", "iir"]) +def test_compatibility_b(device, mode, b=[-0.42, 1], T=100): + dfs = diffsptk.IIR(b, None, mode=mode) + + bb = " ".join([str(x) for x in b]) + + tmp1 = "dfs.tmp1" + tmp2 = "dfs.tmp2" + U.check_compatibility( + device, + dfs, + [f"nrand -l {T} > {tmp1}", f"echo {bb} | x2x +ad > {tmp2}"], + [f"cat {tmp1}", f"cat {tmp2}"], + f"dfs -z {tmp2} < {tmp1}", + [f"rm {tmp1} {tmp2}"], + ) + + U.check_differentiable(device, dfs, [(T,), (len(b),)]) + + +@pytest.mark.parametrize("device", ["cpu", "cuda"]) +@pytest.mark.parametrize("mode", ["iir"]) +def test_compatibility_b_a(device, mode, b=[-0.42, 1], a=[1, -0.42, 0], T=100): + dfs = diffsptk.IIR(None, None, mode=mode) + + bb = " ".join([str(x) for x in b]) + aa = " ".join([str(x) for x in a]) + + tmp1 = "dfs.tmp1" + tmp2 = "dfs.tmp2" + tmp3 = "dfs.tmp3" + U.check_compatibility( + device, + dfs, + [ + f"nrand -l {T} > {tmp1}", + f"echo {bb} | x2x +ad > {tmp2}", + f"echo {aa} | x2x +ad > {tmp3}", + ], + [f"cat {tmp1}", f"cat {tmp2}", f"cat {tmp3}"], + f"dfs -z {tmp2} -p {tmp3} < {tmp1}", + [f"rm {tmp1} {tmp2} {tmp3}"], + ) + + U.check_differentiable(device, dfs, [(T,), (len(b),), (len(a),)]) + + def test_various_shape(T=10): pqmf = diffsptk.IIR() U.check_various_shape(pqmf, [(T,), (1, T), (1, 1, T)]) diff --git a/tests/test_lsp2lpc.py b/tests/test_lsp2lpc.py index 7ea42ba..2233b82 100644 --- a/tests/test_lsp2lpc.py +++ b/tests/test_lsp2lpc.py @@ -37,8 +37,3 @@ def test_compatibility(device, M, L=32, B=2): ) U.check_differentiable(device, lsp2lpc, [B, M + 1]) - - -def test_various_shape(M=3): - lsp2lpc = diffsptk.LineSpectralPairsToLinearPredictiveCoefficients(M) - U.check_various_shape(lsp2lpc, [(M + 1,), (1, M + 1), (1, 1, M + 1)]) From 3218bc8ba6dd197a6fc0333923155734377471dd Mon Sep 17 00:00:00 2001 From: takenori-y Date: Tue, 12 Dec 2023 12:37:42 +0900 Subject: [PATCH 2/6] update df2 --- diffsptk/core/df2.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/diffsptk/core/df2.py b/diffsptk/core/df2.py index 241a98e..d864f8c 100644 --- a/diffsptk/core/df2.py +++ b/diffsptk/core/df2.py @@ -42,8 +42,8 @@ class SecondOrderDigitalFilter(nn.Module): zero_bandwidth : float > 0 [scalar] Zero bandwidth in Hz. - ir_length : int >= 1 [scalar] - Length of impulse response. + **kwargs : additional keyword arguments + See :func:`~diffsptk.InfiniteImpulseResponseDigitalFilter`. """ @@ -54,7 +54,7 @@ def __init__( pole_bandwidth=None, zero_frequency=None, zero_bandwidth=None, - ir_length=None, + **kwargs, ): super(SecondOrderDigitalFilter, self).__init__() @@ -67,32 +67,29 @@ def get_filter_coefficients(frequency, bandwidth, sample_rate): theta = 2 * math.pi * frequency / sample_rate return [1, -2 * r * math.cos(theta), r * r] - param = {} if pole_frequency is not None: - param["a"] = get_filter_coefficients( + kwargs["a"] = get_filter_coefficients( pole_frequency, pole_bandwidth, sample_rate ) if zero_frequency is not None: - param["b"] = get_filter_coefficients( + kwargs["b"] = get_filter_coefficients( zero_frequency, zero_bandwidth, sample_rate ) - if ir_length is not None: - param["ir_length"] = ir_length - self.dfs = InfiniteImpulseResponseDigitalFilter(**param) + self.dfs = InfiniteImpulseResponseDigitalFilter(**kwargs) def forward(self, x): """Apply a second order digital filter. Parameters ---------- - x : Tensor [shape=(B, 1, T) or (B, T) or (T,)] + x : Tensor [shape=(..., T)] Input waveform. Returns ------- - y : Tensor [shape=(B, 1, T) or (B, T) or (T,)] - Filterd waveform. + y : Tensor [shape=(..., T)] + Filtered waveform. Examples -------- From f8f81f0a153eeaf8c1b93292fd4cb8c365632bcb Mon Sep 17 00:00:00 2001 From: takenori-y Date: Tue, 12 Dec 2023 12:56:53 +0900 Subject: [PATCH 3/6] fix doc [skip ci] --- diffsptk/misc/signals.py | 8 ++++---- diffsptk/misc/utils.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/diffsptk/misc/signals.py b/diffsptk/misc/signals.py index 415f35f..51c1c77 100644 --- a/diffsptk/misc/signals.py +++ b/diffsptk/misc/signals.py @@ -30,7 +30,7 @@ def impulse(order, **kwargs): order : int >= 0 [scalar] Order of sequence, :math:`M`. - kwargs : additional keyword arguments + **kwargs : additional keyword arguments See `torch.eye `_. Returns @@ -63,7 +63,7 @@ def step(order, value=1, **kwargs): value : float [scalar] Step value. - kwargs : additional keyword arguments + **kwargs : additional keyword arguments See `torch.full `_. Returns @@ -102,7 +102,7 @@ def ramp(arg, end=None, step=1, eps=1e-8, **kwargs): eps : float [scalar] A correction value. - kwargs : additional keyword arguments + **kwargs : additional keyword arguments See `torch.arange `_. @@ -148,7 +148,7 @@ def sin(order, period=None, magnitude=1, **kwargs): magnitude : float [scalar] Magnitude. - kwargs : additional keyword arguments + **kwargs : additional keyword arguments See `torch.arange `_. diff --git a/diffsptk/misc/utils.py b/diffsptk/misc/utils.py index f1dedee..7901318 100644 --- a/diffsptk/misc/utils.py +++ b/diffsptk/misc/utils.py @@ -218,7 +218,7 @@ def read(filename, double=False, **kwargs): double : bool [scalar] If True, return double-type tensor. - kwargs : additional keyword arguments + **kwargs : additional keyword arguments Additional arguments passed to `soundfile.read`. Returns @@ -257,7 +257,7 @@ def write(filename, x, sr, **kwargs): sr : int [scalar] Sample rate in Hz. - kwargs : additional keyword arguments + **kwargs : additional keyword arguments Additional arguments passed to `soundfile.write`. Examples From d2cb84b6469e8bad1632462dd62d184e093e205b Mon Sep 17 00:00:00 2001 From: takenori-y Date: Tue, 12 Dec 2023 17:04:38 +0900 Subject: [PATCH 4/6] refactor --- diffsptk/core/dfs.py | 10 ++-------- diffsptk/misc/utils.py | 22 ++++++++++++---------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/diffsptk/core/dfs.py b/diffsptk/core/dfs.py index 8a26f14..c40ad0a 100644 --- a/diffsptk/core/dfs.py +++ b/diffsptk/core/dfs.py @@ -17,8 +17,8 @@ import numpy as np import torch.nn as nn import torch.nn.functional as F -import torchaudio.functional as G +from ..misc.utils import iir from ..misc.utils import numpy_to_torch from ..misc.utils import to_3d @@ -138,11 +138,5 @@ def _forward_iir(self, x, b=None, a=None): if a is None: a = self.a - diff = b.size(-1) - a.size(-1) - if diff > 0: - a = F.pad(a, (0, diff)) - elif diff < 0: - b = F.pad(b, (0, -diff)) - - y = G.lfilter(x, a, b, clamp=False) + y = iir(x, a, b) return y diff --git a/diffsptk/misc/utils.py b/diffsptk/misc/utils.py index 7901318..ef0ed20 100644 --- a/diffsptk/misc/utils.py +++ b/diffsptk/misc/utils.py @@ -19,6 +19,8 @@ import numpy as np import soundfile as sf import torch +import torch.nn.functional as F +import torchaudio UNVOICED_SYMBOL = 0 TWO_PI = 2 * torch.pi @@ -185,16 +187,6 @@ def hankel(x): return X -def vander(x, N): - target_shape = list(x.shape) - target_shape.append(N - 1) - x = x.repeat_interleave(N - 1, dim=-1) - X = x.view(*target_shape) - X = torch.cat((X[..., 0:1] * 0 + 1, X), dim=-1) - X = X.cumprod(dim=-1).flip(-1) - return X - - def cexp(x): return torch.polar(torch.exp(x.real), x.imag) @@ -203,6 +195,16 @@ def clog(x): return torch.log(x.abs()) +def iir(x, b, a): + diff = b.size(-1) - a.size(-1) + if diff > 0: + a = F.pad(a, (0, diff)) + elif diff < 0: + b = F.pad(b, (0, -diff)) + y = torchaudio.functional.lfilter(x, a, b, clamp=False, batching=True) + return y + + def check_size(x, y, cause): assert x == y, f"Unexpected {cause} (input {x} vs target {y})" From 52de0df18ba79408a45d39f1f5ec7f5fa4b6667b Mon Sep 17 00:00:00 2001 From: takenori-y Date: Tue, 12 Dec 2023 17:11:34 +0900 Subject: [PATCH 5/6] fix --- diffsptk/core/dfs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/diffsptk/core/dfs.py b/diffsptk/core/dfs.py index c40ad0a..4fefc2e 100644 --- a/diffsptk/core/dfs.py +++ b/diffsptk/core/dfs.py @@ -138,5 +138,5 @@ def _forward_iir(self, x, b=None, a=None): if a is None: a = self.a - y = iir(x, a, b) + y = iir(x, b, a) return y From ee0b2b9e960125924a9d3efd0f5efa06371ef850 Mon Sep 17 00:00:00 2001 From: takenori-y Date: Tue, 12 Dec 2023 17:21:46 +0900 Subject: [PATCH 6/6] use < instead > [skip ci] --- diffsptk/core/delay.py | 2 +- diffsptk/core/fbank.py | 2 +- diffsptk/misc/utils.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/diffsptk/core/delay.py b/diffsptk/core/delay.py index eec28d7..8cd184e 100644 --- a/diffsptk/core/delay.py +++ b/diffsptk/core/delay.py @@ -68,7 +68,7 @@ def forward(self, x, dim=-1): """ # Generate zeros if needed. - if self.start > 0 or self.keeplen: + if 0 < self.start or self.keeplen: shape = list(x.shape) shape[dim] = abs(self.start) zeros = torch.zeros(*shape, dtype=x.dtype, device=x.device) diff --git a/diffsptk/core/fbank.py b/diffsptk/core/fbank.py index b8d4f92..c84db51 100644 --- a/diffsptk/core/fbank.py +++ b/diffsptk/core/fbank.py @@ -100,7 +100,7 @@ def hz_to_mel(x): seed = np.arange(lower_bin_index, upper_bin_index) mel = hz_to_mel(sample_rate * seed / fft_length) - lower_channel_map = [np.argmax((freq >= m) > 0) for m in mel] + lower_channel_map = [np.argmax(0 < (m <= freq)) for m in mel] diff = freq - np.insert(freq[:-1], 0, mel_min) weights = np.zeros((fft_length // 2 + 1, n_channel)) diff --git a/diffsptk/misc/utils.py b/diffsptk/misc/utils.py index ef0ed20..31b4164 100644 --- a/diffsptk/misc/utils.py +++ b/diffsptk/misc/utils.py @@ -197,7 +197,7 @@ def clog(x): def iir(x, b, a): diff = b.size(-1) - a.size(-1) - if diff > 0: + if 0 < diff: a = F.pad(a, (0, diff)) elif diff < 0: b = F.pad(b, (0, -diff))