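Use F.pad and Tensor.mT in place of torch.cat-based padding and explicit last-two-dimension transposes (raises the minimum PyTorch version to 1.11.0)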

sp-nitech · takenori-y · Jul 16, 2023 · Jul 16, 2023 · Jul 16, 2023 · Jul 16, 2023
commit c1c0c850778b20d73f29f4ac363130f487fe57c1
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -16,7 +16,7 @@ jobs:
       max-parallel: 4
       matrix:
         python-version: [3.8]
-        pytorch-version: [1.10.0, 2.0.1]
+        pytorch-version: [1.11.0, 2.0.1]
 
     steps:
       - name: Clone

diff --git a/Makefile b/Makefile
@@ -24,7 +24,7 @@ init:
 dev:
 	test -d venv || python$(PYTHON_VERSION) -m venv venv; \
 	. ./venv/bin/activate; python -m pip install pip --upgrade; \
-	python -m pip install torch==1.10.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html; \
+	python -m pip install torch==1.11.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html; \
 	python -m pip install -e .[dev]
 
 dist:

diff --git a/diffsptk/core/ap.py b/diffsptk/core/ap.py
@@ -248,7 +248,7 @@ def forward(self, x, f0):
 
             H = torch.cat((H_alpha, H_beta), dim=-1)  # (B, N, J, 6)
             w = self.window[i, : self.segment_length[i]]  # (J,)
-            Hw = H.transpose(-2, -1) * w  # (B, N, 6, J)
+            Hw = H.mT * w  # (B, N, 6, J)
             R = torch.matmul(Hw, H)  # (B, N, 6, 6)
 
             index_gamma = origin.unsqueeze(-1) + j[..., 1:-1]  # (B, N, J)

diff --git a/diffsptk/core/excite.py b/diffsptk/core/excite.py
@@ -16,6 +16,7 @@
 
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 
 from ..misc.utils import UNVOICED_SYMBOL
 from .linear_intpl import LinearInterpolation
@@ -79,16 +80,16 @@ def forward(self, p):
         mask = torch.repeat_interleave(mask, self.frame_period, dim=-1)
 
         # Extend right side for interpolation.
-        tmp_mask = torch.cat((base_mask[..., :1] * 0, base_mask), dim=-1)
+        tmp_mask = F.pad(base_mask, (1, 0))
         tmp_mask = torch.eq(tmp_mask[..., 1:] - tmp_mask[..., :-1], -1)
         p[tmp_mask] = torch.roll(p, 1, dims=-1)[tmp_mask]
 
         # Interpolate pitch.
         if p.dim() != 1:
-            p = p.transpose(-2, -1)
+            p = p.mT
         p = self.linear_intpl(p)
         if p.dim() != 1:
-            p = p.transpose(-2, -1)
+            p = p.mT
         p *= mask
 
         # Compute phase.
@@ -101,7 +102,7 @@ def forward(self, p):
 
         if self.voiced_region == "pulse":
             r = torch.ceil(phase)
-            r = torch.cat((r[..., :1] * 0, r), dim=-1)
+            r = F.pad(r, (1, 0))
             pulse_pos = torch.ge(r[..., 1:] - r[..., :-1], 1)
             e = torch.zeros_like(p)
             e[pulse_pos] = torch.sqrt(p[pulse_pos])

diff --git a/diffsptk/core/fftcep.py b/diffsptk/core/fftcep.py
@@ -79,11 +79,6 @@ def forward(self, x):
                 [-0.8539,  4.6173, -0.5496, -0.3207]])
 
         """
-        # Torch's pad only supports 3D, 4D, 5D padding with non-constant padding.
-        d = x.dim()
-        for _ in range(3 - d):
-            x = x.unsqueeze(0)
-
         M = self.cep_order
         H = self.fft_length // 2
 
@@ -102,8 +97,4 @@ def forward(self, x):
 
         indices = [0, M] if H == M else [0]
         v[..., indices] *= 0.5
-
-        # Revert shape.
-        for _ in range(3 - d):
-            v = v.squeeze(0)
         return v
diff --git a/diffsptk/core/gmm.py b/diffsptk/core/gmm.py
@@ -316,7 +316,7 @@ def forward(self, x):
                     y = posterior.sum(dim=0)
                     nu = px / y.view(-1, 1)
                     nm = torch.matmul(nu.unsqueeze(-1), self.mu.unsqueeze(-2))
-                    mn = nm.transpose(1, 2)
+                    mn = nm.mT
                     a = pxx - y.view(-1, 1, 1) * (nm + mn - mm)
                     b = xi.view(-1, 1, 1) * self.ubm_sigma
                     diff = self.ubm_mu - self.mu

diff --git a/diffsptk/core/grpdelay.py b/diffsptk/core/grpdelay.py
@@ -86,8 +86,7 @@ def forward(self, b, a=None):
 
             # Remove gain.
             K, a1 = torch.split(a, [1, order], dim=-1)
-            K = K * 0 + 1
-            a2 = torch.cat((K, a1), dim=-1).unsqueeze(-1)
+            a2 = F.pad(a1, (1, 0), value=1).unsqueeze(-1)
 
             # Perform full convolution.
             b1 = F.pad(b, (order, order))

diff --git a/diffsptk/core/linear_intpl.py b/diffsptk/core/linear_intpl.py
@@ -76,7 +76,7 @@ def forward(self, x):
         assert x.dim() == 3, "Input must be 3D tensor"
         B, T, D = x.shape
 
-        x = x.transpose(1, 2)
+        x = x.mT
         x = self.pad(x)
         x = F.interpolate(
             x,
@@ -86,7 +86,7 @@ def forward(self, x):
         )[
             ..., :-1
         ]  # Remove the padded value.
-        y = x.transpose(1, 2).reshape(B, -1, D)
+        y = x.mT.reshape(B, -1, D)
 
         if d == 1:
             y = y.view(-1)

diff --git a/diffsptk/core/mgc2mgc.py b/diffsptk/core/mgc2mgc.py
@@ -17,6 +17,7 @@
 import numpy as np
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 
 from ..misc.utils import cexp
 from ..misc.utils import clog
@@ -79,7 +80,7 @@ def forward(self, c1):
             Output cepstrum.
 
         """
-        c01 = torch.cat((c1[..., :1] * 0, c1[..., 1:]), dim=-1)
+        c01 = F.pad(c1[..., 1:], (1, 0))
         C1 = torch.fft.fft(c01, n=self.n_fft)
 
         if self.in_gamma == 0:

diff --git a/diffsptk/core/phase.py b/diffsptk/core/phase.py
@@ -18,6 +18,7 @@
 
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 
 
 class Phase(nn.Module):
@@ -73,8 +74,7 @@ def forward(self, b, a=None):
             p = torch.atan2(B.imag, B.real)
         else:
             K, a = torch.split(a, [1, a.size(-1) - 1], dim=-1)
-            K = K * 0 + 1
-            a = torch.cat((K, a), dim=-1)
+            a = F.pad(a, (1, 0), value=1)
             A = torch.fft.rfft(a, n=self.fft_length)
             p = torch.atan2(
                 B.imag * A.real - B.real * A.imag, B.real * A.real + B.imag * A.imag

diff --git a/diffsptk/core/pitch.py b/diffsptk/core/pitch.py
@@ -255,7 +255,7 @@ def calc_embed(self, x):
 
     def calc_pitch(self, x):
         # Compute pitch probabilities.
-        prob = self.calc_prob(x).transpose(-2, -1)
+        prob = self.calc_prob(x).mT
 
         # Decode pitch probabilities.
         pitch, periodicity = self.torchcrepe.postprocess(

diff --git a/diffsptk/core/root_pol.py b/diffsptk/core/root_pol.py
@@ -17,6 +17,7 @@
 import numpy as np
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 
 from ..misc.utils import check_size
 from ..misc.utils import numpy_to_torch
@@ -108,7 +109,7 @@ def forward(self, a):
             center + radius * self.cos,
             center + radius * self.sin,
         )
-        a = torch.cat((a[..., :1] * 0 + 1, a), dim=-1)
+        a = F.pad(a, (1, 0), value=1)
         a = a.unsqueeze(-1).to(x.dtype)
 
         for _ in range(self.n_iter):

diff --git a/diffsptk/core/spec.py b/diffsptk/core/spec.py
@@ -16,6 +16,7 @@
 
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 
 
 class Spectrum(nn.Module):
@@ -95,7 +96,7 @@ def forward(self, b, a=None):
 
         if a is not None:
             K, a1 = torch.split(a, [1, a.size(-1) - 1], dim=-1)
-            a = torch.cat((K * 0 + 1, a1), dim=-1)
+            a = F.pad(a1, (1, 0), value=1)
             X /= torch.fft.rfft(a, n=self.fft_length).abs()
             X *= K
 

diff --git a/diffsptk/core/unframe.py b/diffsptk/core/unframe.py
@@ -120,7 +120,7 @@ def fold(x):
             return x
 
         w = self.window.repeat(1, 1, N)
-        x = y.transpose(-2, -1)
+        x = y.mT
 
         if d == 2:
             x = x.unsqueeze(0)

diff --git a/diffsptk/core/yingram.py b/diffsptk/core/yingram.py
@@ -17,6 +17,7 @@
 import numpy as np
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 
 from ..misc.utils import check_size
 from .acorr import AutocorrelationAnalysis
@@ -121,7 +122,7 @@ def forward(self, x):
         W = self.frame_length
         check_size(x.size(-1), W, "frame length")
 
-        x0 = torch.cat((x[..., :1] * 0, x), dim=-1)
+        x0 = F.pad(x, (1, 0))
         s = torch.cumsum(x0 * x0, dim=-1)
         term1 = (s[..., W - self.tau_max + 1 :]).flip(-1)
         term2 = s[..., W:] - s[..., : self.tau_max]
@@ -136,7 +137,7 @@ def forward(self, x):
         d = self.ramp * d / (torch.cumsum(d, dim=-1) + 1e-7)
 
         # Compute Yingram.
-        d0 = torch.cat((d[..., :1] * 0 + 1, d), dim=-1)
+        d0 = F.pad(d, (1, 0), value=1)
         numer = (self.lags - self.lags_floor) * (
             d0[..., self.lags_ceil] - d0[..., self.lags_floor]
         )

diff --git a/pyproject.toml b/pyproject.toml
@@ -30,7 +30,7 @@
   ]
   dependencies = [
     "soundfile",
-    "torch >= 1.10.0",
+    "torch >= 1.11.0",
     "torchcrepe @ git+https://github.com/takenori-y/torchcrepe.git@v0.0.20a",
     "torchlpc >= 0.2.0",
     "numpy",