
Speed up CI runtime #3189

Merged: 7 commits merged on Jan 4, 2021
Changes from 1 commit
Speedup DeformConvTester (#3191)
* Separating unrelated checks to avoid unnecessary repetition.

* Add cache on get_fn_args().
datumbox authored Dec 18, 2020
commit e96bc8da5c10b6646106030624255298cf61ed80
test/test_ops.py: 6 changes (5 additions, 1 deletion)
@@ -5,6 +5,7 @@
import numpy as np

import torch
from functools import lru_cache
from torch import Tensor
from torch.autograd import gradcheck
from torch.nn.modules.utils import _pair
@@ -496,6 +497,7 @@ def expected_fn(self, x, weight, offset, mask, bias, stride=1, padding=0, dilati
out += bias.view(1, n_out_channels, 1, 1)
return out

@lru_cache(maxsize=None)

Collaborator commented:

Deciding on caching random data is not that simple IMO. There are pros/cons of that...


Contributor Author replied:

I agree there are pros/cons. Note that this is something we do on tests only and seems to help us speed-wise. I don't have a strong opinion about keeping it if it causes issues.

def get_fn_args(self, device, contiguous, batch_sz, dtype):
n_in_channels = 6
n_out_channels = 2
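
For context on the review thread above, here is a minimal sketch (not part of this PR) of what @lru_cache(maxsize=None) does when placed on a test-data generator: repeated calls with the same argument tuple return the same cached objects instead of resampling. The class name and tensor shapes below are hypothetical; only the caching pattern mirrors the diff.

from functools import lru_cache

import torch


class ExampleTester:
    """Hypothetical helper illustrating the caching pattern applied to get_fn_args."""

    @lru_cache(maxsize=None)
    def get_fn_args(self, device, contiguous, batch_sz, dtype):
        # Inputs are generated once per (self, device, contiguous, batch_sz, dtype)
        # combination; later calls with the same arguments reuse the stored tensors.
        x = torch.rand(batch_sz, 6, 8, 8, device=device, dtype=dtype)
        if not contiguous:
            x = x.permute(0, 1, 3, 2)  # non-contiguous view of the same data
        return (x,)


tester = ExampleTester()
a = tester.get_fn_args("cpu", True, 2, torch.float64)
b = tester.get_fn_args("cpu", True, 2, torch.float64)
assert a[0] is b[0]  # cached: the very same tensor object, not a fresh random sample

This is where both sides of the discussion come from: the cache avoids regenerating the inputs on every test invocation, but the cached random tensors are then shared for the whole run, so any in-place mutation or expectation of fresh randomness would leak between tests.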
@@ -614,9 +616,11 @@ def script_func_no_mask(x_, offset_, weight_, bias_, stride_, pad_, dilation_):
gradcheck(lambda z, off, wei, bi: script_func_no_mask(z, off, wei, bi, stride, padding, dilation),
(x, offset, weight, bias), nondet_tol=1e-5)

@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_compare_cpu_cuda_grads(self):
# Test from https://github.com/pytorch/vision/issues/2598
# Run on CUDA only
if "cuda" in device.type:
for contiguous in [False, True]:
# compare grads computed on CUDA with grads computed on CPU
true_cpu_grads = None

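The second hunk moves the device gate from the test body to a decorator. A self-contained sketch of that pattern, assuming a stand-alone TestCase rather than the file's actual DeformConvTester, follows:

import unittest

import torch


class ExampleCudaTest(unittest.TestCase):
    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
    def test_needs_cuda(self):
        # The skip condition is evaluated when the module is imported, so
        # CPU-only CI workers report this test as skipped instead of entering
        # the body and short-circuiting on an in-body device check.
        x = torch.rand(4, device="cuda")
        self.assertEqual(x.device.type, "cuda")


if __name__ == "__main__":
    unittest.main()

This reflects the commit's "separating unrelated checks" bullet: the CUDA-only comparison gets its own skippable test, which appears to be the role the `if "cuda" in device.type:` guard played in the old body.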