alibaba · chenbohua3 · Jul 20, 2023 · Jul 20, 2023
diff --git a/pytorch_blade/tests/disc/pdl/test_e2e/test_quantization.py b/pytorch_blade/tests/disc/pdl/test_e2e/test_quantization.py
@@ -395,7 +395,6 @@ def forward(self, x):
                 return x
         model = Model().eval().to(self.device)
         inp = torch.randn(512, 512).to(self.device)
-        traced_model = torch.jit.trace(model, inp)
         # only cuda version 11.3/11.7 can be ensured correctness
         if torch.version.cuda == '11.3':
             qgemm_pdl_file = "dequant_gemm_quant_bias_quant.pdll"
@@ -406,7 +405,7 @@ def forward(self, x):
             os.path.join(self.device_pdll_dir, qgemm_pdl_file)
         ]
         pdll_files = ",".join(pdll_files)
-        self._test_e2e(model, inp, pdll_files=pdll_files, enable_int8=True, diff_scale=model.output_scale)
+        self._test_e2e(model, inp, pdll_files=pdll_files, enable_int8=True, atol=model.output_scale)
 
     def test_s8s8s8_f32bias_per_tensor_three_rank_verify(self):
         class Model(nn.Module):
@@ -448,7 +447,6 @@ def forward(self, x):
                 return x
         model = Model().eval().to(self.device)
         inp = torch.randn(8, 512, 512).to(self.device)
-        traced_model = torch.jit.trace(model, inp)
         # only cuda version 11.3/11.7 can be ensured correctness
         if torch.version.cuda == '11.3':
             qgemm_pdl_file = "dequant_gemm_quant_bias_quant.pdll"
@@ -459,7 +457,7 @@ def forward(self, x):
             os.path.join(self.device_pdll_dir, qgemm_pdl_file)
         ]
         pdll_files = ",".join(pdll_files)
-        self._test_e2e(model, inp, pdll_files=pdll_files, enable_int8=True, diff_scale=model.output_scale)
+        self._test_e2e(model, inp, pdll_files=pdll_files, enable_int8=True, atol=model.output_scale)
 
 
 if __name__ == "__main__":

diff --git a/pytorch_blade/tests/disc/pdl/test_e2e/test_quantized_qkv_merge.py b/pytorch_blade/tests/disc/pdl/test_e2e/test_quantized_qkv_merge.py
@@ -92,7 +92,8 @@ def forward(self, x):
                     x3, self.output_scale, self.output_zero_point,
                     self.activation_quant_min, self.activation_quant_max
                 )
-                return x1+x2+x3
+                return x1 + x2 + x3
+
         model = Model().eval().to(self.device)
         inp = torch.randn(512, 512).to(self.device)
         traced_model = torch.jit.trace(model, inp)
@@ -106,7 +107,7 @@ def forward(self, x):
             os.path.join(self.device_pdll_dir, qgemm_pdl_file)
         ]
         pdll_files = ",".join(pdll_files)
-        self._test_e2e(model, inp, pdll_files=pdll_files, enable_int8=True, diff_scale=3*model.output_scale)
+        self._test_e2e(model, inp, pdll_files=pdll_files, enable_int8=True, atol=3 * model.output_scale)
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/pytorch_blade/tests/disc/testing_base.py b/pytorch_blade/tests/disc/testing_base.py
@@ -18,7 +18,6 @@
 from torch.testing import FileCheck
 from torch_blade import mlir, optimize, utils
 from torch_blade.clustering import support_fusion_group
-from torch_blade.clustering.support_fusion_group import min_group_nodes
 from torch_blade.config import Config
 from torch_blade.mlir import is_available
 from torch_blade.pass_manager import _optimize_common
@@ -153,7 +152,7 @@ def setUp(self):
     def _test_torchscipte_to_mhlo(
             self, module, expected_str, pdll_files=None,
             pdll_dirs=None, enable_int8=False, 
-            env_var = {},
+            env_var={},
     ):
         if pdll_files is not None:
             env_var["DISC_TORCH_PDL_FILES"] = pdll_files
@@ -225,22 +224,3 @@ def setUp(self):
         super().setUp()
         if self.device != torch.device('cuda'):
             self.skipTest("Quantization pdl test case only supports gpu platform")
-
-    def _test_e2e(
-            self, model, inp, pdll_files=None,
-            pdll_dirs=None, enable_int8=False,
-            diff_scale=1.0
-    ):
-        origin_output = model(inp)
-        cfg = Config.get_current_context_or_new()
-        cfg.optimization_pipeline = mlir.backend_name()
-        cfg.enable_int8 = enable_int8
-        env_var = {}
-        if pdll_files is not None:
-            env_var["DISC_TORCH_PDL_FILES"] = pdll_files
-        if pdll_dirs is not None:
-            env_var["DISC_TORCH_PDLL_INCLUDE_DIRS"] = pdll_dirs
-        with set_env(**env_var), cfg, min_group_nodes(1):
-            opt_model = optimize(model, True, inp)
-        now_output = opt_model(inp)
-        self.assertTrue(torch.allclose(now_output, origin_output, atol=1.0 * diff_scale))