Skip to content

Commit

Permalink
🌊 Set only_onnxruntime to false when opt_level > 1 (#599)
Browse files Browse the repository at this point in the history
## Describe your changes
1. Set only_onnxruntime to false when opt_level > 1.
2. Set the quantization calibrate_method to MinMax to work around a bug in
onnxruntime 1.16.0 (microsoft/onnxruntime#17619).

## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Format your code by running `pre-commit run --all-files`
- [ ] Is this a user-facing change? If yes, give a description of this
change to be included in the release notes.

## (Optional) Issue link
  • Loading branch information
trajepl committed Sep 26, 2023
1 parent e2e878b commit 66a4d7b
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 8 deletions.
4 changes: 3 additions & 1 deletion examples/bert/bert_ptq_cpu.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@
"transformers_optimization": {
"type": "OrtTransformersOptimization",
"config": {
"float16": false
"float16": false,
"only_onnxruntime": false
}
},
"quantization": {
Expand All @@ -59,6 +60,7 @@
"quant_preprocess": true,
"per_channel": false,
"reduce_range": false,
"calibrate_method": "MinMax",
"data_config": "__input_model_data_config__"
}
},
Expand Down
6 changes: 5 additions & 1 deletion examples/bert/bert_ptq_cpu_aml.json
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,17 @@
"transformers_optimization": {
"type": "OrtTransformersOptimization",
"config": {
"float16": false
"float16": false,
"only_onnxruntime": false
}
},
"quantization": {
"type": "OnnxQuantization",
"config": {
"quant_preprocess": true,
"per_channel": false,
"reduce_range": false,
"calibrate_method": "MinMax",
"data_config": "__input_model_data_config__"
}
},
Expand Down
1 change: 1 addition & 0 deletions olive/passes/onnx/quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@
description="""
Current calibration methods supported are MinMax and Entropy,
Please use CalibrationMethod.MinMax or CalibrationMethod.Entropy as options.
Percentile is not supported for onnxruntime==1.16.0, please avoid to set/search it.
""",
),
"quant_format": PassConfigParam(
Expand Down
17 changes: 14 additions & 3 deletions olive/passes/onnx/transformer_optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from olive.passes import Pass
from olive.passes.onnx.common import get_external_data_config, model_proto_to_olive_model
from olive.passes.pass_config import PassConfigParam
from olive.strategy.search_parameter import Boolean, Categorical
from olive.strategy.search_parameter import Boolean, Categorical, Conditional

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -62,8 +62,19 @@ def _default_config(accelerator_spec: AcceleratorSpec) -> Dict[str, PassConfigPa
"only_onnxruntime": PassConfigParam(
type_=bool,
default_value=False,
searchable_values=Boolean(),
description="Whether only use onnxruntime to optimize model, and no python fusion.",
searchable_values=Conditional(
parents=("opt_level",),
support={
(2,): Categorical([False]),
(99,): Categorical([False]),
},
default=Boolean(),
),
description=(
"Whether only use onnxruntime to optimize model, and no python fusion."
" Disable some optimizers that might cause failure in symbolic shape inference or attention fusion,"
" when opt_level > 1."
),
),
"float16": PassConfigParam(
type_=bool, default_value=False, description="Whether half-precision float will be used."
Expand Down
7 changes: 4 additions & 3 deletions olive/strategy/search_parameter.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,9 +286,10 @@ def json_to_search_parameter(json: Dict[str, Any]) -> SearchParameter:
if search_parameter_type == "Categorical":
return Categorical(json["support"])
if search_parameter_type == "Conditional" or search_parameter_type == "ConditionalDefault":
stop_condition = lambda x: ( # noqa: E731
isinstance(x, dict) and x.get("olive_parameter_type") == "SearchParameter"
)

def stop_condition(x):
return isinstance(x, dict) and x.get("olive_parameter_type") == "SearchParameter"

support = flatten_dict(json["support"], stop_condition=stop_condition)
for key, value in support.items():
support[key] = json_to_search_parameter(value)
Expand Down

0 comments on commit 66a4d7b

Please sign in to comment.