Skip to content

Commit

Permalink
🌊 Set only_onnxruntime to false when opt_level > 1 (#599)
Browse files Browse the repository at this point in the history
## Describe your changes
1. Set only_onnxruntime to false when opt_level > 1.
2. Set the quantization calibrate_method to MinMax to work around a bug in
onnxruntime 1.16.0 (microsoft/onnxruntime#17619).

## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Format your code by running `pre-commit run --all-files`
- [ ] Is this a user-facing change? If yes, give a description of this
change to be included in the release notes.

## (Optional) Issue link
  • Loading branch information
trajepl committed Sep 26, 2023
1 parent e2e878b commit 66a4d7b
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 8 deletions.
4 changes: 3 additions & 1 deletion examples/bert/bert_ptq_cpu.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@
"transformers_optimization": {
"type": "OrtTransformersOptimization",
"config": {
"float16": false
"float16": false,
"only_onnxruntime": false
}
},
"quantization": {
Expand All @@ -59,6 +60,7 @@
"quant_preprocess": true,
"per_channel": false,
"reduce_range": false,
"calibrate_method": "MinMax",
"data_config": "__input_model_data_config__"
}
},
Expand Down
6 changes: 5 additions & 1 deletion examples/bert/bert_ptq_cpu_aml.json
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,17 @@
"transformers_optimization": {
"type": "OrtTransformersOptimization",
"config": {
"float16": false
"float16": false,
"only_onnxruntime": false
}
},
"quantization": {
"type": "OnnxQuantization",
"config": {
"quant_preprocess": true,
"per_channel": false,
"reduce_range": false,
"calibrate_method": "MinMax",
"data_config": "__input_model_data_config__"
}
},
Expand Down
1 change: 1 addition & 0 deletions olive/passes/onnx/quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@
description="""
Current calibration methods supported are MinMax and Entropy,
Please use CalibrationMethod.MinMax or CalibrationMethod.Entropy as options.
Percentile is not supported for onnxruntime==1.16.0, please avoid to set/search it.
""",
),
"quant_format": PassConfigParam(
Expand Down
17 changes: 14 additions & 3 deletions olive/passes/onnx/transformer_optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from olive.passes import Pass
from olive.passes.onnx.common import get_external_data_config, model_proto_to_olive_model
from olive.passes.pass_config import PassConfigParam
from olive.strategy.search_parameter import Boolean, Categorical
from olive.strategy.search_parameter import Boolean, Categorical, Conditional

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -62,8 +62,19 @@ def _default_config(accelerator_spec: AcceleratorSpec) -> Dict[str, PassConfigPa
"only_onnxruntime": PassConfigParam(
type_=bool,
default_value=False,
searchable_values=Boolean(),
description="Whether only use onnxruntime to optimize model, and no python fusion.",
searchable_values=Conditional(
parents=("opt_level",),
support={
(2,): Categorical([False]),
(99,): Categorical([False]),
},
default=Boolean(),
),
description=(
"Whether only use onnxruntime to optimize model, and no python fusion."
" Disable some optimizers that might cause failure in symbolic shape inference or attention fusion,"
" when opt_level > 1."
),
),
"float16": PassConfigParam(
type_=bool, default_value=False, description="Whether half-precision float will be used."
Expand Down
7 changes: 4 additions & 3 deletions olive/strategy/search_parameter.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,9 +286,10 @@ def json_to_search_parameter(json: Dict[str, Any]) -> SearchParameter:
if search_parameter_type == "Categorical":
return Categorical(json["support"])
if search_parameter_type == "Conditional" or search_parameter_type == "ConditionalDefault":
stop_condition = lambda x: ( # noqa: E731
isinstance(x, dict) and x.get("olive_parameter_type") == "SearchParameter"
)

def stop_condition(x):
return isinstance(x, dict) and x.get("olive_parameter_type") == "SearchParameter"

support = flatten_dict(json["support"], stop_condition=stop_condition)
for key, value in support.items():
support[key] = json_to_search_parameter(value)
Expand Down

0 comments on commit 66a4d7b

Please sign in to comment.