diff --git a/examples/test/test_bert_cuda_gpu.py b/examples/test/test_bert_cuda_gpu.py
index 5bc4156a1..e5e8a922e 100644
--- a/examples/test/test_bert_cuda_gpu.py
+++ b/examples/test/test_bert_cuda_gpu.py
@@ -6,7 +6,7 @@
 from pathlib import Path
 
 import pytest
-from utils import check_search_output, patch_config
+from utils import check_output, patch_config
 
 
 @pytest.fixture(scope="module", autouse=True)
@@ -32,4 +32,4 @@ def test_bert(search_algorithm, execution_order, system, olive_json, enable_cuda
     olive_config["passes"]["perf_tuning"]["config"]["enable_cuda_graph"] = enable_cuda_graph
 
     footprint = olive_run(olive_config)
-    check_search_output(footprint)
+    check_output(footprint)
diff --git a/examples/test/test_bert_ptq_cpu.py b/examples/test/test_bert_ptq_cpu.py
index 963a66803..9349f1e6d 100644
--- a/examples/test/test_bert_ptq_cpu.py
+++ b/examples/test/test_bert_ptq_cpu.py
@@ -6,7 +6,7 @@
 from pathlib import Path
 
 import pytest
-from utils import check_search_output, patch_config
+from utils import check_output, patch_config
 
 
 @pytest.fixture(scope="module", autouse=True)
@@ -30,4 +30,4 @@ def test_bert(search_algorithm, execution_order, system, olive_json):
     olive_config = patch_config(olive_json, search_algorithm, execution_order, system)
 
     footprint = olive_run(olive_config)
-    check_search_output(footprint)
+    check_output(footprint)
diff --git a/examples/test/test_bert_ptq_cpu_aml.py b/examples/test/test_bert_ptq_cpu_aml.py
index cce797f3f..507dbdec8 100644
--- a/examples/test/test_bert_ptq_cpu_aml.py
+++ b/examples/test/test_bert_ptq_cpu_aml.py
@@ -6,7 +6,7 @@
 from pathlib import Path
 
 import pytest
-from utils import check_no_search_output, check_search_output, patch_config
+from utils import check_output, patch_config
 
 
 @pytest.fixture(scope="module", autouse=True)
@@ -38,4 +38,4 @@ def test_bert(olive_test_knob):
     olive_config = patch_config(*olive_test_knob)
 
     output = olive_run(olive_config)
-    check_no_search_output(output) if not olive_test_knob[1] else check_search_output(output)
+    check_output(output)
diff --git a/examples/test/test_bert_ptq_cpu_docker.py b/examples/test/test_bert_ptq_cpu_docker.py
index 22a422c59..fe82d3aaa 100644
--- a/examples/test/test_bert_ptq_cpu_docker.py
+++ b/examples/test/test_bert_ptq_cpu_docker.py
@@ -7,7 +7,7 @@
 from pathlib import Path
 
 import pytest
-from utils import check_search_output, patch_config
+from utils import check_output, patch_config
 
 
 @pytest.fixture(scope="module", autouse=True)
@@ -33,4 +33,4 @@ def test_bert(search_algorithm, execution_order, system, olive_json):
     olive_config = patch_config(olive_json, search_algorithm, execution_order, system)
 
     footprint = olive_run(olive_config)
-    check_search_output(footprint)
+    check_output(footprint)
diff --git a/examples/test/test_cifar10_openvino_intel_hw.py b/examples/test/test_cifar10_openvino_intel_hw.py
index f2d1c9c8d..17dfc23fa 100644
--- a/examples/test/test_cifar10_openvino_intel_hw.py
+++ b/examples/test/test_cifar10_openvino_intel_hw.py
@@ -6,7 +6,7 @@
 from pathlib import Path
 
 import pytest
-from utils import check_search_output
+from utils import check_output
 
 
 @pytest.fixture(scope="module", autouse=True)
@@ -22,4 +22,4 @@ def test_cifar10():
     import cifar10
 
     metrics = cifar10.main()
-    check_search_output(metrics)
+    check_output(metrics)
diff --git a/examples/test/test_resnet_ptq_cpu.py b/examples/test/test_resnet_ptq_cpu.py
index a071a1943..6b05d39ee 100644
--- a/examples/test/test_resnet_ptq_cpu.py
+++ b/examples/test/test_resnet_ptq_cpu.py
@@ -6,7 +6,7 @@
 from pathlib import Path
 
 import pytest
-from utils import check_search_output, patch_config
+from utils import check_output, patch_config
 
 from olive.common.utils import retry_func, run_subprocess
 
@@ -37,4 +37,4 @@ def test_resnet(search_algorithm, execution_order, system, olive_json):
     olive_config = patch_config(olive_json, search_algorithm, execution_order, system)
 
     footprint = olive_run(olive_config)
-    check_search_output(footprint)
+    check_output(footprint)
diff --git a/examples/test/test_resnet_qat.py b/examples/test/test_resnet_qat.py
index 01670b0b5..baec796be 100644
--- a/examples/test/test_resnet_qat.py
+++ b/examples/test/test_resnet_qat.py
@@ -6,7 +6,7 @@
 from pathlib import Path
 
 import pytest
-from utils import check_search_output, patch_config
+from utils import check_output, patch_config
 
 from olive.common.utils import retry_func, run_subprocess
 
@@ -38,4 +38,4 @@ def test_resnet(search_algorithm, execution_order, system, olive_json):
     olive_config = patch_config(olive_json, search_algorithm, execution_order, system)
 
     footprint = olive_run(olive_config)
-    check_search_output(footprint)
+    check_output(footprint)
diff --git a/examples/test/test_resnet_vitis_ai_ptq_cpu.py b/examples/test/test_resnet_vitis_ai_ptq_cpu.py
index 64e32f715..488c9a0ed 100644
--- a/examples/test/test_resnet_vitis_ai_ptq_cpu.py
+++ b/examples/test/test_resnet_vitis_ai_ptq_cpu.py
@@ -6,7 +6,7 @@
 from pathlib import Path
 
 import pytest
-from utils import check_search_output, patch_config
+from utils import check_output, patch_config
 
 from olive.common.utils import retry_func, run_subprocess
 
@@ -37,4 +37,4 @@ def test_resnet(search_algorithm, execution_order, system, olive_json):
     olive_config = patch_config(olive_json, search_algorithm, execution_order, system)
 
     footprint = olive_run(olive_config)
-    check_search_output(footprint)
+    check_output(footprint)
diff --git a/examples/test/test_whisper.py b/examples/test/test_whisper.py
index 9957fc7fd..c5c5fc42c 100644
--- a/examples/test/test_whisper.py
+++ b/examples/test/test_whisper.py
@@ -9,7 +9,7 @@
 from pathlib import Path
 
 import pytest
-from utils import check_no_search_output
+from utils import check_output
 
 
 @pytest.fixture(scope="module", autouse=True)
@@ -43,7 +43,7 @@ def test_whisper(device_precision):
 
     # test workflow
     result = olive_run(olive_config)
-    check_no_search_output(result)
+    check_output(result)
 
     # test transcription
     from test_transcription import main as test_transcription
diff --git a/examples/test/utils.py b/examples/test/utils.py
index 810d1b039..74e7de804 100644
--- a/examples/test/utils.py
+++ b/examples/test/utils.py
@@ -6,7 +6,7 @@
 import os
 
 
-def check_search_output(footprints):
+def check_output(footprints):
     """Check if the search output is valid."""
     assert footprints, "footprints is empty. The search must have failed for all accelerator specs."
     for footprint in footprints.values():
@@ -15,17 +15,6 @@
             assert all([metric_result.value > 0 for metric_result in v.metrics.value.values()])
 
 
-def check_no_search_output(outputs):
-    assert outputs, "outputs is empty. The run must have failed for all accelerator specs."
-    # k:v => accelerator_spec: pass_flow_output
-    for pass_flow_output in outputs.values():
-        # k:v => pass_flow: output
-        for output in pass_flow_output.values():
-            output_metrics = output["metrics"]
-            for item in output_metrics.values():
-                assert item.value > 0
-
-
 def patch_config(config_json_path: str, search_algorithm: str, execution_order: str, system: str, is_gpu: bool = False):
     """Load the config json file and patch it with the given search algorithm, execution order and system."""
     with open(config_json_path, "r") as fin:
diff --git a/olive/engine/engine.py b/olive/engine/engine.py
index e93ddadad..0a78ad2a1 100644
--- a/olive/engine/engine.py
+++ b/olive/engine/engine.py
@@ -325,7 +325,6 @@ def run(
         output_dir.mkdir(parents=True, exist_ok=True)
 
         outputs = {}
-        pf_footprints = {}
 
         for accelerator_spec in self.accelerator_specs:
             with self.create_managed_environment(accelerator_spec):
@@ -336,37 +335,21 @@
                 if run_result is None:
                     continue
 
-                if evaluate_input_model and not self.passes:
-                    # for evaluate input model only, return the evaluation results
-                    # TODO: need check whether the evaluation results are valid since it will only evaluate input model
-                    # once and use the same evaluation results for all accelerators
-                    outputs[accelerator_spec] = run_result
-                elif self.no_search:
-                    output, model_ids = run_result
-                    if output:
-                        outputs[accelerator_spec] = output
-                        pf_footprints[accelerator_spec] = self.footprints[accelerator_spec].get_footprints_by_model_ids(
-                            model_ids
-                        )
-                else:
-                    outputs[accelerator_spec] = run_result
-                    pf_footprints[accelerator_spec] = run_result
-
-        if not self.passes:
-            # no passes registered, return the evaluation results
-            return outputs
+                outputs[accelerator_spec] = run_result
 
         for accelerator_spec in self.footprints.keys():
            logger.info(f"Run history for {accelerator_spec}:")
            run_history = self.footprints[accelerator_spec].summarize_run_history()
            self.dump_run_history(run_history, output_dir / f"run_history_{accelerator_spec}.txt")
 
-        if packaging_config:
-            logger.info(f"Package top ranked {sum([len(f.nodes) for f in pf_footprints.values()])} models as artifacts")
+        if packaging_config and self.passes:
+            # TODO: should we support package input model?
+            # TODO: do you support packaging pytorch models?
+            logger.info(f"Package top ranked {sum([len(f.nodes) for f in outputs.values()])} models as artifacts")
             generate_output_artifacts(
                 packaging_config,
                 self.footprints,
-                pf_footprints,
+                outputs,
                 output_dir,
             )
         else:
@@ -493,13 +476,13 @@ def run_no_search(
         self.search_strategy.initialize(self.pass_flows_search_spaces, input_model_id, objective_dict)
 
         iter_num = 0
-        flows_output = {}
-        output_model_ids = []
+        output_models = {}
         while True:
             iter_num += 1
 
             # get the next step
             next_step = self.search_strategy.next_step()
+
             if iter_num == 1:
                 assert next_step is not None, "Search strategy returned None for the first step"
             # if no more steps, break
@@ -518,16 +501,15 @@
             logger.debug(f"Step no search with search point {next_step['search_point']} ...")
 
             # run all the passes in the step
-            (
-                should_prune,
-                signal,
-                model_ids,
-            ) = self._run_passes(next_step["passes"], model_config, model_id, data_root, accelerator_spec)
-            pass_flow = self.pass_flows[iter_num - 1]
+            should_prune, signal, model_ids = self._run_passes(
+                next_step["passes"], model_config, model_id, data_root, accelerator_spec
+            )
+            pass_flow = self.pass_flows[iter_num - 1]
 
             if should_prune:
                 failed_pass = pass_flow[len(model_ids)]
                 logger.warning(f"Flow {pass_flow} is pruned due to failed or invalid config for pass '{failed_pass}'")
+                continue
 
             # names of the output models of the passes
             pass_output_names = [self.passes[pass_name]["output_name"] for pass_name, _ in next_step["passes"]]
@@ -547,7 +529,6 @@
                 pass_output_names[-1] = final_output_name
 
             output_model_json = None
-            output = {}
             for pass_output_name, pass_output_model_id in zip(pass_output_names, model_ids):
                 if not pass_output_name:
                     continue
@@ -558,7 +539,7 @@
                     overwrite=True,
                     cache_dir=self._config.cache_dir,
                 )
-                output_model_ids.append(pass_output_model_id)
+                output_models[pass_output_model_id] = output_model_json
 
             # save the evaluation results to output_dir
             if signal is not None:
@@ -566,15 +547,13 @@
                 with open(results_path, "w") as f:
                     json.dump(signal.to_json(), f, indent=4)
 
-            if output_model_json and not should_prune:
-                # output_model_json is the last model only if the flow is not pruned
-                output["model"] = output_model_json
-                if signal is not None:
-                    output["metrics"] = signal
-            else:
-                output = None
-            flows_output[tuple(pass_flow)] = output
-        return flows_output, output_model_ids
+        output_model_ids = list(output_models.keys())
+        fp_outputs = self.footprints[accelerator_spec].create_footprints_by_model_ids(output_model_ids)
+        # update the output model config
+        for model_id, model_config in output_models.items():
+            fp_outputs.nodes[model_id].model_config = model_config
+
+        return fp_outputs
 
     def run_search(
         self,
@@ -640,14 +619,14 @@
         self.footprints[accelerator_spec].to_file(output_dir / f"{prefix_output_name}footprints.json")
 
-        return self.get_pareto_frontier_footprints(
+        return self.create_pareto_frontier_footprints(
             accelerator_spec, output_model_num, objective_dict, output_dir, prefix_output_name
         )
 
-    def get_pareto_frontier_footprints(
+    def create_pareto_frontier_footprints(
         self, accelerator_spec, output_model_num, objective_dict, output_dir, prefix_output_name
     ):
-        pf_footprints = self.footprints[accelerator_spec].get_pareto_frontier()
+        pf_footprints = self.footprints[accelerator_spec].create_pareto_frontier()
         if output_model_num is None or len(pf_footprints.nodes) <= output_model_num:
             logger.info(f"Output all {len(pf_footprints.nodes)} models")
         else:
@@ -959,7 +938,7 @@ def _run_passes(
             )
             if model_config in PRUNED_CONFIGS:
                 should_prune = True
-                logger.debug("Pruned")
+                logger.debug(f"Pruned for pass {pass_id}")
                 break
 
             model_ids.append(model_id)
diff --git a/olive/engine/footprint.py b/olive/engine/footprint.py
index 98fb046af..43b26ae48 100644
--- a/olive/engine/footprint.py
+++ b/olive/engine/footprint.py
@@ -5,6 +5,7 @@
 import logging
 from collections import OrderedDict, defaultdict, namedtuple
+from copy import deepcopy
 from typing import DefaultDict, Dict
 
 from olive.common.config_utils import ConfigBase, config_json_dumps, config_json_loads
 
@@ -153,20 +154,22 @@ def mark_pareto_frontier(self):
                 self.nodes[k].is_pareto_frontier = cmp_flag
         self.is_marked_pareto_frontier = True
 
-    def get_footprints_by_model_ids(self, model_ids):
+    def create_footprints_by_model_ids(self, model_ids):
         nodes = OrderedDict()
         for model_id in model_ids:
-            nodes[model_id] = self.nodes[model_id]
-        return Footprint(nodes=nodes, objective_dict=self.objective_dict, is_marked_pareto_frontier=True)
+            nodes[model_id] = deepcopy(self.nodes[model_id])
+        return Footprint(nodes=nodes, objective_dict=deepcopy(self.objective_dict))
 
-    def get_pareto_frontier(self):
+    def create_pareto_frontier(self):
         self.mark_pareto_frontier()
         rls = {k: v for k, v in self.nodes.items() if v.is_pareto_frontier}
         for _, v in rls.items():
             logger.info(f"pareto frontier points: {v.model_id} \n{v.metrics.value}")
 
         # restructure the pareto frontier points to instance of Footprints node for further analysis
-        return Footprint(nodes=rls, objective_dict=self.objective_dict, is_marked_pareto_frontier=True)
+        return Footprint(
+            nodes=deepcopy(rls), objective_dict=deepcopy(self.objective_dict), is_marked_pareto_frontier=True
+        )
 
     def update_nodes(self, nodes):
         node_dict = OrderedDict()
diff --git a/test/multiple_ep/test_aml_system.py b/test/multiple_ep/test_aml_system.py
index e98e6f368..eb1282cbb 100644
--- a/test/multiple_ep/test_aml_system.py
+++ b/test/multiple_ep/test_aml_system.py
@@ -11,7 +11,7 @@
 from olive.engine import Engine
 from olive.evaluator.olive_evaluator import OliveEvaluatorConfig
 from olive.hardware import Device
-from olive.hardware.accelerator import AcceleratorSpec
+from olive.hardware.accelerator import DEFAULT_CPU_ACCELERATOR, AcceleratorSpec
 from olive.model import ModelConfig
 from olive.passes.onnx import OrtPerfTuning
 
@@ -58,9 +58,11 @@ def test_run_pass_evaluate(self):
         engine = Engine(options, target=self.system, host=self.system, evaluator_config=evaluator_config)
         engine.register(OrtPerfTuning)
         output = engine.run(self.input_model_config, output_dir=output_dir)
-        cpu_res = output[AcceleratorSpec(accelerator_type=Device.CPU, execution_provider="CPUExecutionProvider")]
-        openvino_res = output[
-            AcceleratorSpec(accelerator_type=Device.CPU, execution_provider="OpenVINOExecutionProvider")
-        ]
-        assert cpu_res[tuple(engine.pass_flows[0])]["metrics"]["latency-avg"]
-        assert openvino_res[tuple(engine.pass_flows[0])]["metrics"]["latency-avg"]
+        cpu_res = list(output[DEFAULT_CPU_ACCELERATOR].nodes.values())[0]
+        openvino_res = list(
+            output[
+                AcceleratorSpec(accelerator_type=Device.CPU, execution_provider="OpenVINOExecutionProvider")
+            ].nodes.values()
+        )[0]
+        assert cpu_res.metrics.value.__root__
+        assert openvino_res.metrics.value.__root__
diff --git a/test/multiple_ep/test_docker_system.py b/test/multiple_ep/test_docker_system.py
index 99a0e1e60..9fc5aaa9e 100644
--- a/test/multiple_ep/test_docker_system.py
+++ b/test/multiple_ep/test_docker_system.py
@@ -10,7 +10,7 @@
 from olive.engine import Engine
 from olive.evaluator.olive_evaluator import OliveEvaluatorConfig
 from olive.hardware import Device
-from olive.hardware.accelerator import AcceleratorSpec
+from olive.hardware.accelerator import DEFAULT_CPU_ACCELERATOR, AcceleratorSpec
 from olive.model import ModelConfig
 from olive.passes.onnx import OrtPerfTuning
 
@@ -44,9 +44,11 @@ def test_run_pass_evaluate(self):
         engine = Engine(options, target=self.system, evaluator_config=evaluator_config)
         engine.register(OrtPerfTuning)
         output = engine.run(self.input_model_config, output_dir=output_dir)
-        cpu_res = output[AcceleratorSpec(accelerator_type=Device.CPU, execution_provider="CPUExecutionProvider")]
-        openvino_res = output[
-            AcceleratorSpec(accelerator_type=Device.CPU, execution_provider="OpenVINOExecutionProvider")
-        ]
-        assert cpu_res[tuple(engine.pass_flows[0])]["metrics"]["latency-avg"]
-        assert openvino_res[tuple(engine.pass_flows[0])]["metrics"]["latency-avg"]
+        cpu_res = list(output[DEFAULT_CPU_ACCELERATOR].nodes.values())[0]
+        openvino_res = list(
+            output[
+                AcceleratorSpec(accelerator_type=Device.CPU, execution_provider="OpenVINOExecutionProvider")
+            ].nodes.values()
+        )[0]
+        assert cpu_res.metrics.value.__root__
+        assert openvino_res.metrics.value.__root__
diff --git a/test/multiple_ep/test_python_env_system.py b/test/multiple_ep/test_python_env_system.py
index 6e8a35371..bc3272ff2 100644
--- a/test/multiple_ep/test_python_env_system.py
+++ b/test/multiple_ep/test_python_env_system.py
@@ -12,7 +12,7 @@
 from olive.evaluator.metric import LatencySubType
 from olive.evaluator.olive_evaluator import OliveEvaluatorConfig
 from olive.hardware import Device
-from olive.hardware.accelerator import AcceleratorSpec
+from olive.hardware.accelerator import DEFAULT_CPU_ACCELERATOR, AcceleratorSpec
 from olive.passes.onnx import OrtPerfTuning
 from olive.systems.python_environment import PythonEnvironmentSystem
 
@@ -64,9 +64,11 @@ def test_run_pass_evaluate_linux(self):
         engine = Engine(options, target=self.system, host=self.system, evaluator_config=evaluator_config)
         engine.register(OrtPerfTuning)
         output = engine.run(self.input_model_config, output_dir=output_dir, evaluate_input_model=True)
-        cpu_res = output[AcceleratorSpec(accelerator_type=Device.CPU, execution_provider="CPUExecutionProvider")]
-        openvino_res = output[
-            AcceleratorSpec(accelerator_type=Device.CPU, execution_provider="OpenVINOExecutionProvider")
-        ]
-        assert cpu_res[tuple(engine.pass_flows[0])]["metrics"]["latency-avg"]
-        assert openvino_res[tuple(engine.pass_flows[0])]["metrics"]["latency-avg"]
+        cpu_res = list(output[DEFAULT_CPU_ACCELERATOR].nodes.values())[0]
+        openvino_res = list(
+            output[
+                AcceleratorSpec(accelerator_type=Device.CPU, execution_provider="OpenVINOExecutionProvider")
+            ].nodes.values()
+        )[0]
+        assert cpu_res.metrics.value.__root__
+        assert openvino_res.metrics.value.__root__
diff --git a/test/unit_test/engine/test_engine.py b/test/unit_test/engine/test_engine.py
index e9af180f8..516c52ea9 100644
--- a/test/unit_test/engine/test_engine.py
+++ b/test/unit_test/engine/test_engine.py
@@ -213,19 +213,20 @@ def test_run_no_search(self, mock_local_system, tmpdir):
         )
 
         # execute
-        actual_res = engine.run(model_config, output_dir=output_dir)
-        actual_res = actual_res[accelerator_spec][tuple(engine.pass_flows[0])]
+        _actual_res = engine.run(model_config, output_dir=output_dir)
+        actual_res = list(_actual_res[accelerator_spec].nodes.values())[0]
 
-        assert expected_res == actual_res
-        assert Path(actual_res["model"]["config"]["model_path"]).is_file()
+        assert expected_res["model"] == actual_res.model_config
+        assert expected_res["metrics"] == actual_res.metrics.value
+        assert Path(actual_res.model_config["config"]["model_path"]).is_file()
         model_json_path = Path(expected_output_dir / f"{output_prefix}_model.json")
         assert model_json_path.is_file()
         with open(model_json_path, "r") as f:
-            assert json.load(f) == actual_res["model"]
+            assert json.load(f) == actual_res.model_config
 
         result_json_path = Path(expected_output_dir / f"{output_prefix}_metrics.json")
         assert result_json_path.is_file()
         with open(result_json_path, "r") as f:
-            assert json.load(f) == actual_res["metrics"].__root__
+            assert json.load(f) == actual_res.metrics.value.__root__
 
     def test_pass_exception(self, caplog, tmpdir):
         # Need explicitly set the propagate to allow the message to be logged into caplog
@@ -297,7 +298,7 @@ def test_run_evaluate_input_model(self, mock_local_system, tmpdir):
         # execute
         actual_res = engine.run(model_config, output_dir=output_dir, evaluate_input_model=True)
         accelerator_spec = DEFAULT_CPU_ACCELERATOR
-        actual_res = actual_res[accelerator_spec][tuple(engine.pass_flows[0])]["metrics"]
+        actual_res = list(actual_res[accelerator_spec].nodes.values())[0].metrics.value
 
         assert expected_res == actual_res
         result_json_path = Path(output_dir / f"{accelerator_spec}_input_model_metrics.json")
@@ -555,7 +556,4 @@ def test_pass_quantization_error(self, is_search, caplog, tmpdir):
             actual_res = engine.run(
                 onnx_model_config, data_root=None, output_dir=output_dir, evaluate_input_model=False
             )
-            for pass_flow in engine.pass_flows:
-                assert not actual_res[DEFAULT_CPU_ACCELERATOR][
-                    tuple(pass_flow)
-                ], "Expect empty dict when quantization fails"
+            assert not actual_res[DEFAULT_CPU_ACCELERATOR].nodes, "Expect empty dict when quantization fails"
diff --git a/test/unit_test/engine/test_footprint.py b/test/unit_test/engine/test_footprint.py
index cb51cc26b..1f012bcb6 100644
--- a/test/unit_test/engine/test_footprint.py
+++ b/test/unit_test/engine/test_footprint.py
@@ -18,6 +18,14 @@ def setup(self):
         self.fp = Footprint.from_file(self.footprint_file)
         self.input_node = {k: v for k, v in self.fp.nodes.items() if v.parent_model_id is None}
 
+    def test_create_from_model_ids(self):
+        new_fp = self.fp.create_footprints_by_model_ids(self.fp.nodes.keys())
+        assert len(new_fp.nodes) == len(self.fp.nodes)
+        assert new_fp.nodes == self.fp.nodes
+        assert new_fp.nodes is not self.fp.nodes
+        assert new_fp.objective_dict == self.fp.objective_dict
+        assert new_fp.objective_dict is not self.fp.objective_dict
+
     def test_file_dump(self):
         with tempfile.TemporaryDirectory() as tempdir:
             self.fp.to_file(Path(tempdir) / "footprint.json")
@@ -30,7 +38,7 @@ def test_json_dump(self):
         assert len(fp2.nodes) == 3
 
     def test_pareto_frontier(self):
-        pareto_frontier_fp = self.fp.get_pareto_frontier()
+        pareto_frontier_fp = self.fp.create_pareto_frontier()
         assert isinstance(pareto_frontier_fp, Footprint)
         assert len(pareto_frontier_fp.nodes) == 2
         assert all([v.is_pareto_frontier for v in pareto_frontier_fp.nodes.values()])