diff --git a/docs/Usage.md b/docs/Usage.md index 5b3c383f440..8c42d0669ec 100644 --- a/docs/Usage.md +++ b/docs/Usage.md @@ -85,36 +85,54 @@ Important points you should consider when training your networks with compressio - Turn off the `Dropout` layers (and similar ones like `DropConnect`) when training a network with quantization or sparsity - It is better to turn off additional regularization in the loss function (for example, L2 regularization via `weight_decay`) when training the network with RB sparsity, since it already imposes an L0 regularization term. -#### Step 4 (optional): Export the compressed model to ONNX -After the compressed model has been fine-tuned to acceptable accuracy and compression stages, you can export it to ONNX format. -Since export process is in general algorithm-specific, you have to call the compression controller's `export_model` method to properly export the model with compression specifics into ONNX: -```python -compression_ctrl.export_model("./compressed_model.onnx") -``` -The exported ONNX file may contain special, non-ONNX-standard operations and layers to leverage full compressed/low-precision potential of the OpenVINO toolkit. -In some cases it is possible to export a compressed model with ONNX standard operations only (so that it can be run using `onnxruntime`, for example) - this is the case for the 8-bit symmetric quantization and sparsity/filter pruning algorithms. -Refer to [compression algorithm documentation](./compression_algorithms) for details. +#### Step 4: Export the compressed model +After the compressed model has been fine-tuned to acceptable accuracy and compression stages, you can export it. There are two ways to export a model: + +1. Call the compression controller's `export_model` method to properly export the model with compression specifics into ONNX. + + ```python + compression_ctrl.export_model("./compressed_model.onnx") + ``` + The exported ONNX file may contain special, non-ONNX-standard operations and layers to leverage full compressed/low-precision potential of the OpenVINO toolkit. + In some cases it is possible to export a compressed model with ONNX standard operations only (so that it can be run using `onnxruntime`, for example) - this is the case for the 8-bit symmetric quantization and sparsity/filter pruning algorithms. + Refer to [compression algorithm documentation](./compression_algorithms) for details. + Also, this method is limited to the supported formats for export. + +2. Call the compression controller's `prepare_for_inference` method, to properly get the model without NNCF specific + nodes for training compressed model, after that you can trace the model via inference in framework operations. + It gives more flexibility to deploy model after optimization. As well as this method also allows you to connect + third-party inference solutions, like OpenVINO. + + ```python + inference_model = compression_ctrl.prepare_for_inference() + # To ONNX format + import torch + torch.onnx.export(inference_model, dummy_input, './compressed_model.onnx') + # To OpenVINO format + from openvino.tools import mo + ov_model = mo.convert_model(inference_model, example_input=example_input) + ``` ## Saving and loading compressed models The complete information about compression is defined by a compressed model and a compression state. -The model characterizes the weights and topology of the network. The compression state - how to restore the setting of +The model characterizes the weights and topology of the network. 
The compression state - how to restore the setting of compression layers in the model and how to restore the compression schedule and the compression loss. -The latter can be obtained by `compression_ctrl.get_compression_state()` on saving and passed to the -`create_compressed_model` helper function by the optional `compression_state` argument on loading. +The latter can be obtained by `compression_ctrl.get_compression_state()` on saving and passed to the +`create_compressed_model` helper function by the optional `compression_state` argument on loading. The compressed model should be loaded once it's created. -Saving and loading of the compressed model and compression state is framework-specific and can be done in an arbitrary +Saving and loading of the compressed model and compression state is framework-specific and can be done in an arbitrary way. NNCF provides one possible way of doing it with helper functions in samples. To save the best compressed checkpoint use `compression_ctrl.compression_stage()` to distinguish between 3 possible -levels of compression: `UNCOMPRESSED`, `PARTIALLY_COMPRESSED` and `FULLY_COMPRESSED`. It is useful in case of `staged` -compression. Model may achieve the best accuracy on earlier stages of compression - tuning without compression or with -intermediate compression rate, but still fully compressed model with lower accuracy should be considered as the best -compressed one. `UNCOMPRESSED` means that no compression is applied for the model, for instance, in case of stage -quantization - when all quantization are disabled, or in case of sparsity - when current sparsity rate is zero. +levels of compression: `UNCOMPRESSED`, `PARTIALLY_COMPRESSED` and `FULLY_COMPRESSED`. It is useful in case of `staged` +compression. Model may achieve the best accuracy on earlier stages of compression - tuning without compression or with +intermediate compression rate, but still fully compressed model with lower accuracy should be considered as the best +compressed one. `UNCOMPRESSED` means that no compression is applied for the model, for instance, in case of stage +quantization - when all quantization are disabled, or in case of sparsity - when current sparsity rate is zero. `PARTIALLY_COMPRESSED` stands for the compressed model which haven't reached final compression ratio yet, e.g. magnitude -sparsity algorithm has learnt masking of 30% weights out of 51% of target rate. The controller returns -`FULLY_COMPRESSED` compression stage when it finished scheduling and tuning hyper parameters of the compression +sparsity algorithm has learnt masking of 30% weights out of 51% of target rate. The controller returns +`FULLY_COMPRESSED` compression stage when it finished scheduling and tuning hyper parameters of the compression algorithm, for example when rb-sparsity method sets final target sparsity rate for the loss. ### Saving and loading compressed models in TensorFlow @@ -147,8 +165,8 @@ checkpoint = tf.train.Checkpoint(model=compress_model, checkpoint.restore(path_to_checkpoint) ``` -Since the compression state is a dictionary of Python JSON-serializable objects, we convert it to JSON -string within `tf.train.Checkpoint`. There are 2 helper classes: `TFCompressionState` - for saving compression state and +Since the compression state is a dictionary of Python JSON-serializable objects, we convert it to JSON +string within `tf.train.Checkpoint`. There are 2 helper classes: `TFCompressionState` - for saving compression state and `TFCompressionStateLoader` - for loading. 
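A minimal sketch of how these two helper classes can be plugged into `tf.train.Checkpoint`, following the pattern used by the TensorFlow samples. Here `model`, `nncf_config`, `compress_model` and `compression_ctrl` are assumed to come from the earlier steps, and the checkpoint directory, the `compression_state` attribute name and the loader's `state` property are assumptions of this sketch rather than part of the documented API:

```python
import tensorflow as tf

from nncf.tensorflow.helpers.model_creation import create_compressed_model
from nncf.tensorflow.utils.state import TFCompressionState
from nncf.tensorflow.utils.state import TFCompressionStateLoader

# Saving: serialize the compression state next to the model weights.
checkpoint = tf.train.Checkpoint(model=compress_model,
                                 compression_state=TFCompressionState(compression_ctrl))
checkpoint.save('./ckpt/compressed')

# Loading: recover the compression state first, then re-create the compressed
# model with it and restore the weights into the freshly created model.
path_to_checkpoint = tf.train.latest_checkpoint('./ckpt')
state_loader = TFCompressionStateLoader()
tf.train.Checkpoint(compression_state=state_loader).restore(path_to_checkpoint).expect_partial()
compression_ctrl, compress_model = create_compressed_model(model, nncf_config,
                                                           compression_state=state_loader.state)
tf.train.Checkpoint(model=compress_model).restore(path_to_checkpoint).expect_partial()
```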
### Saving and loading compressed models in PyTorch @@ -167,7 +185,7 @@ torch.save(checkpoint, path) # load part resuming_checkpoint = torch.load(path) -state_dict = resuming_checkpoint['state_dict'] +state_dict = resuming_checkpoint['state_dict'] compression_ctrl, compressed_model = create_compressed_model(model, nncf_config, resuming_state_dict=state_dict) compression_ctrl.scheduler.load_state(resuming_checkpoint['scheduler_state']) ``` @@ -185,39 +203,39 @@ torch.save(checkpoint, path) # load part resuming_checkpoint = torch.load(path) -compression_state = resuming_checkpoint['compression_state'] +compression_state = resuming_checkpoint['compression_state'] compression_ctrl, compressed_model = create_compressed_model(model, nncf_config, compression_state=compression_state) -state_dict = resuming_checkpoint['state_dict'] +state_dict = resuming_checkpoint['state_dict'] # load model in a preferable way - load_state(compressed_model, state_dict, is_resume=True) - # or when execution mode on loading is the same as on saving: - # save and load in a single GPU mode or save and load in the (Distributed)DataParallel one, not in a mixed way + load_state(compressed_model, state_dict, is_resume=True) + # or when execution mode on loading is the same as on saving: + # save and load in a single GPU mode or save and load in the (Distributed)DataParallel one, not in a mixed way compressed_model.load_state_dict(state_dict) ``` -You can save the `compressed_model` object `torch.save` as usual: via `state_dict` and `load_state_dict` methods. -Alternatively, you can use the `nncf.load_state` function on loading. It will attempt to load a PyTorch state dict into -a model by first stripping the irrelevant prefixes, such as `module.` or `nncf_module.`, from both the checkpoint and +You can save the `compressed_model` object `torch.save` as usual: via `state_dict` and `load_state_dict` methods. +Alternatively, you can use the `nncf.load_state` function on loading. It will attempt to load a PyTorch state dict into +a model by first stripping the irrelevant prefixes, such as `module.` or `nncf_module.`, from both the checkpoint and the model layer identifiers, and then do the matching between the layers. -Depending on the value of the `is_resume` argument, it will then fail if an exact match could not be made -(when `is_resume == True`), or load the matching layer parameters and print a warning listing the mismatches -(when `is_resume == False`). `is_resume=False` is most commonly used if you want to load the starting weights from an -uncompressed model into a compressed model and `is_resume=True` is used when you want to evaluate a compressed +Depending on the value of the `is_resume` argument, it will then fail if an exact match could not be made +(when `is_resume == True`), or load the matching layer parameters and print a warning listing the mismatches +(when `is_resume == False`). `is_resume=False` is most commonly used if you want to load the starting weights from an +uncompressed model into a compressed model and `is_resume=True` is used when you want to evaluate a compressed checkpoint or resume compressed checkpoint training without changing the compression algorithm parameters. The compression state can be directly pickled by `torch.save` as well, since it is a dictionary of Python objects. 
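As a minimal sketch of the two `is_resume` modes described above (the checkpoint path and dictionary key are illustrative; `model` and `nncf_config` are assumed from the earlier steps):

```python
import torch

from nncf.torch import create_compressed_model
from nncf.torch.checkpoint_loading import load_state

# Warm start: take FP32 weights from an uncompressed baseline checkpoint.
fp32_checkpoint = torch.load('baseline_fp32.pth')
compression_ctrl, compressed_model = create_compressed_model(model, nncf_config)

# is_resume=False tolerates mismatches (e.g. missing quantizer parameters)
# and only prints a warning listing them.
load_state(compressed_model, fp32_checkpoint['state_dict'], is_resume=False)

# Resuming or evaluating a compressed checkpoint instead requires an exact
# match, so loading fails if the checkpoint does not correspond to this model:
# load_state(compressed_model, compressed_checkpoint['state_dict'], is_resume=True)
```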
-In the previous releases of the NNCF, model can be loaded without compression state information -by saving the model state dictionary `compressed_model.state_dict` and loading it via `nncf.load_state` and -`compressed_model.load_state_dict` methods or using optional `resuming_state_dict` argument of the +In the previous releases of the NNCF, model can be loaded without compression state information +by saving the model state dictionary `compressed_model.state_dict` and loading it via `nncf.load_state` and +`compressed_model.load_state_dict` methods or using optional `resuming_state_dict` argument of the `create_compressed_model`. This way of loading is deprecated, and we highly recommend to not use this way as it does not guarantee the exact loading -of compression model state for algorithms with sophisticated initialization - e.g. HAWQ and AutoQ. +of compression model state for algorithms with sophisticated initialization - e.g. HAWQ and AutoQ. Also in this case, keep in mind that in order to load the resulting checkpoint file the `compressed_model` object should have the same structure with regard to PyTorch module and parameters as it was when the checkpoint was saved. -In practice this means that you should use the same compression algorithms (i.e. the same NNCF configuration file) when -loading a compressed model checkpoint. +In practice this means that you should use the same compression algorithms (i.e. the same NNCF configuration file) when +loading a compressed model checkpoint. ## Exploring the compressed model @@ -271,8 +289,8 @@ from nncf.common.accuracy_aware_training import create_accuracy_aware_training_l training_loop = create_accuracy_aware_training_loop(nncf_config, compression_ctrl) ``` -In order to properly instantiate the accuracy-aware training loop, the user has to specify the 'accuracy_aware_training' section. -This section fully depends on what Accuracy-Aware Training loop is being used. +In order to properly instantiate the accuracy-aware training loop, the user has to specify the 'accuracy_aware_training' section. +This section fully depends on what Accuracy-Aware Training loop is being used. For more details about config of Adaptive Compression Level Training refer to [Adaptive Compression Level Training documentation](./accuracy_aware_model_training/AdaptiveCompressionTraining.md) and Early Exit Training refer to [Early Exit Training documentation](./accuracy_aware_model_training/EarlyExitTraining.md). The training loop is launched by calling its `run` method. Before the start of the training loop, the user is expected to define several functions related to the training of the model and pass them as arguments to the `run` method of the training loop instance: @@ -318,9 +336,9 @@ def configure_optimizers_fn(): def dump_checkpoint_fn(model, compression_controller, accuracy_aware_runner, save_dir): ''' An (optional) function that allows a user to define how to save the model's checkpoint. - Training loop will call this function instead own dump_checkpoint function and pass + Training loop will call this function instead own dump_checkpoint function and pass `model`, `compression_controller`, `accuracy_aware_runner` and `save_dir` to it as arguments. - The user can save the states of the objects according to their own needs. + The user can save the states of the objects according to their own needs. `save_dir` is a directory that Accuracy-Aware pipeline created to store log information. 
''' ``` @@ -334,4 +352,4 @@ model = training_loop.run(model, configure_optimizers_fn=configure_optimizers_fn, dump_checkpoint_fn=dump_checkpoint_fn) ``` -The above call executes the acccuracy-aware training loop and return the compressed model. For more details on how to use the accuracy-aware training loop functionality of NNCF, please refer to its [documentation](./accuracy_aware_model_training/AdaptiveCompressionTraining.md). +The above call executes the accuracy-aware training loop and return the compressed model. For more details on how to use the accuracy-aware training loop functionality of NNCF, please refer to its [documentation](./accuracy_aware_model_training/AdaptiveCompressionTraining.md). diff --git a/examples/tensorflow/classification/README.md b/examples/tensorflow/classification/README.md index 06866c6a463..3d078c5b20e 100644 --- a/examples/tensorflow/classification/README.md +++ b/examples/tensorflow/classification/README.md @@ -11,7 +11,7 @@ This sample demonstrates a DL model compression in case of the Image Classificat ## Installation -At this point it is assumed that you have already installed nncf. You can find information on downloading nncf [here](https://github.com/openvinotoolkit/nncf#user-content-installation). +At this point it is assumed that you have already installed nncf. You can find information on downloading nncf [here](https://github.com/openvinotoolkit/nncf#user-content-installation). To work with the sample you should install the corresponding Python package dependencies: @@ -55,7 +55,7 @@ The ImageNet dataset in TFRecords format should be specified in the configuratio #### Test Pretrained Model -Before compressing a model, it is highly recommended checking the accuracy of the pretrained model. All models which are supported in the sample has pretrained weights for ImageNet. +Before compressing a model, it is highly recommended checking the accuracy of the pretrained model. All models which are supported in the sample has pretrained weights for ImageNet. To load pretrained weights into a model and then evaluate the accuracy of that model, make sure that the pretrained=True option is set in the configuration file and use the following command: ```bash @@ -63,7 +63,7 @@ python main.py \ --mode=test \ --config=configs/quantization/mobilenet_v2_imagenet_int8.json \ --data= \ ---disable-compression +--disable-compression ``` #### Compress Pretrained Model diff --git a/examples/tensorflow/classification/main.py b/examples/tensorflow/classification/main.py index 12b03de8706..49870b0fd77 100644 --- a/examples/tensorflow/classification/main.py +++ b/examples/tensorflow/classification/main.py @@ -11,42 +11,42 @@ limitations under the License. 
""" -import sys import os.path as osp +import sys from pathlib import Path import tensorflow as tf import tensorflow_addons as tfa -from examples.tensorflow.common.experimental_patcher import patch_if_experimental_quantization -from nncf.config.utils import is_accuracy_aware_training -from nncf.tensorflow.helpers.model_creation import create_compressed_model -from nncf.tensorflow import create_compression_callbacks -from nncf.tensorflow.helpers.model_manager import TFModelManager -from nncf.tensorflow.initialization import register_default_init_args -from nncf.tensorflow.utils.state import TFCompressionState -from nncf.tensorflow.utils.state import TFCompressionStateLoader - +from examples.common.sample_config import create_sample_config from examples.tensorflow.classification.datasets.builder import DatasetBuilder from examples.tensorflow.common.argparser import get_common_argument_parser from examples.tensorflow.common.callbacks import get_callbacks from examples.tensorflow.common.callbacks import get_progress_bar from examples.tensorflow.common.distributed import get_distribution_strategy +from examples.tensorflow.common.experimental_patcher import patch_if_experimental_quantization +from examples.tensorflow.common.export import export_model from examples.tensorflow.common.logger import logger from examples.tensorflow.common.model_loader import get_model from examples.tensorflow.common.optimizer import build_optimizer -from examples.common.sample_config import create_sample_config from examples.tensorflow.common.scheduler import build_scheduler +from examples.tensorflow.common.utils import SummaryWriter +from examples.tensorflow.common.utils import close_strategy_threadpool from examples.tensorflow.common.utils import configure_paths from examples.tensorflow.common.utils import create_code_snapshot from examples.tensorflow.common.utils import get_saving_parameters from examples.tensorflow.common.utils import print_args -from examples.tensorflow.common.utils import serialize_config from examples.tensorflow.common.utils import serialize_cli_args -from examples.tensorflow.common.utils import write_metrics -from examples.tensorflow.common.utils import SummaryWriter -from examples.tensorflow.common.utils import close_strategy_threadpool +from examples.tensorflow.common.utils import serialize_config from examples.tensorflow.common.utils import set_seed +from examples.tensorflow.common.utils import write_metrics +from nncf.config.utils import is_accuracy_aware_training +from nncf.tensorflow import create_compression_callbacks +from nncf.tensorflow.helpers.model_creation import create_compressed_model +from nncf.tensorflow.helpers.model_manager import TFModelManager +from nncf.tensorflow.initialization import register_default_init_args +from nncf.tensorflow.utils.state import TFCompressionState +from nncf.tensorflow.utils.state import TFCompressionStateLoader def get_argument_parser(): @@ -288,7 +288,9 @@ def run(config): logger.info('evaluation...') statistics = compression_ctrl.statistics() logger.info(statistics.to_str()) - results = compress_model.evaluate( + eval_model = compress_model + + results = eval_model.evaluate( validation_dataset, steps=validation_steps, callbacks=[get_progress_bar( @@ -300,7 +302,7 @@ def run(config): if 'export' in config.mode: save_path, save_format = get_saving_parameters(config) - compression_ctrl.export_model(save_path, save_format) + export_model(compression_ctrl.prepare_for_inference(), save_path, save_format) logger.info('Saved to {}'.format(save_path)) 
close_strategy_threadpool(strategy) @@ -338,7 +340,7 @@ def export(config): ckpt_path=config.ckpt_path) save_path, save_format = get_saving_parameters(config) - compression_ctrl.export_model(save_path, save_format) + export_model(compression_ctrl.prepare_for_inference(), save_path, save_format) logger.info('Saved to {}'.format(save_path)) diff --git a/examples/tensorflow/common/argparser.py b/examples/tensorflow/common/argparser.py index b247878ffec..88e797f6f00 100644 --- a/examples/tensorflow/common/argparser.py +++ b/examples/tensorflow/common/argparser.py @@ -229,7 +229,7 @@ def get_common_argument_parser(**flags): parser.add_argument( '--disable_tensor_float_32_execution', - help="Disable exection in TensorFloat-32", + help="Disable execution in TensorFloat-32", action="store_true" ) diff --git a/examples/tensorflow/common/export.py b/examples/tensorflow/common/export.py new file mode 100644 index 00000000000..c8d951d3dc8 --- /dev/null +++ b/examples/tensorflow/common/export.py @@ -0,0 +1,41 @@ +""" + Copyright (c) 2023 Intel Corporation + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" +import os.path as osp + +import tensorflow as tf +from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 + +from examples.tensorflow.common.utils import FROZEN_GRAPH_FORMAT + + +def export_model(model: tf.keras.Model, save_path: str, save_format: str) -> None: + """ + Export compressed model. Supported types 'tf', 'h5', 'frozen_graph'. + + :param model: Target model. + :param save_path: Path to save. + :param save_format: Model format used to save model. + """ + + if save_format == FROZEN_GRAPH_FORMAT: + input_signature = [] + for item in model.inputs: + input_signature.append(tf.TensorSpec(item.shape, item.dtype, item.name)) + concrete_function = tf.function(model).get_concrete_function(input_signature) + frozen_func = convert_variables_to_constants_v2(concrete_function, lower_control_flow=False) + frozen_graph = frozen_func.graph.as_graph_def(add_shapes=True) + + save_dir, name = osp.split(save_path) + tf.io.write_graph(frozen_graph, save_dir, name, as_text=False) + else: + model.save(save_path, save_format=save_format) diff --git a/examples/tensorflow/object_detection/README.md b/examples/tensorflow/object_detection/README.md index 7c357ce37d0..2252f977145 100644 --- a/examples/tensorflow/object_detection/README.md +++ b/examples/tensorflow/object_detection/README.md @@ -14,7 +14,7 @@ The sample receives a configuration file where the training schedule, hyper-para ## Installation -At this point it is assumed that you have already installed nncf. You can find information on downloading nncf [here](https://github.com/openvinotoolkit/nncf#user-content-installation). +At this point it is assumed that you have already installed nncf. You can find information on downloading nncf [here](https://github.com/openvinotoolkit/nncf#user-content-installation). 
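A usage sketch for the `export_model` helper introduced above in `examples/tensorflow/common/export.py`. It assumes a `compression_ctrl` obtained from `create_compressed_model`; the save paths are illustrative:

```python
from examples.tensorflow.common.export import export_model
from examples.tensorflow.common.utils import FROZEN_GRAPH_FORMAT

# prepare_for_inference() strips the NNCF training-only operations first.
inference_model = compression_ctrl.prepare_for_inference()

# Keras formats ('tf' SavedModel or 'h5') go through model.save() inside the helper ...
export_model(inference_model, 'compressed_model', 'tf')

# ... while the frozen-graph branch converts variables to constants before writing.
export_model(inference_model, 'compressed_model.pb', FROZEN_GRAPH_FORMAT)
```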
To work with the sample you should install the corresponding Python package dependencies: @@ -66,18 +66,18 @@ The [COCO2017](https://cocodataset.org/) dataset in TFRecords format should be s - If you did not install the package, add the repository root folder to the `PYTHONPATH` environment variable. - Go to the `examples/tensorflow/object_detection` folder. -- Download the pre-trained weights in H5 format and provide the path to them using `--weights` flag. The link to the -archive with pre-trained weights can be found in the `TensorFlow checkpoint` column of the [results](#results) table. -Select the checkpoint corresponding to the `None` compression algorithm, which includes the pre-trained weights for the +- Download the pre-trained weights in H5 format and provide the path to them using `--weights` flag. The link to the +archive with pre-trained weights can be found in the `TensorFlow checkpoint` column of the [results](#results) table. +Select the checkpoint corresponding to the `None` compression algorithm, which includes the pre-trained weights for the FP32 model, without applying any compression algorithms. -- (Optional) Before compressing a model, it is highly recommended checking the accuracy of the pretrained model, use the following command: +- (Optional) Before compressing a model, it is highly recommended checking the accuracy of the pretrained model, use the following command: ```bash python main.py \ --mode=test \ --config=configs/quantization/retinanet_coco_int8.json \ --weights= --data= \ - --disable-compression + --disable-compression ``` - Run the following command to start compression with fine-tuning on all available GPUs on the machine: ```bash @@ -177,4 +177,3 @@ To export a model to the OpenVINO IR and run it using the IntelĀ® Deep Learning |RetinaNet|None|COCO 2017|33.43|[retinanet_coco.json](configs/retinanet_coco.json)|[Link](https://storage.openvinotoolkit.org/repositories/nncf/models/develop/tensorflow/retinanet_coco.tar.gz)| |RetinaNet|Filter pruning, 40%|COCO 2017|32.72 (0.71)|[retinanet_coco_pruning_geometric_median.json](configs/pruning/retinanet_coco_pruning_geometric_median.json)|[Link](https://storage.openvinotoolkit.org/repositories/nncf/models/develop/tensorflow/retinanet_coco_pruning_geometric_median.tar.gz)| |RetinaNet|INT8 (per-tensor symmetric for weights, per-tensor asymmetric half-range for activations) + filter pruning 40%|COCO 2017|32.67 (0.76)|[retinanet_coco_pruning_geometric_median_int8.json](configs/pruning_quantization/retinanet_coco_pruning_geometric_median_int8.json)|[Link](https://storage.openvinotoolkit.org/repositories/nncf/models/develop/tensorflow/retinanet_coco_pruning_geometric_median_int8.tar.gz)| - diff --git a/examples/tensorflow/object_detection/main.py b/examples/tensorflow/object_detection/main.py index 37ff984701a..09868b034a6 100644 --- a/examples/tensorflow/object_detection/main.py +++ b/examples/tensorflow/object_detection/main.py @@ -15,40 +15,40 @@ import sys from pathlib import Path -import tensorflow as tf import numpy as np +import tensorflow as tf -from examples.tensorflow.common.experimental_patcher import patch_if_experimental_quantization -from nncf.common.accuracy_aware_training import create_accuracy_aware_training_loop -from nncf.tensorflow import create_compressed_model -from nncf.tensorflow.helpers.model_manager import TFModelManager -from nncf.tensorflow.initialization import register_default_init_args -from nncf.common.utils.tensorboard import prepare_for_tensorboard -from nncf.config.utils import 
is_accuracy_aware_training -from nncf.config.structures import ModelEvaluationArgs -from nncf.tensorflow.utils.state import TFCompressionState -from nncf.tensorflow.utils.state import TFCompressionStateLoader - +from examples.common.sample_config import create_sample_config from examples.tensorflow.common.argparser import get_common_argument_parser from examples.tensorflow.common.distributed import get_distribution_strategy +from examples.tensorflow.common.experimental_patcher import patch_if_experimental_quantization +from examples.tensorflow.common.export import export_model from examples.tensorflow.common.logger import logger from examples.tensorflow.common.object_detection.datasets.builder import COCODatasetBuilder from examples.tensorflow.common.optimizer import build_optimizer -from examples.common.sample_config import create_sample_config from examples.tensorflow.common.scheduler import build_scheduler from examples.tensorflow.common.utils import SummaryWriter from examples.tensorflow.common.utils import Timer -from examples.tensorflow.common.utils import print_args -from examples.tensorflow.common.utils import serialize_config -from examples.tensorflow.common.utils import serialize_cli_args -from examples.tensorflow.common.utils import create_code_snapshot +from examples.tensorflow.common.utils import close_strategy_threadpool from examples.tensorflow.common.utils import configure_paths +from examples.tensorflow.common.utils import create_code_snapshot from examples.tensorflow.common.utils import get_saving_parameters +from examples.tensorflow.common.utils import print_args +from examples.tensorflow.common.utils import serialize_cli_args +from examples.tensorflow.common.utils import serialize_config +from examples.tensorflow.common.utils import set_seed from examples.tensorflow.common.utils import write_metrics -from examples.tensorflow.object_detection.models.model_selector import get_predefined_config from examples.tensorflow.object_detection.models.model_selector import get_model_builder -from examples.tensorflow.common.utils import close_strategy_threadpool -from examples.tensorflow.common.utils import set_seed +from examples.tensorflow.object_detection.models.model_selector import get_predefined_config +from nncf.common.accuracy_aware_training import create_accuracy_aware_training_loop +from nncf.common.utils.tensorboard import prepare_for_tensorboard +from nncf.config.structures import ModelEvaluationArgs +from nncf.config.utils import is_accuracy_aware_training +from nncf.tensorflow import create_compressed_model +from nncf.tensorflow.helpers.model_manager import TFModelManager +from nncf.tensorflow.initialization import register_default_init_args +from nncf.tensorflow.utils.state import TFCompressionState +from nncf.tensorflow.utils.state import TFCompressionStateLoader def get_argument_parser(): @@ -389,8 +389,8 @@ def validate_fn(model, **kwargs): if 'export' in config.mode: save_path, save_format = get_saving_parameters(config) - compression_ctrl.export_model(save_path, save_format) - logger.info("Saved to {}".format(save_path)) + export_model(compression_ctrl.prepare_for_inference(), save_path, save_format) + logger.info('Saved to {}'.format(save_path)) close_strategy_threadpool(strategy) @@ -411,8 +411,8 @@ def export(config): load_checkpoint(checkpoint, config.ckpt_path) save_path, save_format = get_saving_parameters(config) - compression_ctrl.export_model(save_path, save_format) - logger.info("Saved to {}".format(save_path)) + 
export_model(compression_ctrl.prepare_for_inference(), save_path, save_format) + logger.info('Saved to {}'.format(save_path)) def main(argv): diff --git a/examples/tensorflow/segmentation/README.md b/examples/tensorflow/segmentation/README.md index ef0dce7899c..4f2c3e8a7d9 100644 --- a/examples/tensorflow/segmentation/README.md +++ b/examples/tensorflow/segmentation/README.md @@ -14,7 +14,7 @@ The sample receives a configuration file where the training schedule, hyper-para ## Installation -At this point it is assumed that you have already installed nncf. You can find information on downloading nncf [here](https://github.com/openvinotoolkit/nncf#user-content-installation). +At this point it is assumed that you have already installed nncf. You can find information on downloading nncf [here](https://github.com/openvinotoolkit/nncf#user-content-installation). To work with the sample you should install the corresponding Python package dependencies: @@ -52,8 +52,8 @@ We can run the sample after data preparation. For this follow these steps: - If you did not install the package, add the repository root folder to the `PYTHONPATH` environment variable. - Go to the `examples/tensorflow/segmentation` folder. - Download the pre-trained weights in checkpoint format and provide the path to them using `--weights` flag. The link to the -archive with pre-trained weights can be found in the `TensorFlow checkpoint` column of the [results](#results) table. -Select the checkpoint corresponding to the `None` compression algorithm, which includes the pre-trained weights for the +archive with pre-trained weights can be found in the `TensorFlow checkpoint` column of the [results](#results) table. +Select the checkpoint corresponding to the `None` compression algorithm, which includes the pre-trained weights for the FP32 model, without applying any compression algorithms. - Specify the GPUs to be used for training by setting the environment variable [`CUDA_VISIBLE_DEVICES`](https://developer.nvidia.com/blog/cuda-pro-tip-control-gpu-visibility-cuda_visible_devices/). This is necessary because training and validation during training must be performed on different GPU devices. Please note that usually only one GPU is required for validation during training. - (Optional) Before compressing a model, it is highly recommended checking the accuracy of the pretrained model, use the following command: diff --git a/examples/tensorflow/segmentation/evaluation.py b/examples/tensorflow/segmentation/evaluation.py index fb4201111a7..6e8afbe0c03 100644 --- a/examples/tensorflow/segmentation/evaluation.py +++ b/examples/tensorflow/segmentation/evaluation.py @@ -10,34 +10,33 @@ See the License for the specific language governing permissions and limitations under the License. 
""" - import sys import tensorflow as tf +from examples.common.sample_config import SampleConfig from examples.common.sample_config import create_sample_config -from examples.tensorflow.common.experimental_patcher import patch_if_experimental_quantization -from examples.tensorflow.common.utils import close_strategy_threadpool -from nncf.tensorflow import create_compressed_model -from nncf.tensorflow import register_default_init_args -from nncf.tensorflow.helpers.model_manager import TFModelManager -from nncf.tensorflow.utils.state import TFCompressionState -from nncf.tensorflow.utils.state import TFCompressionStateLoader - from examples.tensorflow.common.argparser import get_common_argument_parser from examples.tensorflow.common.distributed import get_distribution_strategy +from examples.tensorflow.common.experimental_patcher import patch_if_experimental_quantization +from examples.tensorflow.common.export import export_model from examples.tensorflow.common.logger import logger -from examples.tensorflow.common.object_detection.datasets.builder import COCODatasetBuilder from examples.tensorflow.common.object_detection.checkpoint_utils import get_variables -from examples.common.sample_config import SampleConfig +from examples.tensorflow.common.object_detection.datasets.builder import COCODatasetBuilder +from examples.tensorflow.common.utils import SummaryWriter +from examples.tensorflow.common.utils import Timer +from examples.tensorflow.common.utils import close_strategy_threadpool from examples.tensorflow.common.utils import configure_paths from examples.tensorflow.common.utils import get_saving_parameters from examples.tensorflow.common.utils import print_args -from examples.tensorflow.common.utils import SummaryWriter from examples.tensorflow.common.utils import write_metrics -from examples.tensorflow.common.utils import Timer -from examples.tensorflow.segmentation.models.model_selector import get_predefined_config from examples.tensorflow.segmentation.models.model_selector import get_model_builder +from examples.tensorflow.segmentation.models.model_selector import get_predefined_config +from nncf.tensorflow import create_compressed_model +from nncf.tensorflow import register_default_init_args +from nncf.tensorflow.helpers.model_manager import TFModelManager +from nncf.tensorflow.utils.state import TFCompressionState +from nncf.tensorflow.utils.state import TFCompressionStateLoader def get_argument_parser(): @@ -245,8 +244,8 @@ def run_evaluation(config, eval_timeout=None): if 'export' in config.mode: save_path, save_format = get_saving_parameters(config) - compression_ctrl.export_model(save_path, save_format) - logger.info("Saved to {}".format(save_path)) + export_model(compression_ctrl.prepare_for_inference(), save_path, save_format) + logger.info('Saved to {}'.format(save_path)) elif 'train' in config.mode: validation_summary_writer = SummaryWriter(config.log_dir, 'validation') @@ -286,8 +285,8 @@ def export(config): compression_ctrl, _, _ = restore_compressed_model(config, strategy, model_builder, config.ckpt_path) save_path, save_format = get_saving_parameters(config) - compression_ctrl.export_model(save_path, save_format) - logger.info("Saved to {}".format(save_path)) + export_model(compression_ctrl.prepare_for_inference(), save_path, save_format) + logger.info('Saved to {}'.format(save_path)) def main(argv): diff --git a/examples/torch/classification/README.md b/examples/torch/classification/README.md index 56a1007fe25..2b2285db7ae 100644 --- 
a/examples/torch/classification/README.md +++ b/examples/torch/classification/README.md @@ -60,7 +60,6 @@ python main.py \ - Use the `--resume` flag with the path to a previously saved model to resume training. - For Torchvision-supported image classification models, set `"pretrained": true` inside the NNCF config JSON file supplied via `--config` to initialize the model to be compressed with Torchvision-supplied pretrained weights, or, alternatively: - Use the `--weights` flag with the path to a compatible PyTorch checkpoint in order to load all matching weights from the checkpoint into the model - useful if you need to start compression-aware training from a previously trained uncompressed (FP32) checkpoint instead of performing compression-aware training from scratch. -- Use `--prepare-for-inference` argument to convert model to torch native format before `test` and `export` steps. #### Validate Your Model Checkpoint diff --git a/examples/torch/classification/main.py b/examples/torch/classification/main.py index 278d13956c1..18ded3d4923 100644 --- a/examples/torch/classification/main.py +++ b/examples/torch/classification/main.py @@ -37,6 +37,8 @@ from torchvision.datasets import CIFAR100 from torchvision.models import InceptionOutputs +from examples.common.sample_config import SampleConfig +from examples.common.sample_config import create_sample_config from examples.torch.common.argparser import get_common_argument_parser from examples.torch.common.argparser import parse_args from examples.torch.common.example_logger import logger @@ -45,6 +47,7 @@ from examples.torch.common.execution import prepare_model_for_execution from examples.torch.common.execution import set_seed from examples.torch.common.execution import start_worker +from examples.torch.common.export import export_model from examples.torch.common.model_loader import COMPRESSION_STATE_ATTR from examples.torch.common.model_loader import MODEL_STATE_ATTR from examples.torch.common.model_loader import extract_model_and_compression_states @@ -52,8 +55,6 @@ from examples.torch.common.model_loader import load_resuming_checkpoint from examples.torch.common.optimizer import get_parameter_groups from examples.torch.common.optimizer import make_optimizer -from examples.common.sample_config import SampleConfig -from examples.common.sample_config import create_sample_config from examples.torch.common.utils import MockDataset from examples.torch.common.utils import NullContextManager from examples.torch.common.utils import SafeMLFLow @@ -223,10 +224,8 @@ def model_eval_fn(model): load_state(model, model_state_dict, is_resume=True) if is_export_only: - if config.prepare_for_inference: - compression_ctrl.prepare_for_inference(make_model_copy=False) - compression_ctrl.export_model(config.to_onnx) - logger.info("Saved to {}".format(config.to_onnx)) + export_model(compression_ctrl.prepare_for_inference(), config.to_onnx) + logger.info(f'Saved to {config.to_onnx}') return model, _ = prepare_model_for_execution(model, config) @@ -293,17 +292,13 @@ def configure_optimizers_fn(): if 'test' in config.mode: val_model = model - if config.prepare_for_inference: - val_model = compression_ctrl.prepare_for_inference(make_model_copy=True) validate(val_loader, val_model, criterion, config) config.mlflow.end_run() if 'export' in config.mode: - if config.prepare_for_inference: - compression_ctrl.prepare_for_inference(make_model_copy=False) - compression_ctrl.export_model(config.to_onnx) - logger.info("Saved to {}".format(config.to_onnx)) + 
export_model(compression_ctrl.prepare_for_inference(), config.to_onnx) + logger.info(f'Saved to {config.to_onnx}') def train(config, compression_ctrl, model, criterion, criterion_fn, lr_scheduler, model_name, optimizer, diff --git a/examples/torch/classification/staged_quantization_worker.py b/examples/torch/classification/staged_quantization_worker.py index 4d5464bf832..1fd1eb7792a 100644 --- a/examples/torch/classification/staged_quantization_worker.py +++ b/examples/torch/classification/staged_quantization_worker.py @@ -16,12 +16,12 @@ import time import torch -from torch.backends import cudnn -from torch import nn import torch.nn.parallel import torch.optim import torch.utils.data import torch.utils.data.distributed +from torch import nn +from torch.backends import cudnn from torchvision.models import InceptionOutputs from examples.torch.classification.main import AverageMeter @@ -35,6 +35,7 @@ from examples.torch.common.execution import ExecutionMode from examples.torch.common.execution import prepare_model_for_execution from examples.torch.common.execution import set_seed +from examples.torch.common.export import export_model from examples.torch.common.model_loader import COMPRESSION_STATE_ATTR from examples.torch.common.model_loader import MODEL_STATE_ATTR from examples.torch.common.model_loader import extract_model_and_compression_states @@ -53,11 +54,11 @@ from nncf.config.schemata.defaults import LR_POLY_DURATION_EPOCHS from nncf.config.schemata.defaults import STAGED_QUANTIZATION_BASE_LR from nncf.config.schemata.defaults import STAGED_QUANTIZATION_BASE_WD +from nncf.torch import create_compressed_model from nncf.torch.binarization.algo import BinarizationController from nncf.torch.checkpoint_loading import load_state from nncf.torch.initialization import default_criterion_fn from nncf.torch.initialization import register_default_init_args -from nncf.torch import create_compressed_model from nncf.torch.quantization.algo import QuantizationController from nncf.torch.utils import is_main_process @@ -219,8 +220,8 @@ def autoq_eval_fn(model, eval_loader): log_common_mlflow_params(config) if is_export_only: - compression_ctrl.export_model(config.to_onnx) - logger.info("Saved to {}".format(config.to_onnx)) + export_model(compression_ctrl.prepare_for_inference(), config.to_onnx) + logger.info(f'Saved to {config.to_onnx}') return if config.execution_mode != ExecutionMode.CPU_ONLY: @@ -240,8 +241,8 @@ def autoq_eval_fn(model, eval_loader): validate(val_loader, model, criterion, config) if 'export' in config.mode: - compression_ctrl.export_model(config.to_onnx) - logger.info("Saved to {}".format(config.to_onnx)) + export_model(compression_ctrl.prepare_for_inference(), config.to_onnx) + logger.info(f'Saved to {config.to_onnx}') diff --git a/examples/torch/common/argparser.py b/examples/torch/common/argparser.py index a21c7411cb5..ec73b1a5d05 100644 --- a/examples/torch/common/argparser.py +++ b/examples/torch/common/argparser.py @@ -174,11 +174,6 @@ def get_common_argument_parser(): parser.add_argument('--to-onnx', type=str, metavar='PATH', default=None, help='Export to ONNX model by given path') - parser.add_argument( - "--prepare-for-inference", - action='store_true', - help="Convert model to torch native format for export and test steps.") - # Display parser.add_argument('-p', '--print-freq', default=10, type=int, metavar='N', help='Print frequency (batch iterations). 
' diff --git a/examples/torch/common/export.py b/examples/torch/common/export.py new file mode 100644 index 00000000000..d5a95d14ccb --- /dev/null +++ b/examples/torch/common/export.py @@ -0,0 +1,34 @@ +""" + Copyright (c) 2023 Intel Corporation + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" +import torch + +from nncf.torch.exporter import generate_input_names_list +from nncf.torch.nncf_network import NNCFNetwork + + +def export_model(model: NNCFNetwork, save_path: str) -> None: + """ + Export compressed model. Supported only 'onnx' format. + + :param model: The target model. + :param save_path: Path to save onnx file. + """ + model = model.eval().cpu() + input_names = generate_input_names_list(len(model.input_infos)) + input_tensor_list = [] + for info in model.input_infos: + input_shape = tuple([1] + list(info.shape)[1:]) + input_tensor_list.append(torch.rand(input_shape)) + + with torch.no_grad(): + torch.onnx.export(model, tuple(input_tensor_list), save_path, input_names=input_names) diff --git a/examples/torch/object_detection/README.md b/examples/torch/object_detection/README.md index 27de4902685..c15567159a0 100644 --- a/examples/torch/object_detection/README.md +++ b/examples/torch/object_detection/README.md @@ -47,7 +47,6 @@ This scenario demonstrates quantization with fine-tuning of SSD300 on VOC datase - Use `--weights` flag with the path to a compatible PyTorch checkpoint in order to load all matching weights from the checkpoint into the model - useful if you need to start compression-aware training from a previously trained uncompressed (FP32) checkpoint instead of performing compression-aware training from scratch. This flag is optional, but highly recommended to use. - Use `--multiprocessing-distributed` flag to run in the distributed mode. - Use `--resume` flag with the path to a previously saved model to resume training. -- Use `--prepare-for-inference` argument to convert model to torch native format before `test` and `export` steps. 
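A usage sketch for the PyTorch `export_model` helper introduced above in `examples/torch/common/export.py`; `compression_ctrl` is assumed to come from `create_compressed_model`, and the output path is illustrative:

```python
from examples.torch.common.export import export_model

# prepare_for_inference() returns the model with NNCF training-only operations
# removed; the helper then builds dummy inputs from model.input_infos and runs
# torch.onnx.export under torch.no_grad().
stripped_model = compression_ctrl.prepare_for_inference()
export_model(stripped_model, 'compressed_model.onnx')
```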
#### Validate your model checkpoint diff --git a/examples/torch/object_detection/main.py b/examples/torch/object_detection/main.py index 165bd659eb5..785effa8a55 100644 --- a/examples/torch/object_detection/main.py +++ b/examples/torch/object_detection/main.py @@ -20,6 +20,8 @@ from torch.optim.lr_scheduler import ReduceLROnPlateau from torch.utils import data +from examples.common.sample_config import SampleConfig +from examples.common.sample_config import create_sample_config from examples.torch.common import restricted_pickle_module from examples.torch.common.argparser import get_common_argument_parser from examples.torch.common.argparser import parse_args @@ -29,14 +31,13 @@ from examples.torch.common.execution import prepare_model_for_execution from examples.torch.common.execution import set_seed from examples.torch.common.execution import start_worker +from examples.torch.common.export import export_model from examples.torch.common.model_loader import COMPRESSION_STATE_ATTR from examples.torch.common.model_loader import MODEL_STATE_ATTR from examples.torch.common.model_loader import extract_model_and_compression_states from examples.torch.common.model_loader import load_resuming_checkpoint from examples.torch.common.optimizer import get_parameter_groups from examples.torch.common.optimizer import make_optimizer -from examples.common.sample_config import SampleConfig -from examples.common.sample_config import create_sample_config from examples.torch.common.utils import SafeMLFLow from examples.torch.common.utils import configure_device from examples.torch.common.utils import configure_logging @@ -209,10 +210,8 @@ def model_eval_fn(model): log_common_mlflow_params(config) if is_export_only: - if config.prepare_for_inference: - compression_ctrl.prepare_for_inference(make_model_copy=False) - compression_ctrl.export_model(config.to_onnx) - logger.info("Saved to {}".format(config.to_onnx)) + export_model(compression_ctrl.prepare_for_inference(), config.to_onnx) + logger.info(f'Saved to {config.to_onnx}') return if is_main_process(): @@ -259,8 +258,6 @@ def configure_optimizers_fn(): if 'test' in config.mode: with torch.no_grad(): val_net = net - if config.prepare_for_inference: - val_net = compression_ctrl.prepare_for_inference(make_model_copy=True) net.eval() if config['ssd_params'].get('loss_inference', False): model_loss = test_net(val_net, config.device, test_data_loader, distributed=config.distributed, @@ -272,10 +269,8 @@ def configure_optimizers_fn(): write_metrics(mAp, config.metrics_dump) if 'export' in config.mode: - if config.prepare_for_inference: - compression_ctrl.prepare_for_inference(make_model_copy=False) - compression_ctrl.export_model(config.to_onnx) - logger.info("Saved to {}".format(config.to_onnx)) + export_model(compression_ctrl.prepare_for_inference(), config.to_onnx) + logger.info(f'Saved to {config.to_onnx}') def create_dataloaders(config): diff --git a/examples/torch/semantic_segmentation/README.md b/examples/torch/semantic_segmentation/README.md index d875e1af326..94f3901ac48 100644 --- a/examples/torch/semantic_segmentation/README.md +++ b/examples/torch/semantic_segmentation/README.md @@ -45,7 +45,6 @@ This scenario demonstrates quantization with fine-tuning of UNet on Mapillary Vi ``` - Run the following command to start compression with fine-tuning on GPUs: `python main.py -m train --config configs/unet_mapillary_int8.json --data --weights ` -- Use `--prepare-for-inference` argument to convert model to torch native format before `test` and `export` steps. 
It may take a few epochs to get the baseline accuracy results. diff --git a/examples/torch/semantic_segmentation/main.py b/examples/torch/semantic_segmentation/main.py index db8d5846c1e..77de7f91b9d 100644 --- a/examples/torch/semantic_segmentation/main.py +++ b/examples/torch/semantic_segmentation/main.py @@ -10,12 +10,11 @@ See the License for the specific language governing permissions and limitations under the License. """ - -import functools -import os # Major parts of this sample reuse code from: # https://github.com/davidtvs/PyTorch-ENet # https://github.com/pytorch/vision/tree/master/references/segmentation +import functools +import os import sys from copy import deepcopy from os import path as osp @@ -27,6 +26,7 @@ import examples.torch.semantic_segmentation.utils.data as data_utils import examples.torch.semantic_segmentation.utils.transforms as JT +from examples.common.sample_config import create_sample_config from examples.torch.common.argparser import get_common_argument_parser from examples.torch.common.argparser import parse_args from examples.torch.common.example_logger import logger @@ -34,11 +34,11 @@ from examples.torch.common.execution import prepare_model_for_execution from examples.torch.common.execution import set_seed from examples.torch.common.execution import start_worker +from examples.torch.common.export import export_model from examples.torch.common.model_loader import extract_model_and_compression_states from examples.torch.common.model_loader import load_model from examples.torch.common.model_loader import load_resuming_checkpoint from examples.torch.common.optimizer import make_optimizer -from examples.common.sample_config import create_sample_config from examples.torch.common.utils import SafeMLFLow from examples.torch.common.utils import configure_device from examples.torch.common.utils import configure_logging @@ -471,7 +471,6 @@ def predict(model, images, class_encoding, config): color_predictions = data_utils.label_to_color(predictions, class_encoding) return color_predictions - def main_worker(current_gpu, config): configure_device(current_gpu, config) config.mlflow = SafeMLFLow(config) @@ -540,10 +539,8 @@ def autoq_test_fn(model, eval_loader): log_common_mlflow_params(config) if is_export_only: - if config.prepare_for_inference: - compression_ctrl.prepare_for_inference(make_model_copy=False) - compression_ctrl.export_model(config.to_onnx) - logger.info("Saved to {}".format(config.to_onnx)) + export_model(compression_ctrl.prepare_for_inference(), config.to_onnx) + logger.info(f'Saved to {config.to_onnx}') return if is_main_process(): @@ -592,8 +589,6 @@ def configure_optimizers_fn(): if 'test' in config.mode: logger.info(model) val_model = model - if config.prepare_for_inference: - val_model = compression_ctrl.prepare_for_inference(make_model_copy=True) model_parameters = filter(lambda p: p.requires_grad, val_model.parameters()) params = sum(np.prod(p.size()) for p in model_parameters) logger.info("Trainable argument count:{params}".format(params=params)) @@ -601,10 +596,8 @@ def configure_optimizers_fn(): test(val_model, val_loader, criterion, color_encoding, config) if 'export' in config.mode: - if config.prepare_for_inference: - compression_ctrl.prepare_for_inference(make_model_copy=False) - compression_ctrl.export_model(config.to_onnx) - logger.info("Saved to {}".format(config.to_onnx)) + export_model(compression_ctrl.prepare_for_inference(), config.to_onnx) + logger.info(f'Saved to {config.to_onnx}') def main(argv): diff --git 
a/nncf/api/compression.py b/nncf/api/compression.py index bbce8d58269..6d0f32de37f 100644 --- a/nncf/api/compression.py +++ b/nncf/api/compression.py @@ -14,10 +14,16 @@ from abc import ABC from abc import abstractmethod from enum import IntEnum -from typing import Any, Dict, List, Optional, Tuple, TypeVar +from typing import Any +from typing import Dict +from typing import List +from typing import Optional +from typing import Tuple +from typing import TypeVar from nncf.api.statistics import Statistics from nncf.common.graph.transformations.layout import TransformationLayout +from nncf.common.utils.backend import copy_model TModel = TypeVar('TModel') @@ -244,12 +250,38 @@ def statistics(self, quickly_collected_only: bool = False) -> Statistics: :return: A `Statistics` class instance that contains compression algorithm statistics. """ + def strip_model(self, model: TModel, do_copy: bool = False) -> TModel: + """ + Strips auxiliary layers that were used for the model compression, as it's + only needed for training. The method is used before exporting the model + in the target format. + + :param model: The compressed model. + :param do_copy: Modify copy of the model, defaults to False. + :return: The stripped model. + """ + if do_copy: + model = copy_model(model) + return model + def prepare_for_export(self) -> None: """ Prepare the compressed model for deployment. """ self._model = self.strip_model(self._model) + def prepare_for_inference(self, do_copy: bool = True) -> TModel: + """ + Prepare the compressed model for inference and export by removing NNCF-specific operations, as it's + only needed for training. + + :param do_copy: `True` means that a copy of the model will be modified. + `False` means that the original model will be modify. Defaults to True. + + :return: Modified model. + """ + return self.strip_model(self.model, do_copy) + @abstractmethod def export_model(self, save_path: str, @@ -277,17 +309,6 @@ def export_model(self, - ({'x': None, 'y': y},) for keyword arguments only. """ - def strip_model(self, model: TModel) -> TModel: - """ - Strips auxiliary layers that were used for the model compression, as it's - only needed for training. The method is used before exporting the model - in the target format. - - :param model: The compressed model. - :return: The stripped model. 
- """ - return model - @property @abstractmethod def compression_rate(self) -> float: diff --git a/nncf/common/accuracy_aware_training/runner_factory.py b/nncf/common/accuracy_aware_training/runner_factory.py index 2168f1b71ab..bd53459515c 100644 --- a/nncf/common/accuracy_aware_training/runner_factory.py +++ b/nncf/common/accuracy_aware_training/runner_factory.py @@ -19,8 +19,8 @@ from nncf.common.accuracy_aware_training.runner import BaseAccuracyAwareTrainingRunner from nncf.common.accuracy_aware_training.runner import BaseAdaptiveCompressionLevelTrainingRunner from nncf.common.accuracy_aware_training.runner import TrainingRunner -from nncf.common.utils.backend import infer_backend_from_compression_controller from nncf.common.utils.backend import BackendType +from nncf.common.utils.backend import get_backend class TrainingRunnerCreator(ABC): @@ -53,7 +53,7 @@ def create_training_loop(self) -> BaseAccuracyAwareTrainingRunner: :return: AccuracyAwareTrainingRunner object """ - nncf_backend = infer_backend_from_compression_controller(self.compression_controller) + nncf_backend = get_backend(self.compression_controller.model) if nncf_backend is BackendType.TORCH: from nncf.torch.accuracy_aware_training.runner import PTAccuracyAwareTrainingRunner return PTAccuracyAwareTrainingRunner(self.accuracy_aware_training_params, self.verbose, @@ -88,7 +88,7 @@ def create_training_loop(self) -> BaseAdaptiveCompressionLevelTrainingRunner: :return: AdaptiveCompressionLevelTrainingRunner object """ - nncf_backend = infer_backend_from_compression_controller(self.compression_controller) + nncf_backend = get_backend(self.compression_controller.model) if nncf_backend is BackendType.TORCH: from nncf.torch.accuracy_aware_training.runner import PTAdaptiveCompressionLevelTrainingRunner diff --git a/nncf/common/composite_compression.py b/nncf/common/composite_compression.py index 851cb4328e2..03d9964c5c1 100644 --- a/nncf/common/composite_compression.py +++ b/nncf/common/composite_compression.py @@ -26,6 +26,7 @@ from nncf.api.compression import TModel from nncf.common.statistics import NNCFStatistics from nncf.common.utils.backend import BackendType +from nncf.common.utils.backend import copy_model from nncf.common.utils.backend import get_backend @@ -280,6 +281,14 @@ def prepare_for_export(self) -> None: stripped_model = ctrl.strip_model(stripped_model) self._model = stripped_model + def prepare_for_inference(self, do_copy: bool = True) -> TModel: + model = self.model + if do_copy: + model = copy_model(model) + for ctrl in self.child_ctrls: + model = ctrl.strip_model(model, do_copy=False) + return model + @property def compression_rate(self) -> float: raise NotImplementedError @@ -320,11 +329,11 @@ def export_model(self, self.prepare_for_export() backend = get_backend(self.model) if backend is BackendType.TENSORFLOW: - from nncf.tensorflow.exporter import TFExporter #pylint: disable=cyclic-import + from nncf.tensorflow.exporter import TFExporter # pylint: disable=cyclic-import exporter = TFExporter(self.model, input_names, output_names, model_args) else: assert backend is BackendType.TORCH - from nncf.torch.exporter import PTExporter #pylint: disable=cyclic-import + from nncf.torch.exporter import PTExporter # pylint: disable=cyclic-import exporter = PTExporter(self.model, input_names, output_names, model_args) if save_format is not None: exporter.export_model(save_path, save_format) diff --git a/nncf/common/utils/backend.py b/nncf/common/utils/backend.py index e8aa3da1d55..fe432a56a33 100644 --- 
a/nncf/common/utils/backend.py +++ b/nncf/common/utils/backend.py @@ -10,11 +10,9 @@ See the License for the specific language governing permissions and limitations under the License. """ -from typing import TypeVar -from enum import Enum from copy import deepcopy - -from nncf.api.compression import CompressionAlgorithmController +from enum import Enum +from typing import TypeVar TModel = TypeVar('TModel') @@ -75,19 +73,6 @@ def get_backend(model) -> BackendType: 'The available frameworks found: {}.'.format(', '.join(available_frameworks))) -def infer_backend_from_compression_controller(compression_controller: CompressionAlgorithmController) -> BackendType: - """ - Returns the NNCF backend name string inferred from the type of the model - stored in the passed compression controller. - - :param compression_controller: Passed compression controller - (of CompressionAlgorithmController type). - :return: A BackendType representing the NNCF backend. - """ - return get_backend(compression_controller.model) - - -# TODO(l-bat): Remove after fixing ticket: 100919 def copy_model(model: TModel) -> TModel: """ Function to create copy of the backend-specific model. @@ -97,5 +82,12 @@ def copy_model(model: TModel) -> TModel: """ model_backend = get_backend(model) if model_backend == BackendType.OPENVINO: + # TODO(l-bat): Remove after fixing ticket: 100919 return model.clone() + if model_backend == BackendType.TENSORFLOW: + # deepcopy and tensorflow.keras.models.clone_model do not work correctly with TensorFlow 2.8.4 + from nncf.tensorflow.graph.model_transformer import TFModelTransformer + from nncf.tensorflow.graph.transformations.layout import TFTransformationLayout + model = TFModelTransformer(model).transform(TFTransformationLayout()) + return model return deepcopy(model) diff --git a/nncf/experimental/tensorflow/quantization/algorithm.py b/nncf/experimental/tensorflow/quantization/algorithm.py index 8fa9af27d08..7edbd9b0745 100644 --- a/nncf/experimental/tensorflow/quantization/algorithm.py +++ b/nncf/experimental/tensorflow/quantization/algorithm.py @@ -11,39 +11,42 @@ limitations under the License.
""" -from typing import List, Optional, Dict, Any +from typing import Any +from typing import Dict +from typing import List +from typing import Optional -from nncf.common.logging import nncf_logger from nncf.common.graph import NNCFGraph from nncf.common.graph import NNCFNode -from nncf.common.graph.utils import get_first_nodes_of_type from nncf.common.graph.transformations.commands import TargetPoint from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.commands import TransformationPriority -from nncf.common.quantization.structs import QuantizerConfig -from nncf.common.quantization.quantizer_setup import QuantizationPointId +from nncf.common.graph.utils import get_first_nodes_of_type +from nncf.common.logging import nncf_logger from nncf.common.quantization.quantizer_setup import ActivationQuantizationInsertionPoint +from nncf.common.quantization.quantizer_setup import QuantizationPointId +from nncf.common.quantization.structs import QuantizerConfig from nncf.common.stateful_classes_registry import TF_STATEFUL_CLASSES from nncf.common.statistics import NNCFStatistics +from nncf.common.utils.backend import copy_model from nncf.config.extractors import extract_range_init_params -from nncf.tensorflow.algorithm_selector import TF_COMPRESSION_ALGORITHMS -from nncf.tensorflow.graph.transformations.commands import TFInsertionCommand -from nncf.tensorflow.graph.metatypes.tf_ops import TFOpWithWeightsMetatype -from nncf.tensorflow.quantization.quantizers import TFQuantizerSpec -from nncf.tensorflow.quantization.algorithm import QuantizationController -from nncf.tensorflow.quantization.algorithm import TFQuantizationPointStateNames -from nncf.tensorflow.quantization.algorithm import TFQuantizationPoint -from nncf.tensorflow.quantization.algorithm import TFQuantizationSetup -from nncf.tensorflow.quantization.algorithm import QuantizationBuilder -from nncf.experimental.tensorflow.nncf_network import NNCFNetwork from nncf.experimental.tensorflow.graph.converter import SubclassedConverter from nncf.experimental.tensorflow.graph.model_transformer import TFModelTransformerV2 +from nncf.experimental.tensorflow.graph.transformations.commands import TFTargetPoint from nncf.experimental.tensorflow.graph.transformations.layout import TFTransformationLayoutV2 -from nncf.experimental.tensorflow.quantization.init_range import TFRangeInitParamsV2 +from nncf.experimental.tensorflow.nncf_network import NNCFNetwork from nncf.experimental.tensorflow.quantization.init_range import RangeInitializerV2 +from nncf.experimental.tensorflow.quantization.init_range import TFRangeInitParamsV2 from nncf.experimental.tensorflow.quantization.quantizers import create_quantizer -from nncf.experimental.tensorflow.graph.transformations.commands import TFTargetPoint - +from nncf.tensorflow.algorithm_selector import TF_COMPRESSION_ALGORITHMS +from nncf.tensorflow.graph.metatypes.tf_ops import TFOpWithWeightsMetatype +from nncf.tensorflow.graph.transformations.commands import TFInsertionCommand +from nncf.tensorflow.quantization.algorithm import QuantizationBuilder +from nncf.tensorflow.quantization.algorithm import QuantizationController +from nncf.tensorflow.quantization.algorithm import TFQuantizationPoint +from nncf.tensorflow.quantization.algorithm import TFQuantizationPointStateNames +from nncf.tensorflow.quantization.algorithm import TFQuantizationSetup +from nncf.tensorflow.quantization.quantizers import TFQuantizerSpec UNSUPPORTED_TF_OP_METATYPES = [ ] @@ -364,7 +367,9 @@ def 
apply_to(self, model: NNCFNetwork) -> NNCFNetwork: class QuantizationControllerV2(QuantizationController): - def strip_model(self, model: NNCFNetwork) -> NNCFNetwork: + def strip_model(self, model: NNCFNetwork, do_copy: bool = False) -> NNCFNetwork: + if do_copy: + model = copy_model(model) return model def statistics(self, quickly_collected_only: bool = False) -> NNCFStatistics: diff --git a/nncf/tensorflow/algorithm_selector.py b/nncf/tensorflow/algorithm_selector.py index c768e67d197..987f888c55e 100644 --- a/nncf/tensorflow/algorithm_selector.py +++ b/nncf/tensorflow/algorithm_selector.py @@ -17,12 +17,13 @@ from nncf.api.compression import CompressionAlgorithmController from nncf.common.compression import NO_COMPRESSION_ALGORITHM_NAME +from nncf.common.compression import BaseCompressionAlgorithmController from nncf.common.graph.transformations.layout import TransformationLayout -from nncf.common.schedulers import StubCompressionScheduler from nncf.common.logging import nncf_logger -from nncf.common.utils.registry import Registry +from nncf.common.schedulers import StubCompressionScheduler from nncf.common.statistics import NNCFStatistics -from nncf.common.compression import BaseCompressionAlgorithmController +from nncf.common.utils.backend import copy_model +from nncf.common.utils.registry import Registry from nncf.tensorflow.api.compression import TFCompressionAlgorithmBuilder from nncf.tensorflow.loss import TFZeroCompressionLoss @@ -61,6 +62,12 @@ def scheduler(self) -> StubCompressionScheduler: def statistics(self, quickly_collected_only: bool = False) -> NNCFStatistics: return NNCFStatistics() + def prepare_for_inference(self, do_copy: bool = True) -> tf.keras.Model: + model = self.model + if do_copy: + model = copy_model(self.model) + return model + def get_compression_algorithm_builder(algo_name: str) -> Type[TFCompressionAlgorithmBuilder]: nncf_logger.info(f'Creating compression algorithm: {algo_name}') diff --git a/nncf/tensorflow/pruning/base_algorithm.py b/nncf/tensorflow/pruning/base_algorithm.py index aabb9cedd4f..d1dfdcadd03 100644 --- a/nncf/tensorflow/pruning/base_algorithm.py +++ b/nncf/tensorflow/pruning/base_algorithm.py @@ -354,5 +354,6 @@ def _calculate_pruned_layers_summary(self) -> List[PrunedLayerSummary]: return pruned_layers_summary - def strip_model(self, model: tf.keras.Model) -> tf.keras.Model: + def strip_model(self, model: tf.keras.Model, do_copy: bool = False) -> tf.keras.Model: + # Transforming the model for pruning already creates a copy of the model.
return strip_model_from_masks(model, self._op_names) diff --git a/nncf/tensorflow/quantization/algorithm.py b/nncf/tensorflow/quantization/algorithm.py index 8e289290867..0f67fdde436 100644 --- a/nncf/tensorflow/quantization/algorithm.py +++ b/nncf/tensorflow/quantization/algorithm.py @@ -51,6 +51,7 @@ from nncf.common.scopes import should_consider_scope from nncf.common.stateful_classes_registry import TF_STATEFUL_CLASSES from nncf.common.statistics import NNCFStatistics +from nncf.common.utils.backend import copy_model from nncf.config.extractors import extract_range_init_params from nncf.config.schemata.defaults import QUANTIZATION_OVERFLOW_FIX from nncf.config.schemata.defaults import QUANTIZE_INPUTS @@ -698,7 +699,9 @@ def scheduler(self) -> CompressionScheduler: def loss(self) -> CompressionLoss: return self._loss - def strip_model(self, model: tf.keras.Model) -> tf.keras.Model: + def strip_model(self, model: tf.keras.Model, do_copy: bool = False) -> tf.keras.Model: + if do_copy: + model = copy_model(model) apply_overflow_fix(model, self._op_names) return model diff --git a/nncf/tensorflow/sparsity/base_algorithm.py b/nncf/tensorflow/sparsity/base_algorithm.py index 607a377acec..7e12be399cd 100644 --- a/nncf/tensorflow/sparsity/base_algorithm.py +++ b/nncf/tensorflow/sparsity/base_algorithm.py @@ -10,9 +10,10 @@ See the License for the specific language governing permissions and limitations under the License. """ +import tensorflow as tf -from nncf.common.sparsity.controller import SparsityController from nncf.common.compression import BaseCompressionAlgorithmController +from nncf.common.sparsity.controller import SparsityController from nncf.tensorflow.graph.metatypes import keras_layers as layer_metatypes from nncf.tensorflow.sparsity.utils import strip_model_from_masks @@ -48,5 +49,6 @@ def __init__(self, target_model, op_names): super().__init__(target_model) self._op_names = op_names - def strip_model(self, model): + def strip_model(self, model: tf.keras.Model, do_copy: bool = False) -> tf.keras.Model: + # Transforming the model for sparsity already creates a copy of the model. return strip_model_from_masks(model, self._op_names) diff --git a/nncf/torch/algo_selector.py b/nncf/torch/algo_selector.py index f37b86140d3..1e79b45c400 100644 --- a/nncf/torch/algo_selector.py +++ b/nncf/torch/algo_selector.py @@ -10,25 +10,22 @@ See the License for the specific language governing permissions and limitations under the License.
""" - -# pylint:disable=relative-beyond-top-level from typing import Dict import torch -from nncf.torch.graph.transformations.layout import PTTransformationLayout -from nncf.torch.nncf_network import NNCFNetwork - -from nncf.api.compression import CompressionStage from nncf.api.compression import CompressionScheduler -from nncf.torch.compression_method_api import PTCompressionAlgorithmBuilder -from nncf.torch.compression_method_api import PTCompressionAlgorithmController - -from nncf.torch.compression_method_api import PTCompressionLoss +from nncf.api.compression import CompressionStage from nncf.common.compression import NO_COMPRESSION_ALGORITHM_NAME from nncf.common.schedulers import StubCompressionScheduler -from nncf.common.utils.registry import Registry from nncf.common.statistics import NNCFStatistics +from nncf.common.utils.backend import copy_model +from nncf.common.utils.registry import Registry +from nncf.torch.compression_method_api import PTCompressionAlgorithmBuilder +from nncf.torch.compression_method_api import PTCompressionAlgorithmController +from nncf.torch.compression_method_api import PTCompressionLoss +from nncf.torch.graph.transformations.layout import PTTransformationLayout +from nncf.torch.nncf_network import NNCFNetwork from nncf.torch.utils import get_model_device PT_COMPRESSION_ALGORITHMS = Registry('compression algorithm', add_name_as_attr=True) @@ -86,3 +83,9 @@ def scheduler(self) -> CompressionScheduler: def statistics(self, quickly_collected_only: bool = False) -> NNCFStatistics: return NNCFStatistics() + + def prepare_for_inference(self, do_copy: bool = True) -> NNCFNetwork: + model = self.model + if do_copy: + model = copy_model(self.model) + return model diff --git a/nncf/torch/composite_compression.py b/nncf/torch/composite_compression.py index c99bdc7d380..bf788c396c1 100644 --- a/nncf/torch/composite_compression.py +++ b/nncf/torch/composite_compression.py @@ -10,7 +10,6 @@ See the License for the specific language governing permissions and limitations under the License. """ -import copy from typing import TypeVar import torch.nn @@ -133,29 +132,3 @@ def compression_rate(self) -> float: sum_compression_rate += sum_compression_rate not_none_compression_rate_cnt += 1 return sum_compression_rate / max(not_none_compression_rate_cnt, 1) - - def prepare_for_inference(self, make_model_copy: bool = True) -> NNCFNetwork: - """ - Prepare NNCFNetwork for inference by converting NNCF modules to torch native format. - - :param make_model_copy: `True` means that a copy of the model will be modified. - `False` means that the original model in the controller will be changed and - no further compression actions will be available. Defaults to True. - - :return NNCFNetwork: Converted model. - """ - model = self.model - if make_model_copy: - model = copy.deepcopy(self.model) - - for ctrl in self.child_ctrls: - if make_model_copy: - # pylint: disable=protected-access - saved_model = ctrl.model - ctrl._model = model - model = ctrl.prepare_for_inference(make_model_copy=False) - ctrl._model = saved_model - else: - model = ctrl.prepare_for_inference(make_model_copy=False) - - return model diff --git a/nncf/torch/compression_method_api.py b/nncf/torch/compression_method_api.py index 35ce5e52805..b59c2ffc078 100644 --- a/nncf/torch/compression_method_api.py +++ b/nncf/torch/compression_method_api.py @@ -101,17 +101,9 @@ def distributed(self): should be made inside this function. 
""" - def prepare_for_inference(self, make_model_copy: bool = True) -> NNCFNetwork: - """ - Prepare NNCFNetwork for inference by converting NNCF modules to torch native format. - - :param make_model_copy: `True` means that a copy of the model will be modified. - `False` means that the original model in the controller will be changed and - no further compression actions will be available. Defaults to True. - - :return NNCFNetwork: Converted model. - """ - raise NotImplementedError(f"Method `prepare_for_inference` not implemented for {type(self)}.") + def prepare_for_export(self) -> None: + # For Torch models no need to call strip_model + pass class PTCompressionAlgorithmBuilder(BaseCompressionAlgorithmBuilder): diff --git a/nncf/torch/nncf_network.py b/nncf/torch/nncf_network.py index 9decc8f37e9..6cb8b91aece 100644 --- a/nncf/torch/nncf_network.py +++ b/nncf/torch/nncf_network.py @@ -27,6 +27,7 @@ import torch from torch import nn +from nncf import nncf_logger from nncf.common.graph import NNCFNode from nncf.common.graph import NNCFNodeName from nncf.common.graph.definitions import MODEL_INPUT_OP_NAME @@ -38,7 +39,6 @@ from nncf.common.insertion_point_graph import PostHookInsertionPoint from nncf.common.insertion_point_graph import PreHookInsertionPoint from nncf.common.utils.debug import is_debug -from nncf import nncf_logger from nncf.torch.debug import CombinedDebugInterface from nncf.torch.debug import debuggable_forward from nncf.torch.dynamic_graph.context import TracingContext @@ -56,7 +56,6 @@ from nncf.torch.dynamic_graph.scope import Scope from nncf.torch.dynamic_graph.scope_access import get_module_by_scope from nncf.torch.dynamic_graph.trace_tensor import TracedTensor -from nncf.torch.nncf_module_replacement import replace_modules_by_nncf_modules from nncf.torch.graph.graph import PTNNCFGraph from nncf.torch.graph.graph_builder import GraphBuilder from nncf.torch.graph.graph_builder import GraphConverter @@ -68,6 +67,7 @@ from nncf.torch.layer_utils import _NNCFModuleMixin from nncf.torch.module_operations import UpdateWeight from nncf.torch.nested_objects_traversal import objwalk +from nncf.torch.nncf_module_replacement import replace_modules_by_nncf_modules from nncf.torch.quantization.layers import QUANTIZATION_MODULES from nncf.torch.utils import compute_FLOPs_hook from nncf.torch.utils import get_all_modules_by_type @@ -312,7 +312,8 @@ def _reset_nncf_modules(self): def get_clean_shallow_copy(self) -> 'NNCFNetwork': # WARNING: Will reset pre- and post-ops of the underlying model. Use save_nncf_module_additions # and load_nncf_module_additions to preserve these, or temporary_clean_view(). - from nncf.torch.utils import save_module_state, load_module_state #pylint: disable=cyclic-import + from nncf.torch.utils import load_module_state # pylint: disable=cyclic-import + from nncf.torch.utils import save_module_state # pylint: disable=cyclic-import saved_state = save_module_state(self) model_copy = NNCFNetwork(self.get_nncf_wrapped_model(), self.input_infos, self._user_dummy_forward_fn, self._wrap_inputs_fn, diff --git a/nncf/torch/pruning/filter_pruning/algo.py b/nncf/torch/pruning/filter_pruning/algo.py index 8339e4bf8bb..67ff35ac4d8 100644 --- a/nncf/torch/pruning/filter_pruning/algo.py +++ b/nncf/torch/pruning/filter_pruning/algo.py @@ -11,7 +11,6 @@ limitations under the License. 
""" -import copy import json from math import isclose from pathlib import Path @@ -45,6 +44,7 @@ from nncf.common.pruning.weights_flops_calculator import WeightsFlopsCalculator from nncf.common.schedulers import StubCompressionScheduler from nncf.common.statistics import NNCFStatistics +from nncf.common.utils.backend import copy_model from nncf.common.utils.debug import is_debug from nncf.common.utils.os import safe_open from nncf.config.extractors import extract_bn_adaptation_init_params @@ -658,19 +658,9 @@ def _run_batchnorm_adaptation(self): 'filter_pruning')) self._bn_adaptation.run(self.model) - def prepare_for_inference(self, make_model_copy: bool = True) -> NNCFNetwork: - """ - Prepare NNCFNetwork for inference by converting NNCF modules to torch native format. - - :param make_model_copy: `True` means that a copy of the model will be modified. - `False` means that the original model in the controller will be changed and - no further compression actions will be available. Defaults to True. - - :return NNCFNetwork: Converted model. - """ - model = self.model - if make_model_copy: - model = copy.deepcopy(self.model) + def strip_model(self, model: NNCFNetwork, do_copy: bool = False) -> NNCFNetwork: + if do_copy: + model = copy_model(model) graph = model.get_original_graph() ModelPruner(model, graph, PT_PRUNING_OPERATOR_METATYPES, PrunType.FILL_ZEROS).prune_model() diff --git a/nncf/torch/quantization/algo.py b/nncf/torch/quantization/algo.py index d801acaa557..ca502333f54 100644 --- a/nncf/torch/quantization/algo.py +++ b/nncf/torch/quantization/algo.py @@ -12,7 +12,6 @@ """ # pylint:disable=too-many-lines -import copy import shutil from collections import Counter from collections import OrderedDict @@ -42,10 +41,10 @@ from nncf.common.graph.definitions import MODEL_INPUT_OP_NAME from nncf.common.graph.layer_attributes import ConvolutionLayerAttributes from nncf.common.graph.layer_attributes import WeightedLayerAttributes -from nncf.common.graph.transformations.commands import TargetPoint -from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.patterns.manager import PatternsManager from nncf.common.graph.patterns.manager import TargetDevice +from nncf.common.graph.transformations.commands import TargetPoint +from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.utils import get_first_nodes_of_type from nncf.common.hardware.config import HWConfig from nncf.common.hardware.config import HWConfigType @@ -71,10 +70,11 @@ from nncf.common.schedulers import BaseCompressionScheduler from nncf.common.scopes import matches_any from nncf.common.statistics import NNCFStatistics +from nncf.common.utils.backend import BackendType +from nncf.common.utils.backend import copy_model from nncf.common.utils.debug import is_debug from nncf.common.utils.dot_file_rw import write_dot_graph from nncf.common.utils.os import safe_open -from nncf.common.utils.backend import BackendType from nncf.config import NNCFConfig from nncf.config.extractors import extract_algo_specific_config from nncf.config.extractors import extract_bn_adaptation_init_params @@ -1459,23 +1459,11 @@ def statistics(self, quickly_collected_only=False) -> NNCFStatistics: nncf_stats.register('quantization', stats) return nncf_stats - def prepare_for_inference(self, make_model_copy: bool = True) -> NNCFNetwork: - """ - Prepare NNCFNetwork for inference by converting NNCF modules to torch native format. 
- - :param make_model_copy: `True` means that a copy of the model will be modified. - `False` means that the original model in the controller will be changed and - no further compression actions will be available. Defaults to True. - - :return NNCFNetwork: Converted model. - """ - model = self.model - if make_model_copy: - model = copy.deepcopy(self.model) - + def strip_model(self, model: NNCFNetwork, do_copy: bool = False) -> NNCFNetwork: + if do_copy: + model = copy_model(model) model = replace_quantizer_to_torch_native_module(model) model = remove_disabled_quantizers(model) - return model diff --git a/nncf/torch/sparsity/base_algo.py b/nncf/torch/sparsity/base_algo.py index 5151fca60c3..a2bff4f6629 100644 --- a/nncf/torch/sparsity/base_algo.py +++ b/nncf/torch/sparsity/base_algo.py @@ -10,7 +10,6 @@ See the License for the specific language governing permissions and limitations under the License. """ -import copy from typing import List import torch @@ -25,6 +24,7 @@ from nncf.common.schedulers import BaseCompressionScheduler from nncf.common.schedulers import StubCompressionScheduler from nncf.common.sparsity.controller import SparsityController +from nncf.common.utils.backend import copy_model from nncf.torch.algo_selector import ZeroCompressionLoss from nncf.torch.compression_method_api import PTCompressionAlgorithmBuilder from nncf.torch.compression_method_api import PTCompressionAlgorithmController @@ -116,26 +116,14 @@ def disable_scheduler(self): def compression_stage(self) -> CompressionStage: return CompressionStage.FULLY_COMPRESSED - def prepare_for_inference(self, make_model_copy: bool = True) -> NNCFNetwork: - """ - Prepare NNCFNetwork for inference by converting NNCF modules to torch native format. - - :param make_model_copy: `True` means that a copy of the model will be modified. - `False` means that the original model in the controller will be changed and - no further compression actions will be available. Defaults to True. - - :return NNCFNetwork: Converted model. - """ - model = self.model - if make_model_copy: - model = copy.deepcopy(self.model) + def strip_model(self, model: NNCFNetwork, do_copy: bool = False) -> NNCFNetwork: + if do_copy: + model = copy_model(model) for node in model.get_original_graph().get_all_nodes(): if node.node_type in ["nncf_model_input", "nncf_model_output"]: continue - nncf_module = model.get_containing_module(node.node_name) - if hasattr(nncf_module, "pre_ops"): for key in list(nncf_module.pre_ops.keys()): op = nncf_module.get_pre_op(key) diff --git a/tests/tensorflow/pruning/test_prepare_for_inference.py b/tests/tensorflow/pruning/test_prepare_for_inference.py new file mode 100644 index 00000000000..3127122883f --- /dev/null +++ b/tests/tensorflow/pruning/test_prepare_for_inference.py @@ -0,0 +1,71 @@ +""" + Copyright (c) 2023 Intel Corporation + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" + +import pytest +import tensorflow as tf + +from tests.tensorflow.helpers import TFTensorListComparator +from tests.tensorflow.helpers import create_compressed_model_and_algo_for_test +from tests.tensorflow.helpers import get_empty_config +from tests.tensorflow.pruning.helpers import get_concat_test_model + + +@pytest.mark.parametrize("enable_quantization", (True, False), ids=("with_quantization", "no_quantization")) +def test_prepare_for_inference(enable_quantization): + input_shape = (1, 8, 8, 3) + model = get_concat_test_model(input_shape) + + config = get_empty_config(input_sample_sizes=input_shape) + config.update( + {"compression": [{"algorithm": "filter_pruning", "pruning_init": 0.5, "params": {"prune_first_conv": True}}]} + ) + if enable_quantization: + config["compression"].append( + { + "algorithm": "quantization", + "preset": "mixed", + "initializer": { + "batchnorm_adaptation": { + "num_bn_adaptation_samples": 0, + } + }, + } + ) + + compressed_model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config) + input_tensor = tf.ones(input_shape) + x_nncf = compressed_model(input_tensor) + + inference_model = compression_ctrl.prepare_for_inference() + x_tf = inference_model(input_tensor) + + TFTensorListComparator.check_equal(x_nncf, x_tf) + +@pytest.mark.parametrize("do_copy", (True, False)) +@pytest.mark.parametrize("enable_quantization", (True, False), ids=("with_quantization", "no_quantization")) +def test_do_copy(do_copy, enable_quantization): + input_shape = (1, 8, 8, 3) + model = get_concat_test_model(input_shape) + + config = get_empty_config(input_sample_sizes=input_shape) + config.update( + {"compression": [{"algorithm": "filter_pruning", "pruning_init": 0.5, "params": {"prune_first_conv": True}}]} + ) + if enable_quantization: + config["compression"].append({"algorithm": "quantization", "preset": "mixed"}) + + compression_model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config, force_no_init=True) + inference_model = compression_ctrl.prepare_for_inference(do_copy=do_copy) + + # Transform model for pruning creates copy of the model in both cases + assert id(inference_model) != id(compression_model) diff --git a/tests/tensorflow/quantization/test_prepare_for_inference.py b/tests/tensorflow/quantization/test_prepare_for_inference.py new file mode 100644 index 00000000000..568600d83d3 --- /dev/null +++ b/tests/tensorflow/quantization/test_prepare_for_inference.py @@ -0,0 +1,61 @@ +""" + Copyright (c) 2023 Intel Corporation + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" + +import pytest +import tensorflow as tf + +from tests.tensorflow.helpers import TFTensorListComparator +from tests.tensorflow.helpers import create_compressed_model_and_algo_for_test +from tests.tensorflow.helpers import get_basic_two_conv_test_model +from tests.tensorflow.quantization.utils import get_basic_quantization_config + + +def test_prepare_for_inference(): + model = get_basic_two_conv_test_model() + config = get_basic_quantization_config() + config["compression"] = { + "algorithm": "quantization", + "preset": "mixed", + "initializer": { + "batchnorm_adaptation": { + "num_bn_adaptation_samples": 0, + } + } + } + + compressed_model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config) + + input_tensor = tf.ones([1, 4, 4, 1]) + x_nncf = compressed_model(input_tensor) + + inference_model = compression_ctrl.prepare_for_inference() + x_tf = inference_model(input_tensor) + + TFTensorListComparator.check_equal(x_nncf, x_tf) + + +@pytest.mark.parametrize("do_copy", (True, False)) +def test_do_copy(do_copy): + model = get_basic_two_conv_test_model() + config = get_basic_quantization_config() + config["compression"] = { + "algorithm": "quantization", + "preset": "mixed", + } + compression_model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config, force_no_init=True) + inference_model = compression_ctrl.prepare_for_inference(do_copy=do_copy) + + if do_copy: + assert id(inference_model) != id(compression_model) + else: + assert id(inference_model) == id(compression_model) diff --git a/tests/tensorflow/sparsity/test_prepare_for_inference.py b/tests/tensorflow/sparsity/test_prepare_for_inference.py new file mode 100644 index 00000000000..f59971aead3 --- /dev/null +++ b/tests/tensorflow/sparsity/test_prepare_for_inference.py @@ -0,0 +1,69 @@ +""" + Copyright (c) 2023 Intel Corporation + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" + +import pytest +import tensorflow as tf + +from tests.tensorflow.helpers import TFTensorListComparator +from tests.tensorflow.helpers import create_compressed_model_and_algo_for_test +from tests.tensorflow.helpers import get_basic_conv_test_model +from tests.tensorflow.helpers import get_empty_config + + +@pytest.mark.parametrize("enable_quantization", (True, False), ids=("with_quantization", "no_quantization")) +def test_prepare_for_inference(enable_quantization): + input_shape = (1, 4, 4, 1) + model = get_basic_conv_test_model() + config = get_empty_config(input_sample_sizes=input_shape) + + config.update({"compression": [{"algorithm": "magnitude_sparsity"}]}) + if enable_quantization: + config["compression"].append( + { + "algorithm": "quantization", + "preset": "mixed", + "initializer": { + "batchnorm_adaptation": { + "num_bn_adaptation_samples": 0, + } + }, + } + ) + + compressed_model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config) + + input_tensor = tf.ones(input_shape) + x_nncf = compressed_model(input_tensor) + + inference_model = compression_ctrl.prepare_for_inference() + x_tf = inference_model(input_tensor) + + TFTensorListComparator.check_equal(x_nncf, x_tf) + + +@pytest.mark.parametrize("do_copy", (True, False)) +@pytest.mark.parametrize("enable_quantization", (True, False), ids=("with_quantization", "no_quantization")) +def test_do_copy(do_copy, enable_quantization): + input_shape = (1, 4, 4, 1) + model = get_basic_conv_test_model(input_shape=input_shape[1:]) + + config = get_empty_config(input_sample_sizes=input_shape) + config.update({"compression": [{"algorithm": "magnitude_sparsity"}]}) + if enable_quantization: + config["compression"].append({"algorithm": "quantization", "preset": "mixed"}) + + compression_model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config, force_no_init=True) + inference_model = compression_ctrl.prepare_for_inference(do_copy=do_copy) + + # Transform model for sparsity creates copy of the model in both cases + assert id(inference_model) != id(compression_model) diff --git a/tests/torch/pruning/filter_pruning/test_prepare_for_inference.py b/tests/torch/pruning/filter_pruning/test_prepare_for_inference.py index 90b162731d5..cc576c0215c 100644 --- a/tests/torch/pruning/filter_pruning/test_prepare_for_inference.py +++ b/tests/torch/pruning/filter_pruning/test_prepare_for_inference.py @@ -67,16 +67,16 @@ def test_prepare_for_inference_pruning(enable_quantization): assert torch.equal(x_nncf, x_torch), f"{x_nncf=} != {x_torch}" -@pytest.mark.parametrize("make_model_copy", (True, False)) +@pytest.mark.parametrize("do_copy", (True, False)) @pytest.mark.parametrize("enable_quantization", (True, False), ids=("with_quantization", "no_quantization")) -def test_make_model_copy(make_model_copy, enable_quantization): +def test_do_copy(do_copy, enable_quantization): model = BasicConvTestModel() config = _get_config_for_algo(model.INPUT_SIZE, enable_quantization) compressed_model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config) - inference_model = compression_ctrl.prepare_for_inference(make_model_copy=make_model_copy) + inference_model = compression_ctrl.prepare_for_inference(do_copy=do_copy) - if make_model_copy: + if do_copy: assert id(inference_model) != id(compressed_model) else: assert id(inference_model) == id(compressed_model) diff --git a/tests/torch/quantization/test_prepare_for_inference.py b/tests/torch/quantization/test_prepare_for_inference.py index 
72c04f07e6a..01f07fdfa09 100644 --- a/tests/torch/quantization/test_prepare_for_inference.py +++ b/tests/torch/quantization/test_prepare_for_inference.py @@ -261,16 +261,16 @@ def test_prepare_for_inference_quantization(mode, overflow_fix, num_bits): assert torch.all(torch.isclose(x_nncf, x_torch)), f"{x_nncf.view(-1)} != {x_torch.view(-1)}" -@pytest.mark.parametrize("make_model_copy", (True, False)) -def test_make_model_copy(make_model_copy): +@pytest.mark.parametrize("do_copy", (True, False)) +def test_do_copy(do_copy): model = BasicConvTestModel() config = _get_config_for_algo(model.INPUT_SIZE) register_bn_adaptation_init_args(config) compressed_model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config) - inference_model = compression_ctrl.prepare_for_inference(make_model_copy=make_model_copy) + inference_model = compression_ctrl.prepare_for_inference(do_copy=do_copy) - if make_model_copy: + if do_copy: assert id(inference_model) != id(compressed_model) else: assert id(inference_model) == id(compressed_model) diff --git a/tests/torch/quantization/test_sanity_sample.py b/tests/torch/quantization/test_sanity_sample.py index 1027644b801..97e5e0dcc5d 100644 --- a/tests/torch/quantization/test_sanity_sample.py +++ b/tests/torch/quantization/test_sanity_sample.py @@ -275,6 +275,8 @@ def setup_spy(self, mocker): create_model_location = sample_location + '.create_compressed_model' create_model_patch = mocker.patch(create_model_location) + self._torch_onn_export_mock = mocker.patch('torch.onnx.export') + if self._desc.sample_type_ == SampleType.CLASSIFICATION_STAGED: mocker.patch(sample_location + '.get_quantization_optimizer') @@ -287,10 +289,11 @@ def fn(*args, **kwargs): def validate_spy(self): super().validate_spy() self._reg_init_args_patch.assert_called() + if self.is_export_called: - self._ctrl_mock.export_model.assert_called_once() + self._torch_onn_export_mock.assert_called_once() else: - self._ctrl_mock.export_model.assert_not_called() + self._torch_onn_export_mock.assert_not_called() EXPORT_TEST_CASE_DESCRIPTORS = [ diff --git a/tests/torch/sparsity/test_prepare_for_inference.py b/tests/torch/sparsity/test_prepare_for_inference.py index 2b651699e78..383dafbc8fa 100644 --- a/tests/torch/sparsity/test_prepare_for_inference.py +++ b/tests/torch/sparsity/test_prepare_for_inference.py @@ -81,16 +81,16 @@ def test_prepare_for_inference_sparsity(enable_quantization): assert torch.all(torch.isclose(x_nncf, x_torch)), f"{x_nncf.view(-1)} != {x_torch.view(-1)}" -@pytest.mark.parametrize("make_model_copy", (True, False)) +@pytest.mark.parametrize("do_copy", (True, False)) @pytest.mark.parametrize("enable_quantization", (True, False), ids=("with_quantization", "no_quantization")) -def test_make_model_copy(make_model_copy, enable_quantization): +def test_do_copy(do_copy, enable_quantization): model = BasicConvTestModel() config = _get_config_for_algo(model.INPUT_SIZE, enable_quantization) compressed_model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config) - inference_model = compression_ctrl.prepare_for_inference(make_model_copy=make_model_copy) + inference_model = compression_ctrl.prepare_for_inference(do_copy=do_copy) - if make_model_copy: + if do_copy: assert id(inference_model) != id(compressed_model) else: assert id(inference_model) == id(compressed_model) @@ -108,7 +108,7 @@ def test_corruption_binary_masks(): ref_mask_1 = torch.clone(compression_ctrl.sparsified_module_info[0].operand.binary_mask) ref_mask_2 = 
torch.clone(compression_ctrl.sparsified_module_info[1].operand.binary_mask) - compression_ctrl.prepare_for_inference(make_model_copy=False) + compression_ctrl.prepare_for_inference(do_copy=False) after_mask_1 = compression_ctrl.sparsified_module_info[0].operand.binary_mask after_mask_2 = compression_ctrl.sparsified_module_info[1].operand.binary_mask @@ -137,7 +137,7 @@ def tests_weights_after_onnx_export(tmp_path): weights_sparse.append(data) onnx_sparse_model_prepare_path = f"{tmp_path}/sparse_model_prepare.onnx" - compression_ctrl.prepare_for_inference(make_model_copy=False) + compression_ctrl.prepare_for_inference(do_copy=False) compression_ctrl.export_model(onnx_sparse_model_prepare_path, "onnx") onnx_prepare_model = onnx.load(onnx_sparse_model_prepare_path)
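For reference, the snippet below is a minimal usage sketch, not part of the diff, that shows the `do_copy` contract from a user's point of view, matching what the PyTorch tests above check. The toy model, the `magnitude_sparsity` config, the input shape, and the output path are illustrative assumptions; `create_compressed_model`, `prepare_for_inference`, and `torch.onnx.export` are the calls exercised by this change.

```python
# Minimal sketch of the do_copy behaviour introduced above (PyTorch backend).
# The toy model, config values, and output path are illustrative assumptions.
import torch
from torch import nn

from nncf import NNCFConfig
from nncf.torch import create_compressed_model

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Conv2d(8, 4, 3))
config = NNCFConfig.from_dict({
    "input_info": {"sample_size": [1, 3, 32, 32]},
    "compression": {"algorithm": "magnitude_sparsity"},
})
compression_ctrl, compressed_model = create_compressed_model(model, config)

# do_copy=True (the default): a stripped copy is returned, while the controller's
# training-time model keeps its NNCF operations and can still be fine-tuned.
inference_model = compression_ctrl.prepare_for_inference(do_copy=True)
assert inference_model is not compressed_model

# The stripped model no longer contains NNCF-specific nodes, so it can be traced
# and exported with plain framework tooling.
torch.onnx.export(inference_model, torch.ones([1, 3, 32, 32]), "compressed_model.onnx")

# do_copy=False: the controller's own model is stripped in place (the PyTorch
# controllers return the same object), after which no further compression
# actions should be applied to it.
in_place_model = compression_ctrl.prepare_for_inference(do_copy=False)
```

Note that, as the comments and tests above state, the TensorFlow pruning and sparsity controllers return a new model object even with `do_copy=False`, because stripping the masks rebuilds the model; only the quantization and no-compression controllers reuse the original object in that mode.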