Merge branch 'openvinotoolkit:develop' into develop

anzr299 · Sep 18, 2024 · 415a222 · 415a222
2 parents 74d8f4c + a39e673
commit 415a222
Show file tree

Hide file tree

Showing 21 changed files with 85 additions and 107 deletions.
diff --git a/.github/workflows/model_hub.yml b/.github/workflows/model_hub.yml
@@ -14,7 +14,7 @@ jobs:
       - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
       - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
         with:
-          python-version: 3.8.18
+          python-version: 3.10.14
       - name: Install NNCF and test requirements
         run: make install-models-hub-torch
 

diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml
@@ -15,7 +15,7 @@ jobs:
       - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
       - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
         with:
-          python-version: 3.8.18
+          python-version: 3.10.14
       - name: Install NNCF
         run: pip install -e .
       - name: Install mypy

diff --git a/.github/workflows/pre-commit-linters.yml b/.github/workflows/pre-commit-linters.yml
@@ -15,7 +15,7 @@ jobs:
       - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
       - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
         with:
-          python-version: 3.8.18
+          python-version: 3.10.14
       - name: Install pre-commit package
         run: make install-pre-commit
       - name: Run pre-commit linter suite

diff --git a/.github/workflows/precommit.yml b/.github/workflows/precommit.yml
@@ -23,10 +23,12 @@ jobs:
           lfs: true
       - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
         with:
-          python-version: 3.8.18
+          python-version: 3.10.14
           cache: pip
       - name: Install NNCF and test requirements
         run: make install-common-test
+      - name: Print installed modules
+        run: pip list
       - name: Run common precommit test scope
         run: make test-common
         env:
@@ -40,9 +42,11 @@ jobs:
           lfs: true
       - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
         with:
-          python-version: 3.8.18
+          python-version: 3.10.14
       - name: Install NNCF and test requirements
         run: make install-onnx-test
+      - name: Print installed modules
+        run: pip list
       - name: Run ONNX precommit test scope
         run: make test-onnx
         env:
@@ -56,10 +60,12 @@ jobs:
           lfs: true
       - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
         with:
-          python-version: 3.8.18
+          python-version: 3.10.14
           cache: pip
       - name: Install NNCF and test requirements
         run: make install-openvino-test
+      - name: Print installed modules
+        run: pip list
       - name: Run OV precommit test scope
         run: make test-openvino
         env:
@@ -83,7 +89,7 @@ jobs:
           lfs: true
       - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
         with:
-          python-version: 3.8.18
+          python-version: 3.10.14
           cache: pip
       - name: Runner info
         continue-on-error: true
@@ -92,6 +98,8 @@ jobs:
           cat /proc/cpuinfo
       - name: Install NNCF and test requirements
         run: make install-torch-test
+      - name: Print installed modules
+        run: pip list
       - name: Run PyTorch precommit test scope
         run: |
           make test-torch-cpu
@@ -128,10 +136,12 @@ jobs:
           lfs: true
       - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
         with:
-          python-version: 3.8.18
+          python-version: 3.10.14
           cache: pip
       - name: Install NNCF and test requirements
         run: make install-torch-test
+      - name: Print installed modules
+        run: pip list
       - name: Check CUDA
         run: |
           python -c "import torch; print(torch.cuda.is_available())"
@@ -155,13 +165,14 @@ jobs:
           lfs: true
       - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
         with:
-          python-version: 3.9.19
+          python-version: 3.10.14
           cache: pip
       - name: Install NNCF and test requirements
         run: make install-tensorflow-test
+      - name: Print installed modules
+        run: pip list
       - name: Run TensorFlow precommit test scope
-        run: |
-          make test-tensorflow
+        run: make test-tensorflow
         env:
           NUM_WORKERS: 6
 
@@ -173,10 +184,11 @@ jobs:
           lfs: true
       - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
         with:
-          python-version: 3.8.18
+          python-version: 3.10.14
           cache: pip
       - name: Install NNCF and test requirements
-        run: |
-          pip install -r tests/tools/requirements.txt
+        run: pip install -r tests/tools/requirements.txt
+      - name: Print installed modules
+        run: pip list
       - name: Run tools precommit test scope
         run: PYTHONPATH=./ pytest -ra tests/tools
diff --git a/README.md b/README.md
@@ -444,7 +444,7 @@ conda install -c conda-forge nncf
   - ONNX\* ==1.16.0
   - OpenVINO\* >=2022.3.0
 
-This repository is tested on Python* 3.8.10, PyTorch* 2.4.0 (NVidia CUDA\* Toolkit 12.1) and TensorFlow* 2.12.1 (NVidia CUDA\* Toolkit 11.8).
+This repository is tested on Python* 3.10.14, PyTorch* 2.4.0 (NVidia CUDA\* Toolkit 12.1) and TensorFlow* 2.12.1 (NVidia CUDA\* Toolkit 11.8).
 
 ## NNCF Compressed Model Zoo
 

diff --git a/docs/Installation.md b/docs/Installation.md
@@ -43,7 +43,7 @@ as well as the supported versions of Python:
 
 | NNCF      | OpenVINO   | PyTorch  | ONNX     | TensorFlow | Python |
 |-----------|------------|----------|----------|------------|--------|
-| `develop` | `2024.3.0` | `2.4.0`  | `1.16.0` | `2.15.1`   | `3.8`* |
+| `develop` | `2024.3.0` | `2.4.0`  | `1.16.0` | `2.15.1`   | `3.10` |
 | `2.12.0`  | `2024.3.0` | `2.3.0`  | `1.16.0` | `2.15.1`   | `3.8`* |
 | `2.11.0`  | `2024.2.0` | `2.3.0`  | `1.16.0` | `2.12.0`   | `3.8`  |
 | `2.10.0`  | `2024.1.0` | `2.2.1`  | `1.16.0` | `2.12.0`   | `3.8`  |

diff --git a/docs/usage/post_training_compression/weights_compression/Usage.md b/docs/usage/post_training_compression/weights_compression/Usage.md
@@ -62,7 +62,7 @@ compressed_model = compress_weights(model, mode=CompressWeightsMode.INT4_SYM, ra
 ```
 
 - Accuracy of the 4-bit compressed models can also be improved by using the AWQ, Scale Estimation, GPTQ or Lora Correction algorithms over the data-based mixed-precision algorithm. These algorithms work by equalizing a subset of weights to minimize the difference between the original precision and the 4-bit precision.
-Unlike all others, the Lora Correction algorithm inserts an additional Linear layers for reducing quantization noise and further accuracy improvement. Inevitably, this approach introduces a memory and a runtime overheads, but they are negligible, since the inserted weight much smaller and can be quantized to 8-bit. The AWQ, Scale Estimation (SE) and Lora Correction (LC) algo can be used in any combination together: AWQ + SE, AWQ + LC, SE + LC, AWQ + SE + LC. The GPTQ algorithm can be combined with AWQ only. Below are examples demonstrating how to enable the AWQ, Scale Estimation, GPTQ or Lora Correction algorithms:
+Unlike all others, the Lora Correction algorithm inserts additional Linear layers to reduce quantization noise and further improve accuracy. Inevitably, this approach introduces memory and runtime overheads, but they are negligible, since the inserted weights are much smaller and can be quantized to 8-bit. The AWQ, Scale Estimation (SE) and Lora Correction (LC) algorithms can be used together in any combination: AWQ + SE, AWQ + LC, SE + LC, AWQ + SE + LC. The GPTQ algorithm can be combined with AWQ and Scale Estimation in any combination: AWQ + GPTQ, GPTQ + SE, AWQ + GPTQ + SE. Below are examples demonstrating how to enable the AWQ, Scale Estimation, GPTQ or Lora Correction algorithms:
 
   Prepare the calibration dataset for data-based algorithms:
 
@@ -432,7 +432,7 @@ This modification applies only for patterns `MatMul-Multiply-MatMul` (for exampl
 #### Scale Estimation and GPTQ methods on Lambada OpenAI dataset
 
 Here is the perplexity and accuracy with data-free and data-aware mixed-precision INT4-INT8 weight compression for different language models on the [lambada openai dataset](https://huggingface.co/datasets/EleutherAI/lambada_openai).
-`_scale` suffix refers to the data-aware mixed-precision with Scale Estimation algorithm. `_gptq` suffix refers to the data-aware mixed-precision with GPTQ algorithm.
+The `_scale` suffix refers to data-aware mixed precision with the Scale Estimation algorithm. The `_gptq` suffix refers to data-aware mixed precision with the GPTQ algorithm. The `_gptq_scale` suffix refers to using the GPTQ algorithm together with the Scale Estimation algorithm to calculate the quantization parameters.
 `r100` means that embeddings and lm_head have INT8 precision and all other linear layers have INT4 precision.
 <table>
     <tr bgcolor='#B4B5BB'>
@@ -447,6 +447,12 @@ Here is the perplexity and accuracy with data-free and data-aware mixed-precisio
         <td>0.5925</td>
         <td>6.3024</td>
     </tr>
+    <tr>
+        <td></td>
+        <td>int4_sym_r100_gs64_gptq_scale</td>
+        <td>0.5795</td>
+        <td>7.1507</td>
+    </tr>
     <tr>
         <td></td>
         <td>int4_sym_r100_gs64_gptq</td>
@@ -477,7 +483,13 @@ Here is the perplexity and accuracy with data-free and data-aware mixed-precisio
         <td>0.595</td>
         <td>7.037</td>
     </tr>
-        <tr>
+    <tr>
+        <td></td>
+        <td>int4_sym_r100_gs64_gptq_scale</td>
+        <td>0.5909</td>
+        <td>7.391</td>
+    </tr>
+    <tr>
         <td></td>
         <td>int4_sym_r100_gs64_gptq</td>
         <td>0.567</td>
@@ -495,6 +507,12 @@ Here is the perplexity and accuracy with data-free and data-aware mixed-precisio
         <td>0.6839</td>
         <td>4.1681</td>
     </tr>
+    <tr>
+        <td></td>
+        <td>int4_sym_r100_gs128_gptq_scale</td>
+        <td>0.6757</td>
+        <td>4.5107</td>
+    </tr>
     <tr>
         <td></td>
         <td>int4_sym_r100_gs128_scale</td>

diff --git a/nncf/common/accuracy_aware_training/runner.py b/nncf/common/accuracy_aware_training/runner.py
@@ -42,7 +42,7 @@
 Image = Any  # Default type for Image.
 
 try:
-    import matplotlib.pyplot as plt  # type: ignore
+    import matplotlib.pyplot as plt
     import PIL.Image
     from PIL.Image import Image
 

diff --git a/nncf/common/insertion_point_graph.py b/nncf/common/insertion_point_graph.py
@@ -73,16 +73,13 @@ class InsertionPointGraph(nx.DiGraph):
     def __init__(
         self,
         nncf_graph: NNCFGraph,
-        weight_modifiable_node_names: List[NNCFNodeName] = None,
         allowed_pre_hook_insertion_points: List[PreHookInsertionPoint] = None,
         allowed_post_hook_insertion_points: List[PostHookInsertionPoint] = None,
     ):
         """
         Initializes the insertion point graph.
 
         :param nncf_graph: The base NNCFGraph representing the model structure.
-        :param weight_modifiable_node_names: Names of the nodes in `nncf_graph` that correspond to operations with
-          modifiable weights.
         :param allowed_pre_hook_insertion_points: A list of pre-hook insertion points for this graph to allow.
           If left unspecified, every node in `nncf_graph` will be allowed to have a separate pre-hook for each of its
           tensor inputs.
@@ -93,10 +90,6 @@ def __init__(
 
         super().__init__()
         self._base_nx_graph = deepcopy(nncf_graph.get_nx_graph_copy())
-        if weight_modifiable_node_names is None:
-            self._weight_modifiable_node_names = []
-        else:
-            self._weight_modifiable_node_names = weight_modifiable_node_names
 
         if allowed_pre_hook_insertion_points is None:
             allowed_pre_hook_insertion_points = self._get_default_pre_hook_ip_list(nncf_graph)
@@ -236,10 +229,6 @@ def __init__(
                     for follower_node_key in self.successors(from_node_key):
                         self.edges[from_node_key, follower_node_key][self.IS_INTEGER_PATH_EDGE_ATTR] = True
 
-    @property
-    def weight_modifiable_node_names(self) -> List[NNCFNodeName]:
-        return self._weight_modifiable_node_names
-
     @staticmethod
     def _get_default_pre_hook_ip_list(nncf_graph: NNCFGraph) -> List[PreHookInsertionPoint]:
         # Pre-hook all input ports of all nodes

diff --git a/nncf/quantization/advanced_parameters.py b/nncf/quantization/advanced_parameters.py
@@ -211,7 +211,7 @@ class AdvancedQuantizationParameters:
         It regulates the calculation of the smooth scale. The default value stored in AdvancedSmoothQuantParameters.
         A negative value for each field switches off type smoothing. In case of inaccurate results,
         fields may be adjusted in the range from 0 to 1 or set -1 to disable smoothing for type.
-    :type smooth_quant_alpha: nncf.quantization.advanced_parameters.AdvancedSmoothQuantParameters
+    :type smooth_quant_alphas: nncf.quantization.advanced_parameters.AdvancedSmoothQuantParameters
     :param smooth_quant_alpha: Deprecated SmoothQuant-related parameter.
     :type smooth_quant_alpha: float
     :param backend_params: Backend-specific parameters.

diff --git a/nncf/tensorflow/quantization/algorithm.py b/nncf/tensorflow/quantization/algorithm.py
@@ -604,7 +604,7 @@ def _get_quantizer_propagation_solution(
         custom_layer_node_names: List[NNCFNodeName],
         model: tf.keras.Model,
     ) -> SingleConfigQuantizerSetup:
-        ip_graph = InsertionPointGraph(nncf_graph, [qn.node.node_name for qn in quantizable_weighted_layer_nodes])
+        ip_graph = InsertionPointGraph(nncf_graph)
 
         pattern = TF_HW_FUSED_PATTERNS.get_full_pattern_graph()
         ip_graph = ip_graph.get_ip_graph_with_merged_hw_optimized_operations(pattern)

diff --git a/nncf/torch/nncf_network.py b/nncf/torch/nncf_network.py
@@ -679,12 +679,8 @@ def get_original_insertion_point_graph(self) -> InsertionPointGraph:
             post_hook_ip = PostHookInsertionPoint(node.node_name)
             post_hooks.append(post_hook_ip)
 
-        weighted_nodes = self.get_weighted_original_graph_nodes()
-        weighted_node_names = [weighted_node.node_name for weighted_node in weighted_nodes]
-
         ip_graph = InsertionPointGraph(
             self._original_graphs_pair.nncf_graph,
-            weight_modifiable_node_names=weighted_node_names,
             allowed_pre_hook_insertion_points=pre_hooks,
             allowed_post_hook_insertion_points=post_hooks,
         )

diff --git a/tests/common/quantization/mock_graphs.py b/tests/common/quantization/mock_graphs.py
@@ -17,7 +17,6 @@
 
 from nncf.common.graph import NNCFGraph
 from nncf.common.graph import NNCFNode
-from nncf.common.graph import NNCFNodeName
 from nncf.common.graph.layer_attributes import BaseLayerAttributes
 from nncf.common.graph.layer_attributes import ConvolutionLayerAttributes
 from nncf.common.graph.layer_attributes import Dtype
@@ -359,7 +358,7 @@ def get_mock_model_graph_with_broken_output_edge_pattern(
     return get_nncf_graph_from_mock_nx_graph(mock_nx_graph)
 
 
-def get_ip_graph_for_test(nncf_graph: NNCFGraph, weighted_node_names: List[NNCFNodeName] = None) -> InsertionPointGraph:
+def get_ip_graph_for_test(nncf_graph: NNCFGraph) -> InsertionPointGraph:
     pre_hooks = []
     post_hooks = []
     for node in nncf_graph.get_all_nodes():
@@ -374,14 +373,8 @@ def get_ip_graph_for_test(nncf_graph: NNCFGraph, weighted_node_names: List[NNCFN
         ip = PostHookInsertionPoint(node.node_name)
         post_hooks.append(ip)
 
-    weighted_target_points = None
-    if weighted_node_names is not None:
-        weighted_target_points = []
-        for name in weighted_node_names:
-            weighted_target_points.append(name)
     ip_graph = InsertionPointGraph(
         nncf_graph,
-        weight_modifiable_node_names=weighted_target_points,
         allowed_pre_hook_insertion_points=pre_hooks,
         allowed_post_hook_insertion_points=post_hooks,
     )

diff --git a/tests/common/quantization/test_quantizer_propagation_graph.py b/tests/common/quantization/test_quantizer_propagation_graph.py
@@ -1689,9 +1689,7 @@ def output_quant_as_weights_test_struct(self, request):
 
     @pytest.fixture
     def model_graph_qpsg(self):
-        ip_graph = get_ip_graph_for_test(
-            MODEL_GRAPH, weighted_node_names=[node.node_name for node in MODEL_GRAPH.get_all_nodes()]
-        )
+        ip_graph = get_ip_graph_for_test(MODEL_GRAPH)
         quant_prop_graph = QPSG(ip_graph)
         return quant_prop_graph
 

diff --git a/tests/common/quantization/test_quantizer_propagation_solver.py b/tests/common/quantization/test_quantizer_propagation_solver.py
@@ -1932,7 +1932,7 @@ def test_metatypes_to_ignore(mocker):
         nncf_graph.add_edge_between_nncf_nodes(
             nodes[idx - 1].node_id, nodes[idx].node_id, [1, 1, 1, 1], 0, 0, Dtype.FLOAT
         )
-    ip_graph = InsertionPointGraph(nncf_graph=nncf_graph, weight_modifiable_node_names=["A", "B", "C"])
+    ip_graph = InsertionPointGraph(nncf_graph=nncf_graph)
 
     solver = QuantizerPropagationSolver(
         metatypes_to_ignore=[IGNORED_METATYPE],

diff --git a/tests/cross_fw/examples/conftest.py b/tests/cross_fw/examples/conftest.py
@@ -31,6 +31,7 @@ def pytest_addoption(parser):
     parser.addoption(
         "--ov_version_override", default=None, help="Parameter to set OpenVINO into the env with the version from PyPI"
     )
+    parser.addoption("--data", type=str, default=None, help="Path to test datasets")
 
 
 @pytest.fixture(scope="module")
@@ -46,3 +47,8 @@ def is_check_performance(request):
 @pytest.fixture(scope="module")
 def ov_version_override(request):
     return request.config.getoption("--ov_version_override")
+
+
+@pytest.fixture(scope="module")
+def data(request):
+    return request.config.getoption("--data")