Add torch.load warnings and path resolution (#458)
vshampor committed Jan 29, 2021
1 parent abc9b44 commit 23727a5
Showing 14 changed files with 47 additions and 50 deletions.
5 changes: 5 additions & 0 deletions README.md
Expand Up @@ -108,6 +108,11 @@ pip install nncf
#### As a Docker image
Use one of the Dockerfiles in the [docker](./docker) directory to build an image with an environment already set up and ready for running NNCF [sample scripts](#model-compression-samples).

**NOTE**: If you want to use the sample training scripts provided in the NNCF repository under `examples`, install the corresponding Python package dependencies:
```
pip install -r examples/requirements.txt
```

## Contributing
Refer to the [CONTRIBUTING.md](./CONTRIBUTING.md) file for guidelines on contributions to the NNCF repository.

2 changes: 2 additions & 0 deletions examples/classification/README.md
Expand Up @@ -39,6 +39,8 @@ python main.py -m test --config=configs/quantization/mobilenet_v2_imagenet_int8.
```
To validate an FP32 model checkpoint, make sure the compression algorithm settings are empty in the configuration file or `pretrained=True` is set.

**WARNING**: The samples use the `torch.load` functionality for checkpoint loading, which in turn relies on pickle facilities by default; these are known to be vulnerable to arbitrary code execution attacks. **Only load data that you trust.**
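
For illustration only (this snippet is not part of the sample scripts, and the checkpoint file name is a hypothetical placeholder), a cautious loading pattern might look as follows; note that `map_location` only controls device placement and offers no protection against malicious pickles:

```python
# Hedged sketch: load a checkpoint you have already decided to trust.
import torch

CHECKPOINT_PATH = "mobilenet_v2_int8_best.pth"  # hypothetical, trusted file

# map_location keeps tensors on the CPU regardless of where they were saved;
# it does not mitigate the pickle risk described above.
checkpoint = torch.load(CHECKPOINT_PATH, map_location="cpu")
state_dict = checkpoint.get("state_dict", checkpoint)

# On recent PyTorch releases, torch.load(..., weights_only=True) restricts
# unpickling to tensor data and is a safer option when it is available.
```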

#### Export Compressed Model

To export the trained model to the ONNX format, use the following command:
7 changes: 7 additions & 0 deletions examples/common/model_loader.py
Expand Up @@ -24,6 +24,13 @@

def load_model(model, pretrained=True, num_classes=1000, model_params=None,
weights_path: str = None) -> torch.nn.Module:
"""
** WARNING: This is implemented via the torch.load functionality, which in turn
relies on Python's pickling facilities; these may be exploited to perform
arbitrary code execution during unpickling. Only load data that you trust.
"""
logger.info("Loading model: {}".format(model))
if model_params is None:
model_params = {}
5 changes: 4 additions & 1 deletion examples/common/sample_config.py
Expand Up @@ -10,13 +10,15 @@
See the License for the specific language governing permissions and
limitations under the License.
"""
from pathlib import Path

from addict import Dict

import argparse
import os

from nncf import NNCFConfig
from nncf.common.os import safe_open

try:
import jstyleson as json
Expand Down Expand Up @@ -74,7 +76,8 @@ def parse_known_args(self, args=None, namespace=None):
class SampleConfig(Dict):
@classmethod
def from_json(cls, path) -> 'SampleConfig':
with open(path) as f:
file_path = Path(path).resolve()
with safe_open(file_path) as f:
loaded_json = json.load(f)
return cls(loaded_json)

2 changes: 2 additions & 0 deletions examples/object_detection/README.md
Expand Up @@ -29,6 +29,8 @@ To estimate the test scores of your model checkpoint use the following command:
`python main.py -m test --config=configs/ssd300_vgg_int8_voc.json --data <path_to_dataset> --resume <path_to_trained_model_checkpoint>`
If you want to validate an FP32 model checkpoint, make sure the compression algorithm settings are empty in the configuration file or `pretrained=True` is set.

**WARNING**: The samples use the `torch.load` functionality for checkpoint loading, which in turn relies on pickle facilities by default; these are known to be vulnerable to arbitrary code execution attacks. **Only load data that you trust.**
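
One way to establish that trust, sketched below with hypothetical placeholder values, is to verify the checkpoint file against a digest published by its author before unpickling it:

```python
# Illustrative sketch: refuse to unpickle a checkpoint whose digest does not match.
# CHECKPOINT_PATH and EXPECTED_SHA256 are hypothetical placeholders.
import hashlib

import torch

CHECKPOINT_PATH = "ssd300_vgg_int8_best.pth"
EXPECTED_SHA256 = "<digest published alongside the checkpoint>"

with open(CHECKPOINT_PATH, "rb") as f:
    actual_sha256 = hashlib.sha256(f.read()).hexdigest()

if actual_sha256 != EXPECTED_SHA256:
    raise RuntimeError("Checkpoint digest mismatch; refusing to load untrusted data")

checkpoint = torch.load(CHECKPOINT_PATH, map_location="cpu")
```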

#### Export compressed model
To export the trained model to the ONNX format, use the following command:
`python main.py -m test --config configs/ssd300_vgg_int8_voc.json --data <path_to_dataset> --resume <path_to_compressed_model_checkpoint> --to-onnx=../../results/ssd300_int8.onnx`
5 changes: 5 additions & 0 deletions examples/object_detection/models/ssd_mobilenet.py
Expand Up @@ -106,6 +106,11 @@ def build_ssd_mobilenet(cfg, size, num_classes, config):

if config.basenet and (config.resuming_checkpoint_path is None) and (config.weights is None):
logger.debug('Loading base network...')
#
# ** WARNING: torch.load functionality uses Python's pickling facilities that
# may be used to perform arbitrary code execution during unpickling. Only load the data you
# trust.
#
basenet_weights = torch.load(config.basenet)['state_dict']
new_weights = {}
for wn, wv in basenet_weights.items():
5 changes: 5 additions & 0 deletions examples/object_detection/models/ssd_vgg.py
Expand Up @@ -74,6 +74,11 @@ def load_weights(self, base_file):
_, ext = os.path.splitext(base_file)
if ext in ('.pkl', '.pth'):
logger.debug('Loading weights into state dict...')
#
# ** WARNING: torch.load functionality uses Python's pickling facilities that
# may be used to perform arbitrary code execution during unpickling. Only load the data you
# trust.
#
self.load_state_dict(torch.load(base_file,
map_location=lambda storage, loc: storage))
logger.debug('Finished!')
2 changes: 2 additions & 0 deletions examples/semantic_segmentation/README.md
Expand Up @@ -31,6 +31,8 @@ To estimate the test scores of your model checkpoint use the following command:
`python main.py -m test --config=configs/unet_mapillary_int8.json --resume <path_to_trained_model_checkpoint>`
If you want to validate an FP32 model checkpoint, make sure the compression algorithm settings are empty in the configuration file or `pretrained=True` is set.

**WARNING**: The samples use the `torch.load` functionality for checkpoint loading, which in turn relies on pickle facilities by default; these are known to be vulnerable to arbitrary code execution attacks. **Only load data that you trust.**

#### Export compressed model
To export the trained model to the ONNX format, use the following command:
`python main.py --mode test --config configs/unet_mapillary_int8.json --data <path_to_dataset> --resume <path_to_compressed_model_checkpoint> --to-onnx unet_int8.onnx`
6 changes: 6 additions & 0 deletions examples/semantic_segmentation/utils/checkpoint.py
Expand Up @@ -77,6 +77,12 @@ def load_checkpoint(model, model_path, device_name, optimizer=None, compression_
model_path), "The model file \"{0}\" doesn't exist.".format(model_path)

# Load the stored model parameters to the model instance

#
# ** WARNING: torch.load functionality uses Python's pickling facilities that
# may be used to perform arbitrary code execution during unpickling. Only load the data you
# trust.
#
checkpoint = torch.load(model_path, map_location=device_name)
load_state(model, checkpoint['state_dict'], is_resume=True)
if optimizer is not None:
3 changes: 2 additions & 1 deletion nncf/common/os.py
Expand Up @@ -16,7 +16,8 @@

@contextmanager
def safe_open(file: Path, *args, **kwargs):
# For security reasons, should not follow symlinks.
# For security reasons, should not follow symlinks. Use .resolve() on any Path
# objects before passing them here.
if file.is_symlink():
raise RuntimeError("File {} is a symbolic link, aborting.".format(str(file)))
with open(str(file), *args, **kwargs) as f:
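
A minimal usage sketch of this helper, assuming the path comes from user input such as a CLI argument, resolves the path first and then reads the file through `safe_open`, mirroring the `from_json` changes below:

```python
# Minimal sketch of the calling convention assumed by safe_open.
# `user_supplied_path` is a hypothetical value, e.g. taken from the command line.
from pathlib import Path

from nncf.common.os import safe_open

user_supplied_path = "configs/quantization/mobilenet_v2_imagenet_int8.json"

# .resolve() canonicalizes the path and follows any symlinks, so the
# is_symlink() check inside safe_open() is applied to the real file.
file_path = Path(user_supplied_path).resolve()
with safe_open(file_path) as f:
    raw_text = f.read()
```
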
2 changes: 1 addition & 1 deletion nncf/config/config.py
Expand Up @@ -52,7 +52,7 @@ def from_dict(cls, nncf_dict):

@classmethod
def from_json(cls, path) -> 'NNCFConfig':
file_path = Path(path)
file_path = Path(path).resolve()
with safe_open(file_path) as f:
loaded_json = json.load(f)
return cls.from_dict(loaded_json)
2 changes: 1 addition & 1 deletion nncf/hw_config.py
Expand Up @@ -126,7 +126,7 @@ def from_dict(cls, dct: dict):

@classmethod
def from_json(cls, path):
file_path = Path(path)
file_path = Path(path).resolve()
with safe_open(file_path) as f:
json_config = json.load(f, object_pairs_hook=OrderedDict)
return HWConfig.from_dict(json_config)
46 changes: 0 additions & 46 deletions tests/test_api_behavior.py
Expand Up @@ -10,15 +10,12 @@
See the License for the specific language governing permissions and
limitations under the License.
"""
import abc
from pathlib import Path

import pytest
import torch
from torch.utils.data import DataLoader

from nncf import register_default_init_args, NNCFConfig
from nncf.hw_config import HWConfig
from nncf.quantization.quantizer_setup import SingleConfigQuantizerSetup
from nncf.tensor_statistics.algo import TensorStatisticsCollectionBuilder, TensorStatisticsCollectionController

Expand Down Expand Up @@ -133,46 +130,3 @@ def forward(self, x):
def test_model_is_inited_with_own_device_by_default(nncf_config_with_default_init_args, original_device):
model = DeviceCheckingModel(original_device)
create_compressed_model_and_algo_for_test(model, nncf_config_with_default_init_args)


@pytest.fixture(name='tmp_symlink_path')
def tmp_symlink_path_(tmpdir) -> Path:
tmpdir_path = Path(tmpdir)
tmp_file_path = tmpdir_path / "tmp_file"
tmp_file_path.touch()
symlink_path = tmpdir_path / "symlink"
symlink_path.symlink_to(tmp_file_path)
return symlink_path


class MockedFilePathConsumer(abc.ABC):
test_case_name = None

@abc.abstractmethod
def consume_file_path(self, path):
pass

class HWConfigPathConsumer(MockedFilePathConsumer):
test_case_name = 'HWConfig'

def consume_file_path(self, path):
HWConfig.from_json(str(path))


class NNCFConfigPathConsumer(MockedFilePathConsumer):
test_case_name = 'NNCFConfig'
def consume_file_path(self, path):
NNCFConfig.from_json(str(path))

PATH_CONSUMERS = [HWConfigPathConsumer(),
NNCFConfigPathConsumer()]

@pytest.fixture(params=PATH_CONSUMERS, ids=[pc.test_case_name for pc in PATH_CONSUMERS],
name='path_consumer')
def path_consumer_(request):
return request.param


def test_symlink_paths_are_not_followed(tmp_symlink_path, path_consumer: MockedFilePathConsumer):
with pytest.raises(RuntimeError):
path_consumer.consume_file_path(tmp_symlink_path)
5 changes: 5 additions & 0 deletions tests/test_models/ssd_vgg.py
Expand Up @@ -156,6 +156,11 @@ def build_ssd_vgg(cfg, size, num_classes, config):

if config.basenet and (config.resuming_checkpoint_path is None) and (config.weights is None):
print('Loading base network...')
#
# ** WARNING: torch.load functionality uses Python's pickling facilities that
# may be used to perform arbitrary code execution during unpickling. Only load the data you
# trust.
#
basenet_weights = torch.load(config.basenet)
new_weights = {}
for wn, wv in basenet_weights.items():