Add torch.load warnings and path resolution (#458)
vshampor committed Jan 29, 2021
1 parent abc9b44 commit 23727a5
Showing 14 changed files with 47 additions and 50 deletions.
5 changes: 5 additions & 0 deletions README.md
Expand Up @@ -108,6 +108,11 @@ pip install nncf
#### As a Docker image
Use one of the Dockerfiles in the [docker](./docker) directory to build an image with an environment already set up and ready for running NNCF [sample scripts](#model-compression-samples).

**NOTE**: If you want to use the sample training scripts provided in the NNCF repository under `examples`, install the corresponding Python package dependencies:
```
pip install -r examples/requirements.txt
```

## Contributing
Refer to the [CONTRIBUTING.md](./CONTRIBUTING.md) file for guidelines on contributions to the NNCF repository.

2 changes: 2 additions & 0 deletions examples/classification/README.md
Expand Up @@ -39,6 +39,8 @@ python main.py -m test --config=configs/quantization/mobilenet_v2_imagenet_int8.
```
To validate an FP32 model checkpoint, make sure the compression algorithm settings are empty in the configuration file or `pretrained=True` is set.

**WARNING**: The samples use the `torch.load` functionality for checkpoint loading, which in turn relies on pickle facilities by default; these are known to be vulnerable to arbitrary code execution attacks. **Only load data that you trust.**
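
For illustration only (this snippet is not part of the sample scripts, and the checkpoint file name is a hypothetical placeholder), a cautious loading pattern might look as follows; note that `map_location` only controls device placement and offers no protection against malicious pickles:

```python
# Hedged sketch: load a checkpoint you have already decided to trust.
import torch

CHECKPOINT_PATH = "mobilenet_v2_int8_best.pth"  # hypothetical, trusted file

# map_location keeps tensors on the CPU regardless of where they were saved;
# it does not mitigate the pickle risk described above.
checkpoint = torch.load(CHECKPOINT_PATH, map_location="cpu")
state_dict = checkpoint.get("state_dict", checkpoint)

# On recent PyTorch releases, torch.load(..., weights_only=True) restricts
# unpickling to tensor data and is a safer option when it is available.
```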

#### Export Compressed Model

To export the trained model to the ONNX format, use the following command:
7 changes: 7 additions & 0 deletions examples/common/model_loader.py
Expand Up @@ -24,6 +24,13 @@

def load_model(model, pretrained=True, num_classes=1000, model_params=None,
weights_path: str = None) -> torch.nn.Module:
"""
** WARNING: This is implemented via the torch.load functionality, which in turn
relies on Python's pickling facilities; these may be exploited to perform
arbitrary code execution during unpickling. Only load data that you trust.
"""
logger.info("Loading model: {}".format(model))
if model_params is None:
model_params = {}
5 changes: 4 additions & 1 deletion examples/common/sample_config.py
Expand Up @@ -10,13 +10,15 @@
See the License for the specific language governing permissions and
limitations under the License.
"""
from pathlib import Path

from addict import Dict

import argparse
import os

from nncf import NNCFConfig
from nncf.common.os import safe_open

try:
import jstyleson as json
Expand Down Expand Up @@ -74,7 +76,8 @@ def parse_known_args(self, args=None, namespace=None):
class SampleConfig(Dict):
@classmethod
def from_json(cls, path) -> 'SampleConfig':
with open(path) as f:
file_path = Path(path).resolve()
with safe_open(file_path) as f:
loaded_json = json.load(f)
return cls(loaded_json)

2 changes: 2 additions & 0 deletions examples/object_detection/README.md
Expand Up @@ -29,6 +29,8 @@ To estimate the test scores of your model checkpoint use the following command:
`python main.py -m test --config=configs/ssd300_vgg_int8_voc.json --data <path_to_dataset> --resume <path_to_trained_model_checkpoint>`
If you want to validate an FP32 model checkpoint, make sure the compression algorithm settings are empty in the configuration file or `pretrained=True` is set.

**WARNING**: The samples use the `torch.load` functionality for checkpoint loading, which in turn relies on pickle facilities by default; these are known to be vulnerable to arbitrary code execution attacks. **Only load data that you trust.**
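
One way to establish that trust, sketched below with hypothetical placeholder values, is to verify the checkpoint file against a digest published by its author before unpickling it:

```python
# Illustrative sketch: refuse to unpickle a checkpoint whose digest does not match.
# CHECKPOINT_PATH and EXPECTED_SHA256 are hypothetical placeholders.
import hashlib

import torch

CHECKPOINT_PATH = "ssd300_vgg_int8_best.pth"
EXPECTED_SHA256 = "<digest published alongside the checkpoint>"

with open(CHECKPOINT_PATH, "rb") as f:
    actual_sha256 = hashlib.sha256(f.read()).hexdigest()

if actual_sha256 != EXPECTED_SHA256:
    raise RuntimeError("Checkpoint digest mismatch; refusing to load untrusted data")

checkpoint = torch.load(CHECKPOINT_PATH, map_location="cpu")
```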

#### Export compressed model
To export the trained model to the ONNX format, use the following command:
`python main.py -m test --config configs/ssd300_vgg_int8_voc.json --data <path_to_dataset> --resume <path_to_compressed_model_checkpoint> --to-onnx=../../results/ssd300_int8.onnx`
5 changes: 5 additions & 0 deletions examples/object_detection/models/ssd_mobilenet.py
Expand Up @@ -106,6 +106,11 @@ def build_ssd_mobilenet(cfg, size, num_classes, config):

if config.basenet and (config.resuming_checkpoint_path is None) and (config.weights is None):
logger.debug('Loading base network...')
#
# ** WARNING: torch.load functionality uses Python's pickling facilities that
# may be used to perform arbitrary code execution during unpickling. Only load the data you
# trust.
#
basenet_weights = torch.load(config.basenet)['state_dict']
new_weights = {}
for wn, wv in basenet_weights.items():
5 changes: 5 additions & 0 deletions examples/object_detection/models/ssd_vgg.py
Expand Up @@ -74,6 +74,11 @@ def load_weights(self, base_file):
_, ext = os.path.splitext(base_file)
if ext in ('.pkl', '.pth'):
logger.debug('Loading weights into state dict...')
#
# ** WARNING: torch.load functionality uses Python's pickling facilities that
# may be used to perform arbitrary code execution during unpickling. Only load the data you
# trust.
#
self.load_state_dict(torch.load(base_file,
map_location=lambda storage, loc: storage))
logger.debug('Finished!')
2 changes: 2 additions & 0 deletions examples/semantic_segmentation/README.md
Expand Up @@ -31,6 +31,8 @@ To estimate the test scores of your model checkpoint use the following command:
`python main.py -m test --config=configs/unet_mapillary_int8.json --resume <path_to_trained_model_checkpoint>`
If you want to validate an FP32 model checkpoint, make sure the compression algorithm settings are empty in the configuration file or `pretrained=True` is set.

**WARNING**: The samples use the `torch.load` functionality for checkpoint loading, which in turn relies on pickle facilities by default; these are known to be vulnerable to arbitrary code execution attacks. **Only load data that you trust.**

#### Export compressed model
To export the trained model to the ONNX format, use the following command:
`python main.py --mode test --config configs/unet_mapillary_int8.json --data <path_to_dataset> --resume <path_to_compressed_model_checkpoint> --to-onnx unet_int8.onnx`
6 changes: 6 additions & 0 deletions examples/semantic_segmentation/utils/checkpoint.py
Expand Up @@ -77,6 +77,12 @@ def load_checkpoint(model, model_path, device_name, optimizer=None, compression_
model_path), "The model file \"{0}\" doesn't exist.".format(model_path)

# Load the stored model parameters to the model instance

#
# ** WARNING: torch.load functionality uses Python's pickling facilities that
# may be used to perform arbitrary code execution during unpickling. Only load the data you
# trust.
#
checkpoint = torch.load(model_path, map_location=device_name)
load_state(model, checkpoint['state_dict'], is_resume=True)
if optimizer is not None:
3 changes: 2 additions & 1 deletion nncf/common/os.py
Expand Up @@ -16,7 +16,8 @@

@contextmanager
def safe_open(file: Path, *args, **kwargs):
# For security reasons, should not follow symlinks.
# For security reasons, should not follow symlinks. Use .resolve() on any Path
# objects before passing them here.
if file.is_symlink():
raise RuntimeError("File {} is a symbolic link, aborting.".format(str(file)))
with open(str(file), *args, **kwargs) as f:
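
A minimal usage sketch of this helper, assuming the path comes from user input such as a CLI argument, resolves the path first and then reads the file through `safe_open`, mirroring the `from_json` changes below:

```python
# Minimal sketch of the calling convention assumed by safe_open.
# `user_supplied_path` is a hypothetical value, e.g. taken from the command line.
from pathlib import Path

from nncf.common.os import safe_open

user_supplied_path = "configs/quantization/mobilenet_v2_imagenet_int8.json"

# .resolve() canonicalizes the path and follows any symlinks, so the
# is_symlink() check inside safe_open() is applied to the real file.
file_path = Path(user_supplied_path).resolve()
with safe_open(file_path) as f:
    raw_text = f.read()
```
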
2 changes: 1 addition & 1 deletion nncf/config/config.py
Expand Up @@ -52,7 +52,7 @@ def from_dict(cls, nncf_dict):

@classmethod
def from_json(cls, path) -> 'NNCFConfig':
file_path = Path(path)
file_path = Path(path).resolve()
with safe_open(file_path) as f:
loaded_json = json.load(f)
return cls.from_dict(loaded_json)
2 changes: 1 addition & 1 deletion nncf/hw_config.py
Expand Up @@ -126,7 +126,7 @@ def from_dict(cls, dct: dict):

@classmethod
def from_json(cls, path):
file_path = Path(path)
file_path = Path(path).resolve()
with safe_open(file_path) as f:
json_config = json.load(f, object_pairs_hook=OrderedDict)
return HWConfig.from_dict(json_config)
46 changes: 0 additions & 46 deletions tests/test_api_behavior.py
Expand Up @@ -10,15 +10,12 @@
See the License for the specific language governing permissions and
limitations under the License.
"""
import abc
from pathlib import Path

import pytest
import torch
from torch.utils.data import DataLoader

from nncf import register_default_init_args, NNCFConfig
from nncf.hw_config import HWConfig
from nncf.quantization.quantizer_setup import SingleConfigQuantizerSetup
from nncf.tensor_statistics.algo import TensorStatisticsCollectionBuilder, TensorStatisticsCollectionController

Expand Down Expand Up @@ -133,46 +130,3 @@ def forward(self, x):
def test_model_is_inited_with_own_device_by_default(nncf_config_with_default_init_args, original_device):
model = DeviceCheckingModel(original_device)
create_compressed_model_and_algo_for_test(model, nncf_config_with_default_init_args)


@pytest.fixture(name='tmp_symlink_path')
def tmp_symlink_path_(tmpdir) -> Path:
tmpdir_path = Path(tmpdir)
tmp_file_path = tmpdir_path / "tmp_file"
tmp_file_path.touch()
symlink_path = tmpdir_path / "symlink"
symlink_path.symlink_to(tmp_file_path)
return symlink_path


class MockedFilePathConsumer(abc.ABC):
test_case_name = None

@abc.abstractmethod
def consume_file_path(self, path):
pass

class HWConfigPathConsumer(MockedFilePathConsumer):
test_case_name = 'HWConfig'

def consume_file_path(self, path):
HWConfig.from_json(str(path))


class NNCFConfigPathConsumer(MockedFilePathConsumer):
test_case_name = 'NNCFConfig'
def consume_file_path(self, path):
NNCFConfig.from_json(str(path))

PATH_CONSUMERS = [HWConfigPathConsumer(),
NNCFConfigPathConsumer()]

@pytest.fixture(params=PATH_CONSUMERS, ids=[pc.test_case_name for pc in PATH_CONSUMERS],
name='path_consumer')
def path_consumer_(request):
return request.param


def test_symlink_paths_are_not_followed(tmp_symlink_path, path_consumer: MockedFilePathConsumer):
with pytest.raises(RuntimeError):
path_consumer.consume_file_path(tmp_symlink_path)
5 changes: 5 additions & 0 deletions tests/test_models/ssd_vgg.py
Expand Up @@ -156,6 +156,11 @@ def build_ssd_vgg(cfg, size, num_classes, config):

if config.basenet and (config.resuming_checkpoint_path is None) and (config.weights is None):
print('Loading base network...')
#
# ** WARNING: torch.load functionality uses Python's pickling facilities that
# may be used to perform arbitrary code execution during unpickling. Only load the data you
# trust.
#
basenet_weights = torch.load(config.basenet)
new_weights = {}
for wn, wv in basenet_weights.items():