Skip to content

Commit

Permalink
Merge pull request open-mmlab#76 from cxxgtxy/master
Browse files Browse the repository at this point in the history
Support data prefetching on the linear classification task
  • Loading branch information
XiaohangZhan authored Dec 16, 2020
2 parents ed50004 + 15706cb commit a21f1a4
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 28 deletions.
17 changes: 11 additions & 6 deletions configs/benchmarks/linear_classification/imagenet/r50_last.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,16 @@
train_pipeline = [
dict(type='RandomResizedCrop', size=224),
dict(type='RandomHorizontalFlip'),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
test_pipeline = [
dict(type='Resize', size=256),
dict(type='CenterCrop', size=224),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
# prefetch
prefetch = False
if not prefetch:
train_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)])
test_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)])
data = dict(
imgs_per_gpu=32, # total 32*8=256, 8GPU linear cls
workers_per_gpu=5,
Expand All @@ -45,12 +46,14 @@
data_source=dict(
list_file=data_train_list, root=data_train_root,
**data_source_cfg),
pipeline=train_pipeline),
pipeline=train_pipeline,
prefetch=prefetch),
val=dict(
type=dataset_type,
data_source=dict(
list_file=data_test_list, root=data_test_root, **data_source_cfg),
pipeline=test_pipeline))
pipeline=test_pipeline,
prefetch=prefetch))
# additional hooks
custom_hooks = [
dict(
Expand All @@ -60,6 +63,8 @@
interval=1,
imgs_per_gpu=128,
workers_per_gpu=4,
prefetch=prefetch,
img_norm_cfg=img_norm_cfg,
eval_param=dict(topk=(1, 5)))
]
# optimizer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,16 @@
train_pipeline = [
dict(type='RandomResizedCrop', size=224),
dict(type='RandomHorizontalFlip'),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
test_pipeline = [
dict(type='Resize', size=256),
dict(type='CenterCrop', size=224),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
# prefetch
prefetch = False
if not prefetch:
train_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)])
test_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)])
data = dict(
imgs_per_gpu=32, # total 32*8=256, 8GPU linear cls
workers_per_gpu=5,
Expand All @@ -45,12 +46,14 @@
data_source=dict(
list_file=data_train_list, root=data_train_root,
**data_source_cfg),
pipeline=train_pipeline),
pipeline=train_pipeline,
prefetch=prefetch),
val=dict(
type=dataset_type,
data_source=dict(
list_file=data_test_list, root=data_test_root, **data_source_cfg),
pipeline=test_pipeline))
pipeline=test_pipeline,
prefetch=prefetch))
# additional hooks
custom_hooks = [
dict(
Expand All @@ -60,6 +63,8 @@
interval=1,
imgs_per_gpu=128,
workers_per_gpu=4,
prefetch=prefetch,
img_norm_cfg=img_norm_cfg,
eval_param=dict(topk=(1, 5)))
]
# optimizer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,16 @@
dict(type='CenterCrop', size=256),
dict(type='RandomCrop', size=224),
dict(type='RandomHorizontalFlip'),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
test_pipeline = [
dict(type='Resize', size=256),
dict(type='CenterCrop', size=224),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
# prefetch
prefetch = False
if not prefetch:
train_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)])
test_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)])
data = dict(
imgs_per_gpu=32, # total 32x8=256
workers_per_gpu=4,
Expand All @@ -52,12 +53,14 @@
data_source=dict(
list_file=data_train_list, root=data_train_root,
**data_source_cfg),
pipeline=train_pipeline),
pipeline=train_pipeline,
prefetch=prefetch),
val=dict(
type=dataset_type,
data_source=dict(
list_file=data_test_list, root=data_test_root, **data_source_cfg),
pipeline=test_pipeline))
pipeline=test_pipeline,
prefetch=prefetch))
# additional hooks
custom_hooks = [
dict(
Expand All @@ -67,6 +70,8 @@
interval=10,
imgs_per_gpu=32,
workers_per_gpu=4,
prefetch=prefetch,
img_norm_cfg=img_norm_cfg,
eval_param=dict(topk=(1, )))
]
# optimizer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,16 @@
dict(type='CenterCrop', size=256),
dict(type='RandomCrop', size=224),
dict(type='RandomHorizontalFlip'),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
test_pipeline = [
dict(type='Resize', size=256),
dict(type='CenterCrop', size=224),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
# prefetch
prefetch = False
if not prefetch:
train_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)])
test_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)])
data = dict(
imgs_per_gpu=32, # total 32x8=256
workers_per_gpu=4,
Expand All @@ -52,12 +53,14 @@
data_source=dict(
list_file=data_train_list, root=data_train_root,
**data_source_cfg),
pipeline=train_pipeline),
pipeline=train_pipeline,
prefetch=prefetch),
val=dict(
type=dataset_type,
data_source=dict(
list_file=data_test_list, root=data_test_root, **data_source_cfg),
pipeline=test_pipeline))
pipeline=test_pipeline,
prefetch=prefetch))
# additional hooks
custom_hooks = [
dict(
Expand All @@ -67,6 +70,8 @@
interval=10,
imgs_per_gpu=32,
workers_per_gpu=4,
prefetch=prefetch,
img_norm_cfg=img_norm_cfg,
eval_param=dict(topk=(1, )))
]
# optimizer
Expand Down
2 changes: 1 addition & 1 deletion docs/GETTING_STARTED.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ prefetch = True
3. Replacing Pillow with Pillow-SIMD (https://github.com/uploadcare/pillow-simd.git) to make use of the SIMD instruction sets available on modern CPUs.
```shell
pip uninstall pillow
pip install Pillow-SIMD
pip install Pillow-SIMD
# Or, if AVX2 is available:
CC="cc -mavx2" pip install -U --force-reinstall pillow-simd
```
We tested it with MoCo v2 using a total batch size of 256 on Tesla V100 GPUs. The training time per step decreased from 0.23s to 0.17s.
## Benchmarks
Expand Down
7 changes: 5 additions & 2 deletions openselfsup/datasets/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,22 @@

from .registry import DATASETS
from .base import BaseDataset
from .utils import to_numpy


@DATASETS.register_module
class ClassificationDataset(BaseDataset):
"""Dataset for classification.
"""

def __init__(self, data_source, pipeline):
super(ClassificationDataset, self).__init__(data_source, pipeline)
def __init__(self, data_source, pipeline, prefetch=False):
super(ClassificationDataset, self).__init__(data_source, pipeline, prefetch)

def __getitem__(self, idx):
img, target = self.data_source.get_sample(idx)
img = self.pipeline(img)
if self.prefetch:
img = torch.from_numpy(to_numpy(img))
return dict(img=img, gt_label=target)

def evaluate(self, scores, keyword, logger=None, topk=(1, 5)):
Expand Down
5 changes: 4 additions & 1 deletion openselfsup/hooks/validate_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,10 @@ def __init__(self,
eval_kwargs['imgs_per_gpu'],
eval_kwargs['workers_per_gpu'],
dist=dist_mode,
shuffle=False)
shuffle=False,
prefetch=eval_kwargs.get('prefetch', False),
img_norm_cfg=eval_kwargs.get('img_norm_cfg', dict()),
)
self.dist_mode = dist_mode
self.initial = initial
self.interval = interval
Expand Down

0 comments on commit a21f1a4

Please sign in to comment.