Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Summary:
Let's do SSL with vision transformers!

Pull Request resolved: facebookresearch#195

Reviewed By: prigoyal

Differential Revision: D26565026

Pulled By: growlix

fbshipit-source-id: d653bdfb876d783c917e9e240ac7acce98164cd8
  • Loading branch information
growlix authored and facebook-github-bot committed Feb 20, 2021
1 parent 86ad2b5 commit 4cffc1c
Show file tree
Hide file tree
Showing 48 changed files with 4,334 additions and 63 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ install_vissl_dep: &install_vissl_dep
name: Install Dependencies
working_directory: ~/vissl
command: |
pip install --progress-bar off torch==1.5.0 torchvision==0.6.0 opencv-python==3.4.2.17
pip install --progress-bar off torch==1.7.1 torchvision==0.8.2 opencv-python==3.4.2.17
pip install --progress-bar off -r requirements.txt
install_apex_gpu: &install_apex_gpu
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# @package _global_
# Dataset override: the TRAIN and TEST entries both read the
# imagenet1k_folder dataset through disk_folder data/label sources, and the
# optimizer is set to run for 30 epochs.
config:
DATA:
TRAIN:
DATA_SOURCES: [disk_folder]
LABEL_SOURCES: [disk_folder]
DATASET_NAMES: [imagenet1k_folder]
TEST:
DATA_SOURCES: [disk_folder]
LABEL_SOURCES: [disk_folder]
DATASET_NAMES: [imagenet1k_folder]
OPTIMIZER:
num_epochs: 30
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# @package _global_
# Dataset override: the TRAIN and TEST entries both read the
# google-imagenet1k-per10 dataset through disk_folder data/label sources, and
# the optimizer is set to run for 30 epochs.
# NOTE(review): "per10" presumably denotes a 10% ImageNet-1k subset — confirm
# against the dataset catalog.
config:
DATA:
TRAIN:
DATA_SOURCES: [disk_folder]
LABEL_SOURCES: [disk_folder]
DATASET_NAMES: [google-imagenet1k-per10]
TEST:
DATA_SOURCES: [disk_folder]
LABEL_SOURCES: [disk_folder]
DATASET_NAMES: [google-imagenet1k-per10]
OPTIMIZER:
num_epochs: 30
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# @package _global_
# Dataset override: the TRAIN and TEST entries both read the
# google-imagenet1k-per01 dataset through disk_folder data/label sources, and
# the optimizer is set to run for 60 epochs.
# NOTE(review): "per01" presumably denotes a 1% ImageNet-1k subset — confirm
# against the dataset catalog.
config:
DATA:
TRAIN:
DATA_SOURCES: [disk_folder]
LABEL_SOURCES: [disk_folder]
DATASET_NAMES: [google-imagenet1k-per01]
TEST:
DATA_SOURCES: [disk_folder]
LABEL_SOURCES: [disk_folder]
DATASET_NAMES: [google-imagenet1k-per01]
OPTIMIZER:
num_epochs: 60
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# @package _global_
# Training configuration on imagenet1k_folder at 384px resolution:
# cross-entropy loss, SGD with Nesterov momentum, a linear + cosine composite
# learning-rate schedule, gradient clipping enabled, and 8 processes on a
# single node using the nccl backend.
config:
VERBOSE: False
LOG_FREQUENCY: 10
TEST_ONLY: False
TEST_MODEL: True
SEED_VALUE: 0
MULTI_PROCESSING_METHOD: forkserver
# Hooks: performance statistics and TensorBoard logging.
HOOKS:
PERF_STATS:
MONITOR_PERF_STATS: True
ROLLING_BTIME_FREQ: 313
PERF_STAT_FREQUENCY: 10
TENSORBOARD_SETUP:
USE_TENSORBOARD: True
# Left without a value here (parses as null).
EXPERIMENT_LOG_DIR:
FLUSH_EVERY_N_MIN: 20
# Checkpoints go to the current directory, with auto-resume enabled.
CHECKPOINT:
DIR: "."
AUTO_RESUME: True
CHECKPOINT_FREQUENCY: 10
DATA:
NUM_DATALOADER_WORKERS: 5
TRAIN:
DATA_SOURCES: [disk_folder]
LABEL_SOURCES: [disk_folder]
DATASET_NAMES: [imagenet1k_folder]
# Train-time pipeline: random 384px crop, random horizontal flip, tensor
# conversion, then per-channel normalization.
TRANSFORMS:
- name: RandomResizedCrop
size: 384
- name: RandomHorizontalFlip
- name: ToTensor
- name: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
MMAP_MODE: True
COPY_TO_LOCAL_DISK: False
COPY_DESTINATION_DIR: /tmp/imagenet1k/
TEST:
DATA_SOURCES: [disk_folder]
LABEL_SOURCES: [disk_folder]
DATASET_NAMES: [imagenet1k_folder]
# Test-time pipeline: resize to 384, center crop 384, tensor conversion,
# then the same normalization as training.
TRANSFORMS:
- name: Resize
size: 384
- name: CenterCrop
size: 384
- name: ToTensor
- name: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
MMAP_MODE: True
COPY_TO_LOCAL_DISK: False
COPY_DESTINATION_DIR: /tmp/imagenet1k/
# A single accuracy meter reporting top-1 and top-5.
METERS:
name: accuracy_list_meter
accuracy_list_meter:
num_meters: 1
topk_values: [1, 5]
MODEL:
GRAD_CLIP:
USE_GRAD_CLIP: True
TRAINER:
TRAIN_STEP_NAME: standard_train_step
LOSS:
name: cross_entropy_multiple_output_single_target
cross_entropy_multiple_output_single_target:
ignore_index: -1
OPTIMIZER:
name: sgd
# Weight decay is zero in this config.
weight_decay: 0.000
momentum: 0.9
num_epochs: 30
nesterov: True
regularize_bn: False
regularize_bias: True
param_schedulers:
lr:
# NOTE(review): presumably rescales the learning rate relative to
# base_lr_batch_size when the global batch size differs — confirm.
auto_lr_scaling:
auto_scale: true
base_value: 0.01
base_lr_batch_size: 256
# Composite schedule: a linear segment (0.0 -> 0.1) followed by a cosine
# segment (0.1 -> 0), updated every step. `lengths` assigns 0.1 and 0.9 to the
# two segments — presumably fractions of total training; confirm.
name: composite
schedulers:
- name: linear
start_value: 0.0
end_value: 0.1
- name: cosine
start_value: 0.1
end_value: 0
interval_scaling: [rescaled, rescaled]
update_interval: step
lengths: [0.1, 0.9]
# One node with 8 processes, nccl backend, tcp init method.
DISTRIBUTED:
BACKEND: nccl
NUM_NODES: 1
NUM_PROC_PER_NODE: 8
INIT_METHOD: tcp
RUN_ID: auto
MACHINE:
DEVICE: gpu
40 changes: 40 additions & 0 deletions configs/config/benchmark/imagenet1k_fulltune/models/vit_b16.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# @package _global_
# Model override for a vision_transformer trunk with 16px patches on 384px
# images: 12 layers, 12 heads, hidden dim 768, MLP dim 3072, and an mlp head
# mapping 768 -> 1000. Batch size per replica is 32 for both train and test.
config:
DATA:
TRAIN:
BATCHSIZE_PER_REPLICA: 32 # Fits on 16gb GPU
TEST:
BATCHSIZE_PER_REPLICA: 32
MODEL:
TRUNK:
NAME: vision_transformer
TRUNK_PARAMS:
VISION_TRANSFORMERS:
IMAGE_SIZE: 384
PATCH_SIZE: 16
NUM_LAYERS: 12
NUM_HEADS: 12
HIDDEN_DIM: 768
MLP_DIM: 3072
DROPOUT_RATE: 0.1
ATTENTION_DROPOUT_RATE: 0
CLASSIFIER: token
HEAD:
PARAMS: [
["mlp", {"dims": [768, 1000]}],
]
WEIGHTS_INIT:
# Placeholder value — replace with the path to the weights file to load.
PARAMS_FILE: "specify the model weights"
STATE_DICT_KEY_NAME: classy_state_dict
# NOTE(review): presumably these entries are excluded when loading weights so
# the classifier head starts fresh — confirm against the loader.
SKIP_LAYERS: [
'heads.0.clf.0.weight',
'heads.0.clf.0.bias',
'num_batches_tracked'
]
# BatchNorm -> SyncBatchNorm conversion is turned off.
SYNC_BN_CONFIG:
CONVERT_BN_TO_SYNC_BN: False
SYNC_BN_TYPE: apex
GROUP_SIZE: 8
# Mixed precision is enabled with opt_level O1.
AMP_PARAMS:
USE_AMP: True
AMP_ARGS: {"opt_level": "O1"}
40 changes: 40 additions & 0 deletions configs/config/benchmark/imagenet1k_fulltune/models/vit_s16.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# @package _global_
# Model override for a vision_transformer trunk with 16px patches on 384px
# images: 12 layers, 6 heads, hidden dim 384, MLP dim 1536, and an mlp head
# mapping 384 -> 1000. Batch size per replica is 128 for both train and test.
config:
DATA:
TRAIN:
BATCHSIZE_PER_REPLICA: 128 # Fits on 32gb GPU
TEST:
BATCHSIZE_PER_REPLICA: 128
MODEL:
TRUNK:
NAME: vision_transformer
TRUNK_PARAMS:
VISION_TRANSFORMERS:
IMAGE_SIZE: 384
PATCH_SIZE: 16
NUM_LAYERS: 12
NUM_HEADS: 6
HIDDEN_DIM: 384
MLP_DIM: 1536
DROPOUT_RATE: 0.1
ATTENTION_DROPOUT_RATE: 0
CLASSIFIER: token
HEAD:
PARAMS: [
["mlp", {"dims": [384, 1000]}],
]
WEIGHTS_INIT:
# Placeholder value — replace with the path to the weights file to load.
PARAMS_FILE: "specify the model weights"
STATE_DICT_KEY_NAME: classy_state_dict
# NOTE(review): presumably these entries are excluded when loading weights so
# the classifier head starts fresh — confirm against the loader.
SKIP_LAYERS: [
'heads.0.clf.0.weight',
'heads.0.clf.0.bias',
'num_batches_tracked'
]
# BatchNorm -> SyncBatchNorm conversion is turned off.
SYNC_BN_CONFIG:
CONVERT_BN_TO_SYNC_BN: False
SYNC_BN_TYPE: apex
GROUP_SIZE: 8
# Mixed precision is enabled with opt_level O1.
AMP_PARAMS:
USE_AMP: True
AMP_ARGS: {"opt_level": "O1"}
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# @package _global_
# Evaluation-style training configuration on imagenet1k_folder at 224px:
# vision_transformer trunk (12 layers, 6 heads, hidden dim 384) with
# FEATURE_EVAL_SETTINGS enabled and FREEZE_TRUNK_ONLY set, an mlp head mapping
# 384 -> 1000, cross-entropy loss, and SGD for 100 epochs with a linear +
# cosine composite learning-rate schedule.
config:
VERBOSE: False
LOG_FREQUENCY: 10
TEST_ONLY: False
TEST_MODEL: True
SEED_VALUE: 0
MULTI_PROCESSING_METHOD: forkserver
# Hooks: performance statistics and TensorBoard logging.
HOOKS:
PERF_STATS:
MONITOR_PERF_STATS: True
ROLLING_BTIME_FREQ: 313
PERF_STAT_FREQUENCY: 10
TENSORBOARD_SETUP:
USE_TENSORBOARD: True
# Left without a value here (parses as null).
EXPERIMENT_LOG_DIR:
FLUSH_EVERY_N_MIN: 20
CHECKPOINT:
# Left without a value here (parses as null).
DIR:
AUTO_RESUME: True
CHECKPOINT_FREQUENCY: 10
DATA:
NUM_DATALOADER_WORKERS: 5
TRAIN:
DATA_SOURCES: [disk_folder]
LABEL_SOURCES: [disk_folder]
DATASET_NAMES: [imagenet1k_folder]
BATCHSIZE_PER_REPLICA: 2048
# Train-time pipeline: random 224px crop, random horizontal flip, tensor
# conversion, then per-channel normalization.
TRANSFORMS:
- name: RandomResizedCrop
size: 224
- name: RandomHorizontalFlip
- name: ToTensor
- name: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
TEST:
DATA_SOURCES: [disk_folder]
LABEL_SOURCES: [disk_folder]
DATASET_NAMES: [imagenet1k_folder]
BATCHSIZE_PER_REPLICA: 2048
# Test-time pipeline: resize to 256, center crop 224, tensor conversion,
# then the same normalization as training.
TRANSFORMS:
- name: Resize
size: 256
- name: CenterCrop
size: 224
- name: ToTensor
- name: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
MODEL:
GRAD_CLIP:
USE_GRAD_CLIP: False
# NOTE(review): with both flags set, the trunk is presumably frozen and only
# the head is trained — confirm against the model builder.
FEATURE_EVAL_SETTINGS:
EVAL_MODE_ON: True
FREEZE_TRUNK_ONLY: True
TRUNK:
NAME: vision_transformer
TRUNK_PARAMS:
VISION_TRANSFORMERS:
IMAGE_SIZE: 224
PATCH_SIZE: 16
NUM_LAYERS: 12
NUM_HEADS: 6
HIDDEN_DIM: 384
MLP_DIM: 1536
DROPOUT_RATE: 0
ATTENTION_DROPOUT_RATE: 0
CLASSIFIER: token
DROP_PATH_RATE: 0.1
HEAD:
PARAMS: [
["mlp", {"dims": [384, 1000]}],
]
WEIGHTS_INIT:
# Placeholder value — replace with the path to the weights file to load.
PARAMS_FILE: "specify the model weights"
STATE_DICT_KEY_NAME: classy_state_dict
# BatchNorm -> SyncBatchNorm conversion is turned off.
SYNC_BN_CONFIG:
CONVERT_BN_TO_SYNC_BN: False
SYNC_BN_TYPE: apex
GROUP_SIZE: 8
# Mixed precision is enabled with opt_level O1.
AMP_PARAMS:
USE_AMP: True
AMP_ARGS: {"opt_level": "O1"}
LOSS:
name: cross_entropy_multiple_output_single_target
cross_entropy_multiple_output_single_target:
ignore_index: -1
OPTIMIZER:
name: sgd
# Weight decay is zero in this config. For reference only: in the OSS Caffe2
# benchmark, RN50 models use 1e-4 and AlexNet models 5e-4.
weight_decay: 0
momentum: 0.9
num_epochs: 100
nesterov: False
regularize_bn: True
regularize_bias: True
param_schedulers:
lr:
# NOTE(review): presumably rescales the learning rate relative to
# base_lr_batch_size when the global batch size differs — confirm.
auto_lr_scaling:
auto_scale: true
base_value: 0.1
base_lr_batch_size: 256
# Composite schedule: a linear segment (0.0 -> 0.1) followed by a cosine
# segment (0.1 -> 0), updated every step. `lengths` assigns 0.1 and 0.9 to the
# two segments — presumably fractions of total training; confirm.
name: composite
schedulers:
- name: linear
start_value: 0.0
end_value: 0.1
- name: cosine
start_value: 0.1
end_value: 0
interval_scaling: [rescaled, rescaled]
update_interval: step
lengths: [0.1, 0.9]
# A single accuracy meter reporting top-1 and top-5.
METERS:
name: accuracy_list_meter
accuracy_list_meter:
num_meters: 1
topk_values: [1, 5]
TRAINER:
TRAIN_STEP_NAME: standard_train_step
DISTRIBUTED:
BACKEND: nccl
NUM_NODES: 1
NUM_PROC_PER_NODE: 8 # 8 processes on the single node
RUN_ID: auto
MACHINE:
DEVICE: gpu
Loading

0 comments on commit 4cffc1c

Please sign in to comment.