forked from facebookresearch/vissl
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Summary: Let's do SSL with vision transformers! Pull Request resolved: facebookresearch#195 Reviewed By: prigoyal Differential Revision: D26565026 Pulled By: growlix fbshipit-source-id: d653bdfb876d783c917e9e240ac7acce98164cd8
- Loading branch information
1 parent
86ad2b5
commit 4cffc1c
Showing
48 changed files
with
4,334 additions
and
63 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
13 changes: 13 additions & 0 deletions
13
configs/config/benchmark/imagenet1k_fulltune/datasets/imagenet_1k.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# @package _global_ | ||
config: | ||
DATA: | ||
TRAIN: | ||
DATA_SOURCES: [disk_folder] | ||
LABEL_SOURCES: [disk_folder] | ||
DATASET_NAMES: [imagenet1k_folder] | ||
TEST: | ||
DATA_SOURCES: [disk_folder] | ||
LABEL_SOURCES: [disk_folder] | ||
DATASET_NAMES: [imagenet1k_folder] | ||
OPTIMIZER: | ||
num_epochs: 30 |
13 changes: 13 additions & 0 deletions
13
configs/config/benchmark/imagenet1k_fulltune/datasets/imagenet_1k_10percent.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# @package _global_ | ||
config: | ||
DATA: | ||
TRAIN: | ||
DATA_SOURCES: [disk_folder] | ||
LABEL_SOURCES: [disk_folder] | ||
DATASET_NAMES: [google-imagenet1k-per10] | ||
TEST: | ||
DATA_SOURCES: [disk_folder] | ||
LABEL_SOURCES: [disk_folder] | ||
DATASET_NAMES: [google-imagenet1k-per10] | ||
OPTIMIZER: | ||
num_epochs: 30 |
13 changes: 13 additions & 0 deletions
13
configs/config/benchmark/imagenet1k_fulltune/datasets/imagenet_1k_1percent.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# @package _global_ | ||
config: | ||
DATA: | ||
TRAIN: | ||
DATA_SOURCES: [disk_folder] | ||
LABEL_SOURCES: [disk_folder] | ||
DATASET_NAMES: [google-imagenet1k-per01] | ||
TEST: | ||
DATA_SOURCES: [disk_folder] | ||
LABEL_SOURCES: [disk_folder] | ||
DATASET_NAMES: [google-imagenet1k-per01] | ||
OPTIMIZER: | ||
num_epochs: 60 |
101 changes: 101 additions & 0 deletions
101
configs/config/benchmark/imagenet1k_fulltune/eval_vit_8gpu_transfer_in1k_finetune.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
# @package _global_ | ||
config: | ||
VERBOSE: False | ||
LOG_FREQUENCY: 10 | ||
TEST_ONLY: False | ||
TEST_MODEL: True | ||
SEED_VALUE: 0 | ||
MULTI_PROCESSING_METHOD: forkserver | ||
HOOKS: | ||
PERF_STATS: | ||
MONITOR_PERF_STATS: True | ||
ROLLING_BTIME_FREQ: 313 | ||
PERF_STAT_FREQUENCY: 10 | ||
TENSORBOARD_SETUP: | ||
USE_TENSORBOARD: True | ||
EXPERIMENT_LOG_DIR: | ||
FLUSH_EVERY_N_MIN: 20 | ||
CHECKPOINT: | ||
DIR: "." | ||
AUTO_RESUME: True | ||
CHECKPOINT_FREQUENCY: 10 | ||
DATA: | ||
NUM_DATALOADER_WORKERS: 5 | ||
TRAIN: | ||
DATA_SOURCES: [disk_folder] | ||
LABEL_SOURCES: [disk_folder] | ||
DATASET_NAMES: [imagenet1k_folder] | ||
TRANSFORMS: | ||
- name: RandomResizedCrop | ||
size: 384 | ||
- name: RandomHorizontalFlip | ||
- name: ToTensor | ||
- name: Normalize | ||
mean: [0.485, 0.456, 0.406] | ||
std: [0.229, 0.224, 0.225] | ||
MMAP_MODE: True | ||
COPY_TO_LOCAL_DISK: False | ||
COPY_DESTINATION_DIR: /tmp/imagenet1k/ | ||
TEST: | ||
DATA_SOURCES: [disk_folder] | ||
LABEL_SOURCES: [disk_folder] | ||
DATASET_NAMES: [imagenet1k_folder] | ||
TRANSFORMS: | ||
- name: Resize | ||
size: 384 | ||
- name: CenterCrop | ||
size: 384 | ||
- name: ToTensor | ||
- name: Normalize | ||
mean: [0.485, 0.456, 0.406] | ||
std: [0.229, 0.224, 0.225] | ||
MMAP_MODE: True | ||
COPY_TO_LOCAL_DISK: False | ||
COPY_DESTINATION_DIR: /tmp/imagenet1k/ | ||
METERS: | ||
name: accuracy_list_meter | ||
accuracy_list_meter: | ||
num_meters: 1 | ||
topk_values: [1, 5] | ||
MODEL: | ||
GRAD_CLIP: | ||
USE_GRAD_CLIP: True | ||
TRAINER: | ||
TRAIN_STEP_NAME: standard_train_step | ||
LOSS: | ||
name: cross_entropy_multiple_output_single_target | ||
cross_entropy_multiple_output_single_target: | ||
ignore_index: -1 | ||
OPTIMIZER: | ||
name: sgd | ||
weight_decay: 0.000 | ||
momentum: 0.9 | ||
num_epochs: 30 | ||
nesterov: True | ||
regularize_bn: False | ||
regularize_bias: True | ||
param_schedulers: | ||
lr: | ||
auto_lr_scaling: | ||
auto_scale: true | ||
base_value: 0.01 | ||
base_lr_batch_size: 256 | ||
name: composite | ||
schedulers: | ||
- name: linear | ||
start_value: 0.0 | ||
end_value: 0.1 | ||
- name: cosine | ||
start_value: 0.1 | ||
end_value: 0 | ||
interval_scaling: [rescaled, rescaled] | ||
update_interval: step | ||
lengths: [0.1, 0.9] | ||
DISTRIBUTED: | ||
BACKEND: nccl | ||
NUM_NODES: 1 | ||
NUM_PROC_PER_NODE: 8 | ||
INIT_METHOD: tcp | ||
RUN_ID: auto | ||
MACHINE: | ||
DEVICE: gpu |
40 changes: 40 additions & 0 deletions
40
configs/config/benchmark/imagenet1k_fulltune/models/vit_b16.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# @package _global_ | ||
config: | ||
DATA: | ||
TRAIN: | ||
BATCHSIZE_PER_REPLICA: 32 # Fits on 16gb GPU | ||
TEST: | ||
BATCHSIZE_PER_REPLICA: 32 | ||
MODEL: | ||
TRUNK: | ||
NAME: vision_transformer | ||
TRUNK_PARAMS: | ||
VISION_TRANSFORMERS: | ||
IMAGE_SIZE: 384 | ||
PATCH_SIZE: 16 | ||
NUM_LAYERS: 12 | ||
NUM_HEADS: 12 | ||
HIDDEN_DIM: 768 | ||
MLP_DIM: 3072 | ||
DROPOUT_RATE: 0.1 | ||
ATTENTION_DROPOUT_RATE: 0 | ||
CLASSIFIER: token | ||
HEAD: | ||
PARAMS: [ | ||
["mlp", {"dims": [768, 1000]}], | ||
] | ||
WEIGHTS_INIT: | ||
PARAMS_FILE: "specify the model weights" | ||
STATE_DICT_KEY_NAME: classy_state_dict | ||
SKIP_LAYERS: [ | ||
'heads.0.clf.0.weight', | ||
'heads.0.clf.0.bias', | ||
'num_batches_tracked' | ||
] | ||
SYNC_BN_CONFIG: | ||
CONVERT_BN_TO_SYNC_BN: False | ||
SYNC_BN_TYPE: apex | ||
GROUP_SIZE: 8 | ||
AMP_PARAMS: | ||
USE_AMP: True | ||
AMP_ARGS: {"opt_level": "O1"} |
40 changes: 40 additions & 0 deletions
40
configs/config/benchmark/imagenet1k_fulltune/models/vit_s16.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# @package _global_ | ||
config: | ||
DATA: | ||
TRAIN: | ||
BATCHSIZE_PER_REPLICA: 128 # Fits on 32gb GPU | ||
TEST: | ||
BATCHSIZE_PER_REPLICA: 128 | ||
MODEL: | ||
TRUNK: | ||
NAME: vision_transformer | ||
TRUNK_PARAMS: | ||
VISION_TRANSFORMERS: | ||
IMAGE_SIZE: 384 | ||
PATCH_SIZE: 16 | ||
NUM_LAYERS: 12 | ||
NUM_HEADS: 6 | ||
HIDDEN_DIM: 384 | ||
MLP_DIM: 1536 | ||
DROPOUT_RATE: 0.1 | ||
ATTENTION_DROPOUT_RATE: 0 | ||
CLASSIFIER: token | ||
HEAD: | ||
PARAMS: [ | ||
["mlp", {"dims": [384, 1000]}], | ||
] | ||
WEIGHTS_INIT: | ||
PARAMS_FILE: "specify the model weights" | ||
STATE_DICT_KEY_NAME: classy_state_dict | ||
SKIP_LAYERS: [ | ||
'heads.0.clf.0.weight', | ||
'heads.0.clf.0.bias', | ||
'num_batches_tracked' | ||
] | ||
SYNC_BN_CONFIG: | ||
CONVERT_BN_TO_SYNC_BN: False | ||
SYNC_BN_TYPE: apex | ||
GROUP_SIZE: 8 | ||
AMP_PARAMS: | ||
USE_AMP: True | ||
AMP_ARGS: {"opt_level": "O1"} |
128 changes: 128 additions & 0 deletions
128
configs/config/benchmark/linear_image_classification/imagenet1k/models/deit_s16.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
# @package _global_ | ||
config: | ||
VERBOSE: False | ||
LOG_FREQUENCY: 10 | ||
TEST_ONLY: False | ||
TEST_MODEL: True | ||
SEED_VALUE: 0 | ||
MULTI_PROCESSING_METHOD: forkserver | ||
HOOKS: | ||
PERF_STATS: | ||
MONITOR_PERF_STATS: True | ||
ROLLING_BTIME_FREQ: 313 | ||
PERF_STAT_FREQUENCY: 10 | ||
TENSORBOARD_SETUP: | ||
USE_TENSORBOARD: True | ||
EXPERIMENT_LOG_DIR: | ||
FLUSH_EVERY_N_MIN: 20 | ||
CHECKPOINT: | ||
DIR: | ||
AUTO_RESUME: True | ||
CHECKPOINT_FREQUENCY: 10 | ||
DATA: | ||
NUM_DATALOADER_WORKERS: 5 | ||
TRAIN: | ||
DATA_SOURCES: [disk_folder] | ||
LABEL_SOURCES: [disk_folder] | ||
DATASET_NAMES: [imagenet1k_folder] | ||
BATCHSIZE_PER_REPLICA: 2048 | ||
TRANSFORMS: | ||
- name: RandomResizedCrop | ||
size: 224 | ||
- name: RandomHorizontalFlip | ||
- name: ToTensor | ||
- name: Normalize | ||
mean: [0.485, 0.456, 0.406] | ||
std: [0.229, 0.224, 0.225] | ||
TEST: | ||
DATA_SOURCES: [disk_folder] | ||
LABEL_SOURCES: [disk_folder] | ||
DATASET_NAMES: [imagenet1k_folder] | ||
BATCHSIZE_PER_REPLICA: 2048 | ||
TRANSFORMS: | ||
- name: Resize | ||
size: 256 | ||
- name: CenterCrop | ||
size: 224 | ||
- name: ToTensor | ||
- name: Normalize | ||
mean: [0.485, 0.456, 0.406] | ||
std: [0.229, 0.224, 0.225] | ||
MODEL: | ||
GRAD_CLIP: | ||
USE_GRAD_CLIP: False | ||
FEATURE_EVAL_SETTINGS: | ||
EVAL_MODE_ON: True | ||
FREEZE_TRUNK_ONLY: True | ||
TRUNK: | ||
NAME: vision_transformer | ||
TRUNK_PARAMS: | ||
VISION_TRANSFORMERS: | ||
IMAGE_SIZE: 224 | ||
PATCH_SIZE: 16 | ||
NUM_LAYERS: 12 | ||
NUM_HEADS: 6 | ||
HIDDEN_DIM: 384 | ||
MLP_DIM: 1536 | ||
DROPOUT_RATE: 0 | ||
ATTENTION_DROPOUT_RATE: 0 | ||
CLASSIFIER: token | ||
DROP_PATH_RATE: 0.1 | ||
HEAD: | ||
PARAMS: [ | ||
["mlp", {"dims": [384, 1000]}], | ||
] | ||
WEIGHTS_INIT: | ||
PARAMS_FILE: "specify the model weights" | ||
STATE_DICT_KEY_NAME: classy_state_dict | ||
SYNC_BN_CONFIG: | ||
CONVERT_BN_TO_SYNC_BN: False | ||
SYNC_BN_TYPE: apex | ||
GROUP_SIZE: 8 | ||
AMP_PARAMS: | ||
USE_AMP: True | ||
AMP_ARGS: {"opt_level": "O1"} | ||
LOSS: | ||
name: cross_entropy_multiple_output_single_target | ||
cross_entropy_multiple_output_single_target: | ||
ignore_index: -1 | ||
OPTIMIZER: | ||
name: sgd | ||
# Weight decay is disabled for this config. For reference, in the OSS Caffe2 benchmark RN50 models use 1e-4 and AlexNet models use 5e-4. | |
weight_decay: 0 | ||
momentum: 0.9 | ||
num_epochs: 100 | ||
nesterov: False | ||
regularize_bn: True | ||
regularize_bias: True | ||
param_schedulers: | ||
lr: | ||
auto_lr_scaling: | ||
auto_scale: true | ||
base_value: 0.1 | ||
base_lr_batch_size: 256 | ||
name: composite | ||
schedulers: | ||
- name: linear | ||
start_value: 0.0 | ||
end_value: 0.1 | ||
- name: cosine | ||
start_value: 0.1 | ||
end_value: 0 | ||
interval_scaling: [rescaled, rescaled] | ||
update_interval: step | ||
lengths: [0.1, 0.9] | ||
METERS: | ||
name: accuracy_list_meter | ||
accuracy_list_meter: | ||
num_meters: 1 | ||
topk_values: [1, 5] | ||
TRAINER: | ||
TRAIN_STEP_NAME: standard_train_step | ||
DISTRIBUTED: | ||
BACKEND: nccl | ||
NUM_NODES: 1 | ||
NUM_PROC_PER_NODE: 8 # 8 processes on the single node (presumably one per GPU) | |
RUN_ID: auto | ||
MACHINE: | ||
DEVICE: gpu |
Oops, something went wrong.