---
# template.yaml — quantization-aware training configuration (98 lines in original)
# NOTE(review): leading indentation was lost in extraction; nesting below is
# reconstructed from the section comments and key semantics — confirm against
# the original template.yaml.

# Experiment name
name: experiment_name
# Name of output directory. Checkpoints and logs will be saved at `pwd`/output_dir
output_dir: outputs_dir_name
# Compute device for training (presumably 'gpu' or 'cpu' — confirm with trainer)
training_device: gpu
# Number of random bit-width paths sampled (used when dynamic bit training is on)
num_random_path: 3
# Candidate bit widths for quantization
target_bits: [6, 5, 4, 3, 2]
post_training_batchnorm_calibration: true
information_distortion_mitigation: false
enable_dynamic_bit_training: false
# Knowledge distillation. Declared once here; the original file repeated
# `kd: false` in the Training section — duplicate mapping keys are invalid
# YAML (most parsers silently keep the last), so the duplicate was removed.
kd: false

# Dataset loader
dataloader:
  dataset: imagenet
  num_classes: 1000
  # Path to the dataset root directory
  path: /path/to/imagenet
  batch_size: 128
  # Number of data-loading worker processes
  workers: 32
  deterministic: true

# Resume from a checkpoint; leave `path` empty to start fresh
resume:
  path:
  lean: false

log:
  # How many best-scoring checkpoints to keep track of
  num_best_scores: 3
  # Log every N iterations
  print_freq: 20

#============================ Model ============================================
arch: mobilenetv2
pre_trained: true

#============================ Quantization =====================================
# (default for all layers)
quan:
  act:
    mode: lsq
    bit: 2
    per_channel: false
    symmetric: false
    all_positive: true
  weight:
    mode: lsq
    bit: 2
    per_channel: false
    symmetric: false
    all_positive: false
  excepts:
    # Specify quantized bit width for some layers, like this:
    # (an empty `bit:` parses to null — presumably "leave unquantized / use
    # a layer-specific default"; confirm against the quantizer code)
    features.0.0:
      act:
        bit:
        all_positive: false
      weight:
        bit:
    features.0.1:
      act:
        bit:
      weight:
        bit:
    classifier.1:
      act:
        bit:
      weight:
        bit:

#============================ Training / Evaluation ============================
eval: false
search: false
epochs: 160
# Label smoothing factor
smoothing: 0.1
scale_gradient: true
dropout: 0.0
opt: sgd
lr: 0.02
momentum: 0.9
weight_decay: 0.000025
# LR schedule
sched: cosine
min_lr: 0
decay_rate: 0.1
warmup_epochs: 5
warmup_lr: 0.00001
decay_epochs: 30
cooldown_epochs: 5
ema_decay: 0.9997