Skip to content

Commit

Permalink
Add support for multi-step training.
Browse files Browse the repository at this point in the history
  • Loading branch information
endernewton committed Jul 31, 2017
1 parent 4e7d408 commit 373abae
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 22 deletions.
7 changes: 2 additions & 5 deletions experiments/scripts/convert_vgg16.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,20 @@ case ${DATASET} in
pascal_voc)
TRAIN_IMDB="voc_2007_trainval"
TEST_IMDB="voc_2007_test"
STEPSIZE=50000
ITERS=70000
ANCHORS="[8,16,32]"
RATIOS="[0.5,1,2]"
;;
pascal_voc_0712)
TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval"
TEST_IMDB="voc_2007_test"
STEPSIZE=80000
ITERS=110000
ANCHORS="[8,16,32]"
RATIOS="[0.5,1,2]"
;;
coco)
TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival"
TEST_IMDB="coco_2014_minival"
STEPSIZE=350000
ITERS=490000
ANCHORS="[4,8,16,32]"
RATIOS="[0.5,1,2]"
Expand All @@ -57,14 +54,14 @@ if [ ! -f ${NET_FINAL}.index ]; then
--iters ${ITERS} \
--cfg experiments/cfgs/${NET}.yml \
--tag ${EXTRA_ARGS_SLUG} \
--set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS}
--set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} ${EXTRA_ARGS}
else
CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/convert_from_depre.py \
--snapshot ${NET_FINAL} \
--imdb ${TRAIN_IMDB} \
--iters ${ITERS} \
--cfg experiments/cfgs/${NET}.yml \
--set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS}
--set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} ${EXTRA_ARGS}
fi
fi

6 changes: 3 additions & 3 deletions experiments/scripts/train_faster_rcnn.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,23 @@ case ${DATASET} in
pascal_voc)
TRAIN_IMDB="voc_2007_trainval"
TEST_IMDB="voc_2007_test"
STEPSIZE=50000
STEPSIZE="[50000]"
ITERS=70000
ANCHORS="[8,16,32]"
RATIOS="[0.5,1,2]"
;;
pascal_voc_0712)
TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval"
TEST_IMDB="voc_2007_test"
STEPSIZE=80000
STEPSIZE="[80000]"
ITERS=110000
ANCHORS="[8,16,32]"
RATIOS="[0.5,1,2]"
;;
coco)
TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival"
TEST_IMDB="coco_2014_minival"
STEPSIZE=350000
STEPSIZE="[350000]"
ITERS=490000
ANCHORS="[4,8,16,32]"
RATIOS="[0.5,1,2]"
Expand Down
2 changes: 1 addition & 1 deletion lib/model/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
__C.TRAIN.GAMMA = 0.1

# Step sizes (in iterations) for reducing the learning rate; a list, so multiple steps are supported
__C.TRAIN.STEPSIZE = 30000
__C.TRAIN.STEPSIZE = [30000]

# Iteration intervals for showing the loss during training, on command line interface
__C.TRAIN.DISPLAY = 10
Expand Down
36 changes: 25 additions & 11 deletions lib/model/train_val.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,14 +138,17 @@ def train_model(self, sess, max_iters):
sfiles = glob.glob(sfiles)
sfiles.sort(key=os.path.getmtime)
# Get the snapshot name in TensorFlow
redstr = '_iter_{:d}.'.format(cfg.TRAIN.STEPSIZE+1)
sfiles = [ss.replace('.meta', '') for ss in sfiles]
sfiles = [ss for ss in sfiles if redstr not in ss]
redfiles = []
for stepsize in cfg.TRAIN.STEPSIZE:
redfiles.append(os.path.join(self.output_dir,
cfg.TRAIN.SNAPSHOT_PREFIX + '_iter_{:d}.ckpt.meta'.format(stepsize+1)))
sfiles = [ss.replace('.meta', '') for ss in sfiles if ss not in redfiles]

nfiles = os.path.join(self.output_dir, cfg.TRAIN.SNAPSHOT_PREFIX + '_iter_*.pkl')
nfiles = glob.glob(nfiles)
nfiles.sort(key=os.path.getmtime)
nfiles = [nn for nn in nfiles if redstr not in nn]
redfiles = [redfile.replace('.ckpt.meta', '.pkl') for redfile in redfiles]
nfiles = [nn for nn in nfiles if nn not in redfiles]

lsf = len(sfiles)
assert len(nfiles) == lsf
Expand All @@ -171,8 +174,10 @@ def train_model(self, sess, max_iters):
# fully connected weights
self.net.fix_variables(sess, self.pretrained_model)
print('Fixed.')
sess.run(tf.assign(lr, cfg.TRAIN.LEARNING_RATE))
rate = cfg.TRAIN.LEARNING_RATE
sess.run(tf.assign(lr, rate))
last_snapshot_iter = 0
stepsizes = list(cfg.TRAIN.STEPSIZE)
else:
# Get the most recent snapshot and restore
ss_paths = [ss_paths[-1]]
Expand All @@ -198,21 +203,30 @@ def train_model(self, sess, max_iters):
self.data_layer_val._cur = cur_val
self.data_layer_val._perm = perm_val

# Set the learning rate, only reduce once
if last_snapshot_iter > cfg.TRAIN.STEPSIZE:
sess.run(tf.assign(lr, cfg.TRAIN.LEARNING_RATE * cfg.TRAIN.GAMMA))
# Set the learning rate
rate = cfg.TRAIN.LEARNING_RATE
stepsizes = []
for stepsize in cfg.TRAIN.STEPSIZE:
if last_snapshot_iter > stepsize:
rate *= cfg.TRAIN.GAMMA
else:
sess.run(tf.assign(lr, cfg.TRAIN.LEARNING_RATE))
stepsizes.append(stepsize)
sess.run(tf.assign(lr, rate))

timer = Timer()
iter = last_snapshot_iter + 1
last_summary_time = time.time()
stepsizes.append(max_iters)
stepsizes.reverse()
next_stepsize = stepsizes.pop()
while iter < max_iters + 1:
# Learning rate
if iter == cfg.TRAIN.STEPSIZE + 1:
if iter == next_stepsize + 1:
# Add snapshot here before reducing the learning rate
self.snapshot(sess, iter)
sess.run(tf.assign(lr, cfg.TRAIN.LEARNING_RATE * cfg.TRAIN.GAMMA))
rate *= cfg.TRAIN.GAMMA
sess.run(tf.assign(lr, rate))
next_stepsize = stepsizes.pop()

timer.tic()
# Get training data, one batch at a time
Expand Down
4 changes: 2 additions & 2 deletions lib/nets/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def __init__(self, batch_size=1):
self._event_summaries = {}
self._variables_to_fix = {}

def _add_image_summary(self, image, gt_boxes, im_info):
def _add_gt_image_summary(self, image, gt_boxes, im_info):
# add back mean
image += cfg.PIXEL_MEANS
# BGR to RGB (opencv uses BGR)
Expand Down Expand Up @@ -370,7 +370,7 @@ def create_architecture(self, sess, mode, num_classes, tag=None,

val_summaries = []
with tf.device("/cpu:0"):
val_summaries.append(self._add_image_summary(self._image, self._gt_boxes, self._im_info))
val_summaries.append(self._add_gt_image_summary(self._image, self._gt_boxes, self._im_info))
for key, var in self._event_summaries.items():
val_summaries.append(tf.summary.scalar(key, var))
for key, var in self._score_summaries.items():
Expand Down

0 comments on commit 373abae

Please sign in to comment.