From 22631d84cf5de8f9e8e6dfbe7f8211398f44b4c9 Mon Sep 17 00:00:00 2001
From: ace19-dev
Date: Fri, 9 Aug 2019 19:13:31 +0900
Subject: [PATCH 1/3] update

---
 train.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/train.py b/train.py
index a0e2c7e..015a7fc 100644
--- a/train.py
+++ b/train.py
@@ -23,7 +23,7 @@ FLAGS = flags.FLAGS
 
 
 # Multi GPU - Must be a value of 1 or greater
-flags.DEFINE_integer('num_gpu', 4, 'number of GPU')
+flags.DEFINE_integer('num_gpu', 1, 'number of GPU')
 
 flags.DEFINE_string('train_logdir', './tfmodels',
                     'Where the checkpoint and logs are stored.')
@@ -41,7 +41,7 @@ flags.DEFINE_enum('learning_policy', 'poly', ['poly', 'step'],
                   'Learning rate policy for training.')
 
-flags.DEFINE_float('base_learning_rate', 0.002,
+flags.DEFINE_float('base_learning_rate', 0.0003,
                    'The base learning rate for model training.')
 
 flags.DEFINE_float('learning_rate_decay_factor', 1e-4,
                    'The rate to decay the base learning rate.')
@@ -103,8 +103,8 @@ flags.DEFINE_integer('how_many_training_epochs', 100,
                      'How many training loops to run')
 
-flags.DEFINE_integer('batch_size', 256, 'batch size')
-flags.DEFINE_integer('val_batch_size', 256, 'validation batch size')
+flags.DEFINE_integer('batch_size', 32, 'batch size')
+flags.DEFINE_integer('val_batch_size', 32, 'validation batch size')
 flags.DEFINE_integer('height', 224, 'height')
 flags.DEFINE_integer('width', 224, 'width')
 
 # flags.DEFINE_string('labels',
@@ -186,8 +186,12 @@ def main(unused_argv):
                                              FLAGS.slow_start_step,
                                              FLAGS.slow_start_learning_rate)
     summaries.add(tf.compat.v1.summary.scalar('learning_rate', learning_rate))
 
+    # optimizers = \
+    #     [tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9) for _ in range(FLAGS.num_gpu)]
+    # optimizers = \
+    #     [tf.compat.v1.train.MomentumOptimizer(learning_rate, FLAGS.momentum) for _ in range(FLAGS.num_gpu)]
     optimizers = \
-        [tf.compat.v1.train.MomentumOptimizer(learning_rate, FLAGS.momentum) for _ in range(FLAGS.num_gpu)]
+        [tf.train.GradientDescentOptimizer(learning_rate) for _ in range(FLAGS.num_gpu)]
 
     logits = []
     losses = []

From f7a52f596e1d6f4478c5c9ffa14cd282b95ed283 Mon Sep 17 00:00:00 2001
From: ace19-dev
Date: Fri, 9 Aug 2019 19:52:27 +0900
Subject: [PATCH 2/3] update

---
 train.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/train.py b/train.py
index 015a7fc..30afeb8 100644
--- a/train.py
+++ b/train.py
@@ -215,11 +215,11 @@ def main(unused_argv):
                            attention_module='se_block')
 
     # # Print name and shape of parameter nodes (values not yet initialized)
-    # tf.compat.v1.logging.info("++++++++++++++++++++++++++++++++++")
-    # tf.compat.v1.logging.info("Parameters")
-    # tf.compat.v1.logging.info("++++++++++++++++++++++++++++++++++")
-    # for v in slim.get_model_variables():
-    #     tf.compat.v1.logging.info('name = %s, shape = %s' % (v.name, v.get_shape()))
+    tf.compat.v1.logging.info("++++++++++++++++++++++++++++++++++")
+    tf.compat.v1.logging.info("Parameters")
+    tf.compat.v1.logging.info("++++++++++++++++++++++++++++++++++")
+    for v in slim.get_model_variables():
+        tf.compat.v1.logging.info('name = %s, shape = %s' % (v.name, v.get_shape()))
 
     # # TTA
     # logit = tf.cond(is_training,
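Note on PATCH 1/3: the optimizer is switched by commenting alternatives in and out, which is easy to get wrong across commits. A flag-driven selector covering the same three optimizers avoids that; the sketch below is illustrative only, is not part of any patch, and the 'optimizer' flag name and build_optimizers helper are hypothetical.

    # Hypothetical flag-driven optimizer selection (not part of the patches).
    # Assumes the absl `flags`/`FLAGS` objects and the TF 1.x compat API
    # already imported in train.py.
    flags.DEFINE_enum('optimizer', 'sgd', ['sgd', 'momentum', 'rmsprop'],
                      'Optimizer to build for each GPU tower.')

    def build_optimizers(learning_rate):
        # One optimizer per GPU, mirroring the per-tower list in main().
        def make():
            if FLAGS.optimizer == 'momentum':
                return tf.compat.v1.train.MomentumOptimizer(learning_rate,
                                                            FLAGS.momentum)
            if FLAGS.optimizer == 'rmsprop':
                return tf.compat.v1.train.RMSPropOptimizer(
                    learning_rate, decay=0.9, momentum=0.9)
            return tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
        return [make() for _ in range(FLAGS.num_gpu)]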
From d8cd0cc014d44faf964db2aed5fcea39d4c6398c Mon Sep 17 00:00:00 2001
From: ace19-dev
Date: Fri, 9 Aug 2019 19:55:57 +0900
Subject: [PATCH 3/3] update

---
 utils/train_utils.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/utils/train_utils.py b/utils/train_utils.py
index a5262e1..9649e1f 100644
--- a/utils/train_utils.py
+++ b/utils/train_utils.py
@@ -386,8 +386,7 @@ def custom_restore_fn(flags):
             flags.checkpoint_model_scope + var.op.name): var
             for var in variables_to_restore}
 
-    slim.assign_from_checkpoint_fn(flags.checkpoint_dir,
-                                   variables_to_restore)
+    slim.assign_from_checkpoint_fn(flags.checkpoint_dir, variables_to_restore)
 
     tf.compat.v1.logging.info('Fine-tuning from %s.' % flags.checkpoint_dir)
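Note on PATCH 3/3: as written in the hunk, the return value of slim.assign_from_checkpoint_fn is discarded. In TF-slim that call builds and returns an initializer function rather than restoring anything itself, so the returned callable has to be run inside a session for the restore to take effect. A minimal usage sketch, assuming the same `slim` import and `flags` object as utils/train_utils.py:

    # Minimal sketch; `variables_to_restore` as built in custom_restore_fn above.
    init_fn = slim.assign_from_checkpoint_fn(flags.checkpoint_dir,
                                             variables_to_restore)

    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        init_fn(sess)  # the restore happens only when the returned callable runs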