Add linear learning warmup method in learning rate scheduler. (Paddle#16563)

* Add linear learning warmup method

This warmup learning rate can be combined with other learning rate strategies.
For example:
            decayed_lr = fluid.layers.linear_lr_warmup(
                fluid.layers.piecewise_decay(boundaries, lr_steps),
                warmup_steps, start_lr, end_lr)
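
As a rough plain-Python illustration of what that combined schedule evaluates to (an editor's sketch; combined_lr and the sample step values are assumed here, not part of the commit):

    def combined_lr(step, warmup_steps, start_lr, end_lr, boundaries, lr_steps):
        # During warmup, interpolate linearly from start_lr to end_lr.
        if step < warmup_steps:
            return start_lr + (end_lr - start_lr) * (step / float(warmup_steps))
        # Afterwards, fall back to the wrapped piecewise_decay schedule.
        for boundary, lr in zip(boundaries, lr_steps):
            if step < boundary:
                return lr
        return lr_steps[len(boundaries)]

    combined_lr(25, 50, 1. / 3., 0.1, [100, 200], [0.1, 0.01, 0.001])   # ~0.2167 (still warming up)
    combined_lr(150, 50, 1. / 3., 0.1, [100, 200], [0.1, 0.01, 0.001])  # 0.01 (piecewise decay)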
qingqing01 authored Mar 31, 2019
1 parent a61ed97 commit 1ebd743
Showing 3 changed files with 102 additions and 4 deletions.
1 change: 1 addition & 0 deletions paddle/fluid/API.spec
@@ -359,6 +359,7 @@ paddle.fluid.layers.piecewise_decay (ArgSpec(args=['boundaries', 'values'], vara
paddle.fluid.layers.noam_decay (ArgSpec(args=['d_model', 'warmup_steps'], varargs=None, keywords=None, defaults=None), ('document', 'd9a95746353fd574be36dc28d8726c28'))
paddle.fluid.layers.append_LARS (ArgSpec(args=['params_grads', 'learning_rate', 'weight_decay'], varargs=None, keywords=None, defaults=None), ('document', 'd24fa1e7d62ac8a534fc6a86002f84f8'))
paddle.fluid.layers.cosine_decay (ArgSpec(args=['learning_rate', 'step_each_epoch', 'epochs'], varargs=None, keywords=None, defaults=None), ('document', '9588c64c26ffaef3c466e404a6af9d9b'))
paddle.fluid.layers.linear_lr_warmup (ArgSpec(args=['learning_rate', 'warmup_steps', 'start_lr', 'end_lr'], varargs=None, keywords=None, defaults=None), ('document', '2ef3f5ca5cd71ea4217c418e5a7a0565'))
paddle.fluid.contrib.InitState.__init__ (ArgSpec(args=['self', 'init', 'shape', 'value', 'init_boot', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, None, False, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.StateCell.__init__ (ArgSpec(args=['self', 'inputs', 'states', 'out_state', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.StateCell.compute_state (ArgSpec(args=['self', 'inputs'], varargs=None, keywords=None, defaults=None), ('document', '92973b3f222081a1d17069c683cf4a99'))
58 changes: 57 additions & 1 deletion python/paddle/fluid/layers/learning_rate_scheduler.py
@@ -33,7 +33,7 @@
__all__ = [
'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS',
'cosine_decay'
'cosine_decay', 'linear_lr_warmup'
]


@@ -383,3 +383,59 @@ def _balanced_weight(param_norm, grad_norm):
/ _balanced_weight(param_norm, grad_norm)
# set back param local learning rate
param.optimize_attr['learning_rate'] = decayed_lr


def linear_lr_warmup(learning_rate, warmup_steps, start_lr, end_lr):
"""
Applies linear learning rate warmup before the normal learning rate
scheduling.
.. code-block:: python
if global_step < warmup_steps:
linear_step = end_lr - start_lr
lr = start_lr + linear_step * (global_step / warmup_steps)
Args:
learning_rate (float | Variable): A float value or Variable.
warmup_steps (int): The warmup steps.
start_lr (float): The start learning of warmup.
end_lr (float): The end learning of warmup.
Returns:
The decayed learning rate in warmup period.
Examples:
.. code-block:: python
boundaries = [100, 200]
lr_steps = [0.1, 0.01, 0.001]
warmup_steps = 50
start_lr = 1. / 3.
end_lr = 0.1
decayed_lr = fluid.layers.linear_lr_warmup(
fluid.layers.piecewise_decay(boundaries, lr_steps),
warmup_steps, start_lr, end_lr)
"""
assert (isinstance(end_lr, float))
assert (isinstance(start_lr, float))
linear_step = end_lr - start_lr
with default_main_program()._lr_schedule_guard():
lr = tensor.create_global_var(
shape=[1],
value=0.0,
dtype='float32',
persistable=True,
name="learning_rate_warmup")

global_step = _decay_step_counter()

with control_flow.Switch() as switch:
with switch.case(global_step < warmup_steps):
decayed_lr = start_lr + linear_step * (global_step /
float(warmup_steps))
tensor.assign(decayed_lr, lr)
with switch.default():
tensor.assign(learning_rate, lr)
return lr
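
For context (not part of the diff), a minimal sketch of how the returned learning rate variable might be wired into training; the toy regression network below is assumed for illustration only:

    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    pred = fluid.layers.fc(input=x, size=1)
    loss = fluid.layers.mean(fluid.layers.square_error_cost(input=pred, label=y))

    decayed_lr = fluid.layers.linear_lr_warmup(
        fluid.layers.piecewise_decay([100, 200], [0.1, 0.01, 0.001]),
        warmup_steps=50, start_lr=1. / 3., end_lr=0.1)

    # fluid optimizers accept a Variable as the learning rate, so the warmup
    # and decay are evaluated inside the program at every step.
    sgd = fluid.optimizer.SGD(learning_rate=decayed_lr)
    sgd.minimize(loss)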
47 changes: 44 additions & 3 deletions python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py
@@ -120,9 +120,9 @@ def check_decay_with_place(self, place, python_decay_fn, fluid_decay_fn,
self.assertAlmostEqual(
python_decayed_lr,
lr_val[0],
msg='Failed fn is {0}, Python result is {1}, Fluid result is {2}'.
msg='Failed lr scheduler is {0}, step {1}, Python result is {2}, Fluid result is {3}'.
format(python_decay_fn.__name__,
str(python_decayed_lr), str(lr_val[0])))
str(step), str(python_decayed_lr), str(lr_val[0])))

def test_decay(self):
common_kwargs_true = {
@@ -164,12 +164,53 @@ def test_decay(self):
]

for py_decay_fn, fluid_decay_fn, kwargs in decay_fns:
print("decay_fn=" + py_decay_fn.__name__ + " kwargs=" + str(kwargs))
print("class=" + self.__class__.__name__ + "decay_fn=" +
py_decay_fn.__name__ + " kwargs=" + str(kwargs))
main_program = framework.Program()
startup_program = framework.Program()
with framework.program_guard(main_program, startup_program):
self.check_decay(py_decay_fn, fluid_decay_fn, kwargs)


def linear_lr_warmup(global_step, warmup_steps, start_lr, end_lr):
linear_step = end_lr - start_lr
decayed_lr = start_lr + linear_step * (global_step / warmup_steps)
return decayed_lr


class TestLinearWamrupLearningRateDecay(TestLearningRateDecay):
def check_decay_with_place(self, place, python_decay_fn, fluid_decay_fn,
kwargs):
main_prog = fluid.Program()
startup_prog = fluid.Program()

warmup_steps = 10
start_lr = 1. / 3.
end_lr = 0.1

with fluid.program_guard(main_prog, startup_prog):
decayed_lr = layers.linear_lr_warmup(
fluid_decay_fn(**kwargs), warmup_steps, start_lr, end_lr)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)

for step in range(20):
lr_val, = exe.run(main_prog, feed={}, fetch_list=[decayed_lr])
if step < warmup_steps:
python_decayed_lr = linear_lr_warmup(
float(step), warmup_steps, start_lr, end_lr)
else:
python_decayed_lr = python_decay_fn(
global_step=float(step), **kwargs)
self.assertAlmostEqual(
python_decayed_lr,
lr_val[0],
msg='Test {0} Failed, step {1}, Python result is {2}, Fluid result is {3}'.
format(python_decay_fn.__name__,
str(step), str(python_decayed_lr), str(lr_val[0])))


if __name__ == '__main__':
unittest.main()
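
As a quick sanity check of the reference helper used above (an editor's sketch, with the test's settings of warmup_steps=10, start_lr=1/3, end_lr=0.1):

    for step in [0, 5, 9]:
        lr = (1. / 3.) + (0.1 - 1. / 3.) * (step / 10.)
        print(step, round(lr, 4))   # 0 0.3333, 5 0.2167, 9 0.1233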
