From 3232ae882e6c7ee5c3ddfeb8eda12d076c8ebcac Mon Sep 17 00:00:00 2001 From: Sophia Gu <14866379+sophiagu@users.noreply.github.com> Date: Tue, 15 Dec 2020 22:40:21 -0500 Subject: [PATCH] Update run_baselines_agent.py --- Scripts/run_baselines_agent.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Scripts/run_baselines_agent.py b/Scripts/run_baselines_agent.py index 46b021b..6d1e81b 100644 --- a/Scripts/run_baselines_agent.py +++ b/Scripts/run_baselines_agent.py @@ -34,13 +34,13 @@ def _train(env_id, model_params, total_epochs, use_sigmoid_layer=False, is_evalu # learning rate: use 1e-7 for delta hedging and 1e-5 for mean reversion if use_sigmoid_layer: model = PPO2(SigmoidMlpPolicy, envs, n_steps=1, nminibatches=1, - learning_rate=lambda f: f * 1e-7, verbose=1, - policy_kwargs=dict(act_fun=tf.nn.tanh), + learning_rate=lambda f: f * 1e-5, verbose=1, + policy_kwargs=dict(act_fun=tf.nn.relu), **model_params) else: model = PPO2(MlpLstmPolicy, envs, n_steps=1, nminibatches=1, - learning_rate=lambda f: f * 1e-7, verbose=1, - policy_kwargs=dict(act_fun=tf.nn.tanh), + learning_rate=lambda f: f * 1e-5, verbose=1, + policy_kwargs=dict(act_fun=tf.nn.relu), **model_params) model.learn(total_timesteps=total_epochs * L)