From 3232ae882e6c7ee5c3ddfeb8eda12d076c8ebcac Mon Sep 17 00:00:00 2001
From: Sophia Gu <14866379+sophiagu@users.noreply.github.com>
Date: Tue, 15 Dec 2020 22:40:21 -0500
Subject: [PATCH] Update run_baselines_agent.py: use 1e-5 learning rate and ReLU activation

---
 Scripts/run_baselines_agent.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Scripts/run_baselines_agent.py b/Scripts/run_baselines_agent.py
index 46b021b..6d1e81b 100644
--- a/Scripts/run_baselines_agent.py
+++ b/Scripts/run_baselines_agent.py
@@ -34,13 +34,13 @@ def _train(env_id, model_params, total_epochs, use_sigmoid_layer=False, is_evalu
   # learning rate: use 1e-7 for delta hedging and 1e-5 for mean reversion
   if use_sigmoid_layer:
     model = PPO2(SigmoidMlpPolicy, envs, n_steps=1, nminibatches=1,
-                 learning_rate=lambda f: f * 1e-7, verbose=1,
-                 policy_kwargs=dict(act_fun=tf.nn.tanh),
+                 learning_rate=lambda f: f * 1e-5, verbose=1,
+                 policy_kwargs=dict(act_fun=tf.nn.relu),
                  **model_params)
   else:
     model = PPO2(MlpLstmPolicy, envs, n_steps=1, nminibatches=1,
-                 learning_rate=lambda f: f * 1e-7, verbose=1,
-                 policy_kwargs=dict(act_fun=tf.nn.tanh),
+                 learning_rate=lambda f: f * 1e-5, verbose=1,
+                 policy_kwargs=dict(act_fun=tf.nn.relu),
                  **model_params)
 
   model.learn(total_timesteps=total_epochs * L)