
Commit

add plt to show result
MorvanZhou committed Jun 15, 2018
1 parent 682e89b commit 30a82ec
Showing 2 changed files with 18 additions and 9 deletions.
8 changes: 6 additions & 2 deletions experiments/Solve_BipedalWalker/A3C.py
@@ -28,7 +28,7 @@
 UPDATE_GLOBAL_ITER = 10
 GAMMA = 0.99
 ENTROPY_BETA = 0.005
-LR_A = 0.00002 # learning rate for actor
+LR_A = 0.00005 # learning rate for actor
 LR_C = 0.0001 # learning rate for critic
 GLOBAL_RUNNING_R = []
 GLOBAL_EP = 0
@@ -205,5 +205,9 @@ def work(self):
 t.start()
 worker_threads.append(t)
 COORD.join(worker_threads)
 
+import matplotlib.pyplot as plt
+plt.plot(GLOBAL_RUNNING_R)
+plt.xlabel('episode')
+plt.ylabel('global running reward')
+plt.show()
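
Context for the new plot (a sketch, not part of this commit): in these A3C scripts GLOBAL_RUNNING_R is typically appended to by each worker as an exponentially smoothed episode reward, so the added plt.plot(GLOBAL_RUNNING_R) shows a smoothed learning curve rather than raw returns. A minimal sketch of the usual update, assuming ep_r holds the finished episode's total reward and 0.95/0.05 as the smoothing weights:

    # inside a worker's work() loop, after an episode ends (ep_r assumed to hold its total reward)
    if len(GLOBAL_RUNNING_R) == 0:
        GLOBAL_RUNNING_R.append(ep_r)
    else:
        # exponential moving average keeps the curve readable across noisy episodes
        GLOBAL_RUNNING_R.append(0.95 * GLOBAL_RUNNING_R[-1] + 0.05 * ep_r)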

19 changes: 12 additions & 7 deletions experiments/Solve_BipedalWalker/A3C_rnn.py
@@ -26,9 +26,9 @@
 MAX_GLOBAL_EP = 8000
 GLOBAL_NET_SCOPE = 'Global_Net'
 UPDATE_GLOBAL_ITER = 10
-GAMMA = 0.99
-ENTROPY_BETA = 0.005
-LR_A = 0.00001 # learning rate for actor
+GAMMA = 0.9
+ENTROPY_BETA = 0.001
+LR_A = 0.00002 # learning rate for actor
 LR_C = 0.0001 # learning rate for critic
 GLOBAL_RUNNING_R = []
 GLOBAL_EP = 0
@@ -95,22 +95,22 @@ def __init__(self, scope, globalAC=None):
 self.update_c_op = OPT_C.apply_gradients(zip(self.c_grads, globalAC.c_params))
 
 def _build_net(self):
-w_init = tf.random_normal_initializer(0., .01)
+w_init = tf.random_normal_initializer(0., .1)
 with tf.variable_scope('critic'): # only critic controls the rnn update
-cell_size = 128
+cell_size = 126
 s = tf.expand_dims(self.s, axis=1,
 name='timely_input') # [time_step, feature] => [time_step, batch, feature]
 rnn_cell = tf.contrib.rnn.BasicRNNCell(cell_size)
 self.init_state = rnn_cell.zero_state(batch_size=1, dtype=tf.float32)
 outputs, self.final_state = tf.nn.dynamic_rnn(
 cell=rnn_cell, inputs=s, initial_state=self.init_state, time_major=True)
 cell_out = tf.reshape(outputs, [-1, cell_size], name='flatten_rnn_outputs') # joined state representation
-l_c = tf.layers.dense(cell_out, 300, tf.nn.relu6, kernel_initializer=w_init, name='lc')
+l_c = tf.layers.dense(cell_out, 512, tf.nn.relu6, kernel_initializer=w_init, name='lc')
 v = tf.layers.dense(l_c, 1, kernel_initializer=w_init, name='v') # state value
 
 with tf.variable_scope('actor'): # state representation is based on critic
 cell_out = tf.stop_gradient(cell_out, name='c_cell_out') # from what critic think it is
-l_a = tf.layers.dense(cell_out, 400, tf.nn.relu6, kernel_initializer=w_init, name='la')
+l_a = tf.layers.dense(cell_out, 512, tf.nn.relu6, kernel_initializer=w_init, name='la')
 mu = tf.layers.dense(l_a, N_A, tf.nn.tanh, kernel_initializer=w_init, name='mu')
 sigma = tf.layers.dense(l_a, N_A, tf.nn.softplus, kernel_initializer=w_init, name='sigma') # restrict variance
 return mu, sigma, v
@@ -233,3 +233,8 @@ def work(self):
 t.start()
 worker_threads.append(t)
 COORD.join(worker_threads)
+import matplotlib.pyplot as plt
+plt.plot(GLOBAL_RUNNING_R)
+plt.xlabel('episode')
+plt.ylabel('global running reward')
+plt.show()
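
Aside on the RNN critic shown above (assumed usage, not from this diff): because the BasicRNNCell is built with batch_size=1 and exposes init_state / final_state, the worker has to carry the cell state across environment steps and feed it back when choosing the next action. A minimal sketch of that rollout pattern, assuming a choose_action(s, cell_state) helper that runs self.final_state alongside the policy:

    # assumed names (SESS, self.AC, self.env, choose_action) follow the repo's usual worker layout
    rnn_state = SESS.run(self.AC.init_state)                  # zero state at the start of an episode
    while True:
        a, rnn_state_ = self.AC.choose_action(s, rnn_state)   # feed the current cell state
        s_, r, done, _ = self.env.step(a)
        s, rnn_state = s_, rnn_state_                         # reuse final_state as the next init_state
        if done:
            break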
