add reduce_mean for calculating curious loss
MorvanZhou committed Jan 21, 2019
1 parent 7517807 commit 967c829
Showing 2 changed files with 3 additions and 3 deletions.
contents/Curiosity_Model/Curiosity.py (2 changes: 1 addition & 1 deletion)
@@ -76,7 +76,7 @@ def _build_dynamics_net(self, s, a, s_):
         squared_diff = tf.reduce_sum(tf.square(encoded_s_ - dyn_s_), axis=1)  # intrinsic reward

         # It is better to reduce the learning rate in order to stay curious
-        train_op = tf.train.RMSPropOptimizer(self.lr, name="dyn_opt").minimize(squared_diff)
+        train_op = tf.train.RMSPropOptimizer(self.lr, name="dyn_opt").minimize(tf.reduce_mean(squared_diff))
         return dyn_s_, squared_diff, train_op

     def _build_dqn(self, s, a, r, s_):
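The fix is subtle but meaningful: squared_diff has shape (batch_size,), one squared prediction error per transition, and TensorFlow 1.x's tf.gradients treats a non-scalar loss as the sum of its elements, so the old minimize(squared_diff) effectively trained on the batch sum. Wrapping it in tf.reduce_mean trains on the batch average instead, which shrinks the gradient by a factor of the batch size. That matches the comment above the changed line: a smaller effective learning rate slows the dynamics model's learning and keeps the prediction error, and hence the intrinsic reward, from collapsing too quickly.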
contents/Curiosity_Model/Random_Network_Distillation.py (4 changes: 2 additions & 2 deletions)
@@ -74,9 +74,9 @@ def _build_predictor(self, s_, rand_encode_s_):
             out = tf.layers.dense(net, self.s_encode_size)

         with tf.name_scope("int_r"):
-            ri = squared_diff = tf.reduce_sum(tf.square(rand_encode_s_ - out), axis=1)  # intrinsic reward
+            ri = tf.reduce_sum(tf.square(rand_encode_s_ - out), axis=1)  # intrinsic reward
         train_op = tf.train.RMSPropOptimizer(self.lr, name="predictor_opt").minimize(
-            squared_diff, var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "predictor"))
+            tf.reduce_mean(ri), var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "predictor"))

         return ri, train_op

