Skip to content

Commit

Permalink
Miscellanea
Browse files: browse the repository at this point in the history
  • Loading branch information
xlnwel committed Sep 25, 2019
1 parent 598029c commit 530924c
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 11 deletions.
10 changes: 5 additions & 5 deletions algo/rainbow_iqn/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,9 @@ def run(self, obs, start, steps, render):

obs = self.env.reset() if done else next_obs

if hasattr(self, 'saver'):
self.save()

return obs

def act(self, obs, random_act=False, return_q=False):
Expand Down Expand Up @@ -200,7 +203,8 @@ def _build_graph(self):

self.priority, self.loss = self._loss()

self.opt_op, self.learning_rate, self.opt_step = self.Qnets._optimization_op(self.loss,
_, self.learning_rate, self.opt_step, _, self.opt_op = self.Qnets._optimization_op(
self.loss,
tvars=self.Qnets.main_variables,
opt_step=True, schedule_lr=self.Qnets.args['schedule_lr'])

Expand Down Expand Up @@ -435,8 +439,6 @@ def _learn(self, lr=None):

if self.update_step % 100 == 0:
self.writer.add_summary(summary, self.update_step)
if hasattr(self, 'saver'):
self.save()
else:
if self.buffer_type == 'proportional':
priority, saved_mem_idxs, _ = self.sess.run([self.priority,
Expand All @@ -456,8 +458,6 @@ def _learn(self, lr=None):

if self.update_step % 100 == 0:
self.writer.add_summary(summary, self.update_step)
if hasattr(self, 'saver'):
self.save()
else:
if self.buffer_type == 'proportional':
priority, saved_mem_idxs, _ = self.sess.run([self.priority,
Expand Down
5 changes: 3 additions & 2 deletions algo/rainbow_iqn/args.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@ agent:
delta: 1 # kappa in the paper; used in the Huber loss
quantile_embedding_dim: 64
# args for optimizer
schedule_lr: True
learning_rate: 1.e-4
schedule_lr: False
learning_rate: 6.25e-5
# end_lr: 6.25e-5
clip_norm: 10
env_stats:
times: 1
Expand Down
4 changes: 2 additions & 2 deletions basic_model/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,9 @@ def _optimization_op(self, loss, tvars=None, opt_step=None, schedule_lr=False):
with tf.variable_scope(self.name + '_optimizer'):
optimizer, learning_rate, opt_step = self._adam_optimizer(opt_step=opt_step, schedule_lr=schedule_lr)
grads_and_vars = self._compute_gradients(loss, optimizer, tvars=tvars)
opt = self._apply_gradients(optimizer, grads_and_vars, opt_step)
opt_op = self._apply_gradients(optimizer, grads_and_vars, opt_step)

return opt, learning_rate, opt_step
return optimizer, learning_rate, opt_step, grads_and_vars, opt_op

def _adam_optimizer(self, opt_step=None, schedule_lr=False):
# params for optimizer
Expand Down
3 changes: 1 addition & 2 deletions run/single_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,9 @@ def main(env_args, agent_args, buffer_args, render=False):
inter_op_parallelism_threads=1,
allow_soft_placement=True)
sess_config.gpu_options.allow_growth = True
agent = Agent('Agent', agent_args, env_args, buffer_args,
agent = Agent('Agent', agent_args, env_args, buffer_args, save=True,
log_tensorboard=True, log_stats=True, log_params=False, device='/GPU:0')

model = agent_args['model_name']
pwc(f'Model {model} starts training')

agent.train(render, log_steps=int(1e4))

0 comments on commit 530924c

Please sign in to comment.