Network modifications
cdevin committed Sep 19, 2018
1 parent d184744 commit 1729936
Showing 5 changed files with 24 additions and 15 deletions.
examples/dqn_and_double_dqn.py (4 changes: 2 additions & 2 deletions)

@@ -12,13 +12,13 @@
 from rlkit.torch.networks import ObjectMlp, Mlp

-envName = 'DiscretePointmass-v1'
+envName = 'CartPole-v0'

 def experiment(variant):
     env = gym.make(envName)

     training_env = gym.make(envName)

-    qf = ObjectMlp(
+    qf = Mlp(
         hidden_sizes=[32, 32],
         input_size=int(np.prod(env.observation_space.shape)),
         output_size=env.action_space.n,
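The switch from ObjectMlp to the generic Mlp pairs with the environment change, presumably because CartPole-v0 has no object structure to exploit. A minimal sketch of what the modified experiment now constructs (only the pieces shown in this hunk; the rest of the rlkit example is unchanged):

import gym
import numpy as np
from rlkit.torch.networks import Mlp

env = gym.make('CartPole-v0')
# Plain MLP Q-function: flattened observation in, one Q-value per discrete action out.
qf = Mlp(
    hidden_sizes=[32, 32],
    input_size=int(np.prod(env.observation_space.shape)),  # 4 for CartPole
    output_size=env.action_space.n,                        # 2 for CartPole
)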
rlkit/launchers/launcher_util.py (7 changes: 4 additions & 3 deletions)

@@ -182,9 +182,10 @@ def create_exp_name(exp_prefix, exp_id=0, seed=0):
     :param exp_id:
     :return:
     """
-    now = datetime.datetime.now(dateutil.tz.tzlocal())
-    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
-    return "%s_%s_%04d--s-%d" % (exp_prefix, timestamp, exp_id, seed)
+    return exp_prefix
+    # now = datetime.datetime.now(dateutil.tz.tzlocal())
+    # timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
+    # return "%s_%s_%04d--s-%d" % (exp_prefix, timestamp, exp_id, seed)


 def create_log_dir(exp_prefix, exp_id=0, seed=0, base_log_dir=None):
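With this change, create_exp_name returns the bare prefix, so repeated runs with the same exp_prefix now resolve to the same experiment name (and hence the same log directory) instead of unique timestamped ones. For comparison, a standalone sketch of the pre-commit naming scheme, reconstructed from the commented-out lines (the example output is hypothetical):

import datetime
import dateutil.tz

def timestamped_exp_name(exp_prefix, exp_id=0, seed=0):
    # Pre-commit behavior: one unique name per run,
    # e.g. 'dqn_2018_09_19_14_30_00_0000--s-0'
    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
    return "%s_%s_%04d--s-%d" % (exp_prefix, timestamp, exp_id, seed)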
rlkit/samplers/util.py (7 changes: 4 additions & 3 deletions)

@@ -1,5 +1,6 @@
 import numpy as np
 TEST=False
+TEST=True

 def rollout(env, agent, max_path_length=np.inf, animated=False):
     """
@@ -36,9 +37,9 @@ def rollout(env, agent, max_path_length=np.inf, animated=False):
     #import pdb; pdb.set_trace()
     num = str(np.random.rand(1)[0])[1:3]
     if TEST:
-        env.env.log_filename = '/home/coline/test/Research2018/results/numpy/object'+num+'.npy'
-        filename_w ='/home/coline/test/Research2018/results/numpy/object'+num+'weights.npy'
-        filename_a ='/home/coline/test/Research2018/results/numpy/object'+num+'actions.npy'
+        env.env.log_filename = '/home/coline/test/Research2018/results/numpy/fullobject'+num+'.npy'
+        filename_w ='/home/coline/test/Research2018/results/numpy/fullobject'+num+'weights.npy'
+        filename_a ='/home/coline/test/Research2018/results/numpy/fullobject'+num+'actions.npy'
     ws = []
     p_as = []
     while path_length < max_path_length:
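Two details worth noting in this rollout logger: the added TEST=True shadows the TEST=False above it and turns the dumps on for every rollout, and the random file suffix comes from stringifying a float, so the [1:3] slice keeps the decimal point plus one digit. A quick sketch of the suffix behavior (illustrative only):

import numpy as np

num = str(np.random.rand(1)[0])[1:3]  # str(0.7234...) starts '0.7', so num is e.g. '.7'
print('fullobject' + num + 'weights.npy')  # e.g. 'fullobject.7weights.npy'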
rlkit/torch/networks.py (18 changes: 11 additions & 7 deletions)

@@ -230,7 +230,7 @@ def forward(self, input, return_preactivations=False):
         p_a_tot = F.softmax(final_action, dim=1)
         max, indices = p_a_tot.data.max(dim=1)
         self.weights = [float(p[0, int(indices[0])]) for p in p_a]
-        #print(self.weights)
+        print(self.weights)
         self.p_a_tot = p_a_tot.data.numpy()
         #self.myactions = actions.data.numpy()
         #import pdb; pdb.set_trace()
@@ -280,13 +280,16 @@ def __init__(
         self.fcs = []
         self.layer_norms = []
         in_size = input_size
-        def mlp(layer_sizes, scope):
+        def mlp(layer_sizes, scope, last_layer_init=None):
             in_size = layer_sizes[0]
             layers = []
             for i, next_size in enumerate(layer_sizes[1:]):
                 fc = nn.Linear(in_size, next_size)
                 in_size = next_size
-                hidden_init(fc.weight)
+                if i == len(layer_sizes[:1])-1 and last_layer_init is not None:
+                    last_layer_init(fc.weight)
+                else:
+                    hidden_init(fc.weight)
                 fc.bias.data.fill_(b_init_value)
                 layers.append(fc)
                 self.__setattr__(scope+"fc{}".format(i), fc)
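The new last_layer_init hook lets a network override the initializer for one layer. As committed, the guard compares i against len(layer_sizes[:1])-1, which is always 0, so the override hits the first constructed layer; selecting the final layer would presumably compare against len(layer_sizes[1:])-1 instead. A standalone sketch under that reading (make_mlp and the hidden_init default are hypothetical stand-ins, not rlkit API; rlkit uses fanin_init for hidden layers):

import torch.nn as nn

def make_mlp(layer_sizes, hidden_init=nn.init.xavier_uniform_,
             last_layer_init=None, b_init_value=0.1):
    # layer_sizes[1:] are the constructed layers, so the final one has index
    # len(layer_sizes[1:]) - 1. (The committed layer_sizes[:1] has length 1,
    # making the test i == 0, i.e. the first layer.)
    layers = []
    in_size = layer_sizes[0]
    for i, next_size in enumerate(layer_sizes[1:]):
        fc = nn.Linear(in_size, next_size)
        in_size = next_size
        if i == len(layer_sizes[1:]) - 1 and last_layer_init is not None:
            last_layer_init(fc.weight)  # e.g. zero the output layer's weights
        else:
            hidden_init(fc.weight)
        fc.bias.data.fill_(b_init_value)
        layers.append(fc)
    return nn.ModuleList(layers)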
@@ -295,7 +298,7 @@ def mlp(layer_sizes, scope):
         self.affordance_mlp = mlp([num_classes,20,self.AFF_SIZE], 'affordance')
         self.weight_mlp = mlp([self.AGENT_SIZE+num_classes, 10,1], 'weight')
         self.translator_mlp = mlp([self.AFF_SIZE+self.AGENT_SIZE+2, 10,10, output_size], 'translator')
-        self.task_attention_mlp = mlp([self.num_tasks+self.AFF_SIZE, 10, 1], 'task_attention')
+        self.task_attention_mlp = mlp([self.num_tasks+num_classes+self.AGENT_SIZE, 10, 1,], 'task_attention', ptu.zeros_init)
         # self.last_fc = nn.Linear(in_size, output_size)
         # self.last_fc.weight.data.uniform_(-init_w, init_w)
         # self.last_fc.bias.data.uniform_(-init_w, init_w)
@@ -347,9 +350,10 @@ def forward(self, input, return_preactivations=False):
         actions = []
         for obj, cls, cls_var in objects:
             actions.append(self._apply_translator(agent, obj, affordances[cls]))
-            weights.append(self._run_mlp( torch.cat((task_input, affordances[cls]), dim=1),
+            weights.append(self._run_mlp( torch.cat((task_input, cls_var, agent), dim=1),
                                          self.task_attention_mlp, F.relu, identity))
         weights_ = torch.cat(weights, dim=1)
+        #import pdb; pdb.set_trace()
         softweights = torch.unsqueeze(F.softmax(weights_, dim=1), dim=2)
         stacked_actions = torch.stack(actions, dim=1)
         weighted_actions = stacked_actions*softweights
@@ -359,8 +363,8 @@ def forward(self, input, return_preactivations=False):
         p_a = [F.softmax(a, dim=1) for a in actions]
         p_a_tot = F.softmax(final_action, dim=1)
         max, indices = p_a_tot.data.max(dim=1)
-        self.weights = [float(p[0, int(indices[0])]) for p in p_a]
-        #print(self.weights)
+        self.weights = [float(w[0]) for w in softweights[0]]# [float(p[0, int(indices[0])]) for p in p_a]
+        # print(self.weights)
         self.p_a_tot = p_a_tot.data.numpy()
         #self.myactions = actions.data.numpy()
         #import pdb; pdb.set_trace()
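Net effect of the networks.py changes: the task-attention head now scores each object from the task input, the object's class variable, and the agent state (rather than the task input and the class affordance embedding), its weights start from ptu.zeros_init, and self.weights now reports the softmax attention itself instead of per-object action probabilities. A shape-level sketch of the attention mixing with hypothetical sizes (the final sum over objects is assumed; it falls outside the shown hunks):

import torch
import torch.nn.functional as F

batch, n_objects, n_actions = 1, 3, 5
weights_ = torch.randn(batch, n_objects)                  # one attention logit per object
actions = [torch.randn(batch, n_actions) for _ in range(n_objects)]

softweights = torch.unsqueeze(F.softmax(weights_, dim=1), dim=2)  # (1, 3, 1)
stacked_actions = torch.stack(actions, dim=1)                     # (1, 3, 5)
weighted_actions = stacked_actions * softweights                  # broadcasts to (1, 3, 5)
final_action = weighted_actions.sum(dim=1)                        # (1, 5): attention-weighted mix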
rlkit/torch/pytorch_util.py (3 changes: 3 additions & 0 deletions)

@@ -147,6 +147,9 @@ def fanin_init(tensor):
     bound = 1. / np.sqrt(fan_in)
     return tensor.uniform_(-bound, bound)

+def zeros_init(tensor):
+    return fanin_init(tensor)*0.0
+

 def fanin_init_weights_like(tensor):
     if isinstance(tensor, TorchVariable):
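zeros_init is what the task-attention head above passes as last_layer_init: all-zero logits into a softmax give uniform attention over objects at the start of training. Note, though, that fanin_init fills the tensor in place and returns it, while the *0.0 allocates a new zero tensor, so a caller that discards the return value (as mlp() does) is left with fanin-initialized weights rather than zeros; an in-place tensor.data.zero_() would pin them. A sketch of the intended effect (standalone):

import torch
import torch.nn.functional as F

# Zeroed attention logits -> uniform softmax over objects:
logits = torch.zeros(1, 3)
print(F.softmax(logits, dim=1))  # tensor([[0.3333, 0.3333, 0.3333]])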
