Network modifications
cdevin committed Sep 19, 2018
1 parent d184744 commit 1729936
Showing 5 changed files with 24 additions and 15 deletions.
examples/dqn_and_double_dqn.py (4 changes: 2 additions & 2 deletions)

@@ -12,13 +12,13 @@
 from rlkit.torch.networks import ObjectMlp, Mlp

-envName = 'DiscretePointmass-v1'
+envName = 'CartPole-v0'

 def experiment(variant):
     env = gym.make(envName)

     training_env = gym.make(envName)

-    qf = ObjectMlp(
+    qf = Mlp(
         hidden_sizes=[32, 32],
         input_size=int(np.prod(env.observation_space.shape)),
         output_size=env.action_space.n,
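The switch from ObjectMlp to the generic Mlp pairs with the environment change, presumably because CartPole-v0 has no object structure to exploit. A minimal sketch of what the modified experiment now constructs (only the pieces shown in this hunk; the rest of the rlkit example is unchanged):

import gym
import numpy as np
from rlkit.torch.networks import Mlp

env = gym.make('CartPole-v0')
# Plain MLP Q-function: flattened observation in, one Q-value per discrete action out.
qf = Mlp(
    hidden_sizes=[32, 32],
    input_size=int(np.prod(env.observation_space.shape)),  # 4 for CartPole
    output_size=env.action_space.n,                        # 2 for CartPole
)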
rlkit/launchers/launcher_util.py (7 changes: 4 additions & 3 deletions)

@@ -182,9 +182,10 @@ def create_exp_name(exp_prefix, exp_id=0, seed=0):
     :param exp_id:
     :return:
     """
-    now = datetime.datetime.now(dateutil.tz.tzlocal())
-    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
-    return "%s_%s_%04d--s-%d" % (exp_prefix, timestamp, exp_id, seed)
+    return exp_prefix
+    # now = datetime.datetime.now(dateutil.tz.tzlocal())
+    # timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
+    # return "%s_%s_%04d--s-%d" % (exp_prefix, timestamp, exp_id, seed)


 def create_log_dir(exp_prefix, exp_id=0, seed=0, base_log_dir=None):
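With this change, create_exp_name returns the bare prefix, so repeated runs with the same exp_prefix now resolve to the same experiment name (and hence the same log directory) instead of unique timestamped ones. For comparison, a standalone sketch of the pre-commit naming scheme, reconstructed from the commented-out lines (the example output is hypothetical):

import datetime
import dateutil.tz

def timestamped_exp_name(exp_prefix, exp_id=0, seed=0):
    # Pre-commit behavior: one unique name per run,
    # e.g. 'dqn_2018_09_19_14_30_00_0000--s-0'
    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
    return "%s_%s_%04d--s-%d" % (exp_prefix, timestamp, exp_id, seed)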
rlkit/samplers/util.py (7 changes: 4 additions & 3 deletions)

@@ -1,5 +1,6 @@
 import numpy as np
 TEST=False
+TEST=True

 def rollout(env, agent, max_path_length=np.inf, animated=False):
     """
@@ -36,9 +37,9 @@ def rollout(env, agent, max_path_length=np.inf, animated=False):
     #import pdb; pdb.set_trace()
     num = str(np.random.rand(1)[0])[1:3]
     if TEST:
-        env.env.log_filename = '/home/coline/test/Research2018/results/numpy/object'+num+'.npy'
-        filename_w ='/home/coline/test/Research2018/results/numpy/object'+num+'weights.npy'
-        filename_a ='/home/coline/test/Research2018/results/numpy/object'+num+'actions.npy'
+        env.env.log_filename = '/home/coline/test/Research2018/results/numpy/fullobject'+num+'.npy'
+        filename_w ='/home/coline/test/Research2018/results/numpy/fullobject'+num+'weights.npy'
+        filename_a ='/home/coline/test/Research2018/results/numpy/fullobject'+num+'actions.npy'
     ws = []
     p_as = []
     while path_length < max_path_length:
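Two details worth noting in this rollout logger: the added TEST=True shadows the TEST=False above it and turns the dumps on for every rollout, and the random file suffix comes from stringifying a float, so the [1:3] slice keeps the decimal point plus one digit. A quick sketch of the suffix behavior (illustrative only):

import numpy as np

num = str(np.random.rand(1)[0])[1:3]  # str(0.7234...) starts '0.7', so num is e.g. '.7'
print('fullobject' + num + 'weights.npy')  # e.g. 'fullobject.7weights.npy'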
rlkit/torch/networks.py (18 changes: 11 additions & 7 deletions)

@@ -230,7 +230,7 @@ def forward(self, input, return_preactivations=False):
         p_a_tot = F.softmax(final_action, dim=1)
         max, indices = p_a_tot.data.max(dim=1)
         self.weights = [float(p[0, int(indices[0])]) for p in p_a]
-        #print(self.weights)
+        print(self.weights)
         self.p_a_tot = p_a_tot.data.numpy()
         #self.myactions = actions.data.numpy()
         #import pdb; pdb.set_trace()
@@ -280,13 +280,16 @@ def __init__(
         self.fcs = []
         self.layer_norms = []
         in_size = input_size
-        def mlp(layer_sizes, scope):
+        def mlp(layer_sizes, scope, last_layer_init=None):
             in_size = layer_sizes[0]
             layers = []
             for i, next_size in enumerate(layer_sizes[1:]):
                 fc = nn.Linear(in_size, next_size)
                 in_size = next_size
-                hidden_init(fc.weight)
+                if i == len(layer_sizes[:1])-1 and last_layer_init is not None:
+                    last_layer_init(fc.weight)
+                else:
+                    hidden_init(fc.weight)
                 fc.bias.data.fill_(b_init_value)
                 layers.append(fc)
                 self.__setattr__(scope+"fc{}".format(i), fc)
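The new last_layer_init hook lets a network override the initializer for one layer. As committed, the guard compares i against len(layer_sizes[:1])-1, which is always 0, so the override hits the first constructed layer; selecting the final layer would presumably compare against len(layer_sizes[1:])-1 instead. A standalone sketch under that reading (make_mlp and the hidden_init default are hypothetical stand-ins, not rlkit API; rlkit uses fanin_init for hidden layers):

import torch.nn as nn

def make_mlp(layer_sizes, hidden_init=nn.init.xavier_uniform_,
             last_layer_init=None, b_init_value=0.1):
    # layer_sizes[1:] are the constructed layers, so the final one has index
    # len(layer_sizes[1:]) - 1. (The committed layer_sizes[:1] has length 1,
    # making the test i == 0, i.e. the first layer.)
    layers = []
    in_size = layer_sizes[0]
    for i, next_size in enumerate(layer_sizes[1:]):
        fc = nn.Linear(in_size, next_size)
        in_size = next_size
        if i == len(layer_sizes[1:]) - 1 and last_layer_init is not None:
            last_layer_init(fc.weight)  # e.g. zero the output layer's weights
        else:
            hidden_init(fc.weight)
        fc.bias.data.fill_(b_init_value)
        layers.append(fc)
    return nn.ModuleList(layers)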
@@ -295,7 +298,7 @@ def mlp(layer_sizes, scope):
         self.affordance_mlp = mlp([num_classes,20,self.AFF_SIZE], 'affordance')
         self.weight_mlp = mlp([self.AGENT_SIZE+num_classes, 10,1], 'weight')
         self.translator_mlp = mlp([self.AFF_SIZE+self.AGENT_SIZE+2, 10,10, output_size], 'translator')
-        self.task_attention_mlp = mlp([self.num_tasks+self.AFF_SIZE, 10, 1], 'task_attention')
+        self.task_attention_mlp = mlp([self.num_tasks+num_classes+self.AGENT_SIZE, 10, 1,], 'task_attention', ptu.zeros_init)
         # self.last_fc = nn.Linear(in_size, output_size)
         # self.last_fc.weight.data.uniform_(-init_w, init_w)
         # self.last_fc.bias.data.uniform_(-init_w, init_w)
@@ -347,9 +350,10 @@ def forward(self, input, return_preactivations=False):
         actions = []
         for obj, cls, cls_var in objects:
             actions.append(self._apply_translator(agent, obj, affordances[cls]))
-            weights.append(self._run_mlp( torch.cat((task_input, affordances[cls]), dim=1),
+            weights.append(self._run_mlp( torch.cat((task_input, cls_var, agent), dim=1),
                                          self.task_attention_mlp, F.relu, identity))
         weights_ = torch.cat(weights, dim=1)
+        #import pdb; pdb.set_trace()
         softweights = torch.unsqueeze(F.softmax(weights_, dim=1), dim=2)
         stacked_actions = torch.stack(actions, dim=1)
         weighted_actions = stacked_actions*softweights
@@ -359,8 +363,8 @@ def forward(self, input, return_preactivations=False):
         p_a = [F.softmax(a, dim=1) for a in actions]
         p_a_tot = F.softmax(final_action, dim=1)
         max, indices = p_a_tot.data.max(dim=1)
-        self.weights = [float(p[0, int(indices[0])]) for p in p_a]
-        #print(self.weights)
+        self.weights = [float(w[0]) for w in softweights[0]]# [float(p[0, int(indices[0])]) for p in p_a]
+        # print(self.weights)
         self.p_a_tot = p_a_tot.data.numpy()
         #self.myactions = actions.data.numpy()
         #import pdb; pdb.set_trace()
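Net effect of the networks.py changes: the task-attention head now scores each object from the task input, the object's class variable, and the agent state (rather than the task input and the class affordance embedding), its weights start from ptu.zeros_init, and self.weights now reports the softmax attention itself instead of per-object action probabilities. A shape-level sketch of the attention mixing with hypothetical sizes (the final sum over objects is assumed; it falls outside the shown hunks):

import torch
import torch.nn.functional as F

batch, n_objects, n_actions = 1, 3, 5
weights_ = torch.randn(batch, n_objects)                  # one attention logit per object
actions = [torch.randn(batch, n_actions) for _ in range(n_objects)]

softweights = torch.unsqueeze(F.softmax(weights_, dim=1), dim=2)  # (1, 3, 1)
stacked_actions = torch.stack(actions, dim=1)                     # (1, 3, 5)
weighted_actions = stacked_actions * softweights                  # broadcasts to (1, 3, 5)
final_action = weighted_actions.sum(dim=1)                        # (1, 5): attention-weighted mix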
rlkit/torch/pytorch_util.py (3 changes: 3 additions & 0 deletions)

@@ -147,6 +147,9 @@ def fanin_init(tensor):
     bound = 1. / np.sqrt(fan_in)
     return tensor.uniform_(-bound, bound)

+def zeros_init(tensor):
+    return fanin_init(tensor)*0.0
+

 def fanin_init_weights_like(tensor):
     if isinstance(tensor, TorchVariable):
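zeros_init is what the task-attention head above passes as last_layer_init: all-zero logits into a softmax give uniform attention over objects at the start of training. Note, though, that fanin_init fills the tensor in place and returns it, while the *0.0 allocates a new zero tensor, so a caller that discards the return value (as mlp() does) is left with fanin-initialized weights rather than zeros; an in-place tensor.data.zero_() would pin them. A sketch of the intended effect (standalone):

import torch
import torch.nn.functional as F

# Zeroed attention logits -> uniform softmax over objects:
logits = torch.zeros(1, 3)
print(F.softmax(logits, dim=1))  # tensor([[0.3333, 0.3333, 0.3333]])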
