update tdm envs to use mujoco-py 1.3
vitchyr committed May 21, 2018
1 parent 2c8561d commit 495d085
Showing 3 changed files with 17 additions and 17 deletions.
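Every change in this commit follows the same two-part pattern: simulation state that older mujoco-py exposed as env.model.data is read from env.sim.data in the newer mujoco-py API, and the gym MujocoEnv override point is renamed from _step to step. A minimal sketch of the observation-access change, assuming a gym-style MuJoCo env; get_flat_obs is an illustrative helper, not part of rlkit:

    import numpy as np

    def get_flat_obs(env):
        # Newer mujoco-py exposes qpos/qvel under env.sim.data;
        # older versions kept the same arrays under env.model.data.
        data = env.sim.data if hasattr(env, "sim") else env.model.data
        return np.concatenate([
            data.qpos.flat[2:],  # drop the root x/y position, as these envs do
            data.qvel.flat,
        ])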
8 changes: 4 additions & 4 deletions rlkit/envs/ant.py
@@ -16,15 +16,15 @@ def __init__(self, use_low_gear_ratio=True):
             automatically_set_obs_and_action_space=True,
         )
 
-    def _step(self, a):
+    def step(self, a):
         torso_xyz_before = self.get_body_com("torso")
         self.do_simulation(a, self.frame_skip)
         torso_xyz_after = self.get_body_com("torso")
         torso_velocity = torso_xyz_after - torso_xyz_before
         forward_reward = torso_velocity[0]/self.dt
         ctrl_cost = .5 * np.square(a).sum()
         contact_cost = 0.5 * 1e-3 * np.sum(
-            np.square(np.clip(self.model.data.cfrc_ext, -1, 1)))
+            np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
         survive_reward = 1.0
         reward = forward_reward - ctrl_cost - contact_cost + survive_reward
         state = self.state_vector()
@@ -42,8 +42,8 @@ def _step(self, a):
 
     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[2:],
-            self.model.data.qvel.flat,
+            self.sim.data.qpos.flat[2:],
+            self.sim.data.qvel.flat,
         ])
 
     def reset_model(self):
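The _step-to-step rename matches gym's move away from the private override hook: subclasses now override step directly and chain to super().step. A hedged sketch of the override-and-chain pattern used throughout this commit; both classes are illustrative stand-ins, not rlkit code:

    class BaseEnvSketch:
        # Stands in for a gym-style MujocoEnv after the rename.
        def step(self, action):
            ob = [0.0] * 29
            return ob, 0.0, False, {}

    class GoalConditionedSketch(BaseEnvSketch):
        multitask_goal = (0.0, 0.0)

        def step(self, action):
            # Override step (formerly _step), let the parent simulate,
            # then recompute the reward from the observation.
            ob, _, done, info = super().step(action)
            xy = ob[27:29]  # torso x/y, as convert_obs_to_goals does below
            dx = xy[0] - self.multitask_goal[0]
            dy = xy[1] - self.multitask_goal[1]
            reward = -(dx * dx + dy * dy) ** 0.5
            return ob, reward, done, info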
22 changes: 11 additions & 11 deletions rlkit/torch/tdm/envs/ant_env.py
@@ -38,20 +38,20 @@ def set_goal(self, goal):
         site_pos = self.model.site_pos.copy()
         site_pos[0, 0:2] = goal
         site_pos[0, 2] = 0.5
-        self.model.site_pos = site_pos
+        self.model.site_pos[:] = site_pos
 
     def convert_obs_to_goals(self, obs):
         return obs[:, 27:29]
 
     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[2:],
-            self.model.data.qvel.flat,
+            self.sim.data.qpos.flat[2:],
+            self.sim.data.qvel.flat,
             self.get_body_com("torso"),
         ])
 
-    def _step(self, action):
-        ob, _, done, info_dict = super()._step(action)
+    def step(self, action):
+        ob, _, done, info_dict = super().step(action)
         xy_pos = self.convert_ob_to_goal(ob)
         pos_error = np.linalg.norm(xy_pos - self.multitask_goal)
         reward = - pos_error
@@ -181,12 +181,12 @@ def set_goal(self, goal):
         site_pos = self.model.site_pos.copy()
         site_pos[0, 0:2] = goal[:2]
         site_pos[0, 2] = 0.5
-        self.model.site_pos = site_pos
+        self.model.site_pos[:] = site_pos
 
     def _get_obs(self):
         raise NotImplementedError()
 
-    def _step(self, action):
+    def step(self, action):
         # get_body_comvel doesn't work, so you need to save the last position
         torso_xyz_before = self.get_body_com("torso")
         self.do_simulation(action, self.frame_skip)
@@ -196,8 +196,8 @@ def _step(self, action):
         done = False
 
         ob = np.hstack((
-            self.model.data.qpos.flat[2:],
-            self.model.data.qvel.flat,
+            self.sim.data.qpos.flat[2:],
+            self.sim.data.qvel.flat,
             self.get_body_com("torso"),
             torso_velocity,
         ))
@@ -224,8 +224,8 @@ def reset_model(self):
         qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
         self.set_state(qpos, qvel)
         return np.hstack((
-            self.model.data.qpos.flat[2:],
-            self.model.data.qvel.flat,
+            self.sim.data.qpos.flat[2:],
+            self.sim.data.qvel.flat,
             self.get_body_com("torso"),
             np.zeros(3),  # init velocity is zero
         ))
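The site_pos edit is the subtle one: in newer mujoco-py, model.site_pos returns a numpy view backed by the underlying MuJoCo model, so a slice write ([:] = ...) mutates the model in place, while plainly rebinding the attribute either fails or leaves the model untouched, depending on the version. The distinction, sketched on a bare numpy array with no mujoco-py required (backing stands in for the model's site_pos storage):

    import numpy as np

    backing = np.zeros((1, 3))             # the model-owned storage
    new_pos = np.array([[1.0, 2.0, 0.5]])

    view = backing                         # what model.site_pos hands back
    view = new_pos                         # rebinds the local name only
    assert backing[0, 2] == 0.0            # backing is untouched

    view = backing
    view[:] = new_pos                      # writes through the view
    assert backing[0, 2] == 0.5            # backing now holds the goal site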
4 changes: 2 additions & 2 deletions rlkit/torch/tdm/envs/half_cheetah_env.py
@@ -34,8 +34,8 @@ def set_goal(self, goal):
         MultitaskEnv.set_goal(self, goal)
         self.target_x_vel = goal
 
-    def _step(self, action):
-        ob, _, done, info_dict = super()._step(action)
+    def step(self, action):
+        ob, _, done, info_dict = super().step(action)
         xvel = ob[8]
         desired_xvel = self.target_x_vel
         xvel_error = np.linalg.norm(xvel - desired_xvel)
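With the rename applied everywhere, these envs are driven through the ordinary gym loop. A minimal usage sketch, assuming only a gym-style env with reset, step, and action_space:

    def rollout(env, horizon=100):
        # Call step(), never _step(), on the updated envs.
        ob = env.reset()
        total_reward = 0.0
        for _ in range(horizon):
            ob, reward, done, info = env.step(env.action_space.sample())
            total_reward += reward
            if done:
                break
        return total_reward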
