Skip to content

Commit

Permalink
Record reward for unsupervised env and fix a small bug.
Browse files Browse the repository at this point in the history
The bug was that the health points could briefly become negative, and if
this happened at the end of an episode, it could result in the agent
receiving a larger penalty on death. We're not increasing the major
version of the package because the official Crafter report has not been
released yet.
  • Loading branch information
danijar committed Sep 7, 2021
1 parent 8a0dc84 commit f366752
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 14 deletions.
22 changes: 11 additions & 11 deletions crafter/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,17 +94,14 @@ def step(self, action):
# if self._player.distance(center) < 4 * max(self._view):
self._balance_chunk(chunk, objs)
obs = self._obs()
if self._reward:
reward = (self._player.health - self._last_health) / 10
self._last_health = self._player.health
unlocked = {
name for name, count in self._player.achievements.items()
if count > 0 and name not in self._unlocked}
if unlocked:
self._unlocked |= unlocked
reward += 1.0
else:
reward = 0.0
reward = (self._player.health - self._last_health) / 10
self._last_health = self._player.health
unlocked = {
name for name, count in self._player.achievements.items()
if count > 0 and name not in self._unlocked}
if unlocked:
self._unlocked |= unlocked
reward += 1.0
dead = self._player.health <= 0
over = self._length and self._step >= self._length
done = dead or over
Expand All @@ -114,7 +111,10 @@ def step(self, action):
'discount': 1 - float(dead),
'semantic': self._sem_view(),
'player_pos': self._player.pos,
'reward': reward,
}
if not self._reward:
reward = 0.0
return obs, reward, done, info

def render(self, size=None):
Expand Down
2 changes: 1 addition & 1 deletion crafter/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def health(self):

@health.setter
def health(self, value):
self.inventory['health'] = value
self.inventory['health'] = max(0, value)

@property
def all_dirs(self):
Expand Down
5 changes: 4 additions & 1 deletion crafter/recorder.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def __init__(self, env, directory):
self._directory.mkdir(exist_ok=True, parents=True)
self._file = (self._directory / 'stats.jsonl').open('a')
self._length = None
self._reward = None
self._unlocked = None
self._stats = None

Expand All @@ -44,15 +45,17 @@ def __getattr__(self, name):
# Begin a new episode: reset the wrapped environment and clear the
# recorder's per-episode bookkeeping, then hand back the first observation.
def reset(self):
obs = self._env.reset()
# Step counter for the episode; step() increments it once per call.
self._length = 0
# Running sum of info['reward'] for the episode; step() adds to it and
# writes the rounded total into the stats when the episode is done.
self._reward = 0
self._unlocked = None
# Stats dict is only built by step() once the episode has finished.
self._stats = None
return obs

def step(self, action):
obs, reward, done, info = self._env.step(action)
self._length += 1
self._reward += info['reward']
if done:
self._stats = {'length': self._length}
self._stats = {'length': self._length, 'reward': round(self._reward, 1)}
for key, value in info['achievements'].items():
self._stats[f'achievement_{key}'] = value
self._save()
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

setuptools.setup(
name='crafter',
version='1.5.1',
version='1.6.0',
description='Open world survival game for reinforcement learning.',
url='http://github.com/danijar/crafter',
long_description=pathlib.Path('README.md').read_text(),
Expand Down

0 comments on commit f366752

Please sign in to comment.