Skip to content

Commit

Permalink
Rotating maze environment (#19)
Browse files Browse the repository at this point in the history
  • Loading branch information
khozzy authored Mar 19, 2021
1 parent 6f942cf commit fc043b6
Show file tree
Hide file tree
Showing 48 changed files with 860 additions and 517 deletions.
3 changes: 1 addition & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ python:
install:
- pip install -e ".[testing]"
script:
- python setup.py flake8
- python setup.py test
- make test
deploy:
skip_cleanup: true
provider: pypi
Expand Down
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Lint with flake8, then run the test suite (CI invokes this as `make test`).
# Declared phony so a file or directory named `test` never masks the target.
.PHONY: test
test:
	flake8
	py.test
16 changes: 1 addition & 15 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,7 @@ The repository contains environments used in LCS literature that are compliant w

[![Build Status](https://travis-ci.org/ParrotPrediction/openai-envs.svg?branch=master)](https://travis-ci.org/ParrotPrediction/openai-envs)


## Currently implemented environments

- Maze (different variants)
- Binary Multiplexer
- Real Multiplexer
- Hand Eye
- Checkerboard
- Real-valued toy problems
- 1D Corridor
- 2D Grid
- Mountain Car with energy reward
- Finite State World (FSW)

For some usage examples look at [examples/](examples) directory.
For usage examples, see the [examples/](examples) directory.

## Development

Expand Down
4 changes: 1 addition & 3 deletions examples/checkerboard.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import logging
import gym

# noinspection PyUnresolvedReferences
import gym_checkerboard
import gym_checkerboard # noqa: F401

logging.basicConfig(level=logging.INFO)

Expand Down
4 changes: 1 addition & 3 deletions examples/handeye.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import logging

import gym

# noinspection PyUnresolvedReferences
import gym_handeye
import gym_handeye # noqa: F401

logging.basicConfig(level=logging.DEBUG)

Expand Down
11 changes: 6 additions & 5 deletions examples/maze.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import logging
from gym_maze.internal.maze_impl import ACTION_LOOKUP

import gym

# noinspection PyUnresolvedReferences
import gym_maze
import gym_maze # noqa: F401

logging.basicConfig(level=logging.DEBUG)

Expand All @@ -20,11 +19,13 @@

action = maze.action_space.sample()

logging.info("\t\tExecuted action: [{}]".format(action))
logging.info("\t\tExecuted action: [{}]"
.format(ACTION_LOOKUP[action]))

observation, reward, done, info = maze.step(action)

if done:
logging.info(f"Episode finished after {t+1} timesteps.")
logging.info(f"Episode finished after {t+1} steps.")
logging.info(f"Last reward: {reward}")
break

Expand Down
28 changes: 28 additions & 0 deletions examples/maze_rotating.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import logging

import gym
import gym_maze # noqa: F401

logging.basicConfig(level=logging.DEBUG)

if __name__ == '__main__':
    # Create the environment registered under the 'Maze288-v0' id and
    # render it once.
    maze = gym.make('Maze288-v0')
    maze.render()
    # NOTE(review): the random-action episode loop below is disabled, so
    # this example currently only builds and renders the environment.
    # for i_episode in range(1):
    #     observation = maze.reset()
    #
    #     for t in range(100):
    #         logging.info(f"Time: [{t}], observation: [{observation}]")
    #
    #         action = maze.action_space.sample()
    #
    #         logging.info("\t\tExecuted action: [{}]".format(action))
    #
    #         observation, reward, done, info = maze.step(action)
    #
    #         if done:
    #             logging.info(f"Episode finished after {t+1} steps.")
    #             logging.info(f"Last reward: {reward}")
    #             break

logging.info("Finished")
2 changes: 1 addition & 1 deletion examples/taxi.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import gym
import gym_taxi_goal
import gym_taxi_goal # noqa: F401


if __name__ == '__main__':
Expand Down
2 changes: 1 addition & 1 deletion gym_checkerboard/checkerboard_simulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def __init__(self, dim: int, ndiv: int) -> None:
self.board = self._build_board()

def _build_board(self) -> np.ndarray:
x = np.empty(pow(self.nd, self.n), dtype=np.bool)
x = np.empty(pow(self.nd, self.n), dtype=bool)

# alternating cell colors
x[:] = WHITE
Expand Down
5 changes: 2 additions & 3 deletions gym_corridor/tests/test_corridor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import gym

# noinspection PyUnresolvedReferences
import gym_corridor # noqa: F401
from gym_corridor.corridor import MOVE_LEFT, MOVE_RIGHT

Expand All @@ -30,7 +29,7 @@ def test_should_visualize(self):
vis = corr.render(mode='ansi')

# then
assert 1 <= int(obs) < 20
assert 0 <= int(obs) < 19
assert len(vis) == 22
assert 1 == vis.count('X')
assert 1 == vis.count('$')
Expand Down Expand Up @@ -88,7 +87,7 @@ def test_should_move_in_both_directions(self):
corr = gym.make('corridor-20-v0')
p0 = corr.reset()

while p0 in ["1", "19"]:
while p0 in ["0", "19"]:
p0 = corr.reset()

# when & then
Expand Down
2 changes: 1 addition & 1 deletion gym_fsw/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from gym.envs.registration import register

from .fsw import FiniteStateWorld
from .fsw import FiniteStateWorld # noqa: F401

register(
id='fsw-5-v0',
Expand Down
1 change: 0 additions & 1 deletion gym_fsw/tests/test_fsw.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ def test_should_follow_suboptimal_path_and_find_reward(self):
assert reward == 100
assert done is True


def test_should_initialize_bigger_environment(self):
# given
fsw = gym.make('fsw-10-v0')
Expand Down
20 changes: 10 additions & 10 deletions gym_grid/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,17 +98,17 @@ def _state_action(self):
Return states and possible actions in each of them
"""

# Assign all actions for all states
mapping = {}
# Assign all actions for all states (mapping)
m = {}
for x in range(1, self._size + 1):
for y in range(1, self._size + 1):
mapping[(x, y)] = [MOVE_LEFT, MOVE_RIGHT, MOVE_UP, MOVE_DOWN]
m[(x, y)] = [MOVE_LEFT, MOVE_RIGHT, MOVE_UP, MOVE_DOWN]

# Remove actions from certain states
top_row = dict(filter(lambda i: i[0][0] == self._size, mapping.items()))
bottom_row = dict(filter(lambda i: i[0][0] == 1, mapping.items()))
left_col = dict(filter(lambda i: i[0][1] == 1, mapping.items()))
right_col = dict(filter(lambda i: i[0][1] == self._size, mapping.items()))
top_row = dict(filter(lambda i: i[0][0] == self._size, m.items()))
bottom_row = dict(filter(lambda i: i[0][0] == 1, m.items()))
left_col = dict(filter(lambda i: i[0][1] == 1, m.items()))
right_col = dict(filter(lambda i: i[0][1] == self._size, m.items()))

for actions in top_row.values():
actions.remove(MOVE_UP)
Expand All @@ -123,9 +123,9 @@ def _state_action(self):
actions.remove(MOVE_RIGHT)

# No actions possible when found reward
mapping[(self._size, self._size)] = []
m[(self._size, self._size)] = []

# Cast (int, int) key to (str, str)
mapping = {(str(k[0]), str(k[1])): v for k, v in mapping.items()}
m = {(str(k[0]), str(k[1])): v for k, v in m.items()}

return mapping
return m
54 changes: 33 additions & 21 deletions gym_maze/__init__.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,10 @@
from gym.envs.registration import register

# noinspection PyUnresolvedReferences
from .maze import Maze # noqa: F401
from .maze import PATH_MAPPING, WALL_MAPPING, REWARD_MAPPING # noqa: F401

ACTION_LOOKUP = {
0: 'N',
1: 'NE',
2: 'E',
3: 'SE',
4: 'S',
5: 'SW',
6: 'W',
7: 'NW'
}


def find_action_by_direction(direction):
for key, val in ACTION_LOOKUP.items():
if val == direction:
return key

from gym_maze.common import MAZE_PATH as PATH_MAPPING # noqa: F401
from gym_maze.common import MAZE_REWARD as REWARD_MAPPING # noqa: F401
from gym_maze.common import MAZE_WALL as WALL_MAPPING # noqa: F401
from gym_maze.maze import Maze # noqa: F401
from gym_maze.rotating_maze import RotatingMaze # noqa: F401

register(
id='MazeF1-v0',
Expand Down Expand Up @@ -91,3 +75,31 @@ def find_action_by_direction(direction):
max_episode_steps=50,
nondeterministic=True
)

# Mazes 228, 252, 288 and 324 share the same registration settings, so
# they are registered in one pass; only the id and entry point differ.
for _maze_name in ('Maze228', 'Maze252', 'Maze288', 'Maze324'):
    register(
        id=f'{_maze_name}-v0',
        entry_point=f'gym_maze.envs:{_maze_name}',
        max_episode_steps=250,
        nondeterministic=True
    )
del _maze_name  # keep the helper name out of the package namespace
4 changes: 4 additions & 0 deletions gym_maze/common/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Integer codes identifying what occupies a maze cell.
MAZE_PATH = 0  # path cell
MAZE_WALL = 1  # wall cell
MAZE_ANIMAT = 5  # cell holding the animat (presumably the agent - confirm)
MAZE_REWARD = 9  # cell holding the reward
31 changes: 31 additions & 0 deletions gym_maze/common/maze_observation_space.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import random

import gym
import numpy as np

from gym_maze.common import MAZE_ANIMAT, MAZE_WALL, MAZE_PATH, MAZE_REWARD


class MazeObservationSpace(gym.Space):
    """Observation space made of ``n`` perceived maze cells.

    An observation is a sequence of ``n`` strings, each the string form of
    one of the ``MAZE_*`` cell codes. ``n`` is the number of visible
    neighbour fields, typically 8.
    """

    # Symbols kept as tuples (not ``map`` iterators) so they can be scanned
    # any number of times; a one-shot iterator is exhausted after first use.
    _SAMPLED_SYMBOLS = tuple(map(str, (MAZE_PATH, MAZE_WALL, MAZE_REWARD)))
    _VALID_SYMBOLS = tuple(
        map(str, (MAZE_PATH, MAZE_WALL, MAZE_ANIMAT, MAZE_REWARD)))

    def __init__(self, n):
        """Create a space of observations with ``n`` elements.

        :param n: number of visible neighbour fields, typically 8
        """
        self.np_random = np.random.RandomState()
        self.n = n
        gym.Space.__init__(self, (self.n,), str)

    def seed(self, seed):
        """Seed the ``np_random`` generator owned by this space."""
        self.np_random.seed(seed)

    def sample(self):
        """Return a random observation as a tuple of ``n`` cell symbols.

        Symbols are drawn from path, wall and reward; the animat symbol is
        never sampled.

        NOTE(review): draws come from the global ``random`` module, so
        ``seed`` (which seeds ``self.np_random``) does not affect them.
        """
        return tuple(
            random.choice(self._SAMPLED_SYMBOLS) for _ in range(self.n))

    def contains(self, x):
        """Return True when every element of ``x`` is a known cell symbol.

        Known symbols are the string forms of the path, wall, animat and
        reward codes. The length of ``x`` is not checked.
        """
        return all(elem in self._VALID_SYMBOLS for elem in x)

    def to_jsonable(self, sample_n):
        """Convert a sample into a JSON-friendly list."""
        return list(sample_n)

    def from_jsonable(self, sample_n):
        """Convert a JSON-decoded list back into a tuple sample."""
        return tuple(sample_n)
26 changes: 26 additions & 0 deletions gym_maze/common/maze_renderer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import numpy as np
from gym.utils import colorize

from gym_maze.common import MAZE_ANIMAT, MAZE_WALL, MAZE_PATH, MAZE_REWARD


def render(out, board: np.ndarray):
    """Write a text picture of ``board`` to ``out``.

    A leading newline is written first, then one line per board row with
    the rendered cells separated by single spaces.

    :param out: object exposing ``write(str)``
    :param board: two-dimensional collection of maze cell codes
    """
    out.write("\n")

    # Iterate over a copy, as the original implementation did.
    for cells in np.copy(board):
        line = " ".join(map(_render_element, cells))
        out.write(line)
        out.write("\n")


def _render_element(el):
    """Return the colorized glyph for a single maze cell.

    Known cell codes map to a fixed glyph and color; any other value is
    passed to ``colorize`` unchanged and shown in cyan.
    """
    # Checked in order, using the same equality tests as an if/elif chain.
    known_cells = (
        (MAZE_WALL, '■', 'gray'),
        (MAZE_PATH, '□', 'white'),
        (MAZE_REWARD, '$', 'yellow'),
        (MAZE_ANIMAT, 'A', 'red'),
    )

    for code, glyph, color in known_cells:
        if el == code:
            return colorize(glyph, color)

    return colorize(el, 'cyan')
Loading

0 comments on commit fc043b6

Please sign in to comment.