Rotating maze environment (#19)

ParrotPrediction · Mar 19, 2021 · fc043b6 · fc043b6
1 parent 6f942cf
commit fc043b6
Show file tree

Hide file tree

Showing 48 changed files with 860 additions and 517 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -4,8 +4,7 @@ python:
 install:
 - pip install -e ".[testing]"
 script:
-- python setup.py flake8
-- python setup.py test
+- make test
 deploy:
   skip_cleanup: true
   provider: pypi

diff --git a/Makefile b/Makefile
@@ -0,0 +1,3 @@
+# `test` is a command name, not a file to build: declare it phony so that
+# a file or directory called `test` can never make `make test` a no-op.
+.PHONY: test
+test:
+	flake8
+	py.test
diff --git a/README.md b/README.md
@@ -3,21 +3,7 @@ The repository contains environments used in LCS literature that are compliant w
 
 [![Build Status](https://travis-ci.org/ParrotPrediction/openai-envs.svg?branch=master)](https://travis-ci.org/ParrotPrediction/openai-envs)
 
-
-## Currently implemented environments
-
-- Maze (different variants)
-- Binary Multiplexer
-- Real Multiplexer
-- Hand Eye
-- Checkerboard
-- Real-valued toy problems
-- 1D Corridor
-- 2D Grid
-- Mountain Car with energy reward
-- Finite State World (FSW)
-
-For some usage examples look at [examples/](examples) directory.
+For usage examples, see the [examples/](examples) directory.
 
 ## Development
 

diff --git a/examples/checkerboard.py b/examples/checkerboard.py
@@ -1,8 +1,6 @@
 import logging
 import gym
-
-# noinspection PyUnresolvedReferences
-import gym_checkerboard
+import gym_checkerboard  # noqa: F401
 
 logging.basicConfig(level=logging.INFO)
 

diff --git a/examples/handeye.py b/examples/handeye.py
@@ -1,9 +1,7 @@
 import logging
 
 import gym
-
-# noinspection PyUnresolvedReferences
-import gym_handeye
+import gym_handeye  # noqa: F401
 
 logging.basicConfig(level=logging.DEBUG)
 

diff --git a/examples/maze.py b/examples/maze.py
@@ -1,9 +1,8 @@
 import logging
+from gym_maze.internal.maze_impl import ACTION_LOOKUP
 
 import gym
-
-# noinspection PyUnresolvedReferences
-import gym_maze
+import gym_maze  # noqa: F401
 
 logging.basicConfig(level=logging.DEBUG)
 
@@ -20,11 +19,13 @@
 
             action = maze.action_space.sample()
 
-            logging.info("\t\tExecuted action: [{}]".format(action))
+            logging.info("\t\tExecuted action: [{}]"
+                         .format(ACTION_LOOKUP[action]))
+
             observation, reward, done, info = maze.step(action)
 
             if done:
-                logging.info(f"Episode finished after {t+1} timesteps.")
+                logging.info(f"Episode finished after {t+1} steps.")
                 logging.info(f"Last reward: {reward}")
                 break
 

diff --git a/examples/maze_rotating.py b/examples/maze_rotating.py
@@ -0,0 +1,28 @@
+# Example script: build the 'Maze288-v0' environment and render it once.
+import logging
+
+import gym
+# Imported for its side effect only: gym_maze/__init__.py runs the
+# register() calls that make ids such as 'Maze288-v0' known to gym.make().
+import gym_maze  # noqa: F401
+
+logging.basicConfig(level=logging.DEBUG)
+
+if __name__ == '__main__':
+    maze = gym.make('Maze288-v0')
+    maze.render()
+    # Random-action episode loop, currently disabled (kept commented out).
+    # for i_episode in range(1):
+    #     observation = maze.reset()
+    #
+    #     for t in range(100):
+    #         logging.info(f"Time: [{t}], observation: [{observation}]")
+    #
+    #         action = maze.action_space.sample()
+    #
+    #         logging.info("\t\tExecuted action: [{}]".format(action))
+    #
+    #         observation, reward, done, info = maze.step(action)
+    #
+    #         if done:
+    #             logging.info(f"Episode finished after {t+1} steps.")
+    #             logging.info(f"Last reward: {reward}")
+    #             break
+
+    logging.info("Finished")
diff --git a/examples/taxi.py b/examples/taxi.py
@@ -1,5 +1,5 @@
 import gym
-import gym_taxi_goal
+import gym_taxi_goal  # noqa: F401
 
 
 if __name__ == '__main__':

diff --git a/gym_checkerboard/checkerboard_simulator.py b/gym_checkerboard/checkerboard_simulator.py
@@ -17,7 +17,7 @@ def __init__(self, dim: int, ndiv: int) -> None:
         self.board = self._build_board()
 
     def _build_board(self) -> np.ndarray:
-        x = np.empty(pow(self.nd, self.n), dtype=np.bool)
+        x = np.empty(pow(self.nd, self.n), dtype=bool)
 
         # alternating cell colors
         x[:] = WHITE

diff --git a/gym_corridor/tests/test_corridor.py b/gym_corridor/tests/test_corridor.py
@@ -3,7 +3,6 @@
 
 import gym
 
-# noinspection PyUnresolvedReferences
 import gym_corridor  # noqa: F401
 from gym_corridor.corridor import MOVE_LEFT, MOVE_RIGHT
 
@@ -30,7 +29,7 @@ def test_should_visualize(self):
         vis = corr.render(mode='ansi')
 
         # then
-        assert 1 <= int(obs) < 20
+        assert 0 <= int(obs) < 19
         assert len(vis) == 22
         assert 1 == vis.count('X')
         assert 1 == vis.count('$')
@@ -88,7 +87,7 @@ def test_should_move_in_both_directions(self):
         corr = gym.make('corridor-20-v0')
         p0 = corr.reset()
 
-        while p0 in ["1", "19"]:
+        while p0 in ["0", "19"]:
             p0 = corr.reset()
 
         # when & then

diff --git a/gym_fsw/__init__.py b/gym_fsw/__init__.py
@@ -1,6 +1,6 @@
 from gym.envs.registration import register
 
-from .fsw import FiniteStateWorld
+from .fsw import FiniteStateWorld  # noqa: F401
 
 register(
     id='fsw-5-v0',

diff --git a/gym_fsw/tests/test_fsw.py b/gym_fsw/tests/test_fsw.py
@@ -84,7 +84,6 @@ def test_should_follow_suboptimal_path_and_find_reward(self):
         assert reward == 100
         assert done is True
 
-
     def test_should_initialize_bigger_environment(self):
         # given
         fsw = gym.make('fsw-10-v0')

diff --git a/gym_grid/grid.py b/gym_grid/grid.py
@@ -98,17 +98,17 @@ def _state_action(self):
         Return states and possible actions in each of them
         """
 
-        # Assign all actions for all states
-        mapping = {}
+        # Assign all actions for all states (mapping)
+        m = {}
         for x in range(1, self._size + 1):
             for y in range(1, self._size + 1):
-                mapping[(x, y)] = [MOVE_LEFT, MOVE_RIGHT, MOVE_UP, MOVE_DOWN]
+                m[(x, y)] = [MOVE_LEFT, MOVE_RIGHT, MOVE_UP, MOVE_DOWN]
 
         # Remove actions from certain states
-        top_row = dict(filter(lambda i: i[0][0] == self._size, mapping.items()))
-        bottom_row = dict(filter(lambda i: i[0][0] == 1, mapping.items()))
-        left_col = dict(filter(lambda i: i[0][1] == 1, mapping.items()))
-        right_col = dict(filter(lambda i: i[0][1] == self._size, mapping.items()))
+        top_row = dict(filter(lambda i: i[0][0] == self._size, m.items()))
+        bottom_row = dict(filter(lambda i: i[0][0] == 1, m.items()))
+        left_col = dict(filter(lambda i: i[0][1] == 1, m.items()))
+        right_col = dict(filter(lambda i: i[0][1] == self._size, m.items()))
 
         for actions in top_row.values():
             actions.remove(MOVE_UP)
@@ -123,9 +123,9 @@ def _state_action(self):
             actions.remove(MOVE_RIGHT)
 
         # No actions possible when found reward
-        mapping[(self._size, self._size)] = []
+        m[(self._size, self._size)] = []
 
         # Cast (int, int) key to (str, str)
-        mapping = {(str(k[0]), str(k[1])): v for k, v in mapping.items()}
+        m = {(str(k[0]), str(k[1])): v for k, v in m.items()}
 
-        return mapping
+        return m
diff --git a/gym_maze/__init__.py b/gym_maze/__init__.py
@@ -1,26 +1,10 @@
 from gym.envs.registration import register
 
-# noinspection PyUnresolvedReferences
-from .maze import Maze  # noqa: F401
-from .maze import PATH_MAPPING, WALL_MAPPING, REWARD_MAPPING  # noqa: F401
-
-ACTION_LOOKUP = {
-    0: 'N',
-    1: 'NE',
-    2: 'E',
-    3: 'SE',
-    4: 'S',
-    5: 'SW',
-    6: 'W',
-    7: 'NW'
-}
-
-
-def find_action_by_direction(direction):
-    for key, val in ACTION_LOOKUP.items():
-        if val == direction:
-            return key
-
+from gym_maze.common import MAZE_PATH as PATH_MAPPING  # noqa: F401
+from gym_maze.common import MAZE_REWARD as REWARD_MAPPING  # noqa: F401
+from gym_maze.common import MAZE_WALL as WALL_MAPPING  # noqa: F401
+from gym_maze.maze import Maze  # noqa: F401
+from gym_maze.rotating_maze import RotatingMaze  # noqa: F401
 
 register(
     id='MazeF1-v0',
@@ -91,3 +75,31 @@ def find_action_by_direction(direction):
     max_episode_steps=50,
     nondeterministic=True
 )
+
+register(
+    id='Maze228-v0',
+    entry_point='gym_maze.envs:Maze228',
+    max_episode_steps=250,
+    nondeterministic=True
+)
+
+register(
+    id='Maze252-v0',
+    entry_point='gym_maze.envs:Maze252',
+    max_episode_steps=250,
+    nondeterministic=True
+)
+
+register(
+    id='Maze288-v0',
+    entry_point='gym_maze.envs:Maze288',
+    max_episode_steps=250,
+    nondeterministic=True
+)
+
+register(
+    id='Maze324-v0',
+    entry_point='gym_maze.envs:Maze324',
+    max_episode_steps=250,
+    nondeterministic=True
+)
diff --git a/gym_maze/common/__init__.py b/gym_maze/common/__init__.py
@@ -0,0 +1,4 @@
+# Integer codes for the contents of a maze cell. The maze renderer compares
+# board cells against these values, and the observation space uses their
+# str() form as observation symbols.
+MAZE_PATH = 0
+MAZE_WALL = 1
+MAZE_ANIMAT = 5
+MAZE_REWARD = 9
diff --git a/gym_maze/common/maze_observation_space.py b/gym_maze/common/maze_observation_space.py
@@ -0,0 +1,31 @@
+import random
+
+import gym
+import numpy as np
+
+from gym_maze.common import MAZE_ANIMAT, MAZE_WALL, MAZE_PATH, MAZE_REWARD
+
+
+class MazeObservationSpace(gym.Space):
+    """Space whose samples are tuples of ``n`` string-encoded cell codes.
+
+    Each symbol is the ``str`` form of one of the ``MAZE_*`` constants.
+    """
+
+    # The symbol sets are materialised as tuples on purpose: a bare ``map``
+    # object is a one-shot iterator, so reusing it for several draws or
+    # several membership tests silently operates on an exhausted iterator.
+
+    # Symbols that ``sample`` may draw (the animat code is not drawn).
+    _SAMPLED_STATES = tuple(map(str, (MAZE_PATH, MAZE_WALL, MAZE_REWARD)))
+
+    # Symbols accepted by ``contains``.
+    _VALID_STATES = tuple(
+        map(str, (MAZE_PATH, MAZE_WALL, MAZE_ANIMAT, MAZE_REWARD)))
+
+    def __init__(self, n):
+        # n is the number of visible neighbour fields, typically 8
+        self.np_random = np.random.RandomState()
+        self.n = n
+        gym.Space.__init__(self, (self.n,), str)
+
+    def seed(self, seed):
+        """Seed this space's own ``np_random`` generator."""
+        # NOTE(review): ``sample`` draws from the global ``random`` module,
+        # so this seed does not affect it - confirm whether that is intended.
+        self.np_random.seed(seed)
+
+    def sample(self):
+        """Return a tuple of ``n`` randomly chosen path/wall/reward symbols."""
+        return tuple(
+            random.choice(self._SAMPLED_STATES) for _ in range(self.n))
+
+    def contains(self, x):
+        """Return True when every element of ``x`` is a known cell symbol.
+
+        Only the elements are checked; the length of ``x`` is not.
+        """
+        return all(elem in self._VALID_STATES for elem in x)
+
+    def to_jsonable(self, sample_n):
+        """Convert ``sample_n`` to a list."""
+        return list(sample_n)
+
+    def from_jsonable(self, sample_n):
+        """Convert ``sample_n`` to a tuple."""
+        return tuple(sample_n)
diff --git a/gym_maze/common/maze_renderer.py b/gym_maze/common/maze_renderer.py
@@ -0,0 +1,26 @@
+import numpy as np
+from gym.utils import colorize
+
+from gym_maze.common import MAZE_ANIMAT, MAZE_WALL, MAZE_PATH, MAZE_REWARD
+
+
+def render(out, board: np.ndarray):
+    """Write ``board`` to ``out``, one space-separated line per row.
+
+    A leading newline is written first; each cell is converted to its
+    display form by ``_render_element``.
+    """
+    out.write("\n")
+
+    # Iterate over a private copy of the grid rather than the caller's array.
+    snapshot = np.copy(board)
+
+    for cells in snapshot:
+        line = " ".join(map(_render_element, cells))
+        out.write(line)
+        out.write("\n")
+
+
+def _render_element(el):
+    """Return the colorized text used to display a single board cell."""
+    # (cell code, glyph, colour) triples, tested in this order with ``==``.
+    known_cells = (
+        (MAZE_WALL, '■', 'gray'),
+        (MAZE_PATH, '□', 'white'),
+        (MAZE_REWARD, '$', 'yellow'),
+        (MAZE_ANIMAT, 'A', 'red'),
+    )
+    for code, glyph, colour in known_cells:
+        if el == code:
+            return colorize(glyph, colour)
+
+    # Any other value is shown as-is, highlighted in cyan.
+    return colorize(el, 'cyan')