This repository has been archived by the owner on Feb 24, 2022. It is now read-only.

Commit

Merge pull request #136 from udacity/smartcab/dev
Make the wheel better
jared-weed committed Oct 31, 2016
2 parents 4bdd1e2 + 665814c commit 271afa2
Showing 6 changed files with 265 additions and 142 deletions.
87 changes: 0 additions & 87 deletions projects/smartcab/plotting.py

This file was deleted.

51 changes: 33 additions & 18 deletions projects/smartcab/smartcab/agent.py
@@ -8,7 +8,7 @@ class LearningAgent(Agent):
""" An agent that learns to drive in the Smartcab world.
This is the object you will be modifying. """

def __init__(self, env, learning=True, epsilon=0.5, alpha=0.5, gamma=0):
def __init__(self, env, learning=True, epsilon=1.0, alpha=0.5, gamma=0.0):
super(LearningAgent, self).__init__(env)     # Set the agent in the environment
self.planner = RoutePlanner(self.env, self) # Create a route planner
self.valid_actions = self.env.valid_actions # The set of valid actions
@@ -31,7 +31,7 @@ def reset(self, destination=None, testing=False):
'testing' is set to True if testing trials are being used
once training trials have completed. """

# Create a series of waypoints
# Select the destination as the new location to route to
self.planner.route_to(destination)

###########
@@ -80,6 +80,19 @@ def get_maxQ(self, state):
return maxQ


def createQ(self, state):
""" The createQ function is called when a state is generated by the agent. """

###########
## TO DO ##
###########
# When learning, check if the 'state' is not in the Q-table
# If it is not, create a new dictionary for that state
# Then, for each action available, set the initial Q-value to 0.0

return
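# A minimal sketch of the TO DO above (an illustration, not this commit's code),
# assuming self.Q is the Q-table dict, self.learning is the learning flag, and
# self.valid_actions lists the available actions:
def createQ_sketch(self, state):
    if self.learning and state not in self.Q:
        # Unseen state: give every available action an initial Q-value of 0.0
        self.Q[state] = {action: 0.0 for action in self.valid_actions}
    return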


def choose_action(self, state):
""" The choose_action function is called when the agent is asked to choose
which action to take, based on the 'state' the smartcab is in. """
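# One common way to fill this in (an illustration, not this commit's code) is
# epsilon-greedy selection, assuming self.Q is the Q-table and random is
# imported in this module:
def choose_action_sketch(self, state):
    # Explore with probability epsilon, otherwise exploit a best-known action
    if not self.learning or random.random() < self.epsilon:
        return random.choice(self.valid_actions)
    maxQ = self.get_maxQ(state)
    best_actions = [a for a, q in self.Q[state].items() if q == maxQ]
    return random.choice(best_actions)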
@@ -118,14 +131,15 @@ def update(self):
environment for a given trial. This function will build the agent
state, choose an action, receive a reward, and learn if enabled. """

# Update the agent based on the functions built above.
state = self.build_state() # Build the agent pre-action state
action = self.choose_action(state) # Choose an action based on the agent state
reward = self.env.act(self, action) # Receive a reward based on the action
new_state = self.build_state() # Build the agent's post-action state
self.learn(state, action, reward, new_state) # Run the Q-Learning algorithm

return
state = self.build_state() # Get current state
self.createQ(state) # Create 'state' in Q-table
action = self.choose_action(state) # Choose an action
reward = self.env.act(self, action) # Receive a reward
new_state = self.build_state() # Get new state
self.createQ(new_state) # Create 'new_state' in Q-table
self.learn(state, action, reward, new_state) # Q-learn

return
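# The 'Q-learn' step above would typically apply the standard Q-learning update
# (an illustration, not this commit's code; self.Q as the Q-table is an
# assumption). With the default gamma of 0.0 the future-value term drops out:
def learn_sketch(self, state, action, reward, new_state):
    if self.learning:
        old_q = self.Q[state][action]
        future = self.gamma * self.get_maxQ(new_state)
        # Move the estimate toward the observed reward plus discounted future value
        self.Q[state][action] = old_q + self.alpha * (reward + future - old_q)
    return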


def run():
@@ -143,10 +157,10 @@ def run():
##############
# Create the driving agent
# Flags:
# learning - set to True to force the driving agent to use Q-learning
# epsilon - continuous value for the exploration factor, default is 0.5
# alpha - continuous value for the learning rate, default is 0.5
# gamma - continuous value for the discount factor, default is 0
# learning - set to True to force the driving agent to use Q-learning
# epsilon - continuous value for the exploration factor, default is 1
# alpha - continuous value for the learning rate, default is 0.5
# gamma - continuous value for the discount factor, default is 0
agent = env.create_agent(LearningAgent)

##############
@@ -158,16 +172,17 @@ def run():
##############
# Create the simulation
# Flags:
# update_delay - continuous value (in seconds) for time between steps
# update_delay - continuous time (in seconds) between actions, default is 2.0 seconds
# display - set to False to disable the GUI if PyGame is enabled
# log_metrics - set to True to log trial and simulation results to /logs
sim = Simulator(env, update_delay=0.5)
sim = Simulator(env)

##############
# Run the simulator
# Flags:
# n_train - discrete number of training trials to perform, default is 10
# n_test - discrete number of testing trials to perform, default is 0
# tolerance - epsilon tolerance before beginning testing, default is 0.01
# n_train - maximum limit of training trials to perform, default is 300
# n_test - discrete number of testing trials to perform, default is 0
sim.run()
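For reference, a fully specified setup using only the flags documented in the comments above might look like the following sketch (the particular values are illustrative, not this commit's defaults, and it assumes Environment and Simulator are imported as usual):

    env = Environment(verbose=False)
    agent = env.create_agent(LearningAgent, learning=True, epsilon=1.0, alpha=0.5, gamma=0.0)
    env.set_primary_agent(agent, enforce_deadline=True)
    sim = Simulator(env, update_delay=0.01, display=False, log_metrics=True)
    sim.run(n_train=300, n_test=10, tolerance=0.05)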

if __name__ == '__main__':
38 changes: 26 additions & 12 deletions projects/smartcab/smartcab/environment.py
@@ -89,16 +89,23 @@ def __init__(self, verbose=False, num_dummies=100, grid_size = (8, 6)):
}

def create_agent(self, agent_class, *args, **kwargs):
""" When called, create_agent creates an agent in the environment. """

agent = agent_class(self, *args, **kwargs)
self.agent_states[agent] = {'location': random.choice(self.intersections.keys()), 'heading': (0, 1)}
return agent

def set_primary_agent(self, agent, enforce_deadline=False):
""" When called, set_primary_agent sets 'agent' as the primary agent.
The primary agent is the smartcab that is followed in the environment. """

self.primary_agent = agent
agent.primary_agent = True
self.enforce_deadline = enforce_deadline

def reset(self, testing=False):
""" This function is called at the beginning of a new trial. """

self.done = False
self.t = 0

@@ -134,14 +141,16 @@ def reset(self, testing=False):
agent.reset(destination=(destination if agent is self.primary_agent else None), testing=testing)
if agent is self.primary_agent:
# Reset metrics for this trial (step data will be set during the step)
self.trial_data['testing'] = False
self.trial_data['testing'] = testing
self.trial_data['initial_deadline'] = deadline
self.trial_data['final_deadline'] = deadline
self.trial_data['net_reward'] = 0.0
self.trial_data['actions'] = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
self.trial_data['parameters'] = {'e': agent.epsilon, 'a': agent.alpha, 'g': agent.gamma}
self.trial_data['success'] = 0

def step(self):
""" This function is called when a time step is taken turing a trial. """

# Pretty print to terminal
print ""
@@ -173,16 +182,20 @@ def step(self):
if agent_deadline <= self.hard_time_limit:
self.done = True
self.success = False
print "Environment.step(): Primary agent hit hard time limit ({})! Trial aborted.".format(self.hard_time_limit)
if self.verbose: # Debugging
print "Environment.step(): Primary agent hit hard time limit ({})! Trial aborted.".format(self.hard_time_limit)
elif self.enforce_deadline and agent_deadline <= 0:
self.done = True
self.success = False
if(self.verbose == True): # Debugging
if self.verbose: # Debugging
print "Environment.step(): Primary agent ran out of time! Trial aborted."

self.t += 1

def sense(self, agent):
""" This function is called when information is requested about the sensor
inputs from an 'agent' in the environment. """

assert agent in self.agent_states, "Unknown agent!"

state = self.agent_states[agent]
@@ -215,6 +228,8 @@ def sense(self, agent):
return {'light': light, 'oncoming': oncoming, 'left': left, 'right': right}

def get_deadline(self, agent):
""" Returns the deadline remaining for an agent. """

return self.agent_states[agent]['deadline'] if agent is self.primary_agent else None

def act(self, agent, action):
@@ -240,8 +255,8 @@ def act(self, agent, action):
violation = 0

# Reward scheme
# First initialize reward uniformly random from [-2, 2]
reward = 4 * random.random() - 2
# First initialize reward uniformly random from [-1, 1]
reward = 2 * random.random() - 1

# Create a penalty factor as a function of remaining deadline
# Scales reward multiplicatively from [0, 1]
@@ -292,11 +307,11 @@ def act(self, agent, action):
# Did the agent attempt a valid move?
if violation == 0:
if action == agent.get_next_waypoint(): # Was it the correct action?
reward += 2 * (1 - penalty) # (2, 0)
reward += 2 - penalty # (2, 1)
elif action == None and light != 'green': # Was the agent stuck at a red light?
reward += 2 * (1 - penalty) # (2, 0)
reward += 2 - penalty # (2, 1)
else: # Valid but incorrect
reward -= 2 * penalty # (0, -2)
reward += 1 - penalty # (1, 0)
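# Illustrative summary of the bands above (a sketch, not this commit's code):
# for a deadline penalty p in [0, 1], the new scheme adds the following on top
# of the base reward drawn from [-1, 1]:
def valid_move_reward_sketch(penalty):
    return {'correct_waypoint': 2 - penalty,   # was 2 * (1 - penalty)
            'stopped_at_red':   2 - penalty,   # was 2 * (1 - penalty)
            'valid_but_wrong':  1 - penalty}   # was -2 * penalty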

# Move the agent
if action is not None:
@@ -311,16 +326,15 @@ def act(self, agent, action):
elif violation == 2: # Major violation
reward += -10
elif violation == 3: # Minor accident
reward += -50
reward += -20
elif violation == 4: # Major accident
reward += -100
reward += -40

# Did agent reach the goal after a valid move?
if agent is self.primary_agent:
if state['location'] == state['destination']:
# Did agent get to destination before deadline?
if state['deadline'] >= 0:
# reward += 10 * (1 - penalty) # (0, 10)
self.trial_data['success'] = 1

# Stop the trial
@@ -353,7 +367,7 @@ def act(self, agent, action):
return reward

def compute_dist(self, a, b):
""" L1 distance between two points in a world that wraps. """
""" Compute the Manhattan (L1) distance of a spherical world. """

dx1 = abs(b[0] - a[0])
dx2 = abs(self.grid_size[0] - dx1)
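A standalone sketch of the wrap-around L1 distance this method presumably computes, taking the shorter of the direct and wrapped distance along each axis (the explicit grid_size parameter is for illustration only):

    def compute_dist_sketch(a, b, grid_size):
        # Manhattan distance on a grid whose edges wrap around (torus-like)
        dx1 = abs(b[0] - a[0])
        dx2 = abs(grid_size[0] - dx1)
        dy1 = abs(b[1] - a[1])
        dy2 = abs(grid_size[1] - dy1)
        return min(dx1, dx2) + min(dy1, dy2)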
9 changes: 5 additions & 4 deletions projects/smartcab/smartcab/planner.py
@@ -1,20 +1,21 @@
import random

class RoutePlanner(object):
"""Complex route planner that is meant for a perpendicular grid network."""
""" Complex route planner that is meant for a perpendicular grid network. """

def __init__(self, env, agent):
self.env = env
self.agent = agent
self.destination = None

def route_to(self, destination=None):
""" Select the destination if one is provided, otherwise choose a random intersection. """

self.destination = destination if destination is not None else random.choice(self.env.intersections.keys())
#print "RoutePlanner.route_to(): destination = {}".format(destination) # [debug]

def next_waypoint(self):
"""Creates the next waypoint based on current heading, location,
intended destination and L1 distance from destination."""
""" Creates the next waypoint based on current heading, location,
intended destination and L1 distance from destination. """

# Collect global location details
bounds = self.env.grid_size

0 comments on commit 271afa2
