This repository has been archived by the owner on Feb 24, 2022. It is now read-only.

Commit

Merge pull request #136 from udacity/smartcab/dev
Make the wheel better
jared-weed committed Oct 31, 2016
2 parents 4bdd1e2 + 665814c commit 271afa2
Showing 6 changed files with 265 additions and 142 deletions.
87 changes: 0 additions & 87 deletions projects/smartcab/plotting.py

This file was deleted.

51 changes: 33 additions & 18 deletions projects/smartcab/smartcab/agent.py
@@ -8,7 +8,7 @@ class LearningAgent(Agent):
""" An agent that learns to drive in the Smartcab world.
This is the object you will be modifying. """

def __init__(self, env, learning=True, epsilon=0.5, alpha=0.5, gamma=0):
def __init__(self, env, learning=True, epsilon=1.0, alpha=0.5, gamma=0.0):
super(LearningAgent, self).__init__(env)     # Set the agent in the environment
self.planner = RoutePlanner(self.env, self) # Create a route planner
self.valid_actions = self.env.valid_actions # The set of valid actions
@@ -31,7 +31,7 @@ def reset(self, destination=None, testing=False):
'testing' is set to True if testing trials are being used
once training trials have completed. """

# Create a series of waypoints
# Select the destination as the new location to route to
self.planner.route_to(destination)

###########
@@ -80,6 +80,19 @@ def get_maxQ(self, state):
return maxQ


def createQ(self, state):
""" The createQ function is called when a state is generated by the agent. """

###########
## TO DO ##
###########
# When learning, check if the 'state' is not in the Q-table
# If it is not, create a new dictionary for that state
# Then, for each action available, set the initial Q-value to 0.0

return
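# A minimal sketch of the TO DO above (an illustration, not this commit's code),
# assuming self.Q is the Q-table dict, self.learning is the learning flag, and
# self.valid_actions lists the available actions:
def createQ_sketch(self, state):
    if self.learning and state not in self.Q:
        # Unseen state: give every available action an initial Q-value of 0.0
        self.Q[state] = {action: 0.0 for action in self.valid_actions}
    return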


def choose_action(self, state):
""" The choose_action function is called when the agent is asked to choose
which action to take, based on the 'state' the smartcab is in. """
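# One common way to fill this in (an illustration, not this commit's code) is
# epsilon-greedy selection, assuming self.Q is the Q-table and random is
# imported in this module:
def choose_action_sketch(self, state):
    # Explore with probability epsilon, otherwise exploit a best-known action
    if not self.learning or random.random() < self.epsilon:
        return random.choice(self.valid_actions)
    maxQ = self.get_maxQ(state)
    best_actions = [a for a, q in self.Q[state].items() if q == maxQ]
    return random.choice(best_actions)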
@@ -118,14 +131,15 @@ def update(self):
environment for a given trial. This function will build the agent
state, choose an action, receive a reward, and learn if enabled. """

# Update the agent based on the functions built above.
state = self.build_state() # Build the agent pre-action state
action = self.choose_action(state) # Choose an action based on the agent state
reward = self.env.act(self, action) # Receive a reward based on the action
new_state = self.build_state() # Build the agent's post-action state
self.learn(state, action, reward, new_state) # Run the Q-Learning algorithm

return
state = self.build_state() # Get current state
self.createQ(state) # Create 'state' in Q-table
action = self.choose_action(state) # Choose an action
reward = self.env.act(self, action) # Receive a reward
new_state = self.build_state() # Get new state
self.createQ(new_state) # Create 'new_state' in Q-table
self.learn(state, action, reward, new_state) # Q-learn

return
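# The 'Q-learn' step above would typically apply the standard Q-learning update
# (an illustration, not this commit's code; self.Q as the Q-table is an
# assumption). With the default gamma of 0.0 the future-value term drops out:
def learn_sketch(self, state, action, reward, new_state):
    if self.learning:
        old_q = self.Q[state][action]
        future = self.gamma * self.get_maxQ(new_state)
        # Move the estimate toward the observed reward plus discounted future value
        self.Q[state][action] = old_q + self.alpha * (reward + future - old_q)
    return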


def run():
@@ -143,10 +157,10 @@ def run():
##############
# Create the driving agent
# Flags:
# learning - set to True to force the driving agent to use Q-learning
# epsilon - continuous value for the exploration factor, default is 0.5
# alpha - continuous value for the learning rate, default is 0.5
# gamma - continuous value for the discount factor, default is 0
# learning - set to True to force the driving agent to use Q-learning
# epsilon - continuous value for the exploration factor, default is 1
# alpha - continuous value for the learning rate, default is 0.5
# gamma - continuous value for the discount factor, default is 0
agent = env.create_agent(LearningAgent)

##############
@@ -158,16 +172,17 @@ def run():
##############
# Create the simulation
# Flags:
# update_delay - continuous value (in seconds) for time between steps
# update_delay - continuous time (in seconds) between actions, default is 2.0 seconds
# display - set to False to disable the GUI if PyGame is enabled
# log_metrics - set to True to log trial and simulation results to /logs
sim = Simulator(env, update_delay=0.5)
sim = Simulator(env)

##############
# Run the simulator
# Flags:
# n_train - discrete number of training trials to perform, default is 10
# n_test - discrete number of testing trials to perform, default is 0
# tolerance - epsilon tolerance before beginning testing, default is 0.01
# n_train - maximum limit of training trials to perform, default is 300
# n_test - discrete number of testing trials to perform, default is 0
sim.run()
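For reference, a fully specified setup using only the flags documented in the comments above might look like the following sketch (the particular values are illustrative, not this commit's defaults, and it assumes Environment and Simulator are imported as usual):

    env = Environment(verbose=False)
    agent = env.create_agent(LearningAgent, learning=True, epsilon=1.0, alpha=0.5, gamma=0.0)
    env.set_primary_agent(agent, enforce_deadline=True)
    sim = Simulator(env, update_delay=0.01, display=False, log_metrics=True)
    sim.run(n_train=300, n_test=10, tolerance=0.05)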

if __name__ == '__main__':
38 changes: 26 additions & 12 deletions projects/smartcab/smartcab/environment.py
@@ -89,16 +89,23 @@ def __init__(self, verbose=False, num_dummies=100, grid_size = (8, 6)):
}

def create_agent(self, agent_class, *args, **kwargs):
""" When called, create_agent creates an agent in the environment. """

agent = agent_class(self, *args, **kwargs)
self.agent_states[agent] = {'location': random.choice(self.intersections.keys()), 'heading': (0, 1)}
return agent

def set_primary_agent(self, agent, enforce_deadline=False):
""" When called, set_primary_agent sets 'agent' as the primary agent.
The primary agent is the smartcab that is followed in the environment. """

self.primary_agent = agent
agent.primary_agent = True
self.enforce_deadline = enforce_deadline

def reset(self, testing=False):
""" This function is called at the beginning of a new trial. """

self.done = False
self.t = 0

@@ -134,14 +141,16 @@ def reset(self, testing=False):
agent.reset(destination=(destination if agent is self.primary_agent else None), testing=testing)
if agent is self.primary_agent:
# Reset metrics for this trial (step data will be set during the step)
self.trial_data['testing'] = False
self.trial_data['testing'] = testing
self.trial_data['initial_deadline'] = deadline
self.trial_data['final_deadline'] = deadline
self.trial_data['net_reward'] = 0.0
self.trial_data['actions'] = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
self.trial_data['parameters'] = {'e': agent.epsilon, 'a': agent.alpha, 'g': agent.gamma}
self.trial_data['success'] = 0

def step(self):
""" This function is called when a time step is taken turing a trial. """

# Pretty print to terminal
print ""
@@ -173,16 +182,20 @@ def step(self):
if agent_deadline <= self.hard_time_limit:
self.done = True
self.success = False
print "Environment.step(): Primary agent hit hard time limit ({})! Trial aborted.".format(self.hard_time_limit)
if self.verbose: # Debugging
print "Environment.step(): Primary agent hit hard time limit ({})! Trial aborted.".format(self.hard_time_limit)
elif self.enforce_deadline and agent_deadline <= 0:
self.done = True
self.success = False
if(self.verbose == True): # Debugging
if self.verbose: # Debugging
print "Environment.step(): Primary agent ran out of time! Trial aborted."

self.t += 1

def sense(self, agent):
""" This function is called when information is requested about the sensor
inputs from an 'agent' in the environment. """

assert agent in self.agent_states, "Unknown agent!"

state = self.agent_states[agent]
@@ -215,6 +228,8 @@ def sense(self, agent):
return {'light': light, 'oncoming': oncoming, 'left': left, 'right': right}

def get_deadline(self, agent):
""" Returns the deadline remaining for an agent. """

return self.agent_states[agent]['deadline'] if agent is self.primary_agent else None

def act(self, agent, action):
@@ -240,8 +255,8 @@ def act(self, agent, action):
violation = 0

# Reward scheme
# First initialize reward uniformly random from [-2, 2]
reward = 4 * random.random() - 2
# First initialize reward uniformly random from [-1, 1]
reward = 2 * random.random() - 1

# Create a penalty factor as a function of remaining deadline
# Scales reward multiplicatively from [0, 1]
@@ -292,11 +307,11 @@ def act(self, agent, action):
# Did the agent attempt a valid move?
if violation == 0:
if action == agent.get_next_waypoint(): # Was it the correct action?
reward += 2 * (1 - penalty) # (2, 0)
reward += 2 - penalty # (2, 1)
elif action == None and light != 'green': # Was the agent stuck at a red light?
reward += 2 * (1 - penalty) # (2, 0)
reward += 2 - penalty # (2, 1)
else: # Valid but incorrect
reward -= 2 * penalty # (0, -2)
reward += 1 - penalty # (1, 0)
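# Illustrative summary of the bands above (a sketch, not this commit's code):
# for a deadline penalty p in [0, 1], the new scheme adds the following on top
# of the base reward drawn from [-1, 1]:
def valid_move_reward_sketch(penalty):
    return {'correct_waypoint': 2 - penalty,   # was 2 * (1 - penalty)
            'stopped_at_red':   2 - penalty,   # was 2 * (1 - penalty)
            'valid_but_wrong':  1 - penalty}   # was -2 * penalty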

# Move the agent
if action is not None:
@@ -311,16 +326,15 @@ def act(self, agent, action):
elif violation == 2: # Major violation
reward += -10
elif violation == 3: # Minor accident
reward += -50
reward += -20
elif violation == 4: # Major accident
reward += -100
reward += -40

# Did agent reach the goal after a valid move?
if agent is self.primary_agent:
if state['location'] == state['destination']:
# Did agent get to destination before deadline?
if state['deadline'] >= 0:
# reward += 10 * (1 - penalty) # (0, 10)
self.trial_data['success'] = 1

# Stop the trial
@@ -353,7 +367,7 @@ def act(self, agent, action):
return reward

def compute_dist(self, a, b):
""" L1 distance between two points in a world that wraps. """
""" Compute the Manhattan (L1) distance of a spherical world. """

dx1 = abs(b[0] - a[0])
dx2 = abs(self.grid_size[0] - dx1)
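A standalone sketch of the wrap-around L1 distance this method presumably computes, taking the shorter of the direct and wrapped distance along each axis (the explicit grid_size parameter is for illustration only):

    def compute_dist_sketch(a, b, grid_size):
        # Manhattan distance on a grid whose edges wrap around (torus-like)
        dx1 = abs(b[0] - a[0])
        dx2 = abs(grid_size[0] - dx1)
        dy1 = abs(b[1] - a[1])
        dy2 = abs(grid_size[1] - dy1)
        return min(dx1, dx2) + min(dy1, dy2)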
9 changes: 5 additions & 4 deletions projects/smartcab/smartcab/planner.py
@@ -1,20 +1,21 @@
import random

class RoutePlanner(object):
"""Complex route planner that is meant for a perpendicular grid network."""
""" Complex route planner that is meant for a perpendicular grid network. """

def __init__(self, env, agent):
self.env = env
self.agent = agent
self.destination = None

def route_to(self, destination=None):
""" Select the destination if one is provided, otherwise choose a random intersection. """

self.destination = destination if destination is not None else random.choice(self.env.intersections.keys())
#print "RoutePlanner.route_to(): destination = {}".format(destination) # [debug]

def next_waypoint(self):
"""Creates the next waypoint based on current heading, location,
intended destination and L1 distance from destination."""
""" Creates the next waypoint based on current heading, location,
intended destination and L1 distance from destination. """

# Collect global location details
bounds = self.env.grid_size

0 comments on commit 271afa2
