Skip to content

Commit

Permalink
fix selecting action
Browse files Browse the repository at this point in the history
  • Loading branch information
MorvanZhou committed Sep 3, 2018
1 parent a3ee123 commit 1faef74
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 6 deletions.
4 changes: 2 additions & 2 deletions contents/2_Q_Learning_maze/RL_brain.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ def choose_action(self, observation):
if np.random.uniform() < self.epsilon:
# choose best action
state_action = self.q_table.loc[observation, :]
state_action = state_action.reindex(np.random.permutation(state_action.index)) # some actions have same value
action = state_action.idxmax()
# some actions may have the same value, randomly choose one of these actions
action = np.random.choice(state_action[state_action == np.max(state_action)].index)
else:
# choose random action
action = np.random.choice(self.actions)
Expand Down
4 changes: 2 additions & 2 deletions contents/3_Sarsa_maze/RL_brain.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ def choose_action(self, observation):
if np.random.rand() < self.epsilon:
# choose best action
state_action = self.q_table.loc[observation, :]
state_action = state_action.reindex(np.random.permutation(state_action.index)) # some actions have same value
action = state_action.idxmax()
# some actions may have the same value, randomly choose one of these actions
action = np.random.choice(state_action[state_action == np.max(state_action)].index)
else:
# choose random action
action = np.random.choice(self.actions)
Expand Down
4 changes: 2 additions & 2 deletions contents/4_Sarsa_lambda_maze/RL_brain.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ def choose_action(self, observation):
if np.random.rand() < self.epsilon:
# choose best action
state_action = self.q_table.loc[observation, :]
state_action = state_action.reindex(np.random.permutation(state_action.index)) # some actions have same value
action = state_action.idxmax()
# some actions may have the same value, randomly choose one of these actions
action = np.random.choice(state_action[state_action == np.max(state_action)].index)
else:
# choose random action
action = np.random.choice(self.actions)
Expand Down

0 comments on commit 1faef74

Please sign in to comment.