fix selecting action

TTSArrows · Sep 3, 2018 · 1faef74 · 1faef74
1 parent a3ee123
commit 1faef74
Show file tree

Hide file tree

Showing 3 changed files with 6 additions and 6 deletions.
diff --git a/contents/2_Q_Learning_maze/RL_brain.py b/contents/2_Q_Learning_maze/RL_brain.py
@@ -23,8 +23,8 @@ def choose_action(self, observation):
         if np.random.uniform() < self.epsilon:
             # choose best action
             state_action = self.q_table.loc[observation, :]
-            state_action = state_action.reindex(np.random.permutation(state_action.index))     # some actions have same value
-            action = state_action.idxmax()
+            # some actions may have the same value; randomly choose one of these actions
+            action = np.random.choice(state_action[state_action == np.max(state_action)].index)
         else:
             # choose random action
             action = np.random.choice(self.actions)

diff --git a/contents/3_Sarsa_maze/RL_brain.py b/contents/3_Sarsa_maze/RL_brain.py
@@ -35,8 +35,8 @@ def choose_action(self, observation):
         if np.random.rand() < self.epsilon:
             # choose best action
             state_action = self.q_table.loc[observation, :]
-            state_action = state_action.reindex(np.random.permutation(state_action.index))     # some actions have same value
-            action = state_action.idxmax()
+            # some actions may have the same value; randomly choose one of these actions
+            action = np.random.choice(state_action[state_action == np.max(state_action)].index)
         else:
             # choose random action
             action = np.random.choice(self.actions)

diff --git a/contents/4_Sarsa_lambda_maze/RL_brain.py b/contents/4_Sarsa_lambda_maze/RL_brain.py
@@ -35,8 +35,8 @@ def choose_action(self, observation):
         if np.random.rand() < self.epsilon:
             # choose best action
             state_action = self.q_table.loc[observation, :]
-            state_action = state_action.reindex(np.random.permutation(state_action.index))     # some actions have same value
-            action = state_action.idxmax()
+            # some actions may have the same value; randomly choose one of these actions
+            action = np.random.choice(state_action[state_action == np.max(state_action)].index)
         else:
             # choose random action
             action = np.random.choice(self.actions)