Commit

fixed a bug in selections
Theo Jaunet committed Jul 30, 2019
1 parent b153a97 commit 9ad8812
Showing 3 changed files with 80 additions and 42 deletions.
117 changes: 76 additions & 41 deletions index.html
@@ -35,7 +35,7 @@
gtag('config', 'UA-105697527-4');
</script>

<link rel="icon" href="assets/favicon.ico"/>
<link rel="icon" href="assets/favicon.ico"/>

<link rel="stylesheet" href="css/normalize.css">
<link rel="stylesheet" href="css/skeleton.css">
@@ -55,16 +55,23 @@

<div style="width: 35px;height: auto;display: inline-block;margin-right: 20px;position: absolute;left: 515.7px;top: 74px;z-index: 50">
<img class="play" style="width: 35px;position: absolute;left: -49px;top:-29px" src="assets/play-sign.svg">
<input id="timebar" type="range" min="0" max="66" step="1" value="0" style="width: 295px;position: absolute;top: 14px;">
<p id="stepctn" style="position: absolute;display: inline-block;left: 118px;width: 88px;top: -18px;user-focus: none;user-select: none">Step --/--</p>
<input id="timebar" type="range" min="0" max="66" step="1" value="0"
style="width: 295px;position: absolute;top: 14px;">
<p id="stepctn"
style="position: absolute;display: inline-block;left: 118px;width: 88px;top: -18px;user-focus: none;user-select: none">
Step --/--</p>
</div>
<div class="row" style="margin-top: 15px">
<div class="five columns" style="position: relative">
<h5> What if an artificial DOOM player had reduced memory?</h5>
<p style="max-height: 100px;">We trained an artificial DOOM player using Deep Reinforcement learning.
It has to gather items in a specific order: <br>Green Armor <img src="assets/armorGreen.png" class="item-icon" height="57px"> <img src="assets/arrow.png" class="arrow-icon"> Red Armor
<img src="assets/armorRed.png" class="item-icon"> <img src="assets/arrow.png" class="arrow-icon"> Health Pack <img src="assets/hp.png" class="item-icon"> <img src="assets/arrow.png" class="arrow-icon"> Soul-sphere <img src="assets/soul.png" class="item-icon" style="width: 20px">, with the
shortest path possible.
Let’s explore how changing the trained agent’s memory influences its trajectory!</p>

@@ -102,14 +109,17 @@ <h5> What if an artificial DOOM player had reduced memory?</h5>
<h5 style="width: 100%" id="card_title"> Full Memory</h5>
<p id="card_txt" style="margin-top: 20px;width: 100%">

<!-- In order to play doom, the artificial doom player receive at each time-steps game capture (image) corresponding to its field of view.
From this game capture, it decides which action it should do. As the artificial doom player
decides, it builds an inner representation of the previously seen game captures. Such representation,
is a vector <i>(1x32)</i> with values in a scale from <span class="cell"></span> inactive to <span class="cell" style="background-color: rgb(191, 84, 47)"></span> active.-->
</p>

</div>
<div onclick="$('.right-card-arr').trigger('click')" style="cursor:pointer;position: absolute;right: 28px;bottom: -40px"> <a>next <img src="assets/arrow.png" class="arrow-icon"> </a></div>
<div onclick="$('.right-card-arr').trigger('click')"
style="cursor:pointer;position: absolute;right: 28px;bottom: -40px"><a>next <img src="assets/arrow.png"
class="arrow-icon"> </a>
</div>
</div>


@@ -118,33 +128,49 @@ <h5 style="width: 100%" id="card_title"> Full Memory</h5>
<div style="text-align: left;width: 800px;display: inline-block">
<h4>Deep Reinforcement Learning and Memory</h4>

<p> We used the Advantage Actor-Critic (A2C) model, as presented by <i><a href="https://arxiv.org/abs/1904.01806"> E. Beeching et al. </a></i>.
This model learns through trial and error to associate an observation (i.e. a matrix of pixels) at time-step t with an action (at), such as turn left.
It achieves this by using neural networks with shared parameters <i>theta</i>.
</p>

<p>
The model is composed of three stages with different purposes.
First, 3 convolutional layers analyze and extract features from the input game capture (image).
This results in a tensor of 16 features <i>(ft)</i> shaped as 10x4 matrices. Those features are then flattened into a vector of 1x32 using a Fully Connected layer.
The purpose of this operation is to prepare them for the next stage, the memory.
</p>
<p>
The memory of this model is handled by a GRU layer, which takes a vector as input and outputs a hidden state <i>(ht)</i>, a vector of 32 elements.
GRU layers maintain and update a latent representation through time-steps by combining the current input <i>(ft)</i> with the previous hidden state <i>(ht-1)</i>.
Each element of the hidden state is a quantity within the range [−1, 1]. A value close to 0 represents low activity, whereas a value close to either extremity represents high activity.
Hidden state elements can change their values between two time-steps, and such changes can be widely observed across hidden state elements during trajectories.
However, it remains unclear which elements correspond to which representations, and thus which are responsible for decisions.

Finally, the last stage consists of mapping the current hidden state <i>ht</i> to a probability distribution over the 5 available actions <i>(right, left, forward, forward+right, forward+left)</i>.
</p>
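As a rough, illustrative sketch only (not the authors' code), the three stages described above could be written in PyTorch roughly as follows. The 32-unit memory, the 16 feature maps of 10x4 and the 5 actions come from the text; the class name, kernel sizes, strides and the 60x108 input resolution are assumptions chosen so the shapes line up.

import torch
import torch.nn as nn

class DoomA2C(nn.Module):  # hypothetical name, for illustration only
    def __init__(self, n_actions=5, memory_size=32):
        super().__init__()
        # Stage 1: 3 convolutional layers extracting 16 feature maps of 4x10
        # (assuming a 3x60x108 game capture; kernel sizes and strides are illustrative).
        self.conv = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=8, stride=4), nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=4, stride=2), nn.ReLU(),
            nn.Conv2d(32, 16, kernel_size=3, stride=1), nn.ReLU(),
        )
        self.fc = nn.Linear(16 * 4 * 10, memory_size)      # flatten f_t into a 1x32 vector
        # Stage 2: the memory, a GRU combining f_t with the previous hidden state h_{t-1}.
        self.gru = nn.GRUCell(memory_size, memory_size)
        # Stage 3: map h_t to a distribution over the 5 actions (plus the A2C value head).
        self.actor = nn.Linear(memory_size, n_actions)
        self.critic = nn.Linear(memory_size, 1)

    def forward(self, obs, h_prev):
        f_t = torch.relu(self.fc(self.conv(obs).flatten(start_dim=1)))
        h_t = self.gru(f_t, h_prev)                         # elements roughly within [-1, 1]
        return torch.distributions.Categorical(logits=self.actor(h_t)), self.critic(h_t), h_t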

<p> During the training phase, the agent is forced to explore its environment with random actions and can receive rewards depending on their outcome:
+0.5 for gathering an item in the right order (armor -> health pack -> soul-sphere), and -0.25 for gathering the wrong item.
The combination of the observations, actions and rewards ordered by time-steps <i>t</i> forms a rollout, which is then used to optimize the neural network with gradient descent.
</p>
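A minimal, hedged sketch of that reward scheme (the function and item names are invented for the example; only the +0.5 / -0.25 values and the item order come from the text):

# +0.5 for gathering the next item in the required order, -0.25 for any other item.
ITEM_ORDER = ["green_armor", "red_armor", "health_pack", "soul_sphere"]

def step_reward(picked_item, expected_index):
    """Return (reward, new_expected_index) when the agent picks up an item."""
    if picked_item == ITEM_ORDER[expected_index]:
        return 0.5, expected_index + 1      # correct item: reward and advance in the order
    return -0.25, expected_index            # wrong item: penalty, the order does not advance

A rollout is then simply the sequence of (observation, action, reward) tuples collected over time-steps t.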

<p> For a more detailed introduction to memory, we recommend reading <a href="https://colah.github.io/posts/2015-08-Understanding-LSTMs/"> Christopher Olah's blog</a> on LSTMs. </p>
</div>
</div>
</div>
@@ -154,19 +180,23 @@ <h4>Deep Reinforcement Learning and Memory</h4>
<div style="text-align: left;width: 800px;display: inline-block">
<h4> Why Manipulate the Memory?</h4>
<p>
As detailed in the previous section, the agent's actions are directly linked to its memory; therefore,
each of its decisions may be justified by its current hidden state <i>ht</i> and its values.
However, such memory is hard to understand because it is time-varying and uses abstract values.
Being able to erase memory elements, and observing how the agent behaves without them,
may help in understanding and interpreting their roles in the decision process and the information they may represent.

In addition, the hidden state's length is manually set by the model's builder, so this value may be unfit to the agent's needs, which may result in unused or redundant elements.
Removing them, and thus reducing the memory length, can reduce the computation power needed by the agent, and reduce both the training time and the energy consumption footprint.

</p>



</div>
</div>
</div>
@@ -176,10 +206,13 @@ <h4> Why Manipulate the Memory?</h4>
<div style="text-align: left;width: 800px;display: inline-block">
<h4> How do we Erase Memory Elements?</h4>

<p> In order to simulate a reduced memory, we implemented a method that allows us to generate trajectories from agents with limited memory.
Technically, we hijack the memory vectors by applying a mask to them before each decision.
This mask is a 1x32 vector, with its values either set to 0 (remove the element) or set to 1 (keep the element).
Each memory element is multiplied by its corresponding mask element, and therefore either has values as it should, or values constantly equal to 0 (i.e., inactive).
The outcome of this operation is then used by the model to decide which action it should do.
This method allows us to change the agent's memory without having to retrain the model.
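A minimal sketch of this masking trick, reusing the hypothetical DoomA2C sketch above and made-up variable names (the actual implementation may differ, e.g. in whether the masked state is also carried forward):

import torch

mask = torch.ones(1, 32)
mask[0, [5, 7, 8, 11]] = 0.0                     # indices to erase are arbitrary examples

def decide_with_masked_memory(model, obs, h_prev):
    f_t = torch.relu(model.fc(model.conv(obs).flatten(start_dim=1)))
    h_t = model.gru(f_t, h_prev) * mask          # element-wise product: masked cells forced to 0 (inactive)
    action = torch.distributions.Categorical(logits=model.actor(h_t)).sample()
    return action, h_t                           # the masked hidden state is reused at the next time-step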

@@ -194,17 +227,19 @@ <h4> How do we Erase Memory Elements?</h4>

<div class="row" style="text-align: center">
<div class="twelve columns" style="margin-top: 15%;display: inline-block">
<div style="text-align: left;width: 800px;display: inline-block">
This work takes place in Théo jaunet's Ph.D., and continues previous work such as DRLViz (<a href="https://github.com/sical/drlviz">github</a>)
<div style=" text-align: left;width: 800px;margin-bottom: 50px;display: inline-block">
<h5>Authors</h5>
<p>Théo Jaunet, Romain Vuillemot and Christian Wolf</p>
This work takes place in Théo jaunet's Ph.D., and continues previous works such as DRLViz (<a
href="https://github.com/sical/drlviz">github</a>)
</div>

</div>
</div>

</div>
<!-- End Document
–––––––––––––––––––––––––––––––––––––––––––––––––– -->
</body>
<script src="js/utils.js"></script>
<script src="js/drawVector.js"></script>
1 change: 1 addition & 0 deletions js/drawVector.js
@@ -11,6 +11,7 @@ let goplz = false;
let tri = {
'act': [17, 16, 4, 12, 0, 18, 11, 7, 15, 19, 30, 3, 5, 8, 31, 25, 14, 29, 22, 6, 1, 24, 2, 20, 10, 28, 21, 9, 23, 27, 26, 13],
'ch': [8, 29, 12, 25, 11, 7, 15, 24, 19, 3, 14, 22, 20, 30, 2, 13, 21, 23, 27, 4, 0, 18, 10, 9, 5, 6, 16, 28, 31, 26, 1, 17]
// 5,7,8,11,12,15,21,24,25,29
};
let is_new = false;

4 changes: 3 additions & 1 deletion js/main.js
@@ -12,7 +12,8 @@ let tops = [];
let diy = [];
let random = [];
let mains = [];
let selecs_list = ['after','rest0', 'rest1', 'only'];
// let selecs_list = ['after','rest0', 'rest1', 'only'];
let selecs_list = ['rest0', 'rest1', 'only'];
let selecs = [];

let area = d3.line()
@@ -93,6 +94,7 @@ function load_data(data, index) {
break;
case "3":
if (selecs[iz] === undefined && data !== mains[0]) {
console.log('lalal');
selecs[iz] = data;
}
break;
