Update code for ELo-SACv2

LostXine · Oct 13, 2022 · cf79b98 · cf79b98
commit cf79b98
Show file tree

Hide file tree

Showing 29 changed files with 2,230 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,4 @@
+__pycache__/
+.ipynb_checkpoints/
+*.pyc
+
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 Xiang Li
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,49 @@
+# ELo-SAC: Evolving Losses + Soft Actor Critic
+
+This repository is the official implementation of ELo-SACv2 as a part of our paper **Does Self-supervised Learning Really Improve Reinforcement Learning from Pixels?** ([openreview](https://openreview.net/forum?id=fVslVNBfjd8), [arxiv](https://arxiv.org/abs/2206.05266)) at NeurIPS 2022.
+
+Our implementation is based on [SAC+AE](https://github.com/denisyarats/pytorch_sac_ae) by Denis Yarats and [CURL](https://github.com/MishaLaskin/curl) by Michael Laskin. 
+
+You may also want to check out ELo-SACv3 on the main branch of this repository. The Atari experiments were done in a separate codebase (see [ELo-Rainbow](https://github.com/LostXine/elo-rainbow)).
+
+## Installation 
+
+All of the dependencies are in the `conda_env.yml` file. They can be installed manually or with the following command:
+
+```
+conda env create -f conda_env.yml
+```
+
+Change the server IP and port in the `search-server-ip` file if necessary.
+
+## Instructions
+
+First, start the search server using `bash start-server.sh` or the following command:
+
+```
+python3 search-server.py --port 61888 --timeout 24
+```
+
+which will start an HTTP server listening on the given port. 
+The server runs a PSO (Particle Swarm Optimization) algorithm and distributes tasks to the clients with GPUs.
+`--timeout` is the number of hours the server waits for a client to report results before it assigns the same task to another client.
+Our optimization status is stored at `save_server/evol_rl.npy` and will be automatically loaded.
+To start a new search, point `--path` to a new file.
+
+To start the parameter search on clients, run `bash search.sh`. 
+The client will connect to the HTTP server and request the hyper-parameters for training.
+When the training completes, the client reports the evaluation results to the server and requests a new task.
+
+Run `bash check_status.sh` or `bash check_full_status.sh` to check the search status.
+
+To stop the search, **stop** the current server and **restart** the search server with `--stop True` (see the `server-stop.sh` file).
+All the clients will stop searching after finishing the current search.
+
+To evaluate the optimal combination, run `bash eval-s09.sh` and it will start to train ELo-SAC agents in 6 DMControl environments with 10 random seeds.
+
+See the `train.py` file for training hyper-parameters.
+
+## Contact
+
+1. Issue
+2. email: xiangli8@cs.stonybrook.edu
diff --git a/check_full_status.sh b/check_full_status.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# Request the get_full_status endpoint of the search server.
+# The base URL is read from the search-server-ip file in the current directory.
+url=$(cat search-server-ip)
+# Quote the expansion so the URL is passed to curl as a single argument.
+curl -H 'Content-Type: application/json' "${url}get_full_status"
diff --git a/check_status.sh b/check_status.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# Request the get_status endpoint of the search server.
+# The base URL is read from the search-server-ip file in the current directory.
+url=$(cat search-server-ip)
+# Quote the expansion so the URL is passed to curl as a single argument.
+curl -H 'Content-Type: application/json' "${url}get_status"
diff --git a/conda_env.yml b/conda_env.yml
@@ -0,0 +1,24 @@
+name: elo-sac
+channels:
+  - pytorch
+  - defaults
+dependencies:
+  - python
+  - pytorch
+  - torchvision
+  - cudatoolkit=11.3
+  - absl-py
+  - pyparsing
+  - pillow
+  - pip
+  - pip:
+    - numpy
+    - termcolor
+    - git+https://github.com/deepmind/dm_control.git
+    - git+https://github.com/1nadequacy/dmc2gym.git
+    - tensorboard
+    - imageio
+    - imageio-ffmpeg
+    - scikit-image
+    - setproctitle
+    - scipy
diff --git a/decoder.py b/decoder.py
@@ -0,0 +1,74 @@
+import torch
+import torch.nn as nn
+
+from encoder import OUT_DIM
+
+
+class PixelDecoder(nn.Module):
+    def __init__(self, obs_shape, feature_dim, num_layers=2, num_filters=32):
+        super().__init__()
+
+        self.num_layers = num_layers
+        self.num_filters = num_filters
+        self.out_dim = OUT_DIM[num_layers]
+
+        self.fc = nn.Linear(
+            feature_dim, num_filters * self.out_dim * self.out_dim
+        )
+
+        self.deconvs = nn.ModuleList()
+
+        for i in range(self.num_layers - 1):
+            self.deconvs.append(
+                nn.ConvTranspose2d(num_filters, num_filters, 3, stride=1)
+            )
+        self.deconvs.append(
+            nn.ConvTranspose2d(
+                num_filters, obs_shape[0], 3, stride=2, output_padding=1
+            )
+        )
+
+        self.outputs = dict()
+
+    def forward(self, h):
+        h = torch.relu(self.fc(h))
+        self.outputs['fc'] = h
+
+        deconv = h.view(-1, self.num_filters, self.out_dim, self.out_dim)
+        self.outputs['deconv1'] = deconv
+
+        for i in range(0, self.num_layers - 1):
+            deconv = torch.relu(self.deconvs[i](deconv))
+            self.outputs['deconv%s' % (i + 1)] = deconv
+
+        obs = self.deconvs[-1](deconv)
+        self.outputs['obs'] = obs
+
+        return obs
+
+    def log(self, L, step, log_freq):
+        if step % log_freq != 0:
+            return
+
+        for k, v in self.outputs.items():
+            L.log_histogram('train_decoder/%s_hist' % k, v, step)
+            if len(v.shape) > 2:
+                L.log_image('train_decoder/%s_i' % k, v[0], step)
+
+        for i in range(self.num_layers):
+            L.log_param(
+                'train_decoder/deconv%s' % (i + 1), self.deconvs[i], step
+            )
+        L.log_param('train_decoder/fc', self.fc, step)
+
+
+_AVAILABLE_DECODERS = {'pixel': PixelDecoder}
+
+
+def make_decoder(
+    decoder_type, obs_shape, feature_dim, num_layers, num_filters
+):
+    assert decoder_type in _AVAILABLE_DECODERS
+    return _AVAILABLE_DECODERS[decoder_type](
+        obs_shape, feature_dim, num_layers, num_filters
+    )