Commit f2370e9: First commit
Niels Justesen authored and committed, Mar 19, 2018 (0 parents)
Showing 68 changed files with 31,250 additions and 0 deletions.
19 changes: 19 additions & 0 deletions .gitignore
@@ -0,0 +1,19 @@
# Byte-compiled / optimized / DLL files
__pycache__/

# Vizdoom files
_vizdoom.ini
*.wad.bak

# Log files
logs/

# Network models
trained_models/
*.pt

# Mac-specific ignores
.DS_Store

# Archives
*.zip
27 changes: 27 additions & 0 deletions README.md
@@ -0,0 +1,27 @@
# Rarity of Events
Code for the Rarity of Events (RoE) method, which rewards the agent based on the temporal rarity of in-game events.
Pre-trained models are found in the models directory.
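
The intrinsic RoE reward makes an event worth more the less often it has occurred in recent episodes. Below is a minimal sketch of that idea; the class and parameter names (`EventBuffer`, `decay`) are illustrative assumptions, not the repository's API.

~~~~
import numpy as np

class EventBuffer:
    """Sketch: keep a decayed mean of per-episode event counts and
    reward events in inverse proportion to how common they are."""

    def __init__(self, num_events, decay=0.99):
        self.mean_counts = np.zeros(num_events)  # running mean of events per episode
        self.decay = decay

    def record_episode(self, episode_event_counts):
        # Exponentially decayed mean, so rarity is temporal (recent episodes dominate)
        self.mean_counts = self.decay * self.mean_counts \
            + (1 - self.decay) * np.asarray(episode_event_counts)

    def intrinsic_reward(self, step_events):
        # Rare events (low mean count) yield large rewards; frequent ones yield small rewards
        return float(np.sum(np.asarray(step_events) / np.maximum(self.mean_counts, 1.0)))
~~~~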

## Packages to install
* pytorch
* scipy
* sdl2
* vizdoom

## Training
~~~~
# A2C baseline
python main.py --num-processes 16 --config-path scenario/deathmatch.cfg --num-frames 75000000 --no-vis
# A2C+RoE
python main.py --num-processes 16 --config-path scenario/deathmatch.cfg --num-frames 75000000 --no-vis --roe
~~~~

## Running the agent
~~~~
# A2C baseline
python enjoy.py --config-path scenario/deathmatch.cfg
# A2C+RoE
python enjoy.py --config-path scenario/deathmatch.cfg --roe
~~~~
77 changes: 77 additions & 0 deletions arguments.py
@@ -0,0 +1,77 @@
import argparse

import torch


def get_args():
    parser = argparse.ArgumentParser(description='RL')
    parser.add_argument('--lr', type=float, default=7e-4,
                        help='learning rate (default: 7e-4)')
    parser.add_argument('--eps', type=float, default=1e-5,
                        help='RMSprop optimizer epsilon (default: 1e-5)')
    parser.add_argument('--alpha', type=float, default=0.99,
                        help='RMSprop optimizer alpha (default: 0.99)')
    parser.add_argument('--gamma', type=float, default=0.99,
                        help='discount factor for rewards (default: 0.99)')
    parser.add_argument('--entropy-coef', type=float, default=0.01,
                        help='entropy term coefficient (default: 0.01)')
    parser.add_argument('--value-loss-coef', type=float, default=0.5,
                        help='value loss coefficient (default: 0.5)')
    parser.add_argument('--max-grad-norm', type=float, default=0.5,
                        help='max norm of gradients (default: 0.5)')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed (default: 1)')
    parser.add_argument('--num-processes', type=int, default=16,
                        help='how many training CPU processes to use (default: 16)')
    parser.add_argument('--num-steps', type=int, default=20,
                        help='number of forward steps in A2C (default: 20)')
    parser.add_argument('--ppo-epoch', type=int, default=4,
                        help='number of ppo epochs (default: 4)')
    parser.add_argument('--batch-size', type=int, default=64,
                        help='ppo batch size (default: 64)')
    parser.add_argument('--clip-param', type=float, default=0.2,
                        help='ppo clip parameter (default: 0.2)')
    parser.add_argument('--num-stack', type=int, default=1,
                        help='number of frames to stack (default: 1)')
    parser.add_argument('--log-interval', type=int, default=100,
                        help='log interval, one log per n updates (default: 100)')
    parser.add_argument('--save-interval', type=int, default=100,
                        help='save interval, one save per n updates (default: 100)')
    parser.add_argument('--vis-interval', type=int, default=100,
                        help='vis interval, one log per n updates (default: 100)')
    parser.add_argument('--num-frames', type=int, default=10e6,
                        help='number of frames to train (default: 10e6)')
    parser.add_argument('--env-name', default='VizDoom',
                        help='environment to train on (default: VizDoom)')
    parser.add_argument('--config-path', default='./scenarios/basic.cfg',
                        help='vizdoom configuration file path (default: ./scenarios/basic.cfg)')
    parser.add_argument('--source-models-path', default='./models',
                        help='directory from where to load source task models [A2T only] (default: ./models)')
    parser.add_argument('--log-dir', default='./',
                        help='directory to save agent logs (default: ./)')
    parser.add_argument('--save-dir', default='./models',
                        help='directory to save trained models (default: ./models)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--no-vis', action='store_true', default=False,
                        help='disables visdom visualization')
    parser.add_argument('--resume', action='store_true', default=False,
                        help='Resume training')
    parser.add_argument('--shaped', action='store_true', default=False,
                        help='Trains using shaped intrinsic reward')
    parser.add_argument('--bots', action='store_true', default=False,
                        help='Is the scenario with bots? (default: False)')
    parser.add_argument('--roe', action='store_true', default=False,
                        help='Trains using Rarity of Events (default: False)')
    parser.add_argument('--visual', action='store_true', default=False,
                        help='Trains with visuals (default: False)')
    parser.add_argument('--num-events', type=int, default=26,
                        help='number of events to record (default: 26)')
    parser.add_argument('--num-vars', type=int, default=17,
                        help='number of vars to record (default: 17)')
    args = parser.parse_args()

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    args.vis = not args.no_vis

    return args
85 changes: 85 additions & 0 deletions distributions.py
@@ -0,0 +1,85 @@
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from utils import AddBias


class Categorical(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super(Categorical, self).__init__()
        self.linear = nn.Linear(num_inputs, num_outputs)

    def forward(self, x):
        x = self.linear(x)
        return x

    def sample(self, x, deterministic):
        x = self(x)

        probs = F.softmax(x)
        if deterministic is False:
            action = probs.multinomial()
        else:
            action = probs.max(1)[1]
        return action

    def logprobs_and_entropy(self, x, actions):
        x = self(x)

        log_probs = F.log_softmax(x)
        probs = F.softmax(x)

        action_log_probs = log_probs.gather(1, actions)

        dist_entropy = -(log_probs * probs).sum(-1).mean()
        return action_log_probs, dist_entropy


class DiagGaussian(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super(DiagGaussian, self).__init__()
        self.fc_mean = nn.Linear(num_inputs, num_outputs)
        self.logstd = AddBias(torch.zeros(num_outputs))

    def forward(self, x):
        x = self.fc_mean(x)
        action_mean = x

        # An ugly hack for my KFAC implementation.
        zeros = Variable(torch.zeros(x.size()), volatile=x.volatile)
        if x.is_cuda:
            zeros = zeros.cuda()

        x = self.logstd(zeros)
        action_logstd = x
        return action_mean, action_logstd

    def sample(self, x, deterministic):
        action_mean, action_logstd = self(x)

        action_std = action_logstd.exp()

        noise = Variable(torch.randn(action_std.size()))
        if action_std.is_cuda:
            noise = noise.cuda()

        if deterministic is False:
            action = action_mean + action_std * noise
        else:
            action = action_mean
        return action

    def logprobs_and_entropy(self, x, actions):
        action_mean, action_logstd = self(x)

        action_std = action_logstd.exp()

        action_log_probs = -0.5 * ((actions - action_mean) / action_std).pow(2) - 0.5 * math.log(2 * math.pi) - action_logstd
        action_log_probs = action_log_probs.sum(1, keepdim=True)
        # Entropy of a diagonal Gaussian: 0.5 + 0.5 * log(2 * pi) + log(sigma), summed over dimensions
        dist_entropy = 0.5 + 0.5 * math.log(2 * math.pi) + action_logstd
        dist_entropy = dist_entropy.sum(-1).mean()

        return action_log_probs, dist_entropy
175 changes: 175 additions & 0 deletions enjoy.py
@@ -0,0 +1,175 @@
import argparse
import os
import pickle

import torch
from torch.autograd import Variable

from envs import make_env
from vec_env import VecEnv
from time import sleep
import matplotlib.animation as animation
import numpy as np
import scipy.misc
from pylab import *

parser = argparse.ArgumentParser(description='RL')
parser.add_argument('--algo', default='a2c',
                    help='algorithm to use: a2c | acktr')
parser.add_argument('--seed', type=int, default=1,
                    help='random seed (default: 1)')
parser.add_argument('--num-stack', type=int, default=1,
                    help='number of frames to stack (default: 1)')
parser.add_argument('--log-interval', type=int, default=10,
                    help='log interval, one log per n updates (default: 10)')
parser.add_argument('--env-name', default='VizDoom',
                    help='environment to train on (default: VizDoom)')
parser.add_argument('--config-path', default='./scenarios/deadly_corridor.cfg',
                    help='vizdoom configuration file path (default: ./scenarios/deadly_corridor.cfg)')
parser.add_argument('--load-dir', default='./models/',
                    help='directory with models')
parser.add_argument('--log-dir', default='/tmp/doom/',
                    help='directory to save agent logs (default: /tmp/doom)')
parser.add_argument('--roe', action='store_true', default=False,
                    help='Loads the RoE model (default: False)')
parser.add_argument('--demo', action='store_true', default=True,
                    help='Play in real-time with visuals (default: True)')
parser.add_argument('--record', action='store_true', default=False,
                    help='Record game (default: False)')
parser.add_argument('--heatmap', action='store_true', default=False,
                    help='Saves data for heatmaps (default: False)')
args = parser.parse_args()

try:
    os.makedirs(args.log_dir)
except OSError:
    pass

envs = VecEnv([make_env(0, config_file_path=args.config_path, visual=args.demo)], record=args.record)

scenario = args.config_path.split("/")[1].split(".")[0]
exp_name = scenario + ("_event" if args.roe else "")

print("Scenario: " + scenario)
print("Experiment: " + exp_name)

if args.roe:
    model_name = args.algo + "/vizdoom_" + scenario.split("-")[0] + "_event"
else:
    model_name = args.algo + "/vizdoom_" + scenario.split("-")[0]

print("Model: " + model_name)
actor_critic = torch.load(os.path.join(args.load_dir, model_name + ".pt"))

actor_critic.eval()

obs_shape = envs.observation_space_shape
obs_shape = (obs_shape[0] * args.num_stack, *obs_shape[1:])
current_obs = torch.zeros(1, *obs_shape)

if args.record:
    try:
        os.remove("recording_" + scenario + ".lmp")
    except Exception as e:
        pass

def update_current_obs(obs):
    shape_dim0 = envs.observation_space_shape[0]
    obs = torch.from_numpy(obs).float()
    if args.num_stack > 1:
        current_obs[:, :-shape_dim0] = current_obs[:, shape_dim0:]
    current_obs[:, -shape_dim0:] = obs

obs = envs.reset()
update_current_obs(obs)
#vars = envs.get_all_game_variables()
#vars = torch.from_numpy(to_input_vars(vars)).float()

num_episodes = 10 if not args.record else 1
total_rewards = []
episode_cnt = 0
episode_reward = 0.0
total_kills = []

frame = 0

deterministic = True
num_of_events = 26
episode_events = np.zeros(num_of_events)

positions = []
positions_episode = []
position = envs.get_position()[0]
positions_episode.append(position)

while episode_cnt < num_episodes:
    if args.demo:
        sleep(1/24)

    # Save frames
    #scipy.misc.imsave('./frames/' + scenario + '_' + str(frame) + '.jpg', current_obs.numpy()[0][0])
    frame += 1

    #actor_critic.vars = Variable(vars)
    value, action = actor_critic.act(Variable(current_obs, volatile=True),
                                     deterministic=deterministic)
    if deterministic:
        cpu_actions = action.data.cpu().numpy()  # Enable for deterministic play
    else:
        cpu_actions = action.data.squeeze(1).cpu().numpy()

    # Observe reward and next obs
    obs, reward, done, _, events = envs.step([cpu_actions[0]])

    # Fix reward
    if scenario in ["deathmatch", "my_way_home"]:
        reward[0] *= 100
    if scenario == "deadly_corridor":
        reward[0] = 1 if events[0][2] >= 1 else 0

    #print('Frame', frame)
    #print ('Reward:', reward[0] * 100)

    position = envs.get_position()[0]
    positions_episode.append(position)

    if events[0][15] > 0:
        print("kill: " + str(events[0][15]))

    #vars = torch.from_numpy(np.array(to_input_vars(vars))).float()
    episode_reward += reward[0] * 100
    episode_events = episode_events + np.array(events[0])

    if done:
        #print("Reward: " + str(episode_reward))
        positions.append(np.copy(positions_episode))
        positions_episode = []

        total_rewards.append(episode_reward)
        episode_cnt += 1
        episode_reward = 0.0
        episode_game_variables = envs.get_all_game_variables()[0]
        total_kills.append(episode_events[15])
        episode_events = np.zeros(num_of_events)

        obs = envs.reset()

        position = envs.get_position()[0]
        positions_episode.append(position)

    #actor_critic = torch.load(os.path.join(args.load_dir, log_file_name.split(".log")[0] + ".pt"))
    #actor_critic.eval()

    update_current_obs(obs)

print('Avg reward:', np.mean(total_rewards))
print('Std. dev reward:', np.std(total_rewards))
print('Avg kills:', np.mean(total_kills))
print('Std. dev. kills:', np.std(total_kills))

heat_name = scenario + "_" + model_name + ".p"

if args.heatmap:
    pickle.dump(positions, open("./heat_data/" + exp_name, "wb"))

envs.close()