-
Notifications
You must be signed in to change notification settings - Fork 0
/
SimpleNeocorticalNetwork.py
114 lines (90 loc) · 5.31 KB
/
SimpleNeocorticalNetwork.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import theano
import theano.tensor as T
import numpy as np
from Tools import binomial_f, uniform_f
theano.config.floatX = 'float32'
# Ans et al. (1997): Error measure the cross-entropy. Learning rate: 0.01, momentum term: 0.5.
class SimpleNeocorticalNetwork:
def __init__(self, in_dim, h_dim, out_dim, alpha, momentum):
self.alpha = alpha
self.momentum = momentum
self.dims = [in_dim, h_dim, out_dim]
_in = np.zeros((1, in_dim), dtype=np.float32)
_h = np.zeros((1, h_dim), dtype=np.float32)
_out = np.zeros((1, out_dim), dtype=np.float32)
self._in = theano.shared(name='_in', value=_in.astype(theano.config.floatX), borrow=True)
self._h = theano.shared(name='_h', value=_h.astype(theano.config.floatX), borrow=True)
self._out = theano.shared(name='_out', value=_out.astype(theano.config.floatX), borrow=True)
in_h_Ws = uniform_f(in_dim, h_dim)
h_out_Ws = uniform_f(h_dim, out_dim)
self.in_h_Ws = theano.shared(name='in_h_Ws', value=in_h_Ws.astype(theano.config.floatX), borrow=True)
self.h_out_Ws = theano.shared(name='h_out_Ws', value=h_out_Ws.astype(theano.config.floatX), borrow=True)
prev_dW1 = np.zeros_like(in_h_Ws, dtype=np.float32)
prev_dW2 = np.zeros_like(h_out_Ws, dtype=np.float32)
self.prev_delta_W_in_h = theano.shared(name='prev_delta_W_in_h', value=prev_dW1.astype(theano.config.floatX),
borrow=True)
self.prev_delta_W_h_out = theano.shared(name='prev_delta_W_h_out', value=prev_dW2.astype(theano.config.floatX),
borrow=True)
new_input = T.fmatrix()
input_hidden_Ws = T.fmatrix()
hidden_output_Ws = T.fmatrix()
sum_h = new_input.dot(input_hidden_Ws)
next_h = T.tanh(sum_h)
sum_out = next_h.dot(hidden_output_Ws)
next_out = T.tanh(sum_out)
self.feed_forward = theano.function([new_input, input_hidden_Ws, hidden_output_Ws],
updates=[(self._in, new_input), (self._h, next_h), (self._out, next_out)])
Ws_h_out = T.fmatrix()
Ws_in_h = T.fmatrix()
prev_delta_W_in_h = T.fmatrix()
prev_delta_W_h_out = T.fmatrix()
o_in = T.fmatrix()
o_h = T.fmatrix()
o_out = T.fmatrix()
target_out = T.fmatrix()
# L2 norm
tmp = o_out-target_out
error = tmp
tmp_grad_h_out = np.ones_like(o_out, dtype=np.float32) / T.cosh(o_out)
diracs_out = error * tmp_grad_h_out * tmp_grad_h_out
delta_W_h_out = - self.alpha * o_h.T.dot(diracs_out) + self.momentum * prev_delta_W_h_out
new_Ws_h_out = Ws_h_out + delta_W_h_out
tmp_grad_in_h = np.ones_like(o_h, dtype=np.float32) / T.cosh(o_h)
diracs_h_layer_terms = tmp_grad_in_h * tmp_grad_in_h
diracs_h_chain = diracs_out.dot(Ws_h_out.T)
diracs_h = diracs_h_chain * diracs_h_layer_terms
delta_W_in_h = - self.alpha * o_in.T.dot(diracs_h) + self.momentum * prev_delta_W_in_h
new_Ws_in_h = Ws_in_h + delta_W_in_h
self.back_propagate = theano.function([Ws_in_h, Ws_h_out, o_in, o_h, o_out, target_out,
prev_delta_W_in_h, prev_delta_W_h_out],
updates=[(self.h_out_Ws, new_Ws_h_out), (self.in_h_Ws, new_Ws_in_h),
(self.prev_delta_W_in_h, delta_W_in_h),
(self.prev_delta_W_h_out, delta_W_h_out)])
# self.set_input = theano.function([new_input], updates=[(self._in, new_input)])
def train(self, IOPairs):
for input_pattern, output_pattern in IOPairs:
# no learning criteria, only propagate once?
self.feed_forward(input_pattern, self.in_h_Ws.get_value(return_internal_type=True),
self.h_out_Ws.get_value(return_internal_type=True))
self.back_propagate(self.in_h_Ws.get_value(return_internal_type=True),
self.h_out_Ws.get_value(return_internal_type=True),
self._in.get_value(return_internal_type=True),
self._h.get_value(return_internal_type=True),
self._out.get_value(return_internal_type=True), output_pattern,
self.prev_delta_W_in_h.get_value(return_internal_type=True),
self.prev_delta_W_h_out.get_value(return_internal_type=True))
def get_random_IO(self):
# random input
random_in_pattern = binomial_f(1, self.dims[0], 0.5)
random_in_pattern = random_in_pattern * 2 - np.ones_like(random_in_pattern)
# print "random_in_pattern:", random_in_pattern
return self.get_IO(random_in_pattern)
def get_IO(self, input_pattern):
self.feed_forward(input_pattern, self.in_h_Ws.get_value(), self.h_out_Ws.get_value())
corresponding_output = self._out.get_value()
return [input_pattern, corresponding_output]
def print_layers(self):
print "\nPrinting layer activation values:"
print "input:\t", self._in.get_value()
print "hidden:\t", self._h.get_value()
print "output:\t", self._out.get_value()