├── kalmann
│   ├── __init__.py
│   └── knn.py
├── setup.py
├── README.md
├── LICENSE
├── demos
│   ├── 1d_fit.py
│   ├── 2d_classify.py
│   ├── 1d_fit_analysis.py
│   └── 3d_dynamic.py
└── .gitignore

/kalmann/__init__.py:
--------------------------------------------------------------------------------
from .knn import KNN, load_knn

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
"""
To install, change to this directory and do:
    sudo python setup.py install

"""
from distutils.core import setup

setup(name='kalmann',
      version='1.0',
      description="Extended Kalman Filter for Training Neural-Networks",
      author="Jason Nezvadovitz",
      packages=['kalmann'],
      )

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# kalmaNN
Extended Kalman Filter for Training Neural-Networks

An implementation of chapter two of [Kalman Filtering and Neural Networks](https://books.google.com/books?id=6dHEojwXU6MC&lpg=PP1&pg=PA30#v=onepage&q&f=false), edited by Haykin.

The file knn.py contains a class for creating and training a multilayer perceptron by either extended Kalman filter (a Newton-Raphson-like method) or stochastic gradient descent. Enjoy!

![alt text](http://i.imgur.com/lTObwbI.png)

![alt text](http://i.imgur.com/y1qXrFU.png)

![alt text](http://i.imgur.com/pxZf36m.png)

![alt text](http://i.imgur.com/tGaiMFV.png)

![alt text](http://i.imgur.com/wSyH7QP.png)

![alt text](http://i.imgur.com/Mi8B6y0.png)
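
A minimal usage sketch, condensed from demos/1d_fit.py:

```python
import numpy as np
import kalmann

# Noisy samples of a smooth 1D function
stdev = 0.05
U = np.arange(-10, 10, 0.2)
Y = np.exp(-U**2) + 0.5*np.exp(-(U-3)**2) + np.random.normal(0, stdev, len(U))

# One hidden layer of 10 logistic neurons, trained by EKF
knn = kalmann.KNN(nu=1, ny=1, nl=10, neuron='logistic')
knn.train(nepochs=100, U=U, Y=Y, method='ekf', P=0.5, Q=0, R=0.1+stdev**2)

# Evaluate the trained network anywhere, including outside the training range
F = knn.feedforward(np.arange(-15, 15, 0.01))
```

See the demos directory for classification (2d_classify.py) and dynamical prediction (3d_dynamic.py) examples.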
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 Jason Nezvadovitz

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/demos/1d_fit.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""
Training and using a KNN for 1D data interpolation and extrapolation.
Comparison of training methods, EKF vs SGD.

"""
# Dependencies
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt
import kalmann

# Get some noisy training data, a fun compact function
stdev = 0.05
U = np.arange(-10, 10, 0.2)
Y = np.exp(-U**2) + 0.5*np.exp(-(U-3)**2) + np.random.normal(0, stdev, len(U))

# Create two identical KNN's that will be trained differently
knn_ekf = kalmann.KNN(nu=1, ny=1, nl=10, neuron='logistic')
knn_sgd = kalmann.KNN(nu=1, ny=1, nl=10, neuron='logistic')

# Train
nepochs_ekf = 100
nepochs_sgd = 400
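# A note on the EKF arguments below: P is the initial weight covariance, Q the
# process noise covariance, and R the data noise covariance (see the train
# docstring in kalmann/knn.py); here R is 0.1 plus the known variance stdev**2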
knn_ekf.train(nepochs=nepochs_ekf, U=U, Y=Y, method='ekf', P=0.5, Q=0, R=0.1+stdev**2, pulse_T=0.75)
knn_sgd.train(nepochs=nepochs_sgd, U=U, Y=Y, method='sgd', step=0.05, pulse_T=0.5)

# Evaluation
X = np.arange(-15, 15, 0.01)
plt.suptitle("Data Fit", fontsize=22)
plt.scatter(U, Y, c='b', s=5)
plt.plot(X, knn_ekf.feedforward(X), c='g', lw=3, label='EKF: {} epochs'.format(nepochs_ekf))
plt.plot(X, knn_sgd.feedforward(X), c='k', ls=':', lw=2, label='SGD: {} epochs'.format(nepochs_sgd))
plt.grid(True)
plt.legend(fontsize=22)
plt.show()

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject

# CUSTOM:
# KNN data files
*.knn

--------------------------------------------------------------------------------
/demos/2d_classify.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""
Training and using a KNN for classification of 2D data.
Comparison of training methods, EKF vs SGD.

"""
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt
import kalmann

# Get some noisy training data classifications, spirals!
n = 100
stdev = 0.2
U = np.zeros((n*3, 2))
Y = np.zeros((n*3, 1), dtype='uint8')
for j in range(3):
    ix = range(n*j, n*(j+1))
    r = np.linspace(0, 1, n)
    t = np.linspace(j*4, (j+1)*4, n) + np.random.normal(0, stdev, n)
    U[ix] = np.c_[r*np.sin(t), r*np.cos(t)]
    Y[ix] = j
Y[-20:-18] = 0  # mislabel two samples from the last spiral as class 0

# Create two identical KNN's that will be trained differently
knn_ekf = kalmann.KNN(nu=2, ny=1, nl=10, neuron='logistic')
knn_sgd = kalmann.KNN(nu=2, ny=1, nl=10, neuron='logistic')

# Train
nepochs_ekf = 100
nepochs_sgd = 200
knn_ekf.train(nepochs=nepochs_ekf, U=U, Y=Y, method='ekf', P=0.2, Q=0, R=stdev**2, pulse_T=2)
knn_sgd.train(nepochs=nepochs_sgd, U=U, Y=Y, method='sgd', step=0.1, pulse_T=2)

# Use the KNNs as classifiers
F_ekf = knn_ekf.classify(U, high=2, low=0)
F_sgd = knn_sgd.classify(U, high=2, low=0)
print("EKF Classification Accuracy: {}%".format(int(100*np.sum(F_ekf==Y)/len(Y))))
print("SGD Classification Accuracy: {}%\n".format(int(100*np.sum(F_sgd==Y)/len(Y))))

# Evaluation
fig = plt.figure()
ax = fig.add_subplot(1, 3, 1)
ax.set_title("True Classifications", fontsize=22)
ax.scatter(U[:, 0], U[:, 1], c=Y[:,0])
plt.axis('equal')
ax = fig.add_subplot(1, 3, 2)
ax.set_title("EKF: {} epochs".format(nepochs_ekf), fontsize=22)
ax.scatter(U[:, 0], U[:, 1], c=F_ekf[:,0])
plt.axis('equal')
ax = fig.add_subplot(1, 3, 3)
ax.set_title("SGD: {} epochs".format(nepochs_sgd), fontsize=22)
ax.scatter(U[:, 0], U[:, 1], c=F_sgd[:,0])
plt.axis('equal')
plt.show()

--------------------------------------------------------------------------------
/demos/1d_fit_analysis.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""
Use the 1D data interpolation/extrapolation problem to benchmark convergence
variance. Comparison of training methods, EKF vs SGD.

"""
# Dependencies
from __future__ import division
import numpy as np; npl = np.linalg
import matplotlib.pyplot as plt
import kalmann

# Get some noisy training data, a fun compact function
stdev = 0.05
U = np.arange(-10, 10, 0.2)
Y = np.exp(-U**2) + 0.5*np.exp(-(U-3)**2) + np.random.normal(0, stdev, len(U))

# Repeat fitting experiment many times
nepochs_ekf = 100; nepochs_sgd = 400
ekf_results = []; sgd_results = []
for i in range(50):

    # Create two identical KNN's that will be trained differently
    knn_ekf = kalmann.KNN(nu=1, ny=1, nl=10, neuron='logistic')
    knn_sgd = kalmann.KNN(nu=1, ny=1, nl=10, neuron='logistic')

    # Train
    RMS_ekf, trcov = knn_ekf.train(nepochs=nepochs_ekf, U=U, Y=Y, method='ekf', P=0.5, Q=0, R=stdev**2, pulse_T=-1)
    RMS_sgd, _ = knn_sgd.train(nepochs=nepochs_sgd, U=U, Y=Y, method='sgd', step=0.05, pulse_T=-1)

    # Store results
    ekf_results.append(RMS_ekf[-1])
    sgd_results.append(RMS_sgd[-1])

# Evaluation
fig = plt.figure()
xlim = [0.33, 0.36]
fig.suptitle("Histogram of Final RMS Errors", fontsize=22)
ax = fig.add_subplot(2, 1, 1)
ax.hist(ekf_results, 20, density=True)
ax.set_xlim(xlim)
ax.set_ylabel("Using EKF", fontsize=18)
ax.grid(True)
ax = fig.add_subplot(2, 1, 2)
ax.hist(sgd_results, 20, density=True)
ax.set_xlim(xlim)
ax.set_ylabel("Using SGD", fontsize=18)
ax.set_xlabel("RMS", fontsize=18)
ax.grid(True)
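# The covariance trace recorded during the last EKF run gives a rough
# convergence diagnostic: with Q=0 the measurement updates can only shrink P,
# so its trace should decay as the filter grows confident in the weights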
fig2 = plt.figure()
ax = fig2.add_subplot(1, 1, 1)
ax.set_title("Trace of Covariance During Training", fontsize=22)
ax.plot(trcov)
ax.set_xlabel("Iteration", fontsize=16)
ax.grid(True)
plt.show()

--------------------------------------------------------------------------------
/demos/3d_dynamic.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""
Training and using a KNN for 3D-state dynamical prediction.

"""
# Dependencies
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import kalmann

# Get some training data from the simulation of a nonlinear system, the Lorenz Attractor!
dt = 0.01  # physical resolution
tf = 100  # experiment duration
T = np.arange(0, tf, dt, dtype=np.float64)  # time record
X = np.zeros((len(T), 3), dtype=np.float64)  # state record
Xdot = np.zeros_like(X)  # state derivative record
x = np.array([1, 1, 1], dtype=np.float64)  # initial condition
for i, t in enumerate(T):
    X[i] = np.copy(x)  # record
    Xdot[i] = np.array((10*(x[1]-x[0]),
                        x[0]*(28-x[2])-x[1],
                        x[0]*x[1]-2.6*x[2]))  # dynamic
    x = x + Xdot[i]*dt  # step simulation
per = 0.01  # training data sampling period
skip = int(per/dt)

# Create and train KNN
knn = kalmann.KNN(nu=3, ny=3, nl=20, neuron='tanh')
knn.train(nepochs=1, U=X[::skip], Y=Xdot[::skip], method='ekf', P=0.5, R=0.1, pulse_T=1)
# knn.save("lorenz")
# knn = kalmann.load_knn('lorenz')

# Use KNN to simulate system from same initial condition
Xh = np.zeros_like(X)
xh = X[0]
for i, t in enumerate(T):
    Xh[i] = np.copy(xh)
    xh = xh + knn.feedforward(xh)*dt

# Evaluation
lim = len(T)
fig1 = plt.figure()
fig1.suptitle("Evolution", fontsize=22)
ax = fig1.add_subplot(projection='3d')
ax.plot(X[0:lim:skip, 0], X[0:lim:skip, 1], X[0:lim:skip, 2], c='k', lw=1, ls=':', label="True")
ax.plot(Xh[0:lim:skip, 0], Xh[0:lim:skip, 1], Xh[0:lim:skip, 2], c='m', lw=1, label="Predict")
ax.set_xlim([-20, 20])
ax.set_ylim([-20, 30])
ax.set_zlim([0, 50])
ax.set_xlabel("x", fontsize=16)
ax.set_ylabel("y", fontsize=16)
ax.set_zlabel("z", fontsize=16)
plt.legend()
plt.show()

--------------------------------------------------------------------------------
/kalmann/knn.py:
--------------------------------------------------------------------------------
"""
Contains a class for EKF-training a feedforward neural-network.
This is primarily to demonstrate the advantages of EKF-training.
See the class docstrings for more details.
This module also includes a function for loading stored KNN objects.

"""
from __future__ import division
import numpy as np; npl = np.linalg
from scipy.linalg import block_diag
from time import time
import pickle

##########

def load_knn(filename):
    """
    Loads a stored KNN object saved with the string filename.
    Returns the loaded object.

    """
    if not isinstance(filename, str):
        raise ValueError("The filename must be a string.")
    if filename[-4:] != '.knn':
        filename = filename + '.knn'
    with open(filename, 'rb') as f:
        W, neuron, P = pickle.load(f)
    obj = KNN(W[0].shape[1]-1, W[1].shape[0], W[0].shape[0], neuron)
    obj.W, obj.P = W, P
    return obj

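# Save/load round-trip sketch ('mymodel' is an arbitrary filename; save()
# appends the '.knn' extension, and load_knn() restores W, neuron type, and P):
#     knn = KNN(nu=2, ny=1, nl=10, neuron='tanh')
#     knn.save('mymodel')        # writes ./mymodel.knn
#     knn = load_knn('mymodel')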

##########

class KNN:
    """
    Class for a feedforward neural network (NN). Currently handles only one
    hidden layer, is always fully-connected, and uses the same activation
    function type for every neuron. The NN can be trained by extended Kalman
    filter (EKF) or stochastic gradient descent (SGD). Use the train function
    to train the NN, the feedforward function to compute the NN output, and
    the classify function to round a feedforward result to the nearest class
    values. A save function is also provided to store a KNN object in the
    working directory.

    """
    def __init__(self, nu, ny, nl, neuron, sprW=5):
        """
        nu: dimensionality of input; positive integer
        ny: dimensionality of output; positive integer
        nl: number of hidden-layer neurons; positive integer
        neuron: activation function type; 'logistic', 'tanh', or 'relu'
        sprW: spread of initial randomly sampled synapse weights; float scalar

        """
        # Function dimensionalities
        self.nu = int(nu)
        self.ny = int(ny)
        self.nl = int(nl)

        # Neuron type
        if neuron == 'logistic':
            self.sig = lambda V: (1 + np.exp(-V))**-1
            self.dsig = lambda sigV: sigV * (1 - sigV)
        elif neuron == 'tanh':
            self.sig = lambda V: np.tanh(V)
            self.dsig = lambda sigV: 1 - sigV**2
        elif neuron == 'relu':
            self.sig = lambda V: np.clip(V, 0, np.inf)
            self.dsig = lambda sigV: np.float64(sigV > 0)
        else:
            raise ValueError("The neuron argument must be 'logistic', 'tanh', or 'relu'.")
        self.neuron = neuron

        # Initial synapse weight matrices
        sprW = np.float64(sprW)
        self.W = [sprW*(2*np.random.sample((nl, nu+1))-1),
                  sprW*(2*np.random.sample((ny, nl+1))-1)]
        self.nW = sum(map(np.size, self.W))
        self.P = None

        # Function for pushing signals through a synapse with bias
        self._affine_dot = lambda W, V: np.dot(np.atleast_1d(V), W[:, :-1].T) + W[:, -1]

        # Function for computing the RMS error of the current fit to some data set
        self.compute_rms = lambda U, Y: np.sqrt(np.mean(np.square(Y - self.feedforward(U))))

    ####

    def save(self, filename):
        """
        Saves the current NN to a file with the given string filename.

        """
        if not isinstance(filename, str):
            raise ValueError("The filename must be a string.")
        if filename[-4:] != '.knn':
            filename = filename + '.knn'
        with open(filename, 'wb') as output:
            pickle.dump((self.W, self.neuron, self.P), output, pickle.HIGHEST_PROTOCOL)

    ####

    def feedforward(self, U, get_l=False):
        """
        Feeds forward an (m by nu) array of inputs U through the NN.
        Returns the associated (m by ny) output matrix, and optionally
        the intermediate hidden-layer activations l.

        """
        U = np.float64(U)
        if U.ndim == 1 and len(U) > self.nu: U = U[:, np.newaxis]
        l = self.sig(self._affine_dot(self.W[0], U))
        h = self._affine_dot(self.W[1], l)
        if get_l: return h, l
        return h

    ####

    def classify(self, U, high, low=0):
        """
        Feeds forward an (m by nu) array of inputs U through the NN.
        For each associated output, the closest integer between high
        and low is returned as an (m by ny) classification matrix.
        That is, the output training data should be integers between
        low and high.
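        For example, with low=0 and high=2, raw outputs of [-0.4, 1.3, 2.8]
        round and clip to classes [0, 1, 2].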

        """
        return np.int64(np.clip(np.round(self.feedforward(U), 0), low, high))

    ####

    def train(self, nepochs, U, Y, method, P=None, Q=None, R=None, step=1, dtol=-1, dslew=1, pulse_T=-1):
        """
        nepochs: number of epochs (presentations of the training data); integer
        U: input training data; float array m samples by nu inputs
        Y: output training data; float array m samples by ny outputs
        method: extended Kalman filter ('ekf') or stochastic gradient descent ('sgd')
        P: initial weight covariance for ekf; float scalar or (nW by nW) posdef array
        Q: process covariance for ekf; float scalar or (nW by nW) semiposdef array
        R: data covariance for ekf; float scalar or (ny by ny) posdef array
        step: step-size scaling; float scalar
        dtol: finish when the average RMS error change per epoch over the last
              dslew epochs is < dtol; float scalar (-1 to never finish early)
        dslew: number of epochs over which the dtol average is taken; integer
        pulse_T: seconds between printouts of the training status; float scalar
                 (-1 to never print)

        Returns the list of RMS errors at each epoch and, for 'ekf', the list
        of covariance traces at each iteration (empty list for 'sgd').

        """
        # Verify data
        U = np.float64(U)
        Y = np.float64(Y)
        if U.ndim == 1 and len(U) > self.nu: U = U[:, np.newaxis]
        if Y.ndim == 1 and len(Y) > self.ny: Y = Y[:, np.newaxis]

        # Set-up
        if method == 'ekf':
            self.update = self._ekf

            if P is None:
                if self.P is None:
                    raise ValueError("Initial P not specified.")
            elif np.isscalar(P):
                self.P = P*np.eye(self.nW)
            else:
                if np.shape(P) != (self.nW, self.nW):
                    raise ValueError("P must be a float scalar or (nW by nW) array.")
                self.P = np.float64(P)

            if Q is None:
                self.Q = np.zeros((self.nW, self.nW))
            elif np.isscalar(Q):
                self.Q = Q*np.eye(self.nW)
            else:
                if np.shape(Q) != (self.nW, self.nW):
                    raise ValueError("Q must be a float scalar or (nW by nW) array.")
                self.Q = np.float64(Q)
            if np.any(self.Q): self.Q_nonzero = True
            else: self.Q_nonzero = False

            if R is None:
                raise ValueError("R must be specified for EKF training.")
            elif np.isscalar(R):
                self.R = R*np.eye(self.ny)
            else:
                if np.shape(R) != (self.ny, self.ny):
                    raise ValueError("R must be a float scalar or (ny by ny) array.")
                self.R = np.float64(R)
            if npl.matrix_rank(self.R) != len(self.R):
                raise ValueError("R must be positive definite.")

        elif method == 'sgd':
            self.update = self._sgd
        else:
            raise ValueError("The method argument must be either 'ekf' or 'sgd'.")
        last_pulse = 0
        RMS = []
        trcov = []

        # Shuffle data between epochs
        print("Training...")
        for epoch in range(nepochs):
            rand_idx = np.random.permutation(len(U))
            U_shuffled = U[rand_idx]
            Y_shuffled = Y[rand_idx]
            RMS.append(self.compute_rms(U, Y))

            # Check for convergence
            if len(RMS) > dslew and abs(RMS[-1] - RMS[-1-dslew])/dslew < dtol:
                print("\nConverged after {} epochs!\n\n".format(epoch+1))
                return RMS, trcov

            # Train
            for i, (u, y) in enumerate(zip(U_shuffled, Y_shuffled)):

                # Forward propagation
                h, l = self.feedforward(u, get_l=True)

                # Do the learning
                self.update(u, y, h, l, step)
                if method == 'ekf': trcov.append(np.trace(self.P))

                # Heartbeat
                if (pulse_T >= 0 and time()-last_pulse > pulse_T) or (epoch == nepochs-1 and i == len(U)-1):
                    print("------------------")
                    print("  Epoch: {}%".format(int(100*(epoch+1)/nepochs)))
                    print("   Iter: {}%".format(int(100*(i+1)/len(U))))
                    print("   RMSE: {}".format(np.round(RMS[-1], 6)))
                    if method == 'ekf': print("tr(Cov): {}".format(np.round(trcov[-1], 6)))
                    print("------------------")
                    last_pulse = time()
        print("\nTraining complete!\n\n")
        RMS.append(self.compute_rms(U, Y))
        return RMS, trcov

    ####

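    # For reference: _ekf below is the standard EKF measurement update with the
    # flattened synapse weights as the (static) state and the network itself as
    # the measurement model, linearized via the jacobian H = dh/dW:
    #     S = H P H^T + R,   K = P H^T S^-1,
    #     W <- W + step*K*(y - h),   P <- P - K H P (+ Q)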
    def _ekf(self, u, y, h, l, step):

        # Compute NN jacobian H = dh/dW at the current weights: the first block
        # chain-rules through the hidden layer for the W[0] weights, and the
        # second block is the bias-augmented hidden activations for W[1]
        D = (self.W[1][:, :-1]*self.dsig(l)).flatten()
        H = np.hstack((np.hstack((np.outer(D, u), D[:, np.newaxis])).reshape(self.ny, self.W[0].size),
                       block_diag(*np.tile(np.concatenate((l, [1])), self.ny).reshape(self.ny, self.nl+1))))

        # Kalman gain
        S = H.dot(self.P).dot(H.T) + self.R
        K = self.P.dot(H.T).dot(npl.inv(S))

        # Update weight estimates and covariance
        dW = step*K.dot(y-h)
        self.W[0] = self.W[0] + dW[:self.W[0].size].reshape(self.W[0].shape)
        self.W[1] = self.W[1] + dW[self.W[0].size:].reshape(self.W[1].shape)
        self.P = self.P - np.dot(K, H.dot(self.P))
        if self.Q_nonzero: self.P = self.P + self.Q

    ####

    def _sgd(self, u, y, h, l, step):

        # Gradient step on the output-layer weights for the error e = h - y
        e = h - y
        self.W[1] = self.W[1] - step*np.hstack((np.outer(e, l), e[:, np.newaxis]))

        # Backpropagate through the hidden layer and step the input-layer weights
        D = (e.dot(self.W[1][:, :-1])*self.dsig(l)).flatten()
        self.W[0] = self.W[0] - step*np.hstack((np.outer(D, u), D[:, np.newaxis]))

--------------------------------------------------------------------------------