├── README.md
├── .gitignore
├── LICENSE
├── data.py
└── arma.py


/README.md:
--------------------------------------------------------------------------------
1 | online-ARMA
2 | ===========
3 | 
4 | online learning for time series prediction
5 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | 
 5 | # C extensions
 6 | *.so
 7 | 
 8 | # Distribution / packaging
 9 | .Python
10 | env/
11 | bin/
12 | build/
13 | develop-eggs/
14 | dist/
15 | eggs/
16 | lib/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 | 
25 | # Installer logs
26 | pip-log.txt
27 | pip-delete-this-directory.txt
28 | 
29 | # Unit test / coverage reports
30 | htmlcov/
31 | .tox/
32 | .coverage
33 | .cache
34 | nosetests.xml
35 | coverage.xml
36 | 
37 | # Translations
38 | *.mo
39 | 
40 | # Mr Developer
41 | .mr.developer.cfg
42 | .project
43 | .pydevproject
44 | 
45 | # Rope
46 | .ropeproject
47 | 
48 | # Django stuff:
49 | *.log
50 | *.pot
51 | 
52 | # Sphinx documentation
53 | docs/_build/
54 | 
55 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2014 Tianming Lu
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/data.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import statsmodels.api as sm
  3 | from statsmodels.tsa.arima_process import ArmaProcess, arma_generate_sample
  4 | import matplotlib.pyplot as plt
  5 | import datetime
  6 | import pandas.io.data as web
  7 | 
  8 | 
  9 | def gen_dataset1(n_samples=10000):
 10 |     alpha = np.array([0.6, -0.5, 0.4, -0.4, 0.3])
 11 |     beta = np.array([0.3, -0.2])
 12 |     a = 5
 13 |     b = 2
 14 |     sigma = 0.3
 15 | 
 16 |     noises = [0]*b
 17 |     arma = [0]*a
 18 |     for i in range(n_samples):
 19 |         noise = np.random.normal(0, sigma)
 20 |         x = np.sum(arma[:-a-1:-1] * alpha)
 21 |         x += np.sum(noises[:-b-1:-1] * beta)
 22 |         x += noise
 23 |         arma.append(x)
 24 |         noises.append(noise)
 25 |     arma = np.array(arma[a:])
 26 |     return arma
 27 | 
 28 | 
 29 | def gen_dataset2(n_samples):
 30 |     alpha1 = np.array([-0.4, -0.5, 0.4, 0.4, 0.1])
 31 |     alpha2 = np.array([0.6, -0.4, 0.4, -0.5, 0.4])
 32 |     beta = np.array([0.32, -0.2])
 33 |     a = 5
 34 |     b = 2
 35 | 
 36 |     noises = [0]*b
 37 |     arma = [0]*a
 38 |     for i in range(n_samples):
 39 |         noise = np.random.uniform(-0.5, 0.5)
 40 |         alpha = alpha1*(i/float(n_samples)) + alpha2*(1 - i/float(n_samples))
 41 |         x = np.sum(arma[:-a-1:-1] * alpha)
 42 |         x += np.sum(noises[:-b-1:-1] * beta)
 43 |         x += noise
 44 |         arma.append(x)
 45 |         noises.append(noise)
 46 |     return np.array(arma[a:])
 47 | 
 48 | 
 49 | def gen_dataset3(n_samples=10000):
 50 |     n = n_samples/2
 51 |     alpha1 = np.array([0.6, -0.5, 0.4, -0.4, 0.3])
 52 |     beta1 = np.array([0.3, -0.2])
 53 |     alpha2 = np.array([-0.4, -0.5, 0.4, 0.4, 0.1])
 54 |     beta2 = np.array([-0.3, 0.2])
 55 | 
 56 |     a = 5
 57 |     b = 2
 58 |     noises1 = [0]*b
 59 |     arma1 = [0]*a
 60 |     for i in range(n):
 61 |         noise = np.random.uniform(-0.5, 0.5)
 62 |         x = np.sum(arma1[:-a-1:-1] * alpha1)
 63 |         x += np.sum(noises1[:-b-1:-1] * beta1)
 64 |         x += noise
 65 |         arma1.append(x)
 66 |         noises1.append(noise)
 67 | 
 68 |     noises2 = [0]*b
 69 |     arma2 = [0]*a
 70 |     for i in range(n):
 71 |         noise = np.random.uniform(-0.5, 0.5)
 72 |         x = np.sum(arma2[:-a-1:-1] * alpha2)
 73 |         x += np.sum(noises2[:-b-1:-1] * beta2)
 74 |         x += noise
 75 |         arma2.append(x)
 76 |         noises2.append(noise)
 77 | 
 78 |     arma = arma1[a:] + arma2[a:]
 79 |     return np.array(arma)
 80 | 
 81 | 
 82 | def gen_dataset4(n_samples=10000):
 83 |     alpha = np.array([0.11, -0.5])
 84 |     beta = np.array([0.41, -0.39, -0.685, 0.1])
 85 |     a = 2
 86 |     b = 4
 87 | 
 88 |     noise = 0
 89 |     noises = [0]*b
 90 |     arma = [0]*a
 91 |     for i in range(n_samples):
 92 |         noise = np.random.normal(noise, 0.3)
 93 |         x = np.sum(arma[:-a-1:-1] * alpha)
 94 |         x += np.sum(noises[:-b-1:-1] * beta)
 95 |         x += noise
 96 |         arma.append(x)
 97 |         noises.append(noise)
 98 |     arma = np.array(arma[a:])
 99 |     return arma
100 | 
101 | 
def gen_temperature(n_samples=10000):
    """Return a normalised series built from the statsmodels El Nino dataset.

    The rows of the dataset are flattened into one long sequence, skipping
    the first field of every row (presumably the year label -- confirm
    against the dataset).  At most ``n_samples`` readings are kept, then the
    series is centred on its mean and divided by its range (max - min).
    """
    dataset = sm.datasets.elnino.load()
    readings = []
    for row in dataset.data.tolist():
        readings.extend(row[1:])
    series = np.array(readings[:n_samples])
    spread = np.max(series) - np.min(series)
    return (series - np.mean(series)) / spread
110 | 
111 | 
def gen_stock(n_samples=10000):
    """Return a normalised series of '^GSPC' closing prices.

    Prices between 2000-01-01 and 2014-01-01 are fetched from the 'yahoo'
    source through ``web.DataReader``.  At most ``n_samples`` of the earliest
    closes are kept (the parameter used to be ignored), then the series is
    centred on its mean and divided by its range (max - min), the same
    normalisation ``gen_temperature`` applies.

    NOTE(review): ``web`` is ``pandas.io.data``, which later pandas releases
    moved into the separate pandas-datareader package -- confirm the import
    at the top of the file still resolves in the target environment.
    """
    start = datetime.datetime(2000, 1, 1)
    end = datetime.datetime(2014, 1, 1)
    f = web.DataReader('^GSPC', 'yahoo', start, end)
    # Truncate before normalising so the statistics describe the returned data.
    data = np.array(f['Close'].tolist()[:n_samples])
    data = (data-np.mean(data))/(np.max(data)-np.min(data))
    return data
120 | 
if __name__ == '__main__':
    # Quick visual check of one generator.  Swap in
    # gen_dataset4(n_samples=...) or gen_temperature() to look at another
    # series.
    series = gen_stock()
    plt.plot(range(series.shape[0]), series)
    plt.show()
129 | 


--------------------------------------------------------------------------------
/arma.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from numpy.linalg import inv
  3 | from scipy.optimize import fmin_bfgs
  4 | import matplotlib.pyplot as plt
  5 | import statsmodels.api as sm
  6 | import pandas as pd
  7 | import data
  8 | from statsmodels.tsa.arima_process import arma_generate_sample
  9 | 
 10 | def K_min(y, A):
 11 |     def f(x):
 12 |         tmp = np.matrix(y).reshape(-1, 1) - np.matrix(x).reshape(-1, 1)
 13 |         result = np.dot(tmp.T, A)
 14 |         result = np.dot(result, tmp)
 15 |         return result[0, 0]
 16 |     return f
 17 | 
 18 | def arma_ons(X, m, k, q):
 19 |     """
 20 |     arma online newton step
 21 |     """
 22 |     D = np.sqrt(2*(m+k))
 23 |     G = 2*np.sqrt(m+k)*D
 24 |     rate = 0.5*min(1./(m+k), 4*G*D)
 25 |     epsilon = 1./(rate**2 * D**2)
 26 |     A = np.diag([1]*(m+k)) * epsilon
 27 |     A = np.matrix(A)
 28 |     T = X.shape[0]
 29 | 
 30 |     L = np.random.uniform(-0.5, 0.5, (m+k, 1))
 31 |     L = np.matrix(L)
 32 | 
 33 |     X_p = np.zeros(T)
 34 |     loss = np.zeros(T)
 35 |     for t in range(T):
 36 |         #predict
 37 |         X_t = 0
 38 |         for i in range(m+k):
 39 |             if t-i-1 < 0:
 40 |                 break
 41 |             X_t += L[i]*X[t-i-1]
 42 |         X_p[t] = X_t
 43 | 
 44 |         #loss
 45 |         loss[t] = (X[t]-X_t)**2
 46 | 
 47 |         #update
 48 |         nabla = np.zeros((m+k, 1))
 49 |         for i in range(m+k):
 50 |             x = X[t-i-1] if t-i-1 >= 0 else 0
 51 |             nabla[i, 0] = -2*(X[t]-X_t)*x
 52 |         A = A + np.dot(nabla, nabla.T)
 53 |         # y = L - 1/rate*np.dot(inv(A), nabla)
 54 |         # L = fmin_bfgs(K_min(y, A), L)
 55 |         # L = np.matrix(L).reshape(-1, 1)
 56 |         L = L - 1/rate*np.dot(inv(A), nabla)
 57 |     return X_p, loss
 58 | 
 59 | 
 60 | def arma_ogd(X, m, k, q):
 61 |     """
 62 |     ARMA online gradient descent
 63 |     """
 64 |     D = np.sqrt(2*(m+k))
 65 |     G = 2*np.sqrt(m+k)*D
 66 |     T = X.shape[0]
 67 |     rate = D/(G*np.sqrt(T))
 68 | 
 69 |     L = np.random.uniform(-0.5, 0.5, (m+k, 1))
 70 |     L = np.matrix(L)
 71 | 
 72 |     X_p = np.zeros(T)
 73 |     loss = np.zeros(T)
 74 |     for t in range(T):
 75 |         #predict
 76 |         X_t = 0
 77 |         for i in range(m+k):
 78 |             if t-i-1 < 0:
 79 |                 break
 80 |             X_t += L[i]*X[t-i-1]
 81 |         X_p[t] = X_t
 82 | 
 83 |         #loss
 84 |         loss[t] = (X[t]-X_t)**2
 85 | 
 86 |         #update
 87 |         nabla = np.zeros((m+k, 1))
 88 |         for i in range(m+k):
 89 |             x = X[t-i-1] if t-i-1 >= 0 else 0
 90 |             nabla[i, 0] = -2*(X[t]-X_t)*x
 91 |         L = L - rate*nabla
 92 |     return X_p, loss
 93 | 
 94 | 
 95 | def gen_errors(loss):
 96 |     n = len(loss)
 97 |     errors = np.zeros(n)
 98 |     for i in range(n):
 99 |         errors[i] = np.sum(loss[0:i+1])/(i+1)
100 |     return errors
101 | 
102 | 
def average(datagen, N, arma, n, m=5, k=5, q=0):
    """Average the per-step loss of a learner over ``n`` fresh datasets.

    Parameters:
        datagen: callable ``datagen(N)`` returning a series of length ``N``.
        N: length of every generated series.
        arma: learner called as ``arma(X, m, k, q)``; it must return
            ``(predictions, loss)`` where ``loss`` has length ``N``.
        n: number of independent runs to average over.
        m, k, q: model-order arguments forwarded to ``arma``.  The defaults
            (5, 5, 0) are the values this function previously hard-coded.

    Returns:
        A length-``N`` array with the mean loss at every step.
    """
    avg = np.zeros(N)
    for _ in range(n):
        X = datagen(N)
        _, loss = arma(X, m, k, q)
        avg += loss
    avg = avg / n
    return avg
111 | 
def _plot_error_curves(position, curves, title=None):
    """Plot the running-mean loss of each (label, loss) pair on one subplot."""
    plt.subplot(position)
    for label, loss in curves:
        errors = gen_errors(loss)
        plt.plot(range(len(errors)), errors, label=label)
    plt.legend()
    if title is not None:
        plt.title(title)


if __name__ == '__main__':
    n = 10000
    iters = 2
    learners = [("ARMA-ONS", arma_ons), ("ARMA-OGD", arma_ogd)]

    # Synthetic data: average the loss over `iters` freshly generated series.
    synthetic_panels = [
        (221, "Sanity check", data.gen_dataset1, learners),
        (222, "Slowly changing coefficients", data.gen_dataset2, learners),
        (223, "Abrupt change", data.gen_dataset3, learners),
        # ARMA-OGD was disabled for this panel in the original script and
        # stays disabled here.
        (224, "Correlated noise", data.gen_dataset4, learners[:1]),
    ]
    for position, title, datagen, panel_learners in synthetic_panels:
        curves = [(label, average(datagen, n, learner, iters))
                  for label, learner in panel_learners]
        _plot_error_curves(position, curves, title)

    plt.show()

    #for real data
    real_panels = [(121, data.gen_temperature), (122, data.gen_stock)]
    for position, loader in real_panels:
        X = loader()
        # Each learner returns (predictions, loss); only the loss is plotted.
        curves = [(label, learner(X, 5, 5, 0)[1])
                  for label, learner in learners]
        _plot_error_curves(position, curves)
    plt.show()
193 | 


--------------------------------------------------------------------------------