├── README.md ├── .gitignore ├── LICENSE ├── data.py └── arma.py /README.md: -------------------------------------------------------------------------------- 1 | online-ARMA 2 | =========== 3 | 4 | online learning for time series prediction 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | bin/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # Installer logs 26 | pip-log.txt 27 | pip-delete-this-directory.txt 28 | 29 | # Unit test / coverage reports 30 | htmlcov/ 31 | .tox/ 32 | .coverage 33 | .cache 34 | nosetests.xml 35 | coverage.xml 36 | 37 | # Translations 38 | *.mo 39 | 40 | # Mr Developer 41 | .mr.developer.cfg 42 | .project 43 | .pydevproject 44 | 45 | # Rope 46 | .ropeproject 47 | 48 | # Django stuff: 49 | *.log 50 | *.pot 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Tianming Lu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import statsmodels.api as sm 3 | from statsmodels.tsa.arima_process import ArmaProcess, arma_generate_sample 4 | import matplotlib.pyplot as plt 5 | import datetime 6 | import pandas.io.data as web 7 | 8 | 9 | def gen_dataset1(n_samples=10000): 10 | alpha = np.array([0.6, -0.5, 0.4, -0.4, 0.3]) 11 | beta = np.array([0.3, -0.2]) 12 | a = 5 13 | b = 2 14 | sigma = 0.3 15 | 16 | noises = [0]*b 17 | arma = [0]*a 18 | for i in range(n_samples): 19 | noise = np.random.normal(0, sigma) 20 | x = np.sum(arma[:-a-1:-1] * alpha) 21 | x += np.sum(noises[:-b-1:-1] * beta) 22 | x += noise 23 | arma.append(x) 24 | noises.append(noise) 25 | arma = np.array(arma[a:]) 26 | return arma 27 | 28 | 29 | def gen_dataset2(n_samples): 30 | alpha1 = np.array([-0.4, -0.5, 0.4, 0.4, 0.1]) 31 | alpha2 = np.array([0.6, -0.4, 0.4, -0.5, 0.4]) 32 | beta = np.array([0.32, -0.2]) 33 | a = 5 34 | b = 2 35 | 36 | noises = [0]*b 37 | arma = [0]*a 38 | for i in range(n_samples): 39 | noise = np.random.uniform(-0.5, 0.5) 40 | alpha = alpha1*(i/float(n_samples)) + alpha2*(1 - i/float(n_samples)) 41 | x = np.sum(arma[:-a-1:-1] * alpha) 42 | x += np.sum(noises[:-b-1:-1] * beta) 43 | x += noise 44 | arma.append(x) 45 | noises.append(noise) 46 | return np.array(arma[a:]) 47 | 48 | 49 | def gen_dataset3(n_samples=10000): 50 | n = n_samples/2 51 | alpha1 = np.array([0.6, -0.5, 0.4, -0.4, 0.3]) 52 | beta1 = np.array([0.3, -0.2]) 53 | alpha2 = np.array([-0.4, -0.5, 0.4, 0.4, 0.1]) 54 | beta2 = np.array([-0.3, 0.2]) 55 | 56 | a = 5 57 | b = 2 58 | noises1 = [0]*b 59 | arma1 = [0]*a 60 | for i in range(n): 61 | noise = np.random.uniform(-0.5, 0.5) 62 | x = np.sum(arma1[:-a-1:-1] * alpha1) 63 | x += np.sum(noises1[:-b-1:-1] * beta1) 64 | x += noise 65 | arma1.append(x) 66 | noises1.append(noise) 67 | 68 | noises2 = [0]*b 69 | arma2 = [0]*a 70 | for i in range(n): 71 | noise = np.random.uniform(-0.5, 0.5) 72 | x = np.sum(arma2[:-a-1:-1] * alpha2) 73 | x += np.sum(noises2[:-b-1:-1] * beta2) 74 | x += noise 75 | arma2.append(x) 76 | noises2.append(noise) 77 | 78 | arma = arma1[a:] + arma2[a:] 79 | return np.array(arma) 80 | 81 | 82 | def gen_dataset4(n_samples=10000): 83 | alpha = np.array([0.11, -0.5]) 84 | beta = np.array([0.41, -0.39, -0.685, 0.1]) 85 | a = 2 86 | b = 4 87 | 88 | noise = 0 89 | noises = [0]*b 90 | arma = [0]*a 91 | for i in range(n_samples): 92 | noise = np.random.normal(noise, 0.3) 93 | x = np.sum(arma[:-a-1:-1] * alpha) 94 | x += np.sum(noises[:-b-1:-1] * beta) 95 | x += noise 96 | arma.append(x) 97 | noises.append(noise) 98 | arma = np.array(arma[a:]) 99 | return arma 100 | 101 | 102 | def gen_temperature(n_samples=10000): 103 | t = sm.datasets.elnino.load() 104 | temps = [] 105 | for year in t.data.tolist(): 106 | temps.extend(year[1:]) 107 | data = np.array(temps[0:n_samples]) 108 | data = (data-np.mean(data))/(np.max(data)-np.min(data)) 109 | return data 110 | 111 | 112 | def gen_stock(n_samples=10000): 113 | start = datetime.datetime(2000, 1, 1) 114 | end = datetime.datetime(2014, 1, 1) 115 | f = web.DataReader('^GSPC', 'yahoo', start, end) 116 | data = f['Close'].tolist() 117 | data = np.array(data) 118 | data = (data-np.mean(data))/(np.max(data)-np.min(data)) 119 | return data 120 | 121 | if __name__ == '__main__': 122 | n = 10000 123 | # dataset = gen_dataset4(n_samples=n) 124 | # dataset = gen_temperature() 125 | dataset = gen_stock() 126 | n = dataset.shape[0] 127 | plt.plot(range(n), dataset) 128 | plt.show() 129 | -------------------------------------------------------------------------------- /arma.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import inv 3 | from scipy.optimize import fmin_bfgs 4 | import matplotlib.pyplot as plt 5 | import statsmodels.api as sm 6 | import pandas as pd 7 | import data 8 | from statsmodels.tsa.arima_process import arma_generate_sample 9 | 10 | def K_min(y, A): 11 | def f(x): 12 | tmp = np.matrix(y).reshape(-1, 1) - np.matrix(x).reshape(-1, 1) 13 | result = np.dot(tmp.T, A) 14 | result = np.dot(result, tmp) 15 | return result[0, 0] 16 | return f 17 | 18 | def arma_ons(X, m, k, q): 19 | """ 20 | arma online newton step 21 | """ 22 | D = np.sqrt(2*(m+k)) 23 | G = 2*np.sqrt(m+k)*D 24 | rate = 0.5*min(1./(m+k), 4*G*D) 25 | epsilon = 1./(rate**2 * D**2) 26 | A = np.diag([1]*(m+k)) * epsilon 27 | A = np.matrix(A) 28 | T = X.shape[0] 29 | 30 | L = np.random.uniform(-0.5, 0.5, (m+k, 1)) 31 | L = np.matrix(L) 32 | 33 | X_p = np.zeros(T) 34 | loss = np.zeros(T) 35 | for t in range(T): 36 | #predict 37 | X_t = 0 38 | for i in range(m+k): 39 | if t-i-1 < 0: 40 | break 41 | X_t += L[i]*X[t-i-1] 42 | X_p[t] = X_t 43 | 44 | #loss 45 | loss[t] = (X[t]-X_t)**2 46 | 47 | #update 48 | nabla = np.zeros((m+k, 1)) 49 | for i in range(m+k): 50 | x = X[t-i-1] if t-i-1 >= 0 else 0 51 | nabla[i, 0] = -2*(X[t]-X_t)*x 52 | A = A + np.dot(nabla, nabla.T) 53 | # y = L - 1/rate*np.dot(inv(A), nabla) 54 | # L = fmin_bfgs(K_min(y, A), L) 55 | # L = np.matrix(L).reshape(-1, 1) 56 | L = L - 1/rate*np.dot(inv(A), nabla) 57 | return X_p, loss 58 | 59 | 60 | def arma_ogd(X, m, k, q): 61 | """ 62 | ARMA online gradient descent 63 | """ 64 | D = np.sqrt(2*(m+k)) 65 | G = 2*np.sqrt(m+k)*D 66 | T = X.shape[0] 67 | rate = D/(G*np.sqrt(T)) 68 | 69 | L = np.random.uniform(-0.5, 0.5, (m+k, 1)) 70 | L = np.matrix(L) 71 | 72 | X_p = np.zeros(T) 73 | loss = np.zeros(T) 74 | for t in range(T): 75 | #predict 76 | X_t = 0 77 | for i in range(m+k): 78 | if t-i-1 < 0: 79 | break 80 | X_t += L[i]*X[t-i-1] 81 | X_p[t] = X_t 82 | 83 | #loss 84 | loss[t] = (X[t]-X_t)**2 85 | 86 | #update 87 | nabla = np.zeros((m+k, 1)) 88 | for i in range(m+k): 89 | x = X[t-i-1] if t-i-1 >= 0 else 0 90 | nabla[i, 0] = -2*(X[t]-X_t)*x 91 | L = L - rate*nabla 92 | return X_p, loss 93 | 94 | 95 | def gen_errors(loss): 96 | n = len(loss) 97 | errors = np.zeros(n) 98 | for i in range(n): 99 | errors[i] = np.sum(loss[0:i+1])/(i+1) 100 | return errors 101 | 102 | 103 | def average(datagen, N, arma, n): 104 | avg = np.zeros(N) 105 | for i in range(n): 106 | X = datagen(N) 107 | X_p, loss = arma(X, 5, 5, 0) 108 | avg += loss 109 | avg = avg / n 110 | return avg 111 | 112 | if __name__ == '__main__': 113 | n = 10000 114 | iters = 2 115 | t = range(n) 116 | X = data.gen_dataset1(n) 117 | 118 | plt.subplot(221) 119 | loss = average(data.gen_dataset1, n, arma_ons, iters) 120 | e = gen_errors(loss) 121 | plt.plot(t, e, label="ARMA-ONS") 122 | 123 | loss = average(data.gen_dataset1, n, arma_ogd, iters) 124 | e = gen_errors(loss) 125 | plt.plot(t, e, label="ARMA-OGD") 126 | plt.legend() 127 | plt.title("Sanity check") 128 | 129 | plt.subplot(222) 130 | loss = average(data.gen_dataset2, n, arma_ons, iters) 131 | e = gen_errors(loss) 132 | plt.plot(t, e, label="ARMA-ONS") 133 | 134 | loss = average(data.gen_dataset2, n, arma_ogd, iters) 135 | e = gen_errors(loss) 136 | plt.plot(t, e, label="ARMA-OGD") 137 | plt.legend() 138 | plt.title("Slowly changing coefficients") 139 | 140 | plt.subplot(223) 141 | loss = average(data.gen_dataset3, n, arma_ons, iters) 142 | e = gen_errors(loss) 143 | plt.plot(t, e, label="ARMA-ONS") 144 | 145 | loss = average(data.gen_dataset3, n, arma_ogd, iters) 146 | e = gen_errors(loss) 147 | plt.plot(t, e, label="ARMA-OGD") 148 | plt.legend() 149 | plt.title("Abrupt change") 150 | 151 | plt.subplot(224) 152 | loss = average(data.gen_dataset4, n, arma_ons, iters) 153 | e = gen_errors(loss) 154 | plt.plot(t, e, label="ARMA-ONS") 155 | 156 | # loss = average(data.gen_dataset4, n, arma_ogd, iters) 157 | # e = gen_errors(loss) 158 | # plt.plot(t, e, label="ARMA-OGD") 159 | plt.legend() 160 | plt.title("Correlated noise") 161 | 162 | plt.show() 163 | 164 | #for real data 165 | plt.subplot(121) 166 | X = data.gen_temperature() 167 | n = X.shape[0] 168 | t = range(n) 169 | 170 | X_p, loss = arma_ons(X, 5, 5, 0) 171 | e = gen_errors(loss) 172 | plt.plot(t, e, label="ARMA-ONS") 173 | 174 | X_p, loss = arma_ogd(X, 5, 5, 0) 175 | e = gen_errors(loss) 176 | plt.plot(t, e, label='AMRA-OGD') 177 | plt.legend() 178 | 179 | plt.subplot(122) 180 | X = data.gen_stock() 181 | n = X.shape[0] 182 | t = range(n) 183 | 184 | X_p, loss = arma_ons(X, 5, 5, 0) 185 | e = gen_errors(loss) 186 | plt.plot(t, e, label='ARMA-ONS') 187 | 188 | X_p, loss = arma_ogd(X, 5, 5, 0) 189 | e = gen_errors(loss) 190 | plt.plot(t, e, label='ARMA-OGD') 191 | plt.legend() 192 | plt.show() 193 | --------------------------------------------------------------------------------