├── README.md
├── arimavariations_autoarima_istanbul.ipynb
├── arimavariations_autoarima_pems.ipynb
├── arimavariations_istanbul_data_del.py
├── arimavariations_istanbul_data_mean_sdsh.py
├── arimavariations_istanbul_mean_sh.py
├── arimavariations_pems_716933.py
├── arimavariations_pems_717087.py
├── evaluatemodels_istanbul_data_del.ipynb
├── evaluatemodels_istanbul_data_mean_sdsh.ipynb
├── evaluatemodels_istanbul_data_mean_sh.ipynb
├── evaluatemodels_pems_716933.ipynb
├── evaluatemodels_pems_717087.ipynb
├── evaluationmetrics_pointforecasts.py
├── evaluationmetrics_qrapproaches.py
├── hybridmodels_istanbul.py
├── hybridmodels_pems.py
├── naive_and_average_methods.py
├── plots_istanbul.ipynb
├── plots_pems.ipynb
├── qrapproaches.py
├── qrapproaches_istanbul.ipynb
├── slstm_istanbul_data_del.py
├── slstm_istanbul_data_mean_sdsh.py
├── slstm_istanbul_data_mean_sh.py
├── slstm_pems_716933.py
├── slstm_pems_717087.py
├── ssvr_istanbul_data_del.py
├── ssvr_istanbul_data_mean_sdsh.py
├── ssvr_istanbul_data_mean_sh.py
├── ssvr_pems_716933.py
├── ssvr_pems_717087.py
├── xgboost_istanbul_data_del.py
├── xgboost_istanbul_data_mean_sdsh.py
├── xgboost_istanbul_data_mean_sh.py
├── xgboost_oems_717087.py
└── xgboost_pems_716933.py
/README.md:
--------------------------------------------------------------------------------
1 | # Traffic Flow Forecasting Methods
2 | This repository contains case studies on short-term traffic flow forecasting strategies, carried out within the scope of my master's thesis. After implementing traditional (AR, ARMA, ARIMA and SARIMA), machine learning (SXGBoost and SSVR) and deep learning (SLSTM) methods, one of the main goals is to experiment with hybrid methods (SSVRARIMA, SSLSTMARIMA and SXGBoostARIMA). Besides analyzing approaches already used in the traffic flow literature, distinct strategies are introduced and tested. Furthermore, the point forecasts are supplemented with interval forecasts; in particular, quantile regression based approaches such as quantile regression averaging (QRA), quantile regression neural network (QRNN) and quantile regression long short-term memory (QRLSTM) are implemented. Both point and interval forecasts are evaluated with several evaluation metrics, and an extensive comparison is provided among the studied methodologies.
3 |
4 | My master's thesis is available via [this link](https://tez.yok.gov.tr/UlusalTezMerkezi/TezGoster?key=qVqOZFj2DwNmvdf1oGFYiNVa87wIIBf5zByl7jQFtF29p6cfiQrQFFbSu-EjVTEn).
5 |
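6 | As a rough, self-contained sketch of the quantile regression averaging (QRA) idea (an illustration only, not the exact code used in the thesis), the snippet below pools two point forecast series into quantile forecasts with `QuantReg` from statsmodels. The synthetic series and model names are assumptions made up for this example.
7 | 
8 | ```python
9 | import numpy as np
10 | import statsmodels.api as sm
11 | 
12 | # Hypothetical, synthetic stand-ins for aligned test-period series:
13 | # two point forecasts and the corresponding observed traffic flow.
14 | rng = np.random.default_rng(0)
15 | actual = 100 + 20 * np.sin(np.linspace(0, 12, 200)) + rng.normal(0, 5, 200)
16 | forecast_a = actual + rng.normal(0, 8, 200)   # e.g. an ARIMA-style point forecast
17 | forecast_b = actual + rng.normal(0, 6, 200)   # e.g. an SLSTM-style point forecast
18 | 
19 | # Pool the point forecasts as regressors and fit one quantile regression per quantile.
20 | X = sm.add_constant(np.column_stack([forecast_a, forecast_b]))
21 | quantile_forecasts = {}
22 | for q in (0.05, 0.5, 0.95):
23 |     res = sm.QuantReg(actual, X).fit(q=q)
24 |     quantile_forecasts[q] = res.predict(X)   # q-th quantile of the combined forecast
25 | 
26 | # The 0.05 and 0.95 quantiles form a 90% prediction interval; 0.5 gives a median point forecast.
27 | ```
28 | 
29 | In practice the quantile regressions would be fit on a calibration window and evaluated on a held-out test window; predicting in-sample above only keeps the sketch short.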
--------------------------------------------------------------------------------
/arimavariations_autoarima_istanbul.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 7,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from pmdarima.arima import auto_arima\n",
10 | "import pandas as pd\n",
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 8,
17 | "metadata": {},
18 | "outputs": [
19 | {
20 | "data": {
21 | "text/html": [
22 | "
\n",
23 | "\n",
36 | "
\n",
37 | " \n",
38 | " \n",
39 | " | \n",
40 | " Unnamed: 0 | \n",
41 | " _id | \n",
42 | " DATE_TIME | \n",
43 | " LONGITUDE | \n",
44 | " LATITUDE | \n",
45 | " GEOHASH | \n",
46 | " MINIMUM_SPEED | \n",
47 | " MAXIMUM_SPEED | \n",
48 | " AVERAGE_SPEED | \n",
49 | " NUMBER_OF_VEHICLES | \n",
50 | "
\n",
51 | " \n",
52 | " \n",
53 | " \n",
54 | " 0 | \n",
55 | " 12913 | \n",
56 | " 12914 | \n",
57 | " 2020-01-01 00:00:00 | \n",
58 | " 29.108276 | \n",
59 | " 41.069641 | \n",
60 | " sxk9wk | \n",
61 | " 58 | \n",
62 | " 144 | \n",
63 | " 94 | \n",
64 | " 114 | \n",
65 | "
\n",
66 | " \n",
67 | " 1 | \n",
68 | " 621904 | \n",
69 | " 621905 | \n",
70 | " 2020-01-01 01:00:00 | \n",
71 | " 29.108276 | \n",
72 | " 41.069641 | \n",
73 | " sxk9wk | \n",
74 | " 64 | \n",
75 | " 136 | \n",
76 | " 95 | \n",
77 | " 105 | \n",
78 | "
\n",
79 | " \n",
80 | " 2 | \n",
81 | " 160509 | \n",
82 | " 160510 | \n",
83 | " 2020-01-01 02:00:00 | \n",
84 | " 29.108276 | \n",
85 | " 41.069641 | \n",
86 | " sxk9wk | \n",
87 | " 68 | \n",
88 | " 181 | \n",
89 | " 105 | \n",
90 | " 97 | \n",
91 | "
\n",
92 | " \n",
93 | " 3 | \n",
94 | " 136167 | \n",
95 | " 136168 | \n",
96 | " 2020-01-01 03:00:00 | \n",
97 | " 29.108276 | \n",
98 | " 41.069641 | \n",
99 | " sxk9wk | \n",
100 | " 76 | \n",
101 | " 158 | \n",
102 | " 107 | \n",
103 | " 63 | \n",
104 | "
\n",
105 | " \n",
106 | " 4 | \n",
107 | " 684343 | \n",
108 | " 684344 | \n",
109 | " 2020-01-01 04:00:00 | \n",
110 | " 29.108276 | \n",
111 | " 41.069641 | \n",
112 | " sxk9wk | \n",
113 | " 61 | \n",
114 | " 196 | \n",
115 | " 102 | \n",
116 | " 53 | \n",
117 | "
\n",
118 | " \n",
119 | " ... | \n",
120 | " ... | \n",
121 | " ... | \n",
122 | " ... | \n",
123 | " ... | \n",
124 | " ... | \n",
125 | " ... | \n",
126 | " ... | \n",
127 | " ... | \n",
128 | " ... | \n",
129 | " ... | \n",
130 | "
\n",
131 | " \n",
132 | " 11060 | \n",
133 | " 1179972 | \n",
134 | " 1179973 | \n",
135 | " 2021-04-30 18:00:00 | \n",
136 | " 29.108276 | \n",
137 | " 41.069641 | \n",
138 | " sxk9wk | \n",
139 | " 50 | \n",
140 | " 170 | \n",
141 | " 97 | \n",
142 | " 273 | \n",
143 | "
\n",
144 | " \n",
145 | " 11061 | \n",
146 | " 1197195 | \n",
147 | " 1197196 | \n",
148 | " 2021-04-30 19:00:00 | \n",
149 | " 29.108276 | \n",
150 | " 41.069641 | \n",
151 | " sxk9wk | \n",
152 | " 58 | \n",
153 | " 177 | \n",
154 | " 109 | \n",
155 | " 198 | \n",
156 | "
\n",
157 | " \n",
158 | " 11062 | \n",
159 | " 551362 | \n",
160 | " 551363 | \n",
161 | " 2021-04-30 20:00:00 | \n",
162 | " 29.108276 | \n",
163 | " 41.069641 | \n",
164 | " sxk9wk | \n",
165 | " 64 | \n",
166 | " 164 | \n",
167 | " 113 | \n",
168 | " 139 | \n",
169 | "
\n",
170 | " \n",
171 | " 11063 | \n",
172 | " 444616 | \n",
173 | " 444617 | \n",
174 | " 2021-04-30 21:00:00 | \n",
175 | " 29.108276 | \n",
176 | " 41.069641 | \n",
177 | " sxk9wk | \n",
178 | " 64 | \n",
179 | " 182 | \n",
180 | " 100 | \n",
181 | " 128 | \n",
182 | "
\n",
183 | " \n",
184 | " 11064 | \n",
185 | " 914968 | \n",
186 | " 914969 | \n",
187 | " 2021-04-30 22:00:00 | \n",
188 | " 29.108276 | \n",
189 | " 41.069641 | \n",
190 | " sxk9wk | \n",
191 | " 50 | \n",
192 | " 168 | \n",
193 | " 102 | \n",
194 | " 109 | \n",
195 | "
\n",
196 | " \n",
197 | "
\n",
198 | "
11065 rows × 10 columns
\n",
199 | "
"
200 | ],
201 | "text/plain": [
202 | " Unnamed: 0 _id DATE_TIME LONGITUDE LATITUDE GEOHASH \\\n",
203 | "0 12913 12914 2020-01-01 00:00:00 29.108276 41.069641 sxk9wk \n",
204 | "1 621904 621905 2020-01-01 01:00:00 29.108276 41.069641 sxk9wk \n",
205 | "2 160509 160510 2020-01-01 02:00:00 29.108276 41.069641 sxk9wk \n",
206 | "3 136167 136168 2020-01-01 03:00:00 29.108276 41.069641 sxk9wk \n",
207 | "4 684343 684344 2020-01-01 04:00:00 29.108276 41.069641 sxk9wk \n",
208 | "... ... ... ... ... ... ... \n",
209 | "11060 1179972 1179973 2021-04-30 18:00:00 29.108276 41.069641 sxk9wk \n",
210 | "11061 1197195 1197196 2021-04-30 19:00:00 29.108276 41.069641 sxk9wk \n",
211 | "11062 551362 551363 2021-04-30 20:00:00 29.108276 41.069641 sxk9wk \n",
212 | "11063 444616 444617 2021-04-30 21:00:00 29.108276 41.069641 sxk9wk \n",
213 | "11064 914968 914969 2021-04-30 22:00:00 29.108276 41.069641 sxk9wk \n",
214 | "\n",
215 | " MINIMUM_SPEED MAXIMUM_SPEED AVERAGE_SPEED NUMBER_OF_VEHICLES \n",
216 | "0 58 144 94 114 \n",
217 | "1 64 136 95 105 \n",
218 | "2 68 181 105 97 \n",
219 | "3 76 158 107 63 \n",
220 | "4 61 196 102 53 \n",
221 | "... ... ... ... ... \n",
222 | "11060 50 170 97 273 \n",
223 | "11061 58 177 109 198 \n",
224 | "11062 64 164 113 139 \n",
225 | "11063 64 182 100 128 \n",
226 | "11064 50 168 102 109 \n",
227 | "\n",
228 | "[11065 rows x 10 columns]"
229 | ]
230 | },
231 | "execution_count": 8,
232 | "metadata": {},
233 | "output_type": "execute_result"
234 | }
235 | ],
236 | "source": [
237 | "data = pd.read_csv(\"data/istanbul/istanbul_data_del.csv\")\n",
238 | "data"
239 | ]
240 | },
241 | {
242 | "cell_type": "code",
243 | "execution_count": 3,
244 | "metadata": {},
245 | "outputs": [],
246 | "source": [
247 | "train_data = data[\"NUMBER_OF_VEHICLES\"].values[:-3500]\n",
248 | "test_data = data[\"NUMBER_OF_VEHICLES\"].values[-3500:]"
249 | ]
250 | },
251 | {
252 | "cell_type": "code",
253 | "execution_count": 4,
254 | "metadata": {},
255 | "outputs": [
256 | {
257 | "name": "stdout",
258 | "output_type": "stream",
259 | "text": [
260 | "Performing stepwise search to minimize aic\n",
261 | " ARIMA(1,0,0)(0,0,0)[0] : AIC=84063.193, Time=2.02 sec\n",
262 | " ARIMA(0,0,0)(0,0,0)[0] : AIC=103809.100, Time=0.09 sec\n",
263 | " ARIMA(2,0,0)(0,0,0)[0] : AIC=83822.416, Time=0.31 sec\n",
264 | " ARIMA(3,0,0)(0,0,0)[0] : AIC=83816.720, Time=0.39 sec\n",
265 | " ARIMA(4,0,0)(0,0,0)[0] : AIC=83812.823, Time=0.47 sec\n",
266 | " ARIMA(5,0,0)(0,0,0)[0] : AIC=83777.511, Time=0.65 sec\n",
267 | " ARIMA(5,0,0)(0,0,0)[0] intercept : AIC=83055.995, Time=1.49 sec\n",
268 | " ARIMA(4,0,0)(0,0,0)[0] intercept : AIC=83059.645, Time=1.22 sec\n",
269 | "\n",
270 | "Best model: ARIMA(5,0,0)(0,0,0)[0] intercept\n",
271 | "Total fit time: 6.632 seconds\n"
272 | ]
273 | }
274 | ],
275 | "source": [
276 | "model = auto_arima(train_data, start_p=1, start_q=0,\n",
277 | " max_p=5, max_q=0, \n",
278 | " d=0, \n",
279 | " seasonal=False, \n",
280 | " start_P=0, \n",
281 | " D=None, \n",
282 | " trace=True)"
283 | ]
284 | },
285 | {
286 | "cell_type": "code",
287 | "execution_count": 9,
288 | "metadata": {},
289 | "outputs": [
290 | {
291 | "name": "stdout",
292 | "output_type": "stream",
293 | "text": [
294 | "Performing stepwise search to minimize aic\n",
295 | " ARIMA(1,0,1)(0,0,0)[0] : AIC=83816.739, Time=0.45 sec\n",
296 | " ARIMA(0,0,0)(0,0,0)[0] : AIC=103809.100, Time=0.08 sec\n",
297 | " ARIMA(1,0,0)(0,0,0)[0] : AIC=84063.193, Time=0.16 sec\n",
298 | " ARIMA(0,0,1)(0,0,0)[0] : AIC=95706.917, Time=0.68 sec\n",
299 | " ARIMA(1,0,1)(0,0,0)[0] intercept : AIC=83196.692, Time=2.42 sec\n",
300 | " ARIMA(0,0,1)(0,0,0)[0] intercept : AIC=87293.626, Time=1.39 sec\n",
301 | " ARIMA(1,0,0)(0,0,0)[0] intercept : AIC=83609.892, Time=0.41 sec\n",
302 | " ARIMA(0,0,0)(0,0,0)[0] intercept : AIC=93226.640, Time=0.12 sec\n",
303 | "\n",
304 | "Best model: ARIMA(1,0,1)(0,0,0)[0] intercept\n",
305 | "Total fit time: 5.717 seconds\n"
306 | ]
307 | }
308 | ],
309 | "source": [
310 | "model = auto_arima(train_data, start_p=1, start_q=1,\n",
311 | " max_p=1, max_q=1, \n",
312 | " d=0, \n",
313 | " seasonal=False, \n",
314 | " start_P=0, \n",
315 | " D=None, \n",
316 | " trace=True)"
317 | ]
318 | },
319 | {
320 | "cell_type": "code",
321 | "execution_count": 10,
322 | "metadata": {},
323 | "outputs": [
324 | {
325 | "name": "stdout",
326 | "output_type": "stream",
327 | "text": [
328 | "Performing stepwise search to minimize aic\n",
329 | " ARIMA(1,1,1)(0,0,0)[0] intercept : AIC=83994.220, Time=0.93 sec\n",
330 | " ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=84192.277, Time=0.16 sec\n",
331 | " ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=84008.590, Time=0.41 sec\n",
332 | " ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=83993.884, Time=0.71 sec\n",
333 | " ARIMA(0,1,0)(0,0,0)[0] : AIC=84190.279, Time=0.09 sec\n",
334 | " ARIMA(0,1,2)(0,0,0)[0] intercept : AIC=83993.439, Time=0.72 sec\n",
335 | " ARIMA(1,1,2)(0,0,0)[0] intercept : AIC=inf, Time=7.01 sec\n",
336 | " ARIMA(0,1,3)(0,0,0)[0] intercept : AIC=83975.918, Time=1.03 sec\n",
337 | " ARIMA(1,1,3)(0,0,0)[0] intercept : AIC=inf, Time=17.39 sec\n",
338 | " ARIMA(0,1,4)(0,0,0)[0] intercept : AIC=inf, Time=12.40 sec\n",
339 | " ARIMA(1,1,4)(0,0,0)[0] intercept : AIC=inf, Time=10.19 sec\n",
340 | " ARIMA(0,1,3)(0,0,0)[0] : AIC=83973.919, Time=0.64 sec\n",
341 | " ARIMA(0,1,2)(0,0,0)[0] : AIC=83991.440, Time=0.46 sec\n",
342 | " ARIMA(1,1,3)(0,0,0)[0] : AIC=inf, Time=6.96 sec\n",
343 | " ARIMA(0,1,4)(0,0,0)[0] : AIC=inf, Time=6.19 sec\n",
344 | " ARIMA(1,1,2)(0,0,0)[0] : AIC=inf, Time=4.73 sec\n",
345 | " ARIMA(1,1,4)(0,0,0)[0] : AIC=inf, Time=13.72 sec\n",
346 | "\n",
347 | "Best model: ARIMA(0,1,3)(0,0,0)[0] \n",
348 | "Total fit time: 83.753 seconds\n"
349 | ]
350 | }
351 | ],
352 | "source": [
353 | "model = auto_arima(train_data, start_p=1, start_q=1,\n",
354 | " max_p=5, max_q=5, \n",
355 | " d=1, \n",
356 | " seasonal=False, \n",
357 | " start_P=0, \n",
358 | " D=None, \n",
359 | " trace=True)"
360 | ]
361 | },
362 | {
363 | "cell_type": "code",
364 | "execution_count": null,
365 | "metadata": {},
366 | "outputs": [],
367 | "source": []
368 | }
369 | ],
370 | "metadata": {
371 | "interpreter": {
372 | "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f"
373 | },
374 | "kernelspec": {
375 | "display_name": "Python 3.7.9",
376 | "language": "python",
377 | "name": "python3"
378 | },
379 | "language_info": {
380 | "codemirror_mode": {
381 | "name": "ipython",
382 | "version": 3
383 | },
384 | "file_extension": ".py",
385 | "mimetype": "text/x-python",
386 | "name": "python",
387 | "nbconvert_exporter": "python",
388 | "pygments_lexer": "ipython3",
389 | "version": "3.7.9"
390 | },
391 | "orig_nbformat": 4
392 | },
393 | "nbformat": 4,
394 | "nbformat_minor": 2
395 | }
396 |
--------------------------------------------------------------------------------
/arimavariations_autoarima_pems.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from pmdarima.arima import auto_arima\n",
10 | "import pandas as pd\n",
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "# Station 716933"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {},
25 | "outputs": [
26 | {
27 | "data": {
28 | "text/html": [
29 | "\n",
30 | "\n",
43 | "
\n",
44 | " \n",
45 | " \n",
46 | " | \n",
47 | " Unnamed: 0 | \n",
48 | " Total Flow | \n",
49 | "
\n",
50 | " \n",
51 | " \n",
52 | " \n",
53 | " 0 | \n",
54 | " 0 | \n",
55 | " 119.333333 | \n",
56 | "
\n",
57 | " \n",
58 | " 1 | \n",
59 | " 1 | \n",
60 | " 182.333333 | \n",
61 | "
\n",
62 | " \n",
63 | " 2 | \n",
64 | " 2 | \n",
65 | " 241.666667 | \n",
66 | "
\n",
67 | " \n",
68 | " 3 | \n",
69 | " 3 | \n",
70 | " 262.666667 | \n",
71 | "
\n",
72 | " \n",
73 | " 4 | \n",
74 | " 4 | \n",
75 | " 261.666667 | \n",
76 | "
\n",
77 | " \n",
78 | " ... | \n",
79 | " ... | \n",
80 | " ... | \n",
81 | "
\n",
82 | " \n",
83 | " 26203 | \n",
84 | " 26203 | \n",
85 | " 389.000000 | \n",
86 | "
\n",
87 | " \n",
88 | " 26204 | \n",
89 | " 26204 | \n",
90 | " 357.666667 | \n",
91 | "
\n",
92 | " \n",
93 | " 26205 | \n",
94 | " 26205 | \n",
95 | " 373.333333 | \n",
96 | "
\n",
97 | " \n",
98 | " 26206 | \n",
99 | " 26206 | \n",
100 | " 340.000000 | \n",
101 | "
\n",
102 | " \n",
103 | " 26207 | \n",
104 | " 26207 | \n",
105 | " 332.333333 | \n",
106 | "
\n",
107 | " \n",
108 | "
\n",
109 | "
26208 rows × 2 columns
\n",
110 | "
"
111 | ],
112 | "text/plain": [
113 | " Unnamed: 0 Total Flow\n",
114 | "0 0 119.333333\n",
115 | "1 1 182.333333\n",
116 | "2 2 241.666667\n",
117 | "3 3 262.666667\n",
118 | "4 4 261.666667\n",
119 | "... ... ...\n",
120 | "26203 26203 389.000000\n",
121 | "26204 26204 357.666667\n",
122 | "26205 26205 373.333333\n",
123 | "26206 26206 340.000000\n",
124 | "26207 26207 332.333333\n",
125 | "\n",
126 | "[26208 rows x 2 columns]"
127 | ]
128 | },
129 | "execution_count": 2,
130 | "metadata": {},
131 | "output_type": "execute_result"
132 | }
133 | ],
134 | "source": [
135 | "data = pd.read_csv(\"data/pems/pems-d07-9months-2021-station716933-15min.csv\")\n",
136 | "data"
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "execution_count": 3,
142 | "metadata": {},
143 | "outputs": [],
144 | "source": [
145 | "train_data = data[\"Total Flow\"].values[:-7863]\n",
146 | "test_data = data[\"Total Flow\"].values[-7863:]"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 4,
152 | "metadata": {},
153 | "outputs": [
154 | {
155 | "name": "stdout",
156 | "output_type": "stream",
157 | "text": [
158 | "Performing stepwise search to minimize aic\n",
159 | " ARIMA(1,0,0)(0,0,0)[0] : AIC=inf, Time=1.67 sec\n",
160 | " ARIMA(0,0,0)(0,0,0)[0] : AIC=280647.034, Time=0.20 sec\n",
161 | " ARIMA(0,0,0)(0,0,0)[0] intercept : AIC=241280.816, Time=0.37 sec\n",
162 | " ARIMA(1,0,0)(0,0,0)[0] intercept : AIC=192746.376, Time=1.07 sec\n",
163 | " ARIMA(2,0,0)(0,0,0)[0] intercept : AIC=192720.428, Time=2.11 sec\n",
164 | " ARIMA(3,0,0)(0,0,0)[0] intercept : AIC=192716.566, Time=2.17 sec\n",
165 | " ARIMA(4,0,0)(0,0,0)[0] intercept : AIC=192430.904, Time=2.90 sec\n",
166 | " ARIMA(5,0,0)(0,0,0)[0] intercept : AIC=192099.290, Time=3.63 sec\n",
167 | " ARIMA(5,0,0)(0,0,0)[0] : AIC=inf, Time=0.96 sec\n",
168 | "\n",
169 | "Best model: ARIMA(5,0,0)(0,0,0)[0] intercept\n",
170 | "Total fit time: 15.085 seconds\n"
171 | ]
172 | }
173 | ],
174 | "source": [
175 | "model = auto_arima(train_data, start_p=1, start_q=0,\n",
176 | " max_p=5, max_q=0, \n",
177 | " d=0, \n",
178 | " seasonal=False, \n",
179 | " start_P=0, \n",
180 | " D=None, \n",
181 | " trace=True)"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": 5,
187 | "metadata": {},
188 | "outputs": [
189 | {
190 | "name": "stdout",
191 | "output_type": "stream",
192 | "text": [
193 | "Performing stepwise search to minimize aic\n",
194 | " ARIMA(1,0,1)(0,0,0)[0] : AIC=193031.946, Time=0.53 sec\n",
195 | " ARIMA(0,0,0)(0,0,0)[0] : AIC=280647.034, Time=0.17 sec\n",
196 | " ARIMA(1,0,0)(0,0,0)[0] : AIC=inf, Time=0.24 sec\n",
197 | " ARIMA(0,0,1)(0,0,0)[0] : AIC=257525.493, Time=1.47 sec\n",
198 | " ARIMA(2,0,1)(0,0,0)[0] : AIC=193033.753, Time=1.43 sec\n",
199 | " ARIMA(1,0,2)(0,0,0)[0] : AIC=193033.808, Time=0.87 sec\n",
200 | " ARIMA(0,0,2)(0,0,0)[0] : AIC=240320.015, Time=4.67 sec\n",
201 | " ARIMA(2,0,0)(0,0,0)[0] : AIC=inf, Time=0.42 sec\n",
202 | " ARIMA(2,0,2)(0,0,0)[0] : AIC=193035.852, Time=3.88 sec\n",
203 | " ARIMA(1,0,1)(0,0,0)[0] intercept : AIC=192721.259, Time=5.00 sec\n",
204 | " ARIMA(0,0,1)(0,0,0)[0] intercept : AIC=222190.468, Time=6.11 sec\n",
205 | " ARIMA(1,0,0)(0,0,0)[0] intercept : AIC=192746.376, Time=1.07 sec\n",
206 | " ARIMA(2,0,1)(0,0,0)[0] intercept : AIC=191808.976, Time=27.34 sec\n",
207 | " ARIMA(2,0,0)(0,0,0)[0] intercept : AIC=192720.428, Time=2.55 sec\n",
208 | " ARIMA(3,0,1)(0,0,0)[0] intercept : AIC=191811.256, Time=37.37 sec\n",
209 | " ARIMA(2,0,2)(0,0,0)[0] intercept : AIC=192724.408, Time=32.17 sec\n",
210 | " ARIMA(1,0,2)(0,0,0)[0] intercept : AIC=192721.705, Time=17.40 sec\n",
211 | " ARIMA(3,0,0)(0,0,0)[0] intercept : AIC=192716.566, Time=2.95 sec\n",
212 | " ARIMA(3,0,2)(0,0,0)[0] intercept : AIC=192661.699, Time=38.07 sec\n",
213 | "\n",
214 | "Best model: ARIMA(2,0,1)(0,0,0)[0] intercept\n",
215 | "Total fit time: 183.720 seconds\n"
216 | ]
217 | }
218 | ],
219 | "source": [
220 | "model = auto_arima(train_data, start_p=1, start_q=1,\n",
221 | " max_p=5, max_q=5, \n",
222 | " d=0, \n",
223 | " seasonal=False, \n",
224 | " start_P=0, \n",
225 | " D=None, \n",
226 | " trace=True)"
227 | ]
228 | },
229 | {
230 | "cell_type": "code",
231 | "execution_count": 6,
232 | "metadata": {},
233 | "outputs": [
234 | {
235 | "name": "stdout",
236 | "output_type": "stream",
237 | "text": [
238 | "Performing stepwise search to minimize aic\n",
239 | " ARIMA(1,1,1)(0,0,0)[0] intercept : AIC=193051.060, Time=3.12 sec\n",
240 | " ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=193063.542, Time=0.46 sec\n",
241 | " ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=193058.007, Time=0.59 sec\n",
242 | " ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=193057.955, Time=1.08 sec\n",
243 | " ARIMA(0,1,0)(0,0,0)[0] : AIC=193061.542, Time=0.27 sec\n",
244 | " ARIMA(2,1,1)(0,0,0)[0] intercept : AIC=193050.716, Time=9.28 sec\n",
245 | " ARIMA(2,1,0)(0,0,0)[0] intercept : AIC=193059.961, Time=1.66 sec\n",
246 | " ARIMA(3,1,1)(0,0,0)[0] intercept : AIC=192728.883, Time=12.69 sec\n",
247 | " ARIMA(3,1,0)(0,0,0)[0] intercept : AIC=192866.723, Time=2.23 sec\n",
248 | " ARIMA(4,1,1)(0,0,0)[0] intercept : AIC=192659.660, Time=8.77 sec\n",
249 | " ARIMA(4,1,0)(0,0,0)[0] intercept : AIC=192658.990, Time=2.84 sec\n",
250 | " ARIMA(5,1,0)(0,0,0)[0] intercept : AIC=192659.118, Time=4.01 sec\n",
251 | " ARIMA(5,1,1)(0,0,0)[0] intercept : AIC=192659.334, Time=20.38 sec\n",
252 | " ARIMA(4,1,0)(0,0,0)[0] : AIC=192656.990, Time=1.07 sec\n",
253 | " ARIMA(3,1,0)(0,0,0)[0] : AIC=192864.723, Time=0.88 sec\n",
254 | " ARIMA(5,1,0)(0,0,0)[0] : AIC=192657.118, Time=1.48 sec\n",
255 | " ARIMA(4,1,1)(0,0,0)[0] : AIC=192657.660, Time=3.08 sec\n",
256 | " ARIMA(3,1,1)(0,0,0)[0] : AIC=192726.882, Time=4.45 sec\n",
257 | " ARIMA(5,1,1)(0,0,0)[0] : AIC=192657.334, Time=6.43 sec\n",
258 | "\n",
259 | "Best model: ARIMA(4,1,0)(0,0,0)[0] \n",
260 | "Total fit time: 84.764 seconds\n"
261 | ]
262 | }
263 | ],
264 | "source": [
265 | "model = auto_arima(train_data, start_p=1, start_q=1,\n",
266 | " max_p=5, max_q=5, \n",
267 | " d=1, \n",
268 | " seasonal=False, \n",
269 | " start_P=0, \n",
270 | " D=None, \n",
271 | " trace=True)"
272 | ]
273 | },
274 | {
275 | "cell_type": "markdown",
276 | "metadata": {},
277 | "source": [
278 | "# Station 717087"
279 | ]
280 | },
281 | {
282 | "cell_type": "code",
283 | "execution_count": 12,
284 | "metadata": {},
285 | "outputs": [
286 | {
287 | "data": {
288 | "text/html": [
289 | "\n",
290 | "\n",
303 | "
\n",
304 | " \n",
305 | " \n",
306 | " | \n",
307 | " Unnamed: 0 | \n",
308 | " Total Flow | \n",
309 | "
\n",
310 | " \n",
311 | " \n",
312 | " \n",
313 | " 0 | \n",
314 | " 0 | \n",
315 | " 51.000000 | \n",
316 | "
\n",
317 | " \n",
318 | " 1 | \n",
319 | " 1 | \n",
320 | " 93.666667 | \n",
321 | "
\n",
322 | " \n",
323 | " 2 | \n",
324 | " 2 | \n",
325 | " 136.000000 | \n",
326 | "
\n",
327 | " \n",
328 | " 3 | \n",
329 | " 3 | \n",
330 | " 124.000000 | \n",
331 | "
\n",
332 | " \n",
333 | " 4 | \n",
334 | " 4 | \n",
335 | " 124.333333 | \n",
336 | "
\n",
337 | " \n",
338 | " ... | \n",
339 | " ... | \n",
340 | " ... | \n",
341 | "
\n",
342 | " \n",
343 | " 26203 | \n",
344 | " 26203 | \n",
345 | " 156.333333 | \n",
346 | "
\n",
347 | " \n",
348 | " 26204 | \n",
349 | " 26204 | \n",
350 | " 159.333333 | \n",
351 | "
\n",
352 | " \n",
353 | " 26205 | \n",
354 | " 26205 | \n",
355 | " 150.666667 | \n",
356 | "
\n",
357 | " \n",
358 | " 26206 | \n",
359 | " 26206 | \n",
360 | " 136.333333 | \n",
361 | "
\n",
362 | " \n",
363 | " 26207 | \n",
364 | " 26207 | \n",
365 | " 120.333333 | \n",
366 | "
\n",
367 | " \n",
368 | "
\n",
369 | "
26208 rows × 2 columns
\n",
370 | "
"
371 | ],
372 | "text/plain": [
373 | " Unnamed: 0 Total Flow\n",
374 | "0 0 51.000000\n",
375 | "1 1 93.666667\n",
376 | "2 2 136.000000\n",
377 | "3 3 124.000000\n",
378 | "4 4 124.333333\n",
379 | "... ... ...\n",
380 | "26203 26203 156.333333\n",
381 | "26204 26204 159.333333\n",
382 | "26205 26205 150.666667\n",
383 | "26206 26206 136.333333\n",
384 | "26207 26207 120.333333\n",
385 | "\n",
386 | "[26208 rows x 2 columns]"
387 | ]
388 | },
389 | "execution_count": 12,
390 | "metadata": {},
391 | "output_type": "execute_result"
392 | }
393 | ],
394 | "source": [
395 | "data2 = pd.read_csv(\"data/pems/pems-d07-9months-2021-station717087-15min.csv\")\n",
396 | "data2"
397 | ]
398 | },
399 | {
400 | "cell_type": "code",
401 | "execution_count": 13,
402 | "metadata": {},
403 | "outputs": [],
404 | "source": [
405 | "train_data2 = data2[\"Total Flow\"].values[:-7863]\n",
406 | "test_data2 = data2[\"Total Flow\"].values[-7863:]"
407 | ]
408 | },
409 | {
410 | "cell_type": "code",
411 | "execution_count": 14,
412 | "metadata": {},
413 | "outputs": [
414 | {
415 | "name": "stdout",
416 | "output_type": "stream",
417 | "text": [
418 | "Performing stepwise search to minimize aic\n",
419 | " ARIMA(1,0,0)(0,0,0)[0] : AIC=inf, Time=0.40 sec\n",
420 | " ARIMA(0,0,0)(0,0,0)[0] : AIC=267850.493, Time=0.27 sec\n",
421 | " ARIMA(0,0,0)(0,0,0)[0] intercept : AIC=236188.702, Time=0.46 sec\n",
422 | " ARIMA(1,0,0)(0,0,0)[0] intercept : AIC=178483.149, Time=0.86 sec\n",
423 | " ARIMA(2,0,0)(0,0,0)[0] intercept : AIC=178480.515, Time=3.17 sec\n",
424 | " ARIMA(3,0,0)(0,0,0)[0] intercept : AIC=178269.972, Time=2.36 sec\n",
425 | " ARIMA(4,0,0)(0,0,0)[0] intercept : AIC=177788.848, Time=3.45 sec\n",
426 | " ARIMA(5,0,0)(0,0,0)[0] intercept : AIC=176351.117, Time=3.91 sec\n",
427 | " ARIMA(5,0,0)(0,0,0)[0] : AIC=176806.000, Time=1.48 sec\n",
428 | "\n",
429 | "Best model: ARIMA(5,0,0)(0,0,0)[0] intercept\n",
430 | "Total fit time: 16.377 seconds\n"
431 | ]
432 | }
433 | ],
434 | "source": [
435 | "model = auto_arima(train_data2, start_p=1, start_q=0,\n",
436 | " max_p=5, max_q=0, \n",
437 | " d=0, \n",
438 | " seasonal=False, \n",
439 | " start_P=0, \n",
440 | " D=None, \n",
441 | " trace=True)"
442 | ]
443 | },
444 | {
445 | "cell_type": "code",
446 | "execution_count": 15,
447 | "metadata": {},
448 | "outputs": [
449 | {
450 | "name": "stdout",
451 | "output_type": "stream",
452 | "text": [
453 | "Performing stepwise search to minimize aic\n",
454 | " ARIMA(1,0,1)(0,0,0)[0] : AIC=178637.992, Time=0.98 sec\n",
455 | " ARIMA(0,0,0)(0,0,0)[0] : AIC=267850.493, Time=0.31 sec\n",
456 | " ARIMA(1,0,0)(0,0,0)[0] : AIC=inf, Time=0.41 sec\n",
457 | " ARIMA(0,0,1)(0,0,0)[0] : AIC=245047.435, Time=2.34 sec\n",
458 | " ARIMA(2,0,1)(0,0,0)[0] : AIC=inf, Time=10.72 sec\n",
459 | " ARIMA(1,0,2)(0,0,0)[0] : AIC=178497.477, Time=1.54 sec\n",
460 | " ARIMA(0,0,2)(0,0,0)[0] : AIC=227697.044, Time=4.65 sec\n",
461 | " ARIMA(2,0,2)(0,0,0)[0] : AIC=inf, Time=12.43 sec\n",
462 | " ARIMA(1,0,3)(0,0,0)[0] : AIC=177927.824, Time=3.10 sec\n",
463 | " ARIMA(0,0,3)(0,0,0)[0] : AIC=216130.891, Time=7.23 sec\n",
464 | " ARIMA(2,0,3)(0,0,0)[0] : AIC=176948.176, Time=8.09 sec\n",
465 | " ARIMA(3,0,3)(0,0,0)[0] : AIC=176946.427, Time=11.82 sec\n",
466 | " ARIMA(3,0,2)(0,0,0)[0] : AIC=inf, Time=16.52 sec\n",
467 | " ARIMA(4,0,3)(0,0,0)[0] : AIC=176760.440, Time=15.73 sec\n",
468 | " ARIMA(4,0,2)(0,0,0)[0] : AIC=178594.325, Time=12.01 sec\n",
469 | " ARIMA(5,0,3)(0,0,0)[0] : AIC=176508.266, Time=18.60 sec\n",
470 | " ARIMA(5,0,2)(0,0,0)[0] : AIC=177014.134, Time=14.45 sec\n",
471 | " ARIMA(5,0,4)(0,0,0)[0] : AIC=176395.141, Time=21.28 sec\n",
472 | " ARIMA(4,0,4)(0,0,0)[0] : AIC=176418.285, Time=21.85 sec\n",
473 | " ARIMA(5,0,5)(0,0,0)[0] : AIC=inf, Time=31.80 sec\n",
474 | " ARIMA(4,0,5)(0,0,0)[0] : AIC=176405.872, Time=25.80 sec\n",
475 | " ARIMA(5,0,4)(0,0,0)[0] intercept : AIC=inf, Time=65.17 sec\n",
476 | "\n",
477 | "Best model: ARIMA(5,0,4)(0,0,0)[0] \n",
478 | "Total fit time: 306.848 seconds\n"
479 | ]
480 | }
481 | ],
482 | "source": [
483 | "model = auto_arima(train_data2, start_p=1, start_q=1,\n",
484 | " max_p=5, max_q=5, \n",
485 | " d=0, \n",
486 | " seasonal=False, \n",
487 | " start_P=0, \n",
488 | " D=None, \n",
489 | " trace=True)"
490 | ]
491 | },
492 | {
493 | "cell_type": "code",
494 | "execution_count": 16,
495 | "metadata": {},
496 | "outputs": [
497 | {
498 | "name": "stdout",
499 | "output_type": "stream",
500 | "text": [
501 | "Performing stepwise search to minimize aic\n",
502 | " ARIMA(1,1,1)(0,0,0)[0] intercept : AIC=inf, Time=12.30 sec\n",
503 | " ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=178667.167, Time=0.59 sec\n",
504 | " ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=178656.210, Time=0.79 sec\n",
505 | " ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=178658.366, Time=2.53 sec\n",
506 | " ARIMA(0,1,0)(0,0,0)[0] : AIC=178665.167, Time=0.49 sec\n",
507 | " ARIMA(2,1,0)(0,0,0)[0] intercept : AIC=178489.001, Time=2.53 sec\n",
508 | " ARIMA(3,1,0)(0,0,0)[0] intercept : AIC=178093.359, Time=2.56 sec\n",
509 | " ARIMA(4,1,0)(0,0,0)[0] intercept : AIC=176885.338, Time=3.93 sec\n",
510 | " ARIMA(5,1,0)(0,0,0)[0] intercept : AIC=176693.059, Time=4.28 sec\n",
511 | " ARIMA(5,1,1)(0,0,0)[0] intercept : AIC=176681.538, Time=19.26 sec\n",
512 | " ARIMA(4,1,1)(0,0,0)[0] intercept : AIC=176749.959, Time=21.70 sec\n",
513 | " ARIMA(5,1,2)(0,0,0)[0] intercept : AIC=176644.909, Time=26.23 sec\n",
514 | " ARIMA(4,1,2)(0,0,0)[0] intercept : AIC=176677.889, Time=22.52 sec\n",
515 | " ARIMA(5,1,3)(0,0,0)[0] intercept : AIC=inf, Time=83.99 sec\n",
516 | " ARIMA(4,1,3)(0,0,0)[0] intercept : AIC=176610.537, Time=26.95 sec\n",
517 | " ARIMA(3,1,3)(0,0,0)[0] intercept : AIC=176855.717, Time=29.81 sec\n",
518 | " ARIMA(4,1,4)(0,0,0)[0] intercept : AIC=176361.318, Time=74.64 sec\n",
519 | " ARIMA(3,1,4)(0,0,0)[0] intercept : AIC=176383.006, Time=67.48 sec\n",
520 | " ARIMA(5,1,4)(0,0,0)[0] intercept : AIC=inf, Time=80.68 sec\n",
521 | " ARIMA(4,1,5)(0,0,0)[0] intercept : AIC=inf, Time=98.36 sec\n",
522 | " ARIMA(3,1,5)(0,0,0)[0] intercept : AIC=176352.357, Time=90.69 sec\n",
523 | " ARIMA(2,1,5)(0,0,0)[0] intercept : AIC=176811.663, Time=40.08 sec\n",
524 | " ARIMA(2,1,4)(0,0,0)[0] intercept : AIC=176918.415, Time=19.27 sec\n",
525 | " ARIMA(3,1,5)(0,0,0)[0] : AIC=inf, Time=36.53 sec\n",
526 | "\n",
527 | "Best model: ARIMA(3,1,5)(0,0,0)[0] intercept\n",
528 | "Total fit time: 768.222 seconds\n"
529 | ]
530 | }
531 | ],
532 | "source": [
533 | "model = auto_arima(train_data2, start_p=1, start_q=1,\n",
534 | " max_p=5, max_q=5, \n",
535 | " d=1, \n",
536 | " seasonal=False,\n",
537 | " start_P=0, \n",
538 | " D=None, \n",
539 | " trace=True)"
540 | ]
541 | },
542 | {
543 | "cell_type": "code",
544 | "execution_count": null,
545 | "metadata": {},
546 | "outputs": [],
547 | "source": []
548 | }
549 | ],
550 | "metadata": {
551 | "interpreter": {
552 | "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f"
553 | },
554 | "kernelspec": {
555 | "display_name": "Python 3.7.9",
556 | "language": "python",
557 | "name": "python3"
558 | },
559 | "language_info": {
560 | "codemirror_mode": {
561 | "name": "ipython",
562 | "version": 3
563 | },
564 | "file_extension": ".py",
565 | "mimetype": "text/x-python",
566 | "name": "python",
567 | "nbconvert_exporter": "python",
568 | "pygments_lexer": "ipython3",
569 | "version": "3.7.9"
570 | },
571 | "orig_nbformat": 4
572 | },
573 | "nbformat": 4,
574 | "nbformat_minor": 2
575 | }
576 |
--------------------------------------------------------------------------------
/arimavariations_istanbul_data_del.py:
--------------------------------------------------------------------------------
1 | from statsmodels.tsa.arima_model import ARIMA
2 | import pandas as pd
3 |
4 | def armodel(train_data, test_data):
5 | arima = ARIMA(train_data, order=(5,0,0))
6 | arima_fit = arima.fit()
7 | print(arima_fit.summary())
8 | parameters = arima_fit.params
9 | a1 = parameters[1]
10 | a2 = parameters[2]
11 | a3 = parameters[3]
12 | a4 = parameters[4]
13 | a5 = parameters[5]
14 | train_predictions = []
15 | for t in range(4,len(train_data)):
16 | output_train = (train_data[t-4] * a5) + (train_data[t-3] * a4) + (train_data[t-2] * a3) + (train_data[t-1] * a2) + (train_data[t] * a1)
17 | train_predictions.append(output_train)
18 |
19 | test_data2=[]
20 | test_data2.append(train_data[-5])
21 | test_data2.append(train_data[-4])
22 | test_data2.append(train_data[-3])
23 | test_data2.append(train_data[-2])
24 | test_data2.append(train_data[-1])
25 | for i in range(len(test_data)-1):
26 | test_data2.append(test_data[i])
27 |
28 | test_predictions = []
29 | for t in range(4,len(test_data2)):
30 | output_test = (test_data2[t-4] * a5) + (test_data2[t-3] * a4) + (test_data2[t-2] * a3) + (test_data2[t-1] * a2) + (test_data2[t] * a1)
31 | test_predictions.append(output_test)
32 | pd.DataFrame(train_predictions).to_csv("point_forecasts/ar_istanbul_data_del_train.csv")
33 | pd.DataFrame(test_predictions).to_csv("point_forecasts/ar_istanbul_data_del_test.csv")
34 | return train_predictions, test_predictions
35 |
36 | def armamodel(train_data, test_data):
37 | arima = ARIMA(train_data, order=(1,0,1))
38 | arima_fit = arima.fit()
39 | print(arima_fit.summary())
40 | parameters = arima_fit.params
41 | a = parameters[1]
42 | b = parameters[2]
43 | output_train = arima_fit.forecast()
44 | train_predictions = []
45 | for t in range(len(train_data)):
46 | output_train = (train_data[t] * a) + ((train_data[t] - output_train[0]) * b)
47 | train_predictions.append(output_train[0])
48 |
49 | output_test = arima_fit.forecast()
50 | test_predictions = []
51 | test_predictions.append(output_test[0][0])
52 | for t in range(len(test_data)-1):
53 | output_test = (test_data[t] * a) + ((test_data[t] - output_test[0]) * b)
54 | test_predictions.append(output_test[0])
55 | pd.DataFrame(train_predictions).to_csv("point_forecasts/arma_istanbul_data_del_train.csv")
56 | pd.DataFrame(test_predictions).to_csv("point_forecasts/arma_istanbul_data_del_test.csv")
57 | return train_predictions, test_predictions
58 |
59 |
60 | def arimamodel(train_data, test_data):
61 | arima = ARIMA(train_data, order=(0,1,3))
62 | arima_fit = arima.fit()
63 | print(arima_fit.summary())
64 |
65 | train_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(train_data),dynamic=train_data.all())
66 | train_predictions2 = []
67 | for t in range(len(train_data)):
68 | output_train = train_predictions[t] + train_data[t]
69 | train_predictions2.append(output_train)
70 |
71 | test_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(test_data)-1,dynamic=test_data.all())
72 | test_predictions2 = []
73 | test_data2=[]
74 | test_data2.append(train_data[-1])
75 | for i in range(len(test_data)-1):
76 | test_data2.append(test_data[i])
77 | for t in range(len(test_data2)):
78 | output_test = test_predictions[t] + test_data2[t]
79 | test_predictions2.append(output_test)
80 |
81 | pd.DataFrame(train_predictions2).to_csv("point_forecasts/arima_istanbul_data_del_train.csv")
82 | pd.DataFrame(test_predictions2).to_csv("point_forecasts/arima_istanbul_data_del_test.csv")
83 | return train_predictions2, test_predictions2
84 |
85 | def sarimamodel(data):
86 | data2 = pd.DataFrame(data)
87 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(25),data2.shift(24),data2], axis=1)
88 | data3.columns = ['t-169','t-168','t-25','t-24','t']
89 | data4 = data3.values
90 | train_size = 7565
91 | train, test = data4[169:train_size], data4[train_size:]
92 | train_X, train_y = train[:,:4], train[:,-1]
93 | test_X, test_y = test[:,:4], test[:,-1]
94 |
95 | sarima = ARIMA(train_y, order=(1,1,2), exog=train_X)
96 | sarima_fit = sarima.fit()
97 | print(sarima_fit.summary())
98 |
99 | train_predictions = sarima_fit.predict(start=len(train_y),end=len(train_y)+len(train_y)-1,dynamic=train_y.all(),exog=train_X)
100 | train_predictions2 = []
101 | for t in range(len(train_y)):
102 | output_train = train_predictions[t] + train_y[t]
103 | train_predictions2.append(output_train)
104 |
105 | test_predictions = sarima_fit.predict(start=len(train_y),end=len(train_y)+len(test_y)-1,dynamic=test_y.all(),exog=test_X)
106 | test_predictions2 = []
107 | test_y2=[]
108 | test_y2.append(train_y[-1])
109 | for i in range(len(test_y)-1):
110 | test_y2.append(test_y[i])
111 | for t in range(len(test_y2)):
112 | output_test = test_predictions[t] + test_y2[t]
113 | test_predictions2.append(output_test)
114 |
115 | pd.DataFrame(train_predictions2).to_csv("point_forecasts/sarima_istanbul_data_del_train.csv")
116 | pd.DataFrame(test_predictions2).to_csv("point_forecasts/sarima_istanbul_data_del_test.csv")
117 | return train_predictions2, test_predictions2
118 |
119 |
120 | data = pd.read_csv('data/istanbul/istanbul_data_del.csv')[['NUMBER_OF_VEHICLES']]
121 | data = data.values
122 | train_size = 7565
123 | train_data, test_data = data[:train_size], data[train_size:]
124 | armamodel(train_data, test_data)
125 | armodel(train_data, test_data)
126 | arimamodel(train_data, test_data)
127 | sarimamodel(data)
128 |
--------------------------------------------------------------------------------
/arimavariations_istanbul_data_mean_sdsh.py:
--------------------------------------------------------------------------------
1 | from statsmodels.tsa.arima_model import ARIMA
2 | import pandas as pd
3 |
4 | def armodel(train_data, test_data):
5 | arima = ARIMA(train_data, order=(5,0,0))
6 | arima_fit = arima.fit()
7 | print(arima_fit.summary())
8 | parameters = arima_fit.params
9 | a1 = parameters[1]
10 | a2 = parameters[2]
11 | a3 = parameters[3]
12 | a4 = parameters[4]
13 | a5 = parameters[5]
14 | train_predictions = []
15 | for t in range(4,len(train_data)):
16 | output_train = (train_data[t-4] * a5) + (train_data[t-3] * a4) + (train_data[t-2] * a3) + (train_data[t-1] * a2) + (train_data[t] * a1)
17 | train_predictions.append(output_train)
18 |
19 | test_data2=[]
20 | test_data2.append(train_data[-5])
21 | test_data2.append(train_data[-4])
22 | test_data2.append(train_data[-3])
23 | test_data2.append(train_data[-2])
24 | test_data2.append(train_data[-1])
25 | for i in range(len(test_data)-1):
26 | test_data2.append(test_data[i])
27 |
28 | test_predictions = []
29 | for t in range(4,len(test_data2)):
30 | output_test = (test_data2[t-4] * a5) + (test_data2[t-3] * a4) + (test_data2[t-2] * a3) + (test_data2[t-1] * a2) + (test_data2[t] * a1)
31 | test_predictions.append(output_test)
32 | pd.DataFrame(train_predictions).to_csv("point_forecasts/ar_istanbul_data_mean_sdsh_train.csv")
33 | pd.DataFrame(test_predictions).to_csv("point_forecasts/ar_istanbul_data_mean_sdsh_test.csv")
34 | return train_predictions, test_predictions
35 |
36 | def armamodel(train_data, test_data):
37 | arima = ARIMA(train_data, order=(4,0,5))
38 | arima_fit = arima.fit()
39 | print(arima_fit.summary())
40 | parameters = arima_fit.params
41 | a1 = parameters[1]
42 | a2 = parameters[2]
43 | a3 = parameters[3]
44 | a4 = parameters[4]
45 | b1 = parameters[5]
46 | b2 = parameters[6]
47 | b3 = parameters[7]
48 | b4 = parameters[8]
49 | b5 = parameters[9]
50 |
51 | train_predictions = []
52 | outputs = arima_fit.predict(start=len(train_data),end=len(train_data)+4,dynamic=test_data.all())
53 | for i in range(len(outputs)):
54 | train_predictions.append(outputs[i])
55 | for t in range(4,len(train_data)):
56 | output_train = (train_data[t-3] * a4) + (train_data[t-2] * a3) + (train_data[t-1] * a2) + (train_data[t] * a1) + ((train_data[t-4] - train_predictions[-5]) * b5) + ((train_data[t-3] - train_predictions[-4]) * b4) + ((train_data[t-2] - train_predictions[-3]) * b3) + ((train_data[t-1] - train_predictions[-2]) * b2) + ((train_data[t] - train_predictions[-1]) * b1)
57 | train_predictions.append(output_train)
58 |
59 | test_data2=[]
60 | test_data2.append(train_data[-4])
61 | test_data2.append(train_data[-3])
62 | test_data2.append(train_data[-2])
63 | test_data2.append(train_data[-1])
64 | for i in range(len(test_data)-1):
65 | test_data2.append(test_data[i])
66 |
67 | test_predictions = []
68 | outputs = arima_fit.predict(start=len(train_data),end=len(train_data)+4,dynamic=test_data.all())
69 | for i in range(len(outputs)):
70 | test_predictions.append(outputs[i])
71 |
72 | for t in range(4,len(test_data2)):
73 | output_test = (test_data2[t-3] * a4) + (test_data2[t-2] * a3) + (test_data2[t-1] * a2) + (test_data2[t] * a1) + ((test_data2[t-4] - test_predictions[-5]) * b5) + ((test_data2[t-3] - test_predictions[-4]) * b4) + ((test_data2[t-2] - test_predictions[-3]) * b3) + ((test_data2[t-1] - test_predictions[-2]) * b2) + ((test_data2[t] - test_predictions[-1]) * b1)
74 | test_predictions.append(output_test)
75 | test_predictions = test_predictions[4:]
76 | pd.DataFrame(train_predictions).to_csv("point_forecasts/arma_istanbul_data_mean_sdsh_train.csv")
77 | pd.DataFrame(test_predictions).to_csv("point_forecasts/arma_istanbul_data_mean_sdsh_test.csv")
78 | return train_predictions, test_predictions
79 |
80 |
81 | def arimamodel(train_data, test_data):
82 | arima = ARIMA(train_data, order=(0,1,3))
83 | arima_fit = arima.fit()
84 | print(arima_fit.summary())
85 |
86 | train_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(train_data),dynamic=train_data.all())
87 | train_predictions2 = []
88 | for t in range(len(train_data)):
89 | output_train = train_predictions[t] + train_data[t]
90 | train_predictions2.append(output_train)
91 |
92 | test_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(test_data)-1,dynamic=test_data.all())
93 | test_predictions2 = []
94 | test_data2=[]
95 | test_data2.append(train_data[-1])
96 | for i in range(len(test_data)-1):
97 | test_data2.append(test_data[i])
98 | for t in range(len(test_data2)):
99 | output_test = test_predictions[t] + test_data2[t]
100 | test_predictions2.append(output_test)
101 |
102 | pd.DataFrame(train_predictions2).to_csv("point_forecasts/arima_istanbul_data_mean_sdsh_train.csv")
103 | pd.DataFrame(test_predictions2).to_csv("point_forecasts/arima_istanbul_data_mean_sdsh_test.csv")
104 | return train_predictions2, test_predictions2
105 |
106 | def sarimamodel(data):
107 | data2 = pd.DataFrame(data)
108 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(25),data2.shift(24),data2], axis=1)
109 | data3.columns = ['t-169','t-168','t-25','t-24','t']
110 | data4 = data3.values
111 | train_size = int(len(data4) * 0.70)
112 | train, test = data4[169:train_size], data4[train_size:]
113 | train_X, train_y = train[:,:4], train[:,-1]
114 | test_X, test_y = test[:,:4], test[:,-1]
115 |
116 | sarima = ARIMA(train_y, order=(1,1,2), exog=train_X)
117 | sarima_fit = sarima.fit()
118 | print(sarima_fit.summary())
119 |
120 | train_predictions = sarima_fit.predict(start=len(train_y),end=len(train_y)+len(train_y)-1,dynamic=train_y.all(),exog=train_X)
121 | train_predictions2 = []
122 | for t in range(len(train_y)):
123 | output_train = train_predictions[t] + train_y[t]
124 | train_predictions2.append(output_train)
125 |
126 | test_predictions = sarima_fit.predict(start=len(train_y),end=len(train_y)+len(test_y)-1,dynamic=test_y.all(),exog=test_X)
127 | test_predictions2 = []
128 | test_y2=[]
129 | test_y2.append(train_y[-1])
130 | for i in range(len(test_y)-1):
131 | test_y2.append(test_y[i])
132 | for t in range(len(test_y2)):
133 | output_test = test_predictions[t] + test_y2[t]
134 | test_predictions2.append(output_test)
135 |
136 | pd.DataFrame(train_predictions2).to_csv("point_forecasts/sarima_istanbul_data_mean_sdsh_train.csv")
137 | pd.DataFrame(test_predictions2).to_csv("point_forecasts/sarima_istanbul_data_mean_sdsh_test.csv")
138 | return train_predictions2, test_predictions2
139 |
140 |
141 | data = pd.read_csv('data/istanbul/istanbul_data_mean_sdsh.csv')[['NUMBER_OF_VEHICLES']]
142 | data = data.values
143 | train_size = int(len(data) * 0.70)
144 | train_data, test_data = data[:train_size], data[train_size:]
145 | armamodel(train_data, test_data)
146 | armodel(train_data, test_data)
147 | arimamodel(train_data, test_data)
148 | sarimamodel(data)
149 |
--------------------------------------------------------------------------------
/arimavariations_istanbul_mean_sh.py:
--------------------------------------------------------------------------------
1 | from statsmodels.tsa.arima_model import ARIMA
2 | import pandas as pd
3 |
4 | def armodel(train_data, test_data):
5 | arima = ARIMA(train_data, order=(5,0,0))
6 | arima_fit = arima.fit()
7 | print(arima_fit.summary())
8 | parameters = arima_fit.params
9 | a1 = parameters[1]
10 | a2 = parameters[2]
11 | a3 = parameters[3]
12 | a4 = parameters[4]
13 | a5 = parameters[5]
14 | train_predictions = []
15 | for t in range(4,len(train_data)):
16 | output_train = (train_data[t-4] * a5) + (train_data[t-3] * a4) + (train_data[t-2] * a3) + (train_data[t-1] * a2) + (train_data[t] * a1)
17 | train_predictions.append(output_train)
18 |
19 | test_data2=[]
20 | test_data2.append(train_data[-5])
21 | test_data2.append(train_data[-4])
22 | test_data2.append(train_data[-3])
23 | test_data2.append(train_data[-2])
24 | test_data2.append(train_data[-1])
25 | for i in range(len(test_data)-1):
26 | test_data2.append(test_data[i])
27 |
28 | test_predictions = []
29 | for t in range(4,len(test_data2)):
30 | output_test = (test_data2[t-4] * a5) + (test_data2[t-3] * a4) + (test_data2[t-2] * a3) + (test_data2[t-1] * a2) + (test_data2[t] * a1)
31 | test_predictions.append(output_test)
32 | pd.DataFrame(train_predictions).to_csv("point_forecasts/ar_istanbul_data_mean_sh_train.csv")
33 | pd.DataFrame(test_predictions).to_csv("point_forecasts/ar_istanbul_data_mean_sh_test.csv")
34 | return train_predictions, test_predictions
35 |
36 | def armamodel(train_data, test_data):
37 | arima = ARIMA(train_data, order=(1,0,1))
38 | arima_fit = arima.fit()
39 | print(arima_fit.summary())
40 | parameters = arima_fit.params
41 | a = parameters[1]
42 | b = parameters[2]
43 | output_train = arima_fit.forecast()
44 | train_predictions = []
45 | for t in range(len(train_data)):
46 | output_train = (train_data[t] * a) + ((train_data[t] - output_train[0]) * b)
47 | train_predictions.append(output_train[0])
48 |
49 | output_test = arima_fit.forecast()
50 | test_predictions = []
51 | test_predictions.append(output_test[0][0])
52 | for t in range(len(test_data)-1):
53 | output_test = (test_data[t] * a) + ((test_data[t] - output_test[0]) * b)
54 | test_predictions.append(output_test[0])
55 | pd.DataFrame(train_predictions).to_csv("point_forecasts/arma_istanbul_data_mean_sh_train.csv")
56 | pd.DataFrame(test_predictions).to_csv("point_forecasts/arma_istanbul_data_mean_sh_test.csv")
57 | return train_predictions, test_predictions
58 |
59 |
60 | def arimamodel(train_data, test_data):
61 | arima = ARIMA(train_data, order=(0,1,3))
62 | arima_fit = arima.fit()
63 | print(arima_fit.summary())
64 |
65 | train_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(train_data),dynamic=train_data.all())
66 | train_predictions2 = []
67 | for t in range(len(train_data)):
68 | output_train = train_predictions[t] + train_data[t]
69 | train_predictions2.append(output_train)
70 |
71 | test_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(test_data)-1,dynamic=test_data.all())
72 | test_predictions2 = []
73 | test_data2=[]
74 | test_data2.append(train_data[-1])
75 | for i in range(len(test_data)-1):
76 | test_data2.append(test_data[i])
77 | for t in range(len(test_data2)):
78 | output_test = test_predictions[t] + test_data2[t]
79 | test_predictions2.append(output_test)
80 |
81 | pd.DataFrame(train_predictions2).to_csv("point_forecasts/arima_istanbul_data_mean_sh_train.csv")
82 | pd.DataFrame(test_predictions2).to_csv("point_forecasts/arima_istanbul_data_mean_sh_test.csv")
83 | return train_predictions2, test_predictions2
84 |
85 | def sarimamodel(data):
86 | data2 = pd.DataFrame(data)
87 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(25),data2.shift(24),data2], axis=1)
88 | data3.columns = ['t-169','t-168','t-25','t-24','t']
89 | data4 = data3.values
90 | train_size = int(len(data4) * 0.70)
91 | train, test = data4[169:train_size], data4[train_size:]
92 | train_X, train_y = train[:,:4], train[:,-1]
93 | test_X, test_y = test[:,:4], test[:,-1]
94 |
95 | sarima = ARIMA(train_y, order=(1,1,2), exog=train_X)
96 | sarima_fit = sarima.fit()
97 | print(sarima_fit.summary())
98 |
99 | train_predictions = sarima_fit.predict(start=len(train_y),end=len(train_y)+len(train_y)-1,dynamic=train_y.all(),exog=train_X)
100 | train_predictions2 = []
101 | for t in range(len(train_y)):
102 | output_train = train_predictions[t] + train_y[t]
103 | train_predictions2.append(output_train)
104 |
105 | test_predictions = sarima_fit.predict(start=len(train_y),end=len(train_y)+len(test_y)-1,dynamic=test_y.all(),exog=test_X)
106 | test_predictions2 = []
107 | test_y2=[]
108 | test_y2.append(train_y[-1])
109 | for i in range(len(test_y)-1):
110 | test_y2.append(test_y[i])
111 | for t in range(len(test_y2)):
112 | output_test = test_predictions[t] + test_y2[t]
113 | test_predictions2.append(output_test)
114 |
115 | pd.DataFrame(train_predictions2).to_csv("point_forecasts/sarima_istanbul_data_mean_sh_train.csv")
116 | pd.DataFrame(test_predictions2).to_csv("point_forecasts/sarima_istanbul_data_mean_sh_test.csv")
117 | return train_predictions2, test_predictions2
118 |
119 |
120 | data = pd.read_csv('data/istanbul/istanbul_data_mean_sh.csv')[['NUMBER_OF_VEHICLES']]
121 | data = data.values
122 | train_size = int(len(data) * 0.70)
123 | train_data, test_data = data[:train_size], data[train_size:]
124 | armamodel(train_data, test_data)
125 | armodel(train_data, test_data)
126 | arimamodel(train_data, test_data)
127 | sarimamodel(data)
128 |
--------------------------------------------------------------------------------
/arimavariations_pems_716933.py:
--------------------------------------------------------------------------------
1 | from statsmodels.tsa.arima_model import ARIMA
2 | import pandas as pd
3 | import numpy as np
4 |
5 | def armodel(train_data, test_data):
6 | arima = ARIMA(train_data, order=(5,0,0))
7 | arima_fit = arima.fit()
8 | print(arima_fit.summary())
9 | parameters = arima_fit.params
10 | a1 = parameters[1]
11 | a2 = parameters[2]
12 | a3 = parameters[3]
13 | a4 = parameters[4]
14 | a5 = parameters[5]
15 | train_predictions = []
16 | for t in range(4,len(train_data)):
17 | output_train = (train_data[t-4] * a5) + (train_data[t-3] * a4) + (train_data[t-2] * a3) + (train_data[t-1] * a2) + (train_data[t] * a1)
18 | train_predictions.append(output_train)
19 |
20 | test_data2=[]
21 | test_data2.append(train_data[-5])
22 | test_data2.append(train_data[-4])
23 | test_data2.append(train_data[-3])
24 | test_data2.append(train_data[-2])
25 | test_data2.append(train_data[-1])
26 | for i in range(len(test_data)-1):
27 | test_data2.append(test_data[i])
28 |
29 | test_predictions = []
30 | for t in range(4,len(test_data2)):
31 | output_test = (test_data2[t-4] * a5) + (test_data2[t-3] * a4) + (test_data2[t-2] * a3) + (test_data2[t-1] * a2) + (test_data2[t] * a1)
32 | test_predictions.append(output_test)
33 | pd.DataFrame(train_predictions).to_csv("point_forecasts/ar_pems_716933_train.csv")
34 | pd.DataFrame(test_predictions).to_csv("point_forecasts/ar_pems_716933_test.csv")
35 | return train_predictions, test_predictions
36 |
37 | def armamodel(train_data, test_data):
38 | arima = ARIMA(train_data, order=(1,0,1))
39 | arima_fit = arima.fit()
40 | print(arima_fit.summary())
41 | parameters = arima_fit.params
42 | a = parameters[1]
43 | b = parameters[2]
44 | output_train = arima_fit.forecast()
45 | train_predictions = []
46 | for t in range(len(train_data)):
47 | output_train = (train_data[t] * a) + ((train_data[t] - output_train[0]) * b)
48 | train_predictions.append(output_train[0])
49 |
50 | output_test = arima_fit.forecast()
51 | test_predictions = []
52 | test_predictions.append(output_test[0][0])
53 | for t in range(len(test_data)-1):
54 | output_test = (test_data[t] * a) + ((test_data[t] - output_test[0]) * b)
55 | test_predictions.append(output_test[0])
56 | pd.DataFrame(train_predictions).to_csv("point_forecasts/arma_pems_716933_train.csv")
57 | pd.DataFrame(test_predictions).to_csv("point_forecasts/arma_pems_716933_test.csv")
58 | return train_predictions, test_predictions
59 |
60 |
61 | def arimamodel(train_data, test_data):
62 | arima = ARIMA(train_data, order=(4,1,0))
63 | arima_fit = arima.fit()
64 | print(arima_fit.summary())
65 |
66 | train_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(train_data),dynamic=train_data.all())
67 | train_predictions2 = []
68 | for t in range(len(train_data)):
69 | output_train = train_predictions[t] + train_data[t]
70 | train_predictions2.append(output_train)
71 |
72 | test_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(test_data)-1,dynamic=test_data.all())
73 | test_predictions2 = []
74 | test_data2=[]
75 | test_data2.append(train_data[-1])
76 | for i in range(len(test_data)-1):
77 | test_data2.append(test_data[i])
78 | for t in range(len(test_data2)):
79 | output_test = test_predictions[t] + test_data2[t]
80 | test_predictions2.append(output_test)
81 |
82 | pd.DataFrame(train_predictions2).to_csv("point_forecasts/arima_pems_716933_train.csv")
83 | pd.DataFrame(test_predictions2).to_csv("point_forecasts/arima_pems_716933_test.csv")
84 | return train_predictions2, test_predictions2
85 |
86 | def sarimamodel(data):
87 | data2 = pd.DataFrame(data)
88 | data3 = pd.concat([data2.shift(673),data2.shift(672),data2.shift(97),data2.shift(96),data2], axis=1)
89 | data3.columns = ["t-673", "t-672", "t-97", "t-96", "t"]
90 | data4 = data3.values
91 | train_size = int(len(data4) * 0.70)
92 | train, test = data4[673:train_size], data4[train_size:]
93 | train_X, train_y = train[:,:4], train[:,-1]
94 | test_X, test_y = test[:,:4], test[:,-1]
95 |
96 | sarima = ARIMA(train_y, order=(1,1,2), exog=train_X)
97 | sarima_fit = sarima.fit()
98 | print(sarima_fit.summary())
99 |
100 | train_predictions = sarima_fit.predict(start=len(train_y),end=len(train_y)+len(train_y)-1,dynamic=train_y.all(),exog=train_X)
101 | train_predictions2 = []
102 | for t in range(len(train_y)):
103 | output_train = train_predictions[t] + train_y[t]
104 | train_predictions2.append(output_train)
105 |
106 | test_predictions = sarima_fit.predict(start=len(train_y),end=len(train_y)+len(test_y)-1,dynamic=test_y.all(),exog=test_X)
107 | test_predictions2 = []
108 | test_y2=[]
109 | test_y2.append(train_y[-1])
110 | for i in range(len(test_y)-1):
111 | test_y2.append(test_y[i])
112 | for t in range(len(test_y2)):
113 | output_test = test_predictions[t] + test_y2[t]
114 | test_predictions2.append(output_test)
115 |
116 | pd.DataFrame(train_predictions2).to_csv("point_forecasts/sarima_pems_716933_train.csv")
117 | pd.DataFrame(test_predictions2).to_csv("point_forecasts/sarima_pems_716933_test.csv")
118 | return train_predictions2, test_predictions2
119 |
120 |
121 | data = pd.read_csv('data/pems/pems-d07-9months-2021-station716933-15min.csv')[['Total Flow']]
122 | data = data.values
123 | train_size = int(len(data) * 0.70)
124 | train_data, test_data = data[:train_size], data[train_size:]
125 | armamodel(train_data, test_data)
126 | armodel(train_data, test_data)
127 | arimamodel(train_data, test_data)
128 | sarimamodel(data)
--------------------------------------------------------------------------------
/arimavariations_pems_717087.py:
--------------------------------------------------------------------------------
1 | from statsmodels.tsa.arima_model import ARIMA
2 | import pandas as pd
3 |
4 | def armodel(train_data, test_data):
5 | arima = ARIMA(train_data, order=(5,0,0))
6 | arima_fit = arima.fit()
7 | print(arima_fit.summary())
8 | parameters = arima_fit.params
9 | a1 = parameters[1]
10 | a2 = parameters[2]
11 | a3 = parameters[3]
12 | a4 = parameters[4]
13 | a5 = parameters[5]
14 | train_predictions = []
15 | for t in range(4,len(train_data)):
16 | output_train = (train_data[t-4] * a5) + (train_data[t-3] * a4) + (train_data[t-2] * a3) + (train_data[t-1] * a2) + (train_data[t] * a1)
17 | train_predictions.append(output_train)
18 |
19 | test_data2=[]
20 | test_data2.append(train_data[-5])
21 | test_data2.append(train_data[-4])
22 | test_data2.append(train_data[-3])
23 | test_data2.append(train_data[-2])
24 | test_data2.append(train_data[-1])
25 | for i in range(len(test_data)-1):
26 | test_data2.append(test_data[i])
27 |
28 | test_predictions = []
29 | for t in range(4,len(test_data2)):
30 | output_test = (test_data2[t-4] * a5) + (test_data2[t-3] * a4) + (test_data2[t-2] * a3) + (test_data2[t-1] * a2) + (test_data2[t] * a1)
31 | test_predictions.append(output_test)
32 | pd.DataFrame(train_predictions).to_csv("point_forecasts/ar_pems_717087_train.csv")
33 | pd.DataFrame(test_predictions).to_csv("point_forecasts/ar_pems_717087_test.csv")
34 | return train_predictions, test_predictions
35 |
36 | def armamodel(train_data, test_data):
37 | arima = ARIMA(train_data, order=(5,0,4))
38 | arima_fit = arima.fit()
39 | print(arima_fit.summary())
40 | parameters = arima_fit.params
41 | a1 = parameters[1]
42 | a2 = parameters[2]
43 | a3 = parameters[3]
44 | a4 = parameters[4]
45 | a5 = parameters[5]
46 | b1 = parameters[6]
47 | b2 = parameters[7]
48 | b3 = parameters[8]
49 | b4 = parameters[9]
50 |
51 |
52 | train_predictions = []
53 | outputs = arima_fit.predict(start=len(train_data),end=len(train_data)+3,dynamic=test_data.all())
54 | for i in range(len(outputs)):
55 | train_predictions.append(outputs[i])
56 | for t in range(4,len(train_data)):
57 | output_train = (train_data[t-4] * a5) + (train_data[t-3] * a4) + (train_data[t-2] * a3) + (train_data[t-1] * a2) + (train_data[t] * a1) + ((train_data[t-3] - train_predictions[-4]) * b4) + ((train_data[t-2] - train_predictions[-3]) * b3) + ((train_data[t-1] - train_predictions[-2]) * b2) + ((train_data[t] - train_predictions[-1]) * b1)
58 | train_predictions.append(output_train[0])
59 |
60 | test_data2=[]
61 | test_data2.append(train_data[-5])
62 | test_data2.append(train_data[-4])
63 | test_data2.append(train_data[-3])
64 | test_data2.append(train_data[-2])
65 | test_data2.append(train_data[-1])
66 | for i in range(len(test_data)-1):
67 | test_data2.append(test_data[i])
68 |
69 | test_predictions = []
70 | outputs = arima_fit.predict(start=len(train_data),end=len(train_data)+3,dynamic=test_data.all())
71 | for i in range(len(outputs)):
72 | test_predictions.append(outputs[i])
73 |
74 | for t in range(4,len(test_data2)):
75 | output_test = (test_data2[t-4] * a5) + (test_data2[t-3] * a4) + (test_data2[t-2] * a3) + (test_data2[t-1] * a2) + (test_data2[t] * a1) + ((test_data2[t-3] - test_predictions[-4]) * b4) + ((test_data2[t-2] - test_predictions[-3]) * b3) + ((test_data2[t-1] - test_predictions[-2]) * b2) + ((test_data2[t] - test_predictions[-1]) * b1)
76 | test_predictions.append(output_test[0])
77 | test_predictions = test_predictions[4:]
78 | pd.DataFrame(train_predictions).to_csv("point_forecasts/arma_pems_717087_train.csv")
79 | pd.DataFrame(test_predictions).to_csv("point_forecasts/arma_pems_717087_test.csv")
80 | return train_predictions, test_predictions
81 |
82 |
83 | def arimamodel(train_data, test_data):
84 | arima = ARIMA(train_data, order=(3,1,5))
85 | arima_fit = arima.fit()
86 | print(arima_fit.summary())
87 |
88 | train_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(train_data),dynamic=train_data.all())
89 | train_predictions2 = []
90 | for t in range(len(train_data)):
91 | output_train = train_predictions[t] + train_data[t]
92 | train_predictions2.append(output_train)
93 |
94 | test_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(test_data)-1,dynamic=test_data.all())
95 | test_predictions2 = []
96 | test_data2=[]
97 | test_data2.append(train_data[-1])
98 | for i in range(len(test_data)-1):
99 | test_data2.append(test_data[i])
100 | for t in range(len(test_data2)):
101 | output_test = test_predictions[t] + test_data2[t]
102 | test_predictions2.append(output_test)
103 |
104 | pd.DataFrame(train_predictions2).to_csv("point_forecasts/arima_pems_717087_train.csv")
105 | pd.DataFrame(test_predictions2).to_csv("point_forecasts/arima_pems_717087_test.csv")
106 | return train_predictions2, test_predictions2
107 |
108 | def sarimamodel(data):
109 | data2 = pd.DataFrame(data)
110 | data3 = pd.concat([data2.shift(673),data2.shift(672),data2.shift(97),data2.shift(96),data2], axis=1)
111 | data3.columns = ["t-673", "t-672", "t-97", "t-96", "t"]
112 | data4 = data3.values
113 | train_size = int(len(data4) * 0.70)
114 | train, test = data4[673:train_size], data4[train_size:]
115 | train_X, train_y = train[:,:4], train[:,-1]
116 | test_X, test_y = test[:,:4], test[:,-1]
117 |
118 | sarima = ARIMA(train_y, order=(1,1,2), exog=train_X)
119 | sarima_fit = sarima.fit()
120 | print(sarima_fit.summary())
121 |
122 |     train_predictions = sarima_fit.predict(start=len(train_y),end=len(train_y)+len(train_y)-1,dynamic=train_y.all(),exog=train_X)
123 | train_predictions2 = []
124 | for t in range(len(train_y)):
125 | output_train = train_predictions[t] + train_y[t]
126 | train_predictions2.append(output_train)
127 |
128 |     test_predictions = sarima_fit.predict(start=len(train_y),end=len(train_y)+len(test_y)-1,dynamic=test_y.all(),exog=test_X)
129 | test_predictions2 = []
130 | test_y2=[]
131 | test_y2.append(train_y[-1])
132 | for i in range(len(test_y)-1):
133 | test_y2.append(test_y[i])
134 | for t in range(len(test_y2)):
135 | output_test = test_predictions[t] + test_y2[t]
136 | test_predictions2.append(output_test)
137 |
138 | pd.DataFrame(train_predictions2).to_csv("point_forecasts/sarima_pems_717087_train.csv")
139 | pd.DataFrame(test_predictions2).to_csv("point_forecasts/sarima_pems_717087_test.csv")
140 | return train_predictions2, test_predictions2
141 |
142 |
143 | data = pd.read_csv('data/pems/pems-d07-9months-2021-station717087-15min.csv')[['Total Flow']]
144 | data = data.values
145 | train_size = int(len(data) * 0.70)
146 | train_data, test_data = data[:train_size], data[train_size:]
147 | armamodel(train_data, test_data)
148 | armodel(train_data, test_data)
149 | arimamodel(train_data, test_data)
150 | sarimamodel(data)
151 |
--------------------------------------------------------------------------------
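Note on the two PeMS ARIMA scripts above: they are written against the legacy `statsmodels.tsa.arima_model.ARIMA` API, which was deprecated and later removed from statsmodels (0.13+), so rerunning them requires the older environment the notebooks report (Python 3.7.9). Also, `sarimamodel` does not fit a full seasonal ARIMA; it fits an ARIMA(1,1,2) on the flow series with the daily and weekly lags (t-96, t-97, t-672, t-673 at 15-minute resolution) passed as exogenous regressors. Below is a minimal sketch of that same design on the current statsmodels API using `SARIMAX`; the variable names and the dropna-based split are illustrative, not taken from the repository.

    import pandas as pd
    from statsmodels.tsa.statespace.sarimax import SARIMAX

    flow = pd.read_csv('data/pems/pems-d07-9months-2021-station717087-15min.csv')[['Total Flow']]

    # Daily (96-step) and weekly (672-step) lags used as exogenous regressors.
    lags = pd.concat([flow.shift(673), flow.shift(672), flow.shift(97), flow.shift(96), flow], axis=1)
    lags.columns = ['t-673', 't-672', 't-97', 't-96', 't']
    lags = lags.dropna()

    train_size = int(len(lags) * 0.70)
    train, test = lags.iloc[:train_size], lags.iloc[train_size:]
    exog_cols = ['t-673', 't-672', 't-97', 't-96']

    # ARIMA(1,1,2) on the flow with the seasonal lags as exogenous inputs.
    result = SARIMAX(train['t'], exog=train[exog_cols], order=(1, 1, 2)).fit(disp=False)
    test_forecast = result.forecast(steps=len(test), exog=test[exog_cols])
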
/evaluatemodels_istanbul_data_del.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 10,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "import pandas as pd\n",
11 | "\n",
12 | "#Defining MAPE function\n",
13 | "def MAPE(actual_values,predicted_values):\n",
14 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
15 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
16 | " mape = np.mean(np.abs((actual_values - predicted_values)/actual_values))*100\n",
17 | " return mape\n",
18 | "\n",
19 | "#Defining MAPE_100 function\n",
20 | "def MAPE_100(actual_values,predicted_values):\n",
21 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
22 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
23 | " x = np.concatenate((actual_values,predicted_values), axis=1)\n",
24 | " x_100 = x[x[:,0]>100]\n",
25 | " mape = np.mean(np.abs((x_100[:,0] - x_100[:,1]) / x_100[:,0]))*100\n",
26 | " return mape\n",
27 | "\n",
28 | "#Defining MAPE_250 function\n",
29 | "def MAPE_250(actual_values,predicted_values):\n",
30 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
31 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
32 | " x = np.concatenate((actual_values,predicted_values), axis=1)\n",
33 | " x_250 = x[x[:,0]>250]\n",
34 | " mape = np.mean(np.abs((x_250[:,0] - x_250[:,1]) / x_250[:,0]))*100\n",
35 | " return mape\n",
36 | "\n",
37 | "#Defining MAE function\n",
38 | "def MAE(actual_values,predicted_values):\n",
39 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
40 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
41 | " mae = np.mean(np.abs(actual_values - predicted_values))\n",
42 | " return mae"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 11,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "testdata_istanbul_data_del = np.array(pd.read_csv('data/istanbul/istanbul_data_del.csv')[['NUMBER_OF_VEHICLES']][-3500:])\n",
52 | "ar_istanbul_data_del = np.array(pd.read_csv(\"point_forecasts/ar_istanbul_data_del_test.csv\")[\"0\"])\n",
53 | "arma_istanbul_data_del = np.array(pd.read_csv(\"point_forecasts/arma_istanbul_data_del_test.csv\")[\"0\"])\n",
54 | "arima_istanbul_data_del = np.array(pd.read_csv(\"point_forecasts/arima_istanbul_data_del_test.csv\")[\"0\"])\n",
55 | "sarima_istanbul_data_del = np.array(pd.read_csv(\"point_forecasts/sarima_istanbul_data_del_test.csv\")[\"0\"])\n",
56 | "slstm_istanbul_data_del = np.array(pd.read_csv(\"point_forecasts/slstm_istanbul_data_del_test.csv\")[\"0\"])\n",
57 | "ssvr_istanbul_data_del = np.array(pd.read_csv(\"point_forecasts/ssvr_istanbul_data_del_test.csv\")[\"0\"])\n",
58 | "sxgboost_istanbul_data_del = np.array(pd.read_csv(\"point_forecasts/sxgboost_istanbul_data_del_test.csv\")[\"0\"])\n",
59 | "slstmarima_istanbul_data_del = np.array(pd.read_csv(\"point_forecasts/slstmarima_istanbul_data_del_test.csv\")[\"0\"])\n",
60 | "ssvrarima_istanbul_data_del = np.array(pd.read_csv(\"point_forecasts/ssvrarima_istanbul_data_del_test.csv\")[\"0\"])\n",
61 | "sxgboostarima_istanbul_data_del = np.array(pd.read_csv(\"point_forecasts/sxgboostarima_istanbul_data_del_test.csv\")[\"0\"])"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 12,
67 | "metadata": {},
68 | "outputs": [
69 | {
70 | "name": "stdout",
71 | "output_type": "stream",
72 | "text": [
73 | "28.86974512582298\n",
74 | "29.060338232637356\n",
75 | "31.04630704707781\n",
76 | "27.644744318481546\n",
77 | "26.083904765431708\n",
78 | "22.78030832823932\n",
79 | "24.796676785320347\n",
80 | "26.030395358255294\n",
81 | "22.728086954168397\n",
82 | "24.838684592834568\n"
83 | ]
84 | }
85 | ],
86 | "source": [
87 | "mape_ar_istanbul_data_del = MAPE(testdata_istanbul_data_del, ar_istanbul_data_del)\n",
88 | "mape_arma_istanbul_data_del = MAPE(testdata_istanbul_data_del, arma_istanbul_data_del)\n",
89 | "mape_arima_istanbul_data_del = MAPE(testdata_istanbul_data_del, arima_istanbul_data_del)\n",
90 | "mape_sarima_istanbul_data_del = MAPE(testdata_istanbul_data_del, sarima_istanbul_data_del)\n",
91 | "mape_slstm_istanbul_data_del = MAPE(testdata_istanbul_data_del, slstm_istanbul_data_del)\n",
92 | "mape_ssvr_istanbul_data_del = MAPE(testdata_istanbul_data_del, ssvr_istanbul_data_del)\n",
93 | "mape_sxgboost_istanbul_data_del = MAPE(testdata_istanbul_data_del, sxgboost_istanbul_data_del)\n",
94 | "mape_slstmarima_istanbul_data_del = MAPE(testdata_istanbul_data_del, slstmarima_istanbul_data_del)\n",
95 | "mape_ssvrarima_istanbul_data_del = MAPE(testdata_istanbul_data_del, ssvrarima_istanbul_data_del)\n",
96 | "mape_sxgboostarima_istanbul_data_del = MAPE(testdata_istanbul_data_del, sxgboostarima_istanbul_data_del)\n",
97 | "\n",
98 | "print(mape_ar_istanbul_data_del)\n",
99 | "print(mape_arma_istanbul_data_del)\n",
100 | "print(mape_arima_istanbul_data_del)\n",
101 | "print(mape_sarima_istanbul_data_del)\n",
102 | "print(mape_slstm_istanbul_data_del)\n",
103 | "print(mape_ssvr_istanbul_data_del)\n",
104 | "print(mape_sxgboost_istanbul_data_del)\n",
105 | "print(mape_slstmarima_istanbul_data_del)\n",
106 | "print(mape_ssvrarima_istanbul_data_del)\n",
107 | "print(mape_sxgboostarima_istanbul_data_del)"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": 13,
113 | "metadata": {},
114 | "outputs": [
115 | {
116 | "name": "stdout",
117 | "output_type": "stream",
118 | "text": [
119 | "23.712783205267975\n",
120 | "24.1882174224178\n",
121 | "22.971047404769376\n",
122 | "20.600738845263685\n",
123 | "16.022873063955497\n",
124 | "16.454937256236214\n",
125 | "16.7834587509336\n",
126 | "16.0409316683468\n",
127 | "16.416078548446443\n",
128 | "16.832564889685074\n"
129 | ]
130 | }
131 | ],
132 | "source": [
133 | "mape_100_ar_istanbul_data_del = MAPE_100(testdata_istanbul_data_del, ar_istanbul_data_del)\n",
134 | "mape_100_arma_istanbul_data_del = MAPE_100(testdata_istanbul_data_del, arma_istanbul_data_del)\n",
135 | "mape_100_arima_istanbul_data_del = MAPE_100(testdata_istanbul_data_del, arima_istanbul_data_del)\n",
136 | "mape_100_sarima_istanbul_data_del = MAPE_100(testdata_istanbul_data_del, sarima_istanbul_data_del)\n",
137 | "mape_100_slstm_istanbul_data_del = MAPE_100(testdata_istanbul_data_del, slstm_istanbul_data_del)\n",
138 | "mape_100_ssvr_istanbul_data_del = MAPE_100(testdata_istanbul_data_del, ssvr_istanbul_data_del)\n",
139 | "mape_100_sxgboost_istanbul_data_del = MAPE_100(testdata_istanbul_data_del, sxgboost_istanbul_data_del)\n",
140 | "mape_100_slstmarima_istanbul_data_del = MAPE_100(testdata_istanbul_data_del, slstmarima_istanbul_data_del)\n",
141 | "mape_100_ssvrarima_istanbul_data_del = MAPE_100(testdata_istanbul_data_del, ssvrarima_istanbul_data_del)\n",
142 | "mape_100_sxgboostarima_istanbul_data_del = MAPE_100(testdata_istanbul_data_del, sxgboostarima_istanbul_data_del)\n",
143 | "\n",
144 | "print(mape_100_ar_istanbul_data_del)\n",
145 | "print(mape_100_arma_istanbul_data_del)\n",
146 | "print(mape_100_arima_istanbul_data_del)\n",
147 | "print(mape_100_sarima_istanbul_data_del)\n",
148 | "print(mape_100_slstm_istanbul_data_del)\n",
149 | "print(mape_100_ssvr_istanbul_data_del)\n",
150 | "print(mape_100_sxgboost_istanbul_data_del)\n",
151 | "print(mape_100_slstmarima_istanbul_data_del)\n",
152 | "print(mape_100_ssvrarima_istanbul_data_del)\n",
153 | "print(mape_100_sxgboostarima_istanbul_data_del)"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 14,
159 | "metadata": {},
160 | "outputs": [
161 | {
162 | "name": "stdout",
163 | "output_type": "stream",
164 | "text": [
165 | "21.841070095474745\n",
166 | "22.600807335632386\n",
167 | "17.584517478461393\n",
168 | "16.529284045080818\n",
169 | "11.65226367899847\n",
170 | "11.99353751711907\n",
171 | "12.014449828425033\n",
172 | "11.615596940182671\n",
173 | "12.060929432100648\n",
174 | "12.000875495862601\n"
175 | ]
176 | }
177 | ],
178 | "source": [
179 | "mape_250_ar_istanbul_data_del = MAPE_250(testdata_istanbul_data_del, ar_istanbul_data_del)\n",
180 | "mape_250_arma_istanbul_data_del = MAPE_250(testdata_istanbul_data_del, arma_istanbul_data_del)\n",
181 | "mape_250_arima_istanbul_data_del = MAPE_250(testdata_istanbul_data_del, arima_istanbul_data_del)\n",
182 | "mape_250_sarima_istanbul_data_del = MAPE_250(testdata_istanbul_data_del, sarima_istanbul_data_del)\n",
183 | "mape_250_slstm_istanbul_data_del = MAPE_250(testdata_istanbul_data_del, slstm_istanbul_data_del)\n",
184 | "mape_250_ssvr_istanbul_data_del = MAPE_250(testdata_istanbul_data_del, ssvr_istanbul_data_del)\n",
185 | "mape_250_sxgboost_istanbul_data_del = MAPE_250(testdata_istanbul_data_del, sxgboost_istanbul_data_del)\n",
186 | "mape_250_slstmarima_istanbul_data_del = MAPE_250(testdata_istanbul_data_del, slstmarima_istanbul_data_del)\n",
187 | "mape_250_ssvrarima_istanbul_data_del = MAPE_250(testdata_istanbul_data_del, ssvrarima_istanbul_data_del)\n",
188 | "mape_250_sxgboostarima_istanbul_data_del = MAPE_250(testdata_istanbul_data_del, sxgboostarima_istanbul_data_del)\n",
189 | "\n",
190 | "print(mape_250_ar_istanbul_data_del)\n",
191 | "print(mape_250_arma_istanbul_data_del)\n",
192 | "print(mape_250_arima_istanbul_data_del)\n",
193 | "print(mape_250_sarima_istanbul_data_del)\n",
194 | "print(mape_250_slstm_istanbul_data_del)\n",
195 | "print(mape_250_ssvr_istanbul_data_del)\n",
196 | "print(mape_250_sxgboost_istanbul_data_del)\n",
197 | "print(mape_250_slstmarima_istanbul_data_del)\n",
198 | "print(mape_250_ssvrarima_istanbul_data_del)\n",
199 | "print(mape_250_sxgboostarima_istanbul_data_del)"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": 15,
205 | "metadata": {},
206 | "outputs": [
207 | {
208 | "name": "stdout",
209 | "output_type": "stream",
210 | "text": [
211 | "45.41822357559246\n",
212 | "46.25644379087454\n",
213 | "42.692464063978946\n",
214 | "38.92137252502022\n",
215 | "31.360570671221716\n",
216 | "30.76783905214454\n",
217 | "31.62889098894286\n",
218 | "31.355986601823076\n",
219 | "30.723100894109738\n",
220 | "31.694861789309655\n"
221 | ]
222 | }
223 | ],
224 | "source": [
225 | "mae_ar_istanbul_data_del = MAE(testdata_istanbul_data_del, ar_istanbul_data_del)\n",
226 | "mae_arma_istanbul_data_del = MAE(testdata_istanbul_data_del, arma_istanbul_data_del)\n",
227 | "mae_arima_istanbul_data_del = MAE(testdata_istanbul_data_del, arima_istanbul_data_del)\n",
228 | "mae_sarima_istanbul_data_del = MAE(testdata_istanbul_data_del, sarima_istanbul_data_del)\n",
229 | "mae_slstm_istanbul_data_del = MAE(testdata_istanbul_data_del, slstm_istanbul_data_del)\n",
230 | "mae_ssvr_istanbul_data_del = MAE(testdata_istanbul_data_del, ssvr_istanbul_data_del)\n",
231 | "mae_sxgboost_istanbul_data_del = MAE(testdata_istanbul_data_del, sxgboost_istanbul_data_del)\n",
232 | "mae_slstmarima_istanbul_data_del = MAE(testdata_istanbul_data_del, slstmarima_istanbul_data_del)\n",
233 | "mae_ssvrarima_istanbul_data_del = MAE(testdata_istanbul_data_del, ssvrarima_istanbul_data_del)\n",
234 | "mae_sxgboostarima_istanbul_data_del = MAE(testdata_istanbul_data_del, sxgboostarima_istanbul_data_del)\n",
235 | "\n",
236 | "print(mae_ar_istanbul_data_del)\n",
237 | "print(mae_arma_istanbul_data_del)\n",
238 | "print(mae_arima_istanbul_data_del)\n",
239 | "print(mae_sarima_istanbul_data_del)\n",
240 | "print(mae_slstm_istanbul_data_del)\n",
241 | "print(mae_ssvr_istanbul_data_del)\n",
242 | "print(mae_sxgboost_istanbul_data_del)\n",
243 | "print(mae_slstmarima_istanbul_data_del)\n",
244 | "print(mae_ssvrarima_istanbul_data_del)\n",
245 | "print(mae_sxgboostarima_istanbul_data_del)"
246 | ]
247 | },
248 | {
249 | "cell_type": "code",
250 | "execution_count": null,
251 | "metadata": {},
252 | "outputs": [],
253 | "source": []
254 | }
255 | ],
256 | "metadata": {
257 | "interpreter": {
258 | "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f"
259 | },
260 | "kernelspec": {
261 | "display_name": "Python 3.7.9",
262 | "language": "python",
263 | "name": "python3"
264 | },
265 | "language_info": {
266 | "codemirror_mode": {
267 | "name": "ipython",
268 | "version": 3
269 | },
270 | "file_extension": ".py",
271 | "mimetype": "text/x-python",
272 | "name": "python",
273 | "nbconvert_exporter": "python",
274 | "pygments_lexer": "ipython3",
275 | "version": "3.7.9"
276 | },
277 | "orig_nbformat": 4
278 | },
279 | "nbformat": 4,
280 | "nbformat_minor": 2
281 | }
282 |
--------------------------------------------------------------------------------
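The evaluation notebooks repeat the same four metrics (MAPE, MAPE_100, MAPE_250, MAE) line by line for each of the ten models. The same numbers can be produced with a loop over the model names; the sketch below is only an equivalent refactoring, relying on the `point_forecasts/<model>_istanbul_data_del_test.csv` naming and the "0" column that the notebook above already reads.

    import numpy as np
    import pandas as pd

    models = ['ar', 'arma', 'arima', 'sarima', 'slstm', 'ssvr', 'sxgboost',
              'slstmarima', 'ssvrarima', 'sxgboostarima']
    actual = np.array(pd.read_csv('data/istanbul/istanbul_data_del.csv')[['NUMBER_OF_VEHICLES']][-3500:]).ravel()

    def mape(y, yhat, threshold=-np.inf):
        # MAPE restricted to observations whose actual value exceeds the threshold
        # (threshold=-inf gives the plain MAPE; 100 and 250 give the thresholded variants).
        mask = y > threshold
        return np.mean(np.abs((y[mask] - yhat[mask]) / y[mask])) * 100

    for name in models:
        yhat = np.array(pd.read_csv(f'point_forecasts/{name}_istanbul_data_del_test.csv')['0'])
        print(name, mape(actual, yhat), mape(actual, yhat, 100),
              mape(actual, yhat, 250), np.mean(np.abs(actual - yhat)))
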
/evaluatemodels_istanbul_data_mean_sdsh.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "import pandas as pd\n",
11 | "\n",
12 | "#Defining MAPE function\n",
13 | "def MAPE(actual_values,predicted_values):\n",
14 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
15 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
16 | " mape = np.mean(np.abs((actual_values - predicted_values)/actual_values))*100\n",
17 | " return mape\n",
18 | "\n",
19 | "#Defining MAPE_100 function\n",
20 | "def MAPE_100(actual_values,predicted_values):\n",
21 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
22 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
23 | " x = np.concatenate((actual_values,predicted_values), axis=1)\n",
24 | " x_100 = x[x[:,0]>100]\n",
25 | " mape = np.mean(np.abs((x_100[:,0] - x_100[:,1]) / x_100[:,0]))*100\n",
26 | " return mape\n",
27 | "\n",
28 | "#Defining MAPE_250 function\n",
29 | "def MAPE_250(actual_values,predicted_values):\n",
30 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
31 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
32 | " x = np.concatenate((actual_values,predicted_values), axis=1)\n",
33 | " x_250 = x[x[:,0]>250]\n",
34 | " mape = np.mean(np.abs((x_250[:,0] - x_250[:,1]) / x_250[:,0]))*100\n",
35 | " return mape\n",
36 | "\n",
37 | "#Defining MAE function\n",
38 | "def MAE(actual_values,predicted_values):\n",
39 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
40 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
41 | " mae = np.mean(np.abs(actual_values - predicted_values))\n",
42 | " return mae"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 2,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "testdata_istanbul_data_mean_sdsh = np.array(pd.read_csv('data/istanbul/istanbul_data_mean_sdsh.csv')[['NUMBER_OF_VEHICLES']][-3500:])\n",
52 | "ar_istanbul_data_mean_sdsh = np.array(pd.read_csv(\"point_forecasts/ar_istanbul_data_mean_sdsh_test.csv\")[\"0\"])\n",
53 | "arma_istanbul_data_mean_sdsh = np.array(pd.read_csv(\"point_forecasts/arma_istanbul_data_mean_sdsh_test.csv\")[\"0\"])\n",
54 | "arima_istanbul_data_mean_sdsh = np.array(pd.read_csv(\"point_forecasts/arima_istanbul_data_mean_sdsh_test.csv\")[\"0\"])\n",
55 | "sarima_istanbul_data_mean_sdsh = np.array(pd.read_csv(\"point_forecasts/sarima_istanbul_data_mean_sdsh_test.csv\")[\"0\"])\n",
56 | "slstm_istanbul_data_mean_sdsh = np.array(pd.read_csv(\"point_forecasts/slstm_istanbul_data_mean_sdsh_test.csv\")[\"0\"])\n",
57 | "ssvr_istanbul_data_mean_sdsh = np.array(pd.read_csv(\"point_forecasts/ssvr_istanbul_data_mean_sdsh_test.csv\")[\"0\"])\n",
58 | "sxgboost_istanbul_data_mean_sdsh = np.array(pd.read_csv(\"point_forecasts/sxgboost_istanbul_data_mean_sdsh_test.csv\")[\"0\"])\n",
59 | "slstmarima_istanbul_data_mean_sdsh = np.array(pd.read_csv(\"point_forecasts/slstmarima_istanbul_data_mean_sdsh_test.csv\")[\"0\"])\n",
60 | "ssvrarima_istanbul_data_mean_sdsh = np.array(pd.read_csv(\"point_forecasts/ssvrarima_istanbul_data_mean_sdsh_test.csv\")[\"0\"])\n",
61 | "sxgboostarima_istanbul_data_mean_sdsh = np.array(pd.read_csv(\"point_forecasts/sxgboostarima_istanbul_data_mean_sdsh_test.csv\")[\"0\"])"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 3,
67 | "metadata": {},
68 | "outputs": [
69 | {
70 | "name": "stdout",
71 | "output_type": "stream",
72 | "text": [
73 | "32.28522825674658\n",
74 | "29.421164709879783\n",
75 | "31.072668155397913\n",
76 | "23.330365809285443\n",
77 | "17.936230435083743\n",
78 | "16.750979864766805\n",
79 | "18.13295639807828\n",
80 | "17.907516554919198\n",
81 | "16.752112486882776\n",
82 | "18.138081388324558\n"
83 | ]
84 | }
85 | ],
86 | "source": [
87 | "mape_ar_istanbul_data_mean_sdsh = MAPE(testdata_istanbul_data_mean_sdsh, ar_istanbul_data_mean_sdsh)\n",
88 | "mape_arma_istanbul_data_mean_sdsh = MAPE(testdata_istanbul_data_mean_sdsh, arma_istanbul_data_mean_sdsh)\n",
89 | "mape_arima_istanbul_data_mean_sdsh = MAPE(testdata_istanbul_data_mean_sdsh, arima_istanbul_data_mean_sdsh)\n",
90 | "mape_sarima_istanbul_data_mean_sdsh = MAPE(testdata_istanbul_data_mean_sdsh, sarima_istanbul_data_mean_sdsh)\n",
91 | "mape_slstm_istanbul_data_mean_sdsh = MAPE(testdata_istanbul_data_mean_sdsh, slstm_istanbul_data_mean_sdsh)\n",
92 | "mape_ssvr_istanbul_data_mean_sdsh = MAPE(testdata_istanbul_data_mean_sdsh, ssvr_istanbul_data_mean_sdsh)\n",
93 | "mape_sxgboost_istanbul_data_mean_sdsh = MAPE(testdata_istanbul_data_mean_sdsh, sxgboost_istanbul_data_mean_sdsh)\n",
94 | "mape_slstmarima_istanbul_data_mean_sdsh = MAPE(testdata_istanbul_data_mean_sdsh, slstmarima_istanbul_data_mean_sdsh)\n",
95 | "mape_ssvrarima_istanbul_data_mean_sdsh = MAPE(testdata_istanbul_data_mean_sdsh, ssvrarima_istanbul_data_mean_sdsh)\n",
96 | "mape_sxgboostarima_istanbul_data_mean_sdsh = MAPE(testdata_istanbul_data_mean_sdsh, sxgboostarima_istanbul_data_mean_sdsh)\n",
97 | "\n",
98 | "print(mape_ar_istanbul_data_mean_sdsh)\n",
99 | "print(mape_arma_istanbul_data_mean_sdsh)\n",
100 | "print(mape_arima_istanbul_data_mean_sdsh)\n",
101 | "print(mape_sarima_istanbul_data_mean_sdsh)\n",
102 | "print(mape_slstm_istanbul_data_mean_sdsh)\n",
103 | "print(mape_ssvr_istanbul_data_mean_sdsh)\n",
104 | "print(mape_sxgboost_istanbul_data_mean_sdsh)\n",
105 | "print(mape_slstmarima_istanbul_data_mean_sdsh)\n",
106 | "print(mape_ssvrarima_istanbul_data_mean_sdsh)\n",
107 | "print(mape_sxgboostarima_istanbul_data_mean_sdsh)"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": 4,
113 | "metadata": {},
114 | "outputs": [
115 | {
116 | "name": "stdout",
117 | "output_type": "stream",
118 | "text": [
119 | "27.492503060389602\n",
120 | "24.628539402099292\n",
121 | "23.077257114198847\n",
122 | "16.90765610289425\n",
123 | "12.364571240132793\n",
124 | "11.773044608936988\n",
125 | "12.609899198448616\n",
126 | "12.382148093388146\n",
127 | "11.776840396405351\n",
128 | "12.616524462824188\n"
129 | ]
130 | }
131 | ],
132 | "source": [
133 | "mape_100_ar_istanbul_data_mean_sdsh = MAPE_100(testdata_istanbul_data_mean_sdsh, ar_istanbul_data_mean_sdsh)\n",
134 | "mape_100_arma_istanbul_data_mean_sdsh = MAPE_100(testdata_istanbul_data_mean_sdsh, arma_istanbul_data_mean_sdsh)\n",
135 | "mape_100_arima_istanbul_data_mean_sdsh = MAPE_100(testdata_istanbul_data_mean_sdsh, arima_istanbul_data_mean_sdsh)\n",
136 | "mape_100_sarima_istanbul_data_mean_sdsh = MAPE_100(testdata_istanbul_data_mean_sdsh, sarima_istanbul_data_mean_sdsh)\n",
137 | "mape_100_slstm_istanbul_data_mean_sdsh = MAPE_100(testdata_istanbul_data_mean_sdsh, slstm_istanbul_data_mean_sdsh)\n",
138 | "mape_100_ssvr_istanbul_data_mean_sdsh = MAPE_100(testdata_istanbul_data_mean_sdsh, ssvr_istanbul_data_mean_sdsh)\n",
139 | "mape_100_sxgboost_istanbul_data_mean_sdsh = MAPE_100(testdata_istanbul_data_mean_sdsh, sxgboost_istanbul_data_mean_sdsh)\n",
140 | "mape_100_slstmarima_istanbul_data_mean_sdsh = MAPE_100(testdata_istanbul_data_mean_sdsh, slstmarima_istanbul_data_mean_sdsh)\n",
141 | "mape_100_ssvrarima_istanbul_data_mean_sdsh = MAPE_100(testdata_istanbul_data_mean_sdsh, ssvrarima_istanbul_data_mean_sdsh)\n",
142 | "mape_100_sxgboostarima_istanbul_data_mean_sdsh = MAPE_100(testdata_istanbul_data_mean_sdsh, sxgboostarima_istanbul_data_mean_sdsh)\n",
143 | "\n",
144 | "print(mape_100_ar_istanbul_data_mean_sdsh)\n",
145 | "print(mape_100_arma_istanbul_data_mean_sdsh)\n",
146 | "print(mape_100_arima_istanbul_data_mean_sdsh)\n",
147 | "print(mape_100_sarima_istanbul_data_mean_sdsh)\n",
148 | "print(mape_100_slstm_istanbul_data_mean_sdsh)\n",
149 | "print(mape_100_ssvr_istanbul_data_mean_sdsh)\n",
150 | "print(mape_100_sxgboost_istanbul_data_mean_sdsh)\n",
151 | "print(mape_100_slstmarima_istanbul_data_mean_sdsh)\n",
152 | "print(mape_100_ssvrarima_istanbul_data_mean_sdsh)\n",
153 | "print(mape_100_sxgboostarima_istanbul_data_mean_sdsh)"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 5,
159 | "metadata": {},
160 | "outputs": [
161 | {
162 | "name": "stdout",
163 | "output_type": "stream",
164 | "text": [
165 | "26.359933440174725\n",
166 | "23.55588242842907\n",
167 | "17.101445941329548\n",
168 | "13.139766020671567\n",
169 | "9.528538117688552\n",
170 | "9.110524556053173\n",
171 | "9.925149527667402\n",
172 | "9.515685702854562\n",
173 | "9.104299224136252\n",
174 | "9.920039238024659\n"
175 | ]
176 | }
177 | ],
178 | "source": [
179 | "mape_250_ar_istanbul_data_mean_sdsh = MAPE_250(testdata_istanbul_data_mean_sdsh, ar_istanbul_data_mean_sdsh)\n",
180 | "mape_250_arma_istanbul_data_mean_sdsh = MAPE_250(testdata_istanbul_data_mean_sdsh, arma_istanbul_data_mean_sdsh)\n",
181 | "mape_250_arima_istanbul_data_mean_sdsh = MAPE_250(testdata_istanbul_data_mean_sdsh, arima_istanbul_data_mean_sdsh)\n",
182 | "mape_250_sarima_istanbul_data_mean_sdsh = MAPE_250(testdata_istanbul_data_mean_sdsh, sarima_istanbul_data_mean_sdsh)\n",
183 | "mape_250_slstm_istanbul_data_mean_sdsh = MAPE_250(testdata_istanbul_data_mean_sdsh, slstm_istanbul_data_mean_sdsh)\n",
184 | "mape_250_ssvr_istanbul_data_mean_sdsh = MAPE_250(testdata_istanbul_data_mean_sdsh, ssvr_istanbul_data_mean_sdsh)\n",
185 | "mape_250_sxgboost_istanbul_data_mean_sdsh = MAPE_250(testdata_istanbul_data_mean_sdsh, sxgboost_istanbul_data_mean_sdsh)\n",
186 | "mape_250_slstmarima_istanbul_data_mean_sdsh = MAPE_250(testdata_istanbul_data_mean_sdsh, slstmarima_istanbul_data_mean_sdsh)\n",
187 | "mape_250_ssvrarima_istanbul_data_mean_sdsh = MAPE_250(testdata_istanbul_data_mean_sdsh, ssvrarima_istanbul_data_mean_sdsh)\n",
188 | "mape_250_sxgboostarima_istanbul_data_mean_sdsh = MAPE_250(testdata_istanbul_data_mean_sdsh, sxgboostarima_istanbul_data_mean_sdsh)\n",
189 | "\n",
190 | "print(mape_250_ar_istanbul_data_mean_sdsh)\n",
191 | "print(mape_250_arma_istanbul_data_mean_sdsh)\n",
192 | "print(mape_250_arima_istanbul_data_mean_sdsh)\n",
193 | "print(mape_250_sarima_istanbul_data_mean_sdsh)\n",
194 | "print(mape_250_slstm_istanbul_data_mean_sdsh)\n",
195 | "print(mape_250_ssvr_istanbul_data_mean_sdsh)\n",
196 | "print(mape_250_sxgboost_istanbul_data_mean_sdsh)\n",
197 | "print(mape_250_slstmarima_istanbul_data_mean_sdsh)\n",
198 | "print(mape_250_ssvrarima_istanbul_data_mean_sdsh)\n",
199 | "print(mape_250_sxgboostarima_istanbul_data_mean_sdsh)"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": 6,
205 | "metadata": {},
206 | "outputs": [
207 | {
208 | "name": "stdout",
209 | "output_type": "stream",
210 | "text": [
211 | "52.29404862398487\n",
212 | "47.1007412386391\n",
213 | "42.39015327603023\n",
214 | "31.579195499410115\n",
215 | "23.470105247747735\n",
216 | "22.230927819831624\n",
217 | "23.976517753869476\n",
218 | "23.475449282581536\n",
219 | "22.233469339131968\n",
220 | "23.98729810076995\n"
221 | ]
222 | }
223 | ],
224 | "source": [
225 | "mae_ar_istanbul_data_mean_sdsh = MAE(testdata_istanbul_data_mean_sdsh, ar_istanbul_data_mean_sdsh)\n",
226 | "mae_arma_istanbul_data_mean_sdsh = MAE(testdata_istanbul_data_mean_sdsh, arma_istanbul_data_mean_sdsh)\n",
227 | "mae_arima_istanbul_data_mean_sdsh = MAE(testdata_istanbul_data_mean_sdsh, arima_istanbul_data_mean_sdsh)\n",
228 | "mae_sarima_istanbul_data_mean_sdsh = MAE(testdata_istanbul_data_mean_sdsh, sarima_istanbul_data_mean_sdsh)\n",
229 | "mae_slstm_istanbul_data_mean_sdsh = MAE(testdata_istanbul_data_mean_sdsh, slstm_istanbul_data_mean_sdsh)\n",
230 | "mae_ssvr_istanbul_data_mean_sdsh = MAE(testdata_istanbul_data_mean_sdsh, ssvr_istanbul_data_mean_sdsh)\n",
231 | "mae_sxgboost_istanbul_data_mean_sdsh = MAE(testdata_istanbul_data_mean_sdsh, sxgboost_istanbul_data_mean_sdsh)\n",
232 | "mae_slstmarima_istanbul_data_mean_sdsh = MAE(testdata_istanbul_data_mean_sdsh, slstmarima_istanbul_data_mean_sdsh)\n",
233 | "mae_ssvrarima_istanbul_data_mean_sdsh = MAE(testdata_istanbul_data_mean_sdsh, ssvrarima_istanbul_data_mean_sdsh)\n",
234 | "mae_sxgboostarima_istanbul_data_mean_sdsh = MAE(testdata_istanbul_data_mean_sdsh, sxgboostarima_istanbul_data_mean_sdsh)\n",
235 | "\n",
236 | "print(mae_ar_istanbul_data_mean_sdsh)\n",
237 | "print(mae_arma_istanbul_data_mean_sdsh)\n",
238 | "print(mae_arima_istanbul_data_mean_sdsh)\n",
239 | "print(mae_sarima_istanbul_data_mean_sdsh)\n",
240 | "print(mae_slstm_istanbul_data_mean_sdsh)\n",
241 | "print(mae_ssvr_istanbul_data_mean_sdsh)\n",
242 | "print(mae_sxgboost_istanbul_data_mean_sdsh)\n",
243 | "print(mae_slstmarima_istanbul_data_mean_sdsh)\n",
244 | "print(mae_ssvrarima_istanbul_data_mean_sdsh)\n",
245 | "print(mae_sxgboostarima_istanbul_data_mean_sdsh)"
246 | ]
247 | },
248 | {
249 | "cell_type": "code",
250 | "execution_count": null,
251 | "metadata": {},
252 | "outputs": [],
253 | "source": []
254 | }
255 | ],
256 | "metadata": {
257 | "interpreter": {
258 | "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f"
259 | },
260 | "kernelspec": {
261 | "display_name": "Python 3.7.9",
262 | "language": "python",
263 | "name": "python3"
264 | },
265 | "language_info": {
266 | "codemirror_mode": {
267 | "name": "ipython",
268 | "version": 3
269 | },
270 | "file_extension": ".py",
271 | "mimetype": "text/x-python",
272 | "name": "python",
273 | "nbconvert_exporter": "python",
274 | "pygments_lexer": "ipython3",
275 | "version": "3.7.9"
276 | },
277 | "orig_nbformat": 4
278 | },
279 | "nbformat": 4,
280 | "nbformat_minor": 2
281 | }
282 |
--------------------------------------------------------------------------------
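As the printed scores above show, the thresholded variants sit well below the plain MAPE. MAPE averages |actual − forecast| / actual, so intervals with very low vehicle counts dominate it: an actual of 20 forecast as 40 contributes a 100% error on its own, while an actual of 400 forecast as 420 contributes only 5%. MAPE_100 and MAPE_250 therefore restrict the average to intervals whose observed flow exceeds 100 or 250 vehicles, focusing the comparison on the busier periods.
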
/evaluatemodels_istanbul_data_mean_sh.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "import pandas as pd\n",
11 | "\n",
12 | "#Defining MAPE function\n",
13 | "def MAPE(actual_values,predicted_values):\n",
14 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
15 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
16 | " mape = np.mean(np.abs((actual_values - predicted_values)/actual_values))*100\n",
17 | " return mape\n",
18 | "\n",
19 | "#Defining MAPE_100 function\n",
20 | "def MAPE_100(actual_values,predicted_values):\n",
21 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
22 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
23 | " x = np.concatenate((actual_values,predicted_values), axis=1)\n",
24 | " x_100 = x[x[:,0]>100]\n",
25 | " mape = np.mean(np.abs((x_100[:,0] - x_100[:,1]) / x_100[:,0]))*100\n",
26 | " return mape\n",
27 | "\n",
28 | "#Defining MAPE_250 function\n",
29 | "def MAPE_250(actual_values,predicted_values):\n",
30 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
31 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
32 | " x = np.concatenate((actual_values,predicted_values), axis=1)\n",
33 | " x_250 = x[x[:,0]>250]\n",
34 | " mape = np.mean(np.abs((x_250[:,0] - x_250[:,1]) / x_250[:,0]))*100\n",
35 | " return mape\n",
36 | "\n",
37 | "#Defining MAE function\n",
38 | "def MAE(actual_values,predicted_values):\n",
39 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
40 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
41 | " mae = np.mean(np.abs(actual_values - predicted_values))\n",
42 | " return mae"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 2,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "testdata_istanbul_data_mean_sh = np.array(pd.read_csv('data/istanbul/istanbul_data_mean_sh.csv')[['NUMBER_OF_VEHICLES']][-3500:])\n",
52 | "ar_istanbul_data_mean_sh = np.array(pd.read_csv(\"point_forecasts/ar_istanbul_data_mean_sh_test.csv\")[\"0\"])\n",
53 | "arma_istanbul_data_mean_sh = np.array(pd.read_csv(\"point_forecasts/arma_istanbul_data_mean_sh_test.csv\")[\"0\"])\n",
54 | "arima_istanbul_data_mean_sh = np.array(pd.read_csv(\"point_forecasts/arima_istanbul_data_mean_sh_test.csv\")[\"0\"])\n",
55 | "sarima_istanbul_data_mean_sh = np.array(pd.read_csv(\"point_forecasts/sarima_istanbul_data_mean_sh_test.csv\")[\"0\"])\n",
56 | "slstm_istanbul_data_mean_sh = np.array(pd.read_csv(\"point_forecasts/slstm_istanbul_data_mean_sh_test.csv\")[\"0\"])\n",
57 | "ssvr_istanbul_data_mean_sh = np.array(pd.read_csv(\"point_forecasts/ssvr_istanbul_data_mean_sh_test.csv\")[\"0\"])\n",
58 | "sxgboost_istanbul_data_mean_sh = np.array(pd.read_csv(\"point_forecasts/sxgboost_istanbul_data_mean_sh_test.csv\")[\"0\"])\n",
59 | "slstmarima_istanbul_data_mean_sh = np.array(pd.read_csv(\"point_forecasts/slstmarima_istanbul_data_mean_sh_test.csv\")[\"0\"])\n",
60 | "ssvrarima_istanbul_data_mean_sh = np.array(pd.read_csv(\"point_forecasts/ssvrarima_istanbul_data_mean_sh_test.csv\")[\"0\"])\n",
61 | "sxgboostarima_istanbul_data_mean_sh = np.array(pd.read_csv(\"point_forecasts/sxgboostarima_istanbul_data_mean_sh_test.csv\")[\"0\"])"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 3,
67 | "metadata": {},
68 | "outputs": [
69 | {
70 | "name": "stdout",
71 | "output_type": "stream",
72 | "text": [
73 | "31.639771004700513\n",
74 | "28.572032803815915\n",
75 | "29.968686582552234\n",
76 | "21.774666788498752\n",
77 | "17.65213224379523\n",
78 | "17.155343621539714\n",
79 | "18.51083359724517\n",
80 | "17.670485960850637\n",
81 | "17.16216537051296\n",
82 | "18.51645262368806\n"
83 | ]
84 | }
85 | ],
86 | "source": [
87 | "mape_ar_istanbul_data_mean_sh = MAPE(testdata_istanbul_data_mean_sh, ar_istanbul_data_mean_sh)\n",
88 | "mape_arma_istanbul_data_mean_sh = MAPE(testdata_istanbul_data_mean_sh, arma_istanbul_data_mean_sh)\n",
89 | "mape_arima_istanbul_data_mean_sh = MAPE(testdata_istanbul_data_mean_sh, arima_istanbul_data_mean_sh)\n",
90 | "mape_sarima_istanbul_data_mean_sh = MAPE(testdata_istanbul_data_mean_sh, sarima_istanbul_data_mean_sh)\n",
91 | "mape_slstm_istanbul_data_mean_sh = MAPE(testdata_istanbul_data_mean_sh, slstm_istanbul_data_mean_sh)\n",
92 | "mape_ssvr_istanbul_data_mean_sh = MAPE(testdata_istanbul_data_mean_sh, ssvr_istanbul_data_mean_sh)\n",
93 | "mape_sxgboost_istanbul_data_mean_sh = MAPE(testdata_istanbul_data_mean_sh, sxgboost_istanbul_data_mean_sh)\n",
94 | "mape_slstmarima_istanbul_data_mean_sh = MAPE(testdata_istanbul_data_mean_sh, slstmarima_istanbul_data_mean_sh)\n",
95 | "mape_ssvrarima_istanbul_data_mean_sh = MAPE(testdata_istanbul_data_mean_sh, ssvrarima_istanbul_data_mean_sh)\n",
96 | "mape_sxgboostarima_istanbul_data_mean_sh = MAPE(testdata_istanbul_data_mean_sh, sxgboostarima_istanbul_data_mean_sh)\n",
97 | "\n",
98 | "print(mape_ar_istanbul_data_mean_sh)\n",
99 | "print(mape_arma_istanbul_data_mean_sh)\n",
100 | "print(mape_arima_istanbul_data_mean_sh)\n",
101 | "print(mape_sarima_istanbul_data_mean_sh)\n",
102 | "print(mape_slstm_istanbul_data_mean_sh)\n",
103 | "print(mape_ssvr_istanbul_data_mean_sh)\n",
104 | "print(mape_sxgboost_istanbul_data_mean_sh)\n",
105 | "print(mape_slstmarima_istanbul_data_mean_sh)\n",
106 | "print(mape_ssvrarima_istanbul_data_mean_sh)\n",
107 | "print(mape_sxgboostarima_istanbul_data_mean_sh)"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": 4,
113 | "metadata": {},
114 | "outputs": [
115 | {
116 | "name": "stdout",
117 | "output_type": "stream",
118 | "text": [
119 | "26.65815858456\n",
120 | "23.500346531481295\n",
121 | "21.68985263196856\n",
122 | "15.153660727371493\n",
123 | "12.373201752239357\n",
124 | "12.024076054376485\n",
125 | "12.980894248875982\n",
126 | "12.391141607796829\n",
127 | "12.036228092861402\n",
128 | "12.99737582199011\n"
129 | ]
130 | }
131 | ],
132 | "source": [
133 | "mape_100_ar_istanbul_data_mean_sh = MAPE_100(testdata_istanbul_data_mean_sh, ar_istanbul_data_mean_sh)\n",
134 | "mape_100_arma_istanbul_data_mean_sh = MAPE_100(testdata_istanbul_data_mean_sh, arma_istanbul_data_mean_sh)\n",
135 | "mape_100_arima_istanbul_data_mean_sh = MAPE_100(testdata_istanbul_data_mean_sh, arima_istanbul_data_mean_sh)\n",
136 | "mape_100_sarima_istanbul_data_mean_sh = MAPE_100(testdata_istanbul_data_mean_sh, sarima_istanbul_data_mean_sh)\n",
137 | "mape_100_slstm_istanbul_data_mean_sh = MAPE_100(testdata_istanbul_data_mean_sh, slstm_istanbul_data_mean_sh)\n",
138 | "mape_100_ssvr_istanbul_data_mean_sh = MAPE_100(testdata_istanbul_data_mean_sh, ssvr_istanbul_data_mean_sh)\n",
139 | "mape_100_sxgboost_istanbul_data_mean_sh = MAPE_100(testdata_istanbul_data_mean_sh, sxgboost_istanbul_data_mean_sh)\n",
140 | "mape_100_slstmarima_istanbul_data_mean_sh = MAPE_100(testdata_istanbul_data_mean_sh, slstmarima_istanbul_data_mean_sh)\n",
141 | "mape_100_ssvrarima_istanbul_data_mean_sh = MAPE_100(testdata_istanbul_data_mean_sh, ssvrarima_istanbul_data_mean_sh)\n",
142 | "mape_100_sxgboostarima_istanbul_data_mean_sh = MAPE_100(testdata_istanbul_data_mean_sh, sxgboostarima_istanbul_data_mean_sh)\n",
143 | "\n",
144 | "print(mape_100_ar_istanbul_data_mean_sh)\n",
145 | "print(mape_100_arma_istanbul_data_mean_sh)\n",
146 | "print(mape_100_arima_istanbul_data_mean_sh)\n",
147 | "print(mape_100_sarima_istanbul_data_mean_sh)\n",
148 | "print(mape_100_slstm_istanbul_data_mean_sh)\n",
149 | "print(mape_100_ssvr_istanbul_data_mean_sh)\n",
150 | "print(mape_100_sxgboost_istanbul_data_mean_sh)\n",
151 | "print(mape_100_slstmarima_istanbul_data_mean_sh)\n",
152 | "print(mape_100_ssvrarima_istanbul_data_mean_sh)\n",
153 | "print(mape_100_sxgboostarima_istanbul_data_mean_sh)"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 5,
159 | "metadata": {},
160 | "outputs": [
161 | {
162 | "name": "stdout",
163 | "output_type": "stream",
164 | "text": [
165 | "25.311646310380866\n",
166 | "22.605266190396456\n",
167 | "16.743081708139183\n",
168 | "12.363706488522883\n",
169 | "10.107239766679745\n",
170 | "9.710289104268918\n",
171 | "10.599377377177987\n",
172 | "10.107467632206035\n",
173 | "9.705971697418457\n",
174 | "10.613657190766002\n"
175 | ]
176 | }
177 | ],
178 | "source": [
179 | "mape_250_ar_istanbul_data_mean_sh = MAPE_250(testdata_istanbul_data_mean_sh, ar_istanbul_data_mean_sh)\n",
180 | "mape_250_arma_istanbul_data_mean_sh = MAPE_250(testdata_istanbul_data_mean_sh, arma_istanbul_data_mean_sh)\n",
181 | "mape_250_arima_istanbul_data_mean_sh = MAPE_250(testdata_istanbul_data_mean_sh, arima_istanbul_data_mean_sh)\n",
182 | "mape_250_sarima_istanbul_data_mean_sh = MAPE_250(testdata_istanbul_data_mean_sh, sarima_istanbul_data_mean_sh)\n",
183 | "mape_250_slstm_istanbul_data_mean_sh = MAPE_250(testdata_istanbul_data_mean_sh, slstm_istanbul_data_mean_sh)\n",
184 | "mape_250_ssvr_istanbul_data_mean_sh = MAPE_250(testdata_istanbul_data_mean_sh, ssvr_istanbul_data_mean_sh)\n",
185 | "mape_250_sxgboost_istanbul_data_mean_sh = MAPE_250(testdata_istanbul_data_mean_sh, sxgboost_istanbul_data_mean_sh)\n",
186 | "mape_250_slstmarima_istanbul_data_mean_sh = MAPE_250(testdata_istanbul_data_mean_sh, slstmarima_istanbul_data_mean_sh)\n",
187 | "mape_250_ssvrarima_istanbul_data_mean_sh = MAPE_250(testdata_istanbul_data_mean_sh, ssvrarima_istanbul_data_mean_sh)\n",
188 | "mape_250_sxgboostarima_istanbul_data_mean_sh = MAPE_250(testdata_istanbul_data_mean_sh, sxgboostarima_istanbul_data_mean_sh)\n",
189 | "\n",
190 | "print(mape_250_ar_istanbul_data_mean_sh)\n",
191 | "print(mape_250_arma_istanbul_data_mean_sh)\n",
192 | "print(mape_250_arima_istanbul_data_mean_sh)\n",
193 | "print(mape_250_sarima_istanbul_data_mean_sh)\n",
194 | "print(mape_250_slstm_istanbul_data_mean_sh)\n",
195 | "print(mape_250_ssvr_istanbul_data_mean_sh)\n",
196 | "print(mape_250_sxgboost_istanbul_data_mean_sh)\n",
197 | "print(mape_250_slstmarima_istanbul_data_mean_sh)\n",
198 | "print(mape_250_ssvrarima_istanbul_data_mean_sh)\n",
199 | "print(mape_250_sxgboostarima_istanbul_data_mean_sh)"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": 6,
205 | "metadata": {},
206 | "outputs": [
207 | {
208 | "name": "stdout",
209 | "output_type": "stream",
210 | "text": [
211 | "50.59338322306562\n",
212 | "45.1325882067358\n",
213 | "40.46508991283195\n",
214 | "29.04744789522173\n",
215 | "23.601114571248978\n",
216 | "22.836079943066032\n",
217 | "24.71005580608951\n",
218 | "23.643619065937603\n",
219 | "22.850270792773518\n",
220 | "24.732252707828827\n"
221 | ]
222 | }
223 | ],
224 | "source": [
225 | "mae_ar_istanbul_data_mean_sh = MAE(testdata_istanbul_data_mean_sh, ar_istanbul_data_mean_sh)\n",
226 | "mae_arma_istanbul_data_mean_sh = MAE(testdata_istanbul_data_mean_sh, arma_istanbul_data_mean_sh)\n",
227 | "mae_arima_istanbul_data_mean_sh = MAE(testdata_istanbul_data_mean_sh, arima_istanbul_data_mean_sh)\n",
228 | "mae_sarima_istanbul_data_mean_sh = MAE(testdata_istanbul_data_mean_sh, sarima_istanbul_data_mean_sh)\n",
229 | "mae_slstm_istanbul_data_mean_sh = MAE(testdata_istanbul_data_mean_sh, slstm_istanbul_data_mean_sh)\n",
230 | "mae_ssvr_istanbul_data_mean_sh = MAE(testdata_istanbul_data_mean_sh, ssvr_istanbul_data_mean_sh)\n",
231 | "mae_sxgboost_istanbul_data_mean_sh = MAE(testdata_istanbul_data_mean_sh, sxgboost_istanbul_data_mean_sh)\n",
232 | "mae_slstmarima_istanbul_data_mean_sh = MAE(testdata_istanbul_data_mean_sh, slstmarima_istanbul_data_mean_sh)\n",
233 | "mae_ssvrarima_istanbul_data_mean_sh = MAE(testdata_istanbul_data_mean_sh, ssvrarima_istanbul_data_mean_sh)\n",
234 | "mae_sxgboostarima_istanbul_data_mean_sh = MAE(testdata_istanbul_data_mean_sh, sxgboostarima_istanbul_data_mean_sh)\n",
235 | "\n",
236 | "print(mae_ar_istanbul_data_mean_sh)\n",
237 | "print(mae_arma_istanbul_data_mean_sh)\n",
238 | "print(mae_arima_istanbul_data_mean_sh)\n",
239 | "print(mae_sarima_istanbul_data_mean_sh)\n",
240 | "print(mae_slstm_istanbul_data_mean_sh)\n",
241 | "print(mae_ssvr_istanbul_data_mean_sh)\n",
242 | "print(mae_sxgboost_istanbul_data_mean_sh)\n",
243 | "print(mae_slstmarima_istanbul_data_mean_sh)\n",
244 | "print(mae_ssvrarima_istanbul_data_mean_sh)\n",
245 | "print(mae_sxgboostarima_istanbul_data_mean_sh)"
246 | ]
247 | },
248 | {
249 | "cell_type": "code",
250 | "execution_count": null,
251 | "metadata": {},
252 | "outputs": [],
253 | "source": []
254 | }
255 | ],
256 | "metadata": {
257 | "interpreter": {
258 | "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f"
259 | },
260 | "kernelspec": {
261 | "display_name": "Python 3.7.9",
262 | "language": "python",
263 | "name": "python3"
264 | },
265 | "language_info": {
266 | "codemirror_mode": {
267 | "name": "ipython",
268 | "version": 3
269 | },
270 | "file_extension": ".py",
271 | "mimetype": "text/x-python",
272 | "name": "python",
273 | "nbconvert_exporter": "python",
274 | "pygments_lexer": "ipython3",
275 | "version": "3.7.9"
276 | },
277 | "orig_nbformat": 4
278 | },
279 | "nbformat": 4,
280 | "nbformat_minor": 2
281 | }
282 |
--------------------------------------------------------------------------------
/evaluatemodels_pems_716933.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 27,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "import pandas as pd\n",
11 | "\n",
12 | "#Defining MAPE function\n",
13 | "def MAPE(actual_values,predicted_values):\n",
14 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
15 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
16 | " mape = np.mean(np.abs((actual_values - predicted_values)/actual_values))*100\n",
17 | " return mape\n",
18 | "\n",
19 | "#Defining MAPE_100 function\n",
20 | "def MAPE_100(actual_values,predicted_values):\n",
21 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
22 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
23 | " x = np.concatenate((actual_values,predicted_values), axis=1)\n",
24 | " x_100 = x[x[:,0]>100]\n",
25 | " mape = np.mean(np.abs((x_100[:,0] - x_100[:,1]) / x_100[:,0]))*100\n",
26 | " return mape\n",
27 | "\n",
28 | "#Defining MAPE_250 function\n",
29 | "def MAPE_250(actual_values,predicted_values):\n",
30 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
31 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
32 | " x = np.concatenate((actual_values,predicted_values), axis=1)\n",
33 | " x_250 = x[x[:,0]>250]\n",
34 | " mape = np.mean(np.abs((x_250[:,0] - x_250[:,1]) / x_250[:,0]))*100\n",
35 | " return mape\n",
36 | "\n",
37 | "#Defining MAE function\n",
38 | "def MAE(actual_values,predicted_values):\n",
39 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
40 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
41 | " mae = np.mean(np.abs(actual_values - predicted_values))\n",
42 | " return mae"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 28,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "testdata_716933 = np.array(pd.read_csv('data/pems/pems-d07-9months-2021-station716933-15min.csv')[['Total Flow']][-7863:])\n",
52 | "ar_716933 = np.array(pd.read_csv(\"point_forecasts/ar_pems_716933_test.csv\")[\"0\"])\n",
53 | "arma_716933 = np.array(pd.read_csv(\"point_forecasts/arma_pems_716933_test.csv\")[\"0\"])\n",
54 | "arima_716933 = np.array(pd.read_csv(\"point_forecasts/arima_pems_716933_test.csv\")[\"0\"])\n",
55 | "sarima_716933 = np.array(pd.read_csv(\"point_forecasts/sarima_pems_716933_test.csv\")[\"0\"])\n",
56 | "slstm_716933 = np.array(pd.read_csv(\"point_forecasts/slstm_pems_716933_test.csv\")[\"0\"])\n",
57 | "ssvr_716933 = np.array(pd.read_csv(\"point_forecasts/ssvr_pems_716933_test.csv\")[\"0\"])\n",
58 | "sxgboost_716933 = np.array(pd.read_csv(\"point_forecasts/sxgboost_pems_716933_test.csv\")[\"0\"])\n",
59 | "slstmarima_716933 = np.array(pd.read_csv(\"point_forecasts/slstmarima_pems_716933_test.csv\")[\"0\"])\n",
60 | "ssvrarima_716933 = np.array(pd.read_csv(\"point_forecasts/ssvrarima_pems_716933_test.csv\")[\"0\"])\n",
61 | "sxgboostarima_716933 = np.array(pd.read_csv(\"point_forecasts/sxgboostarima_pems_716933_test.csv\")[\"0\"])"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 29,
67 | "metadata": {},
68 | "outputs": [],
69 | "source": [
70 | "mape_ar_716933 = MAPE(testdata_716933, ar_716933)\n",
71 | "mape_arma_716933 = MAPE(testdata_716933, arma_716933)\n",
72 | "mape_arima_716933 = MAPE(testdata_716933, arima_716933)\n",
73 | "mape_sarima_716933 = MAPE(testdata_716933, sarima_716933)\n",
74 | "mape_slstm_716933 = MAPE(testdata_716933, slstm_716933)\n",
75 | "mape_ssvr_716933 = MAPE(testdata_716933, ssvr_716933)\n",
76 | "mape_sxgboost_716933 = MAPE(testdata_716933, sxgboost_716933)\n",
77 | "mape_slstmarima_716933 = MAPE(testdata_716933, slstmarima_716933)\n",
78 | "mape_ssvrarima_716933 = MAPE(testdata_716933, ssvrarima_716933)\n",
79 | "mape_sxgboostarima_716933 = MAPE(testdata_716933, sxgboostarima_716933)\n"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 30,
85 | "metadata": {},
86 | "outputs": [
87 | {
88 | "name": "stdout",
89 | "output_type": "stream",
90 | "text": [
91 | "8.047163203140883\n",
92 | "7.74001675364598\n",
93 | "7.3660278247569115\n",
94 | "6.648892868326587\n",
95 | "6.253069644402445\n",
96 | "6.000669052475197\n",
97 | "6.099251378263277\n",
98 | "6.253429810578235\n",
99 | "6.000702024746338\n",
100 | "6.099321286450661\n"
101 | ]
102 | }
103 | ],
104 | "source": [
105 | "print(mape_ar_716933)\n",
106 | "print(mape_arma_716933)\n",
107 | "print(mape_arima_716933)\n",
108 | "print(mape_sarima_716933)\n",
109 | "print(mape_slstm_716933 )\n",
110 | "print(mape_ssvr_716933)\n",
111 | "print(mape_sxgboost_716933)\n",
112 | "print(mape_slstmarima_716933)\n",
113 | "print(mape_ssvrarima_716933)\n",
114 | "print(mape_sxgboostarima_716933)"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": 31,
120 | "metadata": {},
121 | "outputs": [
122 | {
123 | "name": "stdout",
124 | "output_type": "stream",
125 | "text": [
126 | "8.047163203140883\n",
127 | "7.74001675364598\n",
128 | "7.3660278247569115\n",
129 | "6.648892868326587\n",
130 | "6.253069644402445\n",
131 | "6.000669052475197\n",
132 | "6.099251378263277\n",
133 | "6.253429810578235\n",
134 | "6.000702024746338\n",
135 | "6.099321286450661\n"
136 | ]
137 | }
138 | ],
139 | "source": [
140 | "mape_100_ar_716933 = MAPE_100(testdata_716933, ar_716933)\n",
141 | "mape_100_arma_716933 = MAPE_100(testdata_716933, arma_716933)\n",
142 | "mape_100_arima_716933 = MAPE_100(testdata_716933, arima_716933)\n",
143 | "mape_100_sarima_716933 = MAPE_100(testdata_716933, sarima_716933)\n",
144 | "mape_100_slstm_716933 = MAPE_100(testdata_716933, slstm_716933)\n",
145 | "mape_100_ssvr_716933 = MAPE_100(testdata_716933, ssvr_716933)\n",
146 | "mape_100_sxgboost_716933 = MAPE_100(testdata_716933, sxgboost_716933)\n",
147 | "mape_100_slstmarima_716933 = MAPE_100(testdata_716933, slstmarima_716933)\n",
148 | "mape_100_ssvrarima_716933 = MAPE_100(testdata_716933, ssvrarima_716933)\n",
149 | "mape_100_sxgboostarima_716933 = MAPE_100(testdata_716933, sxgboostarima_716933)\n",
150 | "print(mape_100_ar_716933)\n",
151 | "print(mape_100_arma_716933)\n",
152 | "print(mape_100_arima_716933)\n",
153 | "print(mape_100_sarima_716933)\n",
154 | "print(mape_100_slstm_716933 )\n",
155 | "print(mape_100_ssvr_716933)\n",
156 | "print(mape_100_sxgboost_716933)\n",
157 | "print(mape_100_slstmarima_716933)\n",
158 | "print(mape_100_ssvrarima_716933)\n",
159 | "print(mape_100_sxgboostarima_716933)\n"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": 32,
165 | "metadata": {},
166 | "outputs": [
167 | {
168 | "name": "stdout",
169 | "output_type": "stream",
170 | "text": [
171 | "7.69157771661967\n",
172 | "7.333505018200419\n",
173 | "6.816440535576364\n",
174 | "6.248341274074011\n",
175 | "5.619165660371625\n",
176 | "5.469034648723104\n",
177 | "5.559938973342971\n",
178 | "5.619172036305884\n",
179 | "5.469042891030165\n",
180 | "5.559936891963197\n"
181 | ]
182 | }
183 | ],
184 | "source": [
185 | "mape_250_ar_716933 = MAPE_250(testdata_716933, ar_716933)\n",
186 | "mape_250_arma_716933 = MAPE_250(testdata_716933, arma_716933)\n",
187 | "mape_250_arima_716933 = MAPE_250(testdata_716933, arima_716933)\n",
188 | "mape_250_sarima_716933 = MAPE_250(testdata_716933, sarima_716933)\n",
189 | "mape_250_slstm_716933 = MAPE_250(testdata_716933, slstm_716933)\n",
190 | "mape_250_ssvr_716933 = MAPE_250(testdata_716933, ssvr_716933)\n",
191 | "mape_250_sxgboost_716933 = MAPE_250(testdata_716933, sxgboost_716933)\n",
192 | "mape_250_slstmarima_716933 = MAPE_250(testdata_716933, slstmarima_716933)\n",
193 | "mape_250_ssvrarima_716933 = MAPE_250(testdata_716933, ssvrarima_716933)\n",
194 | "mape_250_sxgboostarima_716933 = MAPE_250(testdata_716933, sxgboostarima_716933)\n",
195 | "print(mape_250_ar_716933)\n",
196 | "print(mape_250_arma_716933)\n",
197 | "print(mape_250_arima_716933)\n",
198 | "print(mape_250_sarima_716933)\n",
199 | "print(mape_250_slstm_716933 )\n",
200 | "print(mape_250_ssvr_716933)\n",
201 | "print(mape_250_sxgboost_716933)\n",
202 | "print(mape_250_slstmarima_716933)\n",
203 | "print(mape_250_ssvrarima_716933)\n",
204 | "print(mape_250_sxgboostarima_716933)\n"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": 33,
210 | "metadata": {},
211 | "outputs": [
212 | {
213 | "name": "stdout",
214 | "output_type": "stream",
215 | "text": [
216 | "36.61291387491914\n",
217 | "34.706397687562195\n",
218 | "31.70197713784382\n",
219 | "29.394136701895967\n",
220 | "26.841926865064224\n",
221 | "26.015406145713285\n",
222 | "26.435141600449363\n",
223 | "26.842688512959494\n",
224 | "26.015615351192313\n",
225 | "26.43538085374002\n"
226 | ]
227 | }
228 | ],
229 | "source": [
230 | "mae_ar_716933 = MAE(testdata_716933, ar_716933)\n",
231 | "mae_arma_716933 = MAE(testdata_716933, arma_716933)\n",
232 | "mae_arima_716933 = MAE(testdata_716933, arima_716933)\n",
233 | "mae_sarima_716933 = MAE(testdata_716933, sarima_716933)\n",
234 | "mae_slstm_716933 = MAE(testdata_716933, slstm_716933)\n",
235 | "mae_ssvr_716933 = MAE(testdata_716933, ssvr_716933)\n",
236 | "mae_sxgboost_716933 = MAE(testdata_716933, sxgboost_716933)\n",
237 | "mae_slstmarima_716933 = MAE(testdata_716933, slstmarima_716933)\n",
238 | "mae_ssvrarima_716933 = MAE(testdata_716933, ssvrarima_716933)\n",
239 | "mae_sxgboostarima_716933 = MAE(testdata_716933, sxgboostarima_716933)\n",
240 | "print(mae_ar_716933)\n",
241 | "print(mae_arma_716933)\n",
242 | "print(mae_arima_716933)\n",
243 | "print(mae_sarima_716933)\n",
244 | "print(mae_slstm_716933 )\n",
245 | "print(mae_ssvr_716933)\n",
246 | "print(mae_sxgboost_716933)\n",
247 | "print(mae_slstmarima_716933)\n",
248 | "print(mae_ssvrarima_716933)\n",
249 | "print(mae_sxgboostarima_716933)"
250 | ]
251 | },
252 | {
253 | "cell_type": "code",
254 | "execution_count": null,
255 | "metadata": {},
256 | "outputs": [],
257 | "source": []
258 | }
259 | ],
260 | "metadata": {
261 | "interpreter": {
262 | "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f"
263 | },
264 | "kernelspec": {
265 | "display_name": "Python 3.7.9",
266 | "language": "python",
267 | "name": "python3"
268 | },
269 | "language_info": {
270 | "codemirror_mode": {
271 | "name": "ipython",
272 | "version": 3
273 | },
274 | "file_extension": ".py",
275 | "mimetype": "text/x-python",
276 | "name": "python",
277 | "nbconvert_exporter": "python",
278 | "pygments_lexer": "ipython3",
279 | "version": "3.7.9"
280 | },
281 | "orig_nbformat": 4
282 | },
283 | "nbformat": 4,
284 | "nbformat_minor": 2
285 | }
286 |
--------------------------------------------------------------------------------
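One detail worth noting in the station 716933 output above: the MAPE and MAPE_100 values are identical for every model, which is consistent with all actual Total Flow values in the 7863-step test window exceeding 100 vehicles per 15 minutes, so the >100 filter removes nothing. A one-line check along these lines (using the `testdata_716933` array already loaded in the notebook):

    print((testdata_716933 <= 100).sum())  # 0 would mean the MAPE_100 filter is never active
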
/evaluatemodels_pems_717087.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 32,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "import pandas as pd\n",
11 | "\n",
12 | "#Defining MAPE function\n",
13 | "def MAPE(actual_values,predicted_values):\n",
14 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
15 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
16 | " mape = np.mean(np.abs((actual_values - predicted_values)/actual_values))*100\n",
17 | " return mape\n",
18 | "\n",
19 | "#Defining MAPE_100 function\n",
20 | "def MAPE_100(actual_values,predicted_values):\n",
21 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
22 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
23 | " x = np.concatenate((actual_values,predicted_values), axis=1)\n",
24 | " x_100 = x[x[:,0]>100]\n",
25 | " mape = np.mean(np.abs((x_100[:,0] - x_100[:,1]) / x_100[:,0]))*100\n",
26 | " return mape\n",
27 | "\n",
28 | "#Defining MAPE_250 function\n",
29 | "def MAPE_250(actual_values,predicted_values):\n",
30 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
31 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
32 | " x = np.concatenate((actual_values,predicted_values), axis=1)\n",
33 | " x_250 = x[x[:,0]>250]\n",
34 | " mape = np.mean(np.abs((x_250[:,0] - x_250[:,1]) / x_250[:,0]))*100\n",
35 | " return mape\n",
36 | "\n",
37 | "#Defining MAE function\n",
38 | "def MAE(actual_values,predicted_values):\n",
39 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
40 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
41 | " mae = np.mean(np.abs(actual_values - predicted_values))\n",
42 | " return mae"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 33,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "testdata_717087 = np.array(pd.read_csv('data/pems/pems-d07-9months-2021-station717087-15min.csv')[['Total Flow']][-7863:])\n",
52 | "ar_717087 = np.array(pd.read_csv(\"point_forecasts/ar_pems_717087_test.csv\")[\"0\"])\n",
53 | "arma_717087 = np.array(pd.read_csv(\"point_forecasts/arma_pems_717087_test.csv\")[\"0\"])\n",
54 | "arima_717087 = np.array(pd.read_csv(\"point_forecasts/arima_pems_717087_test.csv\")[\"0\"])\n",
55 | "sarima_717087 = np.array(pd.read_csv(\"point_forecasts/sarima_pems_717087_test.csv\")[\"0\"])\n",
56 | "slstm_717087 = np.array(pd.read_csv(\"point_forecasts/slstm_pems_717087_test.csv\")[\"0\"])\n",
57 | "ssvr_717087 = np.array(pd.read_csv(\"point_forecasts/ssvr_pems_717087_test.csv\")[\"0\"])\n",
58 | "sxgboost_717087 = np.array(pd.read_csv(\"point_forecasts/sxgboost_pems_717087_test.csv\")[\"0\"])\n",
59 | "slstmarima_717087 = np.array(pd.read_csv(\"point_forecasts/slstmarima_pems_717087_test.csv\")[\"0\"])\n",
60 | "ssvrarima_717087 = np.array(pd.read_csv(\"point_forecasts/ssvrarima_pems_717087_test.csv\")[\"0\"])\n",
61 | "sxgboostarima_717087 = np.array(pd.read_csv(\"point_forecasts/sxgboostarima_pems_717087_test.csv\")[\"0\"])"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 34,
67 | "metadata": {},
68 | "outputs": [],
69 | "source": [
70 | "mape_ar_717087 = MAPE(testdata_717087, ar_717087)\n",
71 | "mape_arma_717087 = MAPE(testdata_717087, arma_717087)\n",
72 | "mape_arima_717087 = MAPE(testdata_717087, arima_717087)\n",
73 | "mape_sarima_717087 = MAPE(testdata_717087, sarima_717087)\n",
74 | "mape_slstm_717087 = MAPE(testdata_717087, slstm_717087)\n",
75 | "mape_ssvr_717087 = MAPE(testdata_717087, ssvr_717087)\n",
76 | "mape_sxgboost_717087 = MAPE(testdata_717087, sxgboost_717087)\n",
77 | "mape_slstmarima_717087 = MAPE(testdata_717087, slstmarima_717087)\n",
78 | "mape_ssvrarima_717087 = MAPE(testdata_717087, ssvrarima_717087)\n",
79 | "mape_sxgboostarima_717087 = MAPE(testdata_717087, sxgboostarima_717087)\n"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 35,
85 | "metadata": {},
86 | "outputs": [
87 | {
88 | "name": "stdout",
89 | "output_type": "stream",
90 | "text": [
91 | "8.11551399417616\n",
92 | "9.038353131930679\n",
93 | "8.22873553313542\n",
94 | "7.3760212002738506\n",
95 | "7.580976751711058\n",
96 | "7.2004394447703\n",
97 | "6.978562813843733\n",
98 | "7.5813706923892905\n",
99 | "7.200232182800511\n",
100 | "6.979441088429994\n"
101 | ]
102 | }
103 | ],
104 | "source": [
105 | "print(mape_ar_717087)\n",
106 | "print(mape_arma_717087)\n",
107 | "print(mape_arima_717087)\n",
108 | "print(mape_sarima_717087)\n",
109 | "print(mape_slstm_717087 )\n",
110 | "print(mape_ssvr_717087)\n",
111 | "print(mape_sxgboost_717087)\n",
112 | "print(mape_slstmarima_717087)\n",
113 | "print(mape_ssvrarima_717087)\n",
114 | "print(mape_sxgboostarima_717087)"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": 36,
120 | "metadata": {},
121 | "outputs": [
122 | {
123 | "name": "stdout",
124 | "output_type": "stream",
125 | "text": [
126 | "7.391720458354506\n",
127 | "8.06109426125678\n",
128 | "7.314031730896983\n",
129 | "6.567314554629439\n",
130 | "6.1178471735164255\n",
131 | "5.774518050729633\n",
132 | "5.8093474466635975\n",
133 | "6.118317316487666\n",
134 | "5.774260536340519\n",
135 | "5.809826020231966\n"
136 | ]
137 | }
138 | ],
139 | "source": [
140 | "mape_100_ar_717087 = MAPE_100(testdata_717087, ar_717087)\n",
141 | "mape_100_arma_717087 = MAPE_100(testdata_717087, arma_717087)\n",
142 | "mape_100_arima_717087 = MAPE_100(testdata_717087, arima_717087)\n",
143 | "mape_100_sarima_717087 = MAPE_100(testdata_717087, sarima_717087)\n",
144 | "mape_100_slstm_717087 = MAPE_100(testdata_717087, slstm_717087)\n",
145 | "mape_100_ssvr_717087 = MAPE_100(testdata_717087, ssvr_717087)\n",
146 | "mape_100_sxgboost_717087 = MAPE_100(testdata_717087, sxgboost_717087)\n",
147 | "mape_100_slstmarima_717087 = MAPE_100(testdata_717087, slstmarima_717087)\n",
148 | "mape_100_ssvrarima_717087 = MAPE_100(testdata_717087, ssvrarima_717087)\n",
149 | "mape_100_sxgboostarima_717087 = MAPE_100(testdata_717087, sxgboostarima_717087)\n",
150 | "print(mape_100_ar_717087)\n",
151 | "print(mape_100_arma_717087)\n",
152 | "print(mape_100_arima_717087)\n",
153 | "print(mape_100_sarima_717087)\n",
154 | "print(mape_100_slstm_717087 )\n",
155 | "print(mape_100_ssvr_717087)\n",
156 | "print(mape_100_sxgboost_717087)\n",
157 | "print(mape_100_slstmarima_717087)\n",
158 | "print(mape_100_ssvrarima_717087)\n",
159 | "print(mape_100_sxgboostarima_717087)\n"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": 37,
165 | "metadata": {},
166 | "outputs": [
167 | {
168 | "name": "stdout",
169 | "output_type": "stream",
170 | "text": [
171 | "6.267276452561113\n",
172 | "6.899966861590215\n",
173 | "5.873963240467723\n",
174 | "5.781455762366732\n",
175 | "4.977597259106809\n",
176 | "4.768709853556574\n",
177 | "4.850612193880713\n",
178 | "4.977597820989974\n",
179 | "4.76871911521475\n",
180 | "4.8506181468472676\n"
181 | ]
182 | }
183 | ],
184 | "source": [
185 | "mape_250_ar_717087 = MAPE_250(testdata_717087, ar_717087)\n",
186 | "mape_250_arma_717087 = MAPE_250(testdata_717087, arma_717087)\n",
187 | "mape_250_arima_717087 = MAPE_250(testdata_717087, arima_717087)\n",
188 | "mape_250_sarima_717087 = MAPE_250(testdata_717087, sarima_717087)\n",
189 | "mape_250_slstm_717087 = MAPE_250(testdata_717087, slstm_717087)\n",
190 | "mape_250_ssvr_717087 = MAPE_250(testdata_717087, ssvr_717087)\n",
191 | "mape_250_sxgboost_717087 = MAPE_250(testdata_717087, sxgboost_717087)\n",
192 | "mape_250_slstmarima_717087 = MAPE_250(testdata_717087, slstmarima_717087)\n",
193 | "mape_250_ssvrarima_717087 = MAPE_250(testdata_717087, ssvrarima_717087)\n",
194 | "mape_250_sxgboostarima_717087 = MAPE_250(testdata_717087, sxgboostarima_717087)\n",
195 | "print(mape_250_ar_717087)\n",
196 | "print(mape_250_arma_717087)\n",
197 | "print(mape_250_arima_717087)\n",
198 | "print(mape_250_sarima_717087)\n",
199 | "print(mape_250_slstm_717087 )\n",
200 | "print(mape_250_ssvr_717087)\n",
201 | "print(mape_250_sxgboost_717087)\n",
202 | "print(mape_250_slstmarima_717087)\n",
203 | "print(mape_250_ssvrarima_717087)\n",
204 | "print(mape_250_sxgboostarima_717087)\n"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": 38,
210 | "metadata": {},
211 | "outputs": [
212 | {
213 | "name": "stdout",
214 | "output_type": "stream",
215 | "text": [
216 | "22.2411687656336\n",
217 | "24.498942450758474\n",
218 | "21.234435065529222\n",
219 | "20.023570872855185\n",
220 | "18.291041833100174\n",
221 | "17.444472161772747\n",
222 | "17.4993064141761\n",
223 | "18.291404048993336\n",
224 | "17.44422946646671\n",
225 | "17.500098465644356\n"
226 | ]
227 | }
228 | ],
229 | "source": [
230 | "mae_ar_717087 = MAE(testdata_717087, ar_717087)\n",
231 | "mae_arma_717087 = MAE(testdata_717087, arma_717087)\n",
232 | "mae_arima_717087 = MAE(testdata_717087, arima_717087)\n",
233 | "mae_sarima_717087 = MAE(testdata_717087, sarima_717087)\n",
234 | "mae_slstm_717087 = MAE(testdata_717087, slstm_717087)\n",
235 | "mae_ssvr_717087 = MAE(testdata_717087, ssvr_717087)\n",
236 | "mae_sxgboost_717087 = MAE(testdata_717087, sxgboost_717087)\n",
237 | "mae_slstmarima_717087 = MAE(testdata_717087, slstmarima_717087)\n",
238 | "mae_ssvrarima_717087 = MAE(testdata_717087, ssvrarima_717087)\n",
239 | "mae_sxgboostarima_717087 = MAE(testdata_717087, sxgboostarima_717087)\n",
240 | "print(mae_ar_717087)\n",
241 | "print(mae_arma_717087)\n",
242 | "print(mae_arima_717087)\n",
243 | "print(mae_sarima_717087)\n",
244 | "print(mae_slstm_717087 )\n",
245 | "print(mae_ssvr_717087)\n",
246 | "print(mae_sxgboost_717087)\n",
247 | "print(mae_slstmarima_717087)\n",
248 | "print(mae_ssvrarima_717087)\n",
249 | "print(mae_sxgboostarima_717087)"
250 | ]
251 | },
252 | {
253 | "cell_type": "code",
254 | "execution_count": null,
255 | "metadata": {},
256 | "outputs": [],
257 | "source": []
258 | }
259 | ],
260 | "metadata": {
261 | "interpreter": {
262 | "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f"
263 | },
264 | "kernelspec": {
265 | "display_name": "Python 3.7.9",
266 | "language": "python",
267 | "name": "python3"
268 | },
269 | "language_info": {
270 | "codemirror_mode": {
271 | "name": "ipython",
272 | "version": 3
273 | },
274 | "file_extension": ".py",
275 | "mimetype": "text/x-python",
276 | "name": "python",
277 | "nbconvert_exporter": "python",
278 | "pygments_lexer": "ipython3",
279 | "version": "3.7.9"
280 | },
281 | "orig_nbformat": 4
282 | },
283 | "nbformat": 4,
284 | "nbformat_minor": 2
285 | }
286 |
--------------------------------------------------------------------------------
/evaluationmetrics_pointforecasts.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | #Defining MAPE function
4 | def MAPE(actual_values,predicted_values):
5 | predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))
6 | actual_values = np.array(actual_values).reshape((len(actual_values), 1))
7 | mape = np.mean(np.abs((actual_values - predicted_values)/actual_values))*100
8 | return mape
9 |
10 | #Defining MAPE_100 function
11 | def MAPE_100(actual_values,predicted_values):
12 | predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))
13 | actual_values = np.array(actual_values).reshape((len(actual_values), 1))
14 | x = np.concatenate((actual_values,predicted_values), axis=1)
15 | x_100 = x[x[:,0]>100]
16 | mape = np.mean(np.abs((x_100[:,0] - x_100[:,1]) / x_100[:,0]))*100
17 | return mape
18 |
19 | #Defining MAPE_250 function
20 | def MAPE_250(actual_values,predicted_values):
21 | predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))
22 | actual_values = np.array(actual_values).reshape((len(actual_values), 1))
23 | x = np.concatenate((actual_values,predicted_values), axis=1)
24 | x_250 = x[x[:,0]>250]
25 | mape = np.mean(np.abs((x_250[:,0] - x_250[:,1]) / x_250[:,0]))*100
26 | return mape
27 |
28 | #Defining MAE function
29 | def MAE(actual_values,predicted_values):
30 | predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))
31 | actual_values = np.array(actual_values).reshape((len(actual_values), 1))
32 | mae = np.mean(np.abs(actual_values - predicted_values))
33 | return mae
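34 | 
35 | #Illustrative usage (toy numbers, not thesis results): the thresholded variants
36 | #only score the observations whose actual value exceeds 100 (MAPE_100) or 250 (MAPE_250).
37 | if __name__ == "__main__":
38 |     actual = [80, 120, 260, 300]
39 |     predicted = [90, 110, 250, 330]
40 |     print(MAPE(actual, predicted))      #uses all four points
41 |     print(MAPE_100(actual, predicted))  #uses the three points with actual > 100
42 |     print(MAPE_250(actual, predicted))  #uses the two points with actual > 250
43 |     print(MAE(actual, predicted))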
--------------------------------------------------------------------------------
/evaluationmetrics_qrapproaches.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | #implement unconditional coverage
4 | def unconditional_coverage(PIs, data_test):
5 | indicator = []
6 | for i in range(len(PIs)):
7 | if PIs[i][0] < data_test[i] < PIs[i][1]:
8 | indicator.append(1)
9 | else:
10 | indicator.append(0)
11 | uc = np.sum(indicator)/len(indicator)
12 | return uc
13 |
14 | #implement Winkler's score
15 | def winlers_score(PIs, data_test, tau):
16 | indicators = []
17 | for i in range(len(PIs)):
18 | if PIs[i][0] < data_test[i] < PIs[i][1]:
19 | score = PIs[i][1]-PIs[i][0]
20 | indicators.append(score)
21 | elif data_test[i] < PIs[i][0]:
22 | score = (PIs[i][1]-PIs[i][0]) + 2 / tau * (PIs[i][0]-data_test[i])
23 | indicators.append(score)
24 | elif PIs[i][1] < data_test[i]:
25 | score = (PIs[i][1]-PIs[i][0]) + 2 / tau * (data_test[i]-PIs[i][1])
26 | indicators.append(score)
27 | wc = np.sum(indicators)/len(indicators)
28 | return wc
29 |
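30 | #Illustrative usage (toy numbers, not thesis results): three prediction intervals
31 | #checked against three observations; tau is the nominal miscoverage level of the
32 | #interval (e.g. 0.1 for a 90% interval built from the 5% and 95% quantiles).
33 | if __name__ == "__main__":
34 |     PIs = [[90, 110], [100, 130], [180, 220]]
35 |     observations = [105, 135, 200]
36 |     print(unconditional_coverage(PIs, observations))  #2 of 3 intervals cover -> 0.666...
37 |     print(winlers_score(PIs, observations, tau=0.1))   #interval width plus penalty for the missed point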
--------------------------------------------------------------------------------
/hybridmodels_istanbul.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from statsmodels.tsa.arima_model import ARIMA
3 | import pandas as pd
4 |
5 | def armodel(train_data, test_data):
6 | arima = ARIMA(train_data, order=(1,0,0))
7 | arima_fit = arima.fit()
8 | print(arima_fit.summary())
9 | parameters = arima_fit.params
10 | a = parameters[1]
11 | output_test = arima_fit.forecast()
12 | test_predictions = []
13 | test_predictions.append(output_test[0][0])
14 | for t in range(len(test_data)-1):
15 | output_test = (test_data[t] * a)
16 | test_predictions.append(output_test[0])
17 | return test_predictions
18 |
19 | def hybrid_model(train_predictions, train_data, test_predictions, test_data):
20 | train_data = np.array(train_data).reshape((len(train_data),1))
21 | train_predictions = np.array(train_predictions).reshape((len(train_predictions),1))
22 | test_data = np.array(test_data).reshape((len(test_data),1))
23 | test_predictions = np.array(test_predictions).reshape((len(test_predictions),1))
24 | train_error_series = train_data - train_predictions
25 | test_error_series = test_data - test_predictions
26 |     #model the residual series of the base forecasts with an AR(1) process
27 | testerror_predictions = armodel(train_error_series, test_error_series)
28 | testerror_predictions = np.array(testerror_predictions).reshape((len(testerror_predictions),1))
29 | output = test_predictions + testerror_predictions
30 | output = np.array(output).reshape((len(output),1))
31 | return output
32 |
33 | traindata_istanbul_data_del = np.array(pd.read_csv('data/istanbul/istanbul_data_del.csv')[['NUMBER_OF_VEHICLES']][169:7565])
34 | testdata_istanbul_data_del = np.array(pd.read_csv('data/istanbul/istanbul_data_del.csv')[['NUMBER_OF_VEHICLES']][7565:])
35 | trainslstm_istanbul_data_del = np.array(pd.read_csv("point_forecasts/slstm_istanbul_data_del_train.csv")["0"])
36 | trainssvr_istanbul_data_del = np.array(pd.read_csv("point_forecasts/ssvr_istanbul_data_del_train.csv")["0"])
37 | trainsxgboost_istanbul_data_del = np.array(pd.read_csv("point_forecasts/sxgboost_istanbul_data_del_train.csv")["0"])
38 | testslstm_istanbul_data_del = np.array(pd.read_csv("point_forecasts/slstm_istanbul_data_del_test.csv")["0"])
39 | testssvr_istanbul_data_del = np.array(pd.read_csv("point_forecasts/ssvr_istanbul_data_del_test.csv")["0"])
40 | testsxgboost_istanbul_data_del = np.array(pd.read_csv("point_forecasts/sxgboost_istanbul_data_del_test.csv")["0"])
41 |
42 | slstmarima_istanbul_data_del = hybrid_model(trainslstm_istanbul_data_del, traindata_istanbul_data_del, testslstm_istanbul_data_del, testdata_istanbul_data_del)
43 | ssvrarima_istanbul_data_del = hybrid_model(trainssvr_istanbul_data_del, traindata_istanbul_data_del, testssvr_istanbul_data_del, testdata_istanbul_data_del)
44 | sxgboostarima_istanbul_data_del = hybrid_model(trainsxgboost_istanbul_data_del, traindata_istanbul_data_del, testsxgboost_istanbul_data_del, testdata_istanbul_data_del)
45 | pd.DataFrame(slstmarima_istanbul_data_del).to_csv("point_forecasts/slstmarima_istanbul_data_del_test.csv")
46 | pd.DataFrame(ssvrarima_istanbul_data_del).to_csv("point_forecasts/ssvrarima_istanbul_data_del_test.csv")
47 | pd.DataFrame(sxgboostarima_istanbul_data_del).to_csv("point_forecasts/sxgboostarima_istanbul_data_del_test.csv")
48 |
49 | traindata_istanbul_data_mean_sh = np.array(pd.read_csv('data/istanbul/istanbul_data_mean_sh.csv')[['NUMBER_OF_VEHICLES']][169:8164])
50 | testdata_istanbul_data_mean_sh = np.array(pd.read_csv('data/istanbul/istanbul_data_mean_sh.csv')[['NUMBER_OF_VEHICLES']][8164:])
51 | trainslstm_istanbul_data_mean_sh = np.array(pd.read_csv("point_forecasts/slstm_istanbul_data_mean_sh_train.csv")["0"])
52 | trainssvr_istanbul_data_mean_sh = np.array(pd.read_csv("point_forecasts/ssvr_istanbul_data_mean_sh_train.csv")["0"])
53 | trainsxgboost_istanbul_data_mean_sh = np.array(pd.read_csv("point_forecasts/sxgboost_istanbul_data_mean_sh_train.csv")["0"])
54 | testslstm_istanbul_data_mean_sh = np.array(pd.read_csv("point_forecasts/slstm_istanbul_data_mean_sh_test.csv")["0"])
55 | testssvr_istanbul_data_mean_sh = np.array(pd.read_csv("point_forecasts/ssvr_istanbul_data_mean_sh_test.csv")["0"])
56 | testsxgboost_istanbul_data_mean_sh = np.array(pd.read_csv("point_forecasts/sxgboost_istanbul_data_mean_sh_test.csv")["0"])
57 |
58 | slstmarima_istanbul_data_mean_sh = hybrid_model(trainslstm_istanbul_data_mean_sh, traindata_istanbul_data_mean_sh, testslstm_istanbul_data_mean_sh, testdata_istanbul_data_mean_sh)
59 | ssvrarima_istanbul_data_mean_sh = hybrid_model(trainssvr_istanbul_data_mean_sh, traindata_istanbul_data_mean_sh, testssvr_istanbul_data_mean_sh, testdata_istanbul_data_mean_sh)
60 | sxgboostarima_istanbul_data_mean_sh = hybrid_model(trainsxgboost_istanbul_data_mean_sh, traindata_istanbul_data_mean_sh, testsxgboost_istanbul_data_mean_sh, testdata_istanbul_data_mean_sh)
61 | pd.DataFrame(slstmarima_istanbul_data_mean_sh).to_csv("point_forecasts/slstmarima_istanbul_data_mean_sh_test.csv")
62 | pd.DataFrame(ssvrarima_istanbul_data_mean_sh).to_csv("point_forecasts/ssvrarima_istanbul_data_mean_sh_test.csv")
63 | pd.DataFrame(sxgboostarima_istanbul_data_mean_sh).to_csv("point_forecasts/sxgboostarima_istanbul_data_mean_sh_test.csv")
64 |
65 |
66 | traindata_istanbul_data_mean_sdsh= np.array(pd.read_csv('data/istanbul/istanbul_data_mean_sdsh.csv')[['NUMBER_OF_VEHICLES']][169:8164])
67 | testdata_istanbul_data_mean_sdsh= np.array(pd.read_csv('data/istanbul/istanbul_data_mean_sdsh.csv')[['NUMBER_OF_VEHICLES']][8164:])
68 | trainslstm_istanbul_data_mean_sdsh= np.array(pd.read_csv("point_forecasts/slstm_istanbul_data_mean_sdsh_train.csv")["0"])
69 | trainssvr_istanbul_data_mean_sdsh= np.array(pd.read_csv("point_forecasts/ssvr_istanbul_data_mean_sdsh_train.csv")["0"])
70 | trainsxgboost_istanbul_data_mean_sdsh= np.array(pd.read_csv("point_forecasts/sxgboost_istanbul_data_mean_sdsh_train.csv")["0"])
71 | testslstm_istanbul_data_mean_sdsh= np.array(pd.read_csv("point_forecasts/slstm_istanbul_data_mean_sdsh_test.csv")["0"])
72 | testssvr_istanbul_data_mean_sdsh= np.array(pd.read_csv("point_forecasts/ssvr_istanbul_data_mean_sdsh_test.csv")["0"])
73 | testsxgboost_istanbul_data_mean_sdsh= np.array(pd.read_csv("point_forecasts/sxgboost_istanbul_data_mean_sdsh_test.csv")["0"])
74 |
75 | slstmarima_istanbul_data_mean_sdsh= hybrid_model(trainslstm_istanbul_data_mean_sdsh, traindata_istanbul_data_mean_sdsh, testslstm_istanbul_data_mean_sdsh, testdata_istanbul_data_mean_sdsh)
76 | ssvrarima_istanbul_data_mean_sdsh= hybrid_model(trainssvr_istanbul_data_mean_sdsh, traindata_istanbul_data_mean_sdsh, testssvr_istanbul_data_mean_sdsh, testdata_istanbul_data_mean_sdsh)
77 | sxgboostarima_istanbul_data_mean_sdsh= hybrid_model(trainsxgboost_istanbul_data_mean_sdsh, traindata_istanbul_data_mean_sdsh, testsxgboost_istanbul_data_mean_sdsh, testdata_istanbul_data_mean_sdsh)
78 | pd.DataFrame(slstmarima_istanbul_data_mean_sdsh).to_csv("point_forecasts/slstmarima_istanbul_data_mean_sdsh_test.csv")
79 | pd.DataFrame(ssvrarima_istanbul_data_mean_sdsh).to_csv("point_forecasts/ssvrarima_istanbul_data_mean_sdsh_test.csv")
80 | pd.DataFrame(sxgboostarima_istanbul_data_mean_sdsh).to_csv("point_forecasts/sxgboostarima_istanbul_data_mean_sdsh_test.csv")
81 |
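82 | #Summary of the hybrid scheme implemented above: for each base learner (SLSTM, SSVR,
83 | #SXGBoost) the hybrid forecast is
84 | #    hybrid_t = base_forecast_t + a * e_{t-1},
85 | #where e_t = actual_t - base_forecast_t is the residual series and a is the AR(1)
86 | #coefficient fitted on the training residuals (the first test step instead uses the
87 | #fitted model's one-step forecast), so the AR part corrects the predictable
88 | #component of the base model's errors.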
--------------------------------------------------------------------------------
/hybridmodels_pems.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from statsmodels.tsa.arima_model import ARIMA
3 | import pandas as pd
4 |
5 | def arimamodel(train_data, test_data):
6 | arima = ARIMA(train_data, order=(1,1,0))
7 | arima_fit = arima.fit()
8 | print(arima_fit.summary())
9 |
10 | train_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(train_data),dynamic=train_data.all())
11 | train_predictions2 = []
12 | for t in range(len(train_data)):
13 | output_train = train_predictions[t] + train_data[t]
14 | train_predictions2.append(output_train)
15 |
16 | test_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(test_data)-1,dynamic=test_data.all())
17 | test_predictions2 = []
18 | test_data2=[]
19 | test_data2.append(train_data[-1])
20 | for i in range(len(test_data)-1):
21 | test_data2.append(test_data[i])
22 | for t in range(len(test_data2)):
23 | output_test = test_predictions[t] + test_data2[t]
24 | test_predictions2.append(output_test)
25 | return test_predictions
26 |
27 | def hybrid_model(train_predictions, train_data, test_predictions, test_data):
28 | train_data = np.array(train_data).reshape((len(train_data),1))
29 | train_predictions = np.array(train_predictions).reshape((len(train_predictions),1))
30 | test_data = np.array(test_data).reshape((len(test_data),1))
31 | test_predictions = np.array(test_predictions).reshape((len(test_predictions),1))
32 | train_error_series = train_data - train_predictions
33 | test_error_series = test_data - test_predictions
34 |     #model the residual series of the base forecasts with an ARIMA(1,1,0) process
35 | testerror_predictions = arimamodel(train_error_series, test_error_series)
36 | testerror_predictions = np.array(testerror_predictions).reshape((len(testerror_predictions),1))
37 | output = test_predictions + testerror_predictions
38 | output = np.array(output).reshape((len(output),1))
39 | return output
40 |
41 | traindata_716933 = np.array(pd.read_csv('data/pems/pems-d07-9months-2021-station716933-15min.csv')[['Total Flow']][673:18345])
42 | testdata_716933 = np.array(pd.read_csv('data/pems/pems-d07-9months-2021-station716933-15min.csv')[['Total Flow']][18345:])
43 | trainslstm_716933 = np.array(pd.read_csv("point_forecasts/slstm_pems_716933_train.csv")["0"])
44 | trainssvr_716933 = np.array(pd.read_csv("point_forecasts/ssvr_pems_716933_train.csv")["0"])
45 | trainsxgboost_716933 = np.array(pd.read_csv("point_forecasts/sxgboost_pems_716933_train.csv")["0"])
46 | testslstm_716933 = np.array(pd.read_csv("point_forecasts/slstm_pems_716933_test.csv")["0"])
47 | testssvr_716933 = np.array(pd.read_csv("point_forecasts/ssvr_pems_716933_test.csv")["0"])
48 | testsxgboost_716933 = np.array(pd.read_csv("point_forecasts/sxgboost_pems_716933_test.csv")["0"])
49 |
50 | slstmarima_716933 = hybrid_model(trainslstm_716933, traindata_716933, testslstm_716933, testdata_716933)
51 | ssvrarima_716933 = hybrid_model(trainssvr_716933, traindata_716933, testssvr_716933, testdata_716933)
52 | sxgboostarima_716933 = hybrid_model(trainsxgboost_716933, traindata_716933, testsxgboost_716933, testdata_716933)
53 | pd.DataFrame(slstmarima_716933).to_csv("point_forecasts/slstmarima_pems_716933_test.csv")
54 | pd.DataFrame(ssvrarima_716933).to_csv("point_forecasts/ssvrarima_pems_716933_test.csv")
55 | pd.DataFrame(sxgboostarima_716933).to_csv("point_forecasts/sxgboostarima_pems_716933_test.csv")
56 |
57 | traindata_717087 = np.array(pd.read_csv('data/pems/pems-d07-9months-2021-station717087-15min.csv')[['Total Flow']][673:18345])
58 | testdata_717087 = np.array(pd.read_csv('data/pems/pems-d07-9months-2021-station717087-15min.csv')[['Total Flow']][18345:])
59 | trainslstm_717087 = np.array(pd.read_csv("point_forecasts/slstm_pems_717087_train.csv")["0"])
60 | trainssvr_717087 = np.array(pd.read_csv("point_forecasts/ssvr_pems_717087_train.csv")["0"])
61 | trainsxgboost_717087 = np.array(pd.read_csv("point_forecasts/sxgboost_pems_717087_train.csv")["0"])
62 | testslstm_717087 = np.array(pd.read_csv("point_forecasts/slstm_pems_717087_test.csv")["0"])
63 | testssvr_717087 = np.array(pd.read_csv("point_forecasts/ssvr_pems_717087_test.csv")["0"])
64 | testsxgboost_717087 = np.array(pd.read_csv("point_forecasts/sxgboost_pems_717087_test.csv")["0"])
65 |
66 | slstmarima_717087 = hybrid_model(trainslstm_717087, traindata_717087, testslstm_717087, testdata_717087)
67 | ssvrarima_717087 = hybrid_model(trainssvr_717087, traindata_717087, testssvr_717087, testdata_717087)
68 | sxgboostarima_717087 = hybrid_model(trainsxgboost_717087, traindata_717087, testsxgboost_717087, testdata_717087)
69 | pd.DataFrame(slstmarima_717087).to_csv("point_forecasts/slstmarima_pems_717087_test.csv")
70 | pd.DataFrame(ssvrarima_717087).to_csv("point_forecasts/ssvrarima_pems_717087_test.csv")
71 | pd.DataFrame(sxgboostarima_717087).to_csv("point_forecasts/sxgboostarima_pems_717087_test.csv")
72 |
--------------------------------------------------------------------------------
/naive_and_average_methods.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 |
4 | def naive_method(data):
5 | data2 = pd.DataFrame(data.values)
6 | data3 = pd.concat([data2.shift(1), data2], axis=1)
7 | data3.columns = ['t-1', 't']
8 | data4 = data3.values
9 | train_size = int(len( data4) * 0.70)
10 | train, test = data4[1:train_size], data4[train_size:]
11 | test_predictions, actual_values = test[:,0], test[:,1]
12 | return test_predictions, actual_values
13 |
14 | def average_method(data):
15 | data2 = pd.DataFrame(data.values)
16 | data3 = pd.concat([data2.shift(1), data2], axis=1)
17 | data3.columns = ['t-1', 't']
18 | avg_values = []
19 | for i in range(len(data3)):
20 | avg_value = np.mean(data3['t-1'][:(i+1)])
21 | avg_values.append(avg_value)
22 | avg_values = pd.DataFrame(avg_values)
23 | data4 = pd.concat([data3, avg_values], axis = 1)
24 | data4['avg'] = data4[0]
25 | data4 = data4[['avg', 't']]
26 | data4 = data4.values
27 | train_size = int(len( data4) * 0.70)
28 | train, test = data4[1:train_size], data4[train_size:]
29 | test_predictions, actual_values = test[:,0], test[:,1]
30 | return test_predictions, actual_values
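31 | 
32 | #Illustrative usage (toy series, not thesis data): both baselines return the final
33 | #30% of the series as (test_predictions, actual_values), which can then be scored
34 | #with the point-forecast metrics.
35 | if __name__ == "__main__":
36 |     series = pd.Series([10, 12, 11, 13, 15, 14, 16, 18, 17, 19])
37 |     naive_pred, naive_actual = naive_method(series)
38 |     avg_pred, avg_actual = average_method(series)
39 |     print(np.mean(np.abs(naive_actual - naive_pred)))  #MAE of the naive (last value) baseline
40 |     print(np.mean(np.abs(avg_actual - avg_pred)))      #MAE of the expanding-mean baseline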
--------------------------------------------------------------------------------
/qrapproaches.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import statsmodels.formula.api as smf
3 | import torch
4 | import torch.nn as nn
5 | from torch.autograd import Variable
6 | from sklearn.preprocessing import MinMaxScaler
7 |
8 | ## historical PI implementation
9 | def get_corridor(corridor_size, error_series):
10 | corridor = []
11 | length = len(error_series) - corridor_size
12 | for i in range(length):
13 | errors_in_corridor = error_series[i:i+corridor_size]
14 | ordered_errors_in_corridor = np.sort(errors_in_corridor)
15 | corridor.append({"OrderedErrors": ordered_errors_in_corridor})
16 | return corridor
17 |
18 | def get_lower_upper_bounds(corridor_size, predictions, error_series):
19 | PIs = []
20 | corridor = get_corridor(corridor_size, error_series)
21 | predictions2 = np.array(predictions[corridor_size:])
22 |     percent5_index = 1    #index of the (roughly) 5th percentile error in the sorted corridor
23 |     percent95_index = 19  #index of the (roughly) 95th percentile error; both presume corridor_size = 20
24 | for i in range(len(corridor)):
25 | OrderedErrors = corridor[i]["OrderedErrors"]
26 | PointForecast = predictions2[i]
27 | lower_bound = OrderedErrors[percent5_index] + PointForecast
28 | upper_bound = OrderedErrors[percent95_index] + PointForecast
29 | PIs.append([lower_bound, upper_bound])
30 | return PIs
31 |
32 | ## implement distribution-based PI using AR
33 | def distribution_based_PI(test_data, a, sigma, z_alpha, z_1minalpha):
34 | PIs = []
35 | for i in range(len(test_data)):
36 | lower_bound = (a * test_data[i]) + (sigma * z_alpha)
37 | upper_bound = (a * test_data[i]) + (sigma * z_1minalpha)
38 | PIs.append([lower_bound, upper_bound])
39 | return PIs
40 |
41 | #implement QRA
42 | def qra(train_dataframe, test_dataframe, tau1, tau2):
43 |     #tau1 = 0.95 and tau2 = 0.05 give a 90% prediction interval.
44 |     #SSVR, SLSTM and SXGBoost produce the best point forecasts,
45 |     #so the QRA quantile regressions below are built on the
46 |     #forecasts of these three models.
47 | model1 = smf.quantreg('NUMBER_OF_VEHICLES ~ ssvr + slstm + sxgboost', train_dataframe).fit(q=tau1)
48 | get_y = lambda a, b, c, d: a + b * test_dataframe.ssvr + c * test_dataframe.slstm + d * test_dataframe.sxgboost
49 | y_upper = get_y(model1.params['Intercept'], model1.params['ssvr'], model1.params['slstm'], model1.params['sxgboost'])
50 | model2 = smf.quantreg('NUMBER_OF_VEHICLES ~ ssvr + slstm + sxgboost', train_dataframe).fit(q=tau2)
51 |     y_lower = get_y(model2.params['Intercept'], model2.params['ssvr'], model2.params['slstm'], model2.params['sxgboost'])
52 | y_upper = np.array(y_upper)
53 | y_lower = np.array(y_lower)
54 |
55 | PIs_qra = []
56 | for i in range(len(y_upper)):
57 | PIs_qra.append([y_lower[i], y_upper[i]])
58 | return PIs_qra
59 |
60 | #implement QRLSTM
61 | #first implement the LSTM network used by QRLSTM
62 | class LSTM(nn.Module):
63 |
64 | def __init__(self, num_classes, input_size, hidden_size, num_layers):
65 | super(LSTM, self).__init__()
66 |
67 | self.num_classes = num_classes
68 | self.num_layers = num_layers
69 | self.input_size = input_size
70 | self.hidden_size = hidden_size
71 |
72 | self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
73 | num_layers=num_layers, batch_first=True)
74 | self.softmax = nn.Softmax(dim=1)
75 | self.fc = nn.Linear(hidden_size, num_classes)
76 |
77 | def forward(self, x):
78 | h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
79 | c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
80 | _, (h_output, _) = self.lstm(x, (h_0, c_0))
81 | h_output = h_output.view(-1, self.hidden_size)
82 | output = self.softmax(h_output)
83 | output = self.fc(output)
84 | return output
85 |
86 | #define the pinball (quantile) loss used to update the LSTM parameters
87 | class PinballLoss():
88 | def __init__(self, quantile=0.10, reduction='mean'):
89 | self.quantile = quantile
90 | assert 0 < self.quantile
91 | assert self.quantile < 1
92 | self.reduction = reduction
93 | def __call__(self, output, target):
94 | assert output.shape == target.shape
95 | loss = torch.zeros_like(target, dtype=torch.float)
96 | error = output - target
97 | smaller_index = error < 0
98 | bigger_index = 0 < error
99 | loss[smaller_index] = self.quantile * (abs(error)[smaller_index])
100 | loss[bigger_index] = (1-self.quantile) * (abs(error)[bigger_index])
101 |
102 | if self.reduction == 'sum':
103 | loss = loss.sum()
104 | if self.reduction == 'mean':
105 | loss = loss.mean()
106 | return loss
107 |
108 | def qrlstm(data):
109 | sc = MinMaxScaler()
110 | training_data = sc.fit_transform(data)
111 |
112 | x, y = training_data[169:,:8], training_data[169:,-1]
113 | print(x.shape)
114 | print(y.shape)
115 |     train_size = 7995  #train/test split of the Istanbul series (7995 train + 3500 test = 11495 rows)
116 |
117 | dataX = Variable(torch.Tensor(np.array(x))).reshape((11495,8,1))
118 | dataY = Variable(torch.Tensor(np.array(y))).reshape((11495,1))
119 |
120 | trainX = Variable(torch.Tensor(np.array(x[0:train_size]))).reshape((7995,8,1))
121 | trainY = Variable(torch.Tensor(np.array(y[0:train_size]))).reshape((7995,1))
122 |
123 | testX = Variable(torch.Tensor(np.array(x[train_size:]))).reshape((3500,8,1))
124 | testY = Variable(torch.Tensor(np.array(y[train_size:]))).reshape((3500,1))
125 | num_epochs = 2000
126 | learning_rate = 0.01
127 |
128 | input_size = 1
129 | hidden_size = 8
130 | num_layers = 1
131 | num_classes = 1
132 |
133 | lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
134 | criterion = PinballLoss(quantile=0.95)
135 | optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
136 |
137 | # Train the model
138 | for epoch in range(num_epochs):
139 | outputs = lstm(trainX)
140 | optimizer.zero_grad()
141 |
142 | # obtain the loss function
143 | loss = criterion(outputs, trainY)
144 |
145 | loss.backward()
146 |
147 | optimizer.step()
148 |
149 | lstm.eval()
150 | test_predict = lstm(testX)
151 |
152 | data_predict = test_predict.data.numpy()
153 | dataY_plot = dataY.data.numpy()
154 |
155 | d_p = np.concatenate((data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict),axis=1)
156 | dY_p = np.concatenate((dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot),axis=1)
157 | data_predict = sc.inverse_transform(d_p)
158 | dataY_plot = sc.inverse_transform(dY_p)
159 |
160 | dataY_plot = dataY_plot[:,0]
161 | data_predict = data_predict[:,0]
162 | upper_bounds = data_predict
163 |
164 | #get lower bounds
165 | lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
166 | criterion = PinballLoss(quantile=0.05)
167 | optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
168 |
169 | # Train the model
170 | for epoch in range(num_epochs):
171 | outputs = lstm(trainX)
172 | optimizer.zero_grad()
173 |
174 | # obtain the loss function
175 | loss = criterion(outputs, trainY)
176 |
177 | loss.backward()
178 |
179 | optimizer.step()
180 |
181 | lstm.eval()
182 | test_predict = lstm(testX)
183 | data_predict = test_predict.data.numpy()
184 | dataY_plot = dataY.data.numpy()
185 |
186 | d_p = np.concatenate((data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict),axis=1)
187 | dY_p = np.concatenate((dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot),axis=1)
188 | data_predict = sc.inverse_transform(d_p)
189 | dataY_plot = sc.inverse_transform(dY_p)
190 |
191 | dataY_plot = dataY_plot[:,0]
192 | data_predict = data_predict[:,0]
193 | lower_bounds = data_predict
194 |
195 | y_upper = np.array(upper_bounds)
196 | y_lower = np.array(lower_bounds)
197 |
198 | PIs_qrlstm = []
199 | for i in range(len(y_upper)):
200 | PIs_qrlstm.append([y_lower[i], y_upper[i]])
201 | return PIs_qrlstm
202 |
203 |
204 |
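205 | #Illustrative QRA call (synthetic numbers, hypothetical layout): both data frames carry
206 | #the point forecasts of the three base models as columns, and the training frame also
207 | #carries the observed NUMBER_OF_VEHICLES column that the quantile regressions are fit on.
208 | if __name__ == "__main__":
209 |     import pandas as pd
210 |     np.random.seed(0)
211 |     y = np.random.uniform(100, 300, size=200)
212 |     frame = pd.DataFrame({
213 |         "NUMBER_OF_VEHICLES": y,
214 |         "ssvr": y + np.random.normal(0, 10, size=200),
215 |         "slstm": y + np.random.normal(0, 12, size=200),
216 |         "sxgboost": y + np.random.normal(0, 11, size=200),
217 |     })
218 |     PIs_example = qra(frame.iloc[:150], frame.iloc[150:], tau1=0.95, tau2=0.05)
219 |     print(PIs_example[:3])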
--------------------------------------------------------------------------------
/slstm_istanbul_data_del.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import torch
4 | import torch.nn as nn
5 | from torch.autograd import Variable
6 | from sklearn.preprocessing import MinMaxScaler
7 |
8 | class LSTM(nn.Module):
9 | def __init__(self, num_classes, input_size, hidden_size, num_layers):
10 | super(LSTM, self).__init__()
11 | self.num_classes = num_classes
12 | self.num_layers = num_layers
13 | self.input_size = input_size
14 | self.hidden_size = hidden_size
15 |
16 | self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
17 | num_layers=num_layers, batch_first=True)
18 | self.softmax = nn.Softmax(dim=1)
19 | self.fc = nn.Linear(hidden_size, num_classes)
20 |
21 | def forward(self, x):
22 | h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
23 | c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
24 | _, (h_output, _) = self.lstm(x, (h_0, c_0))
25 | h_output = h_output.view(-1, self.hidden_size)
26 | output = self.softmax(h_output)
27 | output= self.fc(output)
28 | return output
29 |
30 | data = pd.read_csv('data/istanbul/istanbul_data_del.csv')[['NUMBER_OF_VEHICLES']]
31 | data2 = pd.DataFrame(data.values)
32 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(167),data2.shift(25),data2.shift(24),data2.shift(23),data2.shift(2),data2.shift(1),data2], axis=1)
33 | data3.columns = ['t-169','t-168','t-167','t-25','t-24','t-23','t-2','t-1', 't']
34 | data4 = data3.values
35 | train_size = 7396
36 | test_size = 3500
37 |
38 | minmaxscaler = MinMaxScaler()
39 | training_data = minmaxscaler.fit_transform(data4)
40 | x, y = training_data[169:,:8], training_data[169:,-1]
41 |
42 | dataX = Variable(torch.Tensor(np.array(x))).reshape((len(x),8,1))
43 | dataY = Variable(torch.Tensor(np.array(y))).reshape((len(x),1))
44 | trainX = Variable(torch.Tensor(np.array(x[0:train_size]))).reshape((train_size,8,1))
45 | trainY = Variable(torch.Tensor(np.array(y[0:train_size]))).reshape((train_size,1))
46 | testX = Variable(torch.Tensor(np.array(x[train_size:]))).reshape((test_size,8,1))
47 | testY = Variable(torch.Tensor(np.array(y[train_size:]))).reshape((test_size,1))
48 |
49 | num_epochs = 2000
50 | learning_rate = 0.01
51 |
52 | input_size = 1
53 | hidden_size = 8
54 | num_layers = 1
55 | num_classes = 1
56 |
57 | lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
58 |
59 | criterion = torch.nn.MSELoss()
60 | optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
61 |
62 | # Train the model
63 | for epoch in range(num_epochs):
64 | outputs = lstm(trainX)
65 | optimizer.zero_grad()
66 | # obtain the loss function
67 | loss = criterion(outputs, trainY)
68 | loss.backward()
69 | optimizer.step()
70 | if epoch % 100 == 0:
71 | print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))
72 |
73 | lstm.eval()
74 | train_predict = lstm(trainX)
75 | data_predict2 = train_predict.data.numpy()
76 | d_p2 = np.concatenate((data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2),axis=1)
77 | data_predict2 = minmaxscaler.inverse_transform(d_p2)
78 | train_predict = data_predict2[:,0]
79 |
80 | lstm.eval()
81 | test_predict = lstm(testX)
82 | data_predict = test_predict.data.numpy()
83 | dataY_plot = dataY.data.numpy()
84 | d_p = np.concatenate((data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict),axis=1)
85 | dY_p = np.concatenate((dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot),axis=1)
86 | data_predict = minmaxscaler.inverse_transform(d_p)
87 | dataY_plot = minmaxscaler.inverse_transform(dY_p)
88 | dataY_plot = dataY_plot[:,0]
89 | test_predict = data_predict[:,0]
90 |
91 |
92 | pd.DataFrame(train_predict).to_csv("point_forecasts/slstm_istanbul_data_del_train.csv")
93 | pd.DataFrame(test_predict).to_csv("point_forecasts/slstm_istanbul_data_del_test.csv")
94 |
95 |
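96 | #Notes on the setup above: the lag features t-169..t-167, t-25..t-23 and t-2, t-1 give
97 | #the model roughly weekly (lag 168), daily (lag 24) and immediate history of the series.
98 | #The single predicted column is tiled to nine identical columns before
99 | #minmaxscaler.inverse_transform because the scaler was fitted on the nine-column lag
100 | #matrix; only the first column of the back-transformed result is kept.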
--------------------------------------------------------------------------------
/slstm_istanbul_data_mean_sdsh.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import torch
4 | import torch.nn as nn
5 | from torch.autograd import Variable
6 | from sklearn.preprocessing import MinMaxScaler
7 |
8 | class LSTM(nn.Module):
9 | def __init__(self, num_classes, input_size, hidden_size, num_layers):
10 | super(LSTM, self).__init__()
11 | self.num_classes = num_classes
12 | self.num_layers = num_layers
13 | self.input_size = input_size
14 | self.hidden_size = hidden_size
15 |
16 | self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
17 | num_layers=num_layers, batch_first=True)
18 | self.softmax = nn.Softmax(dim=1)
19 | self.fc = nn.Linear(hidden_size, num_classes)
20 |
21 | def forward(self, x):
22 | h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
23 | c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
24 | _, (h_output, _) = self.lstm(x, (h_0, c_0))
25 | h_output = h_output.view(-1, self.hidden_size)
26 | output = self.softmax(h_output)
27 | output= self.fc(output)
28 | return output
29 |
30 | data = pd.read_csv('data/istanbul/istanbul_data_mean_sdsh.csv')[['NUMBER_OF_VEHICLES']]
31 | data2 = pd.DataFrame(data.values)
32 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(167),data2.shift(25),data2.shift(24),data2.shift(23),data2.shift(2),data2.shift(1),data2], axis=1)
33 | data3.columns = ['t-169','t-168','t-167','t-25','t-24','t-23','t-2','t-1', 't']
34 | data4 = data3.values
35 | train_size = 7995
36 | test_size = 3500
37 |
38 | minmaxscaler = MinMaxScaler()
39 | training_data = minmaxscaler.fit_transform(data4)
40 | x, y = training_data[169:,:8], training_data[169:,-1]
41 |
42 | dataX = Variable(torch.Tensor(np.array(x))).reshape((len(x),8,1))
43 | dataY = Variable(torch.Tensor(np.array(y))).reshape((len(x),1))
44 | trainX = Variable(torch.Tensor(np.array(x[0:train_size]))).reshape((train_size,8,1))
45 | trainY = Variable(torch.Tensor(np.array(y[0:train_size]))).reshape((train_size,1))
46 | testX = Variable(torch.Tensor(np.array(x[train_size:]))).reshape((test_size,8,1))
47 | testY = Variable(torch.Tensor(np.array(y[train_size:]))).reshape((test_size,1))
48 |
49 | num_epochs = 2000
50 | learning_rate = 0.01
51 |
52 | input_size = 1
53 | hidden_size = 8
54 | num_layers = 1
55 | num_classes = 1
56 |
57 | lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
58 |
59 | criterion = torch.nn.MSELoss()
60 | optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
61 | losses = []
62 | # Train the model
63 | for epoch in range(num_epochs):
64 | outputs = lstm(trainX)
65 | optimizer.zero_grad()
66 | # obtain the loss function
67 | loss = criterion(outputs, trainY)
68 | loss.backward()
69 | optimizer.step()
70 | losses.append(loss.item())
71 | if epoch % 100 == 0:
72 | print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))
73 |
74 | lstm.eval()
75 | train_predict = lstm(trainX)
76 | data_predict2 = train_predict.data.numpy()
77 | d_p2 = np.concatenate((data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2),axis=1)
78 | data_predict2 = minmaxscaler.inverse_transform(d_p2)
79 | train_predict = data_predict2[:,0]
80 |
81 | lstm.eval()
82 | test_predict = lstm(testX)
83 | data_predict = test_predict.data.numpy()
84 | dataY_plot = dataY.data.numpy()
85 | d_p = np.concatenate((data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict),axis=1)
86 | dY_p = np.concatenate((dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot),axis=1)
87 | data_predict = minmaxscaler.inverse_transform(d_p)
88 | dataY_plot = minmaxscaler.inverse_transform(dY_p)
89 | dataY_plot = dataY_plot[:,0]
90 | test_predict = data_predict[:,0]
91 |
92 |
93 | pd.DataFrame(train_predict).to_csv("point_forecasts/slstm_istanbul_data_mean_sdsh_train.csv")
94 | pd.DataFrame(test_predict).to_csv("point_forecasts/slstm_istanbul_data_mean_sdsh_test.csv")
95 | pd.DataFrame(losses).to_csv("slstm_loss_istanbul_data_mean_sdsh.csv")
96 |
--------------------------------------------------------------------------------
/slstm_istanbul_data_mean_sh.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import torch
4 | import torch.nn as nn
5 | from torch.autograd import Variable
6 | from sklearn.preprocessing import MinMaxScaler
7 |
8 | class LSTM(nn.Module):
9 | def __init__(self, num_classes, input_size, hidden_size, num_layers):
10 | super(LSTM, self).__init__()
11 | self.num_classes = num_classes
12 | self.num_layers = num_layers
13 | self.input_size = input_size
14 | self.hidden_size = hidden_size
15 |
16 | self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
17 | num_layers=num_layers, batch_first=True)
18 | self.softmax = nn.Softmax(dim=1)
19 | self.fc = nn.Linear(hidden_size, num_classes)
20 |
21 | def forward(self, x):
22 | h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
23 | c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
24 | _, (h_output, _) = self.lstm(x, (h_0, c_0))
25 | h_output = h_output.view(-1, self.hidden_size)
26 | output = self.softmax(h_output)
27 | output= self.fc(output)
28 | return output
29 |
30 | data = pd.read_csv('data/istanbul/istanbul_data_mean_sh.csv')[['NUMBER_OF_VEHICLES']]
31 | data2 = pd.DataFrame(data.values)
32 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(167),data2.shift(25),data2.shift(24),data2.shift(23),data2.shift(2),data2.shift(1),data2], axis=1)
33 | data3.columns = ['t-169','t-168','t-167','t-25','t-24','t-23','t-2','t-1', 't']
34 | data4 = data3.values
35 | train_size = 7995
36 | test_size = 3500
37 |
38 | minmaxscaler = MinMaxScaler()
39 | training_data = minmaxscaler.fit_transform(data4)
40 | x, y = training_data[169:,:8], training_data[169:,-1]
41 |
42 | dataX = Variable(torch.Tensor(np.array(x))).reshape((len(x),8,1))
43 | dataY = Variable(torch.Tensor(np.array(y))).reshape((len(x),1))
44 | trainX = Variable(torch.Tensor(np.array(x[0:train_size]))).reshape((train_size,8,1))
45 | trainY = Variable(torch.Tensor(np.array(y[0:train_size]))).reshape((train_size,1))
46 | testX = Variable(torch.Tensor(np.array(x[train_size:]))).reshape((test_size,8,1))
47 | testY = Variable(torch.Tensor(np.array(y[train_size:]))).reshape((test_size,1))
48 |
49 | num_epochs = 2000
50 | learning_rate = 0.01
51 |
52 | input_size = 1
53 | hidden_size = 8
54 | num_layers = 1
55 | num_classes = 1
56 |
57 | lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
58 |
59 | criterion = torch.nn.MSELoss()
60 | optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
61 |
62 | # Train the model
63 | for epoch in range(num_epochs):
64 | outputs = lstm(trainX)
65 | optimizer.zero_grad()
66 | # obtain the loss function
67 | loss = criterion(outputs, trainY)
68 | loss.backward()
69 | optimizer.step()
70 | if epoch % 100 == 0:
71 | print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))
72 |
73 | lstm.eval()
74 | train_predict = lstm(trainX)
75 | data_predict2 = train_predict.data.numpy()
76 | d_p2 = np.concatenate((data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2),axis=1)
77 | data_predict2 = minmaxscaler.inverse_transform(d_p2)
78 | train_predict = data_predict2[:,0]
79 |
80 | lstm.eval()
81 | test_predict = lstm(testX)
82 | data_predict = test_predict.data.numpy()
83 | dataY_plot = dataY.data.numpy()
84 | d_p = np.concatenate((data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict),axis=1)
85 | dY_p = np.concatenate((dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot),axis=1)
86 | data_predict = minmaxscaler.inverse_transform(d_p)
87 | dataY_plot = minmaxscaler.inverse_transform(dY_p)
88 | dataY_plot = dataY_plot[:,0]
89 | test_predict = data_predict[:,0]
90 |
91 |
92 | pd.DataFrame(train_predict).to_csv("point_forecasts/slstm_istanbul_data_mean_sh_train.csv")
93 | pd.DataFrame(test_predict).to_csv("point_forecasts/slstm_istanbul_data_mean_sh_test.csv")
94 |
95 |
--------------------------------------------------------------------------------
/slstm_pems_716933.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import torch
4 | import torch.nn as nn
5 | from torch.autograd import Variable
6 | from sklearn.preprocessing import MinMaxScaler
7 |
8 | class LSTM(nn.Module):
9 | def __init__(self, num_classes, input_size, hidden_size, num_layers):
10 | super(LSTM, self).__init__()
11 | self.num_classes = num_classes
12 | self.num_layers = num_layers
13 | self.input_size = input_size
14 | self.hidden_size = hidden_size
15 |
16 | self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
17 | num_layers=num_layers, batch_first=True)
18 | self.softmax = nn.Softmax(dim=1)
19 | self.fc = nn.Linear(hidden_size, num_classes)
20 |
21 | def forward(self, x):
22 | h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
23 | c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
24 | _, (h_output, _) = self.lstm(x, (h_0, c_0))
25 | h_output = h_output.view(-1, self.hidden_size)
26 | output = self.softmax(h_output)
27 | output= self.fc(output)
28 | return output
29 | data = pd.read_csv('data/pems/pems-d07-9months-2021-station716933-15min.csv')[['Total Flow']]
30 | data2 = pd.DataFrame(data.values)
31 | data3 = pd.concat([data2.shift(673),data2.shift(672),data2.shift(671),data2.shift(97),data2.shift(96),data2.shift(95),data2.shift(2),data2.shift(1),data2], axis=1)
32 | data3.columns = ['t-673','t-672','t-671','t-97','t-96','t-95','t-2','t-1', 't']
33 | data4 = data3.values
34 | train_size = 17672
35 | test_size = 7863
36 |
37 | minmaxscaler = MinMaxScaler()
38 | training_data = minmaxscaler.fit_transform(data4)
39 | x, y = training_data[673:,:8], training_data[673:,-1]
40 |
41 | dataX = Variable(torch.Tensor(np.array(x))).reshape((len(x),8,1))
42 | dataY = Variable(torch.Tensor(np.array(y))).reshape((len(x),1))
43 | trainX = Variable(torch.Tensor(np.array(x[0:train_size]))).reshape((train_size,8,1))
44 | trainY = Variable(torch.Tensor(np.array(y[0:train_size]))).reshape((train_size,1))
45 | testX = Variable(torch.Tensor(np.array(x[train_size:]))).reshape((test_size,8,1))
46 | testY = Variable(torch.Tensor(np.array(y[train_size:]))).reshape((test_size,1))
47 |
48 | num_epochs = 2000
49 | learning_rate = 0.01
50 |
51 | input_size = 1
52 | hidden_size = 8
53 | num_layers = 1
54 | num_classes = 1
55 |
56 | lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
57 |
58 | criterion = torch.nn.MSELoss()
59 | optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
60 |
61 | # Train the model
62 | for epoch in range(num_epochs):
63 | outputs = lstm(trainX)
64 | optimizer.zero_grad()
65 | # obtain the loss function
66 | loss = criterion(outputs, trainY)
67 | loss.backward()
68 | optimizer.step()
69 | if epoch % 100 == 0:
70 | print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))
71 |
72 | lstm.eval()
73 | train_predict = lstm(trainX)
74 | data_predict2 = train_predict.data.numpy()
75 | d_p2 = np.concatenate((data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2),axis=1)
76 | data_predict2 = minmaxscaler.inverse_transform(d_p2)
77 | train_predict = data_predict2[:,0]
78 |
79 | lstm.eval()
80 | test_predict = lstm(testX)
81 | data_predict = test_predict.data.numpy()
82 | dataY_plot = dataY.data.numpy()
83 | d_p = np.concatenate((data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict),axis=1)
84 | dY_p = np.concatenate((dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot),axis=1)
85 | data_predict = minmaxscaler.inverse_transform(d_p)
86 | dataY_plot = minmaxscaler.inverse_transform(dY_p)
87 | dataY_plot = dataY_plot[:,0]
88 | test_predict = data_predict[:,0]
89 |
90 |
91 | pd.DataFrame(train_predict).to_csv("point_forecasts/slstm_pems_716933_train.csv")
92 | pd.DataFrame(test_predict).to_csv("point_forecasts/slstm_pems_716933_test.csv")
93 |
94 |
--------------------------------------------------------------------------------
/slstm_pems_717087.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import torch
4 | import torch.nn as nn
5 | from torch.autograd import Variable
6 | from sklearn.preprocessing import MinMaxScaler
7 |
8 | class LSTM(nn.Module):
9 | def __init__(self, num_classes, input_size, hidden_size, num_layers):
10 | super(LSTM, self).__init__()
11 | self.num_classes = num_classes
12 | self.num_layers = num_layers
13 | self.input_size = input_size
14 | self.hidden_size = hidden_size
15 |
16 | self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
17 | num_layers=num_layers, batch_first=True)
18 | self.softmax = nn.Softmax(dim=1)
19 | self.fc = nn.Linear(hidden_size, num_classes)
20 |
21 | def forward(self, x):
22 | h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
23 | c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
24 | _, (h_output, _) = self.lstm(x, (h_0, c_0))
25 | h_output = h_output.view(-1, self.hidden_size)
26 | output = self.softmax(h_output)
27 | output= self.fc(output)
28 | return output
29 | data = pd.read_csv('data/pems/pems-d07-9months-2021-station717087-15min.csv')[['Total Flow']]
30 | data2 = pd.DataFrame(data.values)
31 | data3 = pd.concat([data2.shift(673),data2.shift(672),data2.shift(671),data2.shift(97),data2.shift(96),data2.shift(95),data2.shift(2),data2.shift(1),data2], axis=1)
32 | data3.columns = ['t-673','t-672','t-671','t-97','t-96','t-95','t-2','t-1', 't']
33 | data4 = data3.values
34 | train_size = 17672
35 | test_size = 7863
36 |
37 | minmaxscaler = MinMaxScaler()
38 | training_data = minmaxscaler.fit_transform(data4)
39 | x, y = training_data[673:,:8], training_data[673:,-1]
40 |
41 | dataX = Variable(torch.Tensor(np.array(x))).reshape((len(x),8,1))
42 | dataY = Variable(torch.Tensor(np.array(y))).reshape((len(x),1))
43 | trainX = Variable(torch.Tensor(np.array(x[0:train_size]))).reshape((train_size,8,1))
44 | trainY = Variable(torch.Tensor(np.array(y[0:train_size]))).reshape((train_size,1))
45 | testX = Variable(torch.Tensor(np.array(x[train_size:]))).reshape((test_size,8,1))
46 | testY = Variable(torch.Tensor(np.array(y[train_size:]))).reshape((test_size,1))
47 |
48 | num_epochs = 2000
49 | learning_rate = 0.01
50 |
51 | input_size = 1
52 | hidden_size = 8
53 | num_layers = 1
54 | num_classes = 1
55 |
56 | lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
57 |
58 | criterion = torch.nn.MSELoss()
59 | optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
60 | losses = []
61 | # Train the model
62 | for epoch in range(num_epochs):
63 | outputs = lstm(trainX)
64 | optimizer.zero_grad()
65 | # obtain the loss function
66 | loss = criterion(outputs, trainY)
67 | loss.backward()
68 | optimizer.step()
69 | losses.append(loss.item())
70 | if epoch % 100 == 0:
71 | print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))
72 |
73 | lstm.eval()
74 | train_predict = lstm(trainX)
75 | data_predict2 = train_predict.data.numpy()
76 | d_p2 = np.concatenate((data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2),axis=1)
77 | data_predict2 = minmaxscaler.inverse_transform(d_p2)
78 | train_predict = data_predict2[:,0]
79 |
80 | lstm.eval()
81 | test_predict = lstm(testX)
82 | data_predict = test_predict.data.numpy()
83 | dataY_plot = dataY.data.numpy()
84 | d_p = np.concatenate((data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict),axis=1)
85 | dY_p = np.concatenate((dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot),axis=1)
86 | data_predict = minmaxscaler.inverse_transform(d_p)
87 | dataY_plot = minmaxscaler.inverse_transform(dY_p)
88 | dataY_plot = dataY_plot[:,0]
89 | test_predict = data_predict[:,0]
90 |
91 |
92 | pd.DataFrame(train_predict).to_csv("point_forecasts/slstm_pems_717087_train.csv")
93 | pd.DataFrame(test_predict).to_csv("point_forecasts/slstm_pems_717087_test.csv")
94 | pd.DataFrame(losses).to_csv("slstm_loss_pems_717087.csv")
95 |
96 |
--------------------------------------------------------------------------------
/ssvr_istanbul_data_del.py:
--------------------------------------------------------------------------------
1 | from sklearn.model_selection import GridSearchCV
2 | from sklearn.svm import SVR
3 | import pandas as pd
4 |
5 |
6 | def gridsearch(train_X, train_y, parameters):
7 | svr = SVR()
8 | grid_search = GridSearchCV(svr, parameters, cv=2)
9 | grid_search.fit(train_X, train_y)
10 | best_parameters = grid_search.best_params_
11 | return best_parameters
12 |
13 |
14 | def ssvr_model(train_X, train_y, test_X, best_parameters):
15 | C = best_parameters['C']
16 | kernel = best_parameters['kernel']
17 | svr = SVR(kernel=kernel, C=C)
18 | svr.fit(train_X, train_y)
19 | train_predictions = svr.predict(train_X)
20 | test_predictions = svr.predict(test_X)
21 | pd.DataFrame(train_predictions).to_csv("point_forecasts/ssvr_istanbul_data_del_train.csv")
22 | pd.DataFrame(test_predictions).to_csv("point_forecasts/ssvr_istanbul_data_del_test.csv")
23 | return train_predictions, test_predictions
24 |
25 | data = pd.read_csv('data/istanbul/istanbul_data_del.csv')[['NUMBER_OF_VEHICLES']]
26 | data2 = pd.DataFrame(data.values)
27 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(167),data2.shift(25),data2.shift(24),data2.shift(23),data2.shift(2),data2.shift(1),data2], axis=1)
28 | data3.columns = ['t-169','t-168','t-167','t-25','t-24','t-23','t-2','t-1', 't']
29 |
30 | data4 = data3.values
31 | train_size4 = 7565
32 | train, test = data4[169:train_size4], data4[train_size4:]
33 | train_X, train_y = train[:,:8], train[:,-1]
34 | test_X, test_y = test[:,:8], test[:,-1]
35 |
36 | parameters = {'kernel':['rbf', 'linear'], 'C':[0.1, 1, 10, 100]}
37 | best_parameters = gridsearch(train_X, train_y, parameters)
38 | ssvr_model(train_X, train_y, test_X, best_parameters)
--------------------------------------------------------------------------------
/ssvr_istanbul_data_mean_sdsh.py:
--------------------------------------------------------------------------------
1 | from sklearn.svm import SVR
2 | import pandas as pd
3 |
4 | # Since we examine the effect of missing data points in the Istanbul traffic data,
5 | # the grid search is run only on the dataset in which the missing points are deleted.
6 | # The resulting parameters are therefore reused for the datasets whose missing points are imputed.
7 | def ssvr_model(train_X, train_y, test_X, best_parameters):
8 | C = best_parameters['C']
9 | kernel = best_parameters['kernel']
10 | svr = SVR(kernel=kernel, C=C)
11 | svr.fit(train_X, train_y)
12 | train_predictions = svr.predict(train_X)
13 | test_predictions = svr.predict(test_X)
14 | pd.DataFrame(train_predictions).to_csv("point_forecasts/ssvr_istanbul_data_mean_sdsh_train.csv")
15 | pd.DataFrame(test_predictions).to_csv("point_forecasts/ssvr_istanbul_data_mean_sdsh_test.csv")
16 | return train_predictions, test_predictions
17 |
18 | data = pd.read_csv('data/istanbul/istanbul_data_mean_sdsh.csv')[['NUMBER_OF_VEHICLES']]
19 | data2 = pd.DataFrame(data.values)
20 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(167),data2.shift(25),data2.shift(24),data2.shift(23),data2.shift(2),data2.shift(1),data2], axis=1)
21 | data3.columns = ['t-169','t-168','t-167','t-25','t-24','t-23','t-2','t-1', 't']
22 |
23 | data4 = data3.values
24 | train_size4 = int(len(data4) * 0.70)
25 | train, test = data4[169:train_size4], data4[train_size4:]
26 | train_X, train_y = train[:,:8], train[:,-1]
27 | test_X, test_y = test[:,:8], test[:,-1]
28 |
29 | best_parameters = {'C': 100, 'kernel': 'rbf'}
30 | ssvr_model(train_X, train_y, test_X, best_parameters)
--------------------------------------------------------------------------------
/ssvr_istanbul_data_mean_sh.py:
--------------------------------------------------------------------------------
1 | from sklearn.svm import SVR
2 | import pandas as pd
3 |
4 | # Since we examine the effect of missing data points in the Istanbul traffic data,
5 | # the grid search is run only on the dataset in which the missing points are deleted.
6 | # The resulting parameters are therefore reused for the datasets whose missing points are imputed.
7 | def ssvr_model(train_X, train_y, test_X, best_parameters):
8 | C = best_parameters['C']
9 | kernel = best_parameters['kernel']
10 | svr = SVR(kernel=kernel, C=C)
11 | svr.fit(train_X, train_y)
12 | train_predictions = svr.predict(train_X)
13 | test_predictions = svr.predict(test_X)
14 | pd.DataFrame(train_predictions).to_csv("point_forecasts/ssvr_istanbul_data_mean_sh_train.csv")
15 | pd.DataFrame(test_predictions).to_csv("point_forecasts/ssvr_istanbul_data_mean_sh_test.csv")
16 | return train_predictions, test_predictions
17 |
18 | data = pd.read_csv('data/istanbul/istanbul_data_mean_sh.csv')[['NUMBER_OF_VEHICLES']]
19 | data2 = pd.DataFrame(data.values)
20 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(167),data2.shift(25),data2.shift(24),data2.shift(23),data2.shift(2),data2.shift(1),data2], axis=1)
21 | data3.columns = ['t-169','t-168','t-167','t-25','t-24','t-23','t-2','t-1', 't']
22 |
23 | data4 = data3.values
24 | train_size4 = int(len(data4) * 0.70)
25 | train, test = data4[169:train_size4], data4[train_size4:]
26 | train_X, train_y = train[:,:8], train[:,-1]
27 | test_X, test_y = test[:,:8], test[:,-1]
28 |
29 | best_parameters = {'C': 100, 'kernel': 'rbf'}
30 | ssvr_model(train_X, train_y, test_X, best_parameters)
--------------------------------------------------------------------------------
/ssvr_pems_716933.py:
--------------------------------------------------------------------------------
1 | from sklearn.model_selection import GridSearchCV
2 | from sklearn.svm import SVR
3 | import pandas as pd
4 |
5 |
6 | def gridsearch(train_X, train_y, parameters):
7 | svr = SVR()
8 | grid_search = GridSearchCV(svr, parameters, cv=2)
9 | grid_search.fit(train_X, train_y)
10 | best_parameters = grid_search.best_params_
11 | return best_parameters
12 |
13 |
14 | def ssvr_model(train_X, train_y, test_X, best_parameters):
15 | C = best_parameters['C']
16 | kernel = best_parameters['kernel']
17 | svr = SVR(kernel=kernel, C=C)
18 | svr.fit(train_X, train_y)
19 | train_predictions = svr.predict(train_X)
20 | test_predictions = svr.predict(test_X)
21 | pd.DataFrame(train_predictions).to_csv("point_forecasts/ssvr_pems_716933_train.csv")
22 | pd.DataFrame(test_predictions).to_csv("point_forecasts/ssvr_pems_716933_test.csv")
23 | return train_predictions, test_predictions
24 |
25 | data = pd.read_csv('data/pems/pems-d07-9months-2021-station716933-15min.csv')[['Total Flow']]
26 | data2 = pd.DataFrame(data.values)
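   | # lag features for the 15-minute series: one week earlier (t-673..t-671), one day earlier (t-97..t-95) and the two most recent intervals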
27 | data3 = pd.concat([data2.shift(673),data2.shift(672),data2.shift(671),data2.shift(97),data2.shift(96),data2.shift(95),data2.shift(2),data2.shift(1),data2], axis=1)
28 | data3.columns = ['t-673','t-672','t-671','t-97','t-96','t-95','t-2','t-1','t']
29 |
30 | data4 = data3.values
31 | train_size4 = int(len(data4) * 0.70)
32 | train, test = data4[673:train_size4], data4[train_size4:]
33 | train_X, train_y = train[:,:8], train[:,-1]
34 | test_X, test_y = test[:,:8], test[:,-1]
35 |
36 | parameters = {'kernel':['rbf', 'linear'], 'C':[0.1, 1, 10, 100]}
37 | best_parameters = gridsearch(train_X, train_y, parameters)
38 | ssvr_model(train_X, train_y, test_X, best_parameters)
--------------------------------------------------------------------------------
/ssvr_pems_717087.py:
--------------------------------------------------------------------------------
1 | from sklearn.model_selection import GridSearchCV
2 | from sklearn.svm import SVR
3 | import pandas as pd
4 |
5 |
6 | def gridsearch(train_X, train_y, parameters):
7 | svr = SVR()
8 | grid_search = GridSearchCV(svr, parameters, cv=2)
9 | grid_search.fit(train_X, train_y)
10 | best_parameters = grid_search.best_params_
11 | return best_parameters
12 |
13 |
14 | def ssvr_model(train_X, train_y, test_X, best_parameters):
15 | C = best_parameters['C']
16 | kernel = best_parameters['kernel']
17 | svr = SVR(kernel=kernel, C=C)
18 | svr.fit(train_X, train_y)
19 | train_predictions = svr.predict(train_X)
20 | test_predictions = svr.predict(test_X)
21 | pd.DataFrame(train_predictions).to_csv("point_forecasts/ssvr_pems_717087_train.csv")
22 | pd.DataFrame(test_predictions).to_csv("point_forecasts/ssvr_pems_717087_test.csv")
23 | return train_predictions, test_predictions
24 |
25 | data = pd.read_csv('data/pems/pems-d07-9months-2021-station717087-15min.csv')[['Total Flow']]
26 | data2 = pd.DataFrame(data.values)
27 | data3 = pd.concat([data2.shift(673),data2.shift(672),data2.shift(671),data2.shift(97),data2.shift(96),data2.shift(95),data2.shift(2),data2.shift(1),data2], axis=1)
28 | data3.columns = ['t-673','t-672','t-671','t-97','t-96','t-95','t-2','t-1','t']
29 |
30 | data4 = data3.values
31 | train_size4 = int(len(data4) * 0.70)
32 | train, test = data4[673:train_size4], data4[train_size4:]
33 | train_X, train_y = train[:,:8], train[:,-1]
34 | test_X, test_y = test[:,:8], test[:,-1]
35 |
36 | parameters = {'kernel':['rbf'], 'C':[0.1, 1, 10, 100]}
37 | best_parameters = gridsearch(train_X, train_y, parameters)
38 | ssvr_model(train_X, train_y, test_X, best_parameters)
--------------------------------------------------------------------------------
/xgboost_istanbul_data_del.py:
--------------------------------------------------------------------------------
1 | from numpy import asarray
2 | from pandas import read_csv
3 | from pandas import DataFrame
4 | from pandas import concat
5 | from sklearn.metrics import mean_absolute_error
6 | from xgboost import XGBRegressor
7 | import pandas as pd
8 | import numpy as np
9 |
10 |
11 |
12 | # transform a time series dataset into a supervised learning dataset
13 | def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
14 | n_vars = 1 if type(data) is list else data.shape[1]
15 | df = DataFrame(data)
16 | cols = list()
17 | for i in range(n_in, 0, -1):
18 | cols.append(df.shift(i))
19 | for i in range(0, n_out):
20 | cols.append(df.shift(-i))
21 | agg = concat(cols, axis=1)
22 | if dropnan:
23 | agg.dropna(inplace=True)
24 | return agg.values
25 |
26 | # split a univariate dataset into train/test sets
27 | def train_test_split(data, n_test):
28 | return data[:-n_test, :], data[-n_test:, :]
29 |
30 | # fit an xgboost model and make a one step prediction
31 | def xgboost_forecast(train, testX):
32 | train = asarray(train)
33 | trainX, trainy = train[:, :-1], train[:, -1]
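   |     # gradient-boosted trees with the squared-error objective and 1000 boosting rounds; the remaining hyperparameters keep their defaults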
34 | model = XGBRegressor(objective='reg:squarederror', n_estimators=1000)
35 | model.fit(trainX, trainy)
36 | yhat = model.predict(testX)
37 | return yhat
38 |
39 | # walk-forward style split for univariate data: a single model is fit on the training rows and used to forecast both the training and test sets (no per-step refitting)
40 | def walk_forward_validation(data, n_test):
41 | train_predictions = []
42 | test_predictions = []
43 | train, test = train_test_split(data, n_test)
44 | history = [x for x in train]
45 | train = asarray(train)
46 | trainX, trainy = train[:, :-1], train[:, -1]
47 | testX, testy = test[:, :-1], test[:, -1]
48 | yhat_train = xgboost_forecast(history, trainX)
49 | train_predictions.append(yhat_train)
50 | yhat_test = xgboost_forecast(history, testX)
51 | test_predictions.append(yhat_test)
52 | return train_predictions, test_predictions
53 |
54 |
55 | data = pd.read_csv('data/istanbul/istanbul_data_del.csv')[['NUMBER_OF_VEHICLES']]
56 | train_size = 7396
57 | test_size = 3500
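   | # train_size + test_size must equal the number of rows left after the first 169 lag rows are dropped, otherwise the reshape below fails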
58 | data2 = pd.DataFrame(data.values)
59 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(167),data2.shift(25),data2.shift(24),data2.shift(23),data2.shift(2),data2.shift(1),data2], axis=1)
60 | data3.columns = ['t-169','t-168','t-167','t-25','t-24','t-23','t-2','t-1', 't']
61 |
62 | values = data3.values
63 | data = values[169:,:]
64 | train_predictions, test_predictions = walk_forward_validation(data, 3500)
65 | train_predictions = pd.DataFrame(np.array(train_predictions).reshape(train_size,1))
66 | test_predictions = pd.DataFrame(np.array(test_predictions).reshape(test_size,1))
67 | train_predictions.to_csv("point_forecasts/xgboost_istanbul_data_del_train.csv")
68 | test_predictions.to_csv("point_forecasts/xgboost_istanbul_data_del_test.csv")
69 |
70 |
--------------------------------------------------------------------------------
/xgboost_istanbul_data_mean_sdsh.py:
--------------------------------------------------------------------------------
1 | from numpy import asarray
2 | from xgboost import XGBRegressor
3 | import pandas as pd
4 | import numpy as np
5 |
6 |
7 | # split a univariate dataset into train/test sets
8 | def train_test_split(data, n_test):
9 | return data[:-n_test, :], data[-n_test:, :]
10 |
11 | # fit an xgboost model and make a one step prediction
12 | def xgboost_forecast(train, testX):
13 | train = asarray(train)
14 | trainX, trainy = train[:, :-1], train[:, -1]
15 | model = XGBRegressor(objective='reg:squarederror', n_estimators=1000)
16 | model.fit(trainX, trainy)
17 | yhat = model.predict(testX)
18 | return yhat
19 |
20 | # walk-forward style split for univariate data: a single model is fit on the training rows and used to forecast both the training and test sets (no per-step refitting)
21 | def walk_forward_validation(data, n_test):
22 | train_predictions = []
23 | test_predictions = []
24 | train, test = train_test_split(data, n_test)
25 | history = [x for x in train]
26 | train = asarray(train)
27 | trainX, trainy = train[:, :-1], train[:, -1]
28 | testX, testy = test[:, :-1], test[:, -1]
29 | yhat_train = xgboost_forecast(history, trainX)
30 | train_predictions.append(yhat_train)
31 | yhat_test = xgboost_forecast(history, testX)
32 | test_predictions.append(yhat_test)
33 | return train_predictions, test_predictions
34 |
35 |
36 | data = pd.read_csv('data/istanbul/istanbul_data_mean_sdsh.csv')[['NUMBER_OF_VEHICLES']]
37 | train_size = 7995
38 | test_size = 3500
39 | data2 = pd.DataFrame(data.values)
40 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(167),data2.shift(25),data2.shift(24),data2.shift(23),data2.shift(2),data2.shift(1),data2], axis=1)
41 | data3.columns = ['t-169','t-168','t-167','t-25','t-24','t-23','t-2','t-1', 't']
42 |
43 | values = data3.values
44 | data = values[169:,:]
45 | train_predictions, test_predictions = walk_forward_validation(data, 3500)
46 | train_predictions = pd.DataFrame(np.array(train_predictions).reshape(train_size,1))
47 | test_predictions = pd.DataFrame(np.array(test_predictions).reshape(test_size,1))
48 | train_predictions.to_csv("point_forecasts/xgboost_istanbul_data_mean_sdsh_train.csv")
49 | test_predictions.to_csv("point_forecasts/xgboost_istanbul_data_mean_sdsh_test.csv")
50 |
51 |
--------------------------------------------------------------------------------
/xgboost_istanbul_data_mean_sh.py:
--------------------------------------------------------------------------------
1 | from numpy import asarray
2 | from xgboost import XGBRegressor
3 | import pandas as pd
4 | import numpy as np
5 |
6 |
7 | # split a univariate dataset into train/test sets
8 | def train_test_split(data, n_test):
9 | return data[:-n_test, :], data[-n_test:, :]
10 |
11 | # fit an xgboost model and make a one step prediction
12 | def xgboost_forecast(train, testX):
13 | train = asarray(train)
14 | trainX, trainy = train[:, :-1], train[:, -1]
15 | model = XGBRegressor(objective='reg:squarederror', n_estimators=1000)
16 | model.fit(trainX, trainy)
17 | yhat = model.predict(testX)
18 | return yhat
19 |
20 | # walk-forward style split for univariate data: a single model is fit on the training rows and used to forecast both the training and test sets (no per-step refitting)
21 | def walk_forward_validation(data, n_test):
22 | train_predictions = []
23 | test_predictions = []
24 | train, test = train_test_split(data, n_test)
25 | history = [x for x in train]
26 | train = asarray(train)
27 | trainX, trainy = train[:, :-1], train[:, -1]
28 | testX, testy = test[:, :-1], test[:, -1]
29 | yhat_train = xgboost_forecast(history, trainX)
30 | train_predictions.append(yhat_train)
31 | yhat_test = xgboost_forecast(history, testX)
32 | test_predictions.append(yhat_test)
33 | return train_predictions, test_predictions
34 |
35 |
36 | data = pd.read_csv('data/istanbul/istanbul_data_mean_sh.csv')[['NUMBER_OF_VEHICLES']]
37 | train_size = 7995
38 | test_size = 3500
39 | data2 = pd.DataFrame(data.values)
40 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(167),data2.shift(25),data2.shift(24),data2.shift(23),data2.shift(2),data2.shift(1),data2], axis=1)
41 | data3.columns = ['t-169','t-168','t-167','t-25','t-24','t-23','t-2','t-1', 't']
42 |
43 | values = data3.values
44 | data = values[169:,:]
45 | train_predictions, test_predictions = walk_forward_validation(data, 3500)
46 | train_predictions = pd.DataFrame(np.array(train_predictions).reshape(train_size,1))
47 | test_predictions = pd.DataFrame(np.array(test_predictions).reshape(test_size,1))
48 | train_predictions.to_csv("point_forecasts/xgboost_istanbul_data_mean_sh_train.csv")
49 | test_predictions.to_csv("point_forecasts/xgboost_istanbul_data_mean_sh_test.csv")
50 |
51 |
--------------------------------------------------------------------------------
/xgboost_oems_717087.py:
--------------------------------------------------------------------------------
1 | from numpy import asarray
2 | from xgboost import XGBRegressor
3 | import pandas as pd
4 | import numpy as np
5 |
6 |
7 |
8 | """# transform a time series dataset into a supervised learning dataset
9 | def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
10 | n_vars = 1 if type(data) is list else data.shape[1]
11 | df = DataFrame(data)
12 | cols = list()
13 | for i in range(n_in, 0, -1):
14 | cols.append(df.shift(i))
15 | for i in range(0, n_out):
16 | cols.append(df.shift(-i))
17 | agg = concat(cols, axis=1)
18 | if dropnan:
19 | agg.dropna(inplace=True)
20 | return agg.values"""
21 |
22 | # split a univariate dataset into train/test sets
23 | def train_test_split(data, n_test):
24 | return data[:-n_test, :], data[-n_test:, :]
25 |
26 | # fit an xgboost model and make a one step prediction
27 | def xgboost_forecast(train, testX):
28 | train = asarray(train)
29 | trainX, trainy = train[:, :-1], train[:, -1]
30 | model = XGBRegressor(objective='reg:squarederror', n_estimators=1000)
31 | model.fit(trainX, trainy)
32 | yhat = model.predict(testX)
33 | return yhat
34 |
35 | # walk-forward style split for univariate data: a single model is fit on the training rows and used to forecast both the training and test sets (no per-step refitting)
36 | def walk_forward_validation(data, n_test):
37 | train_predictions = []
38 | test_predictions = []
39 | train, test = train_test_split(data, n_test)
40 | history = [x for x in train]
41 | train = asarray(train)
42 | trainX, trainy = train[:, :-1], train[:, -1]
43 | testX, testy = test[:, :-1], test[:, -1]
44 | yhat_train = xgboost_forecast(history, trainX)
45 | train_predictions.append(yhat_train)
46 | yhat_test = xgboost_forecast(history, testX)
47 | test_predictions.append(yhat_test)
48 | return train_predictions, test_predictions
49 |
50 |
51 | data = pd.read_csv('data/pems/pems-d07-9months-2021-station717087-15min.csv')[['Total Flow']]
52 | data2 = pd.DataFrame(data.values)
53 | data3 = pd.concat([data2.shift(673),data2.shift(672),data2.shift(671),data2.shift(97),data2.shift(96),data2.shift(95),data2.shift(2),data2.shift(1),data2], axis=1)
54 | data3.columns = ['t-673','t-672','t-671','t-97','t-96','t-95','t-2','t-1','t']
55 | train_size = 17672
56 | test_size = 7863
57 |
58 | values = data3.values
59 | data = values[673:,:]
60 | train_predictions, test_predictions = walk_forward_validation(data, 7863)
61 | train_predictions = pd.DataFrame(np.array(train_predictions).reshape(train_size,1))
62 | test_predictions = pd.DataFrame(np.array(test_predictions).reshape(test_size,1))
63 | train_predictions.to_csv("point_forecasts/xgboost_pems_717087_train.csv")
64 | test_predictions.to_csv("point_forecasts/xgboost_pems_717087_test.csv")
65 |
66 |
--------------------------------------------------------------------------------
/xgboost_pems_716933.py:
--------------------------------------------------------------------------------
1 | from numpy import asarray
2 | from xgboost import XGBRegressor
3 | import pandas as pd
4 | import numpy as np
5 |
6 |
7 |
8 | # split a univariate dataset into train/test sets
9 | def train_test_split(data, n_test):
10 | return data[:-n_test, :], data[-n_test:, :]
11 |
12 | # fit an xgboost model and make a one step prediction
13 | def xgboost_forecast(train, testX):
14 | train = asarray(train)
15 | trainX, trainy = train[:, :-1], train[:, -1]
16 | model = XGBRegressor(objective='reg:squarederror', n_estimators=1000)
17 | model.fit(trainX, trainy)
18 | yhat = model.predict(testX)
19 | return yhat
20 |
21 | # walk-forward style split for univariate data: a single model is fit on the training rows and used to forecast both the training and test sets (no per-step refitting)
22 | def walk_forward_validation(data, n_test):
23 | train_predictions = []
24 | test_predictions = []
25 | train, test = train_test_split(data, n_test)
26 | history = [x for x in train]
27 | train = asarray(train)
28 | trainX, trainy = train[:, :-1], train[:, -1]
29 | testX, testy = test[:, :-1], test[:, -1]
30 | yhat_train = xgboost_forecast(history, trainX)
31 | train_predictions.append(yhat_train)
32 | yhat_test = xgboost_forecast(history, testX)
33 | test_predictions.append(yhat_test)
34 | return train_predictions, test_predictions
35 |
36 |
37 | data = pd.read_csv('data/pems/pems-d07-9months-2021-station716933-15min.csv')[['Total Flow']]
38 | data2 = pd.DataFrame(data.values)
39 | data3 = pd.concat([data2.shift(673),data2.shift(672),data2.shift(671),data2.shift(97),data2.shift(96),data2.shift(95),data2.shift(2),data2.shift(1),data2], axis=1)
40 | data3.columns = ['t-673','t-672','t-671','t-97','t-96','t-95','t-2','t-1','t']
41 | train_size = 17672
42 | test_size = 7863
43 |
44 | values = data3.values
45 | data = values[673:,:]
46 | train_predictions, test_predictions = walk_forward_validation(data, 7863)
47 | train_predictions = pd.DataFrame(np.array(train_predictions).reshape(train_size,1))
48 | test_predictions = pd.DataFrame(np.array(test_predictions).reshape(test_size,1))
49 | train_predictions.to_csv("point_forecasts/xgboost_pems_716933_train.csv")
50 | test_predictions.to_csv("point_forecasts/xgboost_pems_716933_test.csv")
51 |
52 |
--------------------------------------------------------------------------------