├── README.md
├── arimavariations_autoarima_istanbul.ipynb
├── arimavariations_autoarima_pems.ipynb
├── arimavariations_istanbul_data_del.py
├── arimavariations_istanbul_data_mean_sdsh.py
├── arimavariations_istanbul_mean_sh.py
├── arimavariations_pems_716933.py
├── arimavariations_pems_717087.py
├── evaluatemodels_istanbul_data_del.ipynb
├── evaluatemodels_istanbul_data_mean_sdsh.ipynb
├── evaluatemodels_istanbul_data_mean_sh.ipynb
├── evaluatemodels_pems_716933.ipynb
├── evaluatemodels_pems_717087.ipynb
├── evaluationmetrics_pointforecasts.py
├── evaluationmetrics_qrapproaches.py
├── hybridmodels_istanbul.py
├── hybridmodels_pems.py
├── naive_and_average_methods.py
├── plots_istanbul.ipynb
├── plots_pems.ipynb
├── qrapproaches.py
├── qrapproaches_istanbul.ipynb
├── slstm_istanbul_data_del.py
├── slstm_istanbul_data_mean_sdsh.py
├── slstm_istanbul_data_mean_sh.py
├── slstm_pems_716933.py
├── slstm_pems_717087.py
├── ssvr_istanbul_data_del.py
├── ssvr_istanbul_data_mean_sdsh.py
├── ssvr_istanbul_data_mean_sh.py
├── ssvr_pems_716933.py
├── ssvr_pems_717087.py
├── xgboost_istanbul_data_del.py
├── xgboost_istanbul_data_mean_sdsh.py
├── xgboost_istanbul_data_mean_sh.py
├── xgboost_oems_717087.py
└── xgboost_pems_716933.py
/README.md:
--------------------------------------------------------------------------------
1 | # Traffic Flow Forecasting Methods
2 | This repository contains case studies on short-term traffic flow forecasting strategies, carried out within the scope of my master's thesis. After implementing traditional (AR, ARMA, ARIMA and SARIMA), machine learning (SXGBoost and SSVR) and deep learning (SLSTM) methods, one of the main goals is to experiment with hybrid methods (SSVRARIMA, SSLSTMARIMA and SXGBoostARIMA). Besides analyzing approaches already used in the traffic flow literature, distinct strategies are introduced and tested. Furthermore, the point forecasts are supplemented with interval forecasts; in particular, quantile regression based approaches such as quantile regression averaging (QRA), quantile regression neural network (QRNN) and quantile regression long short-term memory (QRLSTM) are implemented. Both point and interval forecasts are evaluated with several evaluation metrics, and an extensive comparison is provided among the studied methodologies.
3 |
4 | My master's thesis is available via [this link](https://tez.yok.gov.tr/UlusalTezMerkezi/TezGoster?key=qVqOZFj2DwNmvdf1oGFYiNVa87wIIBf5zByl7jQFtF29p6cfiQrQFFbSu-EjVTEn).
5 |
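6 | As a rough, self-contained sketch of the quantile regression averaging (QRA) idea (an illustration only, not the exact code used in the thesis), the snippet below pools two point forecast series into quantile forecasts with `QuantReg` from statsmodels. The synthetic series and model names are assumptions made up for this example.
7 | 
8 | ```python
9 | import numpy as np
10 | import statsmodels.api as sm
11 | 
12 | # Hypothetical, synthetic stand-ins for aligned test-period series:
13 | # two point forecasts and the corresponding observed traffic flow.
14 | rng = np.random.default_rng(0)
15 | actual = 100 + 20 * np.sin(np.linspace(0, 12, 200)) + rng.normal(0, 5, 200)
16 | forecast_a = actual + rng.normal(0, 8, 200)   # e.g. an ARIMA-style point forecast
17 | forecast_b = actual + rng.normal(0, 6, 200)   # e.g. an SLSTM-style point forecast
18 | 
19 | # Pool the point forecasts as regressors and fit one quantile regression per quantile.
20 | X = sm.add_constant(np.column_stack([forecast_a, forecast_b]))
21 | quantile_forecasts = {}
22 | for q in (0.05, 0.5, 0.95):
23 |     res = sm.QuantReg(actual, X).fit(q=q)
24 |     quantile_forecasts[q] = res.predict(X)   # q-th quantile of the combined forecast
25 | 
26 | # The 0.05 and 0.95 quantiles form a 90% prediction interval; 0.5 gives a median point forecast.
27 | ```
28 | 
29 | In practice the quantile regressions would be fit on a calibration window and evaluated on a held-out test window; predicting in-sample above only keeps the sketch short.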
--------------------------------------------------------------------------------
/arimavariations_autoarima_istanbul.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 7,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from pmdarima.arima import auto_arima\n",
10 | "import pandas as pd\n",
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 8,
17 | "metadata": {},
18 | "outputs": [
19 | {
20 | "data": {
21 | "text/html": [
22 | "
\n",
23 | "\n",
36 | "
\n",
37 | " \n",
38 | " \n",
39 | " | \n",
40 | " Unnamed: 0 | \n",
41 | " _id | \n",
42 | " DATE_TIME | \n",
43 | " LONGITUDE | \n",
44 | " LATITUDE | \n",
45 | " GEOHASH | \n",
46 | " MINIMUM_SPEED | \n",
47 | " MAXIMUM_SPEED | \n",
48 | " AVERAGE_SPEED | \n",
49 | " NUMBER_OF_VEHICLES | \n",
50 | "
\n",
51 | " \n",
52 | " \n",
53 | " \n",
54 | " 0 | \n",
55 | " 12913 | \n",
56 | " 12914 | \n",
57 | " 2020-01-01 00:00:00 | \n",
58 | " 29.108276 | \n",
59 | " 41.069641 | \n",
60 | " sxk9wk | \n",
61 | " 58 | \n",
62 | " 144 | \n",
63 | " 94 | \n",
64 | " 114 | \n",
65 | "
\n",
66 | " \n",
67 | " 1 | \n",
68 | " 621904 | \n",
69 | " 621905 | \n",
70 | " 2020-01-01 01:00:00 | \n",
71 | " 29.108276 | \n",
72 | " 41.069641 | \n",
73 | " sxk9wk | \n",
74 | " 64 | \n",
75 | " 136 | \n",
76 | " 95 | \n",
77 | " 105 | \n",
78 | "
\n",
79 | " \n",
80 | " 2 | \n",
81 | " 160509 | \n",
82 | " 160510 | \n",
83 | " 2020-01-01 02:00:00 | \n",
84 | " 29.108276 | \n",
85 | " 41.069641 | \n",
86 | " sxk9wk | \n",
87 | " 68 | \n",
88 | " 181 | \n",
89 | " 105 | \n",
90 | " 97 | \n",
91 | "
\n",
92 | " \n",
93 | " 3 | \n",
94 | " 136167 | \n",
95 | " 136168 | \n",
96 | " 2020-01-01 03:00:00 | \n",
97 | " 29.108276 | \n",
98 | " 41.069641 | \n",
99 | " sxk9wk | \n",
100 | " 76 | \n",
101 | " 158 | \n",
102 | " 107 | \n",
103 | " 63 | \n",
104 | "
\n",
105 | " \n",
106 | " 4 | \n",
107 | " 684343 | \n",
108 | " 684344 | \n",
109 | " 2020-01-01 04:00:00 | \n",
110 | " 29.108276 | \n",
111 | " 41.069641 | \n",
112 | " sxk9wk | \n",
113 | " 61 | \n",
114 | " 196 | \n",
115 | " 102 | \n",
116 | " 53 | \n",
117 | "
\n",
118 | " \n",
119 | " ... | \n",
120 | " ... | \n",
121 | " ... | \n",
122 | " ... | \n",
123 | " ... | \n",
124 | " ... | \n",
125 | " ... | \n",
126 | " ... | \n",
127 | " ... | \n",
128 | " ... | \n",
129 | " ... | \n",
130 | "
\n",
131 | " \n",
132 | " 11060 | \n",
133 | " 1179972 | \n",
134 | " 1179973 | \n",
135 | " 2021-04-30 18:00:00 | \n",
136 | " 29.108276 | \n",
137 | " 41.069641 | \n",
138 | " sxk9wk | \n",
139 | " 50 | \n",
140 | " 170 | \n",
141 | " 97 | \n",
142 | " 273 | \n",
143 | "
\n",
144 | " \n",
145 | " 11061 | \n",
146 | " 1197195 | \n",
147 | " 1197196 | \n",
148 | " 2021-04-30 19:00:00 | \n",
149 | " 29.108276 | \n",
150 | " 41.069641 | \n",
151 | " sxk9wk | \n",
152 | " 58 | \n",
153 | " 177 | \n",
154 | " 109 | \n",
155 | " 198 | \n",
156 | "
\n",
157 | " \n",
158 | " 11062 | \n",
159 | " 551362 | \n",
160 | " 551363 | \n",
161 | " 2021-04-30 20:00:00 | \n",
162 | " 29.108276 | \n",
163 | " 41.069641 | \n",
164 | " sxk9wk | \n",
165 | " 64 | \n",
166 | " 164 | \n",
167 | " 113 | \n",
168 | " 139 | \n",
169 | "
\n",
170 | " \n",
171 | " 11063 | \n",
172 | " 444616 | \n",
173 | " 444617 | \n",
174 | " 2021-04-30 21:00:00 | \n",
175 | " 29.108276 | \n",
176 | " 41.069641 | \n",
177 | " sxk9wk | \n",
178 | " 64 | \n",
179 | " 182 | \n",
180 | " 100 | \n",
181 | " 128 | \n",
182 | "
\n",
183 | " \n",
184 | " 11064 | \n",
185 | " 914968 | \n",
186 | " 914969 | \n",
187 | " 2021-04-30 22:00:00 | \n",
188 | " 29.108276 | \n",
189 | " 41.069641 | \n",
190 | " sxk9wk | \n",
191 | " 50 | \n",
192 | " 168 | \n",
193 | " 102 | \n",
194 | " 109 | \n",
195 | "
\n",
196 | " \n",
197 | "
\n",
198 | "
11065 rows × 10 columns
\n",
199 | "
"
200 | ],
201 | "text/plain": [
202 | " Unnamed: 0 _id DATE_TIME LONGITUDE LATITUDE GEOHASH \\\n",
203 | "0 12913 12914 2020-01-01 00:00:00 29.108276 41.069641 sxk9wk \n",
204 | "1 621904 621905 2020-01-01 01:00:00 29.108276 41.069641 sxk9wk \n",
205 | "2 160509 160510 2020-01-01 02:00:00 29.108276 41.069641 sxk9wk \n",
206 | "3 136167 136168 2020-01-01 03:00:00 29.108276 41.069641 sxk9wk \n",
207 | "4 684343 684344 2020-01-01 04:00:00 29.108276 41.069641 sxk9wk \n",
208 | "... ... ... ... ... ... ... \n",
209 | "11060 1179972 1179973 2021-04-30 18:00:00 29.108276 41.069641 sxk9wk \n",
210 | "11061 1197195 1197196 2021-04-30 19:00:00 29.108276 41.069641 sxk9wk \n",
211 | "11062 551362 551363 2021-04-30 20:00:00 29.108276 41.069641 sxk9wk \n",
212 | "11063 444616 444617 2021-04-30 21:00:00 29.108276 41.069641 sxk9wk \n",
213 | "11064 914968 914969 2021-04-30 22:00:00 29.108276 41.069641 sxk9wk \n",
214 | "\n",
215 | " MINIMUM_SPEED MAXIMUM_SPEED AVERAGE_SPEED NUMBER_OF_VEHICLES \n",
216 | "0 58 144 94 114 \n",
217 | "1 64 136 95 105 \n",
218 | "2 68 181 105 97 \n",
219 | "3 76 158 107 63 \n",
220 | "4 61 196 102 53 \n",
221 | "... ... ... ... ... \n",
222 | "11060 50 170 97 273 \n",
223 | "11061 58 177 109 198 \n",
224 | "11062 64 164 113 139 \n",
225 | "11063 64 182 100 128 \n",
226 | "11064 50 168 102 109 \n",
227 | "\n",
228 | "[11065 rows x 10 columns]"
229 | ]
230 | },
231 | "execution_count": 8,
232 | "metadata": {},
233 | "output_type": "execute_result"
234 | }
235 | ],
236 | "source": [
237 | "data = pd.read_csv(\"data/istanbul/istanbul_data_del.csv\")\n",
238 | "data"
239 | ]
240 | },
241 | {
242 | "cell_type": "code",
243 | "execution_count": 3,
244 | "metadata": {},
245 | "outputs": [],
246 | "source": [
247 | "train_data = data[\"NUMBER_OF_VEHICLES\"].values[:-3500]\n",
248 | "test_data = data[\"NUMBER_OF_VEHICLES\"].values[-3500:]"
249 | ]
250 | },
251 | {
252 | "cell_type": "code",
253 | "execution_count": 4,
254 | "metadata": {},
255 | "outputs": [
256 | {
257 | "name": "stdout",
258 | "output_type": "stream",
259 | "text": [
260 | "Performing stepwise search to minimize aic\n",
261 | " ARIMA(1,0,0)(0,0,0)[0] : AIC=84063.193, Time=2.02 sec\n",
262 | " ARIMA(0,0,0)(0,0,0)[0] : AIC=103809.100, Time=0.09 sec\n",
263 | " ARIMA(2,0,0)(0,0,0)[0] : AIC=83822.416, Time=0.31 sec\n",
264 | " ARIMA(3,0,0)(0,0,0)[0] : AIC=83816.720, Time=0.39 sec\n",
265 | " ARIMA(4,0,0)(0,0,0)[0] : AIC=83812.823, Time=0.47 sec\n",
266 | " ARIMA(5,0,0)(0,0,0)[0] : AIC=83777.511, Time=0.65 sec\n",
267 | " ARIMA(5,0,0)(0,0,0)[0] intercept : AIC=83055.995, Time=1.49 sec\n",
268 | " ARIMA(4,0,0)(0,0,0)[0] intercept : AIC=83059.645, Time=1.22 sec\n",
269 | "\n",
270 | "Best model: ARIMA(5,0,0)(0,0,0)[0] intercept\n",
271 | "Total fit time: 6.632 seconds\n"
272 | ]
273 | }
274 | ],
275 | "source": [
276 | "model = auto_arima(train_data, start_p=1, start_q=0,\n",
277 | " max_p=5, max_q=0, \n",
278 | " d=0, \n",
279 | " seasonal=False, \n",
280 | " start_P=0, \n",
281 | " D=None, \n",
282 | " trace=True)"
283 | ]
284 | },
285 | {
286 | "cell_type": "code",
287 | "execution_count": 9,
288 | "metadata": {},
289 | "outputs": [
290 | {
291 | "name": "stdout",
292 | "output_type": "stream",
293 | "text": [
294 | "Performing stepwise search to minimize aic\n",
295 | " ARIMA(1,0,1)(0,0,0)[0] : AIC=83816.739, Time=0.45 sec\n",
296 | " ARIMA(0,0,0)(0,0,0)[0] : AIC=103809.100, Time=0.08 sec\n",
297 | " ARIMA(1,0,0)(0,0,0)[0] : AIC=84063.193, Time=0.16 sec\n",
298 | " ARIMA(0,0,1)(0,0,0)[0] : AIC=95706.917, Time=0.68 sec\n",
299 | " ARIMA(1,0,1)(0,0,0)[0] intercept : AIC=83196.692, Time=2.42 sec\n",
300 | " ARIMA(0,0,1)(0,0,0)[0] intercept : AIC=87293.626, Time=1.39 sec\n",
301 | " ARIMA(1,0,0)(0,0,0)[0] intercept : AIC=83609.892, Time=0.41 sec\n",
302 | " ARIMA(0,0,0)(0,0,0)[0] intercept : AIC=93226.640, Time=0.12 sec\n",
303 | "\n",
304 | "Best model: ARIMA(1,0,1)(0,0,0)[0] intercept\n",
305 | "Total fit time: 5.717 seconds\n"
306 | ]
307 | }
308 | ],
309 | "source": [
310 | "model = auto_arima(train_data, start_p=1, start_q=1,\n",
311 | " max_p=1, max_q=1, \n",
312 | " d=0, \n",
313 | " seasonal=False, \n",
314 | " start_P=0, \n",
315 | " D=None, \n",
316 | " trace=True)"
317 | ]
318 | },
319 | {
320 | "cell_type": "code",
321 | "execution_count": 10,
322 | "metadata": {},
323 | "outputs": [
324 | {
325 | "name": "stdout",
326 | "output_type": "stream",
327 | "text": [
328 | "Performing stepwise search to minimize aic\n",
329 | " ARIMA(1,1,1)(0,0,0)[0] intercept : AIC=83994.220, Time=0.93 sec\n",
330 | " ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=84192.277, Time=0.16 sec\n",
331 | " ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=84008.590, Time=0.41 sec\n",
332 | " ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=83993.884, Time=0.71 sec\n",
333 | " ARIMA(0,1,0)(0,0,0)[0] : AIC=84190.279, Time=0.09 sec\n",
334 | " ARIMA(0,1,2)(0,0,0)[0] intercept : AIC=83993.439, Time=0.72 sec\n",
335 | " ARIMA(1,1,2)(0,0,0)[0] intercept : AIC=inf, Time=7.01 sec\n",
336 | " ARIMA(0,1,3)(0,0,0)[0] intercept : AIC=83975.918, Time=1.03 sec\n",
337 | " ARIMA(1,1,3)(0,0,0)[0] intercept : AIC=inf, Time=17.39 sec\n",
338 | " ARIMA(0,1,4)(0,0,0)[0] intercept : AIC=inf, Time=12.40 sec\n",
339 | " ARIMA(1,1,4)(0,0,0)[0] intercept : AIC=inf, Time=10.19 sec\n",
340 | " ARIMA(0,1,3)(0,0,0)[0] : AIC=83973.919, Time=0.64 sec\n",
341 | " ARIMA(0,1,2)(0,0,0)[0] : AIC=83991.440, Time=0.46 sec\n",
342 | " ARIMA(1,1,3)(0,0,0)[0] : AIC=inf, Time=6.96 sec\n",
343 | " ARIMA(0,1,4)(0,0,0)[0] : AIC=inf, Time=6.19 sec\n",
344 | " ARIMA(1,1,2)(0,0,0)[0] : AIC=inf, Time=4.73 sec\n",
345 | " ARIMA(1,1,4)(0,0,0)[0] : AIC=inf, Time=13.72 sec\n",
346 | "\n",
347 | "Best model: ARIMA(0,1,3)(0,0,0)[0] \n",
348 | "Total fit time: 83.753 seconds\n"
349 | ]
350 | }
351 | ],
352 | "source": [
353 | "model = auto_arima(train_data, start_p=1, start_q=1,\n",
354 | " max_p=5, max_q=5, \n",
355 | " d=1, \n",
356 | " seasonal=False, \n",
357 | " start_P=0, \n",
358 | " D=None, \n",
359 | " trace=True)"
360 | ]
361 | },
362 | {
363 | "cell_type": "code",
364 | "execution_count": null,
365 | "metadata": {},
366 | "outputs": [],
367 | "source": []
368 | }
369 | ],
370 | "metadata": {
371 | "interpreter": {
372 | "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f"
373 | },
374 | "kernelspec": {
375 | "display_name": "Python 3.7.9",
376 | "language": "python",
377 | "name": "python3"
378 | },
379 | "language_info": {
380 | "codemirror_mode": {
381 | "name": "ipython",
382 | "version": 3
383 | },
384 | "file_extension": ".py",
385 | "mimetype": "text/x-python",
386 | "name": "python",
387 | "nbconvert_exporter": "python",
388 | "pygments_lexer": "ipython3",
389 | "version": "3.7.9"
390 | },
391 | "orig_nbformat": 4
392 | },
393 | "nbformat": 4,
394 | "nbformat_minor": 2
395 | }
396 |
--------------------------------------------------------------------------------
/arimavariations_autoarima_pems.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from pmdarima.arima import auto_arima\n",
10 | "import pandas as pd\n",
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "# Station 716933"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {},
25 | "outputs": [
26 | {
27 | "data": {
28 | "text/html": [
29 | "\n",
30 | "\n",
43 | "
\n",
44 | " \n",
45 | " \n",
46 | " | \n",
47 | " Unnamed: 0 | \n",
48 | " Total Flow | \n",
49 | "
\n",
50 | " \n",
51 | " \n",
52 | " \n",
53 | " 0 | \n",
54 | " 0 | \n",
55 | " 119.333333 | \n",
56 | "
\n",
57 | " \n",
58 | " 1 | \n",
59 | " 1 | \n",
60 | " 182.333333 | \n",
61 | "
\n",
62 | " \n",
63 | " 2 | \n",
64 | " 2 | \n",
65 | " 241.666667 | \n",
66 | "
\n",
67 | " \n",
68 | " 3 | \n",
69 | " 3 | \n",
70 | " 262.666667 | \n",
71 | "
\n",
72 | " \n",
73 | " 4 | \n",
74 | " 4 | \n",
75 | " 261.666667 | \n",
76 | "
\n",
77 | " \n",
78 | " ... | \n",
79 | " ... | \n",
80 | " ... | \n",
81 | "
\n",
82 | " \n",
83 | " 26203 | \n",
84 | " 26203 | \n",
85 | " 389.000000 | \n",
86 | "
\n",
87 | " \n",
88 | " 26204 | \n",
89 | " 26204 | \n",
90 | " 357.666667 | \n",
91 | "
\n",
92 | " \n",
93 | " 26205 | \n",
94 | " 26205 | \n",
95 | " 373.333333 | \n",
96 | "
\n",
97 | " \n",
98 | " 26206 | \n",
99 | " 26206 | \n",
100 | " 340.000000 | \n",
101 | "
\n",
102 | " \n",
103 | " 26207 | \n",
104 | " 26207 | \n",
105 | " 332.333333 | \n",
106 | "
\n",
107 | " \n",
108 | "
\n",
109 | "
26208 rows × 2 columns
\n",
110 | "
"
111 | ],
112 | "text/plain": [
113 | " Unnamed: 0 Total Flow\n",
114 | "0 0 119.333333\n",
115 | "1 1 182.333333\n",
116 | "2 2 241.666667\n",
117 | "3 3 262.666667\n",
118 | "4 4 261.666667\n",
119 | "... ... ...\n",
120 | "26203 26203 389.000000\n",
121 | "26204 26204 357.666667\n",
122 | "26205 26205 373.333333\n",
123 | "26206 26206 340.000000\n",
124 | "26207 26207 332.333333\n",
125 | "\n",
126 | "[26208 rows x 2 columns]"
127 | ]
128 | },
129 | "execution_count": 2,
130 | "metadata": {},
131 | "output_type": "execute_result"
132 | }
133 | ],
134 | "source": [
135 | "data = pd.read_csv(\"data/pems/pems-d07-9months-2021-station716933-15min.csv\")\n",
136 | "data"
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "execution_count": 3,
142 | "metadata": {},
143 | "outputs": [],
144 | "source": [
145 | "train_data = data[\"Total Flow\"].values[:-7863]\n",
146 | "test_data = data[\"Total Flow\"].values[-7863:]"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 4,
152 | "metadata": {},
153 | "outputs": [
154 | {
155 | "name": "stdout",
156 | "output_type": "stream",
157 | "text": [
158 | "Performing stepwise search to minimize aic\n",
159 | " ARIMA(1,0,0)(0,0,0)[0] : AIC=inf, Time=1.67 sec\n",
160 | " ARIMA(0,0,0)(0,0,0)[0] : AIC=280647.034, Time=0.20 sec\n",
161 | " ARIMA(0,0,0)(0,0,0)[0] intercept : AIC=241280.816, Time=0.37 sec\n",
162 | " ARIMA(1,0,0)(0,0,0)[0] intercept : AIC=192746.376, Time=1.07 sec\n",
163 | " ARIMA(2,0,0)(0,0,0)[0] intercept : AIC=192720.428, Time=2.11 sec\n",
164 | " ARIMA(3,0,0)(0,0,0)[0] intercept : AIC=192716.566, Time=2.17 sec\n",
165 | " ARIMA(4,0,0)(0,0,0)[0] intercept : AIC=192430.904, Time=2.90 sec\n",
166 | " ARIMA(5,0,0)(0,0,0)[0] intercept : AIC=192099.290, Time=3.63 sec\n",
167 | " ARIMA(5,0,0)(0,0,0)[0] : AIC=inf, Time=0.96 sec\n",
168 | "\n",
169 | "Best model: ARIMA(5,0,0)(0,0,0)[0] intercept\n",
170 | "Total fit time: 15.085 seconds\n"
171 | ]
172 | }
173 | ],
174 | "source": [
175 | "model = auto_arima(train_data, start_p=1, start_q=0,\n",
176 | " max_p=5, max_q=0, \n",
177 | " d=0, \n",
178 | " seasonal=False, \n",
179 | " start_P=0, \n",
180 | " D=None, \n",
181 | " trace=True)"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": 5,
187 | "metadata": {},
188 | "outputs": [
189 | {
190 | "name": "stdout",
191 | "output_type": "stream",
192 | "text": [
193 | "Performing stepwise search to minimize aic\n",
194 | " ARIMA(1,0,1)(0,0,0)[0] : AIC=193031.946, Time=0.53 sec\n",
195 | " ARIMA(0,0,0)(0,0,0)[0] : AIC=280647.034, Time=0.17 sec\n",
196 | " ARIMA(1,0,0)(0,0,0)[0] : AIC=inf, Time=0.24 sec\n",
197 | " ARIMA(0,0,1)(0,0,0)[0] : AIC=257525.493, Time=1.47 sec\n",
198 | " ARIMA(2,0,1)(0,0,0)[0] : AIC=193033.753, Time=1.43 sec\n",
199 | " ARIMA(1,0,2)(0,0,0)[0] : AIC=193033.808, Time=0.87 sec\n",
200 | " ARIMA(0,0,2)(0,0,0)[0] : AIC=240320.015, Time=4.67 sec\n",
201 | " ARIMA(2,0,0)(0,0,0)[0] : AIC=inf, Time=0.42 sec\n",
202 | " ARIMA(2,0,2)(0,0,0)[0] : AIC=193035.852, Time=3.88 sec\n",
203 | " ARIMA(1,0,1)(0,0,0)[0] intercept : AIC=192721.259, Time=5.00 sec\n",
204 | " ARIMA(0,0,1)(0,0,0)[0] intercept : AIC=222190.468, Time=6.11 sec\n",
205 | " ARIMA(1,0,0)(0,0,0)[0] intercept : AIC=192746.376, Time=1.07 sec\n",
206 | " ARIMA(2,0,1)(0,0,0)[0] intercept : AIC=191808.976, Time=27.34 sec\n",
207 | " ARIMA(2,0,0)(0,0,0)[0] intercept : AIC=192720.428, Time=2.55 sec\n",
208 | " ARIMA(3,0,1)(0,0,0)[0] intercept : AIC=191811.256, Time=37.37 sec\n",
209 | " ARIMA(2,0,2)(0,0,0)[0] intercept : AIC=192724.408, Time=32.17 sec\n",
210 | " ARIMA(1,0,2)(0,0,0)[0] intercept : AIC=192721.705, Time=17.40 sec\n",
211 | " ARIMA(3,0,0)(0,0,0)[0] intercept : AIC=192716.566, Time=2.95 sec\n",
212 | " ARIMA(3,0,2)(0,0,0)[0] intercept : AIC=192661.699, Time=38.07 sec\n",
213 | "\n",
214 | "Best model: ARIMA(2,0,1)(0,0,0)[0] intercept\n",
215 | "Total fit time: 183.720 seconds\n"
216 | ]
217 | }
218 | ],
219 | "source": [
220 | "model = auto_arima(train_data, start_p=1, start_q=1,\n",
221 | " max_p=5, max_q=5, \n",
222 | " d=0, \n",
223 | " seasonal=False, \n",
224 | " start_P=0, \n",
225 | " D=None, \n",
226 | " trace=True)"
227 | ]
228 | },
229 | {
230 | "cell_type": "code",
231 | "execution_count": 6,
232 | "metadata": {},
233 | "outputs": [
234 | {
235 | "name": "stdout",
236 | "output_type": "stream",
237 | "text": [
238 | "Performing stepwise search to minimize aic\n",
239 | " ARIMA(1,1,1)(0,0,0)[0] intercept : AIC=193051.060, Time=3.12 sec\n",
240 | " ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=193063.542, Time=0.46 sec\n",
241 | " ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=193058.007, Time=0.59 sec\n",
242 | " ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=193057.955, Time=1.08 sec\n",
243 | " ARIMA(0,1,0)(0,0,0)[0] : AIC=193061.542, Time=0.27 sec\n",
244 | " ARIMA(2,1,1)(0,0,0)[0] intercept : AIC=193050.716, Time=9.28 sec\n",
245 | " ARIMA(2,1,0)(0,0,0)[0] intercept : AIC=193059.961, Time=1.66 sec\n",
246 | " ARIMA(3,1,1)(0,0,0)[0] intercept : AIC=192728.883, Time=12.69 sec\n",
247 | " ARIMA(3,1,0)(0,0,0)[0] intercept : AIC=192866.723, Time=2.23 sec\n",
248 | " ARIMA(4,1,1)(0,0,0)[0] intercept : AIC=192659.660, Time=8.77 sec\n",
249 | " ARIMA(4,1,0)(0,0,0)[0] intercept : AIC=192658.990, Time=2.84 sec\n",
250 | " ARIMA(5,1,0)(0,0,0)[0] intercept : AIC=192659.118, Time=4.01 sec\n",
251 | " ARIMA(5,1,1)(0,0,0)[0] intercept : AIC=192659.334, Time=20.38 sec\n",
252 | " ARIMA(4,1,0)(0,0,0)[0] : AIC=192656.990, Time=1.07 sec\n",
253 | " ARIMA(3,1,0)(0,0,0)[0] : AIC=192864.723, Time=0.88 sec\n",
254 | " ARIMA(5,1,0)(0,0,0)[0] : AIC=192657.118, Time=1.48 sec\n",
255 | " ARIMA(4,1,1)(0,0,0)[0] : AIC=192657.660, Time=3.08 sec\n",
256 | " ARIMA(3,1,1)(0,0,0)[0] : AIC=192726.882, Time=4.45 sec\n",
257 | " ARIMA(5,1,1)(0,0,0)[0] : AIC=192657.334, Time=6.43 sec\n",
258 | "\n",
259 | "Best model: ARIMA(4,1,0)(0,0,0)[0] \n",
260 | "Total fit time: 84.764 seconds\n"
261 | ]
262 | }
263 | ],
264 | "source": [
265 | "model = auto_arima(train_data, start_p=1, start_q=1,\n",
266 | " max_p=5, max_q=5, \n",
267 | " d=1, \n",
268 | " seasonal=False, \n",
269 | " start_P=0, \n",
270 | " D=None, \n",
271 | " trace=True)"
272 | ]
273 | },
274 | {
275 | "cell_type": "markdown",
276 | "metadata": {},
277 | "source": [
278 | "# Station 717087"
279 | ]
280 | },
281 | {
282 | "cell_type": "code",
283 | "execution_count": 12,
284 | "metadata": {},
285 | "outputs": [
286 | {
287 | "data": {
288 | "text/html": [
289 | "\n",
290 | "\n",
303 | "
\n",
304 | " \n",
305 | " \n",
306 | " | \n",
307 | " Unnamed: 0 | \n",
308 | " Total Flow | \n",
309 | "
\n",
310 | " \n",
311 | " \n",
312 | " \n",
313 | " 0 | \n",
314 | " 0 | \n",
315 | " 51.000000 | \n",
316 | "
\n",
317 | " \n",
318 | " 1 | \n",
319 | " 1 | \n",
320 | " 93.666667 | \n",
321 | "
\n",
322 | " \n",
323 | " 2 | \n",
324 | " 2 | \n",
325 | " 136.000000 | \n",
326 | "
\n",
327 | " \n",
328 | " 3 | \n",
329 | " 3 | \n",
330 | " 124.000000 | \n",
331 | "
\n",
332 | " \n",
333 | " 4 | \n",
334 | " 4 | \n",
335 | " 124.333333 | \n",
336 | "
\n",
337 | " \n",
338 | " ... | \n",
339 | " ... | \n",
340 | " ... | \n",
341 | "
\n",
342 | " \n",
343 | " 26203 | \n",
344 | " 26203 | \n",
345 | " 156.333333 | \n",
346 | "
\n",
347 | " \n",
348 | " 26204 | \n",
349 | " 26204 | \n",
350 | " 159.333333 | \n",
351 | "
\n",
352 | " \n",
353 | " 26205 | \n",
354 | " 26205 | \n",
355 | " 150.666667 | \n",
356 | "
\n",
357 | " \n",
358 | " 26206 | \n",
359 | " 26206 | \n",
360 | " 136.333333 | \n",
361 | "
\n",
362 | " \n",
363 | " 26207 | \n",
364 | " 26207 | \n",
365 | " 120.333333 | \n",
366 | "
\n",
367 | " \n",
368 | "
\n",
369 | "
26208 rows × 2 columns
\n",
370 | "
"
371 | ],
372 | "text/plain": [
373 | " Unnamed: 0 Total Flow\n",
374 | "0 0 51.000000\n",
375 | "1 1 93.666667\n",
376 | "2 2 136.000000\n",
377 | "3 3 124.000000\n",
378 | "4 4 124.333333\n",
379 | "... ... ...\n",
380 | "26203 26203 156.333333\n",
381 | "26204 26204 159.333333\n",
382 | "26205 26205 150.666667\n",
383 | "26206 26206 136.333333\n",
384 | "26207 26207 120.333333\n",
385 | "\n",
386 | "[26208 rows x 2 columns]"
387 | ]
388 | },
389 | "execution_count": 12,
390 | "metadata": {},
391 | "output_type": "execute_result"
392 | }
393 | ],
394 | "source": [
395 | "data2 = pd.read_csv(\"data/pems/pems-d07-9months-2021-station717087-15min.csv\")\n",
396 | "data2"
397 | ]
398 | },
399 | {
400 | "cell_type": "code",
401 | "execution_count": 13,
402 | "metadata": {},
403 | "outputs": [],
404 | "source": [
405 | "train_data2 = data2[\"Total Flow\"].values[:-7863]\n",
406 | "test_data2 = data2[\"Total Flow\"].values[-7863:]"
407 | ]
408 | },
409 | {
410 | "cell_type": "code",
411 | "execution_count": 14,
412 | "metadata": {},
413 | "outputs": [
414 | {
415 | "name": "stdout",
416 | "output_type": "stream",
417 | "text": [
418 | "Performing stepwise search to minimize aic\n",
419 | " ARIMA(1,0,0)(0,0,0)[0] : AIC=inf, Time=0.40 sec\n",
420 | " ARIMA(0,0,0)(0,0,0)[0] : AIC=267850.493, Time=0.27 sec\n",
421 | " ARIMA(0,0,0)(0,0,0)[0] intercept : AIC=236188.702, Time=0.46 sec\n",
422 | " ARIMA(1,0,0)(0,0,0)[0] intercept : AIC=178483.149, Time=0.86 sec\n",
423 | " ARIMA(2,0,0)(0,0,0)[0] intercept : AIC=178480.515, Time=3.17 sec\n",
424 | " ARIMA(3,0,0)(0,0,0)[0] intercept : AIC=178269.972, Time=2.36 sec\n",
425 | " ARIMA(4,0,0)(0,0,0)[0] intercept : AIC=177788.848, Time=3.45 sec\n",
426 | " ARIMA(5,0,0)(0,0,0)[0] intercept : AIC=176351.117, Time=3.91 sec\n",
427 | " ARIMA(5,0,0)(0,0,0)[0] : AIC=176806.000, Time=1.48 sec\n",
428 | "\n",
429 | "Best model: ARIMA(5,0,0)(0,0,0)[0] intercept\n",
430 | "Total fit time: 16.377 seconds\n"
431 | ]
432 | }
433 | ],
434 | "source": [
435 | "model = auto_arima(train_data2, start_p=1, start_q=0,\n",
436 | " max_p=5, max_q=0, \n",
437 | " d=0, \n",
438 | " seasonal=False, \n",
439 | " start_P=0, \n",
440 | " D=None, \n",
441 | " trace=True)"
442 | ]
443 | },
444 | {
445 | "cell_type": "code",
446 | "execution_count": 15,
447 | "metadata": {},
448 | "outputs": [
449 | {
450 | "name": "stdout",
451 | "output_type": "stream",
452 | "text": [
453 | "Performing stepwise search to minimize aic\n",
454 | " ARIMA(1,0,1)(0,0,0)[0] : AIC=178637.992, Time=0.98 sec\n",
455 | " ARIMA(0,0,0)(0,0,0)[0] : AIC=267850.493, Time=0.31 sec\n",
456 | " ARIMA(1,0,0)(0,0,0)[0] : AIC=inf, Time=0.41 sec\n",
457 | " ARIMA(0,0,1)(0,0,0)[0] : AIC=245047.435, Time=2.34 sec\n",
458 | " ARIMA(2,0,1)(0,0,0)[0] : AIC=inf, Time=10.72 sec\n",
459 | " ARIMA(1,0,2)(0,0,0)[0] : AIC=178497.477, Time=1.54 sec\n",
460 | " ARIMA(0,0,2)(0,0,0)[0] : AIC=227697.044, Time=4.65 sec\n",
461 | " ARIMA(2,0,2)(0,0,0)[0] : AIC=inf, Time=12.43 sec\n",
462 | " ARIMA(1,0,3)(0,0,0)[0] : AIC=177927.824, Time=3.10 sec\n",
463 | " ARIMA(0,0,3)(0,0,0)[0] : AIC=216130.891, Time=7.23 sec\n",
464 | " ARIMA(2,0,3)(0,0,0)[0] : AIC=176948.176, Time=8.09 sec\n",
465 | " ARIMA(3,0,3)(0,0,0)[0] : AIC=176946.427, Time=11.82 sec\n",
466 | " ARIMA(3,0,2)(0,0,0)[0] : AIC=inf, Time=16.52 sec\n",
467 | " ARIMA(4,0,3)(0,0,0)[0] : AIC=176760.440, Time=15.73 sec\n",
468 | " ARIMA(4,0,2)(0,0,0)[0] : AIC=178594.325, Time=12.01 sec\n",
469 | " ARIMA(5,0,3)(0,0,0)[0] : AIC=176508.266, Time=18.60 sec\n",
470 | " ARIMA(5,0,2)(0,0,0)[0] : AIC=177014.134, Time=14.45 sec\n",
471 | " ARIMA(5,0,4)(0,0,0)[0] : AIC=176395.141, Time=21.28 sec\n",
472 | " ARIMA(4,0,4)(0,0,0)[0] : AIC=176418.285, Time=21.85 sec\n",
473 | " ARIMA(5,0,5)(0,0,0)[0] : AIC=inf, Time=31.80 sec\n",
474 | " ARIMA(4,0,5)(0,0,0)[0] : AIC=176405.872, Time=25.80 sec\n",
475 | " ARIMA(5,0,4)(0,0,0)[0] intercept : AIC=inf, Time=65.17 sec\n",
476 | "\n",
477 | "Best model: ARIMA(5,0,4)(0,0,0)[0] \n",
478 | "Total fit time: 306.848 seconds\n"
479 | ]
480 | }
481 | ],
482 | "source": [
483 | "model = auto_arima(train_data2, start_p=1, start_q=1,\n",
484 | " max_p=5, max_q=5, \n",
485 | " d=0, \n",
486 | " seasonal=False, \n",
487 | " start_P=0, \n",
488 | " D=None, \n",
489 | " trace=True)"
490 | ]
491 | },
492 | {
493 | "cell_type": "code",
494 | "execution_count": 16,
495 | "metadata": {},
496 | "outputs": [
497 | {
498 | "name": "stdout",
499 | "output_type": "stream",
500 | "text": [
501 | "Performing stepwise search to minimize aic\n",
502 | " ARIMA(1,1,1)(0,0,0)[0] intercept : AIC=inf, Time=12.30 sec\n",
503 | " ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=178667.167, Time=0.59 sec\n",
504 | " ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=178656.210, Time=0.79 sec\n",
505 | " ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=178658.366, Time=2.53 sec\n",
506 | " ARIMA(0,1,0)(0,0,0)[0] : AIC=178665.167, Time=0.49 sec\n",
507 | " ARIMA(2,1,0)(0,0,0)[0] intercept : AIC=178489.001, Time=2.53 sec\n",
508 | " ARIMA(3,1,0)(0,0,0)[0] intercept : AIC=178093.359, Time=2.56 sec\n",
509 | " ARIMA(4,1,0)(0,0,0)[0] intercept : AIC=176885.338, Time=3.93 sec\n",
510 | " ARIMA(5,1,0)(0,0,0)[0] intercept : AIC=176693.059, Time=4.28 sec\n",
511 | " ARIMA(5,1,1)(0,0,0)[0] intercept : AIC=176681.538, Time=19.26 sec\n",
512 | " ARIMA(4,1,1)(0,0,0)[0] intercept : AIC=176749.959, Time=21.70 sec\n",
513 | " ARIMA(5,1,2)(0,0,0)[0] intercept : AIC=176644.909, Time=26.23 sec\n",
514 | " ARIMA(4,1,2)(0,0,0)[0] intercept : AIC=176677.889, Time=22.52 sec\n",
515 | " ARIMA(5,1,3)(0,0,0)[0] intercept : AIC=inf, Time=83.99 sec\n",
516 | " ARIMA(4,1,3)(0,0,0)[0] intercept : AIC=176610.537, Time=26.95 sec\n",
517 | " ARIMA(3,1,3)(0,0,0)[0] intercept : AIC=176855.717, Time=29.81 sec\n",
518 | " ARIMA(4,1,4)(0,0,0)[0] intercept : AIC=176361.318, Time=74.64 sec\n",
519 | " ARIMA(3,1,4)(0,0,0)[0] intercept : AIC=176383.006, Time=67.48 sec\n",
520 | " ARIMA(5,1,4)(0,0,0)[0] intercept : AIC=inf, Time=80.68 sec\n",
521 | " ARIMA(4,1,5)(0,0,0)[0] intercept : AIC=inf, Time=98.36 sec\n",
522 | " ARIMA(3,1,5)(0,0,0)[0] intercept : AIC=176352.357, Time=90.69 sec\n",
523 | " ARIMA(2,1,5)(0,0,0)[0] intercept : AIC=176811.663, Time=40.08 sec\n",
524 | " ARIMA(2,1,4)(0,0,0)[0] intercept : AIC=176918.415, Time=19.27 sec\n",
525 | " ARIMA(3,1,5)(0,0,0)[0] : AIC=inf, Time=36.53 sec\n",
526 | "\n",
527 | "Best model: ARIMA(3,1,5)(0,0,0)[0] intercept\n",
528 | "Total fit time: 768.222 seconds\n"
529 | ]
530 | }
531 | ],
532 | "source": [
533 | "model = auto_arima(train_data2, start_p=1, start_q=1,\n",
534 | " max_p=5, max_q=5, \n",
535 | " d=1, \n",
536 | " seasonal=False,\n",
537 | " start_P=0, \n",
538 | " D=None, \n",
539 | " trace=True)"
540 | ]
541 | },
542 | {
543 | "cell_type": "code",
544 | "execution_count": null,
545 | "metadata": {},
546 | "outputs": [],
547 | "source": []
548 | }
549 | ],
550 | "metadata": {
551 | "interpreter": {
552 | "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f"
553 | },
554 | "kernelspec": {
555 | "display_name": "Python 3.7.9",
556 | "language": "python",
557 | "name": "python3"
558 | },
559 | "language_info": {
560 | "codemirror_mode": {
561 | "name": "ipython",
562 | "version": 3
563 | },
564 | "file_extension": ".py",
565 | "mimetype": "text/x-python",
566 | "name": "python",
567 | "nbconvert_exporter": "python",
568 | "pygments_lexer": "ipython3",
569 | "version": "3.7.9"
570 | },
571 | "orig_nbformat": 4
572 | },
573 | "nbformat": 4,
574 | "nbformat_minor": 2
575 | }
576 |
--------------------------------------------------------------------------------
/arimavariations_istanbul_data_del.py:
--------------------------------------------------------------------------------
1 | from statsmodels.tsa.arima_model import ARIMA
2 | import pandas as pd
3 |
4 | def armodel(train_data, test_data):
5 | arima = ARIMA(train_data, order=(5,0,0))
6 | arima_fit = arima.fit()
7 | print(arima_fit.summary())
8 | parameters = arima_fit.params
9 | a1 = parameters[1]
10 | a2 = parameters[2]
11 | a3 = parameters[3]
12 | a4 = parameters[4]
13 | a5 = parameters[5]
14 | train_predictions = []
15 | for t in range(4,len(train_data)):
16 | output_train = (train_data[t-4] * a5) + (train_data[t-3] * a4) + (train_data[t-2] * a3) + (train_data[t-1] * a2) + (train_data[t] * a1)
17 | train_predictions.append(output_train)
18 |
19 | test_data2=[]
20 | test_data2.append(train_data[-5])
21 | test_data2.append(train_data[-4])
22 | test_data2.append(train_data[-3])
23 | test_data2.append(train_data[-2])
24 | test_data2.append(train_data[-1])
25 | for i in range(len(test_data)-1):
26 | test_data2.append(test_data[i])
27 |
28 | test_predictions = []
29 | for t in range(4,len(test_data2)):
30 | output_test = (test_data2[t-4] * a5) + (test_data2[t-3] * a4) + (test_data2[t-2] * a3) + (test_data2[t-1] * a2) + (test_data2[t] * a1)
31 | test_predictions.append(output_test)
32 | pd.DataFrame(train_predictions).to_csv("point_forecasts/ar_istanbul_data_del_train.csv")
33 | pd.DataFrame(test_predictions).to_csv("point_forecasts/ar_istanbul_data_del_test.csv")
34 | return train_predictions, test_predictions
35 |
36 | def armamodel(train_data, test_data):
37 | arima = ARIMA(train_data, order=(1,0,1))
38 | arima_fit = arima.fit()
39 | print(arima_fit.summary())
40 | parameters = arima_fit.params
41 | a = parameters[1]
42 | b = parameters[2]
43 | output_train = arima_fit.forecast()
44 | train_predictions = []
45 | for t in range(len(train_data)):
46 | output_train = (train_data[t] * a) + ((train_data[t] - output_train[0]) * b)
47 | train_predictions.append(output_train[0])
48 |
49 | output_test = arima_fit.forecast()
50 | test_predictions = []
51 | test_predictions.append(output_test[0][0])
52 | for t in range(len(test_data)-1):
53 | output_test = (test_data[t] * a) + ((test_data[t] - output_test[0]) * b)
54 | test_predictions.append(output_test[0])
55 | pd.DataFrame(train_predictions).to_csv("point_forecasts/arma_istanbul_data_del_train.csv")
56 | pd.DataFrame(test_predictions).to_csv("point_forecasts/arma_istanbul_data_del_test.csv")
57 | return train_predictions, test_predictions
58 |
59 |
60 | def arimamodel(train_data, test_data):
61 | arima = ARIMA(train_data, order=(0,1,3))
62 | arima_fit = arima.fit()
63 | print(arima_fit.summary())
64 |
65 | train_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(train_data),dynamic=train_data.all())
66 | train_predictions2 = []
67 | for t in range(len(train_data)):
68 | output_train = train_predictions[t] + train_data[t]
69 | train_predictions2.append(output_train)
70 |
71 | test_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(test_data)-1,dynamic=test_data.all())
72 | test_predictions2 = []
73 | test_data2=[]
74 | test_data2.append(train_data[-1])
75 | for i in range(len(test_data)-1):
76 | test_data2.append(test_data[i])
77 | for t in range(len(test_data2)):
78 | output_test = test_predictions[t] + test_data2[t]
79 | test_predictions2.append(output_test)
80 |
81 | pd.DataFrame(train_predictions2).to_csv("point_forecasts/arima_istanbul_data_del_train.csv")
82 | pd.DataFrame(test_predictions2).to_csv("point_forecasts/arima_istanbul_data_del_test.csv")
83 | return train_predictions2, test_predictions2
84 |
85 | def sarimamodel(data):
86 | data2 = pd.DataFrame(data)
87 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(25),data2.shift(24),data2], axis=1)
88 | data3.columns = ['t-169','t-168','t-25','t-24','t']
89 | data4 = data3.values
90 | train_size = 7565
91 | train, test = data4[169:train_size], data4[train_size:]
92 | train_X, train_y = train[:,:4], train[:,-1]
93 | test_X, test_y = test[:,:4], test[:,-1]
94 |
95 | sarima = ARIMA(train_y, order=(1,1,2), exog=train_X)
96 | sarima_fit = sarima.fit()
97 | print(sarima_fit.summary())
98 |
99 | train_predictions = sarima_fit.predict(start=len(train_y),end=len(train_y)+len(train_y)-1,dynamic=train_y.all(),exog=train_X)
100 | train_predictions2 = []
101 | for t in range(len(train_y)):
102 | output_train = train_predictions[t] + train_y[t]
103 | train_predictions2.append(output_train)
104 |
105 | test_predictions = sarima_fit.predict(start=len(train_y),end=len(train_y)+len(test_y)-1,dynamic=test_y.all(),exog=test_X)
106 | test_predictions2 = []
107 | test_y2=[]
108 | test_y2.append(train_y[-1])
109 | for i in range(len(test_y)-1):
110 | test_y2.append(test_y[i])
111 | for t in range(len(test_y2)):
112 | output_test = test_predictions[t] + test_y2[t]
113 | test_predictions2.append(output_test)
114 |
115 | pd.DataFrame(train_predictions2).to_csv("point_forecasts/sarima_istanbul_data_del_train.csv")
116 | pd.DataFrame(test_predictions2).to_csv("point_forecasts/sarima_istanbul_data_del_test.csv")
117 | return train_predictions2, test_predictions2
118 |
119 |
120 | data = pd.read_csv('data/istanbul/istanbul_data_del.csv')[['NUMBER_OF_VEHICLES']]
121 | data = data.values
122 | train_size = 7565
123 | train_data, test_data = data[:train_size], data[train_size:]
124 | armamodel(train_data, test_data)
125 | armodel(train_data, test_data)
126 | arimamodel(train_data, test_data)
127 | sarimamodel(data)
128 |
--------------------------------------------------------------------------------
/arimavariations_istanbul_data_mean_sdsh.py:
--------------------------------------------------------------------------------
1 | from statsmodels.tsa.arima_model import ARIMA
2 | import pandas as pd
3 |
4 | def armodel(train_data, test_data):
5 | arima = ARIMA(train_data, order=(5,0,0))
6 | arima_fit = arima.fit()
7 | print(arima_fit.summary())
8 | parameters = arima_fit.params
9 | a1 = parameters[1]
10 | a2 = parameters[2]
11 | a3 = parameters[3]
12 | a4 = parameters[4]
13 | a5 = parameters[5]
14 | train_predictions = []
15 | for t in range(4,len(train_data)):
16 | output_train = (train_data[t-4] * a5) + (train_data[t-3] * a4) + (train_data[t-2] * a3) + (train_data[t-1] * a2) + (train_data[t] * a1)
17 | train_predictions.append(output_train)
18 |
19 | test_data2=[]
20 | test_data2.append(train_data[-5])
21 | test_data2.append(train_data[-4])
22 | test_data2.append(train_data[-3])
23 | test_data2.append(train_data[-2])
24 | test_data2.append(train_data[-1])
25 | for i in range(len(test_data)-1):
26 | test_data2.append(test_data[i])
27 |
28 | test_predictions = []
29 | for t in range(4,len(test_data2)):
30 | output_test = (test_data2[t-4] * a5) + (test_data2[t-3] * a4) + (test_data2[t-2] * a3) + (test_data2[t-1] * a2) + (test_data2[t] * a1)
31 | test_predictions.append(output_test)
32 | pd.DataFrame(train_predictions).to_csv("point_forecasts/ar_istanbul_data_mean_sdsh_train.csv")
33 | pd.DataFrame(test_predictions).to_csv("point_forecasts/ar_istanbul_data_mean_sdsh_test.csv")
34 | return train_predictions, test_predictions
35 |
36 | def armamodel(train_data, test_data):
37 | arima = ARIMA(train_data, order=(4,0,5))
38 | arima_fit = arima.fit()
39 | print(arima_fit.summary())
40 | parameters = arima_fit.params
41 | a1 = parameters[1]
42 | a2 = parameters[2]
43 | a3 = parameters[3]
44 | a4 = parameters[4]
45 | b1 = parameters[5]
46 | b2 = parameters[6]
47 | b3 = parameters[7]
48 | b4 = parameters[8]
49 | b5 = parameters[9]
50 |
51 | train_predictions = []
52 | outputs = arima_fit.predict(start=len(train_data),end=len(train_data)+4,dynamic=test_data.all())
53 | for i in range(len(outputs)):
54 | train_predictions.append(outputs[i])
55 | for t in range(4,len(train_data)):
56 | output_train = (train_data[t-3] * a4) + (train_data[t-2] * a3) + (train_data[t-1] * a2) + (train_data[t] * a1) + ((train_data[t-4] - train_predictions[-5]) * b5) + ((train_data[t-3] - train_predictions[-4]) * b4) + ((train_data[t-2] - train_predictions[-3]) * b3) + ((train_data[t-1] - train_predictions[-2]) * b2) + ((train_data[t] - train_predictions[-1]) * b1)
57 | train_predictions.append(output_train)
58 |
59 | test_data2=[]
60 | test_data2.append(train_data[-4])
61 | test_data2.append(train_data[-3])
62 | test_data2.append(train_data[-2])
63 | test_data2.append(train_data[-1])
64 | for i in range(len(test_data)-1):
65 | test_data2.append(test_data[i])
66 |
67 | test_predictions = []
68 | outputs = arima_fit.predict(start=len(train_data),end=len(train_data)+4,dynamic=test_data.all())
69 | for i in range(len(outputs)):
70 | test_predictions.append(outputs[i])
71 |
72 | for t in range(4,len(test_data2)):
73 | output_test = (test_data2[t-3] * a4) + (test_data2[t-2] * a3) + (test_data2[t-1] * a2) + (test_data2[t] * a1) + ((test_data2[t-4] - test_predictions[-5]) * b5) + ((test_data2[t-3] - test_predictions[-4]) * b4) + ((test_data2[t-2] - test_predictions[-3]) * b3) + ((test_data2[t-1] - test_predictions[-2]) * b2) + ((test_data2[t] - test_predictions[-1]) * b1)
74 | test_predictions.append(output_test)
75 | test_predictions = test_predictions[4:]
76 | pd.DataFrame(train_predictions).to_csv("point_forecasts/arma_istanbul_data_mean_sdsh_train.csv")
77 | pd.DataFrame(test_predictions).to_csv("point_forecasts/arma_istanbul_data_mean_sdsh_test.csv")
78 | return train_predictions, test_predictions
79 |
80 |
81 | def arimamodel(train_data, test_data):
82 | arima = ARIMA(train_data, order=(0,1,3))
83 | arima_fit = arima.fit()
84 | print(arima_fit.summary())
85 |
86 | train_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(train_data),dynamic=train_data.all())
87 | train_predictions2 = []
88 | for t in range(len(train_data)):
89 | output_train = train_predictions[t] + train_data[t]
90 | train_predictions2.append(output_train)
91 |
92 | test_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(test_data)-1,dynamic=test_data.all())
93 | test_predictions2 = []
94 | test_data2=[]
95 | test_data2.append(train_data[-1])
96 | for i in range(len(test_data)-1):
97 | test_data2.append(test_data[i])
98 | for t in range(len(test_data2)):
99 | output_test = test_predictions[t] + test_data2[t]
100 | test_predictions2.append(output_test)
101 |
102 | pd.DataFrame(train_predictions2).to_csv("point_forecasts/arima_istanbul_data_mean_sdsh_train.csv")
103 | pd.DataFrame(test_predictions2).to_csv("point_forecasts/arima_istanbul_data_mean_sdsh_test.csv")
104 | return train_predictions2, test_predictions2
105 |
106 | def sarimamodel(data):
107 | data2 = pd.DataFrame(data)
108 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(25),data2.shift(24),data2], axis=1)
109 | data3.columns = ['t-169','t-168','t-25','t-24','t']
110 | data4 = data3.values
111 | train_size = int(len(data4) * 0.70)
112 | train, test = data4[169:train_size], data4[train_size:]
113 | train_X, train_y = train[:,:4], train[:,-1]
114 | test_X, test_y = test[:,:4], test[:,-1]
115 |
116 | sarima = ARIMA(train_y, order=(1,1,2), exog=train_X)
117 | sarima_fit = sarima.fit()
118 | print(sarima_fit.summary())
119 |
120 | train_predictions = sarima_fit.predict(start=len(train_y),end=len(train_y)+len(train_y)-1,dynamic=train_y.all(),exog=train_X)
121 | train_predictions2 = []
122 | for t in range(len(train_y)):
123 | output_train = train_predictions[t] + train_y[t]
124 | train_predictions2.append(output_train)
125 |
126 | test_predictions = sarima_fit.predict(start=len(train_y),end=len(train_y)+len(test_y)-1,dynamic=test_y.all(),exog=test_X)
127 | test_predictions2 = []
128 | test_y2=[]
129 | test_y2.append(train_y[-1])
130 | for i in range(len(test_y)-1):
131 | test_y2.append(test_y[i])
132 | for t in range(len(test_y2)):
133 | output_test = test_predictions[t] + test_y2[t]
134 | test_predictions2.append(output_test)
135 |
136 | pd.DataFrame(train_predictions2).to_csv("point_forecasts/sarima_istanbul_data_mean_sdsh_train.csv")
137 | pd.DataFrame(test_predictions2).to_csv("point_forecasts/sarima_istanbul_data_mean_sdsh_test.csv")
138 | return train_predictions2, test_predictions2
139 |
140 |
141 | data = pd.read_csv('data/istanbul/istanbul_data_mean_sdsh.csv')[['NUMBER_OF_VEHICLES']]
142 | data = data.values
143 | train_size = int(len(data) * 0.70)
144 | train_data, test_data = data[:train_size], data[train_size:]
145 | armamodel(train_data, test_data)
146 | armodel(train_data, test_data)
147 | arimamodel(train_data, test_data)
148 | sarimamodel(data)
149 |
--------------------------------------------------------------------------------
/arimavariations_istanbul_mean_sh.py:
--------------------------------------------------------------------------------
1 | from statsmodels.tsa.arima_model import ARIMA
2 | import pandas as pd
3 |
4 | def armodel(train_data, test_data):
5 | arima = ARIMA(train_data, order=(5,0,0))
6 | arima_fit = arima.fit()
7 | print(arima_fit.summary())
8 | parameters = arima_fit.params
9 | a1 = parameters[1]
10 | a2 = parameters[2]
11 | a3 = parameters[3]
12 | a4 = parameters[4]
13 | a5 = parameters[5]
14 | train_predictions = []
15 | for t in range(4,len(train_data)):
16 | output_train = (train_data[t-4] * a5) + (train_data[t-3] * a4) + (train_data[t-2] * a3) + (train_data[t-1] * a2) + (train_data[t] * a1)
17 | train_predictions.append(output_train)
18 |
19 | test_data2=[]
20 | test_data2.append(train_data[-5])
21 | test_data2.append(train_data[-4])
22 | test_data2.append(train_data[-3])
23 | test_data2.append(train_data[-2])
24 | test_data2.append(train_data[-1])
25 | for i in range(len(test_data)-1):
26 | test_data2.append(test_data[i])
27 |
28 | test_predictions = []
29 | for t in range(4,len(test_data2)):
30 | output_test = (test_data2[t-4] * a5) + (test_data2[t-3] * a4) + (test_data2[t-2] * a3) + (test_data2[t-1] * a2) + (test_data2[t] * a1)
31 | test_predictions.append(output_test)
32 | pd.DataFrame(train_predictions).to_csv("point_forecasts/ar_istanbul_data_mean_sh_train.csv")
33 | pd.DataFrame(test_predictions).to_csv("point_forecasts/ar_istanbul_data_mean_sh_test.csv")
34 | return train_predictions, test_predictions
35 |
36 | def armamodel(train_data, test_data):
37 | arima = ARIMA(train_data, order=(1,0,1))
38 | arima_fit = arima.fit()
39 | print(arima_fit.summary())
40 | parameters = arima_fit.params
41 | a = parameters[1]
42 | b = parameters[2]
43 | output_train = arima_fit.forecast()
44 | train_predictions = []
45 | for t in range(len(train_data)):
46 | output_train = (train_data[t] * a) + ((train_data[t] - output_train[0]) * b)
47 | train_predictions.append(output_train[0])
48 |
49 | output_test = arima_fit.forecast()
50 | test_predictions = []
51 | test_predictions.append(output_test[0][0])
52 | for t in range(len(test_data)-1):
53 | output_test = (test_data[t] * a) + ((test_data[t] - output_test[0]) * b)
54 | test_predictions.append(output_test[0])
55 | pd.DataFrame(train_predictions).to_csv("point_forecasts/arma_istanbul_data_mean_sh_train.csv")
56 | pd.DataFrame(test_predictions).to_csv("point_forecasts/arma_istanbul_data_mean_sh_test.csv")
57 | return train_predictions, test_predictions
58 |
59 |
60 | def arimamodel(train_data, test_data):
61 | arima = ARIMA(train_data, order=(0,1,3))
62 | arima_fit = arima.fit()
63 | print(arima_fit.summary())
64 |
65 | train_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(train_data),dynamic=train_data.all())
66 | train_predictions2 = []
67 | for t in range(len(train_data)):
68 | output_train = train_predictions[t] + train_data[t]
69 | train_predictions2.append(output_train)
70 |
71 | test_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(test_data)-1,dynamic=test_data.all())
72 | test_predictions2 = []
73 | test_data2=[]
74 | test_data2.append(train_data[-1])
75 | for i in range(len(test_data)-1):
76 | test_data2.append(test_data[i])
77 | for t in range(len(test_data2)):
78 | output_test = test_predictions[t] + test_data2[t]
79 | test_predictions2.append(output_test)
80 |
81 | pd.DataFrame(train_predictions2).to_csv("point_forecasts/arima_istanbul_data_mean_sh_train.csv")
82 | pd.DataFrame(test_predictions2).to_csv("point_forecasts/arima_istanbul_data_mean_sh_test.csv")
83 | return train_predictions2, test_predictions2
84 |
85 | def sarimamodel(data):
86 | data2 = pd.DataFrame(data)
87 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(25),data2.shift(24),data2], axis=1)
88 | data3.columns = ['t-169','t-168','t-25','t-24','t']
89 | data4 = data3.values
90 | train_size = int(len(data4) * 0.70)
91 | train, test = data4[169:train_size], data4[train_size:]
92 | train_X, train_y = train[:,:4], train[:,-1]
93 | test_X, test_y = test[:,:4], test[:,-1]
94 |
95 | sarima = ARIMA(train_y, order=(1,1,2), exog=train_X)
96 | sarima_fit = sarima.fit()
97 | print(sarima_fit.summary())
98 |
99 | train_predictions = sarima_fit.predict(start=len(train_y),end=len(train_y)+len(train_y)-1,dynamic=train_y.all(),exog=train_X)
100 | train_predictions2 = []
101 | for t in range(len(train_y)):
102 | output_train = train_predictions[t] + train_y[t]
103 | train_predictions2.append(output_train)
104 |
105 | test_predictions = sarima_fit.predict(start=len(train_y),end=len(train_y)+len(test_y)-1,dynamic=test_y.all(),exog=test_X)
106 | test_predictions2 = []
107 | test_y2=[]
108 | test_y2.append(train_y[-1])
109 | for i in range(len(test_y)-1):
110 | test_y2.append(test_y[i])
111 | for t in range(len(test_y2)):
112 | output_test = test_predictions[t] + test_y2[t]
113 | test_predictions2.append(output_test)
114 |
115 | pd.DataFrame(train_predictions2).to_csv("point_forecasts/sarima_istanbul_data_mean_sh_train.csv")
116 | pd.DataFrame(test_predictions2).to_csv("point_forecasts/sarima_istanbul_data_mean_sh_test.csv")
117 | return train_predictions2, test_predictions2
118 |
119 |
120 | data = pd.read_csv('data/istanbul/istanbul_data_mean_sh.csv')[['NUMBER_OF_VEHICLES']]
121 | data = data.values
122 | train_size = int(len(data) * 0.70)
123 | train_data, test_data = data[:train_size], data[train_size:]
124 | armamodel(train_data, test_data)
125 | armodel(train_data, test_data)
126 | arimamodel(train_data, test_data)
127 | sarimamodel(data)
128 |
--------------------------------------------------------------------------------
/arimavariations_pems_716933.py:
--------------------------------------------------------------------------------
1 | from statsmodels.tsa.arima_model import ARIMA
2 | import pandas as pd
3 | import numpy as np
4 |
5 | def armodel(train_data, test_data):
6 | arima = ARIMA(train_data, order=(5,0,0))
7 | arima_fit = arima.fit()
8 | print(arima_fit.summary())
9 | parameters = arima_fit.params
10 | a1 = parameters[1]
11 | a2 = parameters[2]
12 | a3 = parameters[3]
13 | a4 = parameters[4]
14 | a5 = parameters[5]
15 | train_predictions = []
16 | for t in range(4,len(train_data)):
17 | output_train = (train_data[t-4] * a5) + (train_data[t-3] * a4) + (train_data[t-2] * a3) + (train_data[t-1] * a2) + (train_data[t] * a1)
18 | train_predictions.append(output_train)
19 |
20 | test_data2=[]
21 | test_data2.append(train_data[-5])
22 | test_data2.append(train_data[-4])
23 | test_data2.append(train_data[-3])
24 | test_data2.append(train_data[-2])
25 | test_data2.append(train_data[-1])
26 | for i in range(len(test_data)-1):
27 | test_data2.append(test_data[i])
28 |
29 | test_predictions = []
30 | for t in range(4,len(test_data2)):
31 | output_test = (test_data2[t-4] * a5) + (test_data2[t-3] * a4) + (test_data2[t-2] * a3) + (test_data2[t-1] * a2) + (test_data2[t] * a1)
32 | test_predictions.append(output_test)
33 | pd.DataFrame(train_predictions).to_csv("point_forecasts/ar_pems_716933_train.csv")
34 | pd.DataFrame(test_predictions).to_csv("point_forecasts/ar_pems_716933_test.csv")
35 | return train_predictions, test_predictions
36 |
37 | def armamodel(train_data, test_data):
38 | arima = ARIMA(train_data, order=(1,0,1))
39 | arima_fit = arima.fit()
40 | print(arima_fit.summary())
41 | parameters = arima_fit.params
42 | a = parameters[1]
43 | b = parameters[2]
44 | output_train = arima_fit.forecast()
45 | train_predictions = []
46 | for t in range(len(train_data)):
47 | output_train = (train_data[t] * a) + ((train_data[t] - output_train[0]) * b)
48 | train_predictions.append(output_train[0])
49 |
50 | output_test = arima_fit.forecast()
51 | test_predictions = []
52 | test_predictions.append(output_test[0][0])
53 | for t in range(len(test_data)-1):
54 | output_test = (test_data[t] * a) + ((test_data[t] - output_test[0]) * b)
55 | test_predictions.append(output_test[0])
56 | pd.DataFrame(train_predictions).to_csv("point_forecasts/arma_pems_716933_train.csv")
57 | pd.DataFrame(test_predictions).to_csv("point_forecasts/arma_pems_716933_test.csv")
58 | return train_predictions, test_predictions
59 |
60 |
61 | def arimamodel(train_data, test_data):
62 | arima = ARIMA(train_data, order=(4,1,0))
63 | arima_fit = arima.fit()
64 | print(arima_fit.summary())
65 |
66 | train_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(train_data),dynamic=train_data.all())
67 | train_predictions2 = []
68 | for t in range(len(train_data)):
69 | output_train = train_predictions[t] + train_data[t]
70 | train_predictions2.append(output_train)
71 |
72 | test_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(test_data)-1,dynamic=test_data.all())
73 | test_predictions2 = []
74 | test_data2=[]
75 | test_data2.append(train_data[-1])
76 | for i in range(len(test_data)-1):
77 | test_data2.append(test_data[i])
78 | for t in range(len(test_data2)):
79 | output_test = test_predictions[t] + test_data2[t]
80 | test_predictions2.append(output_test)
81 |
82 | pd.DataFrame(train_predictions2).to_csv("point_forecasts/arima_pems_716933_train.csv")
83 | pd.DataFrame(test_predictions2).to_csv("point_forecasts/arima_pems_716933_test.csv")
84 | return train_predictions2, test_predictions2
85 |
86 | def sarimamodel(data):
87 | data2 = pd.DataFrame(data)
88 | data3 = pd.concat([data2.shift(673),data2.shift(672),data2.shift(97),data2.shift(96),data2], axis=1)
89 | data3.columns = ["t-673", "t-672", "t-97", "t-96", "t"]
90 | data4 = data3.values
91 | train_size = int(len(data4) * 0.70)
92 | train, test = data4[673:train_size], data4[train_size:]
93 | train_X, train_y = train[:,:4], train[:,-1]
94 | test_X, test_y = test[:,:4], test[:,-1]
95 |
96 | sarima = ARIMA(train_y, order=(1,1,2), exog=train_X)
97 | sarima_fit = sarima.fit()
98 | print(sarima_fit.summary())
99 |
100 | train_predictions = sarima_fit.predict(start=len(train_y),end=len(train_y)+len(train_y)-1,dynamic=train_y.all(),exog=train_X)
101 | train_predictions2 = []
102 | for t in range(len(train_y)):
103 | output_train = train_predictions[t] + train_y[t]
104 | train_predictions2.append(output_train)
105 |
106 | test_predictions = sarima_fit.predict(start=len(train_y),end=len(train_y)+len(test_y)-1,dynamic=test_y.all(),exog=test_X)
107 | test_predictions2 = []
108 | test_y2=[]
109 | test_y2.append(train_y[-1])
110 | for i in range(len(test_y)-1):
111 | test_y2.append(test_y[i])
112 | for t in range(len(test_y2)):
113 | output_test = test_predictions[t] + test_y2[t]
114 | test_predictions2.append(output_test)
115 |
116 | pd.DataFrame(train_predictions2).to_csv("point_forecasts/sarima_pems_716933_train.csv")
117 | pd.DataFrame(test_predictions2).to_csv("point_forecasts/sarima_pems_716933_test.csv")
118 | return train_predictions2, test_predictions2
119 |
120 |
121 | data = pd.read_csv('data/pems/pems-d07-9months-2021-station716933-15min.csv')[['Total Flow']]
122 | data = data.values
123 | train_size = int(len(data) * 0.70)
124 | train_data, test_data = data[:train_size], data[train_size:]
125 | armamodel(train_data, test_data)
126 | armodel(train_data, test_data)
127 | arimamodel(train_data, test_data)
128 | sarimamodel(data)
--------------------------------------------------------------------------------
/arimavariations_pems_717087.py:
--------------------------------------------------------------------------------
1 | from statsmodels.tsa.arima_model import ARIMA
2 | import pandas as pd
3 |
4 | def armodel(train_data, test_data):
5 | arima = ARIMA(train_data, order=(5,0,0))
6 | arima_fit = arima.fit()
7 | print(arima_fit.summary())
8 | parameters = arima_fit.params
9 | a1 = parameters[1]
10 | a2 = parameters[2]
11 | a3 = parameters[3]
12 | a4 = parameters[4]
13 | a5 = parameters[5]
14 | train_predictions = []
15 | for t in range(4,len(train_data)):
16 | output_train = (train_data[t-4] * a5) + (train_data[t-3] * a4) + (train_data[t-2] * a3) + (train_data[t-1] * a2) + (train_data[t] * a1)
17 | train_predictions.append(output_train)
18 |
19 | test_data2=[]
20 | test_data2.append(train_data[-5])
21 | test_data2.append(train_data[-4])
22 | test_data2.append(train_data[-3])
23 | test_data2.append(train_data[-2])
24 | test_data2.append(train_data[-1])
25 | for i in range(len(test_data)-1):
26 | test_data2.append(test_data[i])
27 |
28 | test_predictions = []
29 | for t in range(4,len(test_data2)):
30 | output_test = (test_data2[t-4] * a5) + (test_data2[t-3] * a4) + (test_data2[t-2] * a3) + (test_data2[t-1] * a2) + (test_data2[t] * a1)
31 | test_predictions.append(output_test)
32 | pd.DataFrame(train_predictions).to_csv("point_forecasts/ar_pems_717087_train.csv")
33 | pd.DataFrame(test_predictions).to_csv("point_forecasts/ar_pems_717087_test.csv")
34 | return train_predictions, test_predictions
35 |
36 | def armamodel(train_data, test_data):
37 | arima = ARIMA(train_data, order=(5,0,4))
38 | arima_fit = arima.fit()
39 | print(arima_fit.summary())
40 | parameters = arima_fit.params
41 | a1 = parameters[1]
42 | a2 = parameters[2]
43 | a3 = parameters[3]
44 | a4 = parameters[4]
45 | a5 = parameters[5]
46 | b1 = parameters[6]
47 | b2 = parameters[7]
48 | b3 = parameters[8]
49 | b4 = parameters[9]
50 |
51 |
52 | train_predictions = []
53 | outputs = arima_fit.predict(start=len(train_data),end=len(train_data)+3,dynamic=test_data.all())
54 | for i in range(len(outputs)):
55 | train_predictions.append(outputs[i])
56 | for t in range(4,len(train_data)):
57 | output_train = (train_data[t-4] * a5) + (train_data[t-3] * a4) + (train_data[t-2] * a3) + (train_data[t-1] * a2) + (train_data[t] * a1) + ((train_data[t-3] - train_predictions[-4]) * b4) + ((train_data[t-2] - train_predictions[-3]) * b3) + ((train_data[t-1] - train_predictions[-2]) * b2) + ((train_data[t] - train_predictions[-1]) * b1)
58 | train_predictions.append(output_train[0])
59 |
60 | test_data2=[]
61 | test_data2.append(train_data[-5])
62 | test_data2.append(train_data[-4])
63 | test_data2.append(train_data[-3])
64 | test_data2.append(train_data[-2])
65 | test_data2.append(train_data[-1])
66 | for i in range(len(test_data)-1):
67 | test_data2.append(test_data[i])
68 |
69 | test_predictions = []
70 | outputs = arima_fit.predict(start=len(train_data),end=len(train_data)+3,dynamic=test_data.all())
71 | for i in range(len(outputs)):
72 | test_predictions.append(outputs[i])
73 |
74 | for t in range(4,len(test_data2)):
75 | output_test = (test_data2[t-4] * a5) + (test_data2[t-3] * a4) + (test_data2[t-2] * a3) + (test_data2[t-1] * a2) + (test_data2[t] * a1) + ((test_data2[t-3] - test_predictions[-4]) * b4) + ((test_data2[t-2] - test_predictions[-3]) * b3) + ((test_data2[t-1] - test_predictions[-2]) * b2) + ((test_data2[t] - test_predictions[-1]) * b1)
76 | test_predictions.append(output_test[0])
77 | test_predictions = test_predictions[4:]
78 | pd.DataFrame(train_predictions).to_csv("point_forecasts/arma_pems_717087_train.csv")
79 | pd.DataFrame(test_predictions).to_csv("point_forecasts/arma_pems_717087_test.csv")
80 | return train_predictions, test_predictions
81 |
82 |
83 | def arimamodel(train_data, test_data):
84 | arima = ARIMA(train_data, order=(3,1,5))
85 | arima_fit = arima.fit()
86 | print(arima_fit.summary())
87 |
88 | train_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(train_data),dynamic=train_data.all())
89 | train_predictions2 = []
90 | for t in range(len(train_data)):
91 | output_train = train_predictions[t] + train_data[t]
92 | train_predictions2.append(output_train)
93 |
94 | test_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(test_data)-1,dynamic=test_data.all())
95 | test_predictions2 = []
96 | test_data2=[]
97 | test_data2.append(train_data[-1])
98 | for i in range(len(test_data)-1):
99 | test_data2.append(test_data[i])
100 | for t in range(len(test_data2)):
101 | output_test = test_predictions[t] + test_data2[t]
102 | test_predictions2.append(output_test)
103 |
104 | pd.DataFrame(train_predictions2).to_csv("point_forecasts/arima_pems_717087_train.csv")
105 | pd.DataFrame(test_predictions2).to_csv("point_forecasts/arima_pems_717087_test.csv")
106 | return train_predictions2, test_predictions2
107 |
108 | def sarimamodel(data):
109 | data2 = pd.DataFrame(data)
110 | data3 = pd.concat([data2.shift(673),data2.shift(672),data2.shift(97),data2.shift(96),data2], axis=1)
111 | data3.columns = ["t-673", "t-672", "t-97", "t-96", "t"]
112 | data4 = data3.values
113 | train_size = int(len(data4) * 0.70)
114 | train, test = data4[673:train_size], data4[train_size:]
115 | train_X, train_y = train[:,:4], train[:,-1]
116 | test_X, test_y = test[:,:4], test[:,-1]
117 |
118 | sarima = ARIMA(train_y, order=(1,1,2), exog=train_X)
119 | sarima_fit = sarima.fit()
120 | print(sarima_fit.summary())
121 |
122 |     train_predictions = sarima_fit.predict(start=len(train_y),end=len(train_y)+len(train_y)-1,dynamic=train_y.all(),exog=train_X)
123 | train_predictions2 = []
124 | for t in range(len(train_y)):
125 | output_train = train_predictions[t] + train_y[t]
126 | train_predictions2.append(output_train)
127 |
128 |     test_predictions = sarima_fit.predict(start=len(train_y),end=len(train_y)+len(test_y)-1,dynamic=test_y.all(),exog=test_X)
129 | test_predictions2 = []
130 | test_y2=[]
131 | test_y2.append(train_y[-1])
132 | for i in range(len(test_y)-1):
133 | test_y2.append(test_y[i])
134 | for t in range(len(test_y2)):
135 | output_test = test_predictions[t] + test_y2[t]
136 | test_predictions2.append(output_test)
137 |
138 | pd.DataFrame(train_predictions2).to_csv("point_forecasts/sarima_pems_717087_train.csv")
139 | pd.DataFrame(test_predictions2).to_csv("point_forecasts/sarima_pems_717087_test.csv")
140 | return train_predictions2, test_predictions2
141 |
142 |
143 | data = pd.read_csv('data/pems/pems-d07-9months-2021-station717087-15min.csv')[['Total Flow']]
144 | data = data.values
145 | train_size = int(len(data) * 0.70)
146 | train_data, test_data = data[:train_size], data[train_size:]
147 | armamodel(train_data, test_data)
148 | armodel(train_data, test_data)
149 | arimamodel(train_data, test_data)
150 | sarimamodel(data)
151 |
--------------------------------------------------------------------------------
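Note on the two PeMS ARIMA scripts above: they are written against the legacy `statsmodels.tsa.arima_model.ARIMA` API, which was deprecated and later removed from statsmodels (0.13+), so rerunning them requires the older environment the notebooks report (Python 3.7.9). Also, `sarimamodel` does not fit a full seasonal ARIMA; it fits an ARIMA(1,1,2) on the flow series with the daily and weekly lags (t-96, t-97, t-672, t-673 at 15-minute resolution) passed as exogenous regressors. Below is a minimal sketch of that same design on the current statsmodels API using `SARIMAX`; the variable names and the dropna-based split are illustrative, not taken from the repository.

    import pandas as pd
    from statsmodels.tsa.statespace.sarimax import SARIMAX

    flow = pd.read_csv('data/pems/pems-d07-9months-2021-station717087-15min.csv')[['Total Flow']]

    # Daily (96-step) and weekly (672-step) lags used as exogenous regressors.
    lags = pd.concat([flow.shift(673), flow.shift(672), flow.shift(97), flow.shift(96), flow], axis=1)
    lags.columns = ['t-673', 't-672', 't-97', 't-96', 't']
    lags = lags.dropna()

    train_size = int(len(lags) * 0.70)
    train, test = lags.iloc[:train_size], lags.iloc[train_size:]
    exog_cols = ['t-673', 't-672', 't-97', 't-96']

    # ARIMA(1,1,2) on the flow with the seasonal lags as exogenous inputs.
    result = SARIMAX(train['t'], exog=train[exog_cols], order=(1, 1, 2)).fit(disp=False)
    test_forecast = result.forecast(steps=len(test), exog=test[exog_cols])
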
/evaluatemodels_istanbul_data_del.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 10,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "import pandas as pd\n",
11 | "\n",
12 | "#Defining MAPE function\n",
13 | "def MAPE(actual_values,predicted_values):\n",
14 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
15 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
16 | " mape = np.mean(np.abs((actual_values - predicted_values)/actual_values))*100\n",
17 | " return mape\n",
18 | "\n",
19 | "#Defining MAPE_100 function\n",
20 | "def MAPE_100(actual_values,predicted_values):\n",
21 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
22 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
23 | " x = np.concatenate((actual_values,predicted_values), axis=1)\n",
24 | " x_100 = x[x[:,0]>100]\n",
25 | " mape = np.mean(np.abs((x_100[:,0] - x_100[:,1]) / x_100[:,0]))*100\n",
26 | " return mape\n",
27 | "\n",
28 | "#Defining MAPE_250 function\n",
29 | "def MAPE_250(actual_values,predicted_values):\n",
30 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
31 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
32 | " x = np.concatenate((actual_values,predicted_values), axis=1)\n",
33 | " x_250 = x[x[:,0]>250]\n",
34 | " mape = np.mean(np.abs((x_250[:,0] - x_250[:,1]) / x_250[:,0]))*100\n",
35 | " return mape\n",
36 | "\n",
37 | "#Defining MAE function\n",
38 | "def MAE(actual_values,predicted_values):\n",
39 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
40 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
41 | " mae = np.mean(np.abs(actual_values - predicted_values))\n",
42 | " return mae"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 11,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "testdata_istanbul_data_del = np.array(pd.read_csv('data/istanbul/istanbul_data_del.csv')[['NUMBER_OF_VEHICLES']][-3500:])\n",
52 | "ar_istanbul_data_del = np.array(pd.read_csv(\"point_forecasts/ar_istanbul_data_del_test.csv\")[\"0\"])\n",
53 | "arma_istanbul_data_del = np.array(pd.read_csv(\"point_forecasts/arma_istanbul_data_del_test.csv\")[\"0\"])\n",
54 | "arima_istanbul_data_del = np.array(pd.read_csv(\"point_forecasts/arima_istanbul_data_del_test.csv\")[\"0\"])\n",
55 | "sarima_istanbul_data_del = np.array(pd.read_csv(\"point_forecasts/sarima_istanbul_data_del_test.csv\")[\"0\"])\n",
56 | "slstm_istanbul_data_del = np.array(pd.read_csv(\"point_forecasts/slstm_istanbul_data_del_test.csv\")[\"0\"])\n",
57 | "ssvr_istanbul_data_del = np.array(pd.read_csv(\"point_forecasts/ssvr_istanbul_data_del_test.csv\")[\"0\"])\n",
58 | "sxgboost_istanbul_data_del = np.array(pd.read_csv(\"point_forecasts/sxgboost_istanbul_data_del_test.csv\")[\"0\"])\n",
59 | "slstmarima_istanbul_data_del = np.array(pd.read_csv(\"point_forecasts/slstmarima_istanbul_data_del_test.csv\")[\"0\"])\n",
60 | "ssvrarima_istanbul_data_del = np.array(pd.read_csv(\"point_forecasts/ssvrarima_istanbul_data_del_test.csv\")[\"0\"])\n",
61 | "sxgboostarima_istanbul_data_del = np.array(pd.read_csv(\"point_forecasts/sxgboostarima_istanbul_data_del_test.csv\")[\"0\"])"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 12,
67 | "metadata": {},
68 | "outputs": [
69 | {
70 | "name": "stdout",
71 | "output_type": "stream",
72 | "text": [
73 | "28.86974512582298\n",
74 | "29.060338232637356\n",
75 | "31.04630704707781\n",
76 | "27.644744318481546\n",
77 | "26.083904765431708\n",
78 | "22.78030832823932\n",
79 | "24.796676785320347\n",
80 | "26.030395358255294\n",
81 | "22.728086954168397\n",
82 | "24.838684592834568\n"
83 | ]
84 | }
85 | ],
86 | "source": [
87 | "mape_ar_istanbul_data_del = MAPE(testdata_istanbul_data_del, ar_istanbul_data_del)\n",
88 | "mape_arma_istanbul_data_del = MAPE(testdata_istanbul_data_del, arma_istanbul_data_del)\n",
89 | "mape_arima_istanbul_data_del = MAPE(testdata_istanbul_data_del, arima_istanbul_data_del)\n",
90 | "mape_sarima_istanbul_data_del = MAPE(testdata_istanbul_data_del, sarima_istanbul_data_del)\n",
91 | "mape_slstm_istanbul_data_del = MAPE(testdata_istanbul_data_del, slstm_istanbul_data_del)\n",
92 | "mape_ssvr_istanbul_data_del = MAPE(testdata_istanbul_data_del, ssvr_istanbul_data_del)\n",
93 | "mape_sxgboost_istanbul_data_del = MAPE(testdata_istanbul_data_del, sxgboost_istanbul_data_del)\n",
94 | "mape_slstmarima_istanbul_data_del = MAPE(testdata_istanbul_data_del, slstmarima_istanbul_data_del)\n",
95 | "mape_ssvrarima_istanbul_data_del = MAPE(testdata_istanbul_data_del, ssvrarima_istanbul_data_del)\n",
96 | "mape_sxgboostarima_istanbul_data_del = MAPE(testdata_istanbul_data_del, sxgboostarima_istanbul_data_del)\n",
97 | "\n",
98 | "print(mape_ar_istanbul_data_del)\n",
99 | "print(mape_arma_istanbul_data_del)\n",
100 | "print(mape_arima_istanbul_data_del)\n",
101 | "print(mape_sarima_istanbul_data_del)\n",
102 | "print(mape_slstm_istanbul_data_del)\n",
103 | "print(mape_ssvr_istanbul_data_del)\n",
104 | "print(mape_sxgboost_istanbul_data_del)\n",
105 | "print(mape_slstmarima_istanbul_data_del)\n",
106 | "print(mape_ssvrarima_istanbul_data_del)\n",
107 | "print(mape_sxgboostarima_istanbul_data_del)"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": 13,
113 | "metadata": {},
114 | "outputs": [
115 | {
116 | "name": "stdout",
117 | "output_type": "stream",
118 | "text": [
119 | "23.712783205267975\n",
120 | "24.1882174224178\n",
121 | "22.971047404769376\n",
122 | "20.600738845263685\n",
123 | "16.022873063955497\n",
124 | "16.454937256236214\n",
125 | "16.7834587509336\n",
126 | "16.0409316683468\n",
127 | "16.416078548446443\n",
128 | "16.832564889685074\n"
129 | ]
130 | }
131 | ],
132 | "source": [
133 | "mape_100_ar_istanbul_data_del = MAPE_100(testdata_istanbul_data_del, ar_istanbul_data_del)\n",
134 | "mape_100_arma_istanbul_data_del = MAPE_100(testdata_istanbul_data_del, arma_istanbul_data_del)\n",
135 | "mape_100_arima_istanbul_data_del = MAPE_100(testdata_istanbul_data_del, arima_istanbul_data_del)\n",
136 | "mape_100_sarima_istanbul_data_del = MAPE_100(testdata_istanbul_data_del, sarima_istanbul_data_del)\n",
137 | "mape_100_slstm_istanbul_data_del = MAPE_100(testdata_istanbul_data_del, slstm_istanbul_data_del)\n",
138 | "mape_100_ssvr_istanbul_data_del = MAPE_100(testdata_istanbul_data_del, ssvr_istanbul_data_del)\n",
139 | "mape_100_sxgboost_istanbul_data_del = MAPE_100(testdata_istanbul_data_del, sxgboost_istanbul_data_del)\n",
140 | "mape_100_slstmarima_istanbul_data_del = MAPE_100(testdata_istanbul_data_del, slstmarima_istanbul_data_del)\n",
141 | "mape_100_ssvrarima_istanbul_data_del = MAPE_100(testdata_istanbul_data_del, ssvrarima_istanbul_data_del)\n",
142 | "mape_100_sxgboostarima_istanbul_data_del = MAPE_100(testdata_istanbul_data_del, sxgboostarima_istanbul_data_del)\n",
143 | "\n",
144 | "print(mape_100_ar_istanbul_data_del)\n",
145 | "print(mape_100_arma_istanbul_data_del)\n",
146 | "print(mape_100_arima_istanbul_data_del)\n",
147 | "print(mape_100_sarima_istanbul_data_del)\n",
148 | "print(mape_100_slstm_istanbul_data_del)\n",
149 | "print(mape_100_ssvr_istanbul_data_del)\n",
150 | "print(mape_100_sxgboost_istanbul_data_del)\n",
151 | "print(mape_100_slstmarima_istanbul_data_del)\n",
152 | "print(mape_100_ssvrarima_istanbul_data_del)\n",
153 | "print(mape_100_sxgboostarima_istanbul_data_del)"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 14,
159 | "metadata": {},
160 | "outputs": [
161 | {
162 | "name": "stdout",
163 | "output_type": "stream",
164 | "text": [
165 | "21.841070095474745\n",
166 | "22.600807335632386\n",
167 | "17.584517478461393\n",
168 | "16.529284045080818\n",
169 | "11.65226367899847\n",
170 | "11.99353751711907\n",
171 | "12.014449828425033\n",
172 | "11.615596940182671\n",
173 | "12.060929432100648\n",
174 | "12.000875495862601\n"
175 | ]
176 | }
177 | ],
178 | "source": [
179 | "mape_250_ar_istanbul_data_del = MAPE_250(testdata_istanbul_data_del, ar_istanbul_data_del)\n",
180 | "mape_250_arma_istanbul_data_del = MAPE_250(testdata_istanbul_data_del, arma_istanbul_data_del)\n",
181 | "mape_250_arima_istanbul_data_del = MAPE_250(testdata_istanbul_data_del, arima_istanbul_data_del)\n",
182 | "mape_250_sarima_istanbul_data_del = MAPE_250(testdata_istanbul_data_del, sarima_istanbul_data_del)\n",
183 | "mape_250_slstm_istanbul_data_del = MAPE_250(testdata_istanbul_data_del, slstm_istanbul_data_del)\n",
184 | "mape_250_ssvr_istanbul_data_del = MAPE_250(testdata_istanbul_data_del, ssvr_istanbul_data_del)\n",
185 | "mape_250_sxgboost_istanbul_data_del = MAPE_250(testdata_istanbul_data_del, sxgboost_istanbul_data_del)\n",
186 | "mape_250_slstmarima_istanbul_data_del = MAPE_250(testdata_istanbul_data_del, slstmarima_istanbul_data_del)\n",
187 | "mape_250_ssvrarima_istanbul_data_del = MAPE_250(testdata_istanbul_data_del, ssvrarima_istanbul_data_del)\n",
188 | "mape_250_sxgboostarima_istanbul_data_del = MAPE_250(testdata_istanbul_data_del, sxgboostarima_istanbul_data_del)\n",
189 | "\n",
190 | "print(mape_250_ar_istanbul_data_del)\n",
191 | "print(mape_250_arma_istanbul_data_del)\n",
192 | "print(mape_250_arima_istanbul_data_del)\n",
193 | "print(mape_250_sarima_istanbul_data_del)\n",
194 | "print(mape_250_slstm_istanbul_data_del)\n",
195 | "print(mape_250_ssvr_istanbul_data_del)\n",
196 | "print(mape_250_sxgboost_istanbul_data_del)\n",
197 | "print(mape_250_slstmarima_istanbul_data_del)\n",
198 | "print(mape_250_ssvrarima_istanbul_data_del)\n",
199 | "print(mape_250_sxgboostarima_istanbul_data_del)"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": 15,
205 | "metadata": {},
206 | "outputs": [
207 | {
208 | "name": "stdout",
209 | "output_type": "stream",
210 | "text": [
211 | "45.41822357559246\n",
212 | "46.25644379087454\n",
213 | "42.692464063978946\n",
214 | "38.92137252502022\n",
215 | "31.360570671221716\n",
216 | "30.76783905214454\n",
217 | "31.62889098894286\n",
218 | "31.355986601823076\n",
219 | "30.723100894109738\n",
220 | "31.694861789309655\n"
221 | ]
222 | }
223 | ],
224 | "source": [
225 | "mae_ar_istanbul_data_del = MAE(testdata_istanbul_data_del, ar_istanbul_data_del)\n",
226 | "mae_arma_istanbul_data_del = MAE(testdata_istanbul_data_del, arma_istanbul_data_del)\n",
227 | "mae_arima_istanbul_data_del = MAE(testdata_istanbul_data_del, arima_istanbul_data_del)\n",
228 | "mae_sarima_istanbul_data_del = MAE(testdata_istanbul_data_del, sarima_istanbul_data_del)\n",
229 | "mae_slstm_istanbul_data_del = MAE(testdata_istanbul_data_del, slstm_istanbul_data_del)\n",
230 | "mae_ssvr_istanbul_data_del = MAE(testdata_istanbul_data_del, ssvr_istanbul_data_del)\n",
231 | "mae_sxgboost_istanbul_data_del = MAE(testdata_istanbul_data_del, sxgboost_istanbul_data_del)\n",
232 | "mae_slstmarima_istanbul_data_del = MAE(testdata_istanbul_data_del, slstmarima_istanbul_data_del)\n",
233 | "mae_ssvrarima_istanbul_data_del = MAE(testdata_istanbul_data_del, ssvrarima_istanbul_data_del)\n",
234 | "mae_sxgboostarima_istanbul_data_del = MAE(testdata_istanbul_data_del, sxgboostarima_istanbul_data_del)\n",
235 | "\n",
236 | "print(mae_ar_istanbul_data_del)\n",
237 | "print(mae_arma_istanbul_data_del)\n",
238 | "print(mae_arima_istanbul_data_del)\n",
239 | "print(mae_sarima_istanbul_data_del)\n",
240 | "print(mae_slstm_istanbul_data_del)\n",
241 | "print(mae_ssvr_istanbul_data_del)\n",
242 | "print(mae_sxgboost_istanbul_data_del)\n",
243 | "print(mae_slstmarima_istanbul_data_del)\n",
244 | "print(mae_ssvrarima_istanbul_data_del)\n",
245 | "print(mae_sxgboostarima_istanbul_data_del)"
246 | ]
247 | },
248 | {
249 | "cell_type": "code",
250 | "execution_count": null,
251 | "metadata": {},
252 | "outputs": [],
253 | "source": []
254 | }
255 | ],
256 | "metadata": {
257 | "interpreter": {
258 | "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f"
259 | },
260 | "kernelspec": {
261 | "display_name": "Python 3.7.9",
262 | "language": "python",
263 | "name": "python3"
264 | },
265 | "language_info": {
266 | "codemirror_mode": {
267 | "name": "ipython",
268 | "version": 3
269 | },
270 | "file_extension": ".py",
271 | "mimetype": "text/x-python",
272 | "name": "python",
273 | "nbconvert_exporter": "python",
274 | "pygments_lexer": "ipython3",
275 | "version": "3.7.9"
276 | },
277 | "orig_nbformat": 4
278 | },
279 | "nbformat": 4,
280 | "nbformat_minor": 2
281 | }
282 |
--------------------------------------------------------------------------------
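The evaluation notebooks repeat the same four metrics (MAPE, MAPE_100, MAPE_250, MAE) line by line for each of the ten models. The same numbers can be produced with a loop over the model names; the sketch below is only an equivalent refactoring, relying on the `point_forecasts/<model>_istanbul_data_del_test.csv` naming and the "0" column that the notebook above already reads.

    import numpy as np
    import pandas as pd

    models = ['ar', 'arma', 'arima', 'sarima', 'slstm', 'ssvr', 'sxgboost',
              'slstmarima', 'ssvrarima', 'sxgboostarima']
    actual = np.array(pd.read_csv('data/istanbul/istanbul_data_del.csv')[['NUMBER_OF_VEHICLES']][-3500:]).ravel()

    def mape(y, yhat, threshold=-np.inf):
        # MAPE restricted to observations whose actual value exceeds the threshold
        # (threshold=-inf gives the plain MAPE; 100 and 250 give the thresholded variants).
        mask = y > threshold
        return np.mean(np.abs((y[mask] - yhat[mask]) / y[mask])) * 100

    for name in models:
        yhat = np.array(pd.read_csv(f'point_forecasts/{name}_istanbul_data_del_test.csv')['0'])
        print(name, mape(actual, yhat), mape(actual, yhat, 100),
              mape(actual, yhat, 250), np.mean(np.abs(actual - yhat)))
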
/evaluatemodels_istanbul_data_mean_sdsh.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "import pandas as pd\n",
11 | "\n",
12 | "#Defining MAPE function\n",
13 | "def MAPE(actual_values,predicted_values):\n",
14 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
15 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
16 | " mape = np.mean(np.abs((actual_values - predicted_values)/actual_values))*100\n",
17 | " return mape\n",
18 | "\n",
19 | "#Defining MAPE_100 function\n",
20 | "def MAPE_100(actual_values,predicted_values):\n",
21 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
22 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
23 | " x = np.concatenate((actual_values,predicted_values), axis=1)\n",
24 | " x_100 = x[x[:,0]>100]\n",
25 | " mape = np.mean(np.abs((x_100[:,0] - x_100[:,1]) / x_100[:,0]))*100\n",
26 | " return mape\n",
27 | "\n",
28 | "#Defining MAPE_250 function\n",
29 | "def MAPE_250(actual_values,predicted_values):\n",
30 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
31 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
32 | " x = np.concatenate((actual_values,predicted_values), axis=1)\n",
33 | " x_250 = x[x[:,0]>250]\n",
34 | " mape = np.mean(np.abs((x_250[:,0] - x_250[:,1]) / x_250[:,0]))*100\n",
35 | " return mape\n",
36 | "\n",
37 | "#Defining MAE function\n",
38 | "def MAE(actual_values,predicted_values):\n",
39 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
40 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
41 | " mae = np.mean(np.abs(actual_values - predicted_values))\n",
42 | " return mae"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 2,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "testdata_istanbul_data_mean_sdsh = np.array(pd.read_csv('data/istanbul/istanbul_data_mean_sdsh.csv')[['NUMBER_OF_VEHICLES']][-3500:])\n",
52 | "ar_istanbul_data_mean_sdsh = np.array(pd.read_csv(\"point_forecasts/ar_istanbul_data_mean_sdsh_test.csv\")[\"0\"])\n",
53 | "arma_istanbul_data_mean_sdsh = np.array(pd.read_csv(\"point_forecasts/arma_istanbul_data_mean_sdsh_test.csv\")[\"0\"])\n",
54 | "arima_istanbul_data_mean_sdsh = np.array(pd.read_csv(\"point_forecasts/arima_istanbul_data_mean_sdsh_test.csv\")[\"0\"])\n",
55 | "sarima_istanbul_data_mean_sdsh = np.array(pd.read_csv(\"point_forecasts/sarima_istanbul_data_mean_sdsh_test.csv\")[\"0\"])\n",
56 | "slstm_istanbul_data_mean_sdsh = np.array(pd.read_csv(\"point_forecasts/slstm_istanbul_data_mean_sdsh_test.csv\")[\"0\"])\n",
57 | "ssvr_istanbul_data_mean_sdsh = np.array(pd.read_csv(\"point_forecasts/ssvr_istanbul_data_mean_sdsh_test.csv\")[\"0\"])\n",
58 | "sxgboost_istanbul_data_mean_sdsh = np.array(pd.read_csv(\"point_forecasts/sxgboost_istanbul_data_mean_sdsh_test.csv\")[\"0\"])\n",
59 | "slstmarima_istanbul_data_mean_sdsh = np.array(pd.read_csv(\"point_forecasts/slstmarima_istanbul_data_mean_sdsh_test.csv\")[\"0\"])\n",
60 | "ssvrarima_istanbul_data_mean_sdsh = np.array(pd.read_csv(\"point_forecasts/ssvrarima_istanbul_data_mean_sdsh_test.csv\")[\"0\"])\n",
61 | "sxgboostarima_istanbul_data_mean_sdsh = np.array(pd.read_csv(\"point_forecasts/sxgboostarima_istanbul_data_mean_sdsh_test.csv\")[\"0\"])"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 3,
67 | "metadata": {},
68 | "outputs": [
69 | {
70 | "name": "stdout",
71 | "output_type": "stream",
72 | "text": [
73 | "32.28522825674658\n",
74 | "29.421164709879783\n",
75 | "31.072668155397913\n",
76 | "23.330365809285443\n",
77 | "17.936230435083743\n",
78 | "16.750979864766805\n",
79 | "18.13295639807828\n",
80 | "17.907516554919198\n",
81 | "16.752112486882776\n",
82 | "18.138081388324558\n"
83 | ]
84 | }
85 | ],
86 | "source": [
87 | "mape_ar_istanbul_data_mean_sdsh = MAPE(testdata_istanbul_data_mean_sdsh, ar_istanbul_data_mean_sdsh)\n",
88 | "mape_arma_istanbul_data_mean_sdsh = MAPE(testdata_istanbul_data_mean_sdsh, arma_istanbul_data_mean_sdsh)\n",
89 | "mape_arima_istanbul_data_mean_sdsh = MAPE(testdata_istanbul_data_mean_sdsh, arima_istanbul_data_mean_sdsh)\n",
90 | "mape_sarima_istanbul_data_mean_sdsh = MAPE(testdata_istanbul_data_mean_sdsh, sarima_istanbul_data_mean_sdsh)\n",
91 | "mape_slstm_istanbul_data_mean_sdsh = MAPE(testdata_istanbul_data_mean_sdsh, slstm_istanbul_data_mean_sdsh)\n",
92 | "mape_ssvr_istanbul_data_mean_sdsh = MAPE(testdata_istanbul_data_mean_sdsh, ssvr_istanbul_data_mean_sdsh)\n",
93 | "mape_sxgboost_istanbul_data_mean_sdsh = MAPE(testdata_istanbul_data_mean_sdsh, sxgboost_istanbul_data_mean_sdsh)\n",
94 | "mape_slstmarima_istanbul_data_mean_sdsh = MAPE(testdata_istanbul_data_mean_sdsh, slstmarima_istanbul_data_mean_sdsh)\n",
95 | "mape_ssvrarima_istanbul_data_mean_sdsh = MAPE(testdata_istanbul_data_mean_sdsh, ssvrarima_istanbul_data_mean_sdsh)\n",
96 | "mape_sxgboostarima_istanbul_data_mean_sdsh = MAPE(testdata_istanbul_data_mean_sdsh, sxgboostarima_istanbul_data_mean_sdsh)\n",
97 | "\n",
98 | "print(mape_ar_istanbul_data_mean_sdsh)\n",
99 | "print(mape_arma_istanbul_data_mean_sdsh)\n",
100 | "print(mape_arima_istanbul_data_mean_sdsh)\n",
101 | "print(mape_sarima_istanbul_data_mean_sdsh)\n",
102 | "print(mape_slstm_istanbul_data_mean_sdsh)\n",
103 | "print(mape_ssvr_istanbul_data_mean_sdsh)\n",
104 | "print(mape_sxgboost_istanbul_data_mean_sdsh)\n",
105 | "print(mape_slstmarima_istanbul_data_mean_sdsh)\n",
106 | "print(mape_ssvrarima_istanbul_data_mean_sdsh)\n",
107 | "print(mape_sxgboostarima_istanbul_data_mean_sdsh)"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": 4,
113 | "metadata": {},
114 | "outputs": [
115 | {
116 | "name": "stdout",
117 | "output_type": "stream",
118 | "text": [
119 | "27.492503060389602\n",
120 | "24.628539402099292\n",
121 | "23.077257114198847\n",
122 | "16.90765610289425\n",
123 | "12.364571240132793\n",
124 | "11.773044608936988\n",
125 | "12.609899198448616\n",
126 | "12.382148093388146\n",
127 | "11.776840396405351\n",
128 | "12.616524462824188\n"
129 | ]
130 | }
131 | ],
132 | "source": [
133 | "mape_100_ar_istanbul_data_mean_sdsh = MAPE_100(testdata_istanbul_data_mean_sdsh, ar_istanbul_data_mean_sdsh)\n",
134 | "mape_100_arma_istanbul_data_mean_sdsh = MAPE_100(testdata_istanbul_data_mean_sdsh, arma_istanbul_data_mean_sdsh)\n",
135 | "mape_100_arima_istanbul_data_mean_sdsh = MAPE_100(testdata_istanbul_data_mean_sdsh, arima_istanbul_data_mean_sdsh)\n",
136 | "mape_100_sarima_istanbul_data_mean_sdsh = MAPE_100(testdata_istanbul_data_mean_sdsh, sarima_istanbul_data_mean_sdsh)\n",
137 | "mape_100_slstm_istanbul_data_mean_sdsh = MAPE_100(testdata_istanbul_data_mean_sdsh, slstm_istanbul_data_mean_sdsh)\n",
138 | "mape_100_ssvr_istanbul_data_mean_sdsh = MAPE_100(testdata_istanbul_data_mean_sdsh, ssvr_istanbul_data_mean_sdsh)\n",
139 | "mape_100_sxgboost_istanbul_data_mean_sdsh = MAPE_100(testdata_istanbul_data_mean_sdsh, sxgboost_istanbul_data_mean_sdsh)\n",
140 | "mape_100_slstmarima_istanbul_data_mean_sdsh = MAPE_100(testdata_istanbul_data_mean_sdsh, slstmarima_istanbul_data_mean_sdsh)\n",
141 | "mape_100_ssvrarima_istanbul_data_mean_sdsh = MAPE_100(testdata_istanbul_data_mean_sdsh, ssvrarima_istanbul_data_mean_sdsh)\n",
142 | "mape_100_sxgboostarima_istanbul_data_mean_sdsh = MAPE_100(testdata_istanbul_data_mean_sdsh, sxgboostarima_istanbul_data_mean_sdsh)\n",
143 | "\n",
144 | "print(mape_100_ar_istanbul_data_mean_sdsh)\n",
145 | "print(mape_100_arma_istanbul_data_mean_sdsh)\n",
146 | "print(mape_100_arima_istanbul_data_mean_sdsh)\n",
147 | "print(mape_100_sarima_istanbul_data_mean_sdsh)\n",
148 | "print(mape_100_slstm_istanbul_data_mean_sdsh)\n",
149 | "print(mape_100_ssvr_istanbul_data_mean_sdsh)\n",
150 | "print(mape_100_sxgboost_istanbul_data_mean_sdsh)\n",
151 | "print(mape_100_slstmarima_istanbul_data_mean_sdsh)\n",
152 | "print(mape_100_ssvrarima_istanbul_data_mean_sdsh)\n",
153 | "print(mape_100_sxgboostarima_istanbul_data_mean_sdsh)"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 5,
159 | "metadata": {},
160 | "outputs": [
161 | {
162 | "name": "stdout",
163 | "output_type": "stream",
164 | "text": [
165 | "26.359933440174725\n",
166 | "23.55588242842907\n",
167 | "17.101445941329548\n",
168 | "13.139766020671567\n",
169 | "9.528538117688552\n",
170 | "9.110524556053173\n",
171 | "9.925149527667402\n",
172 | "9.515685702854562\n",
173 | "9.104299224136252\n",
174 | "9.920039238024659\n"
175 | ]
176 | }
177 | ],
178 | "source": [
179 | "mape_250_ar_istanbul_data_mean_sdsh = MAPE_250(testdata_istanbul_data_mean_sdsh, ar_istanbul_data_mean_sdsh)\n",
180 | "mape_250_arma_istanbul_data_mean_sdsh = MAPE_250(testdata_istanbul_data_mean_sdsh, arma_istanbul_data_mean_sdsh)\n",
181 | "mape_250_arima_istanbul_data_mean_sdsh = MAPE_250(testdata_istanbul_data_mean_sdsh, arima_istanbul_data_mean_sdsh)\n",
182 | "mape_250_sarima_istanbul_data_mean_sdsh = MAPE_250(testdata_istanbul_data_mean_sdsh, sarima_istanbul_data_mean_sdsh)\n",
183 | "mape_250_slstm_istanbul_data_mean_sdsh = MAPE_250(testdata_istanbul_data_mean_sdsh, slstm_istanbul_data_mean_sdsh)\n",
184 | "mape_250_ssvr_istanbul_data_mean_sdsh = MAPE_250(testdata_istanbul_data_mean_sdsh, ssvr_istanbul_data_mean_sdsh)\n",
185 | "mape_250_sxgboost_istanbul_data_mean_sdsh = MAPE_250(testdata_istanbul_data_mean_sdsh, sxgboost_istanbul_data_mean_sdsh)\n",
186 | "mape_250_slstmarima_istanbul_data_mean_sdsh = MAPE_250(testdata_istanbul_data_mean_sdsh, slstmarima_istanbul_data_mean_sdsh)\n",
187 | "mape_250_ssvrarima_istanbul_data_mean_sdsh = MAPE_250(testdata_istanbul_data_mean_sdsh, ssvrarima_istanbul_data_mean_sdsh)\n",
188 | "mape_250_sxgboostarima_istanbul_data_mean_sdsh = MAPE_250(testdata_istanbul_data_mean_sdsh, sxgboostarima_istanbul_data_mean_sdsh)\n",
189 | "\n",
190 | "print(mape_250_ar_istanbul_data_mean_sdsh)\n",
191 | "print(mape_250_arma_istanbul_data_mean_sdsh)\n",
192 | "print(mape_250_arima_istanbul_data_mean_sdsh)\n",
193 | "print(mape_250_sarima_istanbul_data_mean_sdsh)\n",
194 | "print(mape_250_slstm_istanbul_data_mean_sdsh)\n",
195 | "print(mape_250_ssvr_istanbul_data_mean_sdsh)\n",
196 | "print(mape_250_sxgboost_istanbul_data_mean_sdsh)\n",
197 | "print(mape_250_slstmarima_istanbul_data_mean_sdsh)\n",
198 | "print(mape_250_ssvrarima_istanbul_data_mean_sdsh)\n",
199 | "print(mape_250_sxgboostarima_istanbul_data_mean_sdsh)"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": 6,
205 | "metadata": {},
206 | "outputs": [
207 | {
208 | "name": "stdout",
209 | "output_type": "stream",
210 | "text": [
211 | "52.29404862398487\n",
212 | "47.1007412386391\n",
213 | "42.39015327603023\n",
214 | "31.579195499410115\n",
215 | "23.470105247747735\n",
216 | "22.230927819831624\n",
217 | "23.976517753869476\n",
218 | "23.475449282581536\n",
219 | "22.233469339131968\n",
220 | "23.98729810076995\n"
221 | ]
222 | }
223 | ],
224 | "source": [
225 | "mae_ar_istanbul_data_mean_sdsh = MAE(testdata_istanbul_data_mean_sdsh, ar_istanbul_data_mean_sdsh)\n",
226 | "mae_arma_istanbul_data_mean_sdsh = MAE(testdata_istanbul_data_mean_sdsh, arma_istanbul_data_mean_sdsh)\n",
227 | "mae_arima_istanbul_data_mean_sdsh = MAE(testdata_istanbul_data_mean_sdsh, arima_istanbul_data_mean_sdsh)\n",
228 | "mae_sarima_istanbul_data_mean_sdsh = MAE(testdata_istanbul_data_mean_sdsh, sarima_istanbul_data_mean_sdsh)\n",
229 | "mae_slstm_istanbul_data_mean_sdsh = MAE(testdata_istanbul_data_mean_sdsh, slstm_istanbul_data_mean_sdsh)\n",
230 | "mae_ssvr_istanbul_data_mean_sdsh = MAE(testdata_istanbul_data_mean_sdsh, ssvr_istanbul_data_mean_sdsh)\n",
231 | "mae_sxgboost_istanbul_data_mean_sdsh = MAE(testdata_istanbul_data_mean_sdsh, sxgboost_istanbul_data_mean_sdsh)\n",
232 | "mae_slstmarima_istanbul_data_mean_sdsh = MAE(testdata_istanbul_data_mean_sdsh, slstmarima_istanbul_data_mean_sdsh)\n",
233 | "mae_ssvrarima_istanbul_data_mean_sdsh = MAE(testdata_istanbul_data_mean_sdsh, ssvrarima_istanbul_data_mean_sdsh)\n",
234 | "mae_sxgboostarima_istanbul_data_mean_sdsh = MAE(testdata_istanbul_data_mean_sdsh, sxgboostarima_istanbul_data_mean_sdsh)\n",
235 | "\n",
236 | "print(mae_ar_istanbul_data_mean_sdsh)\n",
237 | "print(mae_arma_istanbul_data_mean_sdsh)\n",
238 | "print(mae_arima_istanbul_data_mean_sdsh)\n",
239 | "print(mae_sarima_istanbul_data_mean_sdsh)\n",
240 | "print(mae_slstm_istanbul_data_mean_sdsh)\n",
241 | "print(mae_ssvr_istanbul_data_mean_sdsh)\n",
242 | "print(mae_sxgboost_istanbul_data_mean_sdsh)\n",
243 | "print(mae_slstmarima_istanbul_data_mean_sdsh)\n",
244 | "print(mae_ssvrarima_istanbul_data_mean_sdsh)\n",
245 | "print(mae_sxgboostarima_istanbul_data_mean_sdsh)"
246 | ]
247 | },
248 | {
249 | "cell_type": "code",
250 | "execution_count": null,
251 | "metadata": {},
252 | "outputs": [],
253 | "source": []
254 | }
255 | ],
256 | "metadata": {
257 | "interpreter": {
258 | "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f"
259 | },
260 | "kernelspec": {
261 | "display_name": "Python 3.7.9",
262 | "language": "python",
263 | "name": "python3"
264 | },
265 | "language_info": {
266 | "codemirror_mode": {
267 | "name": "ipython",
268 | "version": 3
269 | },
270 | "file_extension": ".py",
271 | "mimetype": "text/x-python",
272 | "name": "python",
273 | "nbconvert_exporter": "python",
274 | "pygments_lexer": "ipython3",
275 | "version": "3.7.9"
276 | },
277 | "orig_nbformat": 4
278 | },
279 | "nbformat": 4,
280 | "nbformat_minor": 2
281 | }
282 |
--------------------------------------------------------------------------------
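As the printed scores above show, the thresholded variants sit well below the plain MAPE. MAPE averages |actual − forecast| / actual, so intervals with very low vehicle counts dominate it: an actual of 20 forecast as 40 contributes a 100% error on its own, while an actual of 400 forecast as 420 contributes only 5%. MAPE_100 and MAPE_250 therefore restrict the average to intervals whose observed flow exceeds 100 or 250 vehicles, focusing the comparison on the busier periods.
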
/evaluatemodels_istanbul_data_mean_sh.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "import pandas as pd\n",
11 | "\n",
12 | "#Defining MAPE function\n",
13 | "def MAPE(actual_values,predicted_values):\n",
14 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
15 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
16 | " mape = np.mean(np.abs((actual_values - predicted_values)/actual_values))*100\n",
17 | " return mape\n",
18 | "\n",
19 | "#Defining MAPE_100 function\n",
20 | "def MAPE_100(actual_values,predicted_values):\n",
21 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
22 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
23 | " x = np.concatenate((actual_values,predicted_values), axis=1)\n",
24 | " x_100 = x[x[:,0]>100]\n",
25 | " mape = np.mean(np.abs((x_100[:,0] - x_100[:,1]) / x_100[:,0]))*100\n",
26 | " return mape\n",
27 | "\n",
28 | "#Defining MAPE_250 function\n",
29 | "def MAPE_250(actual_values,predicted_values):\n",
30 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
31 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
32 | " x = np.concatenate((actual_values,predicted_values), axis=1)\n",
33 | " x_250 = x[x[:,0]>250]\n",
34 | " mape = np.mean(np.abs((x_250[:,0] - x_250[:,1]) / x_250[:,0]))*100\n",
35 | " return mape\n",
36 | "\n",
37 | "#Defining MAE function\n",
38 | "def MAE(actual_values,predicted_values):\n",
39 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
40 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
41 | " mae = np.mean(np.abs(actual_values - predicted_values))\n",
42 | " return mae"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 2,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "testdata_istanbul_data_mean_sh = np.array(pd.read_csv('data/istanbul/istanbul_data_mean_sh.csv')[['NUMBER_OF_VEHICLES']][-3500:])\n",
52 | "ar_istanbul_data_mean_sh = np.array(pd.read_csv(\"point_forecasts/ar_istanbul_data_mean_sh_test.csv\")[\"0\"])\n",
53 | "arma_istanbul_data_mean_sh = np.array(pd.read_csv(\"point_forecasts/arma_istanbul_data_mean_sh_test.csv\")[\"0\"])\n",
54 | "arima_istanbul_data_mean_sh = np.array(pd.read_csv(\"point_forecasts/arima_istanbul_data_mean_sh_test.csv\")[\"0\"])\n",
55 | "sarima_istanbul_data_mean_sh = np.array(pd.read_csv(\"point_forecasts/sarima_istanbul_data_mean_sh_test.csv\")[\"0\"])\n",
56 | "slstm_istanbul_data_mean_sh = np.array(pd.read_csv(\"point_forecasts/slstm_istanbul_data_mean_sh_test.csv\")[\"0\"])\n",
57 | "ssvr_istanbul_data_mean_sh = np.array(pd.read_csv(\"point_forecasts/ssvr_istanbul_data_mean_sh_test.csv\")[\"0\"])\n",
58 | "sxgboost_istanbul_data_mean_sh = np.array(pd.read_csv(\"point_forecasts/sxgboost_istanbul_data_mean_sh_test.csv\")[\"0\"])\n",
59 | "slstmarima_istanbul_data_mean_sh = np.array(pd.read_csv(\"point_forecasts/slstmarima_istanbul_data_mean_sh_test.csv\")[\"0\"])\n",
60 | "ssvrarima_istanbul_data_mean_sh = np.array(pd.read_csv(\"point_forecasts/ssvrarima_istanbul_data_mean_sh_test.csv\")[\"0\"])\n",
61 | "sxgboostarima_istanbul_data_mean_sh = np.array(pd.read_csv(\"point_forecasts/sxgboostarima_istanbul_data_mean_sh_test.csv\")[\"0\"])"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 3,
67 | "metadata": {},
68 | "outputs": [
69 | {
70 | "name": "stdout",
71 | "output_type": "stream",
72 | "text": [
73 | "31.639771004700513\n",
74 | "28.572032803815915\n",
75 | "29.968686582552234\n",
76 | "21.774666788498752\n",
77 | "17.65213224379523\n",
78 | "17.155343621539714\n",
79 | "18.51083359724517\n",
80 | "17.670485960850637\n",
81 | "17.16216537051296\n",
82 | "18.51645262368806\n"
83 | ]
84 | }
85 | ],
86 | "source": [
87 | "mape_ar_istanbul_data_mean_sh = MAPE(testdata_istanbul_data_mean_sh, ar_istanbul_data_mean_sh)\n",
88 | "mape_arma_istanbul_data_mean_sh = MAPE(testdata_istanbul_data_mean_sh, arma_istanbul_data_mean_sh)\n",
89 | "mape_arima_istanbul_data_mean_sh = MAPE(testdata_istanbul_data_mean_sh, arima_istanbul_data_mean_sh)\n",
90 | "mape_sarima_istanbul_data_mean_sh = MAPE(testdata_istanbul_data_mean_sh, sarima_istanbul_data_mean_sh)\n",
91 | "mape_slstm_istanbul_data_mean_sh = MAPE(testdata_istanbul_data_mean_sh, slstm_istanbul_data_mean_sh)\n",
92 | "mape_ssvr_istanbul_data_mean_sh = MAPE(testdata_istanbul_data_mean_sh, ssvr_istanbul_data_mean_sh)\n",
93 | "mape_sxgboost_istanbul_data_mean_sh = MAPE(testdata_istanbul_data_mean_sh, sxgboost_istanbul_data_mean_sh)\n",
94 | "mape_slstmarima_istanbul_data_mean_sh = MAPE(testdata_istanbul_data_mean_sh, slstmarima_istanbul_data_mean_sh)\n",
95 | "mape_ssvrarima_istanbul_data_mean_sh = MAPE(testdata_istanbul_data_mean_sh, ssvrarima_istanbul_data_mean_sh)\n",
96 | "mape_sxgboostarima_istanbul_data_mean_sh = MAPE(testdata_istanbul_data_mean_sh, sxgboostarima_istanbul_data_mean_sh)\n",
97 | "\n",
98 | "print(mape_ar_istanbul_data_mean_sh)\n",
99 | "print(mape_arma_istanbul_data_mean_sh)\n",
100 | "print(mape_arima_istanbul_data_mean_sh)\n",
101 | "print(mape_sarima_istanbul_data_mean_sh)\n",
102 | "print(mape_slstm_istanbul_data_mean_sh)\n",
103 | "print(mape_ssvr_istanbul_data_mean_sh)\n",
104 | "print(mape_sxgboost_istanbul_data_mean_sh)\n",
105 | "print(mape_slstmarima_istanbul_data_mean_sh)\n",
106 | "print(mape_ssvrarima_istanbul_data_mean_sh)\n",
107 | "print(mape_sxgboostarima_istanbul_data_mean_sh)"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": 4,
113 | "metadata": {},
114 | "outputs": [
115 | {
116 | "name": "stdout",
117 | "output_type": "stream",
118 | "text": [
119 | "26.65815858456\n",
120 | "23.500346531481295\n",
121 | "21.68985263196856\n",
122 | "15.153660727371493\n",
123 | "12.373201752239357\n",
124 | "12.024076054376485\n",
125 | "12.980894248875982\n",
126 | "12.391141607796829\n",
127 | "12.036228092861402\n",
128 | "12.99737582199011\n"
129 | ]
130 | }
131 | ],
132 | "source": [
133 | "mape_100_ar_istanbul_data_mean_sh = MAPE_100(testdata_istanbul_data_mean_sh, ar_istanbul_data_mean_sh)\n",
134 | "mape_100_arma_istanbul_data_mean_sh = MAPE_100(testdata_istanbul_data_mean_sh, arma_istanbul_data_mean_sh)\n",
135 | "mape_100_arima_istanbul_data_mean_sh = MAPE_100(testdata_istanbul_data_mean_sh, arima_istanbul_data_mean_sh)\n",
136 | "mape_100_sarima_istanbul_data_mean_sh = MAPE_100(testdata_istanbul_data_mean_sh, sarima_istanbul_data_mean_sh)\n",
137 | "mape_100_slstm_istanbul_data_mean_sh = MAPE_100(testdata_istanbul_data_mean_sh, slstm_istanbul_data_mean_sh)\n",
138 | "mape_100_ssvr_istanbul_data_mean_sh = MAPE_100(testdata_istanbul_data_mean_sh, ssvr_istanbul_data_mean_sh)\n",
139 | "mape_100_sxgboost_istanbul_data_mean_sh = MAPE_100(testdata_istanbul_data_mean_sh, sxgboost_istanbul_data_mean_sh)\n",
140 | "mape_100_slstmarima_istanbul_data_mean_sh = MAPE_100(testdata_istanbul_data_mean_sh, slstmarima_istanbul_data_mean_sh)\n",
141 | "mape_100_ssvrarima_istanbul_data_mean_sh = MAPE_100(testdata_istanbul_data_mean_sh, ssvrarima_istanbul_data_mean_sh)\n",
142 | "mape_100_sxgboostarima_istanbul_data_mean_sh = MAPE_100(testdata_istanbul_data_mean_sh, sxgboostarima_istanbul_data_mean_sh)\n",
143 | "\n",
144 | "print(mape_100_ar_istanbul_data_mean_sh)\n",
145 | "print(mape_100_arma_istanbul_data_mean_sh)\n",
146 | "print(mape_100_arima_istanbul_data_mean_sh)\n",
147 | "print(mape_100_sarima_istanbul_data_mean_sh)\n",
148 | "print(mape_100_slstm_istanbul_data_mean_sh)\n",
149 | "print(mape_100_ssvr_istanbul_data_mean_sh)\n",
150 | "print(mape_100_sxgboost_istanbul_data_mean_sh)\n",
151 | "print(mape_100_slstmarima_istanbul_data_mean_sh)\n",
152 | "print(mape_100_ssvrarima_istanbul_data_mean_sh)\n",
153 | "print(mape_100_sxgboostarima_istanbul_data_mean_sh)"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 5,
159 | "metadata": {},
160 | "outputs": [
161 | {
162 | "name": "stdout",
163 | "output_type": "stream",
164 | "text": [
165 | "25.311646310380866\n",
166 | "22.605266190396456\n",
167 | "16.743081708139183\n",
168 | "12.363706488522883\n",
169 | "10.107239766679745\n",
170 | "9.710289104268918\n",
171 | "10.599377377177987\n",
172 | "10.107467632206035\n",
173 | "9.705971697418457\n",
174 | "10.613657190766002\n"
175 | ]
176 | }
177 | ],
178 | "source": [
179 | "mape_250_ar_istanbul_data_mean_sh = MAPE_250(testdata_istanbul_data_mean_sh, ar_istanbul_data_mean_sh)\n",
180 | "mape_250_arma_istanbul_data_mean_sh = MAPE_250(testdata_istanbul_data_mean_sh, arma_istanbul_data_mean_sh)\n",
181 | "mape_250_arima_istanbul_data_mean_sh = MAPE_250(testdata_istanbul_data_mean_sh, arima_istanbul_data_mean_sh)\n",
182 | "mape_250_sarima_istanbul_data_mean_sh = MAPE_250(testdata_istanbul_data_mean_sh, sarima_istanbul_data_mean_sh)\n",
183 | "mape_250_slstm_istanbul_data_mean_sh = MAPE_250(testdata_istanbul_data_mean_sh, slstm_istanbul_data_mean_sh)\n",
184 | "mape_250_ssvr_istanbul_data_mean_sh = MAPE_250(testdata_istanbul_data_mean_sh, ssvr_istanbul_data_mean_sh)\n",
185 | "mape_250_sxgboost_istanbul_data_mean_sh = MAPE_250(testdata_istanbul_data_mean_sh, sxgboost_istanbul_data_mean_sh)\n",
186 | "mape_250_slstmarima_istanbul_data_mean_sh = MAPE_250(testdata_istanbul_data_mean_sh, slstmarima_istanbul_data_mean_sh)\n",
187 | "mape_250_ssvrarima_istanbul_data_mean_sh = MAPE_250(testdata_istanbul_data_mean_sh, ssvrarima_istanbul_data_mean_sh)\n",
188 | "mape_250_sxgboostarima_istanbul_data_mean_sh = MAPE_250(testdata_istanbul_data_mean_sh, sxgboostarima_istanbul_data_mean_sh)\n",
189 | "\n",
190 | "print(mape_250_ar_istanbul_data_mean_sh)\n",
191 | "print(mape_250_arma_istanbul_data_mean_sh)\n",
192 | "print(mape_250_arima_istanbul_data_mean_sh)\n",
193 | "print(mape_250_sarima_istanbul_data_mean_sh)\n",
194 | "print(mape_250_slstm_istanbul_data_mean_sh)\n",
195 | "print(mape_250_ssvr_istanbul_data_mean_sh)\n",
196 | "print(mape_250_sxgboost_istanbul_data_mean_sh)\n",
197 | "print(mape_250_slstmarima_istanbul_data_mean_sh)\n",
198 | "print(mape_250_ssvrarima_istanbul_data_mean_sh)\n",
199 | "print(mape_250_sxgboostarima_istanbul_data_mean_sh)"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": 6,
205 | "metadata": {},
206 | "outputs": [
207 | {
208 | "name": "stdout",
209 | "output_type": "stream",
210 | "text": [
211 | "50.59338322306562\n",
212 | "45.1325882067358\n",
213 | "40.46508991283195\n",
214 | "29.04744789522173\n",
215 | "23.601114571248978\n",
216 | "22.836079943066032\n",
217 | "24.71005580608951\n",
218 | "23.643619065937603\n",
219 | "22.850270792773518\n",
220 | "24.732252707828827\n"
221 | ]
222 | }
223 | ],
224 | "source": [
225 | "mae_ar_istanbul_data_mean_sh = MAE(testdata_istanbul_data_mean_sh, ar_istanbul_data_mean_sh)\n",
226 | "mae_arma_istanbul_data_mean_sh = MAE(testdata_istanbul_data_mean_sh, arma_istanbul_data_mean_sh)\n",
227 | "mae_arima_istanbul_data_mean_sh = MAE(testdata_istanbul_data_mean_sh, arima_istanbul_data_mean_sh)\n",
228 | "mae_sarima_istanbul_data_mean_sh = MAE(testdata_istanbul_data_mean_sh, sarima_istanbul_data_mean_sh)\n",
229 | "mae_slstm_istanbul_data_mean_sh = MAE(testdata_istanbul_data_mean_sh, slstm_istanbul_data_mean_sh)\n",
230 | "mae_ssvr_istanbul_data_mean_sh = MAE(testdata_istanbul_data_mean_sh, ssvr_istanbul_data_mean_sh)\n",
231 | "mae_sxgboost_istanbul_data_mean_sh = MAE(testdata_istanbul_data_mean_sh, sxgboost_istanbul_data_mean_sh)\n",
232 | "mae_slstmarima_istanbul_data_mean_sh = MAE(testdata_istanbul_data_mean_sh, slstmarima_istanbul_data_mean_sh)\n",
233 | "mae_ssvrarima_istanbul_data_mean_sh = MAE(testdata_istanbul_data_mean_sh, ssvrarima_istanbul_data_mean_sh)\n",
234 | "mae_sxgboostarima_istanbul_data_mean_sh = MAE(testdata_istanbul_data_mean_sh, sxgboostarima_istanbul_data_mean_sh)\n",
235 | "\n",
236 | "print(mae_ar_istanbul_data_mean_sh)\n",
237 | "print(mae_arma_istanbul_data_mean_sh)\n",
238 | "print(mae_arima_istanbul_data_mean_sh)\n",
239 | "print(mae_sarima_istanbul_data_mean_sh)\n",
240 | "print(mae_slstm_istanbul_data_mean_sh)\n",
241 | "print(mae_ssvr_istanbul_data_mean_sh)\n",
242 | "print(mae_sxgboost_istanbul_data_mean_sh)\n",
243 | "print(mae_slstmarima_istanbul_data_mean_sh)\n",
244 | "print(mae_ssvrarima_istanbul_data_mean_sh)\n",
245 | "print(mae_sxgboostarima_istanbul_data_mean_sh)"
246 | ]
247 | },
248 | {
249 | "cell_type": "code",
250 | "execution_count": null,
251 | "metadata": {},
252 | "outputs": [],
253 | "source": []
254 | }
255 | ],
256 | "metadata": {
257 | "interpreter": {
258 | "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f"
259 | },
260 | "kernelspec": {
261 | "display_name": "Python 3.7.9",
262 | "language": "python",
263 | "name": "python3"
264 | },
265 | "language_info": {
266 | "codemirror_mode": {
267 | "name": "ipython",
268 | "version": 3
269 | },
270 | "file_extension": ".py",
271 | "mimetype": "text/x-python",
272 | "name": "python",
273 | "nbconvert_exporter": "python",
274 | "pygments_lexer": "ipython3",
275 | "version": "3.7.9"
276 | },
277 | "orig_nbformat": 4
278 | },
279 | "nbformat": 4,
280 | "nbformat_minor": 2
281 | }
282 |
--------------------------------------------------------------------------------
/evaluatemodels_pems_716933.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 27,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "import pandas as pd\n",
11 | "\n",
12 | "#Defining MAPE function\n",
13 | "def MAPE(actual_values,predicted_values):\n",
14 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
15 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
16 | " mape = np.mean(np.abs((actual_values - predicted_values)/actual_values))*100\n",
17 | " return mape\n",
18 | "\n",
19 | "#Defining MAPE_100 function\n",
20 | "def MAPE_100(actual_values,predicted_values):\n",
21 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
22 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
23 | " x = np.concatenate((actual_values,predicted_values), axis=1)\n",
24 | " x_100 = x[x[:,0]>100]\n",
25 | " mape = np.mean(np.abs((x_100[:,0] - x_100[:,1]) / x_100[:,0]))*100\n",
26 | " return mape\n",
27 | "\n",
28 | "#Defining MAPE_250 function\n",
29 | "def MAPE_250(actual_values,predicted_values):\n",
30 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
31 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
32 | " x = np.concatenate((actual_values,predicted_values), axis=1)\n",
33 | " x_250 = x[x[:,0]>250]\n",
34 | " mape = np.mean(np.abs((x_250[:,0] - x_250[:,1]) / x_250[:,0]))*100\n",
35 | " return mape\n",
36 | "\n",
37 | "#Defining MAE function\n",
38 | "def MAE(actual_values,predicted_values):\n",
39 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
40 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
41 | " mae = np.mean(np.abs(actual_values - predicted_values))\n",
42 | " return mae"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 28,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "testdata_716933 = np.array(pd.read_csv('data/pems/pems-d07-9months-2021-station716933-15min.csv')[['Total Flow']][-7863:])\n",
52 | "ar_716933 = np.array(pd.read_csv(\"point_forecasts/ar_pems_716933_test.csv\")[\"0\"])\n",
53 | "arma_716933 = np.array(pd.read_csv(\"point_forecasts/arma_pems_716933_test.csv\")[\"0\"])\n",
54 | "arima_716933 = np.array(pd.read_csv(\"point_forecasts/arima_pems_716933_test.csv\")[\"0\"])\n",
55 | "sarima_716933 = np.array(pd.read_csv(\"point_forecasts/sarima_pems_716933_test.csv\")[\"0\"])\n",
56 | "slstm_716933 = np.array(pd.read_csv(\"point_forecasts/slstm_pems_716933_test.csv\")[\"0\"])\n",
57 | "ssvr_716933 = np.array(pd.read_csv(\"point_forecasts/ssvr_pems_716933_test.csv\")[\"0\"])\n",
58 | "sxgboost_716933 = np.array(pd.read_csv(\"point_forecasts/sxgboost_pems_716933_test.csv\")[\"0\"])\n",
59 | "slstmarima_716933 = np.array(pd.read_csv(\"point_forecasts/slstmarima_pems_716933_test.csv\")[\"0\"])\n",
60 | "ssvrarima_716933 = np.array(pd.read_csv(\"point_forecasts/ssvrarima_pems_716933_test.csv\")[\"0\"])\n",
61 | "sxgboostarima_716933 = np.array(pd.read_csv(\"point_forecasts/sxgboostarima_pems_716933_test.csv\")[\"0\"])"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 29,
67 | "metadata": {},
68 | "outputs": [],
69 | "source": [
70 | "mape_ar_716933 = MAPE(testdata_716933, ar_716933)\n",
71 | "mape_arma_716933 = MAPE(testdata_716933, arma_716933)\n",
72 | "mape_arima_716933 = MAPE(testdata_716933, arima_716933)\n",
73 | "mape_sarima_716933 = MAPE(testdata_716933, sarima_716933)\n",
74 | "mape_slstm_716933 = MAPE(testdata_716933, slstm_716933)\n",
75 | "mape_ssvr_716933 = MAPE(testdata_716933, ssvr_716933)\n",
76 | "mape_sxgboost_716933 = MAPE(testdata_716933, sxgboost_716933)\n",
77 | "mape_slstmarima_716933 = MAPE(testdata_716933, slstmarima_716933)\n",
78 | "mape_ssvrarima_716933 = MAPE(testdata_716933, ssvrarima_716933)\n",
79 | "mape_sxgboostarima_716933 = MAPE(testdata_716933, sxgboostarima_716933)\n"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 30,
85 | "metadata": {},
86 | "outputs": [
87 | {
88 | "name": "stdout",
89 | "output_type": "stream",
90 | "text": [
91 | "8.047163203140883\n",
92 | "7.74001675364598\n",
93 | "7.3660278247569115\n",
94 | "6.648892868326587\n",
95 | "6.253069644402445\n",
96 | "6.000669052475197\n",
97 | "6.099251378263277\n",
98 | "6.253429810578235\n",
99 | "6.000702024746338\n",
100 | "6.099321286450661\n"
101 | ]
102 | }
103 | ],
104 | "source": [
105 | "print(mape_ar_716933)\n",
106 | "print(mape_arma_716933)\n",
107 | "print(mape_arima_716933)\n",
108 | "print(mape_sarima_716933)\n",
109 | "print(mape_slstm_716933 )\n",
110 | "print(mape_ssvr_716933)\n",
111 | "print(mape_sxgboost_716933)\n",
112 | "print(mape_slstmarima_716933)\n",
113 | "print(mape_ssvrarima_716933)\n",
114 | "print(mape_sxgboostarima_716933)"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": 31,
120 | "metadata": {},
121 | "outputs": [
122 | {
123 | "name": "stdout",
124 | "output_type": "stream",
125 | "text": [
126 | "8.047163203140883\n",
127 | "7.74001675364598\n",
128 | "7.3660278247569115\n",
129 | "6.648892868326587\n",
130 | "6.253069644402445\n",
131 | "6.000669052475197\n",
132 | "6.099251378263277\n",
133 | "6.253429810578235\n",
134 | "6.000702024746338\n",
135 | "6.099321286450661\n"
136 | ]
137 | }
138 | ],
139 | "source": [
140 | "mape_100_ar_716933 = MAPE_100(testdata_716933, ar_716933)\n",
141 | "mape_100_arma_716933 = MAPE_100(testdata_716933, arma_716933)\n",
142 | "mape_100_arima_716933 = MAPE_100(testdata_716933, arima_716933)\n",
143 | "mape_100_sarima_716933 = MAPE_100(testdata_716933, sarima_716933)\n",
144 | "mape_100_slstm_716933 = MAPE_100(testdata_716933, slstm_716933)\n",
145 | "mape_100_ssvr_716933 = MAPE_100(testdata_716933, ssvr_716933)\n",
146 | "mape_100_sxgboost_716933 = MAPE_100(testdata_716933, sxgboost_716933)\n",
147 | "mape_100_slstmarima_716933 = MAPE_100(testdata_716933, slstmarima_716933)\n",
148 | "mape_100_ssvrarima_716933 = MAPE_100(testdata_716933, ssvrarima_716933)\n",
149 | "mape_100_sxgboostarima_716933 = MAPE_100(testdata_716933, sxgboostarima_716933)\n",
150 | "print(mape_100_ar_716933)\n",
151 | "print(mape_100_arma_716933)\n",
152 | "print(mape_100_arima_716933)\n",
153 | "print(mape_100_sarima_716933)\n",
154 | "print(mape_100_slstm_716933 )\n",
155 | "print(mape_100_ssvr_716933)\n",
156 | "print(mape_100_sxgboost_716933)\n",
157 | "print(mape_100_slstmarima_716933)\n",
158 | "print(mape_100_ssvrarima_716933)\n",
159 | "print(mape_100_sxgboostarima_716933)\n"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": 32,
165 | "metadata": {},
166 | "outputs": [
167 | {
168 | "name": "stdout",
169 | "output_type": "stream",
170 | "text": [
171 | "7.69157771661967\n",
172 | "7.333505018200419\n",
173 | "6.816440535576364\n",
174 | "6.248341274074011\n",
175 | "5.619165660371625\n",
176 | "5.469034648723104\n",
177 | "5.559938973342971\n",
178 | "5.619172036305884\n",
179 | "5.469042891030165\n",
180 | "5.559936891963197\n"
181 | ]
182 | }
183 | ],
184 | "source": [
185 | "mape_250_ar_716933 = MAPE_250(testdata_716933, ar_716933)\n",
186 | "mape_250_arma_716933 = MAPE_250(testdata_716933, arma_716933)\n",
187 | "mape_250_arima_716933 = MAPE_250(testdata_716933, arima_716933)\n",
188 | "mape_250_sarima_716933 = MAPE_250(testdata_716933, sarima_716933)\n",
189 | "mape_250_slstm_716933 = MAPE_250(testdata_716933, slstm_716933)\n",
190 | "mape_250_ssvr_716933 = MAPE_250(testdata_716933, ssvr_716933)\n",
191 | "mape_250_sxgboost_716933 = MAPE_250(testdata_716933, sxgboost_716933)\n",
192 | "mape_250_slstmarima_716933 = MAPE_250(testdata_716933, slstmarima_716933)\n",
193 | "mape_250_ssvrarima_716933 = MAPE_250(testdata_716933, ssvrarima_716933)\n",
194 | "mape_250_sxgboostarima_716933 = MAPE_250(testdata_716933, sxgboostarima_716933)\n",
195 | "print(mape_250_ar_716933)\n",
196 | "print(mape_250_arma_716933)\n",
197 | "print(mape_250_arima_716933)\n",
198 | "print(mape_250_sarima_716933)\n",
199 | "print(mape_250_slstm_716933 )\n",
200 | "print(mape_250_ssvr_716933)\n",
201 | "print(mape_250_sxgboost_716933)\n",
202 | "print(mape_250_slstmarima_716933)\n",
203 | "print(mape_250_ssvrarima_716933)\n",
204 | "print(mape_250_sxgboostarima_716933)\n"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": 33,
210 | "metadata": {},
211 | "outputs": [
212 | {
213 | "name": "stdout",
214 | "output_type": "stream",
215 | "text": [
216 | "36.61291387491914\n",
217 | "34.706397687562195\n",
218 | "31.70197713784382\n",
219 | "29.394136701895967\n",
220 | "26.841926865064224\n",
221 | "26.015406145713285\n",
222 | "26.435141600449363\n",
223 | "26.842688512959494\n",
224 | "26.015615351192313\n",
225 | "26.43538085374002\n"
226 | ]
227 | }
228 | ],
229 | "source": [
230 | "mae_ar_716933 = MAE(testdata_716933, ar_716933)\n",
231 | "mae_arma_716933 = MAE(testdata_716933, arma_716933)\n",
232 | "mae_arima_716933 = MAE(testdata_716933, arima_716933)\n",
233 | "mae_sarima_716933 = MAE(testdata_716933, sarima_716933)\n",
234 | "mae_slstm_716933 = MAE(testdata_716933, slstm_716933)\n",
235 | "mae_ssvr_716933 = MAE(testdata_716933, ssvr_716933)\n",
236 | "mae_sxgboost_716933 = MAE(testdata_716933, sxgboost_716933)\n",
237 | "mae_slstmarima_716933 = MAE(testdata_716933, slstmarima_716933)\n",
238 | "mae_ssvrarima_716933 = MAE(testdata_716933, ssvrarima_716933)\n",
239 | "mae_sxgboostarima_716933 = MAE(testdata_716933, sxgboostarima_716933)\n",
240 | "print(mae_ar_716933)\n",
241 | "print(mae_arma_716933)\n",
242 | "print(mae_arima_716933)\n",
243 | "print(mae_sarima_716933)\n",
244 | "print(mae_slstm_716933 )\n",
245 | "print(mae_ssvr_716933)\n",
246 | "print(mae_sxgboost_716933)\n",
247 | "print(mae_slstmarima_716933)\n",
248 | "print(mae_ssvrarima_716933)\n",
249 | "print(mae_sxgboostarima_716933)"
250 | ]
251 | },
252 | {
253 | "cell_type": "code",
254 | "execution_count": null,
255 | "metadata": {},
256 | "outputs": [],
257 | "source": []
258 | }
259 | ],
260 | "metadata": {
261 | "interpreter": {
262 | "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f"
263 | },
264 | "kernelspec": {
265 | "display_name": "Python 3.7.9",
266 | "language": "python",
267 | "name": "python3"
268 | },
269 | "language_info": {
270 | "codemirror_mode": {
271 | "name": "ipython",
272 | "version": 3
273 | },
274 | "file_extension": ".py",
275 | "mimetype": "text/x-python",
276 | "name": "python",
277 | "nbconvert_exporter": "python",
278 | "pygments_lexer": "ipython3",
279 | "version": "3.7.9"
280 | },
281 | "orig_nbformat": 4
282 | },
283 | "nbformat": 4,
284 | "nbformat_minor": 2
285 | }
286 |
--------------------------------------------------------------------------------
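One detail worth noting in the station 716933 output above: the MAPE and MAPE_100 values are identical for every model, which is consistent with all actual Total Flow values in the 7863-step test window exceeding 100 vehicles per 15 minutes, so the >100 filter removes nothing. A one-line check along these lines (using the `testdata_716933` array already loaded in the notebook):

    print((testdata_716933 <= 100).sum())  # 0 would mean the MAPE_100 filter is never active
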
/evaluatemodels_pems_717087.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 32,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "import pandas as pd\n",
11 | "\n",
12 | "#Defining MAPE function\n",
13 | "def MAPE(actual_values,predicted_values):\n",
14 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
15 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
16 | " mape = np.mean(np.abs((actual_values - predicted_values)/actual_values))*100\n",
17 | " return mape\n",
18 | "\n",
19 | "#Defining MAPE_100 function\n",
20 | "def MAPE_100(actual_values,predicted_values):\n",
21 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
22 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
23 | " x = np.concatenate((actual_values,predicted_values), axis=1)\n",
24 | " x_100 = x[x[:,0]>100]\n",
25 | " mape = np.mean(np.abs((x_100[:,0] - x_100[:,1]) / x_100[:,0]))*100\n",
26 | " return mape\n",
27 | "\n",
28 | "#Defining MAPE_250 function\n",
29 | "def MAPE_250(actual_values,predicted_values):\n",
30 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
31 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
32 | " x = np.concatenate((actual_values,predicted_values), axis=1)\n",
33 | " x_250 = x[x[:,0]>250]\n",
34 | " mape = np.mean(np.abs((x_250[:,0] - x_250[:,1]) / x_250[:,0]))*100\n",
35 | " return mape\n",
36 | "\n",
37 | "#Defining MAE function\n",
38 | "def MAE(actual_values,predicted_values):\n",
39 | " predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))\n",
40 | " actual_values = np.array(actual_values).reshape((len(actual_values), 1))\n",
41 | " mae = np.mean(np.abs(actual_values - predicted_values))\n",
42 | " return mae"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 33,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "testdata_717087 = np.array(pd.read_csv('data/pems/pems-d07-9months-2021-station717087-15min.csv')[['Total Flow']][-7863:])\n",
52 | "ar_717087 = np.array(pd.read_csv(\"point_forecasts/ar_pems_717087_test.csv\")[\"0\"])\n",
53 | "arma_717087 = np.array(pd.read_csv(\"point_forecasts/arma_pems_717087_test.csv\")[\"0\"])\n",
54 | "arima_717087 = np.array(pd.read_csv(\"point_forecasts/arima_pems_717087_test.csv\")[\"0\"])\n",
55 | "sarima_717087 = np.array(pd.read_csv(\"point_forecasts/sarima_pems_717087_test.csv\")[\"0\"])\n",
56 | "slstm_717087 = np.array(pd.read_csv(\"point_forecasts/slstm_pems_717087_test.csv\")[\"0\"])\n",
57 | "ssvr_717087 = np.array(pd.read_csv(\"point_forecasts/ssvr_pems_717087_test.csv\")[\"0\"])\n",
58 | "sxgboost_717087 = np.array(pd.read_csv(\"point_forecasts/sxgboost_pems_717087_test.csv\")[\"0\"])\n",
59 | "slstmarima_717087 = np.array(pd.read_csv(\"point_forecasts/slstmarima_pems_717087_test.csv\")[\"0\"])\n",
60 | "ssvrarima_717087 = np.array(pd.read_csv(\"point_forecasts/ssvrarima_pems_717087_test.csv\")[\"0\"])\n",
61 | "sxgboostarima_717087 = np.array(pd.read_csv(\"point_forecasts/sxgboostarima_pems_717087_test.csv\")[\"0\"])"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 34,
67 | "metadata": {},
68 | "outputs": [],
69 | "source": [
70 | "mape_ar_717087 = MAPE(testdata_717087, ar_717087)\n",
71 | "mape_arma_717087 = MAPE(testdata_717087, arma_717087)\n",
72 | "mape_arima_717087 = MAPE(testdata_717087, arima_717087)\n",
73 | "mape_sarima_717087 = MAPE(testdata_717087, sarima_717087)\n",
74 | "mape_slstm_717087 = MAPE(testdata_717087, slstm_717087)\n",
75 | "mape_ssvr_717087 = MAPE(testdata_717087, ssvr_717087)\n",
76 | "mape_sxgboost_717087 = MAPE(testdata_717087, sxgboost_717087)\n",
77 | "mape_slstmarima_717087 = MAPE(testdata_717087, slstmarima_717087)\n",
78 | "mape_ssvrarima_717087 = MAPE(testdata_717087, ssvrarima_717087)\n",
79 | "mape_sxgboostarima_717087 = MAPE(testdata_717087, sxgboostarima_717087)\n"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 35,
85 | "metadata": {},
86 | "outputs": [
87 | {
88 | "name": "stdout",
89 | "output_type": "stream",
90 | "text": [
91 | "8.11551399417616\n",
92 | "9.038353131930679\n",
93 | "8.22873553313542\n",
94 | "7.3760212002738506\n",
95 | "7.580976751711058\n",
96 | "7.2004394447703\n",
97 | "6.978562813843733\n",
98 | "7.5813706923892905\n",
99 | "7.200232182800511\n",
100 | "6.979441088429994\n"
101 | ]
102 | }
103 | ],
104 | "source": [
105 | "print(mape_ar_717087)\n",
106 | "print(mape_arma_717087)\n",
107 | "print(mape_arima_717087)\n",
108 | "print(mape_sarima_717087)\n",
109 | "print(mape_slstm_717087 )\n",
110 | "print(mape_ssvr_717087)\n",
111 | "print(mape_sxgboost_717087)\n",
112 | "print(mape_slstmarima_717087)\n",
113 | "print(mape_ssvrarima_717087)\n",
114 | "print(mape_sxgboostarima_717087)"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": 36,
120 | "metadata": {},
121 | "outputs": [
122 | {
123 | "name": "stdout",
124 | "output_type": "stream",
125 | "text": [
126 | "7.391720458354506\n",
127 | "8.06109426125678\n",
128 | "7.314031730896983\n",
129 | "6.567314554629439\n",
130 | "6.1178471735164255\n",
131 | "5.774518050729633\n",
132 | "5.8093474466635975\n",
133 | "6.118317316487666\n",
134 | "5.774260536340519\n",
135 | "5.809826020231966\n"
136 | ]
137 | }
138 | ],
139 | "source": [
140 | "mape_100_ar_717087 = MAPE_100(testdata_717087, ar_717087)\n",
141 | "mape_100_arma_717087 = MAPE_100(testdata_717087, arma_717087)\n",
142 | "mape_100_arima_717087 = MAPE_100(testdata_717087, arima_717087)\n",
143 | "mape_100_sarima_717087 = MAPE_100(testdata_717087, sarima_717087)\n",
144 | "mape_100_slstm_717087 = MAPE_100(testdata_717087, slstm_717087)\n",
145 | "mape_100_ssvr_717087 = MAPE_100(testdata_717087, ssvr_717087)\n",
146 | "mape_100_sxgboost_717087 = MAPE_100(testdata_717087, sxgboost_717087)\n",
147 | "mape_100_slstmarima_717087 = MAPE_100(testdata_717087, slstmarima_717087)\n",
148 | "mape_100_ssvrarima_717087 = MAPE_100(testdata_717087, ssvrarima_717087)\n",
149 | "mape_100_sxgboostarima_717087 = MAPE_100(testdata_717087, sxgboostarima_717087)\n",
150 | "print(mape_100_ar_717087)\n",
151 | "print(mape_100_arma_717087)\n",
152 | "print(mape_100_arima_717087)\n",
153 | "print(mape_100_sarima_717087)\n",
154 | "print(mape_100_slstm_717087 )\n",
155 | "print(mape_100_ssvr_717087)\n",
156 | "print(mape_100_sxgboost_717087)\n",
157 | "print(mape_100_slstmarima_717087)\n",
158 | "print(mape_100_ssvrarima_717087)\n",
159 | "print(mape_100_sxgboostarima_717087)\n"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": 37,
165 | "metadata": {},
166 | "outputs": [
167 | {
168 | "name": "stdout",
169 | "output_type": "stream",
170 | "text": [
171 | "6.267276452561113\n",
172 | "6.899966861590215\n",
173 | "5.873963240467723\n",
174 | "5.781455762366732\n",
175 | "4.977597259106809\n",
176 | "4.768709853556574\n",
177 | "4.850612193880713\n",
178 | "4.977597820989974\n",
179 | "4.76871911521475\n",
180 | "4.8506181468472676\n"
181 | ]
182 | }
183 | ],
184 | "source": [
185 | "mape_250_ar_717087 = MAPE_250(testdata_717087, ar_717087)\n",
186 | "mape_250_arma_717087 = MAPE_250(testdata_717087, arma_717087)\n",
187 | "mape_250_arima_717087 = MAPE_250(testdata_717087, arima_717087)\n",
188 | "mape_250_sarima_717087 = MAPE_250(testdata_717087, sarima_717087)\n",
189 | "mape_250_slstm_717087 = MAPE_250(testdata_717087, slstm_717087)\n",
190 | "mape_250_ssvr_717087 = MAPE_250(testdata_717087, ssvr_717087)\n",
191 | "mape_250_sxgboost_717087 = MAPE_250(testdata_717087, sxgboost_717087)\n",
192 | "mape_250_slstmarima_717087 = MAPE_250(testdata_717087, slstmarima_717087)\n",
193 | "mape_250_ssvrarima_717087 = MAPE_250(testdata_717087, ssvrarima_717087)\n",
194 | "mape_250_sxgboostarima_717087 = MAPE_250(testdata_717087, sxgboostarima_717087)\n",
195 | "print(mape_250_ar_717087)\n",
196 | "print(mape_250_arma_717087)\n",
197 | "print(mape_250_arima_717087)\n",
198 | "print(mape_250_sarima_717087)\n",
199 | "print(mape_250_slstm_717087 )\n",
200 | "print(mape_250_ssvr_717087)\n",
201 | "print(mape_250_sxgboost_717087)\n",
202 | "print(mape_250_slstmarima_717087)\n",
203 | "print(mape_250_ssvrarima_717087)\n",
204 | "print(mape_250_sxgboostarima_717087)\n"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": 38,
210 | "metadata": {},
211 | "outputs": [
212 | {
213 | "name": "stdout",
214 | "output_type": "stream",
215 | "text": [
216 | "22.2411687656336\n",
217 | "24.498942450758474\n",
218 | "21.234435065529222\n",
219 | "20.023570872855185\n",
220 | "18.291041833100174\n",
221 | "17.444472161772747\n",
222 | "17.4993064141761\n",
223 | "18.291404048993336\n",
224 | "17.44422946646671\n",
225 | "17.500098465644356\n"
226 | ]
227 | }
228 | ],
229 | "source": [
230 | "mae_ar_717087 = MAE(testdata_717087, ar_717087)\n",
231 | "mae_arma_717087 = MAE(testdata_717087, arma_717087)\n",
232 | "mae_arima_717087 = MAE(testdata_717087, arima_717087)\n",
233 | "mae_sarima_717087 = MAE(testdata_717087, sarima_717087)\n",
234 | "mae_slstm_717087 = MAE(testdata_717087, slstm_717087)\n",
235 | "mae_ssvr_717087 = MAE(testdata_717087, ssvr_717087)\n",
236 | "mae_sxgboost_717087 = MAE(testdata_717087, sxgboost_717087)\n",
237 | "mae_slstmarima_717087 = MAE(testdata_717087, slstmarima_717087)\n",
238 | "mae_ssvrarima_717087 = MAE(testdata_717087, ssvrarima_717087)\n",
239 | "mae_sxgboostarima_717087 = MAE(testdata_717087, sxgboostarima_717087)\n",
240 | "print(mae_ar_717087)\n",
241 | "print(mae_arma_717087)\n",
242 | "print(mae_arima_717087)\n",
243 | "print(mae_sarima_717087)\n",
244 | "print(mae_slstm_717087 )\n",
245 | "print(mae_ssvr_717087)\n",
246 | "print(mae_sxgboost_717087)\n",
247 | "print(mae_slstmarima_717087)\n",
248 | "print(mae_ssvrarima_717087)\n",
249 | "print(mae_sxgboostarima_717087)"
250 | ]
251 | },
252 | {
253 | "cell_type": "code",
254 | "execution_count": null,
255 | "metadata": {},
256 | "outputs": [],
257 | "source": []
258 | }
259 | ],
260 | "metadata": {
261 | "interpreter": {
262 | "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f"
263 | },
264 | "kernelspec": {
265 | "display_name": "Python 3.7.9",
266 | "language": "python",
267 | "name": "python3"
268 | },
269 | "language_info": {
270 | "codemirror_mode": {
271 | "name": "ipython",
272 | "version": 3
273 | },
274 | "file_extension": ".py",
275 | "mimetype": "text/x-python",
276 | "name": "python",
277 | "nbconvert_exporter": "python",
278 | "pygments_lexer": "ipython3",
279 | "version": "3.7.9"
280 | },
281 | "orig_nbformat": 4
282 | },
283 | "nbformat": 4,
284 | "nbformat_minor": 2
285 | }
286 |
--------------------------------------------------------------------------------
/evaluationmetrics_pointforecasts.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | #Defining MAPE function
4 | def MAPE(actual_values,predicted_values):
5 | predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))
6 | actual_values = np.array(actual_values).reshape((len(actual_values), 1))
7 | mape = np.mean(np.abs((actual_values - predicted_values)/actual_values))*100
8 | return mape
9 |
10 | #Defining MAPE_100 function
11 | def MAPE_100(actual_values,predicted_values):
12 | predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))
13 | actual_values = np.array(actual_values).reshape((len(actual_values), 1))
14 | x = np.concatenate((actual_values,predicted_values), axis=1)
15 | x_100 = x[x[:,0]>100]
16 | mape = np.mean(np.abs((x_100[:,0] - x_100[:,1]) / x_100[:,0]))*100
17 | return mape
18 |
19 | #Defining MAPE_250 function
20 | def MAPE_250(actual_values,predicted_values):
21 | predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))
22 | actual_values = np.array(actual_values).reshape((len(actual_values), 1))
23 | x = np.concatenate((actual_values,predicted_values), axis=1)
24 | x_250 = x[x[:,0]>250]
25 | mape = np.mean(np.abs((x_250[:,0] - x_250[:,1]) / x_250[:,0]))*100
26 | return mape
27 |
28 | #Defining MAE function
29 | def MAE(actual_values,predicted_values):
30 | predicted_values = np.array(predicted_values).reshape((len(predicted_values), 1))
31 | actual_values = np.array(actual_values).reshape((len(actual_values), 1))
32 | mae = np.mean(np.abs(actual_values - predicted_values))
33 | return mae
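34 | 
35 | #Illustrative usage (toy numbers, not thesis results): the thresholded variants
36 | #only score the observations whose actual value exceeds 100 (MAPE_100) or 250 (MAPE_250).
37 | if __name__ == "__main__":
38 |     actual = [80, 120, 260, 300]
39 |     predicted = [90, 110, 250, 330]
40 |     print(MAPE(actual, predicted))      #uses all four points
41 |     print(MAPE_100(actual, predicted))  #uses the three points with actual > 100
42 |     print(MAPE_250(actual, predicted))  #uses the two points with actual > 250
43 |     print(MAE(actual, predicted))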
--------------------------------------------------------------------------------
/evaluationmetrics_qrapproaches.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | #implement unconditional coverage
4 | def unconditional_coverage(PIs, data_test):
5 | indicator = []
6 | for i in range(len(PIs)):
7 | if PIs[i][0] < data_test[i] < PIs[i][1]:
8 | indicator.append(1)
9 | else:
10 | indicator.append(0)
11 | uc = np.sum(indicator)/len(indicator)
12 | return uc
13 |
14 | #implement Winkler's score
15 | def winlers_score(PIs, data_test, tau):
16 | indicators = []
17 | for i in range(len(PIs)):
18 | if PIs[i][0] < data_test[i] < PIs[i][1]:
19 | score = PIs[i][1]-PIs[i][0]
20 | indicators.append(score)
21 | elif data_test[i] < PIs[i][0]:
22 | score = (PIs[i][1]-PIs[i][0]) + 2 / tau * (PIs[i][0]-data_test[i])
23 | indicators.append(score)
24 | elif PIs[i][1] < data_test[i]:
25 | score = (PIs[i][1]-PIs[i][0]) + 2 / tau * (data_test[i]-PIs[i][1])
26 | indicators.append(score)
27 | wc = np.sum(indicators)/len(indicators)
28 | return wc
29 |
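30 | #Illustrative usage (toy numbers, not thesis results): three prediction intervals
31 | #checked against three observations; tau is the nominal miscoverage level of the
32 | #interval (e.g. 0.1 for a 90% interval built from the 5% and 95% quantiles).
33 | if __name__ == "__main__":
34 |     PIs = [[90, 110], [100, 130], [180, 220]]
35 |     observations = [105, 135, 200]
36 |     print(unconditional_coverage(PIs, observations))  #2 of 3 intervals cover -> 0.666...
37 |     print(winlers_score(PIs, observations, tau=0.1))   #interval width plus penalty for the missed point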
--------------------------------------------------------------------------------
/hybridmodels_istanbul.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from statsmodels.tsa.arima_model import ARIMA
3 | import pandas as pd
4 |
5 | def armodel(train_data, test_data):
6 | arima = ARIMA(train_data, order=(1,0,0))
7 | arima_fit = arima.fit()
8 | print(arima_fit.summary())
9 | parameters = arima_fit.params
10 | a = parameters[1]
11 | output_test = arima_fit.forecast()
12 | test_predictions = []
13 | test_predictions.append(output_test[0][0])
14 | for t in range(len(test_data)-1):
15 | output_test = (test_data[t] * a)
16 | test_predictions.append(output_test[0])
17 | return test_predictions
18 |
19 | def hybrid_model(train_predictions, train_data, test_predictions, test_data):
20 | train_data = np.array(train_data).reshape((len(train_data),1))
21 | train_predictions = np.array(train_predictions).reshape((len(train_predictions),1))
22 | test_data = np.array(test_data).reshape((len(test_data),1))
23 | test_predictions = np.array(test_predictions).reshape((len(test_predictions),1))
24 | train_error_series = train_data - train_predictions
25 | test_error_series = test_data - test_predictions
26 |     #model the residual series of the base forecasts with an AR(1) process
27 | testerror_predictions = armodel(train_error_series, test_error_series)
28 | testerror_predictions = np.array(testerror_predictions).reshape((len(testerror_predictions),1))
29 | output = test_predictions + testerror_predictions
30 | output = np.array(output).reshape((len(output),1))
31 | return output
32 |
33 | traindata_istanbul_data_del = np.array(pd.read_csv('data/istanbul/istanbul_data_del.csv')[['NUMBER_OF_VEHICLES']][169:7565])
34 | testdata_istanbul_data_del = np.array(pd.read_csv('data/istanbul/istanbul_data_del.csv')[['NUMBER_OF_VEHICLES']][7565:])
35 | trainslstm_istanbul_data_del = np.array(pd.read_csv("point_forecasts/slstm_istanbul_data_del_train.csv")["0"])
36 | trainssvr_istanbul_data_del = np.array(pd.read_csv("point_forecasts/ssvr_istanbul_data_del_train.csv")["0"])
37 | trainsxgboost_istanbul_data_del = np.array(pd.read_csv("point_forecasts/sxgboost_istanbul_data_del_train.csv")["0"])
38 | testslstm_istanbul_data_del = np.array(pd.read_csv("point_forecasts/slstm_istanbul_data_del_test.csv")["0"])
39 | testssvr_istanbul_data_del = np.array(pd.read_csv("point_forecasts/ssvr_istanbul_data_del_test.csv")["0"])
40 | testsxgboost_istanbul_data_del = np.array(pd.read_csv("point_forecasts/sxgboost_istanbul_data_del_test.csv")["0"])
41 |
42 | slstmarima_istanbul_data_del = hybrid_model(trainslstm_istanbul_data_del, traindata_istanbul_data_del, testslstm_istanbul_data_del, testdata_istanbul_data_del)
43 | ssvrarima_istanbul_data_del = hybrid_model(trainssvr_istanbul_data_del, traindata_istanbul_data_del, testssvr_istanbul_data_del, testdata_istanbul_data_del)
44 | sxgboostarima_istanbul_data_del = hybrid_model(trainsxgboost_istanbul_data_del, traindata_istanbul_data_del, testsxgboost_istanbul_data_del, testdata_istanbul_data_del)
45 | pd.DataFrame(slstmarima_istanbul_data_del).to_csv("point_forecasts/slstmarima_istanbul_data_del_test.csv")
46 | pd.DataFrame(ssvrarima_istanbul_data_del).to_csv("point_forecasts/ssvrarima_istanbul_data_del_test.csv")
47 | pd.DataFrame(sxgboostarima_istanbul_data_del).to_csv("point_forecasts/sxgboostarima_istanbul_data_del_test.csv")
48 |
49 | traindata_istanbul_data_mean_sh = np.array(pd.read_csv('data/istanbul/istanbul_data_mean_sh.csv')[['NUMBER_OF_VEHICLES']][169:8164])
50 | testdata_istanbul_data_mean_sh = np.array(pd.read_csv('data/istanbul/istanbul_data_mean_sh.csv')[['NUMBER_OF_VEHICLES']][8164:])
51 | trainslstm_istanbul_data_mean_sh = np.array(pd.read_csv("point_forecasts/slstm_istanbul_data_mean_sh_train.csv")["0"])
52 | trainssvr_istanbul_data_mean_sh = np.array(pd.read_csv("point_forecasts/ssvr_istanbul_data_mean_sh_train.csv")["0"])
53 | trainsxgboost_istanbul_data_mean_sh = np.array(pd.read_csv("point_forecasts/sxgboost_istanbul_data_mean_sh_train.csv")["0"])
54 | testslstm_istanbul_data_mean_sh = np.array(pd.read_csv("point_forecasts/slstm_istanbul_data_mean_sh_test.csv")["0"])
55 | testssvr_istanbul_data_mean_sh = np.array(pd.read_csv("point_forecasts/ssvr_istanbul_data_mean_sh_test.csv")["0"])
56 | testsxgboost_istanbul_data_mean_sh = np.array(pd.read_csv("point_forecasts/sxgboost_istanbul_data_mean_sh_test.csv")["0"])
57 |
58 | slstmarima_istanbul_data_mean_sh = hybrid_model(trainslstm_istanbul_data_mean_sh, traindata_istanbul_data_mean_sh, testslstm_istanbul_data_mean_sh, testdata_istanbul_data_mean_sh)
59 | ssvrarima_istanbul_data_mean_sh = hybrid_model(trainssvr_istanbul_data_mean_sh, traindata_istanbul_data_mean_sh, testssvr_istanbul_data_mean_sh, testdata_istanbul_data_mean_sh)
60 | sxgboostarima_istanbul_data_mean_sh = hybrid_model(trainsxgboost_istanbul_data_mean_sh, traindata_istanbul_data_mean_sh, testsxgboost_istanbul_data_mean_sh, testdata_istanbul_data_mean_sh)
61 | pd.DataFrame(slstmarima_istanbul_data_mean_sh).to_csv("point_forecasts/slstmarima_istanbul_data_mean_sh_test.csv")
62 | pd.DataFrame(ssvrarima_istanbul_data_mean_sh).to_csv("point_forecasts/ssvrarima_istanbul_data_mean_sh_test.csv")
63 | pd.DataFrame(sxgboostarima_istanbul_data_mean_sh).to_csv("point_forecasts/sxgboostarima_istanbul_data_mean_sh_test.csv")
64 |
65 |
66 | traindata_istanbul_data_mean_sdsh= np.array(pd.read_csv('data/istanbul/istanbul_data_mean_sdsh.csv')[['NUMBER_OF_VEHICLES']][169:8164])
67 | testdata_istanbul_data_mean_sdsh= np.array(pd.read_csv('data/istanbul/istanbul_data_mean_sdsh.csv')[['NUMBER_OF_VEHICLES']][8164:])
68 | trainslstm_istanbul_data_mean_sdsh= np.array(pd.read_csv("point_forecasts/slstm_istanbul_data_mean_sdsh_train.csv")["0"])
69 | trainssvr_istanbul_data_mean_sdsh= np.array(pd.read_csv("point_forecasts/ssvr_istanbul_data_mean_sdsh_train.csv")["0"])
70 | trainsxgboost_istanbul_data_mean_sdsh= np.array(pd.read_csv("point_forecasts/sxgboost_istanbul_data_mean_sdsh_train.csv")["0"])
71 | testslstm_istanbul_data_mean_sdsh= np.array(pd.read_csv("point_forecasts/slstm_istanbul_data_mean_sdsh_test.csv")["0"])
72 | testssvr_istanbul_data_mean_sdsh= np.array(pd.read_csv("point_forecasts/ssvr_istanbul_data_mean_sdsh_test.csv")["0"])
73 | testsxgboost_istanbul_data_mean_sdsh= np.array(pd.read_csv("point_forecasts/sxgboost_istanbul_data_mean_sdsh_test.csv")["0"])
74 |
75 | slstmarima_istanbul_data_mean_sdsh= hybrid_model(trainslstm_istanbul_data_mean_sdsh, traindata_istanbul_data_mean_sdsh, testslstm_istanbul_data_mean_sdsh, testdata_istanbul_data_mean_sdsh)
76 | ssvrarima_istanbul_data_mean_sdsh= hybrid_model(trainssvr_istanbul_data_mean_sdsh, traindata_istanbul_data_mean_sdsh, testssvr_istanbul_data_mean_sdsh, testdata_istanbul_data_mean_sdsh)
77 | sxgboostarima_istanbul_data_mean_sdsh= hybrid_model(trainsxgboost_istanbul_data_mean_sdsh, traindata_istanbul_data_mean_sdsh, testsxgboost_istanbul_data_mean_sdsh, testdata_istanbul_data_mean_sdsh)
78 | pd.DataFrame(slstmarima_istanbul_data_mean_sdsh).to_csv("point_forecasts/slstmarima_istanbul_data_mean_sdsh_test.csv")
79 | pd.DataFrame(ssvrarima_istanbul_data_mean_sdsh).to_csv("point_forecasts/ssvrarima_istanbul_data_mean_sdsh_test.csv")
80 | pd.DataFrame(sxgboostarima_istanbul_data_mean_sdsh).to_csv("point_forecasts/sxgboostarima_istanbul_data_mean_sdsh_test.csv")
81 |
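82 | #Summary of the hybrid scheme implemented above: for each base learner (SLSTM, SSVR,
83 | #SXGBoost) the hybrid forecast is
84 | #    hybrid_t = base_forecast_t + a * e_{t-1},
85 | #where e_t = actual_t - base_forecast_t is the residual series and a is the AR(1)
86 | #coefficient fitted on the training residuals (the first test step instead uses the
87 | #fitted model's one-step forecast), so the AR part corrects the predictable
88 | #component of the base model's errors.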
--------------------------------------------------------------------------------
/hybridmodels_pems.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from statsmodels.tsa.arima_model import ARIMA
3 | import pandas as pd
4 |
5 | def arimamodel(train_data, test_data):
6 | arima = ARIMA(train_data, order=(1,1,0))
7 | arima_fit = arima.fit()
8 | print(arima_fit.summary())
9 |
10 | train_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(train_data),dynamic=train_data.all())
11 | train_predictions2 = []
12 | for t in range(len(train_data)):
13 | output_train = train_predictions[t] + train_data[t]
14 | train_predictions2.append(output_train)
15 |
16 | test_predictions = arima_fit.predict(start=len(train_data),end=len(train_data)+len(test_data)-1,dynamic=test_data.all())
17 | test_predictions2 = []
18 | test_data2=[]
19 | test_data2.append(train_data[-1])
20 | for i in range(len(test_data)-1):
21 | test_data2.append(test_data[i])
22 | for t in range(len(test_data2)):
23 | output_test = test_predictions[t] + test_data2[t]
24 | test_predictions2.append(output_test)
25 | return test_predictions
26 |
27 | def hybrid_model(train_predictions, train_data, test_predictions, test_data):
28 | train_data = np.array(train_data).reshape((len(train_data),1))
29 | train_predictions = np.array(train_predictions).reshape((len(train_predictions),1))
30 | test_data = np.array(test_data).reshape((len(test_data),1))
31 | test_predictions = np.array(test_predictions).reshape((len(test_predictions),1))
32 | train_error_series = train_data - train_predictions
33 | test_error_series = test_data - test_predictions
34 |     #model the residual series of the base forecasts with an ARIMA(1,1,0) process
35 | testerror_predictions = arimamodel(train_error_series, test_error_series)
36 | testerror_predictions = np.array(testerror_predictions).reshape((len(testerror_predictions),1))
37 | output = test_predictions + testerror_predictions
38 | output = np.array(output).reshape((len(output),1))
39 | return output
40 |
41 | traindata_716933 = np.array(pd.read_csv('data/pems/pems-d07-9months-2021-station716933-15min.csv')[['Total Flow']][673:18345])
42 | testdata_716933 = np.array(pd.read_csv('data/pems/pems-d07-9months-2021-station716933-15min.csv')[['Total Flow']][18345:])
43 | trainslstm_716933 = np.array(pd.read_csv("point_forecasts/slstm_pems_716933_train.csv")["0"])
44 | trainssvr_716933 = np.array(pd.read_csv("point_forecasts/ssvr_pems_716933_train.csv")["0"])
45 | trainsxgboost_716933 = np.array(pd.read_csv("point_forecasts/sxgboost_pems_716933_train.csv")["0"])
46 | testslstm_716933 = np.array(pd.read_csv("point_forecasts/slstm_pems_716933_test.csv")["0"])
47 | testssvr_716933 = np.array(pd.read_csv("point_forecasts/ssvr_pems_716933_test.csv")["0"])
48 | testsxgboost_716933 = np.array(pd.read_csv("point_forecasts/sxgboost_pems_716933_test.csv")["0"])
49 |
50 | slstmarima_716933 = hybrid_model(trainslstm_716933, traindata_716933, testslstm_716933, testdata_716933)
51 | ssvrarima_716933 = hybrid_model(trainssvr_716933, traindata_716933, testssvr_716933, testdata_716933)
52 | sxgboostarima_716933 = hybrid_model(trainsxgboost_716933, traindata_716933, testsxgboost_716933, testdata_716933)
53 | pd.DataFrame(slstmarima_716933).to_csv("point_forecasts/slstmarima_pems_716933_test.csv")
54 | pd.DataFrame(ssvrarima_716933).to_csv("point_forecasts/ssvrarima_pems_716933_test.csv")
55 | pd.DataFrame(sxgboostarima_716933).to_csv("point_forecasts/sxgboostarima_pems_716933_test.csv")
56 |
57 | traindata_717087 = np.array(pd.read_csv('data/pems/pems-d07-9months-2021-station717087-15min.csv')[['Total Flow']][673:18345])
58 | testdata_717087 = np.array(pd.read_csv('data/pems/pems-d07-9months-2021-station717087-15min.csv')[['Total Flow']][18345:])
59 | trainslstm_717087 = np.array(pd.read_csv("point_forecasts/slstm_pems_717087_train.csv")["0"])
60 | trainssvr_717087 = np.array(pd.read_csv("point_forecasts/ssvr_pems_717087_train.csv")["0"])
61 | trainsxgboost_717087 = np.array(pd.read_csv("point_forecasts/sxgboost_pems_717087_train.csv")["0"])
62 | testslstm_717087 = np.array(pd.read_csv("point_forecasts/slstm_pems_717087_test.csv")["0"])
63 | testssvr_717087 = np.array(pd.read_csv("point_forecasts/ssvr_pems_717087_test.csv")["0"])
64 | testsxgboost_717087 = np.array(pd.read_csv("point_forecasts/sxgboost_pems_717087_test.csv")["0"])
65 |
66 | slstmarima_717087 = hybrid_model(trainslstm_717087, traindata_717087, testslstm_717087, testdata_717087)
67 | ssvrarima_717087 = hybrid_model(trainssvr_717087, traindata_717087, testssvr_717087, testdata_717087)
68 | sxgboostarima_717087 = hybrid_model(trainsxgboost_717087, traindata_717087, testsxgboost_717087, testdata_717087)
69 | pd.DataFrame(slstmarima_717087).to_csv("point_forecasts/slstmarima_pems_717087_test.csv")
70 | pd.DataFrame(ssvrarima_717087).to_csv("point_forecasts/ssvrarima_pems_717087_test.csv")
71 | pd.DataFrame(sxgboostarima_717087).to_csv("point_forecasts/sxgboostarima_pems_717087_test.csv")
72 |
--------------------------------------------------------------------------------
/naive_and_average_methods.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 |
4 | def naive_method(data):
5 | data2 = pd.DataFrame(data.values)
6 | data3 = pd.concat([data2.shift(1), data2], axis=1)
7 | data3.columns = ['t-1', 't']
8 | data4 = data3.values
9 | train_size = int(len( data4) * 0.70)
10 | train, test = data4[1:train_size], data4[train_size:]
11 | test_predictions, actual_values = test[:,0], test[:,1]
12 | return test_predictions, actual_values
13 |
14 | def average_method(data):
15 | data2 = pd.DataFrame(data.values)
16 | data3 = pd.concat([data2.shift(1), data2], axis=1)
17 | data3.columns = ['t-1', 't']
18 | avg_values = []
19 | for i in range(len(data3)):
20 | avg_value = np.mean(data3['t-1'][:(i+1)])
21 | avg_values.append(avg_value)
22 | avg_values = pd.DataFrame(avg_values)
23 | data4 = pd.concat([data3, avg_values], axis = 1)
24 | data4['avg'] = data4[0]
25 | data4 = data4[['avg', 't']]
26 | data4 = data4.values
27 | train_size = int(len( data4) * 0.70)
28 | train, test = data4[1:train_size], data4[train_size:]
29 | test_predictions, actual_values = test[:,0], test[:,1]
30 | return test_predictions, actual_values
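31 | 
32 | #Illustrative usage (toy series, not thesis data): both baselines return the final
33 | #30% of the series as (test_predictions, actual_values), which can then be scored
34 | #with the point-forecast metrics.
35 | if __name__ == "__main__":
36 |     series = pd.Series([10, 12, 11, 13, 15, 14, 16, 18, 17, 19])
37 |     naive_pred, naive_actual = naive_method(series)
38 |     avg_pred, avg_actual = average_method(series)
39 |     print(np.mean(np.abs(naive_actual - naive_pred)))  #MAE of the naive (last value) baseline
40 |     print(np.mean(np.abs(avg_actual - avg_pred)))      #MAE of the expanding-mean baseline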
--------------------------------------------------------------------------------
/qrapproaches.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import statsmodels.formula.api as smf
3 | import torch
4 | import torch.nn as nn
5 | from torch.autograd import Variable
6 | from sklearn.preprocessing import MinMaxScaler
7 |
8 | ## historical PI implementation
9 | def get_corridor(corridor_size, error_series):
10 | corridor = []
11 | length = len(error_series) - corridor_size
12 | for i in range(length):
13 | errors_in_corridor = error_series[i:i+corridor_size]
14 | ordered_errors_in_corridor = np.sort(errors_in_corridor)
15 | corridor.append({"OrderedErrors": ordered_errors_in_corridor})
16 | return corridor
17 |
18 | def get_lower_upper_bounds(corridor_size, predictions, error_series):
19 | PIs = []
20 | corridor = get_corridor(corridor_size, error_series)
21 | predictions2 = np.array(predictions[corridor_size:])
22 |     percent5_index = 1    #index of the (roughly) 5th percentile error in the sorted corridor
23 |     percent95_index = 19  #index of the (roughly) 95th percentile error; both presume corridor_size = 20
24 | for i in range(len(corridor)):
25 | OrderedErrors = corridor[i]["OrderedErrors"]
26 | PointForecast = predictions2[i]
27 | lower_bound = OrderedErrors[percent5_index] + PointForecast
28 | upper_bound = OrderedErrors[percent95_index] + PointForecast
29 | PIs.append([lower_bound, upper_bound])
30 | return PIs
31 |
32 | ## implement distribution-based PI using AR
33 | def distribution_based_PI(test_data, a, sigma, z_alpha, z_1minalpha):
34 | PIs = []
35 | for i in range(len(test_data)):
36 | lower_bound = (a * test_data[i]) + (sigma * z_alpha)
37 | upper_bound = (a * test_data[i]) + (sigma * z_1minalpha)
38 | PIs.append([lower_bound, upper_bound])
39 | return PIs
40 |
41 | #implement QRA
42 | def qra(train_dataframe, test_dataframe, tau1, tau2):
43 |     #tau1 = 0.95 and tau2 = 0.05 give a 90% prediction interval.
44 |     #SSVR, SLSTM and SXGBoost produce the best point forecasts,
45 |     #so the QRA quantile regressions below are built on the
46 |     #forecasts of these three models.
47 | model1 = smf.quantreg('NUMBER_OF_VEHICLES ~ ssvr + slstm + sxgboost', train_dataframe).fit(q=tau1)
48 | get_y = lambda a, b, c, d: a + b * test_dataframe.ssvr + c * test_dataframe.slstm + d * test_dataframe.sxgboost
49 | y_upper = get_y(model1.params['Intercept'], model1.params['ssvr'], model1.params['slstm'], model1.params['sxgboost'])
50 | model2 = smf.quantreg('NUMBER_OF_VEHICLES ~ ssvr + slstm + sxgboost', train_dataframe).fit(q=tau2)
51 |     y_lower = get_y(model2.params['Intercept'], model2.params['ssvr'], model2.params['slstm'], model2.params['sxgboost'])
52 | y_upper = np.array(y_upper)
53 | y_lower = np.array(y_lower)
54 |
55 | PIs_qra = []
56 | for i in range(len(y_upper)):
57 | PIs_qra.append([y_lower[i], y_upper[i]])
58 | return PIs_qra
59 |
60 | #implement QRLSTM
61 | #first implement the LSTM network used by QRLSTM
62 | class LSTM(nn.Module):
63 |
64 | def __init__(self, num_classes, input_size, hidden_size, num_layers):
65 | super(LSTM, self).__init__()
66 |
67 | self.num_classes = num_classes
68 | self.num_layers = num_layers
69 | self.input_size = input_size
70 | self.hidden_size = hidden_size
71 |
72 | self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
73 | num_layers=num_layers, batch_first=True)
74 | self.softmax = nn.Softmax(dim=1)
75 | self.fc = nn.Linear(hidden_size, num_classes)
76 |
77 | def forward(self, x):
78 | h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
79 | c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
80 | _, (h_output, _) = self.lstm(x, (h_0, c_0))
81 | h_output = h_output.view(-1, self.hidden_size)
82 | output = self.softmax(h_output)
83 | output = self.fc(output)
84 | return output
85 |
86 | #define the pinball (quantile) loss used to update the LSTM parameters
87 | class PinballLoss():
88 | def __init__(self, quantile=0.10, reduction='mean'):
89 | self.quantile = quantile
90 | assert 0 < self.quantile
91 | assert self.quantile < 1
92 | self.reduction = reduction
93 | def __call__(self, output, target):
94 | assert output.shape == target.shape
95 | loss = torch.zeros_like(target, dtype=torch.float)
96 | error = output - target
97 | smaller_index = error < 0
98 | bigger_index = 0 < error
99 | loss[smaller_index] = self.quantile * (abs(error)[smaller_index])
100 | loss[bigger_index] = (1-self.quantile) * (abs(error)[bigger_index])
101 |
102 | if self.reduction == 'sum':
103 | loss = loss.sum()
104 | if self.reduction == 'mean':
105 | loss = loss.mean()
106 | return loss
107 |
108 | def qrlstm(data):
109 | sc = MinMaxScaler()
110 | training_data = sc.fit_transform(data)
111 |
112 | x, y = training_data[169:,:8], training_data[169:,-1]
113 | print(x.shape)
114 | print(y.shape)
115 |     train_size = 7995  #train/test split of the Istanbul series (7995 train + 3500 test = 11495 rows)
116 |
117 | dataX = Variable(torch.Tensor(np.array(x))).reshape((11495,8,1))
118 | dataY = Variable(torch.Tensor(np.array(y))).reshape((11495,1))
119 |
120 | trainX = Variable(torch.Tensor(np.array(x[0:train_size]))).reshape((7995,8,1))
121 | trainY = Variable(torch.Tensor(np.array(y[0:train_size]))).reshape((7995,1))
122 |
123 | testX = Variable(torch.Tensor(np.array(x[train_size:]))).reshape((3500,8,1))
124 | testY = Variable(torch.Tensor(np.array(y[train_size:]))).reshape((3500,1))
125 | num_epochs = 2000
126 | learning_rate = 0.01
127 |
128 | input_size = 1
129 | hidden_size = 8
130 | num_layers = 1
131 | num_classes = 1
132 |
133 | lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
134 | criterion = PinballLoss(quantile=0.95)
135 | optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
136 |
137 | # Train the model
138 | for epoch in range(num_epochs):
139 | outputs = lstm(trainX)
140 | optimizer.zero_grad()
141 |
142 | # obtain the loss function
143 | loss = criterion(outputs, trainY)
144 |
145 | loss.backward()
146 |
147 | optimizer.step()
148 |
149 | lstm.eval()
150 | test_predict = lstm(testX)
151 |
152 | data_predict = test_predict.data.numpy()
153 | dataY_plot = dataY.data.numpy()
154 |
155 | d_p = np.concatenate((data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict),axis=1)
156 | dY_p = np.concatenate((dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot),axis=1)
157 | data_predict = sc.inverse_transform(d_p)
158 | dataY_plot = sc.inverse_transform(dY_p)
159 |
160 | dataY_plot = dataY_plot[:,0]
161 | data_predict = data_predict[:,0]
162 | upper_bounds = data_predict
163 |
164 | #get lower bounds
165 | lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
166 | criterion = PinballLoss(quantile=0.05)
167 | optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
168 |
169 | # Train the model
170 | for epoch in range(num_epochs):
171 | outputs = lstm(trainX)
172 | optimizer.zero_grad()
173 |
174 | # obtain the loss function
175 | loss = criterion(outputs, trainY)
176 |
177 | loss.backward()
178 |
179 | optimizer.step()
180 |
181 | lstm.eval()
182 | test_predict = lstm(testX)
183 | data_predict = test_predict.data.numpy()
184 | dataY_plot = dataY.data.numpy()
185 |
186 | d_p = np.concatenate((data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict),axis=1)
187 | dY_p = np.concatenate((dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot),axis=1)
188 | data_predict = sc.inverse_transform(d_p)
189 | dataY_plot = sc.inverse_transform(dY_p)
190 |
191 | dataY_plot = dataY_plot[:,0]
192 | data_predict = data_predict[:,0]
193 | lower_bounds = data_predict
194 |
195 | y_upper = np.array(upper_bounds)
196 | y_lower = np.array(lower_bounds)
197 |
198 | PIs_qrlstm = []
199 | for i in range(len(y_upper)):
200 | PIs_qrlstm.append([y_lower[i], y_upper[i]])
201 | return PIs_qrlstm
202 |
203 |
204 |
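205 | #Illustrative QRA call (synthetic numbers, hypothetical layout): both data frames carry
206 | #the point forecasts of the three base models as columns, and the training frame also
207 | #carries the observed NUMBER_OF_VEHICLES column that the quantile regressions are fit on.
208 | if __name__ == "__main__":
209 |     import pandas as pd
210 |     np.random.seed(0)
211 |     y = np.random.uniform(100, 300, size=200)
212 |     frame = pd.DataFrame({
213 |         "NUMBER_OF_VEHICLES": y,
214 |         "ssvr": y + np.random.normal(0, 10, size=200),
215 |         "slstm": y + np.random.normal(0, 12, size=200),
216 |         "sxgboost": y + np.random.normal(0, 11, size=200),
217 |     })
218 |     PIs_example = qra(frame.iloc[:150], frame.iloc[150:], tau1=0.95, tau2=0.05)
219 |     print(PIs_example[:3])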
--------------------------------------------------------------------------------
/slstm_istanbul_data_del.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import torch
4 | import torch.nn as nn
5 | from torch.autograd import Variable
6 | from sklearn.preprocessing import MinMaxScaler
7 |
8 | class LSTM(nn.Module):
9 | def __init__(self, num_classes, input_size, hidden_size, num_layers):
10 | super(LSTM, self).__init__()
11 | self.num_classes = num_classes
12 | self.num_layers = num_layers
13 | self.input_size = input_size
14 | self.hidden_size = hidden_size
15 |
16 | self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
17 | num_layers=num_layers, batch_first=True)
18 | self.softmax = nn.Softmax(dim=1)
19 | self.fc = nn.Linear(hidden_size, num_classes)
20 |
21 | def forward(self, x):
22 | h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
23 | c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
24 | _, (h_output, _) = self.lstm(x, (h_0, c_0))
25 | h_output = h_output.view(-1, self.hidden_size)
26 | output = self.softmax(h_output)
27 | output= self.fc(output)
28 | return output
29 |
30 | data = pd.read_csv('data/istanbul/istanbul_data_del.csv')[['NUMBER_OF_VEHICLES']]
31 | data2 = pd.DataFrame(data.values)
32 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(167),data2.shift(25),data2.shift(24),data2.shift(23),data2.shift(2),data2.shift(1),data2], axis=1)
33 | data3.columns = ['t-169','t-168','t-167','t-25','t-24','t-23','t-2','t-1', 't']
34 | data4 = data3.values
35 | train_size = 7396
36 | test_size = 3500
37 |
38 | minmaxscaler = MinMaxScaler()
39 | training_data = minmaxscaler.fit_transform(data4)
40 | x, y = training_data[169:,:8], training_data[169:,-1]
41 |
42 | dataX = Variable(torch.Tensor(np.array(x))).reshape((len(x),8,1))
43 | dataY = Variable(torch.Tensor(np.array(y))).reshape((len(x),1))
44 | trainX = Variable(torch.Tensor(np.array(x[0:train_size]))).reshape((train_size,8,1))
45 | trainY = Variable(torch.Tensor(np.array(y[0:train_size]))).reshape((train_size,1))
46 | testX = Variable(torch.Tensor(np.array(x[train_size:]))).reshape((test_size,8,1))
47 | testY = Variable(torch.Tensor(np.array(y[train_size:]))).reshape((test_size,1))
48 |
49 | num_epochs = 2000
50 | learning_rate = 0.01
51 |
52 | input_size = 1
53 | hidden_size = 8
54 | num_layers = 1
55 | num_classes = 1
56 |
57 | lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
58 |
59 | criterion = torch.nn.MSELoss()
60 | optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
61 |
62 | # Train the model
63 | for epoch in range(num_epochs):
64 | outputs = lstm(trainX)
65 | optimizer.zero_grad()
66 | # obtain the loss function
67 | loss = criterion(outputs, trainY)
68 | loss.backward()
69 | optimizer.step()
70 | if epoch % 100 == 0:
71 | print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))
72 |
73 | lstm.eval()
74 | train_predict = lstm(trainX)
75 | data_predict2 = train_predict.data.numpy()
76 | d_p2 = np.concatenate((data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2),axis=1)
77 | data_predict2 = minmaxscaler.inverse_transform(d_p2)
78 | train_predict = data_predict2[:,0]
79 |
80 | lstm.eval()
81 | test_predict = lstm(testX)
82 | data_predict = test_predict.data.numpy()
83 | dataY_plot = dataY.data.numpy()
84 | d_p = np.concatenate((data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict),axis=1)
85 | dY_p = np.concatenate((dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot),axis=1)
86 | data_predict = minmaxscaler.inverse_transform(d_p)
87 | dataY_plot = minmaxscaler.inverse_transform(dY_p)
88 | dataY_plot = dataY_plot[:,0]
89 | test_predict = data_predict[:,0]
90 |
91 |
92 | pd.DataFrame(train_predict).to_csv("point_forecasts/slstm_istanbul_data_del_train.csv")
93 | pd.DataFrame(test_predict).to_csv("point_forecasts/slstm_istanbul_data_del_test.csv")
94 |
95 |
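96 | #Notes on the setup above: the lag features t-169..t-167, t-25..t-23 and t-2, t-1 give
97 | #the model roughly weekly (lag 168), daily (lag 24) and immediate history of the series.
98 | #The single predicted column is tiled to nine identical columns before
99 | #minmaxscaler.inverse_transform because the scaler was fitted on the nine-column lag
100 | #matrix; only the first column of the back-transformed result is kept.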
--------------------------------------------------------------------------------
/slstm_istanbul_data_mean_sdsh.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import torch
4 | import torch.nn as nn
5 | from torch.autograd import Variable
6 | from sklearn.preprocessing import MinMaxScaler
7 |
8 | class LSTM(nn.Module):
9 | def __init__(self, num_classes, input_size, hidden_size, num_layers):
10 | super(LSTM, self).__init__()
11 | self.num_classes = num_classes
12 | self.num_layers = num_layers
13 | self.input_size = input_size
14 | self.hidden_size = hidden_size
15 |
16 | self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
17 | num_layers=num_layers, batch_first=True)
18 | self.softmax = nn.Softmax(dim=1)
19 | self.fc = nn.Linear(hidden_size, num_classes)
20 |
21 | def forward(self, x):
22 | h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
23 | c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
24 | _, (h_output, _) = self.lstm(x, (h_0, c_0))
25 | h_output = h_output.view(-1, self.hidden_size)
26 | output = self.softmax(h_output)
27 | output= self.fc(output)
28 | return output
29 |
30 | data = pd.read_csv('data/istanbul/istanbul_data_mean_sdsh.csv')[['NUMBER_OF_VEHICLES']]
31 | data2 = pd.DataFrame(data.values)
32 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(167),data2.shift(25),data2.shift(24),data2.shift(23),data2.shift(2),data2.shift(1),data2], axis=1)
33 | data3.columns = ['t-169','t-168','t-167','t-25','t-24','t-23','t-2','t-1', 't']
34 | data4 = data3.values
35 | train_size = 7995
36 | test_size = 3500
37 |
38 | minmaxscaler = MinMaxScaler()
39 | training_data = minmaxscaler.fit_transform(data4)
40 | x, y = training_data[169:,:8], training_data[169:,-1]
41 |
42 | dataX = Variable(torch.Tensor(np.array(x))).reshape((len(x),8,1))
43 | dataY = Variable(torch.Tensor(np.array(y))).reshape((len(x),1))
44 | trainX = Variable(torch.Tensor(np.array(x[0:train_size]))).reshape((train_size,8,1))
45 | trainY = Variable(torch.Tensor(np.array(y[0:train_size]))).reshape((train_size,1))
46 | testX = Variable(torch.Tensor(np.array(x[train_size:]))).reshape((test_size,8,1))
47 | testY = Variable(torch.Tensor(np.array(y[train_size:]))).reshape((test_size,1))
48 |
49 | num_epochs = 2000
50 | learning_rate = 0.01
51 |
52 | input_size = 1
53 | hidden_size = 8
54 | num_layers = 1
55 | num_classes = 1
56 |
57 | lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
58 |
59 | criterion = torch.nn.MSELoss()
60 | optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
61 | losses = []
62 | # Train the model
63 | for epoch in range(num_epochs):
64 | outputs = lstm(trainX)
65 | optimizer.zero_grad()
66 | # obtain the loss function
67 | loss = criterion(outputs, trainY)
68 | loss.backward()
69 | optimizer.step()
70 | losses.append(loss.item())
71 | if epoch % 100 == 0:
72 | print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))
73 |
74 | lstm.eval()
75 | train_predict = lstm(trainX)
76 | data_predict2 = train_predict.data.numpy()
77 | d_p2 = np.concatenate((data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2),axis=1)
78 | data_predict2 = minmaxscaler.inverse_transform(d_p2)
79 | train_predict = data_predict2[:,0]
80 |
81 | lstm.eval()
82 | test_predict = lstm(testX)
83 | data_predict = test_predict.data.numpy()
84 | dataY_plot = dataY.data.numpy()
85 | d_p = np.concatenate((data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict),axis=1)
86 | dY_p = np.concatenate((dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot),axis=1)
87 | data_predict = minmaxscaler.inverse_transform(d_p)
88 | dataY_plot = minmaxscaler.inverse_transform(dY_p)
89 | dataY_plot = dataY_plot[:,0]
90 | test_predict = data_predict[:,0]
91 |
92 |
93 | pd.DataFrame(train_predict).to_csv("point_forecasts/slstm_istanbul_data_mean_sdsh_train.csv")
94 | pd.DataFrame(test_predict).to_csv("point_forecasts/slstm_istanbul_data_mean_sdsh_test.csv")
95 | pd.DataFrame(losses).to_csv("slstm_loss_istanbul_data_mean_sdsh.csv")
96 |
--------------------------------------------------------------------------------
/slstm_istanbul_data_mean_sh.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import torch
4 | import torch.nn as nn
5 | from torch.autograd import Variable
6 | from sklearn.preprocessing import MinMaxScaler
7 |
8 | class LSTM(nn.Module):
9 | def __init__(self, num_classes, input_size, hidden_size, num_layers):
10 | super(LSTM, self).__init__()
11 | self.num_classes = num_classes
12 | self.num_layers = num_layers
13 | self.input_size = input_size
14 | self.hidden_size = hidden_size
15 |
16 | self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
17 | num_layers=num_layers, batch_first=True)
18 | self.softmax = nn.Softmax(dim=1)
19 | self.fc = nn.Linear(hidden_size, num_classes)
20 |
21 | def forward(self, x):
22 | h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
23 | c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
24 | _, (h_output, _) = self.lstm(x, (h_0, c_0))
25 | h_output = h_output.view(-1, self.hidden_size)
26 | output = self.softmax(h_output)
27 | output= self.fc(output)
28 | return output
29 |
30 | data = pd.read_csv('data/istanbul/istanbul_data_mean_sh.csv')[['NUMBER_OF_VEHICLES']]
31 | data2 = pd.DataFrame(data.values)
32 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(167),data2.shift(25),data2.shift(24),data2.shift(23),data2.shift(2),data2.shift(1),data2], axis=1)
33 | data3.columns = ['t-169','t-168','t-167','t-25','t-24','t-23','t-2','t-1', 't']
34 | data4 = data3.values
35 | train_size = 7995
36 | test_size = 3500
37 |
38 | minmaxscaler = MinMaxScaler()
39 | training_data = minmaxscaler.fit_transform(data4)
40 | x, y = training_data[169:,:8], training_data[169:,-1]
41 |
42 | dataX = Variable(torch.Tensor(np.array(x))).reshape((len(x),8,1))
43 | dataY = Variable(torch.Tensor(np.array(y))).reshape((len(x),1))
44 | trainX = Variable(torch.Tensor(np.array(x[0:train_size]))).reshape((train_size,8,1))
45 | trainY = Variable(torch.Tensor(np.array(y[0:train_size]))).reshape((train_size,1))
46 | testX = Variable(torch.Tensor(np.array(x[train_size:]))).reshape((test_size,8,1))
47 | testY = Variable(torch.Tensor(np.array(y[train_size:]))).reshape((test_size,1))
48 |
49 | num_epochs = 2000
50 | learning_rate = 0.01
51 |
52 | input_size = 1
53 | hidden_size = 8
54 | num_layers = 1
55 | num_classes = 1
56 |
57 | lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
58 |
59 | criterion = torch.nn.MSELoss()
60 | optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
61 |
62 | # Train the model
63 | for epoch in range(num_epochs):
64 | outputs = lstm(trainX)
65 | optimizer.zero_grad()
66 | # obtain the loss function
67 | loss = criterion(outputs, trainY)
68 | loss.backward()
69 | optimizer.step()
70 | if epoch % 100 == 0:
71 | print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))
72 |
73 | lstm.eval()
74 | train_predict = lstm(trainX)
75 | data_predict2 = train_predict.data.numpy()
76 | d_p2 = np.concatenate((data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2),axis=1)
77 | data_predict2 = minmaxscaler.inverse_transform(d_p2)
78 | train_predict = data_predict2[:,0]
79 |
80 | lstm.eval()
81 | test_predict = lstm(testX)
82 | data_predict = test_predict.data.numpy()
83 | dataY_plot = dataY.data.numpy()
84 | d_p = np.concatenate((data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict),axis=1)
85 | dY_p = np.concatenate((dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot),axis=1)
86 | data_predict = minmaxscaler.inverse_transform(d_p)
87 | dataY_plot = minmaxscaler.inverse_transform(dY_p)
88 | dataY_plot = dataY_plot[:,0]
89 | test_predict = data_predict[:,0]
90 |
91 |
92 | pd.DataFrame(train_predict).to_csv("point_forecasts/slstm_istanbul_data_mean_sh_train.csv")
93 | pd.DataFrame(test_predict).to_csv("point_forecasts/slstm_istanbul_data_mean_sh_test.csv")
94 |
95 |
--------------------------------------------------------------------------------
/slstm_pems_716933.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import torch
4 | import torch.nn as nn
5 | from torch.autograd import Variable
6 | from sklearn.preprocessing import MinMaxScaler
7 |
8 | class LSTM(nn.Module):
9 | def __init__(self, num_classes, input_size, hidden_size, num_layers):
10 | super(LSTM, self).__init__()
11 | self.num_classes = num_classes
12 | self.num_layers = num_layers
13 | self.input_size = input_size
14 | self.hidden_size = hidden_size
15 |
16 | self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
17 | num_layers=num_layers, batch_first=True)
18 | self.softmax = nn.Softmax(dim=1)
19 | self.fc = nn.Linear(hidden_size, num_classes)
20 |
21 | def forward(self, x):
22 | h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
23 | c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
24 | _, (h_output, _) = self.lstm(x, (h_0, c_0))
25 | h_output = h_output.view(-1, self.hidden_size)
26 | output = self.softmax(h_output)
27 | output= self.fc(output)
28 | return output
29 | data = pd.read_csv('data/pems/pems-d07-9months-2021-station716933-15min.csv')[['Total Flow']]
30 | data2 = pd.DataFrame(data.values)
31 | data3 = pd.concat([data2.shift(673),data2.shift(672),data2.shift(671),data2.shift(97),data2.shift(96),data2.shift(95),data2.shift(2),data2.shift(1),data2], axis=1)
32 | data3.columns = ['t-673','t-672','t-671','t-97','t-96','t-95','t-2','t-1', 't']
33 | data4 = data3.values
34 | train_size = 17672
35 | test_size = 7863
36 |
37 | minmaxscaler = MinMaxScaler()
38 | training_data = minmaxscaler.fit_transform(data4)
39 | x, y = training_data[673:,:8], training_data[673:,-1]
40 |
41 | dataX = Variable(torch.Tensor(np.array(x))).reshape((len(x),8,1))
42 | dataY = Variable(torch.Tensor(np.array(y))).reshape((len(x),1))
43 | trainX = Variable(torch.Tensor(np.array(x[0:train_size]))).reshape((train_size,8,1))
44 | trainY = Variable(torch.Tensor(np.array(y[0:train_size]))).reshape((train_size,1))
45 | testX = Variable(torch.Tensor(np.array(x[train_size:]))).reshape((test_size,8,1))
46 | testY = Variable(torch.Tensor(np.array(y[train_size:]))).reshape((test_size,1))
47 |
48 | num_epochs = 2000
49 | learning_rate = 0.01
50 |
51 | input_size = 1
52 | hidden_size = 8
53 | num_layers = 1
54 | num_classes = 1
55 |
56 | lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
57 |
58 | criterion = torch.nn.MSELoss()
59 | optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
60 |
61 | # Train the model
62 | for epoch in range(num_epochs):
63 | outputs = lstm(trainX)
64 | optimizer.zero_grad()
65 | # obtain the loss function
66 | loss = criterion(outputs, trainY)
67 | loss.backward()
68 | optimizer.step()
69 | if epoch % 100 == 0:
70 | print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))
71 |
72 | lstm.eval()
73 | train_predict = lstm(trainX)
74 | data_predict2 = train_predict.data.numpy()
75 | d_p2 = np.concatenate((data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2),axis=1)
76 | data_predict2 = minmaxscaler.inverse_transform(d_p2)
77 | train_predict = data_predict2[:,0]
78 |
79 | lstm.eval()
80 | test_predict = lstm(testX)
81 | data_predict = test_predict.data.numpy()
82 | dataY_plot = dataY.data.numpy()
83 | d_p = np.concatenate((data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict),axis=1)
84 | dY_p = np.concatenate((dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot),axis=1)
85 | data_predict = minmaxscaler.inverse_transform(d_p)
86 | dataY_plot = minmaxscaler.inverse_transform(dY_p)
87 | dataY_plot = dataY_plot[:,0]
88 | test_predict = data_predict[:,0]
89 |
90 |
91 | pd.DataFrame(train_predict).to_csv("point_forecasts/slstm_pems_716933_train.csv")
92 | pd.DataFrame(test_predict).to_csv("point_forecasts/slstm_pems_716933_test.csv")
93 |
94 |
--------------------------------------------------------------------------------
/slstm_pems_717087.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import torch
4 | import torch.nn as nn
5 | from torch.autograd import Variable
6 | from sklearn.preprocessing import MinMaxScaler
7 |
8 | class LSTM(nn.Module):
9 | def __init__(self, num_classes, input_size, hidden_size, num_layers):
10 | super(LSTM, self).__init__()
11 | self.num_classes = num_classes
12 | self.num_layers = num_layers
13 | self.input_size = input_size
14 | self.hidden_size = hidden_size
15 |
16 | self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
17 | num_layers=num_layers, batch_first=True)
18 | self.softmax = nn.Softmax(dim=1)
19 | self.fc = nn.Linear(hidden_size, num_classes)
20 |
21 | def forward(self, x):
22 | h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
23 | c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
24 | _, (h_output, _) = self.lstm(x, (h_0, c_0))
25 | h_output = h_output.view(-1, self.hidden_size)
26 | output = self.softmax(h_output)
27 | output= self.fc(output)
28 | return output
29 | data = pd.read_csv('data/pems/pems-d07-9months-2021-station717087-15min.csv')[['Total Flow']]
30 | data2 = pd.DataFrame(data.values)
31 | data3 = pd.concat([data2.shift(673),data2.shift(672),data2.shift(671),data2.shift(97),data2.shift(96),data2.shift(95),data2.shift(2),data2.shift(1),data2], axis=1)
32 | data3.columns = ['t-673','t-672','t-671','t-97','t-96','t-95','t-2','t-1', 't']
33 | data4 = data3.values
34 | train_size = 17672
35 | test_size = 7863
36 |
37 | minmaxscaler = MinMaxScaler()
38 | training_data = minmaxscaler.fit_transform(data4)
39 | x, y = training_data[673:,:8], training_data[673:,-1]
40 |
41 | dataX = Variable(torch.Tensor(np.array(x))).reshape((len(x),8,1))
42 | dataY = Variable(torch.Tensor(np.array(y))).reshape((len(x),1))
43 | trainX = Variable(torch.Tensor(np.array(x[0:train_size]))).reshape((train_size,8,1))
44 | trainY = Variable(torch.Tensor(np.array(y[0:train_size]))).reshape((train_size,1))
45 | testX = Variable(torch.Tensor(np.array(x[train_size:]))).reshape((test_size,8,1))
46 | testY = Variable(torch.Tensor(np.array(y[train_size:]))).reshape((test_size,1))
47 |
48 | num_epochs = 2000
49 | learning_rate = 0.01
50 |
51 | input_size = 1
52 | hidden_size = 8
53 | num_layers = 1
54 | num_classes = 1
55 |
56 | lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
57 |
58 | criterion = torch.nn.MSELoss()
59 | optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
60 | losses = []
61 | # Train the model
62 | for epoch in range(num_epochs):
63 | outputs = lstm(trainX)
64 | optimizer.zero_grad()
65 | # obtain the loss function
66 | loss = criterion(outputs, trainY)
67 | loss.backward()
68 | optimizer.step()
69 | losses.append(loss.item())
70 | if epoch % 100 == 0:
71 | print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))
72 |
73 | lstm.eval()
74 | train_predict = lstm(trainX)
75 | data_predict2 = train_predict.data.numpy()
76 | d_p2 = np.concatenate((data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2,data_predict2),axis=1)
77 | data_predict2 = minmaxscaler.inverse_transform(d_p2)
78 | train_predict = data_predict2[:,0]
79 |
80 | lstm.eval()
81 | test_predict = lstm(testX)
82 | data_predict = test_predict.data.numpy()
83 | dataY_plot = dataY.data.numpy()
84 | d_p = np.concatenate((data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict,data_predict),axis=1)
85 | dY_p = np.concatenate((dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot,dataY_plot),axis=1)
86 | data_predict = minmaxscaler.inverse_transform(d_p)
87 | dataY_plot = minmaxscaler.inverse_transform(dY_p)
88 | dataY_plot = dataY_plot[:,0]
89 | test_predict = data_predict[:,0]
90 |
91 |
92 | pd.DataFrame(train_predict).to_csv("point_forecasts/slstm_pems_717087_train.csv")
93 | pd.DataFrame(test_predict).to_csv("point_forecasts/slstm_pems_717087_test.csv")
94 | pd.DataFrame(losses).to_csv("slstm_loss_pems_717087.csv")
95 |
96 |
--------------------------------------------------------------------------------
/ssvr_istanbul_data_del.py:
--------------------------------------------------------------------------------
1 | from sklearn.model_selection import GridSearchCV
2 | from sklearn.svm import SVR
3 | import pandas as pd
4 |
5 |
6 | def gridsearch(train_X, train_y, parameters):
7 | svr = SVR()
8 | grid_search = GridSearchCV(svr, parameters, cv=2)
9 | grid_search.fit(train_X, train_y)
10 | best_parameters = grid_search.best_params_
11 | return best_parameters
12 |
13 |
14 | def ssvr_model(train_X, train_y, test_X, best_parameters):
15 | C = best_parameters['C']
16 | kernel = best_parameters['kernel']
17 | svr = SVR(kernel=kernel, C=C)
18 | svr.fit(train_X, train_y)
19 | train_predictions = svr.predict(train_X)
20 | test_predictions = svr.predict(test_X)
21 | pd.DataFrame(train_predictions).to_csv("point_forecasts/ssvr_istanbul_data_del_train.csv")
22 | pd.DataFrame(test_predictions).to_csv("point_forecasts/ssvr_istanbul_data_del_test.csv")
23 | return train_predictions, test_predictions
24 |
25 | data = pd.read_csv('data/istanbul/istanbul_data_del.csv')[['NUMBER_OF_VEHICLES']]
26 | data2 = pd.DataFrame(data.values)
27 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(167),data2.shift(25),data2.shift(24),data2.shift(23),data2.shift(2),data2.shift(1),data2], axis=1)
28 | data3.columns = ['t-169','t-168','t-167','t-25','t-24','t-23','t-2','t-1', 't']
29 |
30 | data4 = data3.values
31 | train_size4 = 7565
32 | train, test = data4[169:train_size4], data4[train_size4:]
33 | train_X, train_y = train[:,:8], train[:,-1]
34 | test_X, test_y = test[:,:8], test[:,-1]
35 |
36 | parameters = {'kernel':['rbf', 'linear'], 'C':[0.1, 1, 10, 100]}
37 | best_parameters = gridsearch(train_X, train_y, parameters)
38 | ssvr_model(train_X, train_y, test_X, best_parameters)
--------------------------------------------------------------------------------
/ssvr_istanbul_data_mean_sdsh.py:
--------------------------------------------------------------------------------
1 | from sklearn.svm import SVR
2 | import pandas as pd
3 |
4 | # Since we examine the effect of missing data points in the Istanbul traffic data,
5 | # the grid search is run only on the dataset in which the missing points are deleted.
6 | # The resulting parameters are therefore reused for the datasets whose missing points are imputed.
7 | def ssvr_model(train_X, train_y, test_X, best_parameters):
8 | C = best_parameters['C']
9 | kernel = best_parameters['kernel']
10 | svr = SVR(kernel=kernel, C=C)
11 | svr.fit(train_X, train_y)
12 | train_predictions = svr.predict(train_X)
13 | test_predictions = svr.predict(test_X)
14 | pd.DataFrame(train_predictions).to_csv("point_forecasts/ssvr_istanbul_data_mean_sdsh_train.csv")
15 | pd.DataFrame(test_predictions).to_csv("point_forecasts/ssvr_istanbul_data_mean_sdsh_test.csv")
16 | return train_predictions, test_predictions
17 |
18 | data = pd.read_csv('data/istanbul/istanbul_data_mean_sdsh.csv')[['NUMBER_OF_VEHICLES']]
19 | data2 = pd.DataFrame(data.values)
20 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(167),data2.shift(25),data2.shift(24),data2.shift(23),data2.shift(2),data2.shift(1),data2], axis=1)
21 | data3.columns = ['t-169','t-168','t-167','t-25','t-24','t-23','t-2','t-1', 't']
22 |
23 | data4 = data3.values
24 | train_size4 = int(len(data4) * 0.70)
25 | train, test = data4[169:train_size4], data4[train_size4:]
26 | train_X, train_y = train[:,:8], train[:,-1]
27 | test_X, test_y = test[:,:8], test[:,-1]
28 |
29 | best_parameters = {'C': 100, 'kernel': 'rbf'}
30 | ssvr_model(train_X, train_y, test_X, best_parameters)
--------------------------------------------------------------------------------
/ssvr_istanbul_data_mean_sh.py:
--------------------------------------------------------------------------------
1 | from sklearn.svm import SVR
2 | import pandas as pd
3 |
4 | # Since we examine the effect of missing data points in the Istanbul traffic data,
5 | # the grid search is run only on the dataset in which the missing points are deleted.
6 | # The resulting parameters are therefore reused for the datasets whose missing points are imputed.
7 | def ssvr_model(train_X, train_y, test_X, best_parameters):
8 | C = best_parameters['C']
9 | kernel = best_parameters['kernel']
10 | svr = SVR(kernel=kernel, C=C)
11 | svr.fit(train_X, train_y)
12 | train_predictions = svr.predict(train_X)
13 | test_predictions = svr.predict(test_X)
14 | pd.DataFrame(train_predictions).to_csv("point_forecasts/ssvr_istanbul_data_mean_sh_train.csv")
15 | pd.DataFrame(test_predictions).to_csv("point_forecasts/ssvr_istanbul_data_mean_sh_test.csv")
16 | return train_predictions, test_predictions
17 |
18 | data = pd.read_csv('data/istanbul/istanbul_data_mean_sh.csv')[['NUMBER_OF_VEHICLES']]
19 | data2 = pd.DataFrame(data.values)
20 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(167),data2.shift(25),data2.shift(24),data2.shift(23),data2.shift(2),data2.shift(1),data2], axis=1)
21 | data3.columns = ['t-169','t-168','t-167','t-25','t-24','t-23','t-2','t-1', 't']
22 |
23 | data4 = data3.values
24 | train_size4 = int(len(data4) * 0.70)
25 | train, test = data4[169:train_size4], data4[train_size4:]
26 | train_X, train_y = train[:,:8], train[:,-1]
27 | test_X, test_y = test[:,:8], test[:,-1]
28 |
29 | best_parameters = {'C': 100, 'kernel': 'rbf'}
30 | ssvr_model(train_X, train_y, test_X, best_parameters)
--------------------------------------------------------------------------------
/ssvr_pems_716933.py:
--------------------------------------------------------------------------------
1 | from sklearn.model_selection import GridSearchCV
2 | from sklearn.svm import SVR
3 | import pandas as pd
4 |
5 |
6 | def gridsearch(train_X, train_y, parameters):
7 | svr = SVR()
8 | grid_search = GridSearchCV(svr, parameters, cv=2)
9 | grid_search.fit(train_X, train_y)
10 | best_parameters = grid_search.best_params_
11 | return best_parameters
12 |
13 |
14 | def ssvr_model(train_X, train_y, test_X, best_parameters):
15 | C = best_parameters['C']
16 | kernel = best_parameters['kernel']
17 | svr = SVR(kernel=kernel, C=C)
18 | svr.fit(train_X, train_y)
19 | train_predictions = svr.predict(train_X)
20 | test_predictions = svr.predict(test_X)
21 | pd.DataFrame(train_predictions).to_csv("point_forecasts/ssvr_pems_716933_train.csv")
22 | pd.DataFrame(test_predictions).to_csv("point_forecasts/ssvr_pems_716933_test.csv")
23 | return train_predictions, test_predictions
24 |
25 | data = pd.read_csv('data/pems/pems-d07-9months-2021-station716933-15min.csv')[['Total Flow']]
26 | data2 = pd.DataFrame(data.values)
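   | # lag features for the 15-minute series: one week earlier (t-673..t-671), one day earlier (t-97..t-95) and the two most recent intervals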
27 | data3 = pd.concat([data2.shift(673),data2.shift(672),data2.shift(671),data2.shift(97),data2.shift(96),data2.shift(95),data2.shift(2),data2.shift(1),data2], axis=1)
28 | data3.columns = ['t-673','t-672','t-671','t-97','t-96','t-95','t-2','t-1','t']
29 |
30 | data4 = data3.values
31 | train_size4 = int(len(data4) * 0.70)
32 | train, test = data4[673:train_size4], data4[train_size4:]
33 | train_X, train_y = train[:,:8], train[:,-1]
34 | test_X, test_y = test[:,:8], test[:,-1]
35 |
36 | parameters = {'kernel':['rbf', 'linear'], 'C':[0.1, 1, 10, 100]}
37 | best_parameters = gridsearch(train_X, train_y, parameters)
38 | ssvr_model(train_X, train_y, test_X, best_parameters)
--------------------------------------------------------------------------------
/ssvr_pems_717087.py:
--------------------------------------------------------------------------------
1 | from sklearn.model_selection import GridSearchCV
2 | from sklearn.svm import SVR
3 | import pandas as pd
4 |
5 |
6 | def gridsearch(train_X, train_y, parameters):
7 | svr = SVR()
8 | grid_search = GridSearchCV(svr, parameters, cv=2)
9 | grid_search.fit(train_X, train_y)
10 | best_parameters = grid_search.best_params_
11 | return best_parameters
12 |
13 |
14 | def ssvr_model(train_X, train_y, test_X, best_parameters):
15 | C = best_parameters['C']
16 | kernel = best_parameters['kernel']
17 | svr = SVR(kernel=kernel, C=C)
18 | svr.fit(train_X, train_y)
19 | train_predictions = svr.predict(train_X)
20 | test_predictions = svr.predict(test_X)
21 | pd.DataFrame(train_predictions).to_csv("point_forecasts/ssvr_pems_717087_train.csv")
22 | pd.DataFrame(test_predictions).to_csv("point_forecasts/ssvr_pems_717087_test.csv")
23 | return train_predictions, test_predictions
24 |
25 | data = pd.read_csv('data/pems/pems-d07-9months-2021-station717087-15min.csv')[['Total Flow']]
26 | data2 = pd.DataFrame(data.values)
27 | data3 = pd.concat([data2.shift(673),data2.shift(672),data2.shift(671),data2.shift(97),data2.shift(96),data2.shift(95),data2.shift(2),data2.shift(1),data2], axis=1)
28 | data3.columns = ['t-673','t-672','t-671','t-97','t-96','t-95','t-2','t-1','t']
29 |
30 | data4 = data3.values
31 | train_size4 = int(len(data4) * 0.70)
32 | train, test = data4[673:train_size4], data4[train_size4:]
33 | train_X, train_y = train[:,:8], train[:,-1]
34 | test_X, test_y = test[:,:8], test[:,-1]
35 |
36 | parameters = {'kernel':['rbf'], 'C':[0.1, 1, 10, 100]}
37 | best_parameters = gridsearch(train_X, train_y, parameters)
38 | ssvr_model(train_X, train_y, test_X, best_parameters)
--------------------------------------------------------------------------------
/xgboost_istanbul_data_del.py:
--------------------------------------------------------------------------------
1 | from numpy import asarray
2 | from pandas import read_csv
3 | from pandas import DataFrame
4 | from pandas import concat
5 | from sklearn.metrics import mean_absolute_error
6 | from xgboost import XGBRegressor
7 | import pandas as pd
8 | import numpy as np
9 |
10 |
11 |
12 | # transform a time series dataset into a supervised learning dataset
13 | def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
14 | n_vars = 1 if type(data) is list else data.shape[1]
15 | df = DataFrame(data)
16 | cols = list()
17 | for i in range(n_in, 0, -1):
18 | cols.append(df.shift(i))
19 | for i in range(0, n_out):
20 | cols.append(df.shift(-i))
21 | agg = concat(cols, axis=1)
22 | if dropnan:
23 | agg.dropna(inplace=True)
24 | return agg.values
25 |
26 | # split a univariate dataset into train/test sets
27 | def train_test_split(data, n_test):
28 | return data[:-n_test, :], data[-n_test:, :]
29 |
30 | # fit an xgboost model and make a one step prediction
31 | def xgboost_forecast(train, testX):
32 | train = asarray(train)
33 | trainX, trainy = train[:, :-1], train[:, -1]
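   |     # gradient-boosted trees with the squared-error objective and 1000 boosting rounds; the remaining hyperparameters keep their defaults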
34 | model = XGBRegressor(objective='reg:squarederror', n_estimators=1000)
35 | model.fit(trainX, trainy)
36 | yhat = model.predict(testX)
37 | return yhat
38 |
39 | # walk-forward style split for univariate data: a single model is fit on the training rows and used to forecast both the training and test sets (no per-step refitting)
40 | def walk_forward_validation(data, n_test):
41 | train_predictions = []
42 | test_predictions = []
43 | train, test = train_test_split(data, n_test)
44 | history = [x for x in train]
45 | train = asarray(train)
46 | trainX, trainy = train[:, :-1], train[:, -1]
47 | testX, testy = test[:, :-1], test[:, -1]
48 | yhat_train = xgboost_forecast(history, trainX)
49 | train_predictions.append(yhat_train)
50 | yhat_test = xgboost_forecast(history, testX)
51 | test_predictions.append(yhat_test)
52 | return train_predictions, test_predictions
53 |
54 |
55 | data = pd.read_csv('data/istanbul/istanbul_data_del.csv')[['NUMBER_OF_VEHICLES']]
56 | train_size = 7396
57 | test_size = 3500
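   | # train_size + test_size must equal the number of rows left after the first 169 lag rows are dropped, otherwise the reshape below fails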
58 | data2 = pd.DataFrame(data.values)
59 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(167),data2.shift(25),data2.shift(24),data2.shift(23),data2.shift(2),data2.shift(1),data2], axis=1)
60 | data3.columns = ['t-169','t-168','t-167','t-25','t-24','t-23','t-2','t-1', 't']
61 |
62 | values = data3.values
63 | data = values[169:,:]
64 | train_predictions, test_predictions = walk_forward_validation(data, 3500)
65 | train_predictions = pd.DataFrame(np.array(train_predictions).reshape(train_size,1))
66 | test_predictions = pd.DataFrame(np.array(test_predictions).reshape(test_size,1))
67 | train_predictions.to_csv("point_forecasts/xgboost_istanbul_data_del_train.csv")
68 | test_predictions.to_csv("point_forecasts/xgboost_istanbul_data_del_test.csv")
69 |
70 |
--------------------------------------------------------------------------------
/xgboost_istanbul_data_mean_sdsh.py:
--------------------------------------------------------------------------------
1 | from numpy import asarray
2 | from xgboost import XGBRegressor
3 | import pandas as pd
4 | import numpy as np
5 |
6 |
7 | # split a univariate dataset into train/test sets
8 | def train_test_split(data, n_test):
9 | return data[:-n_test, :], data[-n_test:, :]
10 |
11 | # fit an xgboost model and make a one step prediction
12 | def xgboost_forecast(train, testX):
13 | train = asarray(train)
14 | trainX, trainy = train[:, :-1], train[:, -1]
15 | model = XGBRegressor(objective='reg:squarederror', n_estimators=1000)
16 | model.fit(trainX, trainy)
17 | yhat = model.predict(testX)
18 | return yhat
19 |
20 | # walk-forward style split for univariate data: a single model is fit on the training rows and used to forecast both the training and test sets (no per-step refitting)
21 | def walk_forward_validation(data, n_test):
22 | train_predictions = []
23 | test_predictions = []
24 | train, test = train_test_split(data, n_test)
25 | history = [x for x in train]
26 | train = asarray(train)
27 | trainX, trainy = train[:, :-1], train[:, -1]
28 | testX, testy = test[:, :-1], test[:, -1]
29 | yhat_train = xgboost_forecast(history, trainX)
30 | train_predictions.append(yhat_train)
31 | yhat_test = xgboost_forecast(history, testX)
32 | test_predictions.append(yhat_test)
33 | return train_predictions, test_predictions
34 |
35 |
36 | data = pd.read_csv('data/istanbul/istanbul_data_mean_sdsh.csv')[['NUMBER_OF_VEHICLES']]
37 | train_size = 7995
38 | test_size = 3500
39 | data2 = pd.DataFrame(data.values)
40 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(167),data2.shift(25),data2.shift(24),data2.shift(23),data2.shift(2),data2.shift(1),data2], axis=1)
41 | data3.columns = ['t-169','t-168','t-167','t-25','t-24','t-23','t-2','t-1', 't']
42 |
43 | values = data3.values
44 | data = values[169:,:]
45 | train_predictions, test_predictions = walk_forward_validation(data, 3500)
46 | train_predictions = pd.DataFrame(np.array(train_predictions).reshape(train_size,1))
47 | test_predictions = pd.DataFrame(np.array(test_predictions).reshape(test_size,1))
48 | train_predictions.to_csv("point_forecasts/xgboost_istanbul_data_mean_sdsh_train.csv")
49 | test_predictions.to_csv("point_forecasts/xgboost_istanbul_data_mean_sdsh_test.csv")
50 |
51 |
--------------------------------------------------------------------------------
/xgboost_istanbul_data_mean_sh.py:
--------------------------------------------------------------------------------
1 | from numpy import asarray
2 | from xgboost import XGBRegressor
3 | import pandas as pd
4 | import numpy as np
5 |
6 |
7 | # split a univariate dataset into train/test sets
8 | def train_test_split(data, n_test):
9 | return data[:-n_test, :], data[-n_test:, :]
10 |
11 | # fit an xgboost model and make a one step prediction
12 | def xgboost_forecast(train, testX):
13 | train = asarray(train)
14 | trainX, trainy = train[:, :-1], train[:, -1]
15 | model = XGBRegressor(objective='reg:squarederror', n_estimators=1000)
16 | model.fit(trainX, trainy)
17 | yhat = model.predict(testX)
18 | return yhat
19 |
20 | # walk-forward style split for univariate data: a single model is fit on the training rows and used to forecast both the training and test sets (no per-step refitting)
21 | def walk_forward_validation(data, n_test):
22 | train_predictions = []
23 | test_predictions = []
24 | train, test = train_test_split(data, n_test)
25 | history = [x for x in train]
26 | train = asarray(train)
27 | trainX, trainy = train[:, :-1], train[:, -1]
28 | testX, testy = test[:, :-1], test[:, -1]
29 | yhat_train = xgboost_forecast(history, trainX)
30 | train_predictions.append(yhat_train)
31 | yhat_test = xgboost_forecast(history, testX)
32 | test_predictions.append(yhat_test)
33 | return train_predictions, test_predictions
34 |
35 |
36 | data = pd.read_csv('data/istanbul/istanbul_data_mean_sh.csv')[['NUMBER_OF_VEHICLES']]
37 | train_size = 7995
38 | test_size = 3500
39 | data2 = pd.DataFrame(data.values)
40 | data3 = pd.concat([data2.shift(169),data2.shift(168),data2.shift(167),data2.shift(25),data2.shift(24),data2.shift(23),data2.shift(2),data2.shift(1),data2], axis=1)
41 | data3.columns = ['t-169','t-168','t-167','t-25','t-24','t-23','t-2','t-1', 't']
42 |
43 | values = data3.values
44 | data = values[169:,:]
45 | train_predictions, test_predictions = walk_forward_validation(data, 3500)
46 | train_predictions = pd.DataFrame(np.array(train_predictions).reshape(train_size,1))
47 | test_predictions = pd.DataFrame(np.array(test_predictions).reshape(test_size,1))
48 | train_predictions.to_csv("point_forecasts/xgboost_istanbul_data_mean_sh_train.csv")
49 | test_predictions.to_csv("point_forecasts/xgboost_istanbul_data_mean_sh_test.csv")
50 |
51 |
--------------------------------------------------------------------------------
/xgboost_oems_717087.py:
--------------------------------------------------------------------------------
1 | from numpy import asarray
2 | from xgboost import XGBRegressor
3 | import pandas as pd
4 | import numpy as np
5 |
6 |
7 |
8 | """# transform a time series dataset into a supervised learning dataset
9 | def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
10 | n_vars = 1 if type(data) is list else data.shape[1]
11 | df = DataFrame(data)
12 | cols = list()
13 | for i in range(n_in, 0, -1):
14 | cols.append(df.shift(i))
15 | for i in range(0, n_out):
16 | cols.append(df.shift(-i))
17 | agg = concat(cols, axis=1)
18 | if dropnan:
19 | agg.dropna(inplace=True)
20 | return agg.values"""
21 |
22 | # split a univariate dataset into train/test sets
23 | def train_test_split(data, n_test):
24 | return data[:-n_test, :], data[-n_test:, :]
25 |
26 | # fit an xgboost model and make a one step prediction
27 | def xgboost_forecast(train, testX):
28 | train = asarray(train)
29 | trainX, trainy = train[:, :-1], train[:, -1]
30 | model = XGBRegressor(objective='reg:squarederror', n_estimators=1000)
31 | model.fit(trainX, trainy)
32 | yhat = model.predict(testX)
33 | return yhat
34 |
35 | # walk-forward style split for univariate data: a single model is fit on the training rows and used to forecast both the training and test sets (no per-step refitting)
36 | def walk_forward_validation(data, n_test):
37 | train_predictions = []
38 | test_predictions = []
39 | train, test = train_test_split(data, n_test)
40 | history = [x for x in train]
41 | train = asarray(train)
42 | trainX, trainy = train[:, :-1], train[:, -1]
43 | testX, testy = test[:, :-1], test[:, -1]
44 | yhat_train = xgboost_forecast(history, trainX)
45 | train_predictions.append(yhat_train)
46 | yhat_test = xgboost_forecast(history, testX)
47 | test_predictions.append(yhat_test)
48 | return train_predictions, test_predictions
49 |
50 |
51 | data = pd.read_csv('data/pems/pems-d07-9months-2021-station717087-15min.csv')[['Total Flow']]
52 | data2 = pd.DataFrame(data.values)
53 | data3 = pd.concat([data2.shift(673),data2.shift(672),data2.shift(671),data2.shift(97),data2.shift(96),data2.shift(95),data2.shift(2),data2.shift(1),data2], axis=1)
54 | data3.columns = ['t-673','t-672','t-671','t-97','t-96','t-95','t-2','t-1','t']
55 | train_size = 17672
56 | test_size = 7863
57 |
58 | values = data3.values
59 | data = values[673:,:]
60 | train_predictions, test_predictions = walk_forward_validation(data, 7863)
61 | train_predictions = pd.DataFrame(np.array(train_predictions).reshape(train_size,1))
62 | test_predictions = pd.DataFrame(np.array(test_predictions).reshape(test_size,1))
63 | train_predictions.to_csv("point_forecasts/xgboost_pems_717087_train.csv")
64 | test_predictions.to_csv("point_forecasts/xgboost_pems_717087_test.csv")
65 |
66 |
--------------------------------------------------------------------------------
/xgboost_pems_716933.py:
--------------------------------------------------------------------------------
1 | from numpy import asarray
2 | from xgboost import XGBRegressor
3 | import pandas as pd
4 | import numpy as np
5 |
6 |
7 |
8 | # split a univariate dataset into train/test sets
9 | def train_test_split(data, n_test):
10 | return data[:-n_test, :], data[-n_test:, :]
11 |
12 | # fit an xgboost model and make a one step prediction
13 | def xgboost_forecast(train, testX):
14 | train = asarray(train)
15 | trainX, trainy = train[:, :-1], train[:, -1]
16 | model = XGBRegressor(objective='reg:squarederror', n_estimators=1000)
17 | model.fit(trainX, trainy)
18 | yhat = model.predict(testX)
19 | return yhat
20 |
21 | # walk-forward style split for univariate data: a single model is fit on the training rows and used to forecast both the training and test sets (no per-step refitting)
22 | def walk_forward_validation(data, n_test):
23 | train_predictions = []
24 | test_predictions = []
25 | train, test = train_test_split(data, n_test)
26 | history = [x for x in train]
27 | train = asarray(train)
28 | trainX, trainy = train[:, :-1], train[:, -1]
29 | testX, testy = test[:, :-1], test[:, -1]
30 | yhat_train = xgboost_forecast(history, trainX)
31 | train_predictions.append(yhat_train)
32 | yhat_test = xgboost_forecast(history, testX)
33 | test_predictions.append(yhat_test)
34 | return train_predictions, test_predictions
35 |
36 |
37 | data = pd.read_csv('data/pems/pems-d07-9months-2021-station716933-15min.csv')[['Total Flow']]
38 | data2 = pd.DataFrame(data.values)
39 | data3 = pd.concat([data2.shift(673),data2.shift(672),data2.shift(671),data2.shift(97),data2.shift(96),data2.shift(95),data2.shift(2),data2.shift(1),data2], axis=1)
40 | data3.columns = ['t-673','t-672','t-671','t-97','t-96','t-95','t-2','t-1','t']
41 | train_size = 17672
42 | test_size = 7863
43 |
44 | values = data3.values
45 | data = values[673:,:]
46 | train_predictions, test_predictions = walk_forward_validation(data, 7863)
47 | train_predictions = pd.DataFrame(np.array(train_predictions).reshape(train_size,1))
48 | test_predictions = pd.DataFrame(np.array(test_predictions).reshape(test_size,1))
49 | train_predictions.to_csv("point_forecasts/xgboost_pems_716933_train.csv")
50 | test_predictions.to_csv("point_forecasts/xgboost_pems_716933_test.csv")
51 |
52 |
--------------------------------------------------------------------------------