├── .gitignore ├── README.md ├── data ├── processed │ └── README.md └── raw │ └── README.md ├── models ├── bilstm │ ├── demand │ │ └── q_all_bilstm │ │ │ └── demand_bilstm.h5 │ ├── price │ │ └── q_all_bilstm │ │ │ └── price_bilstm.h5 │ ├── solar │ │ └── q_all_bilstm │ │ │ └── solar_bilstm.h5 │ └── wind │ │ └── q_all_bilstm │ │ └── wind_bilstm.h5 ├── seq2seq+temporal+spatial │ ├── demand │ │ └── q_all_seq2seq+temporal+spatial │ │ │ ├── demand_main.h5 │ │ │ ├── demand_spatial_enc.h5 │ │ │ └── demand_temporal_enc.h5 │ ├── solar │ │ └── q_all_seq2seq+temporal+spatial │ │ │ ├── solar_main.h5 │ │ │ ├── solar_spatial_enc.h5 │ │ │ └── solar_temporal_enc.h5 │ └── wind │ │ └── q_all_seq2seq+temporal+spatial │ │ ├── wind_main.h5 │ │ ├── wind_spatial_enc.h5 │ │ └── wind_temporal_enc.h5 ├── seq2seq+temporal │ ├── demand │ │ └── q_all_seq2seq+temporal │ │ │ ├── demand_seq2seq+temporal.h5 │ │ │ └── demand_seq2seq+temporal_enc.h5 │ ├── price │ │ └── q_all_seq2seq+temporal │ │ │ ├── price_seq2seq+temporal.h5 │ │ │ └── price_seq2seq+temporal_enc.h5 │ ├── solar │ │ └── q_all_seq2seq+temporal │ │ │ ├── solar_seq2seq+temporal.h5 │ │ │ └── solar_seq2seq+temporal_enc.h5 │ └── wind │ │ └── q_all_seq2seq+temporal │ │ ├── wind_seq2seq+temporal.h5 │ │ └── wind_seq2seq+temporal_enc.h5 └── seq2seq │ ├── demand │ └── q_all_seq2seq │ │ └── demand_seq2seq.h5 │ ├── price │ └── q_all_seq2seq │ │ └── price_seq2seq.h5 │ ├── solar │ └── q_all_seq2seq │ │ └── solar_seq2seq.h5 │ └── wind │ └── q_all_seq2seq │ └── wind_seq2seq.h5 ├── requirements.txt ├── results ├── demand │ ├── attention_plot_results_demand.csv │ ├── bilstm │ │ ├── forecasted_time_series_demand_bilstm.pkl │ │ ├── preformance_summary_demand_bilstm.csv │ │ ├── q_all_bilstm │ │ │ └── demand_bilstm.h5 │ │ └── quantile_prediction_results_demand_bilstm.csv │ ├── seq2seq+temporal+spatial │ │ ├── forecasted_time_series_demand_seq2seq+temporal+spatial.pkl │ │ ├── preformance_summary_demand_seq2seq+temporal+spatial.csv │ │ └── quantile_prediction_results_demand_seq2seq+temporal+spatial.csv │ ├── seq2seq+temporal │ │ ├── forecasted_time_series_demand_seq2seq+temporal.pkl │ │ ├── preformance_summary_demand_seq2seq+temporal.csv │ │ └── quantile_prediction_results_demand_seq2seq+temporal.csv │ └── seq2seq │ │ ├── forecasted_time_series_demand_seq2seq.pkl │ │ ├── preformance_summary_demand_seq2seq.csv │ │ └── quantile_prediction_results_demand_seq2seq.csv ├── price │ ├── attention_plot_results_price.csv │ ├── bilstm │ │ ├── forecasted_time_series_price_bilstm.pkl │ │ └── preformance_summary_price_bilstm.csv │ ├── seq2seq+temporal+spatial │ │ ├── forecasted_time_series_price_seq2seq+temporal+spatial.pkl │ │ └── preformance_summary_price_seq2seq+temporal+spatial.csv │ ├── seq2seq+temporal │ │ ├── attention_data_price_seq2seq+temporal.pkl │ │ ├── forecasted_time_series_price_seq2seq+temporal.pkl │ │ ├── preformance_summary_price_seq2seq+temporal.csv │ │ └── quantile_prediction_results_price_seq2seq+temporal.csv │ └── seq2seq │ │ ├── forecasted_time_series_price_seq2seq.pkl │ │ ├── preformance_summary_price_seq2seq.csv │ │ └── quantile_prediction_results_price_seq2seq.csv ├── solar │ ├── attention_plot_results_solar.csv │ ├── bilstm │ │ ├── forecasted_time_series_solar_bilstm.pkl │ │ ├── preformance_summary_solar_bilstm.csv │ │ └── quantile_prediction_results_solar_bilstm.csv │ ├── seq2seq+temporal+spatial │ │ ├── forecasted_time_series_solar_seq2seq+temporal+spatial.pkl │ │ ├── preformance_summary_solar_seq2seq+temporal+spatial.csv │ │ ├── 
quantile_prediction_results_solar_seq2seq+temporal+spatial.csv │ │ └── spatial_attention_data_solar.pkl │ ├── seq2seq+temporal │ │ ├── forecasted_time_series_solar_seq2seq+temporal.pkl │ │ ├── preformance_summary_solar_seq2seq+temporal.csv │ │ └── quantile_prediction_results_solar_seq2seq+temporal.csv │ └── seq2seq │ │ ├── forecasted_time_series_solar_seq2seq.pkl │ │ └── preformance_summary_solar_seq2seq.csv └── wind │ ├── attention_plot_results_wind.csv │ ├── bilstm │ ├── forecasted_time_series_wind_bilstm.pkl │ └── preformance_summary_wind_bilstm.csv │ ├── seq2seq+temporal+spatial │ ├── forecasted_time_series_wind_seq2seq+temporal+spatial.pkl │ ├── preformance_summary_wind_seq2seq+temporal+spatial.csv │ └── quantile_prediction_results_wind_seq2seq+temporal+spatial.csv │ ├── seq2seq+temporal │ ├── forecasted_time_series_wind_seq2seq+temporal.pkl │ ├── preformance_summary_wind_seq2seq+temporal.csv │ └── quantile_prediction_results_wind_seq2seq+temporal.csv │ └── seq2seq │ ├── forecasted_time_series_wind_seq2seq.pkl │ ├── preformance_summary_wind_seq2seq.csv │ └── quantile_prediction_results_wind_seq2seq.csv ├── scripts ├── models │ ├── _shared │ │ ├── __pycache__ │ │ │ ├── attention_layer.cpython-38.pyc │ │ │ └── timeseries_data_generator.cpython-38.pyc │ │ ├── attention_layer.py │ │ └── timeseries_data_generator.py │ ├── bilstm_model.py │ ├── inference+testing │ │ ├── bilstm_seq2seq_predictions.py │ │ └── inference_model_seq2seq+spatial+temporal_attn.py │ ├── seq2seq+spatial+temporal_attn.py │ ├── seq2seq+temporal_attn.py │ └── seq2seq_model.py ├── postprocessing │ ├── d3_scripts │ │ ├── Context_graph.js │ │ └── forecasting_graph.js │ ├── format_results_Qforecast_plot.py │ ├── format_results_attn_plot.py │ ├── results_summary.py │ └── spatial_attention_plots.py └── preprocessing │ ├── ERA5_downloader.py │ ├── __pycache__ │ └── preprocessing_funcs.cpython-38.pyc │ ├── data_preprocessing_demand.py │ ├── data_preprocessing_price.py │ ├── data_preprocessing_solar.py │ ├── data_preprocessing_wind.py │ └── preprocessing_funcs.py └── visualisations ├── cloud_cover_(input)_animation.gif ├── d3_quantile_plot_examples.png ├── d3_temporal_attention_plot_demand.png ├── d3_temporal_attention_plot_price.png ├── d3_temporal_attention_plot_solar.png ├── d3_temporal_attention_plot_wind.png ├── memory_leak_test.png ├── model_architecture_schematic.svg ├── model_architecture_schematic_markup.png ├── performance_breakdown_markup.png ├── solar_spatial_attentions_animation.gif └── tabular_performance_breakdown.png /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Probabilistic Forecasting of Renewable Energy Generation and Wholesale Market Prices Using Quantile Regression in Keras 2 | :rocket: Blog post on personal website :link: [Probabilistic Forecasting of Renewable Generation & Wholesale Prices with Quantile-Regression](https://richardfindlay.co.uk/probabilistic-forecasting-of-renewable-generation-and-wholesale-prices-with-quantile-regression-2) 3 | 4 |

5 | 6 | screenshot of interactive d3.js plots illustrating probabilistic forecasting performance 7 |

8 | 9 | ### Project Description :open_book:: 10 | This repository demonstrates the use of deep learning techniques in combination with quantile regression to produce probabilistic forecasts. The above figure depicts consecutive day-ahead (DA) quantile forecasts for each of the investigated variables over one week; further quantification and discussion of the forecast performance is given in the accompanying [blog post](https://richardfindlay.co.uk/probabilistic-forecasting-of-renewable-generation-and-wholesale-prices-with-quantile-regression-2). 11 | 12 | The code investigates the performance of four deep-learning architectures: Bi-directional LSTM, Seq2Seq, Seq2Seq with temporal attention, and Seq2Seq with temporal and spatial attention. To give context, comparisons are made against a simplistic daily persistence forecast as well as the Transmission System Operator's (TSO) forecast. The architectures add complexity at each iteration, with the accompanying hypothesis that performance should improve at each step; this was not borne out when test performance was investigated. 13 | 14 |
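For readers unfamiliar with quantile regression, each model is trained with one pinball (quantile) loss per output head, one head for each of the nine quantiles used by the training scripts. The snippet below is a minimal, illustrative sketch of that loss in Keras rather than a copy of the repository code:

```python
import tensorflow.keras.backend as K

# quantiles used by the training scripts in scripts/models/
quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95]

def pinball_loss(q):
    # bind q per head so each loss keeps its own quantile
    def loss(y_true, y_pred):
        error = y_true - y_pred
        return K.mean(K.maximum(q * error, (q - 1.0) * error), axis=-1)
    return loss

# one loss per quantile output, e.g. model.compile(loss=q_losses, optimizer='adam')
q_losses = [pinball_loss(q) for q in quantiles]
```

Penalising under- and over-prediction asymmetrically in this way pushes each head towards its target quantile, so the nine outputs together form the predictive intervals shown in the figure above.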

15 | 16 | model architecture schematic for encoder-decoder with spatial and temporal attention mechanisms as implemented in keras 17 |

18 | 19 | ### Performance Overview :racing_car:: 20 | The above figure illustrates the most complex of the models investigated in this project. With both temporal and spatial attention mechanisms, the novel encoder-decoder architecture does not always prevail as the best performing technique, but it shows encouraging performance and may merit further investigation and fine-tuning. 21 | 22 |
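The full implementation of this architecture lives in `scripts/models/seq2seq+spatial+temporal_attn.py` and also includes a CNN-based spatial encoder, engineered time features and teacher forcing. Purely for orientation, a heavily simplified sketch of the temporal-attention idea with per-quantile output heads might look like the following (layer sizes, feature counts and the three quantiles here are illustrative, not the trained configuration):

```python
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Activation, Dot, concatenate

# 336 half-hourly input steps (7 days), 48 output steps (1 day ahead)
Tx, Ty, channels, n_s = 336, 48, 5, 32
quantiles = [0.05, 0.5, 0.95]

enc_in = Input(shape=(Tx, channels))
enc_seq, h, c = LSTM(n_s, return_sequences=True, return_state=True)(enc_in)

# temporal attention: score every encoder step against the final hidden state
scores = Dot(axes=[1, 2])([h, enc_seq])            # (batch, Tx)
weights = Activation('softmax')(scores)            # attention over the prior 7 days
context = Dot(axes=[1, 1])([enc_seq, weights])     # weighted summary of the encoder

# one day-ahead trajectory per quantile head
outputs = [Dense(Ty, name=f'q_{int(q * 100)}')(concatenate([context, h]))
           for q in quantiles]

model = Model(enc_in, outputs)
model.summary()
```

The actual models are full sequence-to-sequence decoders (see `scripts/models/_shared/attention_layer.py` for the attention implementation); the single-shot projection above is collapsed only to keep the sketch short.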

23 | 24 |

25 | 26 | The above plot illustrates the behaviour of the temporal attention mechanism over the prior 7 days of features input to the model. The attention weights show that the model recognises temporal patterns within the data, paying particular attention to the previous day when producing the forecast. Similarly, the gif below depicts the spatial attention weights for the solar generation forecast, again giving a promising indication that the mechanism recognises the influence of solar irradiance on the forecast. 27 | 28 |
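The plotted weights come from the attention data pickled by the inference script (e.g. `results/price/seq2seq+temporal/attention_data_price_seq2seq+temporal.pkl`, keyed by quantile as in `scripts/models/inference+testing/bilstm_seq2seq_predictions.py`). A rough sketch of inspecting them is given below; the stored array is assumed here to have shape `(test windows, 336 input steps, 1)`, with input steps ordered oldest to newest:

```python
import numpy as np
import matplotlib.pyplot as plt
from pickle import load

with open('results/price/seq2seq+temporal/attention_data_price_seq2seq+temporal.pkl', 'rb') as f:
    attn = load(f)

# median-quantile attention weights, averaged over all test windows
weights = np.squeeze(np.asarray(attn['0.5']))     # assumed (windows, 336)
mean_w = weights.mean(axis=0)

plt.plot(mean_w)
plt.axvspan(336 - 48, 336, alpha=0.2, label='previous day (last 48 half-hours)')
plt.xlabel('input step (oldest to newest)')
plt.ylabel('mean attention weight')
plt.legend()
plt.show()
```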

29 | 30 |

31 | 32 | A quantitative performance breakdown of all investigated deep-learning architectures is given below, alongside the TSO and persistence forecasting performance. 33 | 34 |
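The interval metrics reported in the table (and in the `preformance_summary_*.csv` files under `results/`) are the Prediction Interval Coverage Probability (PICP), the Prediction Interval Nominal Confidence (PINC, 90% for the 0.05 to 0.95 quantile band) and the Average Coverage Error (ACE = PICP - PINC), alongside MAE, MAPE and RMSE. A minimal sketch of computing them from one of the pickled forecast results follows; the dictionary keys mirror the inference script, while using the median forecast for the deterministic errors is an assumption:

```python
import numpy as np
from pickle import load

with open('results/wind/seq2seq/forecasted_time_series_wind_seq2seq.pkl', 'rb') as f:
    res = load(f)

y_true = np.asarray(res['y_true']).ravel()
lower  = np.asarray(res['0.05']).ravel()   # lower quantile forecast
upper  = np.asarray(res['0.95']).ravel()   # upper quantile forecast
median = np.asarray(res['0.5']).ravel()    # central forecast

pinc = 90.0                                                     # nominal coverage of the 5th-95th band
picp = 100.0 * np.mean((y_true >= lower) & (y_true <= upper))   # empirical coverage
ace  = picp - pinc                                              # average coverage error

mae  = np.mean(np.abs(y_true - median))
rmse = np.sqrt(np.mean((y_true - median) ** 2))

print(f'PICP {picp:.2f}%  ACE {ace:.2f}  MAE {mae:.1f}  RMSE {rmse:.1f}')
```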

35 | 36 |

37 | 38 | ### Notes on Code :notebook:: 39 | Install the Python dependencies for the repository: 40 | ``` 41 | $ pip install -r requirements.txt 42 | ``` 43 | 44 | :weight_lifting: Training for all models was conducted on a Google Colab Pro+ subscription. 45 | 46 | ### Further Work :telescope:: 47 | - [ ] The study could be broadened by analysing additional ML architectures alongside the RNN variants examined here, particularly XGBoost and transformers. 48 | - [ ] The problem pushes the limits of high-level DL frameworks; adopting PyTorch or lower-level TensorFlow could allow for increased efficiency and performance. 49 | 50 | ### To Do :test_tube:: 51 | - [ ] Code links and references to be validated since re-organisation. 52 | - [ ] Clean code, especially interactive d3 plots. 53 | - [ ] Further validate environments and optimisation scripts. 54 | 55 | ### Resources :gem:: 56 | + [https://www.elexon.co.uk/documents/training-guidance/bsc-guidance-notes/bmrs-api-and-data-push-user-guide-2/](https://www.elexon.co.uk/documents/training-guidance/bsc-guidance-notes/bmrs-api-and-data-push-user-guide-2/) 57 | + [https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly](https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly) 58 | + [https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-pressure-levels?tab=overview](https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-pressure-levels?tab=overview) 59 | + [https://colah.github.io/posts/2015-08-Understanding-LSTMs](https://colah.github.io/posts/2015-08-Understanding-LSTMs) 60 | + [https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html](https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html) 61 | + [https://colab.research.google.com/github/kmkarakaya/ML_tutorials/blob/master/seq2seq_Part_D_Encoder_Decoder_with_Teacher_Forcing.ipynb](https://colab.research.google.com/github/kmkarakaya/ML_tutorials/blob/master/seq2seq_Part_D_Encoder_Decoder_with_Teacher_Forcing.ipynb) 62 | -------------------------------------------------------------------------------- /data/processed/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /data/raw/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /models/bilstm/demand/q_all_bilstm/demand_bilstm.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/bilstm/demand/q_all_bilstm/demand_bilstm.h5 -------------------------------------------------------------------------------- /models/bilstm/price/q_all_bilstm/price_bilstm.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/bilstm/price/q_all_bilstm/price_bilstm.h5 -------------------------------------------------------------------------------- /models/bilstm/solar/q_all_bilstm/solar_bilstm.h5: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/bilstm/solar/q_all_bilstm/solar_bilstm.h5 -------------------------------------------------------------------------------- /models/bilstm/wind/q_all_bilstm/wind_bilstm.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/bilstm/wind/q_all_bilstm/wind_bilstm.h5 -------------------------------------------------------------------------------- /models/seq2seq+temporal+spatial/demand/q_all_seq2seq+temporal+spatial/demand_main.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/demand/q_all_seq2seq+temporal+spatial/demand_main.h5 -------------------------------------------------------------------------------- /models/seq2seq+temporal+spatial/demand/q_all_seq2seq+temporal+spatial/demand_spatial_enc.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/demand/q_all_seq2seq+temporal+spatial/demand_spatial_enc.h5 -------------------------------------------------------------------------------- /models/seq2seq+temporal+spatial/demand/q_all_seq2seq+temporal+spatial/demand_temporal_enc.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/demand/q_all_seq2seq+temporal+spatial/demand_temporal_enc.h5 -------------------------------------------------------------------------------- /models/seq2seq+temporal+spatial/solar/q_all_seq2seq+temporal+spatial/solar_main.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/solar/q_all_seq2seq+temporal+spatial/solar_main.h5 -------------------------------------------------------------------------------- /models/seq2seq+temporal+spatial/solar/q_all_seq2seq+temporal+spatial/solar_spatial_enc.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/solar/q_all_seq2seq+temporal+spatial/solar_spatial_enc.h5 -------------------------------------------------------------------------------- /models/seq2seq+temporal+spatial/solar/q_all_seq2seq+temporal+spatial/solar_temporal_enc.h5: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/solar/q_all_seq2seq+temporal+spatial/solar_temporal_enc.h5 -------------------------------------------------------------------------------- /models/seq2seq+temporal+spatial/wind/q_all_seq2seq+temporal+spatial/wind_main.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/wind/q_all_seq2seq+temporal+spatial/wind_main.h5 -------------------------------------------------------------------------------- /models/seq2seq+temporal+spatial/wind/q_all_seq2seq+temporal+spatial/wind_spatial_enc.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/wind/q_all_seq2seq+temporal+spatial/wind_spatial_enc.h5 -------------------------------------------------------------------------------- /models/seq2seq+temporal+spatial/wind/q_all_seq2seq+temporal+spatial/wind_temporal_enc.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/wind/q_all_seq2seq+temporal+spatial/wind_temporal_enc.h5 -------------------------------------------------------------------------------- /models/seq2seq+temporal/demand/q_all_seq2seq+temporal/demand_seq2seq+temporal.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/demand/q_all_seq2seq+temporal/demand_seq2seq+temporal.h5 -------------------------------------------------------------------------------- /models/seq2seq+temporal/demand/q_all_seq2seq+temporal/demand_seq2seq+temporal_enc.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/demand/q_all_seq2seq+temporal/demand_seq2seq+temporal_enc.h5 -------------------------------------------------------------------------------- /models/seq2seq+temporal/price/q_all_seq2seq+temporal/price_seq2seq+temporal.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/price/q_all_seq2seq+temporal/price_seq2seq+temporal.h5 -------------------------------------------------------------------------------- /models/seq2seq+temporal/price/q_all_seq2seq+temporal/price_seq2seq+temporal_enc.h5: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/price/q_all_seq2seq+temporal/price_seq2seq+temporal_enc.h5 -------------------------------------------------------------------------------- /models/seq2seq+temporal/solar/q_all_seq2seq+temporal/solar_seq2seq+temporal.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/solar/q_all_seq2seq+temporal/solar_seq2seq+temporal.h5 -------------------------------------------------------------------------------- /models/seq2seq+temporal/solar/q_all_seq2seq+temporal/solar_seq2seq+temporal_enc.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/solar/q_all_seq2seq+temporal/solar_seq2seq+temporal_enc.h5 -------------------------------------------------------------------------------- /models/seq2seq+temporal/wind/q_all_seq2seq+temporal/wind_seq2seq+temporal.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/wind/q_all_seq2seq+temporal/wind_seq2seq+temporal.h5 -------------------------------------------------------------------------------- /models/seq2seq+temporal/wind/q_all_seq2seq+temporal/wind_seq2seq+temporal_enc.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/wind/q_all_seq2seq+temporal/wind_seq2seq+temporal_enc.h5 -------------------------------------------------------------------------------- /models/seq2seq/demand/q_all_seq2seq/demand_seq2seq.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq/demand/q_all_seq2seq/demand_seq2seq.h5 -------------------------------------------------------------------------------- /models/seq2seq/price/q_all_seq2seq/price_seq2seq.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq/price/q_all_seq2seq/price_seq2seq.h5 -------------------------------------------------------------------------------- /models/seq2seq/solar/q_all_seq2seq/solar_seq2seq.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq/solar/q_all_seq2seq/solar_seq2seq.h5 -------------------------------------------------------------------------------- /models/seq2seq/wind/q_all_seq2seq/wind_seq2seq.h5: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq/wind/q_all_seq2seq/wind_seq2seq.h5 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.11.0 2 | aiohttp==3.7.4 3 | alabaster==0.7.12 4 | alpaca==1.0.0 5 | alpaca-trade-api==1.4.0 6 | appdirs==1.4.4 7 | arrow==0.17.0 8 | astunparse==1.6.3 9 | asv==0.4.2 10 | async-timeout==3.0.1 11 | attrs==20.3.0 12 | Babel==2.9.0 13 | beautifulsoup4==4.9.3 14 | black==20.8b1 15 | boto3==1.16.23 16 | botocore==1.19.23 17 | cachetools==4.1.1 18 | cdsapi==0.5.1 19 | certifi==2020.6.20 20 | cfgv==3.2.0 21 | cftime==1.1.3 22 | chardet==3.0.4 23 | click==7.1.2 24 | cloudpickle==1.6.0 25 | coloredlogs==15.0 26 | configparser==5.0.1 27 | cycler==0.10.0 28 | decorator==4.4.2 29 | distlib==0.3.1 30 | docopt==0.6.2 31 | docutils==0.16 32 | ez-setup==0.9 33 | filelock==3.0.12 34 | flatbuffers==1.12 35 | FLORIS==2.2.3 36 | future==0.18.2 37 | gast==0.3.3 38 | google-auth==1.18.0 39 | google-auth-oauthlib==0.4.1 40 | google-pasta==0.2.0 41 | grpcio==1.32.0 42 | gym==0.18.0 43 | h5py==2.10.0 44 | humanfriendly==9.1 45 | identify==1.5.12 46 | idna==2.10 47 | ImageHash==4.2.0 48 | imageio==2.9.0 49 | imageio-ffmpeg==0.4.3 50 | imagesize==1.2.0 51 | iniconfig==1.1.1 52 | iris==1.0.7 53 | Jinja2==2.11.2 54 | jmespath==0.10.0 55 | joblib==0.16.0 56 | Js2Py==0.70 57 | Keras==2.4.3 58 | Keras-Preprocessing==1.1.2 59 | kiwisolver==1.2.0 60 | lxml==4.6.3 61 | Markdown==3.2.2 62 | MarkupSafe==1.1.1 63 | matplotlib==3.3.0 64 | mdolab-baseclasses==1.4.0 65 | memory-profiler==0.57.0 66 | moviepy==1.0.3 67 | msgpack==1.0.2 68 | multidict==5.2.0 69 | mypy-extensions==0.4.3 70 | netCDF4==1.5.3 71 | nodeenv==1.5.0 72 | nose==1.3.7 73 | numpy==1.21.2 74 | oauthlib==3.1.0 75 | opt-einsum==3.3.0 76 | packaging==20.8 77 | pandas==1.2.3 78 | pathspec==0.8.1 79 | petsc==3.14.3 80 | petsc4py==3.14.1 81 | Pillow==6.2.2 82 | pipwin==0.5.0 83 | pluggy==0.13.1 84 | pockets==0.9.1 85 | pre-commit==2.9.3 86 | proglog==0.1.9 87 | proj==0.2.0 88 | protobuf==3.12.2 89 | psutil==5.7.2 90 | py==1.10.0 91 | pyasn1==0.4.8 92 | pyasn1-modules==0.2.8 93 | pyglet==1.5.0 94 | Pygments==2.7.4 95 | pyjsparser==2.7.1 96 | pyparsing==2.4.7 97 | PyPrind==2.11.2 98 | pyproj==3.0.0.post1 99 | pySmartDL==1.3.4 100 | pytest==6.2.1 101 | python-dateutil==2.8.1 102 | pytz==2020.1 103 | PyWavelets==1.1.1 104 | PyYAML==5.4.1 105 | regex==2020.11.13 106 | requests==2.24.0 107 | requests-oauthlib==1.3.0 108 | rsa==4.6 109 | s3transfer==0.3.3 110 | scikit-learn==0.23.1 111 | scipy==1.6.2 112 | seaborn==0.11.1 113 | six==1.15.0 114 | sklearn==0.0 115 | snowballstemmer==2.0.0 116 | soupsieve==2.1 117 | Sphinx==3.4.3 118 | sphinx-copybutton==0.3.1 119 | sphinx-gallery==0.8.2 120 | sphinx-panels==0.5.2 121 | sphinx-rtd-theme==0.5.1 122 | sphinxcontrib-applehelp==1.0.2 123 | sphinxcontrib-devhelp==1.0.2 124 | sphinxcontrib-htmlhelp==1.0.3 125 | sphinxcontrib-jsmath==1.0.1 126 | sphinxcontrib-napoleon==0.7 127 | sphinxcontrib-qthelp==1.0.3 128 | sphinxcontrib-serializinghtml==1.1.4 129 | sqlitedict==1.7.0 130 | tensorboard==2.4.0 131 | tensorboard-plugin-wit==1.7.0 132 | tensorflow==2.4.1 133 | tensorflow-estimator==2.4.0 134 | termcolor==1.1.0 135 | threadpoolctl==2.1.0 136 | toml==0.10.2 137 | 
tqdm==4.59.0 138 | typed-ast==1.4.2 139 | typing-extensions==3.7.4.3 140 | tzlocal==2.1 141 | urllib3==1.25.9 142 | virtualenv==20.3.1 143 | websocket-client==1.2.1 144 | websockets==9.1 145 | Werkzeug==1.0.1 146 | wget==3.2 147 | wrapt==1.12.1 148 | yarl==1.7.0 149 | -------------------------------------------------------------------------------- /results/demand/bilstm/forecasted_time_series_demand_bilstm.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/demand/bilstm/forecasted_time_series_demand_bilstm.pkl -------------------------------------------------------------------------------- /results/demand/bilstm/preformance_summary_demand_bilstm.csv: -------------------------------------------------------------------------------- 1 | PICP,PINC,ACE,MAE,MAPE,RMSE 2 | 91.43518519,90,1.435185185,1459.753738,4.733158189,1903.185308 -------------------------------------------------------------------------------- /results/demand/bilstm/q_all_bilstm/demand_bilstm.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/demand/bilstm/q_all_bilstm/demand_bilstm.h5 -------------------------------------------------------------------------------- /results/demand/seq2seq+temporal+spatial/forecasted_time_series_demand_seq2seq+temporal+spatial.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/demand/seq2seq+temporal+spatial/forecasted_time_series_demand_seq2seq+temporal+spatial.pkl -------------------------------------------------------------------------------- /results/demand/seq2seq+temporal+spatial/preformance_summary_demand_seq2seq+temporal+spatial.csv: -------------------------------------------------------------------------------- 1 | PICP,PINC,ACE,MAE,MAPE,RMSE 2 | 70.2119883,90,-19.7880117,1519.480105,4.961981752,1903.396953 -------------------------------------------------------------------------------- /results/demand/seq2seq+temporal/forecasted_time_series_demand_seq2seq+temporal.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/demand/seq2seq+temporal/forecasted_time_series_demand_seq2seq+temporal.pkl -------------------------------------------------------------------------------- /results/demand/seq2seq+temporal/preformance_summary_demand_seq2seq+temporal.csv: -------------------------------------------------------------------------------- 1 | PICP,PINC,ACE,MAE,MAPE,RMSE 2 | 86.70808967,90,-3.291910331,1457.563216,4.907357214,1890.307257 -------------------------------------------------------------------------------- /results/demand/seq2seq/forecasted_time_series_demand_seq2seq.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/demand/seq2seq/forecasted_time_series_demand_seq2seq.pkl -------------------------------------------------------------------------------- /results/demand/seq2seq/preformance_summary_demand_seq2seq.csv: -------------------------------------------------------------------------------- 1 | PICP,PINC,ACE,MAE,MAPE,RMSE 2 | 90.83820663,90,0.838206628,1315.00029,4.395793398,1715.146304 -------------------------------------------------------------------------------- /results/price/bilstm/forecasted_time_series_price_bilstm.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/price/bilstm/forecasted_time_series_price_bilstm.pkl -------------------------------------------------------------------------------- /results/price/bilstm/preformance_summary_price_bilstm.csv: -------------------------------------------------------------------------------- 1 | PICP,PINC,ACE,MAE,MAPE,RMSE 2 | 72.4537037,90,-17.5462963,6.375387473,inf,8.02840373 -------------------------------------------------------------------------------- /results/price/seq2seq+temporal+spatial/forecasted_time_series_price_seq2seq+temporal+spatial.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/price/seq2seq+temporal+spatial/forecasted_time_series_price_seq2seq+temporal+spatial.pkl -------------------------------------------------------------------------------- /results/price/seq2seq+temporal+spatial/preformance_summary_price_seq2seq+temporal+spatial.csv: -------------------------------------------------------------------------------- 1 | PICP,PINC,ACE,MAE,MAPE,RMSE 2 | 69.57846004,90,-20.42153996,7.507209148,inf,9.576368247 -------------------------------------------------------------------------------- /results/price/seq2seq+temporal/attention_data_price_seq2seq+temporal.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/price/seq2seq+temporal/attention_data_price_seq2seq+temporal.pkl -------------------------------------------------------------------------------- /results/price/seq2seq+temporal/forecasted_time_series_price_seq2seq+temporal.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/price/seq2seq+temporal/forecasted_time_series_price_seq2seq+temporal.pkl -------------------------------------------------------------------------------- /results/price/seq2seq+temporal/preformance_summary_price_seq2seq+temporal.csv: -------------------------------------------------------------------------------- 1 | PICP,PINC,ACE,MAE,MAPE,RMSE 2 | 80.17787524,90,-9.822124756,6.526100553,inf,8.47807083 -------------------------------------------------------------------------------- 
/results/price/seq2seq/forecasted_time_series_price_seq2seq.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/price/seq2seq/forecasted_time_series_price_seq2seq.pkl -------------------------------------------------------------------------------- /results/price/seq2seq/preformance_summary_price_seq2seq.csv: -------------------------------------------------------------------------------- 1 | PICP,PINC,ACE,MAE,MAPE,RMSE 2 | 45.45565302,90,-44.54434698,6.49537643,inf,8.052062264 -------------------------------------------------------------------------------- /results/solar/bilstm/forecasted_time_series_solar_bilstm.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/solar/bilstm/forecasted_time_series_solar_bilstm.pkl -------------------------------------------------------------------------------- /results/solar/bilstm/preformance_summary_solar_bilstm.csv: -------------------------------------------------------------------------------- 1 | PICP,PINC,ACE,MAE,MAPE,RMSE 2 | 92.10526316,90,2.105263158,327.3727615,,689.229032 -------------------------------------------------------------------------------- /results/solar/seq2seq+temporal+spatial/forecasted_time_series_solar_seq2seq+temporal+spatial.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/solar/seq2seq+temporal+spatial/forecasted_time_series_solar_seq2seq+temporal+spatial.pkl -------------------------------------------------------------------------------- /results/solar/seq2seq+temporal+spatial/preformance_summary_solar_seq2seq+temporal+spatial.csv: -------------------------------------------------------------------------------- 1 | PICP,PINC,ACE,MAE,MAPE,RMSE 2 | 95.27290448,90,5.272904483,270.9945811,,585.2357481 -------------------------------------------------------------------------------- /results/solar/seq2seq+temporal+spatial/spatial_attention_data_solar.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/solar/seq2seq+temporal+spatial/spatial_attention_data_solar.pkl -------------------------------------------------------------------------------- /results/solar/seq2seq+temporal/forecasted_time_series_solar_seq2seq+temporal.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/solar/seq2seq+temporal/forecasted_time_series_solar_seq2seq+temporal.pkl -------------------------------------------------------------------------------- /results/solar/seq2seq+temporal/preformance_summary_solar_seq2seq+temporal.csv: -------------------------------------------------------------------------------- 1 | PICP,PINC,ACE,MAE,MAPE,RMSE 2 | 98.0994152,90,8.099415205,291.0802181,,613.8738376 
-------------------------------------------------------------------------------- /results/solar/seq2seq/forecasted_time_series_solar_seq2seq.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/solar/seq2seq/forecasted_time_series_solar_seq2seq.pkl -------------------------------------------------------------------------------- /results/solar/seq2seq/preformance_summary_solar_seq2seq.csv: -------------------------------------------------------------------------------- 1 | PICP,PINC,ACE,PINAW,PINRW,MAE,MAPE,RMSE 2 | 96.0891812865497,89.99999999999999,6.089181286549717,7.249654247663978,1.6922971719864661,300.5354471646511,,655.2354701170318 3 | -------------------------------------------------------------------------------- /results/wind/bilstm/forecasted_time_series_wind_bilstm.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/wind/bilstm/forecasted_time_series_wind_bilstm.pkl -------------------------------------------------------------------------------- /results/wind/bilstm/preformance_summary_wind_bilstm.csv: -------------------------------------------------------------------------------- 1 | PICP,PINC,ACE,MAE,MAPE,RMSE 2 | 87.8411306,90,-2.158869396,1318.7222,30.59062064,1760.6124 -------------------------------------------------------------------------------- /results/wind/seq2seq+temporal+spatial/forecasted_time_series_wind_seq2seq+temporal+spatial.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/wind/seq2seq+temporal+spatial/forecasted_time_series_wind_seq2seq+temporal+spatial.pkl -------------------------------------------------------------------------------- /results/wind/seq2seq+temporal+spatial/preformance_summary_wind_seq2seq+temporal+spatial.csv: -------------------------------------------------------------------------------- 1 | PICP,PINC,ACE,MAE,MAPE,RMSE 2 | 85.47758285,90,-4.522417154,1062.046295,21.25483009,1383.115011 -------------------------------------------------------------------------------- /results/wind/seq2seq+temporal/forecasted_time_series_wind_seq2seq+temporal.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/wind/seq2seq+temporal/forecasted_time_series_wind_seq2seq+temporal.pkl -------------------------------------------------------------------------------- /results/wind/seq2seq+temporal/preformance_summary_wind_seq2seq+temporal.csv: -------------------------------------------------------------------------------- 1 | PICP,PINC,ACE,MAE,MAPE,RMSE 2 | 90.09502924,90,0.09502924,998.0828,19.72910166,1320.7335 -------------------------------------------------------------------------------- /results/wind/seq2seq/forecasted_time_series_wind_seq2seq.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/wind/seq2seq/forecasted_time_series_wind_seq2seq.pkl -------------------------------------------------------------------------------- /results/wind/seq2seq/preformance_summary_wind_seq2seq.csv: -------------------------------------------------------------------------------- 1 | PICP,PINC,ACE,MAE,MAPE,RMSE 2 | 94.79775828,90,4.797758285,937.0275,19.33659166,1251.3384 -------------------------------------------------------------------------------- /scripts/models/_shared/__pycache__/attention_layer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/scripts/models/_shared/__pycache__/attention_layer.cpython-38.pyc -------------------------------------------------------------------------------- /scripts/models/_shared/__pycache__/timeseries_data_generator.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/scripts/models/_shared/__pycache__/timeseries_data_generator.cpython-38.pyc -------------------------------------------------------------------------------- /scripts/models/_shared/attention_layer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.keras 3 | import tensorflow.keras.backend as K 4 | from tensorflow.keras import Model 5 | from tensorflow.keras.layers import Dense 6 | from tensorflow.keras.layers import Activation, concatenate, Dot 7 | 8 | 9 | 10 | class attention(tf.keras.layers.Layer): 11 | 12 | def __init__(self, hidden_units, **kwargs): 13 | # super(attention, self).__init__(hidden_units) 14 | self.hidden_units = hidden_units 15 | super(attention, self).__init__(**kwargs) 16 | 17 | 18 | def build(self, input_shape): 19 | 20 | input_dim = int(input_shape[-1]) 21 | 22 | self.attention_score_vec = Dense(64, name='attention_score_vec') 23 | self.h_t = Dense(64, name='ht') 24 | self.attention_score = Dot(axes=[1, 2], name='attention_score') 25 | self.attention_weight = Activation('softmax', name='attention_weight') 26 | self.context_vector = Dot(axes=[1, 1], name='context_vector') 27 | self.attention_vector = Dense(self.hidden_units, activation='tanh', name='attention_vector') 28 | 29 | super(attention, self).build(input_shape) 30 | 31 | def call(self, enc_output, enc_out, h_state, c_state): 32 | 33 | 34 | score_first_part = self.attention_score_vec(enc_output) 35 | # score_first_part dot last_hidden_state => attention_weights 36 | # (batch_size, time_steps, hidden_size) dot (batch_size, hidden_size) => (batch_size, time_steps) 37 | h_t = concatenate([h_state, enc_out[:,0,:]]) 38 | h_t = self.h_t(h_t) 39 | 40 | score = self.attention_score([h_t, score_first_part]) 41 | 42 | attention_weights = self.attention_weight(score) 43 | # (batch_size, time_steps, hidden_size) dot (batch_size, time_steps) => (batch_size, hidden_size) 44 | context_vector = self.context_vector([enc_output, attention_weights]) 45 | pre_activation = concatenate([context_vector, h_t]) 46 | attention_vector = self.attention_vector(pre_activation) 47 | 48 | attention_weights = 
K.expand_dims(attention_weights, axis=-1) 49 | attention_vector = K.expand_dims(attention_vector, axis=1) 50 | 51 | return [attention_weights, attention_vector] 52 | 53 | def compute_output_shape(self): 54 | return [(input_shape[0], Tx, 1), (input_shape[0], 1, n_s)] 55 | 56 | def get_config(self): 57 | config = super(attention, self).get_config() 58 | config.update({"hidden_units": self.hidden_units}) 59 | return config 60 | -------------------------------------------------------------------------------- /scripts/models/_shared/timeseries_data_generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow 3 | 4 | # as adapted from: https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly 5 | class DataGenerator(tensorflow.keras.utils.Sequence): 6 | 7 | def __init__(self, dataset_name, x_length, y_length, hidden_states, batch_size, shuffle): 8 | self.dataset_name = dataset_name 9 | self.batch_size = batch_size 10 | self.shuffle = shuffle 11 | self.n_s = hidden_states 12 | self.xlen = x_length 13 | self.ylen = y_length 14 | self.index_ref = 0 15 | self.on_epoch_end() 16 | 17 | def __len__(self): 18 | # 'number of batches per Epoch' 19 | return int(np.floor((self.ylen - input_seq_size - (output_seq_size-1)) / self.batch_size)) 20 | 21 | def __getitem__(self, index): 22 | 23 | # input and output indexes relative current batch size and data generator index reference 24 | input_indexes = self.input_indexes[(index*self.batch_size) : (index*self.batch_size) + (self.batch_size + (input_seq_size-1))] 25 | output_indexes = self.output_indexes[(index*self.batch_size) + input_seq_size : (index*self.batch_size) + input_seq_size + (self.batch_size + (output_seq_size-1))] 26 | 27 | # Generate data 28 | (X_train1, X_train2, X_train3, X_train4, s0, c0), y_train = self.__data_generation(input_indexes, output_indexes) 29 | 30 | # replicate labels for each quantile 31 | y_trues = [y_train for i in quantiles] 32 | 33 | # extend true values for spatial and temporal attention (only relavant if compiled model used for inference) 34 | # y_trues.extend([[], []]) 35 | 36 | return (X_train1, X_train2, X_train3, X_train4, s0, c0), (y_trues) # pass empty training outputs to extract extract attentions 37 | 38 | def on_epoch_end(self): 39 | # set length of indexes for each epoch 40 | self.input_indexes = np.arange(self.xlen) 41 | self.output_indexes = np.arange(self.ylen) 42 | 43 | if self.shuffle == True: 44 | np.random.shuffle(self.input_indexes) 45 | 46 | def to_sequence(self, x1, x2, x3, x4, y): 47 | # convert timeseries batch in sequences 48 | input_start, output_start = 0, 0 49 | 50 | seqX1, seqX2, seqX3, seqX4, seqY = [], [], [], [], [] 51 | 52 | while (input_start + input_seq_size) <= len(x1): 53 | # offset handled during pre-processing 54 | input_end = input_start + input_seq_size 55 | output_end = output_start + output_seq_size 56 | 57 | # inputs 58 | seqX1.append(x1[input_start:input_end]) 59 | seqX2.append(x2[input_start:input_end]) 60 | 61 | # outputs 62 | seqX3.append(x3[output_start:output_end]) 63 | seqX4.append(x4[output_start:output_end]) 64 | seqY.append(y[output_start:output_end]) 65 | 66 | input_start += 1 67 | output_start += 1 68 | 69 | # convert to numpy arrays 70 | seqX1, seqX2, seqX3, seqX4, seqY = np.array(seqX1), np.array(seqX2), np.array(seqX3), np.array(seqX4), np.array(seqY) 71 | 72 | return seqX1, seqX2, seqX3, seqX4, seqY 73 | 74 | def __data_generation(self, input_indexes, output_indexes): 75 | 
76 | # load data for current batch 77 | f = h5py.File(f"../../data/processed/{model_type}/{self.dataset_name}", "r") 78 | X_train1 = f['train_set']['X1_train'][input_indexes] # main feature array 79 | X_train2 = f['train_set']['X2_train'][input_indexes] # input time features from feature engineering 80 | X_train3 = f['train_set']['X3_train'][output_indexes] # output time features from feature engineering 81 | 82 | # no spatial data if model is training for price forecasting 83 | if model_type != 'price': 84 | X_train4 = f['train_set']['X1_train'][output_indexes][:,:,:,1:] # all nwp features apart from the generation itself 85 | X_train4 = np.average(X_train4, axis=(1,2)) 86 | else: 87 | X_train4 = f['train_set']['X1_train'][output_indexes][:,1:] 88 | 89 | y_train = f['train_set']['y_train'][output_indexes] 90 | 91 | f.close() 92 | 93 | # convert to sequence data 94 | X_train1, X_train2, X_train3, X_train4, y_train = self.to_sequence(X_train1, X_train2, X_train3, X_train4, y_train) 95 | 96 | s0 = np.zeros((self.batch_size, self.n_s)) 97 | c0 = np.zeros((self.batch_size, self.n_s)) 98 | 99 | return (X_train1, X_train2, X_train3, X_train4, s0, c0), y_train -------------------------------------------------------------------------------- /scripts/models/bilstm_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys, os 3 | import h5py 4 | import tensorflow as tf 5 | import tensorflow.keras 6 | import tensorflow.keras.backend as K 7 | from tensorflow.keras import Model 8 | from tensorflow.keras.layers import Conv2D, Bidirectional, Dense, TimeDistributed, LSTM 9 | from tensorflow.keras.layers import Input, Activation, AveragePooling2D, Lambda, concatenate, Reshape 10 | from keras.backend import sigmoid 11 | from keras.utils.generic_utils import get_custom_objects 12 | 13 | np.set_printoptions(threshold=sys.maxsize) 14 | tf.random.set_seed(180) 15 | 16 | ###########################################_____SET_MODEL_PARAMETERS_____############################################ 17 | model_type ="solar" 18 | 19 | # declare dataset file 20 | dataset_name = f'dataset_{model_type}.hdf5' 21 | 22 | # declare quantiles for model 23 | quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95] 24 | 25 | # get useful size parameters 26 | f = h5py.File(f"../../data/processed/{model_type}/{dataset_name}", "r") 27 | features = np.empty_like(f['train_set']['X1_train'][0]) 28 | times_in = np.empty_like(f['train_set']['X2_train'][0]) 29 | times_out = np.empty_like(f['train_set']['X3_train'][0]) 30 | labels = np.empty_like(f['train_set']['y_train'][0]) 31 | x_len = f['train_set']['X1_train'].shape[0] 32 | y_len = f['train_set']['y_train'].shape[0] 33 | f.close() 34 | 35 | # input / output sequence sizes 36 | input_seq_size = 336 37 | output_seq_size = 48 38 | n_s = 32 # number of hidden states used through model 39 | 40 | ###########################################_____DATA_GENERATOR_____################################################# 41 | 42 | # data generator input parameters - avoid shuffle in this case 43 | params = {'batch_size': 64, 44 | 'shuffle': False } 45 | 46 | class DataGenerator(tensorflow.keras.utils.Sequence): 47 | 48 | def __init__(self, dataset_name, x_length, y_length, batch_size, shuffle): 49 | self.dataset_name = dataset_name 50 | self.batch_size = batch_size 51 | self.shuffle = shuffle 52 | self.xlen = x_length 53 | self.ylen = y_length 54 | self.index_ref = 0 55 | self.on_epoch_end() 56 | 57 | def __len__(self): 58 | # 
'number of batches per Epoch' 59 | # return int(np.floor((self.xlen - (input_seq_size-1)) / self.batch_size)) 60 | return int(np.floor((self.ylen - input_seq_size - (output_seq_size-1)) / self.batch_size)) 61 | 62 | def __getitem__(self, index): 63 | 64 | input_indexes = self.input_indexes[(index*self.batch_size) : (index*self.batch_size) + (self.batch_size + (input_seq_size-1))] 65 | output_indexes = self.output_indexes[(index*self.batch_size) + input_seq_size : (index*self.batch_size) + input_seq_size + (self.batch_size + (output_seq_size-1))] 66 | 67 | # Generate data 68 | (X_train1, X_train2), y_train = self.__data_generation(input_indexes, output_indexes) 69 | 70 | y_trues = [y_train for i in quantiles] 71 | 72 | return (X_train1, X_train2), (y_trues) # pass empty training outputs to extract extract attentions 73 | 74 | def on_epoch_end(self): 75 | # set length of indexes for each epoch 76 | self.input_indexes = np.arange(self.xlen) 77 | self.output_indexes = np.arange(self.ylen) 78 | 79 | if self.shuffle == True: 80 | np.random.shuffle(self.input_indexes) 81 | 82 | def to_sequence(self, x1, x2, y): 83 | # convert timeseries batch in sequences 84 | input_start, output_start = 0, 0 85 | 86 | seqX1, seqX2, seqX3, seqX4, seqY = [], [], [], [], [] 87 | 88 | while (input_start + input_seq_size) <= len(x1): 89 | # offset handled during pre-processing 90 | input_end = input_start + input_seq_size 91 | output_end = output_start + output_seq_size 92 | 93 | # inputs 94 | seqX1.append(x1[input_start:input_end]) 95 | seqX2.append(x2[input_start:input_end]) 96 | 97 | # outputs 98 | seqY.append(y[output_start:output_end]) 99 | 100 | input_start += 1 101 | output_start += 1 102 | 103 | seqX1, seqX2, seqY = np.array(seqX1), np.array(seqX2), np.array(seqY) 104 | 105 | return seqX1, seqX2, seqY 106 | 107 | def __data_generation(self, input_indexes, output_indexes): 108 | 109 | f = h5py.File(f"../../data/processed/{model_type}/{self.dataset_name}", "r") 110 | 111 | X_train2 = f['train_set']['X2_train'][input_indexes] 112 | 113 | if model_type != 'price': 114 | X_train1 = f['train_set']['X1_train'][input_indexes][:,:,:,:] 115 | X_train1 = np.average(X_train1, axis=(1,2)) 116 | else: 117 | X_train1 = f['train_set']['X1_train'][input_indexes][:,:] 118 | 119 | 120 | y_train = f['train_set']['y_train'][output_indexes] 121 | # decoder_input = f['train_set']['y_train'][output_indexes] 122 | f.close() 123 | 124 | # convert to sequence data 125 | X_train1, X_train2, y_train = self.to_sequence(X_train1, X_train2, y_train) 126 | 127 | 128 | return (X_train1, X_train2), y_train 129 | 130 | training_generator = DataGenerator(dataset_name = dataset_name, x_length = x_len, y_length = y_len, **params) 131 | 132 | ###########################################_____MODEL_ARCHITECTURE_____################################################# 133 | 134 | # cpature some more useful dimensions 135 | Tx = input_seq_size 136 | Ty = output_seq_size 137 | 138 | channels = features.shape[-1] 139 | 140 | times_in_dim = times_in.shape[-1] 141 | times_out_dim = times_out.shape[-1] 142 | 143 | # make custom activation - swish 144 | def swish(x, beta = 1): 145 | return (x * sigmoid(beta * x)) 146 | 147 | # add swish activation to keras 148 | get_custom_objects().update({'swish': Activation(swish)}) 149 | 150 | # define inputs for model 151 | x_input = Input(shape=(Tx, channels)) 152 | 153 | times_in = Input(shape=(Tx, times_in_dim)) 154 | times_out = Input(shape=(Ty, times_out_dim)) 155 | out_nwp = Input(shape=(Ty, channels-1)) 156 | 
s_state0 = Input(shape=(32,)) 157 | c_state0 = Input(shape=(32,)) 158 | 159 | # create empty list for outputs 160 | quantile_predictions = [] 161 | 162 | for q in quantiles: 163 | 164 | combined_inputs = concatenate([x_input, times_in], axis=-1, name=f'concat_q_{q}') 165 | 166 | layer1, _, _, _, _ = Bidirectional(LSTM(32, return_sequences = False, return_state = True), name=f'biLSTM_q_{q}')(combined_inputs) 167 | layer2 = Dense(48, name=f'dense1_q_{q}')(layer1) 168 | 169 | if model_type == 'solar': 170 | layer2 = Activation('relu', name=f'relu_act_q_{q}')(layer2) 171 | 172 | quantile_predictions.append(layer2) 173 | 174 | model = Model(inputs = [x_input, times_in], outputs = quantile_predictions) 175 | 176 | 177 | ###########################################_____MODEL_TRAINING_____################################################# 178 | 179 | #include clipvalue in optmisier 180 | optimizer = tensorflow.keras.optimizers.Adam(learning_rate = 0.001) 181 | 182 | # define loss for each quantile 183 | q_losses = [lambda y,f: K.mean(K.maximum(q*(y - f), (q-1) * (y - f)), axis = -1) for q in quantiles] 184 | 185 | # append additional empty losses for temporal and spatial encoders 186 | # q_losses.append([None,None]) 187 | 188 | # compile and train model 189 | model.compile(loss = q_losses, optimizer= optimizer) 190 | print(model.summary()) 191 | model.fit(training_generator, epochs = 20) 192 | 193 | # save models - saving encoders individually for inference 194 | os.mkdir(f'../../models/bilstm/{model_type}') 195 | model.save(f'../../models/bilstm/{model_type}/{model_type}_bilstm.h5') 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | -------------------------------------------------------------------------------- /scripts/models/inference+testing/bilstm_seq2seq_predictions.py: -------------------------------------------------------------------------------- 1 | import keras 2 | from keras.models import load_model, model_from_json 3 | from keras.backend import sigmoid 4 | from tensorflow.keras.layers import Input, Activation, concatenate, Lambda 5 | import numpy as np 6 | import h5py 7 | from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error 8 | from pickle import load, dump 9 | import matplotlib.pyplot as plt 10 | from keras.utils.generic_utils import get_custom_objects 11 | 12 | 13 | from keras.utils.generic_utils import get_custom_objects# import custom classes 14 | from _shared.attention_layer import attention 15 | 16 | # script to produce test-set predictions for Bi-directional LSTM model 17 | 18 | # declare model type 19 | model_type = 'seq2seq+temporal' # - bilstm, seq2seq, seq2seq+temporal 20 | 21 | # indicate model type 22 | forecast_var = 'price' 23 | 24 | # quantiles - needed for key references - ensure aligns with trained model 25 | quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95] 26 | 27 | # define swish function for use within comptile model 28 | def swish(x, beta = 1): 29 | return (x * sigmoid(beta * x)) 30 | 31 | # Below in place of swish you can take any custom key for the name 32 | get_custom_objects().update({'swish': Activation(swish)}) 33 | 34 | # load trainined model 35 | if model_type != 'seq2seq+temporal': 36 | model = load_model(f'../../../Models/{model_type}/{forecast_var}/q_all_{model_type}/{forecast_var}_{model_type}.h5', custom_objects = {'': lambda y,f: defined_loss(q,y,f)}) 37 | else: 38 | model = 
load_model(f'../../../Models/{model_type}/{forecast_var}/q_all_{model_type}/{forecast_var}_{model_type}.h5', custom_objects = {'': lambda y,f: defined_loss(q,y,f), 'attention': attention, 'Activation': Activation(swish)}) 39 | 40 | # load time references 41 | with open(f'../../../data/processed/{forecast_var}/time_refs_{forecast_var}_v2.pkl', 'rb') as time_file: 42 | time_refs = load(time_file) 43 | 44 | input_times = time_refs[f'input_times_test'] 45 | output_times = time_refs[f'output_times_test'] 46 | 47 | time_file.close() 48 | 49 | # load and process data 50 | f = h5py.File(f"../../../data/processed/{forecast_var}/dataset_{forecast_var}.hdf5", "r") 51 | 52 | set_type = 'test' 53 | X_train1 = f[f'{set_type}_set'][f'X1_{set_type}'] 54 | X_train2 = f[f'{set_type}_set'][f'X2_{set_type}'] 55 | X_train3 = f[f'{set_type}_set'][f'X3_{set_type}'] 56 | X_train4 = f[f'{set_type}_set'][f'X1_{set_type}'] 57 | y_train = f[f'{set_type}_set'][f'y_{set_type}'] 58 | 59 | input_seq_size = 336 60 | output_seq_size = 48 61 | 62 | input_start, output_start = 0, input_seq_size 63 | 64 | seqX1, seqX2, seqX3, seqX4, seqY = [], [], [], [], [] 65 | 66 | times_in, times_out = [], [] 67 | 68 | # sequence the data 69 | while (output_start + output_seq_size) <= len(y_train): 70 | # offset handled during pre-processing 71 | input_end = input_start + input_seq_size 72 | output_end = output_start + output_seq_size 73 | 74 | # inputs 75 | seqX1.append(X_train1[input_start:input_end]) 76 | seqX2.append(X_train2[input_start:input_end]) 77 | 78 | times_in.append(input_times[input_start:input_end]) 79 | 80 | # outputs 81 | seqY.append(y_train[output_start:output_end]) 82 | seqX3.append(X_train3[output_start:output_end]) 83 | seqX4.append(X_train4[output_start:output_end]) 84 | times_out.append(output_times[output_start:output_end]) 85 | 86 | input_start += output_seq_size 87 | output_start += output_seq_size 88 | 89 | x1, x2, x3, x4, y = np.array(seqX1), np.array(seqX2), np.array(seqX3), np.array(seqX4), np.array(seqY) 90 | times_in, times_out = np.array(times_in), np.array(times_out) 91 | 92 | f.close() 93 | 94 | # load scaler 95 | scaler = load(open(f'../../../data/processed/{forecast_var}/_scaler/scaler_{forecast_var}.pkl', 'rb')) 96 | 97 | # average inputs over spatial dimensions 98 | if forecast_var != 'price': 99 | if model_type != 'seq2seq+temporal': 100 | x1 = np.average(x1, axis=(2,3)) 101 | 102 | x4 = np.average(x4, axis=(2,3)) 103 | x4 = x4[:,:,1:] 104 | else: 105 | x4 = x4[:,:,:-1] 106 | 107 | # cache test set length 108 | test_len = y.shape[0] 109 | 110 | # delcare intial hidden states 111 | s0 = np.zeros((y.shape[0], 32,)) 112 | c0 = np.zeros((y.shape[0], 32,)) 113 | 114 | 115 | print('predicting') 116 | if model_type == 'bilstm': 117 | results = model.predict([x1, x2]) 118 | elif model_type == 'seq2seq+temporal': 119 | results = model.predict([x1, x2, x3, x4, s0, c0]) 120 | quantile_temporal_attns = results[-1] 121 | else: 122 | results = model.predict([x1, x2, x3, x4]) 123 | 124 | 125 | results_dict = {} 126 | 127 | # inverse transform predictions + transfer to dictionary 128 | for idx in range(len(quantiles)): 129 | results_dict[str(quantiles[idx])] = scaler.inverse_transform(results[idx].reshape(-1,1)).reshape(test_len, output_seq_size, 1) 130 | 131 | # inverse transform true values 132 | y_true = scaler.inverse_transform(y.reshape(-1,1)).reshape(test_len, output_seq_size, 1) 133 | 134 | # create time_refs dictionary 135 | times_refs = {'input_times': times_in, 'output_times': times_out} 136 | 137 | # 
create results dictionary for performance analysis / plotting 138 | results_dict['time_refs'] = times_refs 139 | results_dict['y_true'] = y_true 140 | 141 | print(results_dict.keys()) 142 | 143 | # save results - forecasted timeseries matrix 144 | with open(f'../../../results/{forecast_var}/{model_type}/forecasted_time_series_{forecast_var}_{model_type}.pkl', 'wb') as ts_file: 145 | dump(results_dict, ts_file) 146 | 147 | # save results - forecasted tempotal attention matrix 148 | if (model_type == 'seq2seq+temporal') and (forecast_var == 'price'): 149 | 150 | # construct attention results dictionary 151 | attention_results = {} 152 | attention_results['0.5'] = quantile_temporal_attns 153 | attention_results['time_refs'] = times_refs 154 | attention_results['input_features'] = x1 155 | 156 | with open(f'../../../results/{forecast_var}/{model_type}/attention_data_{forecast_var}_seq2seq+temporal.pkl', 'wb') as attention_file: 157 | dump(attention_results, attention_file) 158 | 159 | 160 | 161 | 162 | 163 | -------------------------------------------------------------------------------- /scripts/models/inference+testing/inference_model_seq2seq+spatial+temporal_attn.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import os 4 | import sys 5 | from sklearn.preprocessing import MinMaxScaler 6 | import tensorflow as tf 7 | from keras.models import load_model 8 | from keras import Model 9 | import tensorflow.keras 10 | import tensorflow.keras.backend as K 11 | from tensorflow.keras.layers import Input, Activation, concatenate, Lambda 12 | from tensorflow.keras.layers import Reshape 13 | from keras.callbacks import ModelCheckpoint 14 | from keras.backend import sigmoid 15 | from keras.utils.generic_utils import get_custom_objects 16 | from pickle import load 17 | import matplotlib.pyplot as plt 18 | import scipy 19 | from sklearn.metrics import mean_absolute_error, mean_squared_error 20 | import h5py 21 | 22 | import matplotlib.pyplot as plt 23 | import matplotlib.gridspec as gridspec 24 | from matplotlib.animation import FuncAnimation 25 | import seaborn as sns 26 | from pickle import dump, load 27 | 28 | import geopandas 29 | import contextily as ctx 30 | 31 | # import custom classes 32 | from _shared.attention_layer import attention 33 | 34 | 35 | 36 | # choose model type to run test for 37 | model_type ="solar" 38 | 39 | # declare dataset file 40 | dataset_name = f'dataset_{model_type}.hdf5' 41 | 42 | # choose to activate plot functions 43 | plot_temporal_attention = False 44 | plot_spatial_attention = False 45 | 46 | # declare quantiles 47 | quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95] 48 | 49 | # index to declare which test result to plot 50 | plot_ref = 0 51 | 52 | # load scaler 53 | scaler = load(open(f'../../data/processed/{model_type}/_scaler/scaler_{model_type}.pkl', 'rb')) 54 | 55 | # collect param sizes 56 | f = h5py.File(f"../../data/processed/{model_type}/{dataset_name}", "r") 57 | features = np.empty_like(f['train_set']['X1_train'][0]) 58 | times_in = np.empty_like(f['train_set']['X2_train'][0]) 59 | times_out = np.empty_like(f['train_set']['X3_train'][0]) 60 | labels = np.empty_like(f['train_set']['y_train'][0]) 61 | x_len = f['train_set']['X1_train'].shape[0] 62 | y_len = f['train_set']['y_train'].shape[0] 63 | print('size parameters loaded') 64 | 65 | # additional params dependent on wether spatial data is present 66 | if model_type != "price": 67 | height, width, channels 
= features.shape[0], features.shape[1], features.shape[2] 68 | else: 69 | channels = features.shape[-1] 70 | 71 | times_in_dim = times_in.shape[-1] 72 | times_out_dim = times_out.shape[-1] 73 | 74 | # decalre additional usefule params 75 | Tx = 336 76 | Ty = 48 77 | n_s = 32 78 | input_seq_size = Tx 79 | output_seq_size = Ty 80 | 81 | # define swish function for use within comptile model 82 | def swish(x, beta = 1): 83 | return (x * sigmoid(beta * x)) 84 | 85 | # Below in place of swish you can take any custom key for the name 86 | get_custom_objects().update({'swish': Activation(swish)}) 87 | 88 | # load main model 89 | model = load_model(f'../../models/seq2seq+temporal+spatial/{model_type}/{model_type}_main.h5', custom_objects = {'': lambda y,f: defined_loss(q,y,f), 'attention': attention, 'Activation': Activation(swish)}) 90 | 91 | # read encoder models - igoring the spatail encoder in the price forecasting case 92 | temporal_enc = load_model(f'../../models/seq2seq+temporal+spatial/{model_type}/{model_type}_temporal_enc.h5') 93 | 94 | if model_type != "price": 95 | spatial_enc = load_model(f'../../models/seq2seq+temporal+spatial/{model_type}/{model_type}_spatial_enc.h5') 96 | 97 | # load and process data 98 | f = h5py.File(f"../../data/processed/{model_type}/{dataset_name}", "r") 99 | 100 | # load test or train data - too much memory to load all data, so just load segment 101 | set_type = 'test' 102 | X_train1 = f[f'{set_type}_set'][f'X1_{set_type}'] 103 | X_train2 = f[f'{set_type}_set'][f'X2_{set_type}'] 104 | X_train3 = f[f'{set_type}_set'][f'X3_{set_type}'] 105 | X_train4 = f[f'{set_type}_set'][f'X1_{set_type}'] 106 | y_train = f[f'{set_type}_set'][f'y_{set_type}'] 107 | 108 | # get time relevant time references 109 | with open(f'../../data/processed/{model_type}/time_refs_{model_type}.pkl', 'rb') as time_file: 110 | time_refs = load(time_file) 111 | 112 | input_times = time_refs[f'input_times_{set_type}'] 113 | output_times = time_refs[f'output_times_{set_type}'] 114 | 115 | time_file.close() 116 | 117 | # begin sequencing of data 118 | input_start, output_start = 0, input_seq_size 119 | 120 | seqX1, seqX2, seqX3, seqX4, seqY = [], [], [], [], [] 121 | 122 | times_in, times_out = [], [] 123 | 124 | while (output_start + output_seq_size) <= len(y_train): 125 | # increment indexes for windowing of data 126 | input_end = input_start + input_seq_size 127 | output_end = output_start + output_seq_size 128 | 129 | # inputs 130 | seqX1.append(X_train1[input_start:input_end]) 131 | seqX2.append(X_train2[input_start:input_end]) 132 | times_in.append(input_times[input_start:input_end]) 133 | 134 | # outputs 135 | seqX3.append(X_train3[output_start:output_end]) 136 | if model_type != 'price': 137 | nwp_data = X_train4[output_start:output_end][:,:,:,1:] 138 | nwp_data = np.average(nwp_data, axis=(1,2)) 139 | else: 140 | nwp_data = X_train4[output_start:output_end][:,1:] 141 | seqX4.append(nwp_data) 142 | seqY.append(y_train[output_start:output_end]) 143 | times_out.append(output_times[output_start:output_end]) 144 | 145 | input_start += output_seq_size 146 | output_start += output_seq_size 147 | 148 | # make sure all are numpy arrays 149 | x1, x2, x3, x4, y = np.array(seqX1), np.array(seqX2), np.array(seqX3), np.array(seqX4), np.array(seqY) 150 | times_in, times_out = np.array(times_in), np.array(times_out) 151 | f.close() 152 | 153 | # scale actual values 154 | y_idx = y.shape[0] 155 | y = scaler.inverse_transform(y.reshape(-1,1)).reshape(y_idx, Ty, 1) 156 | 157 | # declare intial hidden states 
158 | s0 = np.zeros((1, n_s)) 159 | c0 = np.zeros((1, n_s)) 160 | 161 | # function for inference decoder model - one for each quantile 162 | def inference_dec_model(quantile): 163 | 164 | # Encoder outputs for setup 165 | ccn_enc_output_test = Input(shape=(320, 128)) 166 | lstm_enc_output_test = Input(shape=(Tx, n_s*2)) #+ times_in_dim 167 | prev_prediction = Input(shape=(1, 1)) 168 | 169 | # Decoder Input 170 | times_in = Input(shape=(1, times_in_dim)) 171 | times_out = Input(shape=(1, times_out_dim)) 172 | out_nwp = Input(shape=(1, channels-1)) 173 | s_state0 = Input(shape=(32,)) 174 | c_state0 = Input(shape=(32,)) 175 | if model_type != "price": 176 | decoder_input = Input(shape=(1, times_out_dim + (channels-1))) 177 | else: 178 | decoder_input = Input(shape=(1, times_out_dim)) 179 | 180 | # define input for encoder 181 | if model_type != 'price': 182 | enc_in = concatenate([out_nwp, times_out], axis=-1) 183 | else: 184 | enc_in = times_out 185 | 186 | # context and previous output 187 | attn_weights_temp_test, context = model.get_layer(f'temporal_attention_q_{quantile}')(lstm_enc_output_test, enc_in, s_state0, c_state0) 188 | 189 | if model_type != 'price': 190 | attn_weights_spat_test, context_spat_test = model.get_layer(f'spatial_attention_q_{quantile}')(ccn_enc_output_test, enc_in, s_state0, c_state0) 191 | 192 | # context & previous output combine 193 | context = concatenate([context, context_spat_test], axis=-1) 194 | 195 | decoder_input_with_prev = concatenate([decoder_input, prev_prediction]) 196 | 197 | # Decoder inference 198 | dec_output, s_state, c_state = model.get_layer(f'decoder_q_{quantile}')(decoder_input_with_prev, initial_state=[s_state0, c_state0]) 199 | 200 | # combine context and prediction 201 | prediction = concatenate([context, K.expand_dims(dec_output,axis=1)]) 202 | 203 | # final dense layer 204 | pred_test = model.get_layer(f'dense1_q_{quantile}')(prediction) 205 | pred_test = model.get_layer(f'dense3_q_{quantile}')(pred_test) 206 | 207 | if model_type == "solar": 208 | pred_test = model.get_layer(f'relu_act_q_{quantile}')(pred_test) 209 | 210 | # Inference Model 211 | if model_type != 'price': 212 | deoceder_test_model = Model(inputs=[times_in, times_out, out_nwp, decoder_input, ccn_enc_output_test, lstm_enc_output_test, prev_prediction, s_state0, c_state0], outputs=[pred_test, s_state, c_state, attn_weights_temp_test, attn_weights_spat_test]) 213 | else: 214 | deoceder_test_model = Model(inputs=[times_in, times_out, out_nwp, decoder_input, lstm_enc_output_test, prev_prediction, s_state0, c_state0], outputs=[pred_test, s_state, c_state, attn_weights_temp_test]) 215 | return deoceder_test_model 216 | 217 | # dictionary to store decoder models 218 | decoder_models = {} 219 | 220 | # instantiate model for each quantile 221 | for q in quantiles: 222 | decoder_models[f'{q}'] = inference_dec_model(q) 223 | 224 | # store predictions 225 | predictions = {} 226 | quantile_temporal_attns = {} 227 | quantile_spatial_attns = {} 228 | 229 | # loop through each sample, passing individually to model 230 | for q in quantiles: 231 | print(q) 232 | 233 | # set hidden states to zero 234 | s_state, c_state = s0, c0 235 | 236 | # empty arrays to store all results 237 | total_pred = np.empty((x1.shape[0], Ty, 1)) 238 | total_temp = np.empty((x1.shape[0], Tx, Ty)) 239 | 240 | if model_type != 'price': 241 | total_spat = np.empty((x1.shape[0], 320, Ty)) # 320 is the fixed spatial attention res 242 | 243 | decoder = decoder_models[f'{q}'] 244 | 245 | for idx in range(x1.shape[0]): # 
loop through each sample, to keep track of hidden states 246 | 247 | # create empty results for results per sample 248 | outputs = [] 249 | spatial_attns = [] 250 | temporal_attns = [] 251 | 252 | # create final inference model 253 | lstm_enc_output, enc_s_state, enc_c_state = temporal_enc([x1[idx:idx+1], x2[idx:idx+1]]) 254 | 255 | if model_type != 'price': 256 | ccn_enc_output = spatial_enc(x1[idx:idx+1]) 257 | intial_in = np.average(x1[idx:idx+1], axis=(2,3)) 258 | else: 259 | intial_in = x1[idx:idx+1] 260 | 261 | prev_prediction = intial_in[:,-1:,0:1] 262 | 263 | for ts in range(Ty): 264 | 265 | if model_type != 'price': 266 | # declare decoder input 267 | if ts > 0: 268 | decoder_input = concatenate([x4[idx:idx+1,ts-1:ts,:], x3[idx:idx+1,ts-1:ts,:]], axis=-1) 269 | else: 270 | decoder_input = concatenate([intial_in[:,-1:,1:], x2[idx:idx+1,-1:,:]], axis=-1) 271 | else: 272 | if ts > 0: 273 | decoder_input = x3[idx:idx+1,ts-1:ts,:] 274 | else: 275 | decoder_input = x2[idx:idx+1,-1:,:] 276 | 277 | if model_type != 'price': 278 | pred, s_state, c_state, attn_weights_temp_test, attn_weights_spat_test = decoder([x2[idx:idx+1,ts:ts+1,:], x3[idx:idx+1,ts:ts+1,:], x4[idx:idx+1,ts:ts+1,:], decoder_input, ccn_enc_output, lstm_enc_output, prev_prediction, s_state, c_state]) 279 | spatial_attns.append(attn_weights_spat_test) 280 | else: 281 | pred, s_state, c_state, attn_weights_temp_test = decoder([x2[idx:idx+1,ts:ts+1,:], x3[idx:idx+1,ts:ts+1,:], x4[idx:idx+1,ts:ts+1,:], decoder_input, lstm_enc_output, prev_prediction, s_state, c_state]) 282 | 283 | prev_prediction = pred 284 | 285 | outputs.append(pred) 286 | temporal_attns.append(attn_weights_temp_test) 287 | 288 | combined_outputs = np.concatenate(outputs, axis=1) 289 | combined_temp_attn = np.concatenate(temporal_attns, axis=-1) 290 | combined_spat_attn = np.concatenate(spatial_attns, axis=-1) 291 | 292 | total_pred[idx, : , :] = scaler.inverse_transform(combined_outputs[0,:,:]) 293 | total_temp[idx, : , :] = combined_temp_attn 294 | 295 | if model_type != 'price': 296 | combined_spat_attn = np.concatenate(spatial_attns, axis=-1) 297 | total_spat[idx, : , :] = combined_spat_attn 298 | 299 | predictions[f'{q}'] = total_pred 300 | quantile_temporal_attns[f'{q}'] = total_temp 301 | quantile_spatial_attns[f'{q}'] = total_spat 302 | 303 | # plot predictions for specified index 304 | for idx, (key, values) in enumerate(predictions.items()): 305 | plt.plot(values[plot_ref:plot_ref+7,:].flatten(), label=f"prediction_{key}") 306 | 307 | plt.plot(y[plot_ref:plot_ref+7,:,0].flatten(), label="actual") 308 | plt.legend() 309 | plt.show() 310 | 311 | 312 | # plot temporal attention (quantile 0.5) 313 | att_w_temp = np.transpose(quantile_temporal_attns['0.5'][plot_ref]) 314 | if model_type != "price": 315 | x = np.average(x1, axis=(2,3))[plot_ref, :] 316 | else: 317 | x = x1[plot_ref, :] 318 | 319 | y_attn = y[plot_ref, :, 0] 320 | y_hat = predictions['0.5'][plot_ref, :] 321 | 322 | #make attention plotting function 323 | def temporal_attention_graph(x, y, att_w_temp): 324 | 325 | fig = plt.figure(figsize=(24, 8)) 326 | gs = gridspec.GridSpec(ncols=90, nrows=100) 327 | 328 | upper_axis = fig.add_subplot(gs[0:20, 10:75]) 329 | left_axis = fig.add_subplot(gs[25:, 0:8]) 330 | atten_axis = fig.add_subplot(gs[25:, 10:]) 331 | 332 | upper_axis.plot(x) 333 | upper_axis.set_xlim([0, Tx]) 334 | upper_axis.set_ylim([0, 1]) 335 | upper_axis.set_xticks(range(0, Tx)) 336 | upper_axis.set_xticklabels(range(0, Tx)) 337 | 338 | left_axis.plot(y, range(0,Ty), 
label='Prediction') 339 | left_axis.plot(y_hat, range(0,Ty), label='True') 340 | left_axis.set_ylim([0, Ty]) 341 | left_axis.set_yticks(range(0, Ty, 6)) 342 | left_axis.set_yticklabels(range(0, Ty, 6)) 343 | left_axis.invert_yaxis() 344 | 345 | sns.heatmap(att_w_temp, cmap='flare', ax = atten_axis, vmin=0, vmax=0.001) 346 | atten_axis.set_xticks(range(0, Tx)) 347 | atten_axis.set_xticklabels(range(0, Tx)) 348 | atten_axis.set_yticks(range(0, Ty, 4)) 349 | atten_axis.set_yticklabels(range(0, Ty, 4)) 350 | 351 | plt.show() 352 | 353 | 354 | if plot_temporal_attention is True: 355 | temporal_attention_graph(x, y_attn, att_w_temp) 356 | 357 | 358 | 359 | # plot spatial attention 360 | def plot_spatial_predictions(spatial_data, title, height_scale, width_scale, frame_num): 361 | 362 | fig = plt.figure(figsize=[8,10]) # a new figure window 363 | ax_set = fig.add_subplot(1, 1, 1) 364 | 365 | # create baseline map 366 | # spatial data on UK basemap 367 | df = pd.DataFrame({ 368 | 'LAT': [49.78, 61.03], 369 | 'LON': [-11.95, 1.55], 370 | }) 371 | 372 | geo_df = geopandas.GeoDataFrame(df, crs = {'init': 'epsg:4326'}, 373 | geometry=geopandas.points_from_xy(df.LON, df.LAT)).to_crs(epsg=3857) 374 | 375 | ax = geo_df.plot( 376 | figsize= (8,10), 377 | alpha = 0, 378 | ax=ax_set, 379 | ) 380 | 381 | plt.title(title) 382 | ax.set_axis_off() 383 | 384 | # add basemap 385 | url = 'http://tile.stamen.com/terrain/{z}/{x}/{y}.png' 386 | zoom = 10 387 | xmin, xmax, ymin, ymax = ax.axis() 388 | basemap, extent = ctx.bounds2img(xmin, ymin, xmax, ymax, zoom=zoom, url=url) 389 | ax.imshow(basemap, extent=extent, interpolation='gaussian') 390 | attn_over = np.resize(spatial_data[0], (height_scale, width_scale)) 391 | 392 | gb_shape = geopandas.read_file("./Data/shapefiles/GBR_adm/GBR_adm0.shp").to_crs(epsg=3857) 393 | irl_shape = geopandas.read_file("./Data/shapefiles/IRL_adm/IRL_adm0.shp").to_crs(epsg=3857) 394 | gb_shape.boundary.plot(ax=ax, edgecolor="black", linewidth=0.5, alpha=0.4) 395 | irl_shape.boundary.plot(ax=ax, edgecolor="black", linewidth=0.5, alpha=0.4) 396 | overlay = ax.imshow(attn_over, cmap='viridis', alpha=0.5, extent=extent) 397 | # ax.axis((xmin, xmax, ymin, ymax)) 398 | txt = fig.text(.5, 0.09, '', ha='center') 399 | 400 | 401 | def update(i): 402 | spatial_over = np.resize(spatial_data[i], (height_scale, width_scale)) 403 | # overlay = ax.imshow(spatial_over, cmap='viridis', alpha=0.5, extent=extent) 404 | overlay.set_data(spatial_over) 405 | txt.set_text(f"Timestep: {i}") 406 | # plt.cla() 407 | 408 | return [overlay, txt] 409 | 410 | animation_ = FuncAnimation(fig, update, frames=frame_num, blit=False, repeat=False) 411 | plt.show(block=True) 412 | # animation_.save(f'{title}_animation.gif', writer='imagemagick') 413 | 414 | if plot_spatial_attention is True: 415 | # transpose spatial attention results 416 | att_w_spat = np.transpose(total_spat[plot_ref]) 417 | # plot attention weights 418 | plot_spatial_predictions(att_w_spat, 'Spatial Context', 16, 20, 48) 419 | 420 | 421 | 422 | 423 | 424 | # add date references to result dictionaries 425 | time_refs = {'input_times': times_in, 'output_times': times_out} 426 | 427 | predictions['time_refs'] = time_refs 428 | quantile_temporal_attns['time_refs'] = time_refs 429 | 430 | # add x-input data 431 | quantile_temporal_attns['input_features'] = x1 432 | 433 | # add true value for reference to prediction dictionary 434 | predictions['y_true'] = y 435 | 436 | # performance evaluation 437 | # evaluate_predictions(predictions) 438 | 439 | 440 | # 
save results - forecasted timeseries matrix 441 | with open(f'../../results/seq2seq+temporal+spatial/{model_type}/forecasted_time_series_{model_type}.pkl', 'wb') as ts_file: 442 | dump(predictions, ts_file) 443 | 444 | # save results - forecasted temporal attention matrix 445 | with open(f'../../results/seq2seq+temporal+spatial/{model_type}/attention_data_{model_type}.pkl', 'wb') as attention_file: 446 | dump(quantile_temporal_attns, attention_file) 447 | 448 | # save results - forecasted spatial attention matrix (written to its own file so it does not overwrite the temporal attention data above) 449 | with open(f'../../results/seq2seq+temporal+spatial/{model_type}/spatial_attention_data_{model_type}.pkl', 'wb') as spatial_file: 450 | dump(quantile_spatial_attns, spatial_file) 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | -------------------------------------------------------------------------------- /scripts/models/seq2seq+spatial+temporal_attn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys, os 3 | import h5py 4 | import tensorflow as tf 5 | import tensorflow.keras 6 | import tensorflow.keras.backend as K 7 | from tensorflow.keras import Model 8 | from tensorflow.keras.layers import Conv2D, Bidirectional, Dense, TimeDistributed, LSTM, BatchNormalization 9 | from tensorflow.keras.layers import Input, Activation, AveragePooling2D, Lambda, concatenate, Reshape 10 | from keras.backend import sigmoid 11 | from keras.utils.generic_utils import get_custom_objects 12 | 13 | # import custom classes 14 | from _shared.attention_layer import attention 15 | from _shared.timeseries_data_generator import DataGenerator 16 | 17 | np.set_printoptions(threshold=sys.maxsize) 18 | tf.random.set_seed(180) 19 | 20 | ###########################################_____SET_MODEL_PARAMETERS_____############################################ 21 | model_type = "solar" 22 | 23 | # declare dataset file 24 | dataset_name = f'dataset_{model_type}.hdf5' 25 | 26 | # declare quantiles for model 27 | quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95] 28 | 29 | # get useful size parameters 30 | f = h5py.File(f"../../data/processed/{model_type}/{dataset_name}", "r") 31 | features = np.empty_like(f['train_set']['X1_train'][0]) 32 | times_in = np.empty_like(f['train_set']['X2_train'][0]) 33 | times_out = np.empty_like(f['train_set']['X3_train'][0]) 34 | labels = np.empty_like(f['train_set']['y_train'][0]) 35 | x_len = f['train_set']['X1_train'].shape[0] 36 | y_len = f['train_set']['y_train'].shape[0] 37 | f.close() 38 | 39 | # input / output sequence sizes 40 | input_seq_size = 336 41 | output_seq_size = 48 42 | n_s = 32 # number of hidden states used through model 43 | 44 | ###########################################_____DATA_GENERATOR_____################################################# 45 | 46 | # data generator input parameters - avoid shuffle in this case 47 | params = {'batch_size': 16, 48 | 'shuffle': False } 49 | 50 | # instantiate data generator object 51 | training_generator = DataGenerator(dataset_name = dataset_name, x_length = x_len, y_length = y_len, hidden_states = n_s, **params) 52 | 53 | ###########################################_____MODEL_ARCHITECTURE_____################################################# 54 | 55 | # capture some more useful dimensions 56 | Tx = input_seq_size 57 | Ty = output_seq_size 58 | 59 | if model_type != "price": 60 | height, width, channels = features.shape[0], features.shape[1], features.shape[2] 61 | else: 62 | channels = features.shape[-1] 63 | 64 | times_in_dim = times_in.shape[-1]
65 | times_out_dim = times_out.shape[-1] 66 | 67 | # spatial encoder 68 | def cnn_encoder(ccn_input): 69 | # input shape -> (batch, time, width, height, features) 70 | # output shape -> (batch, time, width x height, embedding_size) 71 | 72 | ccn_enc_output = TimeDistributed(Conv2D(16, kernel_size=3, strides=1, activation="relu"))(ccn_input) 73 | ccn_enc_output = BatchNormalization()(ccn_enc_output) 74 | ccn_enc_output = TimeDistributed(AveragePooling2D(pool_size=(2, 2), data_format="channels_last"))(ccn_enc_output) 75 | ccn_enc_output = TimeDistributed(Conv2D(32, kernel_size=3, strides=1, activation="relu"))(ccn_enc_output) 76 | ccn_enc_output = BatchNormalization()(ccn_enc_output) 77 | ccn_enc_output = TimeDistributed(Conv2D(64, kernel_size=3, strides=1, activation="relu"))(ccn_enc_output) 78 | ccn_enc_output = BatchNormalization()(ccn_enc_output) 79 | ccn_enc_output = TimeDistributed(Conv2D(128, kernel_size=3, strides=1, activation="relu"))(ccn_enc_output) 80 | ccn_enc_output = BatchNormalization()(ccn_enc_output) 81 | 82 | ccn_enc_output = Reshape((ccn_enc_output.shape[1], -1, ccn_enc_output.shape[-1]))(ccn_enc_output) 83 | 84 | ccn_enc_output = K.mean(ccn_enc_output, axis=1) 85 | 86 | return ccn_enc_output 87 | 88 | # temporal encoder layers 89 | lstm_encoder = Bidirectional(LSTM(n_s*2, return_sequences = True, return_state = True)) 90 | 91 | def encoder(input, times_in): 92 | 93 | # accomodate for case without 2D dataset 94 | if model_type != "price": 95 | enc_output = K.mean(input, axis=(2,3)) 96 | else: 97 | enc_output = input 98 | 99 | # concat input time features with input 100 | enc_output = concatenate([enc_output, times_in], axis=-1) 101 | 102 | enc_output, forward_h, forward_c, backward_h, backward_c = lstm_encoder(enc_output) 103 | # enc_output, enc_h, enc_s = lstm_encoder(enc_output) 104 | 105 | enc_h = concatenate([forward_h, backward_h], axis=-1) 106 | enc_s = concatenate([forward_c, backward_c], axis=-1) 107 | 108 | # # concat input time features with input 109 | # enc_output = concatenate([enc_output, times_in], axis=-1) 110 | 111 | return enc_output, enc_h, enc_s 112 | 113 | # declare decoder layer 114 | lstm_decoder = LSTM(n_s, return_sequences = True, return_state = True) 115 | 116 | def decoder(context, h_state, cell_state): 117 | 118 | # concat encoder input and time features 119 | # context = concatenate([context, times_out], axis=-1) 120 | 121 | dec_output, h_state , c_state = state = lstm_decoder(context, initial_state = [h_state, cell_state]) 122 | 123 | return dec_output, h_state, c_state 124 | 125 | # make custom activation - swish 126 | def swish(x, beta = 1): 127 | return (x * sigmoid(beta * x)) 128 | 129 | # add swish activation to keras 130 | get_custom_objects().update({'swish': Activation(swish)}) 131 | 132 | # define inputs for model 133 | if model_type != "price": 134 | x_input = Input(shape=(Tx, height, width, channels)) 135 | else: 136 | x_input = Input(shape=(Tx, channels)) 137 | 138 | times_in = Input(shape=(Tx, times_in_dim)) 139 | times_out = Input(shape=(Ty, times_out_dim)) 140 | out_nwp = Input(shape=(Ty, channels-1)) 141 | s_state0 = Input(shape=(32,)) 142 | c_state0 = Input(shape=(32,)) 143 | 144 | # create empty list for outputs 145 | qunatile_predictions = [] 146 | temporal_attns = [] 147 | spatial_attns = [] 148 | 149 | # call CCN_encoder function 150 | if model_type != "price": 151 | ccn_enc_output = cnn_encoder(x_input) 152 | 153 | # call LSTM_encoder function 154 | lstm_enc_output, enc_s_state, enc_c_state = encoder(x_input, times_in) 
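# --- added sketch (not part of the original script) ---------------------------
# The decoder loop below calls the custom layer from _shared/attention_layer.py as
#     attn_weights, context = temporal_attention(lstm_enc_output, enc_out, s_state, c_state)
# scoring every encoder position against the current decoder step and returning
# the attention weights together with a context vector. That layer's source is
# not reproduced in this dump, so the helper below is only an assumed, minimal
# sketch of additive (Bahdanau-style) attention for illustration - the real
# layer also receives the step's exogenous inputs and cell state and may differ
# in detail.
import tensorflow as tf
from tensorflow.keras.layers import Dense

def additive_attention_sketch(enc_outputs, dec_state, units=32):
    # enc_outputs: (batch, positions, enc_dim); dec_state: (batch, dec_dim)
    w_enc, w_dec, v = Dense(units), Dense(units), Dense(1)  # fresh layers each call - fine for a sketch
    # score each encoder position against the current decoder state, then normalise over positions
    scores = v(tf.nn.tanh(w_enc(enc_outputs) + w_dec(tf.expand_dims(dec_state, 1))))  # (batch, positions, 1)
    attn_weights = tf.nn.softmax(scores, axis=1)
    # context vector = attention-weighted average of the encoder positions
    context = tf.reduce_sum(attn_weights * enc_outputs, axis=1, keepdims=True)  # (batch, 1, enc_dim)
    return attn_weights, context
# -------------------------------------------------------------------------------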
155 | 156 | # call decoder 157 | for q in quantiles: 158 | 159 | # reset model parameters for each qunatile prediction 160 | ts_predictions = [] 161 | temp_attns = [] 162 | spatial_attns = [] 163 | 164 | if model_type != "price": 165 | intial_in = K.mean(x_input, axis=(2,3)) 166 | prev_prediction = intial_in[:,-1:,0:1] 167 | 168 | decoder = LSTM(32, return_sequences = False, return_state = True, name=f'decoder_q_{q}') 169 | spatial_attention = attention(n_s, name=f"spatial_attention_q_{q}") 170 | temporal_attention = attention(n_s, name=f"temporal_attention_q_{q}") 171 | 172 | output_1 = Dense(32, activation="swish", name=f'dense1_q_{q}') 173 | output_2 = Dense(1, name=f'dense3_q_{q}') 174 | final_act = Activation('relu', name=f'relu_act_q_{q}') 175 | 176 | # reset hidden states 177 | s_state = s_state0 178 | c_state = c_state0 179 | 180 | # make prediction for each output timestep 181 | for ts in range(Ty): 182 | 183 | if model_type != "price": 184 | enc_out = concatenate([out_nwp[:,ts:ts+1,:], times_out[:,ts:ts+1,:]], axis=-1, name=f'concat1_q_{q}_{ts}') 185 | else: 186 | enc_out = times_out[:,ts:ts+1,:] 187 | 188 | # get context matrix (temporal) 189 | attn_weights_temp, context = temporal_attention(lstm_enc_output, enc_out, s_state, c_state) 190 | 191 | # get context matrix (spatial) 192 | if model_type != "price": 193 | attn_weights_spat, context_spat = spatial_attention(ccn_enc_output, enc_out, s_state, c_state) 194 | 195 | # combine spatial and temporal context 196 | context = concatenate([context, context_spat], axis=-1, name=f'concat1.5_q_{q}_{ts}') 197 | 198 | # make decoder input - nwp + time features if not price predictions, other wise just time features 199 | if ts > 0: 200 | decoder_input = concatenate([out_nwp[:,ts-1:ts,:], times_out[:,ts-1:ts,:]], axis=-1, name=f'concat2_q_{q}_{ts}') 201 | else: 202 | decoder_input = concatenate([intial_in[:,-1:,1:], times_in[:,-1:,:]], axis=-1, name=f'concat3_q_{q}_{ts}') 203 | else: 204 | if ts > 0: 205 | decoder_input = times_out[:,ts-1:ts,:] 206 | else: 207 | decoder_input = times_in[:,-1:,:] 208 | 209 | # call decoder 210 | dec_output, s_state, c_state = decoder(decoder_input, initial_state = [s_state, c_state]) 211 | 212 | # combine context with decoder output 213 | prediction = concatenate([context, K.expand_dims(dec_output,axis=1)], axis=-1, name=f'concat5_q_{q}_{ts}') 214 | 215 | # pass through MLP 216 | output = output_1(prediction) 217 | output = output_2(output) 218 | 219 | if model_type == "solar": 220 | output = final_act(output) 221 | 222 | # collect outputs for final predictions 223 | prev_prediction = output 224 | ts_predictions.append(output) 225 | temp_attns.append(attn_weights_temp) 226 | 227 | if model_type != "price": 228 | spatial_attns.append(attn_weights_spat) 229 | 230 | ts_predictions_total = concatenate(ts_predictions, axis = 1) 231 | temp_attns_total = concatenate(temp_attns, axis = -1) 232 | 233 | if model_type != "price": 234 | sptial_attns_total = concatenate(spatial_attns, axis = -1) 235 | 236 | qunatile_predictions.append(ts_predictions_total) 237 | 238 | # append spatial and temporal predictions - if using final model as inference 239 | # qunatile_predictions.extend([temp_attns_total]) 240 | # qunatile_predictions.extend([sptial_attns_total]) 241 | 242 | # instantiate model 243 | model = Model(inputs = [x_input, times_in, times_out, out_nwp, s_state0, c_state0], outputs = qunatile_predictions) 244 | 245 | 246 | 
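# --- added note (not part of the original script) -----------------------------
# The training section below builds one pinball (quantile) loss per output head with
#     q_losses = [lambda y,f: K.mean(K.maximum(q*(y - f), (q-1) * (y - f)), axis = -1) for q in quantiles]
# Python closures bind `q` late, so every lambda created this way sees the final
# loop value (0.95) when it is eventually called, and all heads end up with the
# same loss. Binding the quantile as a default argument keeps one distinct loss
# per quantile, e.g.:
#     q_losses = [
#         (lambda y, f, q=q: K.mean(K.maximum(q * (y - f), (q - 1) * (y - f)), axis=-1))
#         for q in quantiles
#     ]
# -------------------------------------------------------------------------------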
###########################################_____MODEL_TRAINING_____################################################# 247 | 248 | #include clipvalue in optmisier 249 | optimizer = tensorflow.keras.optimizers.Adam(learning_rate = 0.001) 250 | 251 | # define loss for each quantile 252 | q_losses = [lambda y,f: K.mean(K.maximum(q*(y - f), (q-1) * (y - f)), axis = -1) for q in quantiles] 253 | 254 | # append additional empty losses for temporal and spatial encoders 255 | # q_losses.append([None,None]) 256 | 257 | # compile and train model 258 | model.compile(loss = q_losses, optimizer= optimizer) 259 | print(model.summary()) 260 | model.fit(training_generator, epochs = 20) 261 | 262 | # save models - saving encoders individually for inference 263 | os.mkdir(f'../../models/{model_type}') 264 | model.save(f'../../models/{model_type}/{model_type}_main.h5') 265 | 266 | # save some additional models for inference 267 | enoder_temporal_model = Model(inputs = [x_input, times_in], outputs=[lstm_enc_output, enc_s_state, enc_c_state]) 268 | enoder_temporal_model.save(f'../../models/{model_type}/{model_type}_temporal_enc.h5') 269 | 270 | # save spatial encoders if not price forecasting 271 | if model_type != 'price': 272 | enoder_spatial_model = Model(x_input, ccn_enc_output) 273 | enoder_spatial_model.save(f'../../models/{model_type}/{model_type}_spatial_enc.h5') 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | -------------------------------------------------------------------------------- /scripts/models/seq2seq+temporal_attn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys, os 3 | import h5py 4 | import tensorflow as tf 5 | import tensorflow.keras 6 | import tensorflow.keras.backend as K 7 | from tensorflow.keras import Model 8 | from tensorflow.keras.layers import Conv2D, Bidirectional, Dense, TimeDistributed, LSTM 9 | from tensorflow.keras.layers import Input, Activation, AveragePooling2D, Lambda, concatenate, Reshape 10 | from keras.backend import sigmoid 11 | from keras.utils.generic_utils import get_custom_objects 12 | 13 | # import custom classes 14 | from _shared.attention_layer import attention 15 | from _shared.timeseries_data_generator import DataGenerator 16 | 17 | np.set_printoptions(threshold=sys.maxsize) 18 | tf.random.set_seed(180) 19 | 20 | ###########################################_____SET_MODEL_PARAMETERS_____############################################ 21 | model_type ="solar" 22 | 23 | # declare dataset file 24 | dataset_name = f'dataset_{model_type}.hdf5' 25 | 26 | # declare quantiles for model 27 | quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95] 28 | 29 | # get useful size parameters 30 | f = h5py.File(f"../../data/processed/{model_type}/{dataset_name}", "r") 31 | features = np.empty_like(f['train_set']['X1_train'][0]) 32 | times_in = np.empty_like(f['train_set']['X2_train'][0]) 33 | times_out = np.empty_like(f['train_set']['X3_train'][0]) 34 | labels = np.empty_like(f['train_set']['y_train'][0]) 35 | x_len = f['train_set']['X1_train'].shape[0] 36 | y_len = f['train_set']['y_train'].shape[0] 37 | f.close() 38 | 39 | # input / output sequence sizes 40 | input_seq_size = 336 41 | output_seq_size = 48 42 | n_s = 32 # number of hidden states used through model 43 | 44 | ###########################################_____DATA_GENERATOR_____################################################# 45 | 46 | # data generator input parameters - 
avoid shuffle in this case 47 | params = {'batch_size': 16, 48 | 'shuffle': False } 49 | 50 | # instantiate data generator object 51 | training_generator = DataGenerator(dataset_name = dataset_name, x_length = x_len, y_length = y_len, hidden_states = n_s, **params) 52 | 53 | ###########################################_____MODEL_ARCHITECTURE_____################################################# 54 | 55 | # cpature some more useful dimensions 56 | Tx = input_seq_size 57 | Ty = output_seq_size 58 | 59 | if model_type != "price": 60 | height, width, channels = features.shape[0], features.shape[1], features.shape[2] 61 | else: 62 | channels = features.shape[-1] 63 | 64 | times_in_dim = times_in.shape[-1] 65 | times_out_dim = times_out.shape[-1] 66 | 67 | 68 | # temporal encoder layers 69 | lstm_encoder = Bidirectional(LSTM(n_s*2, return_sequences = True, return_state = True)) 70 | 71 | def encoder(input, times_in): 72 | 73 | # accomodate for case without 2D dataset 74 | if model_type != "price": 75 | enc_output = K.mean(input, axis=(2,3)) 76 | else: 77 | enc_output = input 78 | 79 | # concat input time features with input 80 | enc_output = concatenate([enc_output, times_in], axis=-1) 81 | 82 | enc_output, forward_h, forward_c, backward_h, backward_c = lstm_encoder(enc_output) 83 | # enc_output, enc_h, enc_s = lstm_encoder(enc_output) 84 | 85 | enc_h = concatenate([forward_h, backward_h], axis=-1) 86 | enc_s = concatenate([forward_c, backward_c], axis=-1) 87 | 88 | # concat input time features with input 89 | # enc_output = concatenate([enc_output, times_in], axis=-1) 90 | 91 | return enc_output, enc_h, enc_s 92 | 93 | # declare decoder layer 94 | lstm_decoder = LSTM(n_s, return_sequences = True, return_state = True) 95 | 96 | def decoder(context, h_state, cell_state): 97 | 98 | # concat encoder input and time features 99 | # context = concatenate([context, times_out], axis=-1) 100 | 101 | dec_output, h_state , c_state = state = lstm_decoder(context, initial_state = [h_state, cell_state]) 102 | 103 | return dec_output, h_state, c_state 104 | 105 | # make custom activation - swish 106 | def swish(x, beta = 1): 107 | return (x * sigmoid(beta * x)) 108 | 109 | # add swish activation to keras 110 | get_custom_objects().update({'swish': Activation(swish)}) 111 | 112 | # define inputs for model 113 | x_input = Input(shape=(Tx, channels)) 114 | 115 | times_in = Input(shape=(Tx, times_in_dim)) 116 | times_out = Input(shape=(Ty, times_out_dim)) 117 | out_nwp = Input(shape=(Ty, channels-1)) 118 | s_state0 = Input(shape=(32,)) 119 | c_state0 = Input(shape=(32,)) 120 | 121 | # create empty list for outputs 122 | qunatile_predictions = [] 123 | temporal_attns = [] 124 | 125 | # call LSTM_encoder function 126 | lstm_enc_output, enc_s_state, enc_c_state = encoder(x_input, times_in) 127 | 128 | # call decoder 129 | for q in quantiles: 130 | 131 | # reset model parameters for each qunatile prediction 132 | ts_predictions = [] 133 | temp_attns = [] 134 | spatial_attns = [] 135 | 136 | if model_type != "price": 137 | intial_in = K.mean(x_input, axis=(2,3)) 138 | prev_prediction = intial_in[:,-1:,0:1] 139 | 140 | decoder = LSTM(32, return_sequences = False, return_state = True, name=f'decoder_q_{q}') 141 | spatial_attention = attention(n_s, name=f"spatial_attention_q_{q}") 142 | temporal_attention = attention(n_s, name=f"temporal_attention_q_{q}") 143 | 144 | output_1 = Dense(32, activation="swish", name=f'dense1_q_{q}') 145 | output_2 = Dense(1, name=f'dense3_q_{q}') 146 | final_act = Activation('relu', 
name=f'relu_act_q_{q}') 147 | 148 | # reset hidden states 149 | s_state = s_state0 150 | c_state = c_state0 151 | 152 | # make prediction for each output timestep 153 | for ts in range(Ty): 154 | 155 | if model_type != "price": 156 | enc_out = concatenate([out_nwp[:,ts:ts+1,:], times_out[:,ts:ts+1,:]], axis=-1, name=f'concat1_q_{q}_{ts}') 157 | else: 158 | enc_out = times_out[:,ts:ts+1,:] 159 | 160 | # get context matrix (temporal) 161 | attn_weights_temp, context = temporal_attention(lstm_enc_output, enc_out, s_state, c_state) 162 | 163 | # get context matrix (spatial) 164 | if model_type != "price": 165 | 166 | # make decoder input - nwp + time features if not price predictions, other wise just time features 167 | if ts > 0: 168 | decoder_input = concatenate([out_nwp[:,ts-1:ts,:], times_out[:,ts-1:ts,:]], axis=-1, name=f'concat2_q_{q}_{ts}') 169 | else: 170 | decoder_input = concatenate([intial_in[:,-1:,1:], times_in[:,-1:,:]], axis=-1, name=f'concat3_q_{q}_{ts}') 171 | else: 172 | if ts > 0: 173 | decoder_input = times_out[:,ts-1:ts,:] 174 | else: 175 | decoder_input = times_in[:,-1:,:] 176 | 177 | # call decoder 178 | dec_output, s_state, c_state = decoder(decoder_input, initial_state = [s_state, c_state]) 179 | 180 | # combine context with decoder output 181 | prediction = concatenate([context, K.expand_dims(dec_output,axis=1)], axis=-1, name=f'concat5_q_{q}_{ts}') 182 | 183 | # pass through MLP 184 | output = output_1(prediction) 185 | output = output_2(output) 186 | 187 | if model_type == "solar": 188 | output = final_act(output) 189 | 190 | # collect outputs for final predictions 191 | prev_prediction = output 192 | ts_predictions.append(output) 193 | temp_attns.append(attn_weights_temp) 194 | 195 | ts_predictions_total = concatenate(ts_predictions, axis = 1) 196 | temp_attns_total = concatenate(temp_attns, axis = -1) 197 | 198 | qunatile_predictions.append(ts_predictions_total) 199 | 200 | # append spatial and temporal predictions - if using final model as inference 201 | # qunatile_predictions.extend([temp_attns_total]) 202 | # qunatile_predictions.extend([sptial_attns_total]) 203 | 204 | # instantiate model 205 | model = Model(inputs = [x_input, times_in, times_out, out_nwp, s_state0, c_state0], outputs = qunatile_predictions) 206 | 207 | 208 | ###########################################_____MODEL_TRAINING_____################################################# 209 | 210 | #include clipvalue in optmisier 211 | optimizer = tensorflow.keras.optimizers.Adam(learning_rate = 0.001) 212 | 213 | # define loss for each quantile 214 | q_losses = [lambda y,f: K.mean(K.maximum(q*(y - f), (q-1) * (y - f)), axis = -1) for q in quantiles] 215 | 216 | # append additional empty losses for temporal and spatial encoders 217 | # q_losses.append([None,None]) 218 | 219 | # compile and train model 220 | model.compile(loss = q_losses, optimizer= optimizer) 221 | print(model.summary()) 222 | model.fit(training_generator, epochs = 20) 223 | 224 | # save models - saving encoders individually for inference 225 | os.mkdir(f'../../models/seq2seq+temporal/{model_type}') 226 | model.save(f'../../models//seq2seq+temporal/{model_type}/{model_type}_main.h5') 227 | 228 | # save some additional models for inference 229 | enoder_temporal_model = Model(inputs = [x_input, times_in], outputs=[lstm_enc_output, enc_s_state, enc_c_state]) 230 | enoder_temporal_model.save(f'../../models/seq2seq+temporal/{model_type}/{model_type}_temporal_enc.h5') 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 
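# --- added sketch (not part of the original script) ---------------------------
# A quick, illustrative way to sanity-check the quantile forecasts saved by the
# inference scripts is to measure empirical coverage: the fraction of observed
# values falling at or below each predicted quantile. The helper and the toy
# numbers below are assumptions for illustration only and are not used anywhere
# else in this repository.
import numpy as np

def empirical_coverage(y_true, quantile_preds):
    # y_true: array of observations; quantile_preds: dict mapping quantile -> predictions of the same shape
    # a well-calibrated 0.95 forecast should cover roughly 95% of the observations
    return {q: float(np.mean(y_true <= preds)) for q, preds in quantile_preds.items()}

# example:
# empirical_coverage(np.array([1.0, 2.0, 3.0]),
#                    {0.5: np.array([1.5, 1.5, 1.5]), 0.95: np.array([4.0, 4.0, 4.0])})
# -> {0.5: 0.333..., 0.95: 1.0}
# -------------------------------------------------------------------------------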
244 | 245 | 246 | 247 | 248 | 249 | 250 | -------------------------------------------------------------------------------- /scripts/models/seq2seq_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys, os 3 | import h5py 4 | import tensorflow as tf 5 | import tensorflow.keras 6 | import tensorflow.keras.backend as K 7 | from tensorflow.keras import Model 8 | from tensorflow.keras.layers import Conv2D, Bidirectional, Dense, TimeDistributed, LSTM 9 | from tensorflow.keras.layers import Input, Activation, AveragePooling2D, Lambda, concatenate, Reshape 10 | from keras.backend import sigmoid 11 | from keras.utils.generic_utils import get_custom_objects 12 | 13 | 14 | 15 | np.set_printoptions(threshold=sys.maxsize) 16 | tf.random.set_seed(180) 17 | 18 | ###########################################_____SET_MODEL_PARAMETERS_____############################################ 19 | model_type ="solar" 20 | 21 | # declare dataset file 22 | dataset_name = f'dataset_{model_type}.hdf5' 23 | 24 | # declare quantiles for model 25 | quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95] 26 | 27 | # get useful size parameters 28 | f = h5py.File(f"../../data/processed/{model_type}/{dataset_name}", "r") 29 | features = np.empty_like(f['train_set']['X1_train'][0]) 30 | times_in = np.empty_like(f['train_set']['X2_train'][0]) 31 | times_out = np.empty_like(f['train_set']['X3_train'][0]) 32 | labels = np.empty_like(f['train_set']['y_train'][0]) 33 | x_len = f['train_set']['X1_train'].shape[0] 34 | y_len = f['train_set']['y_train'].shape[0] 35 | f.close() 36 | 37 | # input / output sequence sizes 38 | input_seq_size = 336 39 | output_seq_size = 48 40 | n_s = 32 # number of hidden states used through model 41 | 42 | ###########################################_____DATA_GENERATOR_____################################################# 43 | 44 | # data generator input parameters - avoid shuffle in this case 45 | 46 | params = {'batch_size': 64, 47 | 'shuffle': False } 48 | 49 | class DataGenerator(tensorflow.keras.utils.Sequence): 50 | 51 | def __init__(self, dataset_name, x_length, y_length, batch_size, shuffle): 52 | self.dataset_name = dataset_name 53 | self.batch_size = batch_size 54 | self.shuffle = shuffle 55 | self.xlen = x_length 56 | self.ylen = y_length 57 | self.index_ref = 0 58 | self.on_epoch_end() 59 | 60 | def __len__(self): 61 | # 'number of batches per Epoch' 62 | # return int(np.floor((self.xlen - (input_seq_size-1)) / self.batch_size)) 63 | return int(np.floor((self.ylen - input_seq_size - (output_seq_size-1)) / self.batch_size)) 64 | 65 | def __getitem__(self, index): 66 | 67 | 68 | input_indexes = self.input_indexes[(index*self.batch_size) : (index*self.batch_size) + (self.batch_size + (input_seq_size-1))] 69 | output_indexes = self.output_indexes[(index*self.batch_size) + input_seq_size : (index*self.batch_size) + input_seq_size + (self.batch_size + (output_seq_size-1))] 70 | 71 | # Generate data 72 | (X_train1, X_train2, X_train3, X_train4), y_train = self.__data_generation(input_indexes, output_indexes) 73 | 74 | y_trues = [y_train for i in quantiles] 75 | 76 | return (X_train1, X_train2, X_train3, X_train4), (y_trues) # pass empty training outputs to extract extract attentions 77 | 78 | def on_epoch_end(self): 79 | # set length of indexes for each epoch 80 | self.input_indexes = np.arange(self.xlen) 81 | self.output_indexes = np.arange(self.ylen) 82 | 83 | if self.shuffle == True: 84 | 
np.random.shuffle(self.input_indexes) 85 | 86 | def to_sequence(self, x1, x2, x3, x4, y): 87 | # convert timeseries batch in sequences 88 | input_start, output_start = 0, 0 89 | 90 | seqX1, seqX2, seqX3, seqX4, seqY = [], [], [], [], [] 91 | 92 | while (input_start + input_seq_size) <= len(x1): 93 | # offset handled during pre-processing 94 | input_end = input_start + input_seq_size 95 | output_end = output_start + output_seq_size 96 | 97 | # inputs 98 | seqX1.append(x1[input_start:input_end]) 99 | seqX2.append(x2[input_start:input_end]) 100 | 101 | # outputs 102 | seqX3.append(x3[output_start:output_end]) 103 | seqX4.append(x4[output_start:output_end]) 104 | seqY.append(y[output_start:output_end]) 105 | 106 | input_start += 1 107 | output_start += 1 108 | 109 | seqX1, seqX2, seqX3, seqX4, seqY = np.array(seqX1), np.array(seqX2), np.array(seqX3), np.array(seqX4), np.array(seqY) 110 | 111 | return seqX1, seqX2, seqX3, seqX4, seqY 112 | 113 | def __data_generation(self, input_indexes, output_indexes): 114 | 115 | f = h5py.File(f"../../data/processed/{model_type}/{self.dataset_name}", "r") 116 | # X_train1 = f['train_set']['X1_train'][input_indexes] 117 | X_train2 = f['train_set']['X2_train'][input_indexes] 118 | X_train3 = f['train_set']['X3_train'][output_indexes] 119 | 120 | if model_type != 'price': 121 | X_train1 = f['train_set']['X1_train'][input_indexes][:,:,:,:] 122 | X_train1 = np.average(X_train1, axis=(1,2)) 123 | 124 | X_train4 = f['train_set']['X1_train'][output_indexes][:,:,:,:] 125 | X_train4 = np.average(X_train4, axis=(1,2)) 126 | X_train4 = X_train4[:,1:] 127 | 128 | else: 129 | X_train1 = f['train_set']['X1_train'][input_indexes][:,:] 130 | 131 | X_train4 = f['train_set']['X1_train'][output_indexes][:,:] 132 | X_train4 = X_train4[:,:-1] 133 | 134 | 135 | y_train = f['train_set']['y_train'][output_indexes] 136 | f.close() 137 | 138 | # convert to sequence data 139 | X_train1, X_train2, X_train3, X_train4, y_train = self.to_sequence(X_train1, X_train2, X_train3, X_train4, y_train) 140 | 141 | return (X_train1, X_train2, X_train3, X_train4), y_train 142 | 143 | training_generator = DataGenerator(dataset_name = dataset_name, x_length = x_len, y_length = y_len, **params) 144 | 145 | ###########################################_____MODEL_ARCHITECTURE_____################################################# 146 | 147 | # cpature some more useful dimensions 148 | Tx = input_seq_size 149 | Ty = output_seq_size 150 | 151 | channels = features.shape[-1] 152 | 153 | times_in_dim = times_in.shape[-1] 154 | times_out_dim = times_out.shape[-1] 155 | 156 | # make custom activation - swish 157 | def swish(x, beta = 1): 158 | return (x * sigmoid(beta * x)) 159 | 160 | # add swish activation to keras 161 | get_custom_objects().update({'swish': Activation(swish)}) 162 | 163 | # define inputs for model 164 | x_input = Input(shape=(Tx, channels)) 165 | 166 | times_in = Input(shape=(Tx, times_in_dim)) 167 | times_out = Input(shape=(Ty, times_out_dim)) 168 | out_nwp = Input(shape=(Ty, channels-1)) 169 | s_state0 = Input(shape=(32,)) 170 | c_state0 = Input(shape=(32,)) 171 | 172 | # create empty list for outputs 173 | quantile_predictions = [] 174 | temporal_attns = [] 175 | 176 | for q in quantiles: 177 | 178 | combined_inputs = concatenate([x_input, times_in], axis=-1, name=f'concat_q_{q}') 179 | 180 | encoder_output, forward_h, forward_c, backward_h, backward_c = Bidirectional(LSTM(32, return_sequences = False, return_state = True), name=f'biLSTM_q_{q}')(combined_inputs) 181 | repeat_layer = 
RepeatVector(48)(encoder_output) 182 | 183 | enc_h = concatenate([forward_h, backward_h]) 184 | enc_s = concatenate([backward_h, backward_h]) 185 | 186 | decoder_input = concatenate([repeat_layer, times_out]) 187 | decoder_input = concatenate([decoder_input, out_nwp]) 188 | 189 | decoder_out, _, _ = LSTM(64, return_sequences = True, return_state = True, name=f'decoder_LSTM_q_{q}')(decoder_input, initial_state = [enc_h, enc_s]) 190 | 191 | dense_out = TimeDistributed(Dense(1, name=f'dense_q_{q}'))(decoder_out) 192 | 193 | if model_type == 'solar': 194 | dense_out = Activation('relu', name=f'relu_act_q_{q}')(dense_out) 195 | 196 | quantile_predictions.append(dense_out) 197 | 198 | model = Model(inputs = [x_input, times_in, times_out, out_nwp], outputs = quantile_predictions) 199 | 200 | 201 | ###########################################_____MODEL_TRAINING_____################################################# 202 | 203 | #include clipvalue in optmisier 204 | optimizer = tensorflow.keras.optimizers.Adam(learning_rate = 0.001) 205 | 206 | # define loss for each quantile 207 | q_losses = [lambda y,f: K.mean(K.maximum(q*(y - f), (q-1) * (y - f)), axis = -1) for q in quantiles] 208 | 209 | # append additional empty losses for temporal and spatial encoders 210 | # q_losses.append([None,None]) 211 | 212 | # compile and train model 213 | model.compile(loss = q_losses, optimizer= optimizer) 214 | print(model.summary()) 215 | model.fit(training_generator, epochs = 20) 216 | 217 | # save models - saving encoders individually for inference 218 | os.mkdir(f'../../models/{model_type}') 219 | model.save(f'../../models/{model_type}/{model_type}_seq2seq.h5') 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | -------------------------------------------------------------------------------- /scripts/postprocessing/d3_scripts/forecasting_graph.js: -------------------------------------------------------------------------------- 1 | 2 | 3 | function prob_forecast(file, ref, color_array) { 4 | 5 | 6 | // set the dimensions and margins of the graph 7 | var margin = {top: 10, right: 0, bottom: 50, left: 80}, 8 | width = 1000 - margin.left - margin.right, 9 | height = 600 - margin.top - margin.bottom; 10 | 11 | 12 | 13 | // append the svg object to the body of the page 14 | var svg = d3.select("#my_dataviz_" + ref) 15 | .append("svg") 16 | // .attr("width", width + margin.left + margin.right) 17 | // .attr("height", height + margin.top + margin.bottom) 18 | .attr("width", "100%") 19 | .attr("height", "100%") 20 | .attr("viewBox", "0 0 1000 600") 21 | .attr("preserveAspectRatio", "xMinYMin meet") 22 | .append("g") 23 | .attr("transform", 24 | "translate(" + margin.left + "," + margin.top + ")"); 25 | 26 | // var svg = d3.select("#my_dataviz") 27 | // .append("svg") 28 | // .attr("width", "50%") 29 | // .attr("height", "50%") 30 | // .attr("viewBox", "0 0 740 800"); 31 | 32 | svg.append("rect") 33 | .attr("x",0) 34 | .attr("y",0) 35 | .attr("height", height) 36 | .attr("width", width) 37 | .style("fill","#DEDEDE") //EBEBEB 38 | .style("stroke","none") 39 | .style("opacity", 0.3) 40 | 41 | // svg.append('text') 42 | // .attr("x",width/2) 43 | // .attr("y",height/2) 44 | // .attr('font-family', 'FontAwesome') 45 | // .attr('font-size', 100) 46 | // .text(function(d) { return '\uf185' }) 47 | // .style("fill","white") 48 | // .style("opacity", 0.4) ; 49 | 50 | 51 | 52 | 53 | // Parse the Data 54 | d3.csv(file, 55 | 56 | function(d){ 57 | return { date: 
d3.timeParse("%d/%m/%Y %H:%M")(d.Datetime), 58 | one: d.q_05 = +d.q_05, 59 | second: d.q_15 = +d.q_15, 60 | third: d.q_25 = +d.q_25, 61 | fourth: d.q_35 = +d.q_35, 62 | five: d.q_5 = +d.q_5, 63 | six: d.q_65 = +d.q_65, 64 | seven: d.q_75 = +d.q_75, 65 | eight: d.q_85 = +d.q_85, 66 | nine: d.q_95 = +d.q_95, 67 | actual: d.actual = +d.actual, 68 | } 69 | }, 70 | 71 | function(data) { 72 | 73 | // data.forEach(function(d) { 74 | // d.actual= +d.actual; 75 | // d.five= +d.five; 76 | // d.date = +d.date; 77 | // // d.Datetime = d3.timeParse(d.Datetime); 78 | // }); 79 | 80 | //declare parse dates 81 | var parseDate = d3.timeParse("%A"); 82 | 83 | 84 | 85 | // List of groups = header of the csv files 86 | var keys = data.columns.slice(1) 87 | 88 | 89 | 90 | // Add X axis 91 | var x = d3.scaleTime() 92 | .domain(d3.extent(data, function(d) { return d.date; })) 93 | .range([ 0, width ]) 94 | 95 | svg.append("g") 96 | .attr("transform", "translate(0," + height + ")") 97 | .call(d3.axisBottom(x).tickFormat(d3.timeFormat(parseDate)).tickSizeInner(-height).tickSizeOuter(0).ticks(7).tickPadding(20)) //.tickFormat(d3.timeFormat(parseDate)) 98 | .selectAll(".tick text") 99 | .attr("transform", "translate(" + (width / 7) / 2 + ",0)") 100 | .style("text-transform", "uppercase") 101 | .style("font-size", "16px") 102 | .style("opacity", 0.5) 103 | // .tickArguments([5]) 104 | // .tickCenterLabel(true) 105 | .select(".domain").remove() 106 | 107 | svg.append("g") 108 | .attr("transform", "translate(0," + height + ")") 109 | .call(d3.axisBottom(x).tickFormat(d3.timeFormat("(%d/%m/%y)")).tickSizeInner(-height).tickSizeOuter(0).ticks(7).tickPadding(20)) //.tickFormat(d3.timeFormat(parseDate)) 110 | .selectAll(".tick text") 111 | .attr("transform", "translate(" + (width / 7) / 2 + ",17)") 112 | .style("text-transform", "uppercase") 113 | .style("font-size", "14px") 114 | .style("font-style", "italic") 115 | .style("opacity", 1) 116 | .select(".domain").remove() 117 | 118 | 119 | // x-axis mini tick marks 120 | // d3.svg.axis() 121 | // .scale() 122 | // .orient('bottom') 123 | // .tickFormat('') 124 | // .tickSize(30) 125 | // .tickPadding(6) 126 | 127 | 128 | // Add X axis label: 129 | svg.append("text") 130 | .attr("text-anchor", "middle") 131 | .attr("x", width/2) 132 | .attr("y", height + margin.top + 30) 133 | // .text("Day") 134 | .style("font", "12px arial") 135 | 136 | 137 | // Add Y axis label: 138 | if (ref === "price") { 139 | svg.append("text") 140 | .attr("text-anchor", "end") 141 | // .attr("y", +margin.left) 142 | // .attr("x", -margin.top + height/2) 143 | .attr("y", -margin.left + 35) 144 | .attr("x", -height/2 + 60) 145 | .text(ref +" (£/MW)") 146 | .style("font", "14px arial") 147 | .style("text-transform", "uppercase") 148 | // .attr("transform", 149 | // "translate(" + (height/2) + ")") 150 | .attr("transform", "rotate(-90)"); 151 | } else if (ref === "demand") { 152 | svg.append("text") 153 | .attr("text-anchor", "end") 154 | // .attr("y", +margin.left) 155 | // .attr("x", -margin.top + height/2) 156 | .attr("y", -margin.left + 35) 157 | .attr("x", -height/2 + 60) 158 | .text(ref +" Demand (GW)") 159 | .style("font", "14px arial") 160 | .style("text-transform", "uppercase") 161 | // .attr("transform", 162 | // "translate(" + (height/2) + ")") 163 | .attr("transform", "rotate(-90)"); 164 | } else { 165 | svg.append("text") 166 | .attr("text-anchor", "end") 167 | // .attr("y", +margin.left) 168 | // .attr("x", -margin.top + height/2) 169 | .attr("y", -margin.left + 35) 170 | .attr("x", 
-height/2 + 95) 171 | .text(ref +" Generation (GW)") 172 | .style("font", "14px arial") 173 | .style("text-transform", "uppercase") 174 | // .attr("transform", 175 | // "translate(" + (height/2) + ")") 176 | .attr("transform", "rotate(-90)"); 177 | } 178 | 179 | // Add Y axis 180 | var y = d3.scaleLinear() 181 | .domain([d3.min(data, function(d) { return +d.one; }) * 0.95, d3.max(data, function(d) { return +d.nine; }) * 1.05]) 182 | .range([ height, 0 ]) 183 | svg.append("g") 184 | .call(d3.axisLeft(y).tickSizeInner(-width).ticks(8).tickPadding(12.5)) 185 | .style("font", "15px arial") 186 | .select(".domain").remove(); 187 | svg.selectAll(".tick line").attr("stroke", "white").attr('stroke-width',1) 188 | 189 | 190 | 191 | // group the data 192 | var sumstat = d3.nest() 193 | .key(function(d) { return d.name;}) 194 | .entries(data); 195 | 196 | //stack the data 197 | var stackedData = d3.stack() 198 | // .offset(d3.stackOffsetSilhouette) 199 | .keys(keys) 200 | // .value(function(d, key){ 201 | // return d.values[key] 202 | // }) 203 | (data) 204 | console.log(stackedData.keys) 205 | 206 | // create a tooltip 207 | var Tooltip = svg 208 | .select("#my_dataviz_" + ref) 209 | .append("text") 210 | .attr("x", 0) 211 | .attr("y", 0) 212 | .style("opacity", 0) 213 | .style("font-size", 17) 214 | 215 | // Three function that change the tooltip when user hover / move / leave a cell 216 | var mouseover = function(d) { 217 | 218 | Tooltip.style("opacity", 0.5) 219 | d3.selectAll(".myArea").style("opacity", .2) 220 | d3.select(this) 221 | .style("stroke", "black") 222 | .style("opacity", 0.5) 223 | } 224 | var mousemove = function(d,i) { 225 | grp = keys[i] 226 | Tooltip.text(grp) 227 | } 228 | 229 | var mouseleave = function(d) { 230 | Tooltip.style("opacity", 0) 231 | d3.selectAll(".myArea").style("opacity", 0.5).style("stroke", "none") 232 | } 233 | 234 | // Area generator 235 | var area = d3.area() 236 | .curve(d3.curveMonotoneX) 237 | .x(function(d) { return x(d.data.date); }) 238 | .y0(function(d) { return y(d.data.one); }) 239 | .y1(function(d) { return y(d.data.nine); }) 240 | 241 | // Area generator 242 | var area2 = d3.area() 243 | .curve(d3.curveMonotoneX) 244 | .x(function(d) { return x(d.data.date); }) 245 | .y0(function(d) { return y(d.data.second); }) 246 | .y1(function(d) { return y(d.data.eight); }) 247 | 248 | // Area generator 249 | var area3 = d3.area() 250 | .curve(d3.curveMonotoneX) 251 | .x(function(d) { return x(d.data.date); }) 252 | .y0(function(d) { return y(d.data.third); }) 253 | .y1(function(d) { return y(d.data.seven); }) 254 | 255 | // Area generator 256 | var area4 = d3.area() 257 | .curve(d3.curveMonotoneX) 258 | .x(function(d) { return x(d.data.date); }) 259 | .y0(function(d) { return y(d.data.fourth); }) 260 | .y1(function(d) { return y(d.data.six); }) 261 | 262 | // Area generator 263 | var line = d3.line() 264 | // .curve(d3.curveMonotoneX) 265 | .x(function(d) { return x(d.data.date); }) 266 | .y(function(d) { return y(d.data.actual); }) 267 | 268 | 269 | // Area generator 270 | var line2 = d3.line() 271 | .curve(d3.curveMonotoneX) 272 | .x(function(d) { return x(d.data.date); }) 273 | .y(function(d) { return y(d.data.five); }) 274 | 275 | // Area generator 276 | var line3 = d3.line() 277 | .curve(d3.curveMonotoneX) 278 | .x(function(d) { return x(d.data.date); }) 279 | .y(function(d) { return y(d.data.one); }) 280 | 281 | // Area generator 282 | var line4 = d3.line() 283 | .curve(d3.curveMonotoneX) 284 | .x(function(d) { return x(d.data.date); }) 285 | 
.y(function(d) { return y(d.data.nine); }) 286 | 287 | // graph colors 288 | var legendColors = d3.scaleOrdinal().range(color_array) 289 | 290 | var areas = [area, area2] 291 | 292 | // var line = d3.svg.line() 293 | // .x(function(d) { return x(d.data.date) }) 294 | // .y(function(d) { return y(y(d[0])); }); 295 | 296 | // d3.selectAll('.line') 297 | // .attr("d", line) 298 | 299 | // Show the areas 300 | svg 301 | .selectAll("mylayers") 302 | .data(stackedData) 303 | .enter() 304 | .append("path") 305 | .attr("class", "myArea") 306 | .style("fill",legendColors(0)) 307 | .attr("d", area) 308 | .attr("clip-path", "url(#clip)"); 309 | // .on("mouseover", mouseover) 310 | // // .on("mousemove", mousemove) 311 | // .on("mouseleave", mouseleave) 312 | // .attr("fill-opacity","0.3") 313 | 314 | svg 315 | .selectAll("mylayers") 316 | .data(stackedData) 317 | .enter() 318 | .append("path") 319 | .attr("class", "myArea") 320 | .style("fill" ,legendColors(1)) 321 | .attr("d", area2) 322 | .attr("clip-path", "url(#clip)"); 323 | // .on("mouseover", mouseover) 324 | // // .on("mousemove", mousemove) 325 | // .on("mouseleave", mouseleave) 326 | // .attr("fill-opacity","0.5") 327 | 328 | svg 329 | .selectAll("mylayers") 330 | .data(stackedData) 331 | .enter() 332 | .append("path") 333 | .attr("class", "myArea") 334 | .style("fill",legendColors(2)) 335 | // .attr("fill-opacity","0.9") 336 | .attr("d", area3) 337 | .attr("clip-path", "url(#clip)"); 338 | // .on("mouseover", mouseover) 339 | // // .on("mousemove", mousemove) 340 | // .on("mouseleave", mouseleave) 341 | 342 | var area4 = svg 343 | .selectAll("mylayers") 344 | .data(stackedData) 345 | .enter() 346 | .append("path") 347 | .attr("class", "myArea") 348 | .style("fill", legendColors(3)) 349 | // .attr("fill-opacity","0.5") 350 | .attr("d", area4) 351 | .attr("clip-path", "url(#clip)"); 352 | // .on("mouseover", mouseover) 353 | // // .on("mousemove", mousemove) 354 | // .on("mouseleave", mouseleave) 355 | 356 | 357 | 358 | var totalLength = 50000 359 | var totalLength2 = area4.node().getTotalLength(); 360 | 361 | // mean predictions 362 | var path2 = svg 363 | .selectAll("mylayers") 364 | .data(stackedData) 365 | .enter() 366 | .append("path") 367 | .attr("class", "test-line") 368 | .style("fill", 'none') 369 | .attr("stroke", "white") 370 | .attr("stroke-width", 0.05) 371 | .attr("clip-path", "url(#clip)") 372 | .attr("d", line2) 373 | 374 | var path3 = svg 375 | .selectAll("mylayers") 376 | .data(stackedData) 377 | .enter() 378 | .append("path") 379 | .attr("class", "test-line") 380 | .style("fill", 'none') 381 | .attr("stroke", legendColors(4)) 382 | .attr("stroke-width", 0) 383 | .attr("clip-path", "url(#clip)") 384 | .attr("d", line3) 385 | 386 | var path4 = svg 387 | .selectAll("mylayers") 388 | .data(stackedData) 389 | .enter() 390 | .append("path") 391 | .attr("class", "test-line") 392 | .style("fill", 'none') 393 | .attr("stroke", legendColors(4)) 394 | .attr("stroke-width", 0) 395 | .attr("clip-path", "url(#clip)") 396 | .attr("d", line4) 397 | 398 | // var clip = svg.append("clipPath") 399 | // .attr("id", "clip"); 400 | // var clipRect = clip.append("rect") 401 | // .attr("width", 100) 402 | // .attr("height", height) 403 | 404 | // clipRect 405 | // .transition() 406 | // .delay(1000) 407 | // .duration(8000) 408 | // .ease(d3.easeLinear) 409 | // .attr("width", width) 410 | 411 | // path 412 | // .attr("stroke-dasharray", totalLength + " " + totalLength) 413 | // .attr("stroke-dashoffset", totalLength) 414 | // .transition() 415 | 
// .duration(9000) 416 | // .ease(d3.easeLinear) 417 | // .attr("stroke-dashoffset", 0) 418 | // .on("end") 419 | 420 | 421 | // legend 422 | var count = ['1','2','3','4','5','6'] 423 | var legendKeys = d3.scaleOrdinal().range(['Quantile 5 - 95', 'Quantile 15 - 85', 'Quantile 25 - 75', 'Quantile 35 - 65', 'Mean', 'Actual']); 424 | 425 | 426 | // Add one dot in the legend for each name. 427 | var size = 12.5 428 | svg.selectAll("myrects") 429 | .data(count) 430 | .enter() 431 | .append("rect") 432 | .attr("x", width - 150) 433 | .attr("y", function(d,i){ if(i < 4) {return 20 + i*(size+10)}; if(i >= 4) {return 25 + i*(size+10)}; }) 434 | .attr("width", size) 435 | .attr("height", function(d,i){ if(i < 4) {return size}; if(i >= 4) {return size/5}; }) 436 | .style("fill", function(d, i){ return legendColors(i) }) 437 | 438 | // Add one dot in the legend for each name. 439 | svg.selectAll("mylabels") 440 | .data(count) 441 | .enter() 442 | .append("text") 443 | .attr("x", (width - 150) + size*1.5) 444 | .attr("y", function(d,i){ return 20 + i*(size+10.25) + (size/2)}) 445 | .style("fill", '#000000') 446 | .text(function(d, i){ return legendKeys(i)}) 447 | .style("font", "14px arial") 448 | .style("fill", "grey") 449 | // .style("text-transform", "uppercase") 450 | .attr("text-anchor", "left") 451 | .style("alignment-baseline", "middle") 452 | 453 | // actual, measured data 454 | var path = svg 455 | .selectAll("mylayers") 456 | .data(stackedData) 457 | .enter() 458 | .append("path") 459 | .attr("class", "test-line") 460 | .style("fill", 'none') 461 | .attr("stroke", '#1c2f33') //D21404 462 | .attr("stroke-width", 0.15) 463 | .attr("stroke-opacity", 0.9) 464 | .attr("d", line) 465 | 466 | // create cursor highlight ////////////////////////////////////// 467 | 468 | var mouseG = svg 469 | .append("g") 470 | .attr("class", "mouse-over-effects"); 471 | 472 | mouseG 473 | .append("path") // this is the black vertical line to follow mouse 474 | .attr("class", "mouse-line") 475 | .style("stroke", "#393B45") //6E7889 476 | .style("stroke-width", "0.5px") 477 | .style("opacity", 0.75) 478 | 479 | mouseG.append("text") 480 | .attr("class", "mouse-text") 481 | // .style("font-size", "200%") 482 | // .text("test") 483 | .style("opacity", 0) 484 | 485 | // var lines = document.getElementsByClassName('line'); 486 | var lines = [path, path3, path4] 487 | 488 | var mousePerLine = mouseG.selectAll('.mouse-per-line') 489 | .data(data) 490 | .enter() 491 | .append("g") 492 | .attr("class", "mouse-per-line"); 493 | 494 | var res = sumstat.map(function(d){ return d.key }) 495 | var color = d3.scaleOrdinal() 496 | .domain(res) 497 | .range(['darkblue','darkblue','darkblue','darkblue']) 498 | 499 | 500 | mousePerLine.append("circle") 501 | .attr("r", 7) 502 | .style("stroke", function(d, i) { 503 | return color(i); 504 | }) 505 | .style("fill", "none") 506 | .style("stroke-width", "1px") 507 | .style("opacity", "0"); 508 | 509 | mousePerLine.append("text") 510 | .attr("transform", "translate(10,3)"); 511 | 512 | mousePerLine.append("text") 513 | .attr("class", "timetext"); 514 | 515 | mouseG 516 | .append('svg:rect') // append a rect to catch mouse movements on canvas 517 | .attr('width', width) // can't catch mouse events on a g element 518 | .attr('height', height) 519 | .attr('fill', 'none') 520 | .attr('pointer-events', 'all') 521 | .on('mouseout touchout', function() { // on mouse out hide line, circles and text 522 | d3.select("#my_dataviz_" + ref) 523 | .select(".mouse-line ") 524 | .style("opacity", "0" ); 
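// note: the hover reset here is scoped to this chart's container ('#my_dataviz_' + ref), so several chart instances (e.g. wind / solar / demand / price) can presumably share a page without clearing each other's hover state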
525 | d3.select("#my_dataviz_" + ref) 526 | .select(".mouse-text") 527 | .style("opacity", "0"); 528 | d3.select("#my_dataviz_" + ref) 529 | .selectAll(".mouse-per-line circle") 530 | .style("opacity", "0"); 531 | d3.select("#my_dataviz_" + ref) 532 | .selectAll(".mouse-per-line text") 533 | .style("opacity", "0") 534 | }) 535 | .on('mouseover touchover', function() { // on mouse in show line, circles and text 536 | d3.select("#my_dataviz_" + ref) 537 | .select(".mouse-line") 538 | .style("opacity", "1"); 539 | d3.select("#my_dataviz_" + ref) 540 | .select(".mouse-text") 541 | .style("opacity", "1"); 542 | // d3.selectAll(".mouse-per-line circle") 543 | // .style("opacity", "1"); 544 | d3.select("#my_dataviz_" + ref) 545 | .selectAll(".mouse-per-line text" ) 546 | .style("opacity", "1"); 547 | 548 | }) 549 | .on('mousemove touchmove', function() { // mouse moving over canvas 550 | var mouse = d3.mouse(this); 551 | d3.select("#my_dataviz_" + ref) 552 | .select(".mouse-text") 553 | .attr("x", mouse[0]) 554 | .attr("transform", "translate(10,30)") 555 | d3.select("#my_dataviz_" + ref) 556 | .select(".mouse-line") 557 | .attr("d", function() { 558 | var d = "M" + mouse[0] + "," + height; 559 | d += " " + mouse[0] + "," + 0; 560 | return d; 561 | }) 562 | 563 | 564 | d3.select("#my_dataviz_" + ref) 565 | .selectAll(".mouse-per-line") 566 | .attr("transform", function(d, i) { 567 | if (i >= 4){ return null }; 568 | 569 | var xDate = x.invert(mouse[0]) 570 | time = d3.timeFormat("%H:%M %p")(xDate) 571 | 572 | // bisect = d3.bisector(function(d) { return d.date; }).left; 573 | // idx = bisect(data, xDate, 1); 574 | 575 | var beginning = 0, 576 | // end = lines[i].node().getTotalLength() 577 | end = totalLength 578 | target = null; 579 | 580 | while (true){ 581 | 582 | target = Math.floor((beginning + end) / 2); 583 | pos = lines[i].node().getPointAtLength(target); 584 | // pos = target; 585 | if ((target === end || target === beginning) && pos.x !== mouse[0]) { 586 | break; 587 | } 588 | if (pos.x > mouse[0]) end = target; 589 | else if (pos.x < mouse[0]) beginning = target; 590 | 591 | else break; //position found 592 | } 593 | 594 | if (ref == 'price') { 595 | unit = ' £/MWh' 596 | } else { 597 | unit = ' GW' 598 | } 599 | 600 | if (i === 0) { 601 | d3.select(this).select('text') 602 | .text(y.invert(pos.y).toFixed(1) + unit) 603 | .attr("transform", "translate(10,0)") 604 | .style("font", "18px arial") 605 | .style('fill', 'blue') 606 | } else { 607 | d3.select(this).select('text') 608 | .text(y.invert(pos.y).toFixed(1) + unit) 609 | .attr("transform", "translate(-75,0)") 610 | .style("font", "16px arial") 611 | .style('fill', 'black'); 612 | } 613 | 614 | d3.select(this).select('circle') 615 | .style("opacity", 1) 616 | var parseDate = d3.timeParse("%a %d"); 617 | var timestamp = d3.select("#my_dataviz_" + ref).select('.mouse-text') 618 | .text(time) 619 | .style("opacity", 0.5) 620 | .style("text-transform", "uppercase") 621 | .style("font", "arial") 622 | .style("font-size", "22.5px") 623 | 624 | return "translate(" + mouse[0] + "," + pos.y +")"; 625 | }); 626 | }) 627 | 628 | 629 | // Add Y line: 630 | svg.append("line") 631 | // .attr("transform", "rotate(-90)") 632 | .attr("y1", height) 633 | .attr("x1", 0) 634 | .style("stroke-width", 1) 635 | .style("stroke", "#263238") 636 | 637 | // Add X line: 638 | svg.append("line") 639 | // .attr("transform", "rotate(-90)") 640 | .attr("y1", height) 641 | .attr("x1", 0) 642 | .attr("y2", height) 643 | .attr("x2", width) 644 | 
.style("stroke-width", 1) 645 | .style("stroke", "#263238") 646 | 647 | 648 | //add minor tick marks to x-axis 649 | var m 650 | for (m = 0; m < width; ){ 651 | svg.append("line") 652 | .attr("y1", height) 653 | .attr("x1", m ) 654 | .attr("y2", height + 5) 655 | .attr("x2", m ) 656 | .style("stroke-width", 1) 657 | .style("stroke", "#263238") 658 | .style("opacity", 0.5); 659 | m = m + (width / 167.5 ) 660 | } 661 | 662 | //add main tick marks to x-axis 663 | var i 664 | for (i = (width / 7); i < width; i++){ 665 | svg.append("line") 666 | .attr("y1", height) 667 | .attr("x1", i ) 668 | .attr("y2", height + 20) 669 | .attr("x2", i ) 670 | .style("stroke-width", 1.5) 671 | .style("stroke", "#263238"); 672 | i = i + (width / 7) - 0.5 673 | } 674 | 675 | //add noon tick marks to x-axis 676 | var n 677 | for (n = (width / 14); n < width; n++){ 678 | svg.append("line") 679 | .attr("y1", height) 680 | .attr("x1", n ) 681 | .attr("y2", height + 12) 682 | .attr("x2", n ) 683 | .style("stroke-width", 1.5) 684 | .style("stroke", "#263238"); 685 | n = n + (width / 7) - 0.5 686 | } 687 | 688 | //add main tick marks to x-axis 689 | var i 690 | for (i = (width / 7); i < width; i++){ 691 | svg.append("line") 692 | .attr("y1", height) 693 | .attr("x1", i ) 694 | .attr("y2", 0) 695 | .attr("x2", i ) 696 | .style("stroke-width", 0.5) 697 | .style("stroke-dasharray", ("3, 3")) 698 | .style("stroke", "#263238"); 699 | i = i + (width / 7) - 0.5 700 | } 701 | 702 | 703 | 704 | 705 | //add y-axis tick marks to y-axis 706 | // var u 707 | // for (u = 0; u < height; u++){ 708 | // svg.append("line") 709 | // .attr("y1", u) 710 | // .attr("x1", -5) 711 | // .attr("y2", u) 712 | // .attr("x2", 0) 713 | // .style("stroke-width", 1.0) 714 | // .style("stroke", "#263238"); 715 | // u = u + (height / 9) - 1 716 | // } 717 | 718 | }) 719 | } 720 | -------------------------------------------------------------------------------- /scripts/postprocessing/format_results_Qforecast_plot.py: -------------------------------------------------------------------------------- 1 | # format prediction results for qunatile forecasting d3 plot 2 | import numpy as np 3 | import pandas as pd 4 | from datetime import datetime, timedelta 5 | import csv 6 | from pickle import load 7 | from sklearn.preprocessing import MinMaxScaler 8 | 9 | 10 | 11 | # declare model type 12 | model_type = 'seq2seq+temporal' # - bilstm, seq2seq, seq2seq+temporal, seq2seq+temporal+spatial 13 | 14 | # forecasting model 15 | forecast_var = 'price' 16 | 17 | # select start example index reference, 7-days plotted from here 18 | ex_idx = 4 19 | 20 | # load prediction data 21 | with open(f'../../results/{forecast_var}/{model_type}/forecasted_time_series_{forecast_var}_{model_type}.pkl', 'rb') as forecast_data: 22 | predictions = load(forecast_data) 23 | 24 | print(len(predictions['0.5'])) 25 | 26 | # get start date 27 | out_start_time = predictions['time_refs']['output_times'][ex_idx][0] 28 | 29 | print(out_start_time) 30 | 31 | # produce date range for week-long predictions 32 | ouput_sequence_len = 336 # (Half-Hours) 33 | input_num_of_days = ouput_sequence_len / 48 34 | # start_date = datetime.strptime(str(out_start_time)[:10], "%Y-%m-%d") 35 | # out_date_range = pd.date_range(start=start_date, end=start_date + timedelta(days=input_num_of_days) , freq="30min")[:-1]# remove HH entry form unwanted day 36 | 37 | out_start_time = predictions['time_refs']['output_times'][ex_idx:ex_idx+int(input_num_of_days)] 38 | out_date_range = pd.to_datetime(out_start_time.ravel(), 
format='%Y-%m-%d') 39 | 40 | # index ref 41 | idx_ref = [x for x in range(1, ouput_sequence_len+1)] 42 | 43 | # final params for df 44 | final_params = {'year': idx_ref, 45 | 'Datetime': out_date_range } 46 | 47 | # loop through to write results for each quantile 48 | for q in list(predictions.keys())[:-2]: 49 | 50 | final_params[f'q_{q[2:]}'] = predictions[str(q)][ex_idx:ex_idx+7, :, 0].reshape((-1)) 51 | 52 | # add actual values for reference 53 | final_params['actual'] = predictions['y_true'][ex_idx:ex_idx+7, :, 0].reshape((-1)) 54 | 55 | print(final_params.keys()) 56 | 57 | # convert to pandas df 58 | df = pd.DataFrame(dict([(keys ,pd.Series(values, dtype = 'object')) for keys, values in final_params.items()])) # set all as objects to avoid warning on empty cells 59 | 60 | # divide to GW 61 | if forecast_var != "price": 62 | df.iloc[:,2:] = df.iloc[:,2:] / 1000 63 | 64 | # copy to clipboard 65 | df.to_clipboard() 66 | 67 | # save data to file 68 | df.to_csv(f'../../results/{forecast_var}/{model_type}/quantile_prediction_results_{forecast_var}_{model_type}.csv', index=False) 69 | 70 | -------------------------------------------------------------------------------- /scripts/postprocessing/format_results_attn_plot.py: -------------------------------------------------------------------------------- 1 | # format attention results for context d3 plot 2 | import numpy as np 3 | import pandas as pd 4 | from datetime import datetime, timedelta 5 | import csv 6 | from pickle import load 7 | from sklearn.preprocessing import MinMaxScaler 8 | 9 | # forecasting model 10 | type = 'solar' # 'wind', 'solar', 'price', 'demand' 11 | 12 | # select example refernce 13 | ex_idx = 26 14 | 15 | # load attention data 16 | if type != "price": 17 | with open(f'../../results/{type}/seq2seq+temporal+spatial/attention_data_{type}_seq2seq+temporal+spatial.pkl', 'rb') as attention_data: 18 | attention_results = load(attention_data) 19 | 20 | # load prediction data 21 | with open(f'../../results/{type}/seq2seq+temporal+spatial/forecasted_time_series_{type}_seq2seq+temporal+spatial.pkl', 'rb') as forecast_data: 22 | predictions = load(forecast_data) 23 | else: 24 | with open(f'../../results/{type}/seq2seq+temporal/attention_data_{type}_seq2seq+temporal.pkl', 'rb') as attention_data: 25 | attention_results = load(attention_data) 26 | 27 | # load prediction data 28 | with open(f'../../results/{type}/seq2seq+temporal/forecasted_time_series_{type}_seq2seq+temporal.pkl', 'rb') as forecast_data: 29 | predictions = load(forecast_data) 30 | 31 | 32 | print(attention_results.keys()) 33 | 34 | # get start dates for inputs and outputs 35 | in_start_time = attention_results['time_refs']['input_times'][ex_idx][0] 36 | out_start_time = attention_results['time_refs']['output_times'][ex_idx][0] 37 | 38 | # log start date of selected index 39 | print(f'input time start date: {in_start_time}') 40 | print(f'output time start date: {out_start_time}') 41 | 42 | # input data for reference 43 | if type != 'price': 44 | input_data = np.average(attention_results['input_features'][ex_idx, :, :, :, 0], axis=(1,2)) 45 | else: 46 | input_data = attention_results['input_features'][ex_idx, :, -1:] 47 | 48 | 49 | # get prediction result for current index 50 | current_prediction = predictions['0.5'][ex_idx, :, 0] 51 | 52 | # attention values for current index 53 | current_attention_vals = attention_results['0.5'][ex_idx] 54 | 55 | attention_vals = np.empty((current_attention_vals.shape[0] * current_attention_vals.shape[1])) 56 | 57 | # make sure 
attention values are in correct format 58 | iidx = 0 59 | for idx in range(current_attention_vals.shape[0]): 60 | attention_vals[iidx:iidx+48] = current_attention_vals[idx, :] 61 | iidx += 48 62 | 63 | # input params 64 | input_sequence_len = 336 65 | input_num_of_days = input_sequence_len / 48 66 | start_date = datetime.strptime(str(in_start_time)[:10], "%Y-%m-%d") 67 | target_data = datetime.strptime(str(out_start_time)[:10], "%Y-%m-%d") 68 | input_date_range = pd.date_range(start=start_date, end=start_date + timedelta(days=input_num_of_days) , freq="30min")[:-1]# remove HH entry form unwanted day 69 | 70 | # out_start_time = predictions['time_refs']['output_times'][ex_idx:ex_idx+int(input_num_of_days)] 71 | # input_date_range = pd.to_datetime(out_start_time.ravel(), format='%Y-%m-%d') 72 | 73 | # create index values 74 | group_index = [48 * [idx] for idx in range(input_sequence_len)] 75 | variable_index = [[idx for idx in range(48)] for iidx in range(input_sequence_len)] 76 | 77 | # flatten lists if lists 78 | group_index = sum(group_index, []) 79 | variable_index = sum(variable_index, []) 80 | 81 | # create data ranges 82 | group = [48 * [date_time] for date_time in input_date_range] 83 | variable = [pd.date_range(start=target_data, end=target_data + timedelta(days=1) , freq="30min").tolist()[:-1] for idx in range(input_sequence_len)] # remove HH entry for next day 84 | 85 | # flatten timestamps into single list 86 | group = sum(group, []) 87 | variable = sum(variable, []) 88 | 89 | # create output time idxs 90 | output_time_ref = [idx for idx in range(48)] 91 | 92 | # create input time idxs 93 | input_time_ref = [idx for idx in range(input_sequence_len)] 94 | 95 | # input times 96 | input_time = [date_time for date_time in input_date_range] 97 | 98 | # output times 99 | output_time = pd.date_range(start=target_data, end=target_data + timedelta(days=1) , freq="30min").tolist()[:-1] 100 | 101 | # load and apply scaler 102 | # load scaler 103 | scaler = load(open(f'../../data/processed/{type}/_scaler/scaler_{type}_v2.pkl', 'rb')) 104 | 105 | input_data = np.squeeze(input_data) 106 | 107 | # transform input data 108 | input_data = scaler.inverse_transform(input_data) 109 | 110 | attention_vals_int = attention_vals 111 | 112 | # take log of attention values 113 | # scaler = MinMaxScaler(feature_range = (0, 1)) 114 | # attention_vals_scaled = scaler.fit_transform(attention_vals.reshape(-1,1)).reshape(-1) 115 | 116 | # attention_vals_scaled = np.sqrt(attention_vals_scaled) 117 | 118 | attention_vals_scaled = attention_vals 119 | 120 | # get true values for reference 121 | y_true = predictions['y_true'][ex_idx][:,0] 122 | 123 | # final params for df 124 | final_params = {'group_index': group_index, 125 | 'variable_index': variable_index, 126 | 'group': group, 127 | 'variable': variable, 128 | 'value_scaled': attention_vals_scaled, 129 | 'value': attention_vals_int, 130 | 'input_time_ref': input_time_ref, 131 | 'input_time': input_time, 132 | 'input_values': input_data, 133 | 'output_time_ref': output_time_ref, 134 | 'output_time': output_time, 135 | 'prediction': current_prediction, 136 | 'y_true': y_true } 137 | 138 | # convert to pandas df 139 | df = pd.DataFrame(dict([(keys ,pd.Series(values, dtype = 'object')) for keys, values in final_params.items()])) # set all as objects to avoid warning on empty cells 140 | 141 | # copy to clipboard 142 | df.to_clipboard() 143 | 144 | # save data to file 145 | # df.to_csv(f'../../results/{type}/attention_plot_results_{type}.csv', index=False) 146 | 
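# optional sanity checks on the long-format frame built above (sketch only, using names already defined in this script):
# the table should hold one row per (input half-hour, output half-hour) pair, i.e. input_sequence_len * 48 = 16,128 attention entries,
# while the shorter columns (input_values, prediction, y_true, etc.) are padded with NaN because each column is wrapped in a pd.Series first.
# assert len(df) == input_sequence_len * 48
# assert df.groupby(['group_index', 'variable_index']).size().eq(1).all()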
147 | 148 | -------------------------------------------------------------------------------- /scripts/postprocessing/results_summary.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import scipy 4 | from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error 5 | from pickle import load 6 | 7 | 8 | # declare model type 9 | model_type = 'seq2seq+temporal' # - bilstm, seq2seq, seq2seq+temporal, seq2seq+temporal+spatial 10 | 11 | # desired var to run analysis 12 | forecast_var = 'price' 13 | 14 | # load quantile prediction results 15 | with open(f'../../results/{forecast_var}/{model_type}/forecasted_time_series_{forecast_var}_{model_type}.pkl', 'rb') as forecast_data: 16 | results = load(forecast_data) 17 | 18 | 19 | def mean_absolute_percentage_error(y_true, y_pred): # overrides the sklearn import above (returns a percentage rather than a fraction) 20 | 21 | y_true, y_pred = np.array(y_true), np.array(y_pred) 22 | return np.mean(np.abs((y_true - y_pred) / y_true)) * 100 23 | 24 | def smape(y_true, y_pred): 25 | return 100/len(y_true) * np.sum(2 * np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred))) 26 | 27 | # function to evaluate general & quantile performance 28 | def evaluate_predictions(predictions): 29 | ''' 30 | Theory from Bazionis & Georgilakis (2021): https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=&ved=2ahUKEwiUprb39qbyAhXNgVwKHWVsA50QFnoECAMQAQ&url=https%3A%2F%2Fwww.mdpi.com%2F2673-4826%2F2%2F1%2F2%2Fpdf&usg=AOvVaw1AWP-zHuNGrw8pgDfUS09e 31 | Function to calculate probabilistic forecast performance 32 | Prediction Interval Coverage Probability (PICP) 33 | Prediction Interval Nominal Coverage (PINC) 34 | Average Coverage Error (ACE) [PICP - PINC] 35 | ''' 36 | test_len = len(predictions['y_true']) 37 | 38 | print(test_len) 39 | 40 | y_true = predictions['y_true'].ravel() 41 | lower_pred = predictions[list(predictions.keys())[0]].ravel() 42 | upper_pred = predictions[list(predictions.keys())[-3]].ravel() 43 | central_case = predictions['0.5'].ravel() 44 | 45 | alpha = float(list(predictions.keys())[-3]) - float(list(predictions.keys())[0]) 46 | 47 | # picp_ind = np.sum((y_true > lower_pred) & (y_true <= upper_pred)) 48 | 49 | picp = ((np.sum((y_true >= lower_pred) & (y_true <= upper_pred))) / (test_len * 48) ) * 100 50 | 51 | pinc = alpha * 100 52 | 53 | ace = (picp - pinc) # the closer to 0, the higher the reliability 54 | 55 | r = np.max(y_true) - np.min(y_true) 56 | 57 | # PI normalised width 58 | pinaw = (1 / (test_len * r)) * np.sum((upper_pred - lower_pred)) 59 | 60 | # PI normalised root-mean-square width 61 | pinrw = (1/r) * np.sqrt( (1/test_len) * np.sum((upper_pred - lower_pred)**2)) 62 | 63 | # calculate MAE & RMSE 64 | mae = mean_absolute_error(y_true, central_case) 65 | mape = mean_absolute_percentage_error(y_true, central_case) 66 | rmse = mean_squared_error(y_true, central_case, squared=False) 67 | 68 | # calculate MAE & RMSE for persistence 69 | persistence_prediction = predictions['y_true'][:-1].ravel() 70 | persistence_true = predictions['y_true'][1:].ravel() 71 | 72 | mae_base = mean_absolute_error(persistence_true, persistence_prediction) 73 | mape_base = mean_absolute_percentage_error(persistence_true, persistence_prediction) 74 | rmse_base = mean_squared_error(persistence_true, persistence_prediction, squared=False) 75 | 76 | # create pandas df 77 | metrics = pd.DataFrame({'PICP': picp, 'PINC': pinc, 'ACE': ace, 'PINAW': pinaw, 'PINRW': pinrw, 'MAE': mae, 'MAPE': mape, 'RMSE': rmse}, index={alpha}) 78 |
metrics.index.name = 'Prediction_Interval' 79 | 80 | # create pandas df for baseline 81 | metrics_base = pd.DataFrame({'MAE': mae_base, 'MAPE': mape_base, 'RMSE': rmse_base}, index={'basline_persistence'}) 82 | 83 | print(metrics.to_string()) 84 | print(metrics_base.to_string()) 85 | 86 | # save performance metrics 87 | metrics.to_csv(f'../../results/{forecast_var}/{model_type}/preformance_summary_{forecast_var}_{model_type}.csv', index=False) 88 | 89 | return metrics 90 | 91 | 92 | # function to evaluate trends 93 | def correlation_analysis(X, Y): 94 | 95 | rs = np.empty((X.shape[0], 1)) 96 | #caclulate 'R^2' for each feature - average over all days 97 | for l in range(X.shape[0]): 98 | slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(X[l,:,0], Y[l,:,0]) 99 | rs[l, 0] =r_value**2 100 | 101 | print('mean' + '\n R**2: %s' %rs.mean()) 102 | print('max' + '\n R**2: %s' %rs.max()) 103 | print('min' + '\n R**2: %s' %rs.min()) 104 | 105 | #get best 106 | best_fit = np.argmax(rs, axis=0) 107 | worst_fit = np.argmin(rs, axis=0) 108 | print(best_fit) 109 | print(worst_fit) 110 | 111 | return 112 | 113 | # call evaluate performance 114 | evaluate_predictions(results) 115 | 116 | 117 | -------------------------------------------------------------------------------- /scripts/postprocessing/spatial_attention_plots.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import geopandas 5 | import contextily as ctx 6 | from pickle import load 7 | from matplotlib.animation import FuncAnimation 8 | 9 | 10 | # plot spatial attention 11 | def plot_spatial_predictions(spatial_data, title, height_scale, width_scale, frame_num): 12 | 13 | fig = plt.figure(figsize=[8,10]) # a new figure window 14 | ax_set = fig.add_subplot(1, 1, 1) 15 | 16 | # create baseline map 17 | # spatial data on UK basemap 18 | df = pd.DataFrame({ 19 | 'LAT': [49.78, 61.03], 20 | 'LON': [-11.95, 1.55], 21 | }) 22 | 23 | geo_df = geopandas.GeoDataFrame(df, crs = {'init': 'epsg:4326'}, 24 | geometry=geopandas.points_from_xy(df.LON, df.LAT)).to_crs(epsg=3857) 25 | 26 | ax = geo_df.plot( 27 | figsize= (8,10), 28 | alpha = 0, 29 | ax=ax_set, 30 | ) 31 | 32 | plt.title(title) 33 | ax.set_axis_off() 34 | 35 | # add basemap 36 | url = 'http://tile.stamen.com/terrain/{z}/{x}/{y}.png' 37 | zoom = 10 38 | xmin, xmax, ymin, ymax = ax.axis() 39 | basemap, extent = ctx.bounds2img(xmin, ymin, xmax, ymax, zoom=zoom, source=url) 40 | ax.imshow(basemap, extent=extent, interpolation='gaussian') 41 | attn_over = np.resize(spatial_data[0], (height_scale, width_scale)) 42 | 43 | gb_shape = geopandas.read_file("../../data/raw/_mapping/shapefiles/GBR_adm/GBR_adm0.shp").to_crs(epsg=3857) 44 | irl_shape = geopandas.read_file("../../data/raw/_mapping/shapefiles/IRL_adm/IRL_adm0.shp").to_crs(epsg=3857) 45 | gb_shape.boundary.plot(ax=ax, edgecolor="black", linewidth=0.5, alpha=0.4) 46 | irl_shape.boundary.plot(ax=ax, edgecolor="black", linewidth=0.5, alpha=0.4) 47 | overlay = ax.imshow(attn_over, cmap='viridis', alpha=0.5, extent=extent) 48 | # ax.axis((xmin, xmax, ymin, ymax)) 49 | txt = fig.text(.5, 0.09, '', ha='center') 50 | 51 | 52 | def update(i): 53 | spatial_over = np.resize(spatial_data[i], (height_scale, width_scale)) 54 | print(spatial_over.shape) 55 | # overlay = ax.imshow(spatial_over, cmap='viridis', alpha=0.5, extent=extent) 56 | overlay.set_data(spatial_over) 57 | txt.set_text(f"Timestep: {i}") 58 | # plt.cla() 59 | 
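# updating the existing AxesImage in place (overlay.set_data) avoids re-drawing the basemap and stacking a new image artist on every frame;
# returning the changed artists is what FuncAnimation expects when blitting, although blit=False below so the return value is effectively ignored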
60 | return [overlay, txt] 61 | 62 | 63 | animation_ = FuncAnimation(fig, update, frames=frame_num, blit=False, repeat=False) 64 | # plt.show(block=True) 65 | animation_.save(f'{title}_animation.gif', writer='imagemagick') 66 | 67 | 68 | 69 | # define model type to plot 70 | model_type = 'solar' 71 | 72 | idx = 0 73 | 74 | # load spatial attention data 75 | # save results - forecasted spatial attention matrix 76 | with open(f'../../results/{model_type}/seq2seq+temporal+spatial/spatial_attention_data_{model_type}.pkl', 'rb') as spatial_file: 77 | spatial_data = load(spatial_file) 78 | 79 | 80 | # grab relevant example 81 | spatial_data = spatial_data['0.5'][idx,:,:] 82 | 83 | spatial_data = np.transpose(spatial_data) 84 | 85 | print(spatial_data.shape) 86 | print(spatial_data[30, :]) 87 | 88 | # exit() 89 | 90 | 91 | # call plot function 92 | plot_spatial_predictions(spatial_data=spatial_data, title='Solar Spatial Attention', height_scale=16, width_scale=20, frame_num=48) -------------------------------------------------------------------------------- /scripts/preprocessing/ERA5_downloader.py: -------------------------------------------------------------------------------- 1 | import cdsapi 2 | import os 3 | 4 | 5 | os.chdir("PATH TO ERA5 DOWNLOADER FILE") 6 | print(os.getcwd()) 7 | 8 | 9 | c = cdsapi.Client() 10 | 11 | years = ['2020', '2021'] 12 | variables = ['surface_net_solar_radiation'] 13 | 14 | for l, var in enumerate(variables): 15 | for i, year in enumerate(years): 16 | 17 | print(f'year:{year}, var:{var}') 18 | 19 | if year == '2021': 20 | c.retrieve( 21 | f'reanalysis-era5-single-levels', 22 | { 23 | 'product_type': 'reanalysis', 24 | 'format': 'netcdf', 25 | 'variable': [ 26 | var, 27 | ], 28 | 'year': [ 29 | year, 30 | ], 31 | 'month': [ 32 | '01', '02', '03', 33 | '04', '05', '06', 34 | # '07', '08', '09', 35 | # '10', '11', '12', 36 | ], 37 | 'day': [ 38 | '01', '02', '03', 39 | '04', '05', '06', 40 | '07', '08', '09', 41 | '10', '11', '12', 42 | '13', '14', '15', 43 | '16', '17', '18', 44 | '19', '20', '21', 45 | '22', '23', '24', 46 | '25', '26', '27', 47 | '28', '29', '30', 48 | '31', 49 | ], 50 | 'time': [ 51 | '00:00', '01:00', '02:00', 52 | '03:00', '04:00', '05:00', 53 | '06:00', '07:00', '08:00', 54 | '09:00', '10:00', '11:00', 55 | '12:00', '13:00', '14:00', 56 | '15:00', '16:00', '17:00', 57 | '18:00', '19:00', '20:00', 58 | '21:00', '22:00', '23:00', 59 | ], 60 | 'area': [ 61 | 61.19, -11.95, 49.78, 62 | 1.76, 63 | ], 64 | }, 65 | str(var) + '_' + str(year) + '.nc') 66 | 67 | 68 | else: 69 | c.retrieve( 70 | f'reanalysis-era5-single-levels', 71 | { 72 | 'product_type': 'reanalysis', 73 | 'format': 'netcdf', 74 | 'variable': [ 75 | var, 76 | ], 77 | 'year': [ 78 | year, 79 | ], 80 | 'month': [ 81 | '01', '02', '03', 82 | '04', '05', '06', 83 | '07', '08', '09', 84 | '10', '11', '12', 85 | ], 86 | 'day': [ 87 | '01', '02', '03', 88 | '04', '05', '06', 89 | '07', '08', '09', 90 | '10', '11', '12', 91 | '13', '14', '15', 92 | '16', '17', '18', 93 | '19', '20', '21', 94 | '22', '23', '24', 95 | '25', '26', '27', 96 | '28', '29', '30', 97 | '31', 98 | ], 99 | 'time': [ 100 | '00:00', '01:00', '02:00', 101 | '03:00', '04:00', '05:00', 102 | '06:00', '07:00', '08:00', 103 | '09:00', '10:00', '11:00', 104 | '12:00', '13:00', '14:00', 105 | '15:00', '16:00', '17:00', 106 | '18:00', '19:00', '20:00', 107 | '21:00', '22:00', '23:00', 108 | ], 109 | 'area': [ 110 | 61.19, -11.95, 49.78, 111 | 1.76, 112 | ], 113 | }, 114 | str(var) + '_' + str(year) + '.nc') 115 | 116 | 117 | 
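# usage notes: cdsapi reads the CDS API key from ~/.cdsapirc, so a Copernicus account and key must be configured before running;
# the 'area' bounds are ordered [North, West, South, East] and here cover Great Britain and Ireland; the 2021 branch only requests
# January-June (the later months are commented out). The two retrieve() branches differ only in the month list, so a single call with e.g.
# months = ['01', '02', '03', '04', '05', '06'] if year == '2021' else [f'{m:02d}' for m in range(1, 13)]
# passed as 'month': months would be an equivalent, shorter form (sketch only).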
118 | 119 | -------------------------------------------------------------------------------- /scripts/preprocessing/__pycache__/preprocessing_funcs.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/scripts/preprocessing/__pycache__/preprocessing_funcs.cpython-38.pyc -------------------------------------------------------------------------------- /scripts/preprocessing/data_preprocessing_demand.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import sys 4 | import os 5 | from pickle import dump, load 6 | import h5py 7 | 8 | from preprocessing_funcs import demand_data_processing 9 | 10 | np.set_printoptions(threshold=sys.maxsize) 11 | 12 | ###########################################_____LOAD & PRE-PROCESS DATA_____########################################### 13 | 14 | #cache current working directory of main script 15 | workingDir = os.getcwd() 16 | 17 | # paths to nc files for x_value features: 18 | filepaths = { 19 | 'temperature': '../../data/raw/temperature', 20 | } 21 | 22 | #load labels (solar generation per HH) 23 | demandGenLabels = pd.read_csv('../../data/raw/demand_labels/HH_demand_v2.csv', parse_dates=True, index_col=0, header=0, dayfirst=True) 24 | 25 | # call main pre-processing function - sequence windowing no longer utilised 26 | dataset, time_refs = demand_data_processing(filepaths = filepaths, labels = demandGenLabels, workingDir = workingDir) 27 | 28 | # print data summaries 29 | print(*[f'{key}: {dataset["train_set"][key].shape}' for key in dataset['train_set'].keys()], sep='\n') 30 | print(*[f'{key}: {dataset["test_set"][key].shape}' for key in dataset['test_set'].keys()], sep='\n') 31 | 32 | # #save time timeseries (inputs & outputs) for reference 33 | print('saving data...') 34 | with open("../../data/processed/demand/time_refs_demand_v4.pkl", "wb") as times: 35 | dump(time_refs, times) 36 | 37 | # save training set as dictionary (h5py dump) 38 | f = h5py.File('../../data/processed/demand/dataset_demand_v4.hdf5', 'w') 39 | 40 | for group_name in dataset: 41 | group = f.create_group(group_name) 42 | for dset_name in dataset[group_name]: 43 | dset = group.create_dataset(dset_name, data = dataset[group_name][dset_name]) 44 | f.close() 45 | 46 | -------------------------------------------------------------------------------- /scripts/preprocessing/data_preprocessing_price.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from sklearn.preprocessing import MinMaxScaler, StandardScaler 4 | from pickle import dump 5 | import h5py 6 | 7 | from workalendar.europe import UnitedKingdom 8 | cal = UnitedKingdom() 9 | 10 | # load input data 11 | windGen_data = pd.read_csv('../../data/raw/wind_labels/HH_windGen_v2.csv', parse_dates=True, index_col=0, header=0, dayfirst=True) 12 | solarGen_data = pd.read_csv('../../data/raw/solar_labels/HH_PVGen_v2.csv', parse_dates=True, index_col=0, header=0, dayfirst=True) 13 | demand_data = pd.read_csv('../../data/raw/demand_labels/HH_demand_v2.csv', parse_dates=True, index_col=0, header=0, dayfirst=True) 14 | 15 | # load labels 16 | price_data = pd.read_csv('../../data/raw/price_labels/N2EX_UK_DA_Auction_Hourly_Prices_v2.csv', parse_dates=True, index_col=0, header=0, dayfirst=True) 17 | 
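# context for the resampling step below: the N2EX day-ahead auction publishes hourly prices, while the wind / solar / demand series above
# are half-hourly, so the prices are reindexed onto a 30-minute grid and linearly interpolated
# (e.g. 10:00 = 50 £/MWh and 11:00 = 54 £/MWh gives 10:30 = 52 £/MWh).
# worth double-checking: price_mask further down is built from demand_data rather than the price series itself, which looks like a
# copy-and-paste slip (harmless only if prices are missing on the same days as demand).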
18 | # interpolate hourly prices into HH resolution 19 | price_data = price_data.reindex(pd.date_range(start=price_data.index.min(), end=price_data.index.max() + pd.Timedelta(minutes=30), freq='30T')) 20 | price_data = price_data.interpolate() 21 | 22 | # combine vars into feature array 23 | arrays = [windGen_data.values, solarGen_data.values, demand_data.values] 24 | 25 | feature_array = [] 26 | 27 | # normalise feature array 28 | for i, array in enumerate(arrays): 29 | scaler = StandardScaler() 30 | feature_array.append(scaler.fit_transform(array)) 31 | 32 | # normalise labels 33 | # scaler = MinMaxScaler() #normalise data 34 | scaler = StandardScaler() 35 | price_data = scaler.fit_transform(price_data.values) 36 | 37 | # save price data scaler 38 | dump(scaler, open('../../data/processed/price/_scaler/scaler_price_v2.pkl', 'wb')) 39 | 40 | # stack features 41 | feature_array = np.concatenate(feature_array, axis=-1) 42 | 43 | # mask data (eliminate nans) 44 | wind_mask = windGen_data.iloc[:,-1].isna().groupby(windGen_data.index.normalize()).transform('any') 45 | solar_mask = solarGen_data.iloc[:,-1].isna().groupby(solarGen_data.index.normalize()).transform('any') 46 | demand_mask = demand_data.iloc[:,-1].isna().groupby(demand_data.index.normalize()).transform('any') 47 | price_mask = demand_data.iloc[:,-1].isna().groupby(demand_data.index.normalize()).transform('any') 48 | 49 | # eliminate all missing values with common mask 50 | mask_all = wind_mask | solar_mask | demand_mask | price_mask 51 | 52 | # apply mask, removing days with more than one nan value 53 | feature_array = feature_array[~mask_all] 54 | 55 | price_data = price_data[~mask_all] 56 | 57 | # combine price data to other features for complete feature array 58 | feature_array = [feature_array, price_data] 59 | feature_array = np.concatenate(feature_array, axis=-1) 60 | 61 | # time refs 62 | time_refs = windGen_data.index 63 | time_refs = time_refs[~mask_all] 64 | 65 | # time data engineering 66 | df_times_outputs = pd.DataFrame() 67 | df_times_outputs['date'] = time_refs.date 68 | df_times_outputs['hour'] = time_refs.hour 69 | df_times_outputs['month'] = time_refs.month - 1 70 | df_times_outputs['year'] = time_refs.year 71 | df_times_outputs['day_of_week'] = time_refs.dayofweek 72 | df_times_outputs['day_of_year'] = time_refs.dayofyear - 1 73 | df_times_outputs['weekend'] = df_times_outputs['day_of_week'].apply(lambda x: 1 if x>=5 else 0) 74 | 75 | # account for bank / public holidays 76 | start_date = time_refs.min() 77 | end_date = time_refs.max() 78 | start_year = df_times_outputs['year'].min() 79 | end_year = df_times_outputs['year'].max() 80 | 81 | holidays = set(holiday[0] 82 | for year in range(start_year, end_year + 1) 83 | for holiday in cal.holidays(year) 84 | if start_date <= holiday[0] <= end_date) 85 | 86 | df_times_outputs['holiday'] = df_times_outputs['date'].isin(holidays).astype(int) 87 | 88 | #process output times for half hours 89 | for idx, row in df_times_outputs.iterrows(): 90 | if idx % 2 != 0: 91 | df_times_outputs.iloc[idx, 1] = df_times_outputs.iloc[idx, 1] + 0.5 92 | 93 | # create sin / cos of output hour 94 | times_out_hour_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1) 95 | times_out_hour_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1) 96 | 97 | # create sin / cos of output month 98 | times_out_month_sin = 
np.expand_dims(np.sin(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1) 99 | times_out_month_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1) 100 | 101 | # create sin / cos of output year 102 | times_out_year = np.expand_dims((df_times_outputs['year'].values - np.min(df_times_outputs['year'])) / (np.max(df_times_outputs['year']) - np.min(df_times_outputs['year'])), axis=-1) 103 | 104 | # create sin / cos of output day of week 105 | times_out_DoW_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['day_of_week']/np.max(df_times_outputs['day_of_week'])), axis=-1) 106 | times_out_DoW_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['day_of_week']/np.max(df_times_outputs['day_of_week'])), axis=-1) 107 | 108 | # create sin / cos of output day of year 109 | times_out_DoY_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['day_of_year']/np.max(df_times_outputs['day_of_year'])), axis=-1) 110 | times_out_DoY_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['day_of_year']/np.max(df_times_outputs['day_of_year'])), axis=-1) 111 | 112 | weekends = np.expand_dims(df_times_outputs['weekend'].values, axis =-1) 113 | holidays = np.expand_dims(df_times_outputs['holiday'].values, axis =-1) 114 | 115 | time_features = np.concatenate((times_out_hour_sin, times_out_hour_cos, times_out_month_sin, times_out_month_cos, times_out_DoW_sin, times_out_DoW_cos, 116 | times_out_DoY_sin, times_out_DoY_cos, times_out_year, weekends, holidays), axis=-1) 117 | 118 | # combine demand / solar / wind with time features 119 | # combined_data = np.concatenate([feature_array, output_times], axis=-1) 120 | 121 | test_split_seq = 8544 # larger test test to compensate adverse demand from COVID 122 | 123 | # split data into train and test sets 124 | dataset = { 125 | 'train_set' : { 126 | 'X1_train': feature_array[:-test_split_seq], 127 | 'X2_train': time_features[:-test_split_seq], 128 | 'X3_train': time_features[:-test_split_seq], 129 | 'y_train': price_data[:-test_split_seq] 130 | }, 131 | 'test_set' : { 132 | 'X1_test': feature_array[-test_split_seq:], 133 | 'X2_test': time_features[-test_split_seq:], 134 | 'X3_test': time_features[-test_split_seq:], 135 | 'y_test': price_data[-test_split_seq:] 136 | } 137 | } 138 | 139 | time_refs = { 140 | 'input_times_train': time_refs[:-test_split_seq], 141 | 'input_times_test': time_refs[-test_split_seq:], 142 | 'output_times_train': time_refs[:-test_split_seq], 143 | 'output_times_test': time_refs[-test_split_seq:] 144 | } 145 | 146 | # print data for info 147 | print(*[f'{key}: {dataset["train_set"][key].shape}' for key in dataset['train_set'].keys()], sep='\n') 148 | print(*[f'{key}: {dataset["test_set"][key].shape}' for key in dataset['test_set'].keys()], sep='\n') 149 | 150 | # save dataset 151 | with open("../../data/processed/price/time_refs_price_v2.pkl", "wb") as times: 152 | dump(time_refs, times) 153 | 154 | # save training set as dictionary (h5py dump) 155 | f = h5py.File('../../data/processed/price/dataset_price_v2.hdf5', 'w') 156 | 157 | for group_name in dataset: 158 | group = f.create_group(group_name) 159 | for dset_name in dataset[group_name]: 160 | dset = group.create_dataset(dset_name, data = dataset[group_name][dset_name]) 161 | f.close() 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | -------------------------------------------------------------------------------- /scripts/preprocessing/data_preprocessing_solar.py: 
-------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import sys 4 | import os 5 | from pickle import dump, load 6 | import h5py 7 | 8 | from preprocessing_funcs import solar_data_processing 9 | 10 | np.set_printoptions(threshold=sys.maxsize) 11 | 12 | ###########################################_____LOAD & PRE-PROCESS DATA_____########################################### 13 | 14 | #cache current working directory of main script 15 | workingDir = os.getcwd() 16 | 17 | 18 | # paths to nc files for x_value features: 19 | filepaths = { 20 | 'solarRad': '../../data/raw/net_solar_radiation', 21 | 'lowcloudcover': '../../data/raw/low_cloud_Cover', 22 | 'temperature': '../../data/raw/temperature' 23 | } 24 | 25 | # load labels (solar generation per HH) 26 | solarGenLabels = pd.read_csv('../../data/raw/solar_labels/HH_PVGen_v2.csv', parse_dates=True, index_col=0, header=0, dayfirst=True) 27 | 28 | # call main pre-processing function - sequence windowing no longer utilised 29 | dataset, time_refs = solar_data_processing(filepaths = filepaths, labels = solarGenLabels, input_seq_size = 336, output_seq_size = 48, workingDir = workingDir) 30 | 31 | # print data summaries 32 | print(*[f'{key}: {dataset["train_set"][key].shape}' for key in dataset['train_set'].keys()], sep='\n') 33 | print(*[f'{key}: {dataset["test_set"][key].shape}' for key in dataset['test_set'].keys()], sep='\n') 34 | 35 | # save time timeseries (inputs & outputs) for reference 36 | print('saving data...') 37 | with open("../../data/processed/solar/time_refs_solar_min_v4.pkl", "wb") as times: 38 | dump(time_refs, times) 39 | 40 | # save training set as dictionary (h5py dump) 41 | f = h5py.File('../../data/processed/solar/dataset_solar_min_v4.hdf5', 'w') 42 | 43 | for group_name in dataset: 44 | group = f.create_group(group_name) 45 | for dset_name in dataset[group_name]: 46 | dset = group.create_dataset(dset_name, data = dataset[group_name][dset_name]) 47 | f.close() 48 | 49 | 50 | -------------------------------------------------------------------------------- /scripts/preprocessing/data_preprocessing_wind.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import sys 4 | import os 5 | from pickle import dump, load 6 | import h5py 7 | 8 | from preprocessing_funcs import wind_data_processing 9 | 10 | 11 | np.set_printoptions(threshold=sys.maxsize) 12 | 13 | ###########################################_____LOAD & PRE-PROCESS DATA_____########################################### 14 | 15 | #cache current working directory of main script 16 | workingDir = os.getcwd() 17 | 18 | # paths to nc files for x_value features: 19 | filepaths = { 20 | 'u_wind_component_10': '../../data/raw/10m_u_component_of_wind', 21 | 'v_wind_component_10': '../../data/raw/10m_v_component_of_wind', 22 | 'u_wind_component_100': '../../data/raw/100m_u_component_of_wind', 23 | 'v_wind_component_100': '../../data/raw/100m_v_component_of_wind', 24 | 'instantaneous_10m_wind_gust': '../../data/raw/instantaneous_10m_wind_gust', 25 | 'surface_pressure': '../../data/raw/surface_pressure', 26 | 'temperature': '../../data/raw/temperature' 27 | } 28 | 29 | #load labels (wind generation per HH) 30 | windGenLabels = pd.read_csv('../../data/raw/wind_labels/HH_windGen_v4.csv', parse_dates=True, index_col=0, header=0, dayfirst=True) 31 | 32 | # call main pre-processing function - sequence windowing no longer utilised 33 | 
dataset, time_refs = wind_data_processing(filepaths = filepaths, labels = windGenLabels, input_seq_size = 336, output_seq_size = 48, workingDir = workingDir) 34 | 35 | # print data summaries 36 | print(*[f'{key}: {dataset["train_set"][key].shape}' for key in dataset['train_set'].keys()], sep='\n') 37 | print(*[f'{key}: {dataset["test_set"][key].shape}' for key in dataset['test_set'].keys()], sep='\n') 38 | 39 | # #save time timeseries references (inputs & outputs) for reference 40 | print('saving data...') 41 | with open("../../data/processed/wind/time_refs_wind_v4.pkl", "wb") as times: 42 | dump(time_refs, times) 43 | 44 | # save training set as dictionary (h5py dump) 45 | f = h5py.File('../../data/processed/wind/dataset_wind_v4.hdf5', 'w') 46 | 47 | for group_name in dataset: 48 | group = f.create_group(group_name) 49 | for dset_name in dataset[group_name]: 50 | dset = group.create_dataset(dset_name, data = dataset[group_name][dset_name]) 51 | f.close() 52 | 53 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /scripts/preprocessing/preprocessing_funcs.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import netCDF4 4 | from netCDF4 import Dataset 5 | import os 6 | import glob 7 | import sys 8 | from datetime import datetime 9 | from sklearn.preprocessing import MinMaxScaler, StandardScaler 10 | import scipy 11 | import matplotlib.pyplot as plt 12 | from pickle import dump, load 13 | import time 14 | import h5py 15 | 16 | import h5py 17 | from workalendar.europe import UnitedKingdom 18 | 19 | # define calender reference (allows for easy identification of holidays) 20 | cal = UnitedKingdom() 21 | 22 | # function to extract data from yearly .nc files passing directory 23 | def ncExtract(directory, current_wrk_dir): # will append files if multiple present 24 | 25 | #intialising parameters 26 | os.chdir(directory) 27 | files = [] 28 | readVariables = {} 29 | consistentVars = ['longitude', 'latitude', 'time'] 30 | 31 | #read files in directory 32 | for file in glob.glob("*.nc"): 33 | files.append(file) 34 | files.sort() 35 | 36 | for i, file in enumerate(files): 37 | print(file) 38 | #read nc file using netCDF4 39 | ncfile = Dataset(file) 40 | varaibles = list(ncfile.variables.keys()) 41 | #find unique vars 42 | uniqueVars = list(set(varaibles) - set(consistentVars)) 43 | 44 | #iteriate and concat each unique variable 45 | for variable in uniqueVars: 46 | 47 | if i == 0: 48 | readVariables['data'] = np.empty([0,ncfile.variables['latitude'].shape[0], 49 | ncfile.variables['longitude'].shape[0]]) 50 | 51 | readVar = ncfile.variables[variable][:] 52 | 53 | readVariables['data'] = np.concatenate([readVariables['data'],readVar]) 54 | 55 | #read & collect time 56 | if i == 0: 57 | readVariables['time'] = np.empty([0]) 58 | 59 | timeVar = ncfile.variables['time'] 60 | datesVar = netCDF4.num2date(timeVar[:], timeVar.units, timeVar.calendar) 61 | readVariables['time'] = np.concatenate([readVariables['time'],datesVar]) 62 | 63 | #read lat and long 64 | readVariables['latitude'] = ncfile.variables['latitude'][:] 65 | readVariables['longitude'] = ncfile.variables['longitude'][:] 66 | 67 | #close ncfile file 68 | Dataset.close(ncfile) 69 | 70 | #change directory back 71 | os.chdir(current_wrk_dir) 72 | 73 | #define name of extracted data 74 | fileNameLoc = directory.rfind('/') + 1 75 | fileName = str(directory[fileNameLoc:]) 76 | 77 | return readVariables 78 | 79 | 80 
| 81 | # helper function to filter irregular values out 82 | def lv_filter(data): 83 | #define +ve and -ve thresholds 84 | filter_thres_pos = np.mean(np.mean(data)) * (10**(-10)) 85 | filter_thres_neg = filter_thres_pos * (-1) 86 | 87 | #filter data relevant to thresholds 88 | data[(filter_thres_neg <= data) & (data <= filter_thres_pos)] = 0 89 | 90 | return data 91 | 92 | 93 | # helper function to convert 24hr input to 48hrs 94 | def interpolate_4d(array): 95 | interp_array = np.empty((array.shape[0]*2 , array.shape[1], array.shape[2], array.shape[3])) 96 | for ivar in range(array.shape[-1]): 97 | for interp_idx in range(interp_array.shape[0]): 98 | if (interp_idx % 2 == 0) or (int(np.ceil(interp_idx/2)) == array.shape[0]): 99 | interp_array[interp_idx, :, :, ivar] = array[int(np.floor(interp_idx/2)), :, :, ivar] 100 | else: 101 | interp_array[interp_idx, :, :, ivar] = (array[int(np.floor(interp_idx/2)), :, :, ivar] + array[int(np.ceil(interp_idx/2)), :, :, ivar]) / 2 102 | 103 | return interp_array 104 | 105 | 106 | # helper function to interpolate time array 107 | def interpolate_time(time_array): 108 | interp_time = np.linspace(time_array[0], time_array[-1], len(time_array)*2) 109 | 110 | return interp_time 111 | 112 | 113 | # helper function to check for missing nans - if so delete day 114 | def remove_nan_days(x_in, y_out): # assume both are 115 | # check for missing vals in outputs 116 | idx = 0 117 | for i in range(len(y_out)): 118 | if y_out[idx].isnull().values.any() or x_in[idx].isnull().values.any(): 119 | del x_in[idx] 120 | del y_out[idx] 121 | idx -= 1 122 | idx += 1 123 | 124 | return x_in, y_out 125 | 126 | # function to window time series data relative to specified input and output sequence lengths 127 | # NO LONGER USED # 128 | def format_data_into_timesteps(X1, X2, X3, Y, input_seq_size, output_seq_size, input_times_reference, output_times_reference): 129 | print('formating data into timesteps & interpolating input data') 130 | 131 | #number of timesteps to be included in each sequence 132 | seqX1, seqX2, seqX3, seqY_in, seqY, in_times, out_times = [], [], [], [], [], [], [] 133 | input_start, input_end = 0, 0 134 | output_start = input_seq_size + output_seq_size 135 | 136 | while (output_start + output_seq_size) < len(X1): 137 | 138 | x1 = np.empty((input_seq_size , X1.shape[1], X1.shape[2], X1.shape[3])) 139 | x2 = np.empty((input_seq_size , X2.shape[1])) 140 | x3 = np.empty((output_seq_size , X3.shape[1])) 141 | y_in = np.empty(((input_seq_size), 1)) 142 | y = np.empty((output_seq_size, 1)) 143 | 144 | in_time = np.empty(((input_seq_size)), dtype = 'datetime64[ns]') 145 | out_time = np.empty(((output_seq_size)), dtype = 'datetime64[ns]') 146 | 147 | #define sequences 148 | input_end = input_start + input_seq_size 149 | output_end = output_start + output_seq_size 150 | 151 | #add condition to ommit any days with nan values 152 | if np.isnan(X1[input_start:input_end]).any() == True or np.isnan(X2[input_start:input_end]).any() == True or np.isnan(Y[input_start:input_end]).any() == True: 153 | input_start += input_seq_size 154 | output_start += input_seq_size 155 | continue 156 | elif np.isnan(X3[output_start:output_end]).any() == True or np.isnan(Y[output_start:output_end]).any() == True: 157 | input_start += output_seq_size 158 | output_start += output_seq_size 159 | continue 160 | 161 | x1[:,:,:,:] = X1[input_start:input_end] 162 | seqX1.append(x1) 163 | x2[:,:] = X2[input_start:input_end] 164 | seqX2.append(x2) 165 | x3[:,:] = X3[output_start:output_end] 166 | 
seqX3.append(x3) 167 | y_in[:,:] = Y[input_start:input_end] 168 | # y_in[-48:,:] = 0 # elinimate metered output - only NWP available for prediction day 169 | seqY_in.append(y_in) 170 | y[:] = Y[output_start:output_end] 171 | seqY.append(y) 172 | 173 | in_time[:] = np.squeeze(input_times_reference[input_start:input_end]) 174 | in_times.append(in_time) 175 | out_time[:] = np.squeeze(output_times_reference[output_start:output_end]) 176 | out_times.append(out_time) 177 | 178 | input_start += 1 # divide by 2 to compensate for 24hr period (edited) 179 | output_start += 1 180 | 181 | print('converting to float32 numpy arrays') 182 | seqX1 = np.array(seqX1, dtype=np.float32) 183 | seqX2 = np.array(seqX2, dtype=np.float32) 184 | seqX3 = np.array(seqX3, dtype=np.float32) 185 | seqY_in = np.array(seqY_in, dtype=np.float32) 186 | seqY = np.array(seqY, dtype=np.float32) 187 | 188 | 189 | # stack 'Y_inputs' onto the spatial array 190 | print('combining feature array with lagged outputs') 191 | broadcaster = np.ones((seqX1.shape[0], seqX1.shape[1], seqX1.shape[2], seqX1.shape[3], 1), dtype=np.float32) 192 | broadcaster = broadcaster * np.expand_dims(np.expand_dims(seqY_in, axis =2), axis=2) 193 | seqX1 = np.concatenate((broadcaster, seqX1), axis = -1) 194 | 195 | #split data for train and test sets 196 | test_set_percentage = 0.1 197 | test_split = int(len(seqX1) * (1 - test_set_percentage)) 198 | 199 | 200 | dataset = { 201 | 'train_set' : { 202 | 'X1_train': seqX1[:test_split], 203 | 'X2_train': seqX2[:test_split], # input time features 204 | 'X3_train': seqX3[:test_split], # output time features 205 | 'y_train': seqY[:test_split] 206 | }, 207 | 'test_set' : { 208 | 'X1_test': seqX1[test_split:], 209 | 'X2_test': seqX2[test_split:], 210 | 'X3_test': seqX3[test_split:], 211 | 'y_test': seqY[test_split:] 212 | } 213 | } 214 | 215 | #create dictionary for time references 216 | time_refs = { 217 | 'input_times_train': in_times[:test_split], 218 | 'input_times_test': in_times[test_split:], 219 | 'output_times_train': out_times[:test_split], 220 | 'output_times_test': out_times[test_split:] 221 | } 222 | 223 | return dataset, time_refs 224 | # train_set, test_set, time_refs 225 | 226 | 227 | ###### WIND ############################################################################################################################################## 228 | 229 | # main function for preprocessing of data - wind specific updates applied 230 | def wind_data_processing(filepaths, labels, input_seq_size, output_seq_size, workingDir): 231 | 232 | #get dictionary keys 233 | keys = list(filepaths.keys()) 234 | 235 | #dictionaries for extracted vars 236 | vars_extract = {} 237 | vars_extract_filtered = {} 238 | vars_extract_filtered_masked = {} 239 | vars_extract_filtered_masked_norm = {} 240 | 241 | #define daylight hours mask - relative to total solar radiation 242 | # solar_rad_reference = ncExtract('./Data/solar/Raw_Data/Net_Solar_Radiation') 243 | # solar_rad_reference = lv_filter(solar_rad_reference['data']) 244 | # daylight_hr_mask = solar_rad_reference > 0 245 | 246 | #cache matrix dimensions 247 | # dimensions = [solar_rad_reference.shape[0], solar_rad_reference.shape[1], solar_rad_reference.shape[2]] 248 | 249 | #loop to extract data features 250 | for i, key in enumerate(filepaths): 251 | vars_extract[str(key)] = ncExtract(filepaths[key], workingDir) #extract files 252 | 253 | #break in 1-iteration to get time features & cache dimensions 254 | if i == 0: 255 | times_in = vars_extract[str(key)]['time'] 256 | 
dimensions = [vars_extract[str(key)]['data'].shape[0], vars_extract[str(key)]['data'].shape[1], vars_extract[str(key)]['data'].shape[2]] 257 | 258 | vars_extract_filtered[str(key)] = lv_filter(vars_extract[str(key)]['data']) # filter data 259 | # vars_extract_filtered[str(key)][~daylight_hr_mask] = 0 #mask data 260 | # scaler = MinMaxScaler() #normalise data 261 | # vars_extract_filtered_masked_norm[str(key)] = scaler.fit_transform(vars_extract_filtered[str(key)].reshape(vars_extract_filtered[str(key)].shape[0],-1)).reshape(dimensions[0], dimensions[1], dimensions[2]) 262 | 263 | # convert u and v components to wind speed and direction 264 | ws_10 = np.sqrt((vars_extract_filtered['u_wind_component_10']**2) + (vars_extract_filtered['v_wind_component_10']**2)) 265 | ws_100 = np.sqrt((vars_extract_filtered['u_wind_component_100']**2) + (vars_extract_filtered['v_wind_component_100']**2)) 266 | 267 | wd_10 = np.mod(180+np.rad2deg(np.arctan2(vars_extract_filtered['u_wind_component_10'], vars_extract_filtered['v_wind_component_10'])), 360) 268 | wd_100 = np.mod(180+np.rad2deg(np.arctan2(vars_extract_filtered['u_wind_component_100'], vars_extract_filtered['v_wind_component_100'])), 360) 269 | 270 | # convert ws and wd to float 32 271 | ws_10 = ws_10.astype('float32') 272 | wd_10 = wd_10.astype('float32') 273 | ws_100 = ws_100.astype('float32') 274 | wd_100 = wd_100.astype('float32') 275 | 276 | # combine into an array 277 | feature_array = [ws_10, wd_10, ws_100, wd_100, vars_extract_filtered['temperature'], vars_extract_filtered['surface_pressure']] 278 | 279 | #stack features into one matrix 280 | feature_array = np.stack(feature_array, axis = -1) 281 | 282 | # interpolate feature array from 24hrs to 48hrs 283 | print('interpolating data...') 284 | feature_array = interpolate_4d(feature_array) 285 | 286 | # remove nan values - by day 287 | outputs_mask = labels['MW'].isna().groupby(labels.index.normalize()).transform('any') 288 | # outputs_mask = labels['MW'].isna() 289 | 290 | # apply mask, removing days with more than one nan value 291 | feature_array = feature_array[~outputs_mask] 292 | labels = labels[~outputs_mask] 293 | 294 | dimensions = feature_array.shape 295 | feature_array_final = np.empty_like(feature_array) 296 | 297 | # normalise features 298 | for i in range(feature_array.shape[-1]): 299 | # scaler = StandardScaler(with_mean=False) #normalise data 300 | scaler = MinMaxScaler() 301 | array = feature_array[:,:,:,i] 302 | feature_array_final[:,:,:,i:i+1] = scaler.fit_transform(array.reshape(array.shape[0],-1)).reshape(dimensions[0], dimensions[1], dimensions[2], 1) 303 | 304 | #Do time feature engineering for input times 305 | times_in = pd.DataFrame({"datetime": times_in}) 306 | times_in['datetime'] = times_in['datetime'].astype('str') 307 | times_in['datetime'] = pd.to_datetime(times_in['datetime']) 308 | times_in.set_index('datetime', inplace = True) 309 | in_times = times_in.index 310 | 311 | # get hours and months from datetime 312 | hour_in = times_in.index.hour 313 | hour_in = np.float32(hour_in) 314 | 315 | # add HH to hours 316 | index = 0 317 | for idx, time in enumerate(hour_in): 318 | if time == 24: 319 | index += 1 320 | else: 321 | hour_in = np.insert(hour_in, index+1, time+0.5) 322 | index += 2 323 | 324 | month_in = times_in.index.month - 1 325 | year_in = times_in.index.year 326 | 327 | # duplicate months to compensate for switch from 24hr to 48hr input data 328 | index = 0 329 | for idx, month in enumerate(month_in): 330 | if idx % 24 == 0: 331 | index += 1 332 | 
else: 333 | month_in = np.insert(month_in, index+1, month) 334 | index += 2 335 | 336 | # create one_hot encoding input times: hour and month 337 | one_hot_months_in = pd.get_dummies(month_in, prefix='month_') 338 | one_hot_hours_in = pd.get_dummies(hour_in, prefix='hour_') 339 | 340 | times_in_df = pd.concat([one_hot_hours_in, one_hot_months_in], axis=1) 341 | times_in = times_in_df.values 342 | 343 | # create sin / cos of input times 344 | times_in_hour_sin = np.expand_dims(np.sin(2*np.pi*hour_in/np.max(hour_in)), axis=-1) 345 | times_in_month_sin = np.expand_dims(np.sin(2*np.pi*month_in/np.max(month_in)), axis=-1) 346 | 347 | times_in_hour_cos = np.expand_dims(np.cos(2*np.pi*hour_in/np.max(hour_in)),axis=-1) 348 | times_in_month_cos = np.expand_dims(np.cos(2*np.pi*month_in/np.max(month_in)), axis=-1) 349 | 350 | times_in_year = (in_times - np.min(in_times)) / (np.max(in_times) - np.min(in_times)) 351 | 352 | #Process output times as secondary input for decoder 353 | #cache output times 354 | label_times = labels.index 355 | 356 | #declare 'output' time features 357 | df_times_outputs = pd.DataFrame() 358 | df_times_outputs['hour'] = labels.index.hour 359 | df_times_outputs['month'] = labels.index.month - 1 360 | df_times_outputs['year'] = labels.index.year 361 | 362 | #process output times for half hours 363 | for idx, row in df_times_outputs.iterrows(): 364 | if idx % 2 != 0: 365 | df_times_outputs.iloc[idx, 0] = df_times_outputs.iloc[idx, 0] + 0.5 366 | 367 | months_out = pd.get_dummies(df_times_outputs['month'], prefix='month_') 368 | hours_out = pd.get_dummies(df_times_outputs['hour'], prefix='hour_') 369 | 370 | times_out_df = pd.concat([hours_out, months_out], axis=1) 371 | times_out = times_out_df.values 372 | 373 | # create sin / cos of output times 374 | times_out_hour_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1) 375 | times_out_month_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1) 376 | 377 | times_out_hour_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1) 378 | times_out_month_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1) 379 | 380 | times_out_year = np.expand_dims((df_times_outputs['year'].values - np.min(df_times_outputs['year'])) / (np.max(df_times_outputs['year']) - np.min(df_times_outputs['year'])), axis=-1) 381 | 382 | # print(times_out_hour_cos[:50]) 383 | labels['MW'] = labels['MW'].astype('float32') 384 | 385 | #normalise labels 386 | scaler = StandardScaler(with_mean=False) 387 | # scaler = MinMaxScaler() 388 | labels[['MW']] = scaler.fit_transform(labels[['MW']]) 389 | 390 | # save the scaler for inference 391 | dump(scaler, open('../../data/processed/wind/_scaler/scaler_wind_v3.pkl', 'wb')) 392 | 393 | # make single array for time references 394 | time_refs = [in_times, label_times] 395 | 396 | # one-hot method 397 | # input_times = times_in_df.values 398 | # output_times = times_out_df.values 399 | 400 | # cyclic method 401 | output_times = np.concatenate((times_out_hour_sin, times_out_hour_cos, times_out_month_sin, times_out_month_cos, times_out_year), axis=-1) 402 | 403 | labels = labels.values 404 | 405 | # testing 24hr vs 48hr input data - use the 48hr output time features for X2 406 | input_times = output_times 407 | 408 | # add labels to inputs 409 | broadcaster = np.ones((feature_array_final.shape[0], feature_array_final.shape[1],
feature_array_final.shape[2], 1), dtype=np.float32) 410 | broadcaster = broadcaster * np.expand_dims(np.expand_dims(labels, axis =2), axis=2) 411 | feature_array_final = np.concatenate((broadcaster, feature_array_final), axis = -1) 412 | 413 | 414 | # declare train test split 415 | test_split_seq = 8544 # hold out the final 8544 timesteps (around 10% of the data) 416 | 417 | # create dataset 418 | dataset = { 419 | 'train_set' : { 420 | 'X1_train': feature_array_final[:-test_split_seq], 421 | 'X2_train': input_times[:-test_split_seq], # input time features 422 | 'X3_train': output_times[:-test_split_seq], # output time features 423 | 'y_train': labels[:-test_split_seq] 424 | }, 425 | 'test_set' : { 426 | 'X1_test': feature_array_final[-test_split_seq:], 427 | 'X2_test': input_times[-test_split_seq:], 428 | 'X3_test': output_times[-test_split_seq:], 429 | 'y_test': labels[-test_split_seq:] 430 | } 431 | } 432 | 433 | time_refs = { 434 | 'input_times_train': in_times[:-test_split_seq], 435 | 'input_times_test': in_times[-test_split_seq:], 436 | 'output_times_train': label_times[:-test_split_seq], 437 | 'output_times_test': label_times[-test_split_seq:] 438 | } 439 | 440 | return dataset, time_refs 441 | 442 | 443 | ###### SOLAR ############################################################################################################################################## 444 | 445 | # function to process data into train and test sets 446 | def solar_data_processing(filepaths, labels, input_seq_size, output_seq_size, workingDir): 447 | 448 | #get dictionary keys 449 | keys = list(filepaths.keys()) 450 | 451 | #dictionaries for extracted vars 452 | vars_extract = {} 453 | vars_extract_filtered = {} 454 | vars_extract_filtered_masked = {} 455 | vars_extract_filtered_masked_norm = {} 456 | 457 | #define daylight hours mask - relative to total solar radiation 458 | # solar_rad_reference = ncExtract('./Data/solar/Raw_Data/Net_Solar_Radiation') 459 | # solar_rad_reference = lv_filter(solar_rad_reference['data']) 460 | # daylight_hr_mask = solar_rad_reference > 0 461 | 462 | #cache matrix dimensions 463 | # dimensions = [solar_rad_reference.shape[0], solar_rad_reference.shape[1], solar_rad_reference.shape[2]] 464 | 465 | #loop to extract data features 466 | for i, key in enumerate(filepaths): 467 | vars_extract[str(key)] = ncExtract(filepaths[key], workingDir) #extract files 468 | 469 | #on the first iteration, cache time features & matrix dimensions 470 | if i == 0: 471 | times_in = vars_extract[str(key)]['time'] 472 | dimensions = [vars_extract[str(key)]['data'].shape[0], vars_extract[str(key)]['data'].shape[1], vars_extract[str(key)]['data'].shape[2]] 473 | 474 | vars_extract_filtered[str(key)] = lv_filter(vars_extract[str(key)]['data']) # filter data 475 | # vars_extract_filtered[str(key)][~daylight_hr_mask] = 0 #mask data 476 | # scaler = MinMaxScaler() #normalise data 477 | scaler = StandardScaler(with_mean=False) 478 | vars_extract_filtered_masked_norm[str(key)] = scaler.fit_transform(vars_extract_filtered[str(key)].reshape(vars_extract_filtered[str(key)].shape[0],-1)).reshape(dimensions[0], dimensions[1], dimensions[2]) 479 | 480 | 481 | #stack features into one matrix 482 | feature_array = [vars_extract_filtered_masked_norm[str(i)] for i in vars_extract_filtered_masked_norm] 483 | feature_array = np.stack([x for x in vars_extract_filtered_masked_norm.values()], axis = -1) 484 | 485 | # interpolate feature array from 24hrs to 48hrs 486 | feature_array = interpolate_4d(feature_array) 487 | 488 | # remove nan values - by day: flag every period of any day containing a NaN label
489 | outputs_mask = labels['MW'].isna().groupby(labels.index.normalize()).transform('any') 490 | 491 | 492 | # apply mask, removing any day that contains one or more NaN values 493 | feature_array = feature_array[~outputs_mask] 494 | labels = labels[~outputs_mask] 495 | 496 | dimensions = feature_array.shape 497 | 498 | #Do time feature engineering for input times 499 | times_in = pd.DataFrame({"datetime": times_in}) 500 | times_in['datetime'] = times_in['datetime'].astype('str') 501 | times_in['datetime'] = pd.to_datetime(times_in['datetime']) 502 | times_in.set_index('datetime', inplace = True) 503 | in_times = times_in.index 504 | 505 | # get hours and months from datetime 506 | hour_in = times_in.index.hour 507 | hour_in = np.float32(hour_in) 508 | 509 | # insert half-hour (HH) periods between the hourly values 510 | index = 0 511 | for idx, time in enumerate(hour_in): 512 | if time == 24: 513 | index += 1 514 | else: 515 | hour_in = np.insert(hour_in, index+1, time+0.5) 516 | index += 2 517 | 518 | month_in = times_in.index.month - 1 519 | year_in = times_in.index.year 520 | 521 | # duplicate months to compensate for switch from 24hr to 48hr input data 522 | index = 0 523 | for idx, month in enumerate(month_in): 524 | if idx % 24 == 0: 525 | index += 1 526 | else: 527 | month_in = np.insert(month_in, index+1, month) 528 | index += 2 529 | 530 | # create one_hot encoding input times: hour and month 531 | one_hot_months_in = pd.get_dummies(month_in, prefix='month_') 532 | one_hot_hours_in = pd.get_dummies(hour_in, prefix='hour_') 533 | 534 | times_in_df = pd.concat([one_hot_hours_in, one_hot_months_in], axis=1) 535 | times_in = times_in_df.values 536 | 537 | # create sin / cos of input times 538 | times_in_hour_sin = np.expand_dims(np.sin(2*np.pi*hour_in/np.max(hour_in)), axis=-1) 539 | times_in_month_sin = np.expand_dims(np.sin(2*np.pi*month_in/np.max(month_in)), axis=-1) 540 | 541 | times_in_hour_cos = np.expand_dims(np.cos(2*np.pi*hour_in/np.max(hour_in)),axis=-1) 542 | times_in_month_cos = np.expand_dims(np.cos(2*np.pi*month_in/np.max(month_in)), axis=-1) 543 | 544 | times_in_year = (in_times - np.min(in_times)) / (np.max(in_times) - np.min(in_times)) 545 | 546 | #Process output times as secondary input for decoder 547 | #cache output times 548 | label_times = labels.index 549 | 550 | #declare 'output' time features 551 | df_times_outputs = pd.DataFrame() 552 | df_times_outputs['hour'] = labels.index.hour 553 | df_times_outputs['month'] = labels.index.month - 1 554 | df_times_outputs['year'] = labels.index.year 555 | 556 | #process output times for half hours 557 | for idx, row in df_times_outputs.iterrows(): 558 | if idx % 2 != 0: 559 | df_times_outputs.iloc[idx, 0] = df_times_outputs.iloc[idx, 0] + 0.5 560 | 561 | months_out = pd.get_dummies(df_times_outputs['month'], prefix='month_') 562 | hours_out = pd.get_dummies(df_times_outputs['hour'], prefix='hour_') 563 | 564 | times_out_df = pd.concat([hours_out, months_out], axis=1) 565 | times_out = times_out_df.values 566 | 567 | # create sin / cos of output times 568 | times_out_hour_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1) 569 | times_out_month_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1) 570 | 571 | times_out_hour_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1) 572 | times_out_month_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1) 573 | 574 |
times_out_year = np.expand_dims((df_times_outputs['year'].values - np.min(df_times_outputs['year'])) / (np.max(df_times_outputs['year']) - np.min(df_times_outputs['year'])), axis=-1) 575 | 576 | # normalise y labels 577 | scaler = StandardScaler(with_mean=False) 578 | # scaler = MinMaxScaler() 579 | labels[['MW']] = scaler.fit_transform(labels[['MW']]) 580 | 581 | # save the scaler for inference 582 | dump(scaler, open('../../data/processed/solar/_scaler/scaler_solar_v4.pkl', 'wb')) 583 | 584 | in_times = label_times 585 | time_refs = [in_times, label_times] 586 | 587 | # one-hot method 588 | # input_times = times_in_df.values 589 | # output_times = times_out_df.values 590 | 591 | # cyclic method 592 | # input_times = np.concatenate((times_in_hour_sin, times_in_hour_cos, times_in_month_sin, times_in_month_cos), axis=-1) switch to output times for HH periods 593 | output_times = np.concatenate((times_out_hour_sin, times_out_hour_cos, times_out_month_sin, times_out_month_cos, times_out_year), axis=-1) 594 | 595 | labels = labels.values 596 | 597 | # add labels to inputs 598 | print('combining feature array with lagged outputs') 599 | broadcaster = np.ones((feature_array.shape[0], feature_array.shape[1], feature_array.shape[2], 1), dtype=np.float32) 600 | broadcaster = broadcaster * np.expand_dims(np.expand_dims(labels, axis =2), axis=2) 601 | feature_array = np.concatenate((broadcaster, feature_array), axis = -1) 602 | 603 | # testing 24hr vs 48hr input data - use the 48hr output time features for X2 604 | input_times = output_times 605 | 606 | test_split_seq = 8544 # hold out the final 8544 timesteps (around 10% of the data) 607 | 608 | # create dataset 609 | dataset = { 610 | 'train_set' : { 611 | 'X1_train': feature_array[:-test_split_seq], 612 | 'X2_train': input_times[:-test_split_seq], # input time features 613 | 'X3_train': output_times[:-test_split_seq], # output time features 614 | 'y_train': labels[:-test_split_seq] 615 | }, 616 | 'test_set' : { 617 | 'X1_test': feature_array[-test_split_seq:], 618 | 'X2_test': input_times[-test_split_seq:], 619 | 'X3_test': output_times[-test_split_seq:], 620 | 'y_test': labels[-test_split_seq:] 621 | } 622 | } 623 | 624 | time_refs = { 625 | 'input_times_train': in_times[:-test_split_seq], 626 | 'input_times_test': in_times[-test_split_seq:], 627 | 'output_times_train': label_times[:-test_split_seq], 628 | 'output_times_test': label_times[-test_split_seq:] 629 | } 630 | 631 | return dataset, time_refs 632 | # return train_set, test_set, time_refs 633 | 634 | ###### DEMAND ############################################################################################################################################## 635 | 636 | #function to process data into train and test sets 637 | def demand_data_processing(filepaths, labels, workingDir): 638 | 639 | #get dictionary keys 640 | keys = list(filepaths.keys()) 641 | 642 | #dictionaries for extracted vars 643 | vars_extract = {} 644 | vars_extract_filtered = {} 645 | vars_extract_filtered_masked = {} 646 | vars_extract_filtered_masked_norm = {} 647 | 648 | #define daylight hours mask - relative to total solar radiation 649 | # solar_rad_reference = ncExtract('./Data/solar/Raw_Data/Net_Solar_Radiation') 650 | # solar_rad_reference = lv_filter(solar_rad_reference['data']) 651 | # daylight_hr_mask = solar_rad_reference > 0 652 | 653 | #cache matrix dimensions 654 | # dimensions = [solar_rad_reference.shape[0], solar_rad_reference.shape[1], solar_rad_reference.shape[2]] 655 | 656 | #loop to extract data features 657 | for i, key in
enumerate(filepaths): 658 | vars_extract[str(key)] = ncExtract(filepaths[key], workingDir) #extract files 659 | 660 | #on the first iteration, cache time features & matrix dimensions 661 | if i == 0: 662 | times_in = vars_extract[str(key)]['time'] 663 | dimensions = [vars_extract[str(key)]['data'].shape[0], vars_extract[str(key)]['data'].shape[1], vars_extract[str(key)]['data'].shape[2]] 664 | 665 | vars_extract_filtered[str(key)] = lv_filter(vars_extract[str(key)]['data']) # filter data 666 | # vars_extract_filtered[str(key)][~daylight_hr_mask] = 0 #mask data 667 | # scaler = MinMaxScaler() #normalise data 668 | scaler = StandardScaler(with_mean=False) 669 | vars_extract_filtered_masked_norm[str(key)] = scaler.fit_transform(vars_extract_filtered[str(key)].reshape(vars_extract_filtered[str(key)].shape[0],-1)).reshape(dimensions[0], dimensions[1], dimensions[2]) 670 | 671 | #stack features into one matrix 672 | feature_array = [vars_extract_filtered_masked_norm[str(i)] for i in vars_extract_filtered_masked_norm] 673 | feature_array = np.stack(feature_array, axis = -1) 674 | # feature_array = np.concatenate((feature_array, input_timefeatures), axis = -1) 675 | 676 | # interpolate feature array from 24hrs to 48hrs 677 | feature_array = interpolate_4d(feature_array) 678 | 679 | # remove nan values - by day: flag every period of any day containing a NaN label 680 | outputs_mask = labels['MW'].isna().groupby(labels.index.normalize()).transform('any') 681 | 682 | # apply mask, removing any day that contains one or more NaN values 683 | feature_array = feature_array[~outputs_mask] 684 | labels = labels[~outputs_mask] 685 | 686 | # do time feature engineering for input times 687 | times_in = pd.DataFrame({"datetime": times_in}) 688 | times_in['datetime'] = times_in['datetime'].astype('str') 689 | times_in['datetime'] = pd.to_datetime(times_in['datetime']) 690 | times_in.set_index('datetime', inplace = True) 691 | in_times = times_in.index 692 | 693 | # get hours and months from datetime 694 | hour_in = times_in.index.hour 695 | hour_in = np.float32(hour_in) 696 | 697 | # insert half-hour (HH) periods between the hourly values 698 | index = 0 699 | for idx, time in enumerate(hour_in): 700 | if time == 24: 701 | index += 1 702 | else: 703 | hour_in = np.insert(hour_in, index+1, time+0.5) 704 | index += 2 705 | 706 | month_in = times_in.index.month - 1 707 | year_in = times_in.index.year 708 | 709 | # duplicate months to compensate for switch from 24hr to 48hr input data 710 | index = 0 711 | for idx, month in enumerate(month_in): 712 | if idx % 24 == 0: 713 | index += 1 714 | else: 715 | month_in = np.insert(month_in, index+1, month) 716 | index += 2 717 | 718 | # create one_hot encoding input times: hour and month 719 | one_hot_months_in = pd.get_dummies(month_in, prefix='month_') 720 | one_hot_hours_in = pd.get_dummies(hour_in, prefix='hour_') 721 | 722 | times_in_df = pd.concat([one_hot_hours_in, one_hot_months_in], axis=1) 723 | times_in = times_in_df.values 724 | 725 | # create sin / cos of input times 726 | times_in_hour_sin = np.expand_dims(np.sin(2*np.pi*hour_in/np.max(hour_in)), axis=-1) 727 | times_in_month_sin = np.expand_dims(np.sin(2*np.pi*month_in/np.max(month_in)), axis=-1) 728 | 729 | times_in_hour_cos = np.expand_dims(np.cos(2*np.pi*hour_in/np.max(hour_in)),axis=-1) 730 | times_in_month_cos = np.expand_dims(np.cos(2*np.pi*month_in/np.max(month_in)), axis=-1) 731 | 732 | times_in_year = (in_times - np.min(in_times)) / (np.max(in_times) - np.min(in_times)) 733 | 734 | #Process output times as secondary input for decoder 735 | #cache output times 736 | label_times = labels.index 737 | 738 | #declare
'output' time features 739 | df_times_outputs = pd.DataFrame() 740 | df_times_outputs['date'] = labels.index.date 741 | df_times_outputs['hour'] = labels.index.hour 742 | df_times_outputs['month'] = labels.index.month - 1 743 | df_times_outputs['year'] = labels.index.year 744 | df_times_outputs['day_of_week'] = labels.index.dayofweek 745 | df_times_outputs['day_of_year'] = labels.index.dayofyear - 1 746 | df_times_outputs['weekend'] = df_times_outputs['day_of_week'].apply(lambda x: 1 if x>=5 else 0) 747 | 748 | 749 | # account for bank / public holidays 750 | start_date = labels.index.min() 751 | end_date = labels.index.max() 752 | start_year = df_times_outputs['year'].min() 753 | end_year = df_times_outputs['year'].max() 754 | 755 | holidays = set(holiday[0] 756 | for year in range(start_year, end_year + 1) 757 | for holiday in cal.holidays(year) 758 | if start_date <= holiday[0] <= end_date) 759 | 760 | df_times_outputs['holiday'] = df_times_outputs['date'].isin(holidays).astype(int) 761 | 762 | #process output times for half hours 763 | for idx, row in df_times_outputs.iterrows(): 764 | if idx % 2 != 0: 765 | df_times_outputs.iloc[idx, 1] = df_times_outputs.iloc[idx, 1] + 0.5 766 | 767 | months_out = pd.get_dummies(df_times_outputs['month'], prefix='month_') 768 | hours_out = pd.get_dummies(df_times_outputs['hour'], prefix='hour_') 769 | 770 | times_out_df = pd.concat([hours_out, months_out], axis=1) 771 | times_out = times_out_df.values 772 | 773 | # create sin / cos of output hour 774 | times_out_hour_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1) 775 | times_out_hour_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1) 776 | 777 | # create sin / cos of output month 778 | times_out_month_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1) 779 | times_out_month_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1) 780 | 781 | # create sin / cos of output year 782 | times_out_year = np.expand_dims((df_times_outputs['year'].values - np.min(df_times_outputs['year'])) / (np.max(df_times_outputs['year']) - np.min(df_times_outputs['year'])), axis=-1) 783 | 784 | # create sin / cos of output day of week 785 | times_out_DoW_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['day_of_week']/np.max(df_times_outputs['day_of_week'])), axis=-1) 786 | times_out_DoW_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['day_of_week']/np.max(df_times_outputs['day_of_week'])), axis=-1) 787 | 788 | # create sin / cos of output day of year 789 | times_out_DoY_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['day_of_year']/np.max(df_times_outputs['day_of_year'])), axis=-1) 790 | times_out_DoY_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['day_of_year']/np.max(df_times_outputs['day_of_year'])), axis=-1) 791 | 792 | #normalise labels 793 | scaler = StandardScaler(with_mean=False) 794 | labels[['MW']] = scaler.fit_transform(labels[['MW']]) 795 | 796 | # save the scaler for inference 797 | dump(scaler, open('../../data/processed/demand/_scaler/scaler_demand_v2.pkl', 'wb')) 798 | 799 | time_refs = [in_times, label_times] 800 | 801 | # one-hot method 802 | # input_times = times_in_df.values 803 | # output_times = times_out_df.values 804 | 805 | weekends = np.expand_dims(df_times_outputs['weekend'].values, axis =-1) 806 | holidays = np.expand_dims(df_times_outputs['holiday'].values, axis 
=-1) 807 | 808 | # cyclic method 809 | # input_times = np.concatenate((times_in_hour_sin, times_in_hour_cos, times_in_month_sin, times_in_month_cos), axis=-1) switch to output times for HH periods 810 | output_times = np.concatenate((times_out_hour_sin, times_out_hour_cos, times_out_month_sin, times_out_month_cos, times_out_DoW_sin, times_out_DoW_cos, 811 | times_out_DoY_sin, times_out_DoY_cos, times_out_year, weekends, holidays), axis=-1) 812 | 813 | labels = labels.values 814 | 815 | # testing 24hr vs 48hr input data - use the 48hr output time features for X2 816 | input_times = output_times 817 | 818 | # add labels to inputs 819 | print('combining feature array with lagged outputs') 820 | broadcaster = np.ones((feature_array.shape[0], feature_array.shape[1], feature_array.shape[2], 1), dtype=np.float32) 821 | broadcaster = broadcaster * np.expand_dims(np.expand_dims(labels, axis =2), axis=2) 822 | feature_array = np.concatenate((broadcaster, feature_array), axis = -1) 823 | 824 | #divide into timesteps & train and test sets 825 | # dataset, time_refs = format_data_into_timesteps(X1 = feature_array, X2 = input_times , X3 = output_times, Y = labels, input_seq_size = 240, output_seq_size = 48, input_times_reference = time_refs[1], output_times_reference = time_refs[1]) # converting from 24hr to 48hr inputs hence can use output time references 826 | # train_set, test_set, time_refs 827 | 828 | # def to_float32(input_dict): 829 | # for idx, key in enumerate(input_dict.keys()): 830 | # input_dict[key] = input_dict[key].astype(np.float32) 831 | # return input_dict 832 | 833 | # train_set = to_float32(train_set) 834 | # test_set = to_float32(test_set) 835 | 836 | test_split_seq = 8544 # hold out the final 8544 timesteps (around 10% of the data) 837 | 838 | # input_test_seq = test_split_seq + (input_seq_size - 1) 839 | # output_test_seq = test_split_seq + (output_seq_size - 1) 840 | 841 | # create dataset 842 | dataset = { 843 | 'train_set' : { 844 | 'X1_train': feature_array[:-test_split_seq], 845 | 'X2_train': input_times[:-test_split_seq], # input time features 846 | 'X3_train': output_times[:-test_split_seq], # output time features 847 | 'y_train': labels[:-test_split_seq] 848 | }, 849 | 'test_set' : { 850 | 'X1_test': feature_array[-test_split_seq:], 851 | 'X2_test': input_times[-test_split_seq:], 852 | 'X3_test': output_times[-test_split_seq:], 853 | 'y_test': labels[-test_split_seq:] 854 | } 855 | } 856 | 857 | time_refs = { 858 | 'input_times_train': label_times[:-test_split_seq], 859 | 'input_times_test': label_times[-test_split_seq:], 860 | 'output_times_train': label_times[:-test_split_seq], 861 | 'output_times_test': label_times[-test_split_seq:] 862 | } 863 | 864 | # def to_float32(input_dict): 865 | # for idx, key in enumerate(input_dict.keys()): 866 | # input_dict[key] = input_dict[key].astype(np.float32) 867 | # return input_dict 868 | 869 | # train_set = to_float32(train_set) 870 | # test_set = to_float32(test_set) 871 | 872 | return dataset, time_refs 873 | # return train_set, test_set, time_refs 874 | 875 | 876 | -------------------------------------------------------------------------------- /visualisations/cloud_cover_(input)_animation.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/cloud_cover_(input)_animation.gif --------------------------------------------------------------------------------
/visualisations/d3_quantile_plot_examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/d3_quantile_plot_examples.png -------------------------------------------------------------------------------- /visualisations/d3_temporal_attention_plot_demand.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/d3_temporal_attention_plot_demand.png -------------------------------------------------------------------------------- /visualisations/d3_temporal_attention_plot_price.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/d3_temporal_attention_plot_price.png -------------------------------------------------------------------------------- /visualisations/d3_temporal_attention_plot_solar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/d3_temporal_attention_plot_solar.png -------------------------------------------------------------------------------- /visualisations/d3_temporal_attention_plot_wind.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/d3_temporal_attention_plot_wind.png -------------------------------------------------------------------------------- /visualisations/memory_leak_test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/memory_leak_test.png -------------------------------------------------------------------------------- /visualisations/model_architecture_schematic_markup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/model_architecture_schematic_markup.png -------------------------------------------------------------------------------- /visualisations/performance_breakdown_markup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/performance_breakdown_markup.png -------------------------------------------------------------------------------- /visualisations/solar_spatial_attentions_animation.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/solar_spatial_attentions_animation.gif -------------------------------------------------------------------------------- /visualisations/tabular_performance_breakdown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/tabular_performance_breakdown.png --------------------------------------------------------------------------------