├── .gitignore
├── README.md
├── data
│   ├── processed
│   │   └── README.md
│   └── raw
│       └── README.md
├── models
│   ├── bilstm
│   │   ├── demand
│   │   │   └── q_all_bilstm
│   │   │       └── demand_bilstm.h5
│   │   ├── price
│   │   │   └── q_all_bilstm
│   │   │       └── price_bilstm.h5
│   │   ├── solar
│   │   │   └── q_all_bilstm
│   │   │       └── solar_bilstm.h5
│   │   └── wind
│   │       └── q_all_bilstm
│   │           └── wind_bilstm.h5
│   ├── seq2seq+temporal+spatial
│   │   ├── demand
│   │   │   └── q_all_seq2seq+temporal+spatial
│   │   │       ├── demand_main.h5
│   │   │       ├── demand_spatial_enc.h5
│   │   │       └── demand_temporal_enc.h5
│   │   ├── solar
│   │   │   └── q_all_seq2seq+temporal+spatial
│   │   │       ├── solar_main.h5
│   │   │       ├── solar_spatial_enc.h5
│   │   │       └── solar_temporal_enc.h5
│   │   └── wind
│   │       └── q_all_seq2seq+temporal+spatial
│   │           ├── wind_main.h5
│   │           ├── wind_spatial_enc.h5
│   │           └── wind_temporal_enc.h5
│   ├── seq2seq+temporal
│   │   ├── demand
│   │   │   └── q_all_seq2seq+temporal
│   │   │       ├── demand_seq2seq+temporal.h5
│   │   │       └── demand_seq2seq+temporal_enc.h5
│   │   ├── price
│   │   │   └── q_all_seq2seq+temporal
│   │   │       ├── price_seq2seq+temporal.h5
│   │   │       └── price_seq2seq+temporal_enc.h5
│   │   ├── solar
│   │   │   └── q_all_seq2seq+temporal
│   │   │       ├── solar_seq2seq+temporal.h5
│   │   │       └── solar_seq2seq+temporal_enc.h5
│   │   └── wind
│   │       └── q_all_seq2seq+temporal
│   │           ├── wind_seq2seq+temporal.h5
│   │           └── wind_seq2seq+temporal_enc.h5
│   └── seq2seq
│       ├── demand
│       │   └── q_all_seq2seq
│       │       └── demand_seq2seq.h5
│       ├── price
│       │   └── q_all_seq2seq
│       │       └── price_seq2seq.h5
│       ├── solar
│       │   └── q_all_seq2seq
│       │       └── solar_seq2seq.h5
│       └── wind
│           └── q_all_seq2seq
│               └── wind_seq2seq.h5
├── requirements.txt
├── results
│   ├── demand
│   │   ├── attention_plot_results_demand.csv
│   │   ├── bilstm
│   │   │   ├── forecasted_time_series_demand_bilstm.pkl
│   │   │   ├── preformance_summary_demand_bilstm.csv
│   │   │   ├── q_all_bilstm
│   │   │   │   └── demand_bilstm.h5
│   │   │   └── quantile_prediction_results_demand_bilstm.csv
│   │   ├── seq2seq+temporal+spatial
│   │   │   ├── forecasted_time_series_demand_seq2seq+temporal+spatial.pkl
│   │   │   ├── preformance_summary_demand_seq2seq+temporal+spatial.csv
│   │   │   └── quantile_prediction_results_demand_seq2seq+temporal+spatial.csv
│   │   ├── seq2seq+temporal
│   │   │   ├── forecasted_time_series_demand_seq2seq+temporal.pkl
│   │   │   ├── preformance_summary_demand_seq2seq+temporal.csv
│   │   │   └── quantile_prediction_results_demand_seq2seq+temporal.csv
│   │   └── seq2seq
│   │       ├── forecasted_time_series_demand_seq2seq.pkl
│   │       ├── preformance_summary_demand_seq2seq.csv
│   │       └── quantile_prediction_results_demand_seq2seq.csv
│   ├── price
│   │   ├── attention_plot_results_price.csv
│   │   ├── bilstm
│   │   │   ├── forecasted_time_series_price_bilstm.pkl
│   │   │   └── preformance_summary_price_bilstm.csv
│   │   ├── seq2seq+temporal+spatial
│   │   │   ├── forecasted_time_series_price_seq2seq+temporal+spatial.pkl
│   │   │   └── preformance_summary_price_seq2seq+temporal+spatial.csv
│   │   ├── seq2seq+temporal
│   │   │   ├── attention_data_price_seq2seq+temporal.pkl
│   │   │   ├── forecasted_time_series_price_seq2seq+temporal.pkl
│   │   │   ├── preformance_summary_price_seq2seq+temporal.csv
│   │   │   └── quantile_prediction_results_price_seq2seq+temporal.csv
│   │   └── seq2seq
│   │       ├── forecasted_time_series_price_seq2seq.pkl
│   │       ├── preformance_summary_price_seq2seq.csv
│   │       └── quantile_prediction_results_price_seq2seq.csv
│   ├── solar
│   │   ├── attention_plot_results_solar.csv
│   │   ├── bilstm
│   │   │   ├── forecasted_time_series_solar_bilstm.pkl
│   │   │   ├── preformance_summary_solar_bilstm.csv
│   │   │   └── quantile_prediction_results_solar_bilstm.csv
│   │   ├── seq2seq+temporal+spatial
│   │   │   ├── forecasted_time_series_solar_seq2seq+temporal+spatial.pkl
│   │   │   ├── preformance_summary_solar_seq2seq+temporal+spatial.csv
│   │   │   ├── quantile_prediction_results_solar_seq2seq+temporal+spatial.csv
│   │   │   └── spatial_attention_data_solar.pkl
│   │   ├── seq2seq+temporal
│   │   │   ├── forecasted_time_series_solar_seq2seq+temporal.pkl
│   │   │   ├── preformance_summary_solar_seq2seq+temporal.csv
│   │   │   └── quantile_prediction_results_solar_seq2seq+temporal.csv
│   │   └── seq2seq
│   │       ├── forecasted_time_series_solar_seq2seq.pkl
│   │       └── preformance_summary_solar_seq2seq.csv
│   └── wind
│       ├── attention_plot_results_wind.csv
│       ├── bilstm
│       │   ├── forecasted_time_series_wind_bilstm.pkl
│       │   └── preformance_summary_wind_bilstm.csv
│       ├── seq2seq+temporal+spatial
│       │   ├── forecasted_time_series_wind_seq2seq+temporal+spatial.pkl
│       │   ├── preformance_summary_wind_seq2seq+temporal+spatial.csv
│       │   └── quantile_prediction_results_wind_seq2seq+temporal+spatial.csv
│       ├── seq2seq+temporal
│       │   ├── forecasted_time_series_wind_seq2seq+temporal.pkl
│       │   ├── preformance_summary_wind_seq2seq+temporal.csv
│       │   └── quantile_prediction_results_wind_seq2seq+temporal.csv
│       └── seq2seq
│           ├── forecasted_time_series_wind_seq2seq.pkl
│           ├── preformance_summary_wind_seq2seq.csv
│           └── quantile_prediction_results_wind_seq2seq.csv
├── scripts
│   ├── models
│   │   ├── _shared
│   │   │   ├── __pycache__
│   │   │   │   ├── attention_layer.cpython-38.pyc
│   │   │   │   └── timeseries_data_generator.cpython-38.pyc
│   │   │   ├── attention_layer.py
│   │   │   └── timeseries_data_generator.py
│   │   ├── bilstm_model.py
│   │   ├── inference+testing
│   │   │   ├── bilstm_seq2seq_predictions.py
│   │   │   └── inference_model_seq2seq+spatial+temporal_attn.py
│   │   ├── seq2seq+spatial+temporal_attn.py
│   │   ├── seq2seq+temporal_attn.py
│   │   └── seq2seq_model.py
│   ├── postprocessing
│   │   ├── d3_scripts
│   │   │   ├── Context_graph.js
│   │   │   └── forecasting_graph.js
│   │   ├── format_results_Qforecast_plot.py
│   │   ├── format_results_attn_plot.py
│   │   ├── results_summary.py
│   │   └── spatial_attention_plots.py
│   └── preprocessing
│       ├── ERA5_downloader.py
│       ├── __pycache__
│       │   └── preprocessing_funcs.cpython-38.pyc
│       ├── data_preprocessing_demand.py
│       ├── data_preprocessing_price.py
│       ├── data_preprocessing_solar.py
│       ├── data_preprocessing_wind.py
│       └── preprocessing_funcs.py
└── visualisations
    ├── cloud_cover_(input)_animation.gif
    ├── d3_quantile_plot_examples.png
    ├── d3_temporal_attention_plot_demand.png
    ├── d3_temporal_attention_plot_price.png
    ├── d3_temporal_attention_plot_solar.png
    ├── d3_temporal_attention_plot_wind.png
    ├── memory_leak_test.png
    ├── model_architecture_schematic.svg
    ├── model_architecture_schematic_markup.png
    ├── performance_breakdown_markup.png
    ├── solar_spatial_attentions_animation.gif
    └── tabular_performance_breakdown.png
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Probabilistic Forecasting of Renewable Energy Generation and Wholesale Market Prices Using Quantile Regression in Keras
2 | :rocket: Blog post on personal website :link: [Probabilistic Forecasting of Renewable Generation & Wholesale Prices with Quantile-Regression](https://richardfindlay.co.uk/probabilistic-forecasting-of-renewable-generation-and-wholesale-prices-with-quantile-regression-2)
3 |
4 |
5 |
6 | screenshot of interactive d3.js plots illustrating probabilistic forecasting performance
7 |
8 |
9 | ### Project Description :open_book::
10 | This repository demonstrates the use of deep learning techniques in combination with quantile regression to produce probabilistic forecasts. The above figure depicts the consecutive day-ahead (DA) quantile forecasts for each of the investigated variables over one week, with further quantification and discussion of the forecast performance given in the accompanying [blog post](https://richardfindlay.co.uk/probabilistic-forecasting-of-renewable-generation-and-wholesale-prices-with-quantile-regression-2).
11 |
12 | The code investigates the performance of four deep-learning architectures: a Bi-directional LSTM, a Seq-2-Seq model, a Seq-2-Seq model with Temporal Attention, and a Seq-2-Seq model with Temporal and Spatial Attention. For context, comparisons are made to a simplistic daily persistence forecast as well as to the Transmission System Operator's (TSO) forecast. The models are ordered by increasing complexity, with the accompanying hypothesis that performance would improve at each iteration; this was not borne out when test performance was investigated.
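
All architectures output one prediction per quantile and are trained with the pinball (quantile) loss. Below is a minimal Keras sketch of that loss, mirroring the per-quantile losses built in the training scripts (the `pinball_loss` helper name is illustrative, not a function in the repo):

```python
import tensorflow.keras.backend as K

quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95]

def pinball_loss(q):
    # penalise under-prediction by q and over-prediction by (1 - q)
    def loss(y_true, y_pred):
        error = y_true - y_pred
        return K.mean(K.maximum(q * error, (q - 1) * error), axis=-1)
    return loss

# one loss per quantile head, matching the order of the model outputs
q_losses = [pinball_loss(q) for q in quantiles]
```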
13 |
14 |
15 |
16 | model architecture schematic for encoder-decoder with spatial and temporal attention mechanisms as implemented in keras
17 |
18 |
19 | ### Performance Overview :racing_car::
20 | The above figure illustrates the most complex model investigated as part of this project. With both temporal and spatial attention mechanisms, the novel encoder-decoder architecture does not always prevail as the best-performing technique, but it shows encouraging performance and may merit further investigation and fine-tuning.
21 |
22 |
23 |
24 |
25 |
26 | The above plot illustrates the performance of the temporal attention mechanism over the prior 7 days of features inputted to the model. The attention weights show a recognition of temporal patterns within the data, with particular attention paid to the previous day when producing the following forecast. Similarly, the gif below depicts the behaviour of the spatial attention weights in the solar generation forecast, again giving some promising indication that the mechanism recognises the influence of solar irradiance on the forecast.
27 |
28 |
29 |
30 |
31 |
32 | A quantitative performance breakdown of all investigated deep-learning architectures is given below, alongside the TSO and persistence forecasting performances.
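
The `preformance_summary_*.csv` files under `results/` report PICP, PINC, ACE, MAE, MAPE and RMSE for each model and variable. A minimal sketch of how the interval metrics could be derived from the stored quantile predictions (function and column names here are illustrative, not taken from the repo's scripts):

```python
import numpy as np

def interval_metrics(y_true, q_lower, q_upper, pinc=90.0):
    # PICP / ACE for a central interval, e.g. the 0.05-0.95 quantiles -> PINC = 90%
    covered = (y_true >= q_lower) & (y_true <= q_upper)
    picp = 100.0 * covered.mean()   # empirical coverage
    ace = picp - pinc               # average coverage error
    return picp, pinc, ace

def point_metrics(y_true, y_median):
    mae = np.mean(np.abs(y_true - y_median))
    rmse = np.sqrt(np.mean((y_true - y_median) ** 2))
    # MAPE is undefined (inf) when the target crosses zero, as seen for prices
    mape = 100.0 * np.mean(np.abs((y_true - y_median) / y_true))
    return mae, mape, rmse
```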
33 |
34 |
35 |
36 |
37 |
38 | ### Notes on Code :notebook::
39 | Install python dependencies for repository:
40 | ```
41 | $ pip install -r requirements.txt
42 | ```
43 |
44 | :weight_lifting: Training for all models was conducted on a Google Colab Pro+ subscription.
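
Pre-trained weights for each architecture and variable are stored under `models/`. A minimal, hedged sketch for loading one of the saved `.h5` files for inference, assuming it is run from the repository root (the swish registration and custom `attention` layer mirror the repo's inference scripts):

```python
from keras.backend import sigmoid
from keras.utils.generic_utils import get_custom_objects
from tensorflow.keras.layers import Activation
from tensorflow.keras.models import load_model

# custom attention layer shipped with the repository
from scripts.models._shared.attention_layer import attention

# register the swish activation used by some of the saved models
def swish(x, beta=1):
    return x * sigmoid(beta * x)

get_custom_objects().update({'swish': Activation(swish)})

# compile=False skips restoring the per-quantile training losses,
# which are only needed for further training, not for prediction
model = load_model(
    'models/seq2seq+temporal/price/q_all_seq2seq+temporal/price_seq2seq+temporal.h5',
    custom_objects={'attention': attention},
    compile=False,
)
model.summary()
```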
45 |
46 | ### Further Work :telescope::
47 | - [ ] The scope of the study could be broadened by analysing additional ML architectures alongside the RNN variants examined here, particularly XGBoost and transformers.
48 | - [ ] The problem pushes the limits of high-level DL frameworks; adopting PyTorch or lower-level TensorFlow could allow for increased efficiency and performance.
49 |
50 | ### To Do :test_tube::
51 | - [ ] Code links and references to be validated since re-organisation.
52 | - [ ] Clean code, especially interactive d3 plots.
53 | - [ ] Further validate environments and optimisation scripts.
54 |
55 | ### Resources :gem::
56 | + [https://www.elexon.co.uk/documents/training-guidance/bsc-guidance-notes/bmrs-api-and-data-push-user-guide-2/](https://www.elexon.co.uk/documents/training-guidance/bsc-guidance-notes/bmrs-api-and-data-push-user-guide-2/)
57 | + [https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly](https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly)
58 | + [https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-pressure-levels?tab=overview](https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-pressure-levels?tab=overview)
59 | + [https://colah.github.io/posts/2015-08-Understanding-LSTMs](https://colah.github.io/posts/2015-08-Understanding-LSTMs)
60 | + [https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html](https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html)
61 | + [https://colab.research.google.com/github/kmkarakaya/ML_tutorials/blob/master/seq2seq_Part_D_Encoder_Decoder_with_Teacher_Forcing.ipynb](https://colab.research.google.com/github/kmkarakaya/ML_tutorials/blob/master/seq2seq_Part_D_Encoder_Decoder_with_Teacher_Forcing.ipynb)
62 |
--------------------------------------------------------------------------------
/data/processed/README.md:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/data/raw/README.md:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/models/bilstm/demand/q_all_bilstm/demand_bilstm.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/bilstm/demand/q_all_bilstm/demand_bilstm.h5
--------------------------------------------------------------------------------
/models/bilstm/price/q_all_bilstm/price_bilstm.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/bilstm/price/q_all_bilstm/price_bilstm.h5
--------------------------------------------------------------------------------
/models/bilstm/solar/q_all_bilstm/solar_bilstm.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/bilstm/solar/q_all_bilstm/solar_bilstm.h5
--------------------------------------------------------------------------------
/models/bilstm/wind/q_all_bilstm/wind_bilstm.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/bilstm/wind/q_all_bilstm/wind_bilstm.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal+spatial/demand/q_all_seq2seq+temporal+spatial/demand_main.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/demand/q_all_seq2seq+temporal+spatial/demand_main.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal+spatial/demand/q_all_seq2seq+temporal+spatial/demand_spatial_enc.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/demand/q_all_seq2seq+temporal+spatial/demand_spatial_enc.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal+spatial/demand/q_all_seq2seq+temporal+spatial/demand_temporal_enc.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/demand/q_all_seq2seq+temporal+spatial/demand_temporal_enc.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal+spatial/solar/q_all_seq2seq+temporal+spatial/solar_main.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/solar/q_all_seq2seq+temporal+spatial/solar_main.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal+spatial/solar/q_all_seq2seq+temporal+spatial/solar_spatial_enc.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/solar/q_all_seq2seq+temporal+spatial/solar_spatial_enc.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal+spatial/solar/q_all_seq2seq+temporal+spatial/solar_temporal_enc.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/solar/q_all_seq2seq+temporal+spatial/solar_temporal_enc.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal+spatial/wind/q_all_seq2seq+temporal+spatial/wind_main.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/wind/q_all_seq2seq+temporal+spatial/wind_main.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal+spatial/wind/q_all_seq2seq+temporal+spatial/wind_spatial_enc.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/wind/q_all_seq2seq+temporal+spatial/wind_spatial_enc.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal+spatial/wind/q_all_seq2seq+temporal+spatial/wind_temporal_enc.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/wind/q_all_seq2seq+temporal+spatial/wind_temporal_enc.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal/demand/q_all_seq2seq+temporal/demand_seq2seq+temporal.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/demand/q_all_seq2seq+temporal/demand_seq2seq+temporal.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal/demand/q_all_seq2seq+temporal/demand_seq2seq+temporal_enc.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/demand/q_all_seq2seq+temporal/demand_seq2seq+temporal_enc.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal/price/q_all_seq2seq+temporal/price_seq2seq+temporal.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/price/q_all_seq2seq+temporal/price_seq2seq+temporal.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal/price/q_all_seq2seq+temporal/price_seq2seq+temporal_enc.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/price/q_all_seq2seq+temporal/price_seq2seq+temporal_enc.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal/solar/q_all_seq2seq+temporal/solar_seq2seq+temporal.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/solar/q_all_seq2seq+temporal/solar_seq2seq+temporal.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal/solar/q_all_seq2seq+temporal/solar_seq2seq+temporal_enc.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/solar/q_all_seq2seq+temporal/solar_seq2seq+temporal_enc.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal/wind/q_all_seq2seq+temporal/wind_seq2seq+temporal.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/wind/q_all_seq2seq+temporal/wind_seq2seq+temporal.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal/wind/q_all_seq2seq+temporal/wind_seq2seq+temporal_enc.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/wind/q_all_seq2seq+temporal/wind_seq2seq+temporal_enc.h5
--------------------------------------------------------------------------------
/models/seq2seq/demand/q_all_seq2seq/demand_seq2seq.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq/demand/q_all_seq2seq/demand_seq2seq.h5
--------------------------------------------------------------------------------
/models/seq2seq/price/q_all_seq2seq/price_seq2seq.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq/price/q_all_seq2seq/price_seq2seq.h5
--------------------------------------------------------------------------------
/models/seq2seq/solar/q_all_seq2seq/solar_seq2seq.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq/solar/q_all_seq2seq/solar_seq2seq.h5
--------------------------------------------------------------------------------
/models/seq2seq/wind/q_all_seq2seq/wind_seq2seq.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq/wind/q_all_seq2seq/wind_seq2seq.h5
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py==0.11.0
2 | aiohttp==3.7.4
3 | alabaster==0.7.12
4 | alpaca==1.0.0
5 | alpaca-trade-api==1.4.0
6 | appdirs==1.4.4
7 | arrow==0.17.0
8 | astunparse==1.6.3
9 | asv==0.4.2
10 | async-timeout==3.0.1
11 | attrs==20.3.0
12 | Babel==2.9.0
13 | beautifulsoup4==4.9.3
14 | black==20.8b1
15 | boto3==1.16.23
16 | botocore==1.19.23
17 | cachetools==4.1.1
18 | cdsapi==0.5.1
19 | certifi==2020.6.20
20 | cfgv==3.2.0
21 | cftime==1.1.3
22 | chardet==3.0.4
23 | click==7.1.2
24 | cloudpickle==1.6.0
25 | coloredlogs==15.0
26 | configparser==5.0.1
27 | cycler==0.10.0
28 | decorator==4.4.2
29 | distlib==0.3.1
30 | docopt==0.6.2
31 | docutils==0.16
32 | ez-setup==0.9
33 | filelock==3.0.12
34 | flatbuffers==1.12
35 | FLORIS==2.2.3
36 | future==0.18.2
37 | gast==0.3.3
38 | google-auth==1.18.0
39 | google-auth-oauthlib==0.4.1
40 | google-pasta==0.2.0
41 | grpcio==1.32.0
42 | gym==0.18.0
43 | h5py==2.10.0
44 | humanfriendly==9.1
45 | identify==1.5.12
46 | idna==2.10
47 | ImageHash==4.2.0
48 | imageio==2.9.0
49 | imageio-ffmpeg==0.4.3
50 | imagesize==1.2.0
51 | iniconfig==1.1.1
52 | iris==1.0.7
53 | Jinja2==2.11.2
54 | jmespath==0.10.0
55 | joblib==0.16.0
56 | Js2Py==0.70
57 | Keras==2.4.3
58 | Keras-Preprocessing==1.1.2
59 | kiwisolver==1.2.0
60 | lxml==4.6.3
61 | Markdown==3.2.2
62 | MarkupSafe==1.1.1
63 | matplotlib==3.3.0
64 | mdolab-baseclasses==1.4.0
65 | memory-profiler==0.57.0
66 | moviepy==1.0.3
67 | msgpack==1.0.2
68 | multidict==5.2.0
69 | mypy-extensions==0.4.3
70 | netCDF4==1.5.3
71 | nodeenv==1.5.0
72 | nose==1.3.7
73 | numpy==1.21.2
74 | oauthlib==3.1.0
75 | opt-einsum==3.3.0
76 | packaging==20.8
77 | pandas==1.2.3
78 | pathspec==0.8.1
79 | petsc==3.14.3
80 | petsc4py==3.14.1
81 | Pillow==6.2.2
82 | pipwin==0.5.0
83 | pluggy==0.13.1
84 | pockets==0.9.1
85 | pre-commit==2.9.3
86 | proglog==0.1.9
87 | proj==0.2.0
88 | protobuf==3.12.2
89 | psutil==5.7.2
90 | py==1.10.0
91 | pyasn1==0.4.8
92 | pyasn1-modules==0.2.8
93 | pyglet==1.5.0
94 | Pygments==2.7.4
95 | pyjsparser==2.7.1
96 | pyparsing==2.4.7
97 | PyPrind==2.11.2
98 | pyproj==3.0.0.post1
99 | pySmartDL==1.3.4
100 | pytest==6.2.1
101 | python-dateutil==2.8.1
102 | pytz==2020.1
103 | PyWavelets==1.1.1
104 | PyYAML==5.4.1
105 | regex==2020.11.13
106 | requests==2.24.0
107 | requests-oauthlib==1.3.0
108 | rsa==4.6
109 | s3transfer==0.3.3
110 | scikit-learn==0.23.1
111 | scipy==1.6.2
112 | seaborn==0.11.1
113 | six==1.15.0
114 | sklearn==0.0
115 | snowballstemmer==2.0.0
116 | soupsieve==2.1
117 | Sphinx==3.4.3
118 | sphinx-copybutton==0.3.1
119 | sphinx-gallery==0.8.2
120 | sphinx-panels==0.5.2
121 | sphinx-rtd-theme==0.5.1
122 | sphinxcontrib-applehelp==1.0.2
123 | sphinxcontrib-devhelp==1.0.2
124 | sphinxcontrib-htmlhelp==1.0.3
125 | sphinxcontrib-jsmath==1.0.1
126 | sphinxcontrib-napoleon==0.7
127 | sphinxcontrib-qthelp==1.0.3
128 | sphinxcontrib-serializinghtml==1.1.4
129 | sqlitedict==1.7.0
130 | tensorboard==2.4.0
131 | tensorboard-plugin-wit==1.7.0
132 | tensorflow==2.4.1
133 | tensorflow-estimator==2.4.0
134 | termcolor==1.1.0
135 | threadpoolctl==2.1.0
136 | toml==0.10.2
137 | tqdm==4.59.0
138 | typed-ast==1.4.2
139 | typing-extensions==3.7.4.3
140 | tzlocal==2.1
141 | urllib3==1.25.9
142 | virtualenv==20.3.1
143 | websocket-client==1.2.1
144 | websockets==9.1
145 | Werkzeug==1.0.1
146 | wget==3.2
147 | wrapt==1.12.1
148 | yarl==1.7.0
149 |
--------------------------------------------------------------------------------
/results/demand/bilstm/forecasted_time_series_demand_bilstm.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/demand/bilstm/forecasted_time_series_demand_bilstm.pkl
--------------------------------------------------------------------------------
/results/demand/bilstm/preformance_summary_demand_bilstm.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 91.43518519,90,1.435185185,1459.753738,4.733158189,1903.185308
--------------------------------------------------------------------------------
/results/demand/bilstm/q_all_bilstm/demand_bilstm.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/demand/bilstm/q_all_bilstm/demand_bilstm.h5
--------------------------------------------------------------------------------
/results/demand/seq2seq+temporal+spatial/forecasted_time_series_demand_seq2seq+temporal+spatial.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/demand/seq2seq+temporal+spatial/forecasted_time_series_demand_seq2seq+temporal+spatial.pkl
--------------------------------------------------------------------------------
/results/demand/seq2seq+temporal+spatial/preformance_summary_demand_seq2seq+temporal+spatial.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 70.2119883,90,-19.7880117,1519.480105,4.961981752,1903.396953
--------------------------------------------------------------------------------
/results/demand/seq2seq+temporal/forecasted_time_series_demand_seq2seq+temporal.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/demand/seq2seq+temporal/forecasted_time_series_demand_seq2seq+temporal.pkl
--------------------------------------------------------------------------------
/results/demand/seq2seq+temporal/preformance_summary_demand_seq2seq+temporal.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 86.70808967,90,-3.291910331,1457.563216,4.907357214,1890.307257
--------------------------------------------------------------------------------
/results/demand/seq2seq/forecasted_time_series_demand_seq2seq.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/demand/seq2seq/forecasted_time_series_demand_seq2seq.pkl
--------------------------------------------------------------------------------
/results/demand/seq2seq/preformance_summary_demand_seq2seq.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 90.83820663,90,0.838206628,1315.00029,4.395793398,1715.146304
--------------------------------------------------------------------------------
/results/price/bilstm/forecasted_time_series_price_bilstm.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/price/bilstm/forecasted_time_series_price_bilstm.pkl
--------------------------------------------------------------------------------
/results/price/bilstm/preformance_summary_price_bilstm.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 72.4537037,90,-17.5462963,6.375387473,inf,8.02840373
--------------------------------------------------------------------------------
/results/price/seq2seq+temporal+spatial/forecasted_time_series_price_seq2seq+temporal+spatial.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/price/seq2seq+temporal+spatial/forecasted_time_series_price_seq2seq+temporal+spatial.pkl
--------------------------------------------------------------------------------
/results/price/seq2seq+temporal+spatial/preformance_summary_price_seq2seq+temporal+spatial.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 69.57846004,90,-20.42153996,7.507209148,inf,9.576368247
--------------------------------------------------------------------------------
/results/price/seq2seq+temporal/attention_data_price_seq2seq+temporal.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/price/seq2seq+temporal/attention_data_price_seq2seq+temporal.pkl
--------------------------------------------------------------------------------
/results/price/seq2seq+temporal/forecasted_time_series_price_seq2seq+temporal.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/price/seq2seq+temporal/forecasted_time_series_price_seq2seq+temporal.pkl
--------------------------------------------------------------------------------
/results/price/seq2seq+temporal/preformance_summary_price_seq2seq+temporal.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 80.17787524,90,-9.822124756,6.526100553,inf,8.47807083
--------------------------------------------------------------------------------
/results/price/seq2seq/forecasted_time_series_price_seq2seq.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/price/seq2seq/forecasted_time_series_price_seq2seq.pkl
--------------------------------------------------------------------------------
/results/price/seq2seq/preformance_summary_price_seq2seq.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 45.45565302,90,-44.54434698,6.49537643,inf,8.052062264
--------------------------------------------------------------------------------
/results/solar/bilstm/forecasted_time_series_solar_bilstm.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/solar/bilstm/forecasted_time_series_solar_bilstm.pkl
--------------------------------------------------------------------------------
/results/solar/bilstm/preformance_summary_solar_bilstm.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 92.10526316,90,2.105263158,327.3727615,,689.229032
--------------------------------------------------------------------------------
/results/solar/seq2seq+temporal+spatial/forecasted_time_series_solar_seq2seq+temporal+spatial.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/solar/seq2seq+temporal+spatial/forecasted_time_series_solar_seq2seq+temporal+spatial.pkl
--------------------------------------------------------------------------------
/results/solar/seq2seq+temporal+spatial/preformance_summary_solar_seq2seq+temporal+spatial.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 95.27290448,90,5.272904483,270.9945811,,585.2357481
--------------------------------------------------------------------------------
/results/solar/seq2seq+temporal+spatial/spatial_attention_data_solar.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/solar/seq2seq+temporal+spatial/spatial_attention_data_solar.pkl
--------------------------------------------------------------------------------
/results/solar/seq2seq+temporal/forecasted_time_series_solar_seq2seq+temporal.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/solar/seq2seq+temporal/forecasted_time_series_solar_seq2seq+temporal.pkl
--------------------------------------------------------------------------------
/results/solar/seq2seq+temporal/preformance_summary_solar_seq2seq+temporal.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 98.0994152,90,8.099415205,291.0802181,,613.8738376
--------------------------------------------------------------------------------
/results/solar/seq2seq/forecasted_time_series_solar_seq2seq.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/solar/seq2seq/forecasted_time_series_solar_seq2seq.pkl
--------------------------------------------------------------------------------
/results/solar/seq2seq/preformance_summary_solar_seq2seq.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,PINAW,PINRW,MAE,MAPE,RMSE
2 | 96.0891812865497,89.99999999999999,6.089181286549717,7.249654247663978,1.6922971719864661,300.5354471646511,,655.2354701170318
3 |
--------------------------------------------------------------------------------
/results/wind/bilstm/forecasted_time_series_wind_bilstm.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/wind/bilstm/forecasted_time_series_wind_bilstm.pkl
--------------------------------------------------------------------------------
/results/wind/bilstm/preformance_summary_wind_bilstm.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 87.8411306,90,-2.158869396,1318.7222,30.59062064,1760.6124
--------------------------------------------------------------------------------
/results/wind/seq2seq+temporal+spatial/forecasted_time_series_wind_seq2seq+temporal+spatial.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/wind/seq2seq+temporal+spatial/forecasted_time_series_wind_seq2seq+temporal+spatial.pkl
--------------------------------------------------------------------------------
/results/wind/seq2seq+temporal+spatial/preformance_summary_wind_seq2seq+temporal+spatial.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 85.47758285,90,-4.522417154,1062.046295,21.25483009,1383.115011
--------------------------------------------------------------------------------
/results/wind/seq2seq+temporal/forecasted_time_series_wind_seq2seq+temporal.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/wind/seq2seq+temporal/forecasted_time_series_wind_seq2seq+temporal.pkl
--------------------------------------------------------------------------------
/results/wind/seq2seq+temporal/preformance_summary_wind_seq2seq+temporal.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 90.09502924,90,0.09502924,998.0828,19.72910166,1320.7335
--------------------------------------------------------------------------------
/results/wind/seq2seq/forecasted_time_series_wind_seq2seq.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/wind/seq2seq/forecasted_time_series_wind_seq2seq.pkl
--------------------------------------------------------------------------------
/results/wind/seq2seq/preformance_summary_wind_seq2seq.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 94.79775828,90,4.797758285,937.0275,19.33659166,1251.3384
--------------------------------------------------------------------------------
/scripts/models/_shared/__pycache__/attention_layer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/scripts/models/_shared/__pycache__/attention_layer.cpython-38.pyc
--------------------------------------------------------------------------------
/scripts/models/_shared/__pycache__/timeseries_data_generator.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/scripts/models/_shared/__pycache__/timeseries_data_generator.cpython-38.pyc
--------------------------------------------------------------------------------
/scripts/models/_shared/attention_layer.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorflow.keras
3 | import tensorflow.keras.backend as K
4 | from tensorflow.keras import Model
5 | from tensorflow.keras.layers import Dense
6 | from tensorflow.keras.layers import Activation, concatenate, Dot
7 |
8 |
9 |
10 | class attention(tf.keras.layers.Layer):
11 |
12 | def __init__(self, hidden_units, **kwargs):
13 | # super(attention, self).__init__(hidden_units)
14 | self.hidden_units = hidden_units
15 | super(attention, self).__init__(**kwargs)
16 |
17 |
18 | def build(self, input_shape):
19 |
20 | input_dim = int(input_shape[-1])
21 |
22 | self.attention_score_vec = Dense(64, name='attention_score_vec')
23 | self.h_t = Dense(64, name='ht')
24 | self.attention_score = Dot(axes=[1, 2], name='attention_score')
25 | self.attention_weight = Activation('softmax', name='attention_weight')
26 | self.context_vector = Dot(axes=[1, 1], name='context_vector')
27 | self.attention_vector = Dense(self.hidden_units, activation='tanh', name='attention_vector')
28 |
29 | super(attention, self).build(input_shape)
30 |
31 | def call(self, enc_output, enc_out, h_state, c_state):
32 |
33 |
34 | score_first_part = self.attention_score_vec(enc_output)
35 | # score_first_part dot last_hidden_state => attention_weights
36 | # (batch_size, time_steps, hidden_size) dot (batch_size, hidden_size) => (batch_size, time_steps)
37 | h_t = concatenate([h_state, enc_out[:,0,:]])
38 | h_t = self.h_t(h_t)
39 |
40 | score = self.attention_score([h_t, score_first_part])
41 |
42 | attention_weights = self.attention_weight(score)
43 | # (batch_size, time_steps, hidden_size) dot (batch_size, time_steps) => (batch_size, hidden_size)
44 | context_vector = self.context_vector([enc_output, attention_weights])
45 | pre_activation = concatenate([context_vector, h_t])
46 | attention_vector = self.attention_vector(pre_activation)
47 |
48 | attention_weights = K.expand_dims(attention_weights, axis=-1)
49 | attention_vector = K.expand_dims(attention_vector, axis=1)
50 |
51 | return [attention_weights, attention_vector]
52 |
53 | def compute_output_shape(self, input_shape):
54 | # shapes of [attention_weights, attention_vector] returned by call()
55 | return [(input_shape[0], input_shape[1], 1), (input_shape[0], 1, self.hidden_units)]
55 |
56 | def get_config(self):
57 | config = super(attention, self).get_config()
58 | config.update({"hidden_units": self.hidden_units})
59 | return config
60 |
--------------------------------------------------------------------------------
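
For context, a hedged sketch of how this layer might be wired into a single decoder step; the shapes, names and constants below are illustrative assumptions, and the actual wiring lives in scripts/models/seq2seq+temporal_attn.py (not reproduced in this dump):

```python
from tensorflow.keras.layers import Input, LSTM

from _shared.attention_layer import attention

n_s = 32    # decoder hidden units (assumed)
Tx = 336    # encoder timesteps (assumed)

enc_output = Input(shape=(Tx, 2 * n_s))   # full bidirectional encoder sequence
enc_out = Input(shape=(1, 2 * n_s))       # final encoder output step
s0 = Input(shape=(n_s,))                  # initial decoder hidden state
c0 = Input(shape=(n_s,))                  # initial decoder cell state

# attend over the encoder sequence, then feed the resulting attention
# vector into a decoder LSTM cell for one output step
attn_weights, attn_vector = attention(n_s)(enc_output, enc_out, s0, c0)
_, s, c = LSTM(n_s, return_state=True)(attn_vector, initial_state=[s0, c0])
```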
/scripts/models/_shared/timeseries_data_generator.py:
--------------------------------------------------------------------------------
1 | import h5py
2 | import numpy as np
3 | import tensorflow
4 | # as adapted from: https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly (note: input_seq_size, output_seq_size, quantiles and model_type are expected to be defined by the importing training script)
5 | class DataGenerator(tensorflow.keras.utils.Sequence):
6 |
7 | def __init__(self, dataset_name, x_length, y_length, hidden_states, batch_size, shuffle):
8 | self.dataset_name = dataset_name
9 | self.batch_size = batch_size
10 | self.shuffle = shuffle
11 | self.n_s = hidden_states
12 | self.xlen = x_length
13 | self.ylen = y_length
14 | self.index_ref = 0
15 | self.on_epoch_end()
16 |
17 | def __len__(self):
18 | # 'number of batches per Epoch'
19 | return int(np.floor((self.ylen - input_seq_size - (output_seq_size-1)) / self.batch_size))
20 |
21 | def __getitem__(self, index):
22 |
23 | # input and output indexes relative current batch size and data generator index reference
24 | input_indexes = self.input_indexes[(index*self.batch_size) : (index*self.batch_size) + (self.batch_size + (input_seq_size-1))]
25 | output_indexes = self.output_indexes[(index*self.batch_size) + input_seq_size : (index*self.batch_size) + input_seq_size + (self.batch_size + (output_seq_size-1))]
26 |
27 | # Generate data
28 | (X_train1, X_train2, X_train3, X_train4, s0, c0), y_train = self.__data_generation(input_indexes, output_indexes)
29 |
30 | # replicate labels for each quantile
31 | y_trues = [y_train for i in quantiles]
32 |
33 | # extend true values for spatial and temporal attention (only relevant if compiled model used for inference)
34 | # y_trues.extend([[], []])
35 |
36 | return (X_train1, X_train2, X_train3, X_train4, s0, c0), (y_trues) # pass empty training outputs to extract attentions
37 |
38 | def on_epoch_end(self):
39 | # set length of indexes for each epoch
40 | self.input_indexes = np.arange(self.xlen)
41 | self.output_indexes = np.arange(self.ylen)
42 |
43 | if self.shuffle == True:
44 | np.random.shuffle(self.input_indexes)
45 |
46 | def to_sequence(self, x1, x2, x3, x4, y):
47 | # convert timeseries batch into sequences
48 | input_start, output_start = 0, 0
49 |
50 | seqX1, seqX2, seqX3, seqX4, seqY = [], [], [], [], []
51 |
52 | while (input_start + input_seq_size) <= len(x1):
53 | # offset handled during pre-processing
54 | input_end = input_start + input_seq_size
55 | output_end = output_start + output_seq_size
56 |
57 | # inputs
58 | seqX1.append(x1[input_start:input_end])
59 | seqX2.append(x2[input_start:input_end])
60 |
61 | # outputs
62 | seqX3.append(x3[output_start:output_end])
63 | seqX4.append(x4[output_start:output_end])
64 | seqY.append(y[output_start:output_end])
65 |
66 | input_start += 1
67 | output_start += 1
68 |
69 | # convert to numpy arrays
70 | seqX1, seqX2, seqX3, seqX4, seqY = np.array(seqX1), np.array(seqX2), np.array(seqX3), np.array(seqX4), np.array(seqY)
71 |
72 | return seqX1, seqX2, seqX3, seqX4, seqY
73 |
74 | def __data_generation(self, input_indexes, output_indexes):
75 |
76 | # load data for current batch
77 | f = h5py.File(f"../../data/processed/{model_type}/{self.dataset_name}", "r")
78 | X_train1 = f['train_set']['X1_train'][input_indexes] # main feature array
79 | X_train2 = f['train_set']['X2_train'][input_indexes] # input time features from feature engineering
80 | X_train3 = f['train_set']['X3_train'][output_indexes] # output time features from feature engineering
81 |
82 | # no spatial data if model is training for price forecasting
83 | if model_type != 'price':
84 | X_train4 = f['train_set']['X1_train'][output_indexes][:,:,:,1:] # all nwp features apart from the generation itself
85 | X_train4 = np.average(X_train4, axis=(1,2))
86 | else:
87 | X_train4 = f['train_set']['X1_train'][output_indexes][:,1:]
88 |
89 | y_train = f['train_set']['y_train'][output_indexes]
90 |
91 | f.close()
92 |
93 | # convert to sequence data
94 | X_train1, X_train2, X_train3, X_train4, y_train = self.to_sequence(X_train1, X_train2, X_train3, X_train4, y_train)
95 |
96 | s0 = np.zeros((self.batch_size, self.n_s))
97 | c0 = np.zeros((self.batch_size, self.n_s))
98 |
99 | return (X_train1, X_train2, X_train3, X_train4, s0, c0), y_train
--------------------------------------------------------------------------------
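
A hedged usage sketch for this generator, assuming it is imported as a module from scripts/models/ in the same way the repo's training scripts consume it; the constants mirror those set in bilstm_model.py and the HDF5 layout follows the repo's data/processed convention:

```python
import h5py

from _shared import timeseries_data_generator as tsg

# the generator reads these as module-level globals, so inject them into its namespace
tsg.model_type = 'solar'
tsg.input_seq_size = 336    # one week of half-hourly inputs
tsg.output_seq_size = 48    # day-ahead horizon, half-hourly
tsg.quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95]

# dataset sizes, read the same way as in bilstm_model.py (path relative to scripts/models/)
with h5py.File(f"../../data/processed/{tsg.model_type}/dataset_{tsg.model_type}.hdf5", "r") as f:
    x_len = f['train_set']['X1_train'].shape[0]
    y_len = f['train_set']['y_train'].shape[0]

training_generator = tsg.DataGenerator(
    dataset_name=f'dataset_{tsg.model_type}.hdf5',
    x_length=x_len,
    y_length=y_len,
    hidden_states=32,
    batch_size=64,
    shuffle=False,
)

# model.fit(training_generator, epochs=20)
```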
/scripts/models/bilstm_model.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sys, os
3 | import h5py
4 | import tensorflow as tf
5 | import tensorflow.keras
6 | import tensorflow.keras.backend as K
7 | from tensorflow.keras import Model
8 | from tensorflow.keras.layers import Conv2D, Bidirectional, Dense, TimeDistributed, LSTM
9 | from tensorflow.keras.layers import Input, Activation, AveragePooling2D, Lambda, concatenate, Reshape
10 | from keras.backend import sigmoid
11 | from keras.utils.generic_utils import get_custom_objects
12 |
13 | np.set_printoptions(threshold=sys.maxsize)
14 | tf.random.set_seed(180)
15 |
16 | ###########################################_____SET_MODEL_PARAMETERS_____############################################
17 | model_type ="solar"
18 |
19 | # declare dataset file
20 | dataset_name = f'dataset_{model_type}.hdf5'
21 |
22 | # declare quantiles for model
23 | quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95]
24 |
25 | # get useful size parameters
26 | f = h5py.File(f"../../data/processed/{model_type}/{dataset_name}", "r")
27 | features = np.empty_like(f['train_set']['X1_train'][0])
28 | times_in = np.empty_like(f['train_set']['X2_train'][0])
29 | times_out = np.empty_like(f['train_set']['X3_train'][0])
30 | labels = np.empty_like(f['train_set']['y_train'][0])
31 | x_len = f['train_set']['X1_train'].shape[0]
32 | y_len = f['train_set']['y_train'].shape[0]
33 | f.close()
34 |
35 | # input / output sequence sizes
36 | input_seq_size = 336
37 | output_seq_size = 48
38 | n_s = 32 # number of hidden states used through model
39 |
40 | ###########################################_____DATA_GENERATOR_____#################################################
41 |
42 | # data generator input parameters - avoid shuffle in this case
43 | params = {'batch_size': 64,
44 | 'shuffle': False }
45 |
46 | class DataGenerator(tensorflow.keras.utils.Sequence):
47 |
48 | def __init__(self, dataset_name, x_length, y_length, batch_size, shuffle):
49 | self.dataset_name = dataset_name
50 | self.batch_size = batch_size
51 | self.shuffle = shuffle
52 | self.xlen = x_length
53 | self.ylen = y_length
54 | self.index_ref = 0
55 | self.on_epoch_end()
56 |
57 | def __len__(self):
58 | # 'number of batches per Epoch'
59 | # return int(np.floor((self.xlen - (input_seq_size-1)) / self.batch_size))
60 | return int(np.floor((self.ylen - input_seq_size - (output_seq_size-1)) / self.batch_size))
61 |
62 | def __getitem__(self, index):
63 |
64 | input_indexes = self.input_indexes[(index*self.batch_size) : (index*self.batch_size) + (self.batch_size + (input_seq_size-1))]
65 | output_indexes = self.output_indexes[(index*self.batch_size) + input_seq_size : (index*self.batch_size) + input_seq_size + (self.batch_size + (output_seq_size-1))]
66 |
67 | # Generate data
68 | (X_train1, X_train2), y_train = self.__data_generation(input_indexes, output_indexes)
69 |
70 | y_trues = [y_train for i in quantiles]
71 |
72 | return (X_train1, X_train2), (y_trues) # replicate targets so each quantile output has its own label
73 |
74 | def on_epoch_end(self):
75 | # set length of indexes for each epoch
76 | self.input_indexes = np.arange(self.xlen)
77 | self.output_indexes = np.arange(self.ylen)
78 |
79 | if self.shuffle == True:
80 | np.random.shuffle(self.input_indexes)
81 |
82 | def to_sequence(self, x1, x2, y):
83 | # convert timeseries batch into sequences
84 | input_start, output_start = 0, 0
85 |
86 | seqX1, seqX2, seqY = [], [], [] # only the two input sequences and the target are needed for the Bi-LSTM
87 |
88 | while (input_start + input_seq_size) <= len(x1):
89 | # offset handled during pre-processing
90 | input_end = input_start + input_seq_size
91 | output_end = output_start + output_seq_size
92 |
93 | # inputs
94 | seqX1.append(x1[input_start:input_end])
95 | seqX2.append(x2[input_start:input_end])
96 |
97 | # outputs
98 | seqY.append(y[output_start:output_end])
99 |
100 | input_start += 1
101 | output_start += 1
102 |
103 | seqX1, seqX2, seqY = np.array(seqX1), np.array(seqX2), np.array(seqY)
104 |
105 | return seqX1, seqX2, seqY
106 |
107 | def __data_generation(self, input_indexes, output_indexes):
108 |
109 | f = h5py.File(f"../../data/processed/{model_type}/{self.dataset_name}", "r")
110 |
111 | X_train2 = f['train_set']['X2_train'][input_indexes]
112 |
113 | if model_type != 'price':
114 | X_train1 = f['train_set']['X1_train'][input_indexes][:,:,:,:]
115 | X_train1 = np.average(X_train1, axis=(1,2))
116 | else:
117 | X_train1 = f['train_set']['X1_train'][input_indexes][:,:]
118 |
119 |
120 | y_train = f['train_set']['y_train'][output_indexes]
121 | # decoder_input = f['train_set']['y_train'][output_indexes]
122 | f.close()
123 |
124 | # convert to sequence data
125 | X_train1, X_train2, y_train = self.to_sequence(X_train1, X_train2, y_train)
126 |
127 |
128 | return (X_train1, X_train2), y_train
129 |
130 | training_generator = DataGenerator(dataset_name = dataset_name, x_length = x_len, y_length = y_len, **params)
131 |
132 | ###########################################_____MODEL_ARCHITECTURE_____#################################################
133 |
134 | # capture some more useful dimensions
135 | Tx = input_seq_size
136 | Ty = output_seq_size
137 |
138 | channels = features.shape[-1]
139 |
140 | times_in_dim = times_in.shape[-1]
141 | times_out_dim = times_out.shape[-1]
142 |
143 | # make custom activation - swish
144 | def swish(x, beta = 1):
145 | return (x * sigmoid(beta * x))
146 |
147 | # add swish activation to keras
148 | get_custom_objects().update({'swish': Activation(swish)})
149 |
150 | # define inputs for model
151 | x_input = Input(shape=(Tx, channels))
152 |
153 | times_in = Input(shape=(Tx, times_in_dim))
154 | times_out = Input(shape=(Ty, times_out_dim))
155 | out_nwp = Input(shape=(Ty, channels-1))
156 | s_state0 = Input(shape=(32,))
157 | c_state0 = Input(shape=(32,))
158 |
159 | # create empty list for outputs
160 | quantile_predictions = []
161 |
162 | for q in quantiles:
163 |
164 | combined_inputs = concatenate([x_input, times_in], axis=-1, name=f'concat_q_{q}')
165 |
166 | layer1, _, _, _, _ = Bidirectional(LSTM(32, return_sequences = False, return_state = True), name=f'biLSTM_q_{q}')(combined_inputs)
167 | layer2 = Dense(48, name=f'dense1_q_{q}')(layer1)
168 |
169 | if model_type == 'solar':
170 | layer2 = Activation('relu', name=f'relu_act_q_{q}')(layer2)
171 |
172 | quantile_predictions.append(layer2)
173 |
174 | model = Model(inputs = [x_input, times_in], outputs = quantile_predictions)
175 |
176 |
177 | ###########################################_____MODEL_TRAINING_____#################################################
178 |
179 | # include clipvalue in optimiser if needed
180 | optimizer = tensorflow.keras.optimizers.Adam(learning_rate = 0.001)
181 |
182 | # define loss for each quantile
183 | q_losses = [lambda y, f, q=q: K.mean(K.maximum(q * (y - f), (q - 1) * (y - f)), axis=-1) for q in quantiles] # bind q as a default argument so each quantile gets its own loss (avoids late binding to the last q)
184 |
185 | # append additional empty losses for temporal and spatial encoders
186 | # q_losses.append([None,None])
187 |
188 | # compile and train model
189 | model.compile(loss = q_losses, optimizer= optimizer)
190 | print(model.summary())
191 | model.fit(training_generator, epochs = 20)
192 |
193 | # save models - saving encoders individually for inference
194 | os.makedirs(f'../../models/bilstm/{model_type}', exist_ok=True) # avoid failure if the directory already exists
195 | model.save(f'../../models/bilstm/{model_type}/{model_type}_bilstm.h5')
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
--------------------------------------------------------------------------------
/scripts/models/inference+testing/bilstm_seq2seq_predictions.py:
--------------------------------------------------------------------------------
1 | import keras
2 | from keras.models import load_model, model_from_json
3 | from keras.backend import sigmoid
4 | from tensorflow.keras.layers import Input, Activation, concatenate, Lambda
5 | import numpy as np
6 | import h5py
7 | from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
8 | from pickle import load, dump
9 | import matplotlib.pyplot as plt
10 | from keras.utils.generic_utils import get_custom_objects
11 |
12 |
13 | # import custom classes
14 | from _shared.attention_layer import attention
15 |
16 | # script to produce test-set predictions for the bilstm, seq2seq and seq2seq+temporal models
17 |
18 | # declare model type
19 | model_type = 'seq2seq+temporal' # - bilstm, seq2seq, seq2seq+temporal
20 |
21 | # indicate forecast variable
22 | forecast_var = 'price'
23 |
24 | # quantiles - needed for key references - ensure aligns with trained model
25 | quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95]
26 |
27 | # define swish activation for use within the compiled model
28 | def swish(x, beta = 1):
29 | return (x * sigmoid(beta * x))
30 |
31 | # register swish as a custom activation (any custom key can be used for the name)
32 | get_custom_objects().update({'swish': Activation(swish)})
33 |
34 | # load trained model - the loss entry in custom_objects is only a placeholder, since the model is used purely for inference
35 | if model_type != 'seq2seq+temporal':
36 | model = load_model(f'../../../models/{model_type}/{forecast_var}/q_all_{model_type}/{forecast_var}_{model_type}.h5', custom_objects = {'': lambda y,f: defined_loss(q,y,f)})
37 | else:
38 | model = load_model(f'../../../models/{model_type}/{forecast_var}/q_all_{model_type}/{forecast_var}_{model_type}.h5', custom_objects = {'': lambda y,f: defined_loss(q,y,f), 'attention': attention, 'Activation': Activation(swish)})
39 |
40 | # load time references
41 | with open(f'../../../data/processed/{forecast_var}/time_refs_{forecast_var}_v2.pkl', 'rb') as time_file:
42 | time_refs = load(time_file)
43 |
44 | input_times = time_refs[f'input_times_test']
45 | output_times = time_refs[f'output_times_test']
46 |
47 | time_file.close()
48 |
49 | # load and process data
50 | f = h5py.File(f"../../../data/processed/{forecast_var}/dataset_{forecast_var}.hdf5", "r")
51 |
52 | set_type = 'test'
53 | X_train1 = f[f'{set_type}_set'][f'X1_{set_type}']
54 | X_train2 = f[f'{set_type}_set'][f'X2_{set_type}']
55 | X_train3 = f[f'{set_type}_set'][f'X3_{set_type}']
56 | X_train4 = f[f'{set_type}_set'][f'X1_{set_type}']
57 | y_train = f[f'{set_type}_set'][f'y_{set_type}']
58 |
59 | input_seq_size = 336
60 | output_seq_size = 48
61 |
62 | input_start, output_start = 0, input_seq_size
63 |
64 | seqX1, seqX2, seqX3, seqX4, seqY = [], [], [], [], []
65 |
66 | times_in, times_out = [], []
67 |
68 | # sequence the data
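    | # both indices advance by output_seq_size, so successive forecast windows are contiguous and
    | # non-overlapping while the 336-step input windows overlap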
69 | while (output_start + output_seq_size) <= len(y_train):
70 | # offset handled during pre-processing
71 | input_end = input_start + input_seq_size
72 | output_end = output_start + output_seq_size
73 |
74 | # inputs
75 | seqX1.append(X_train1[input_start:input_end])
76 | seqX2.append(X_train2[input_start:input_end])
77 |
78 | times_in.append(input_times[input_start:input_end])
79 |
80 | # outputs
81 | seqY.append(y_train[output_start:output_end])
82 | seqX3.append(X_train3[output_start:output_end])
83 | seqX4.append(X_train4[output_start:output_end])
84 | times_out.append(output_times[output_start:output_end])
85 |
86 | input_start += output_seq_size
87 | output_start += output_seq_size
88 |
89 | x1, x2, x3, x4, y = np.array(seqX1), np.array(seqX2), np.array(seqX3), np.array(seqX4), np.array(seqY)
90 | times_in, times_out = np.array(times_in), np.array(times_out)
91 |
92 | f.close()
93 |
94 | # load scaler
95 | scaler = load(open(f'../../../data/processed/{forecast_var}/_scaler/scaler_{forecast_var}.pkl', 'rb'))
96 |
97 | # average inputs over spatial dimensions
98 | if forecast_var != 'price':
99 | if model_type != 'seq2seq+temporal':
100 | x1 = np.average(x1, axis=(2,3))
101 |
102 | x4 = np.average(x4, axis=(2,3))
103 | x4 = x4[:,:,1:]
104 | else:
105 | x4 = x4[:,:,:-1]
106 |
107 | # cache test set length
108 | test_len = y.shape[0]
109 |
110 | # declare initial hidden states
111 | s0 = np.zeros((y.shape[0], 32,))
112 | c0 = np.zeros((y.shape[0], 32,))
113 |
114 |
115 | print('predicting')
116 | if model_type == 'bilstm':
117 | results = model.predict([x1, x2])
118 | elif model_type == 'seq2seq+temporal':
119 | results = model.predict([x1, x2, x3, x4, s0, c0])
120 | quantile_temporal_attns = results[-1]
121 | else:
122 | results = model.predict([x1, x2, x3, x4])
123 |
124 |
125 | results_dict = {}
126 |
127 | # inverse transform predictions + transfer to dictionary
128 | for idx in range(len(quantiles)):
129 | results_dict[str(quantiles[idx])] = scaler.inverse_transform(results[idx].reshape(-1,1)).reshape(test_len, output_seq_size, 1)
130 |
131 | # inverse transform true values
132 | y_true = scaler.inverse_transform(y.reshape(-1,1)).reshape(test_len, output_seq_size, 1)
133 |
134 | # create time_refs dictionary
135 | times_refs = {'input_times': times_in, 'output_times': times_out}
136 |
137 | # create results dictionary for performance analysis / plotting
138 | results_dict['time_refs'] = times_refs
139 | results_dict['y_true'] = y_true
140 |
141 | print(results_dict.keys())
142 |
143 | # save results - forecasted timeseries matrix
144 | with open(f'../../../results/{forecast_var}/{model_type}/forecasted_time_series_{forecast_var}_{model_type}.pkl', 'wb') as ts_file:
145 | dump(results_dict, ts_file)
146 |
147 | # save results - forecasted temporal attention matrix
148 | if (model_type == 'seq2seq+temporal') and (forecast_var == 'price'):
149 |
150 | # construct attention results dictionary
151 | attention_results = {}
152 | attention_results['0.5'] = quantile_temporal_attns
153 | attention_results['time_refs'] = times_refs
154 | attention_results['input_features'] = x1
155 |
156 | with open(f'../../../results/{forecast_var}/{model_type}/attention_data_{forecast_var}_seq2seq+temporal.pkl', 'wb') as attention_file:
157 | dump(attention_results, attention_file)
158 |
159 |
160 |
161 |
162 |
163 |
--------------------------------------------------------------------------------
/scripts/models/inference+testing/inference_model_seq2seq+spatial+temporal_attn.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import os
4 | import sys
5 | from sklearn.preprocessing import MinMaxScaler
6 | import tensorflow as tf
7 | from keras.models import load_model
8 | from keras import Model
9 | import tensorflow.keras
10 | import tensorflow.keras.backend as K
11 | from tensorflow.keras.layers import Input, Activation, concatenate, Lambda
12 | from tensorflow.keras.layers import Reshape
13 | from keras.callbacks import ModelCheckpoint
14 | from keras.backend import sigmoid
15 | from keras.utils.generic_utils import get_custom_objects
16 | from pickle import load
17 | import matplotlib.pyplot as plt
18 | import scipy
19 | from sklearn.metrics import mean_absolute_error, mean_squared_error
20 | import h5py
21 |
22 | import matplotlib.pyplot as plt
23 | import matplotlib.gridspec as gridspec
24 | from matplotlib.animation import FuncAnimation
25 | import seaborn as sns
26 | from pickle import dump, load
27 |
28 | import geopandas
29 | import contextily as ctx
30 |
31 | # import custom classes
32 | from _shared.attention_layer import attention
33 |
34 |
35 |
36 | # choose model type to run test for
37 | model_type ="solar"
38 |
39 | # declare dataset file
40 | dataset_name = f'dataset_{model_type}.hdf5'
41 |
42 | # choose to activate plot functions
43 | plot_temporal_attention = False
44 | plot_spatial_attention = False
45 |
46 | # declare quantiles
47 | quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95]
48 |
49 | # index to declare which test result to plot
50 | plot_ref = 0
51 |
52 | # load scaler
53 | scaler = load(open(f'../../data/processed/{model_type}/_scaler/scaler_{model_type}.pkl', 'rb'))
54 |
55 | # collect param sizes
56 | f = h5py.File(f"../../data/processed/{model_type}/{dataset_name}", "r")
57 | features = np.empty_like(f['train_set']['X1_train'][0])
58 | times_in = np.empty_like(f['train_set']['X2_train'][0])
59 | times_out = np.empty_like(f['train_set']['X3_train'][0])
60 | labels = np.empty_like(f['train_set']['y_train'][0])
61 | x_len = f['train_set']['X1_train'].shape[0]
62 | y_len = f['train_set']['y_train'].shape[0]
63 | print('size parameters loaded')
64 |
65 | # additional params dependent on whether spatial data is present
66 | if model_type != "price":
67 | height, width, channels = features.shape[0], features.shape[1], features.shape[2]
68 | else:
69 | channels = features.shape[-1]
70 |
71 | times_in_dim = times_in.shape[-1]
72 | times_out_dim = times_out.shape[-1]
73 |
74 | # declare additional useful params
75 | Tx = 336
76 | Ty = 48
77 | n_s = 32
78 | input_seq_size = Tx
79 | output_seq_size = Ty
80 |
81 | # define swish activation for use within the compiled model
82 | def swish(x, beta = 1):
83 | return (x * sigmoid(beta * x))
84 |
85 | # register swish as a custom activation (any custom key can be used for the name)
86 | get_custom_objects().update({'swish': Activation(swish)})
87 |
88 | # load main model
89 | model = load_model(f'../../models/seq2seq+temporal+spatial/{model_type}/{model_type}_main.h5', custom_objects = {'': lambda y,f: defined_loss(q,y,f), 'attention': attention, 'Activation': Activation(swish)})
90 |
91 | # read encoder models - ignoring the spatial encoder in the price forecasting case
92 | temporal_enc = load_model(f'../../models/seq2seq+temporal+spatial/{model_type}/{model_type}_temporal_enc.h5')
93 |
94 | if model_type != "price":
95 | spatial_enc = load_model(f'../../models/seq2seq+temporal+spatial/{model_type}/{model_type}_spatial_enc.h5')
96 |
97 | # load and process data
98 | f = h5py.File(f"../../data/processed/{model_type}/{dataset_name}", "r")
99 |
100 | # load test or train data - loading everything would exceed memory, so only the required split is read
101 | set_type = 'test'
102 | X_train1 = f[f'{set_type}_set'][f'X1_{set_type}']
103 | X_train2 = f[f'{set_type}_set'][f'X2_{set_type}']
104 | X_train3 = f[f'{set_type}_set'][f'X3_{set_type}']
105 | X_train4 = f[f'{set_type}_set'][f'X1_{set_type}']
106 | y_train = f[f'{set_type}_set'][f'y_{set_type}']
107 |
108 | # get relevant time references
109 | with open(f'../../data/processed/{model_type}/time_refs_{model_type}.pkl', 'rb') as time_file:
110 | time_refs = load(time_file)
111 |
112 | input_times = time_refs[f'input_times_{set_type}']
113 | output_times = time_refs[f'output_times_{set_type}']
114 |
115 | time_file.close()
116 |
117 | # begin sequencing of data
118 | input_start, output_start = 0, input_seq_size
119 |
120 | seqX1, seqX2, seqX3, seqX4, seqY = [], [], [], [], []
121 |
122 | times_in, times_out = [], []
123 |
124 | while (output_start + output_seq_size) <= len(y_train):
125 | # increment indexes for windowing of data
126 | input_end = input_start + input_seq_size
127 | output_end = output_start + output_seq_size
128 |
129 | # inputs
130 | seqX1.append(X_train1[input_start:input_end])
131 | seqX2.append(X_train2[input_start:input_end])
132 | times_in.append(input_times[input_start:input_end])
133 |
134 | # outputs
135 | seqX3.append(X_train3[output_start:output_end])
136 | if model_type != 'price':
137 | nwp_data = X_train4[output_start:output_end][:,:,:,1:]
138 | nwp_data = np.average(nwp_data, axis=(1,2))
139 | else:
140 | nwp_data = X_train4[output_start:output_end][:,1:]
141 | seqX4.append(nwp_data)
142 | seqY.append(y_train[output_start:output_end])
143 | times_out.append(output_times[output_start:output_end])
144 |
145 | input_start += output_seq_size
146 | output_start += output_seq_size
147 |
148 | # make sure all are numpy arrays
149 | x1, x2, x3, x4, y = np.array(seqX1), np.array(seqX2), np.array(seqX3), np.array(seqX4), np.array(seqY)
150 | times_in, times_out = np.array(times_in), np.array(times_out)
151 | f.close()
152 |
153 | # inverse-transform actual values back to their original units
154 | y_idx = y.shape[0]
155 | y = scaler.inverse_transform(y.reshape(-1,1)).reshape(y_idx, Ty, 1)
156 |
157 | # declare initial hidden states
158 | s0 = np.zeros((1, n_s))
159 | c0 = np.zeros((1, n_s))
160 |
161 | # function for inference decoder model - one for each quantile
162 | def inference_dec_model(quantile):
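    | # rebuilds a single-step decoder for one quantile from the trained model's layers (via model.get_layer):
    | # given the encoder outputs, the previous prediction and the previous LSTM states, it returns
    | # one forecast step plus the updated states and attention weights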
163 |
164 | # Encoder outputs for setup
165 | ccn_enc_output_test = Input(shape=(320, 128))
166 | lstm_enc_output_test = Input(shape=(Tx, n_s*2)) #+ times_in_dim
167 | prev_prediction = Input(shape=(1, 1))
168 |
169 | # Decoder Input
170 | times_in = Input(shape=(1, times_in_dim))
171 | times_out = Input(shape=(1, times_out_dim))
172 | out_nwp = Input(shape=(1, channels-1))
173 | s_state0 = Input(shape=(32,))
174 | c_state0 = Input(shape=(32,))
175 | if model_type != "price":
176 | decoder_input = Input(shape=(1, times_out_dim + (channels-1)))
177 | else:
178 | decoder_input = Input(shape=(1, times_out_dim))
179 |
180 | # define input for encoder
181 | if model_type != 'price':
182 | enc_in = concatenate([out_nwp, times_out], axis=-1)
183 | else:
184 | enc_in = times_out
185 |
186 | # context and previous output
187 | attn_weights_temp_test, context = model.get_layer(f'temporal_attention_q_{quantile}')(lstm_enc_output_test, enc_in, s_state0, c_state0)
188 |
189 | if model_type != 'price':
190 | attn_weights_spat_test, context_spat_test = model.get_layer(f'spatial_attention_q_{quantile}')(ccn_enc_output_test, enc_in, s_state0, c_state0)
191 |
192 | # context & previous output combine
193 | context = concatenate([context, context_spat_test], axis=-1)
194 |
195 | decoder_input_with_prev = concatenate([decoder_input, prev_prediction])
196 |
197 | # Decoder inference
198 | dec_output, s_state, c_state = model.get_layer(f'decoder_q_{quantile}')(decoder_input_with_prev, initial_state=[s_state0, c_state0])
199 |
200 | # combine context and prediction
201 | prediction = concatenate([context, K.expand_dims(dec_output,axis=1)])
202 |
203 | # final dense layer
204 | pred_test = model.get_layer(f'dense1_q_{quantile}')(prediction)
205 | pred_test = model.get_layer(f'dense3_q_{quantile}')(pred_test)
206 |
207 | if model_type == "solar":
208 | pred_test = model.get_layer(f'relu_act_q_{quantile}')(pred_test)
209 |
210 | # Inference Model
211 | if model_type != 'price':
212 | deoceder_test_model = Model(inputs=[times_in, times_out, out_nwp, decoder_input, ccn_enc_output_test, lstm_enc_output_test, prev_prediction, s_state0, c_state0], outputs=[pred_test, s_state, c_state, attn_weights_temp_test, attn_weights_spat_test])
213 | else:
214 | deoceder_test_model = Model(inputs=[times_in, times_out, out_nwp, decoder_input, lstm_enc_output_test, prev_prediction, s_state0, c_state0], outputs=[pred_test, s_state, c_state, attn_weights_temp_test])
215 | return deoceder_test_model
216 |
217 | # dictionary to store decoder models
218 | decoder_models = {}
219 |
220 | # instantiate model for each quantile
221 | for q in quantiles:
222 | decoder_models[f'{q}'] = inference_dec_model(q)
223 |
224 | # store predictions
225 | predictions = {}
226 | quantile_temporal_attns = {}
227 | quantile_spatial_attns = {}
228 |
229 | # loop through each sample, passing individually to model
230 | for q in quantiles:
231 | print(q)
232 |
233 | # set hidden states to zero
234 | s_state, c_state = s0, c0
235 |
236 | # empty arrays to store all results
237 | total_pred = np.empty((x1.shape[0], Ty, 1))
238 | total_temp = np.empty((x1.shape[0], Tx, Ty))
239 |
240 | if model_type != 'price':
241 | total_spat = np.empty((x1.shape[0], 320, Ty)) # 320 is the fixed spatial attention res
242 |
243 | decoder = decoder_models[f'{q}']
244 |
245 | for idx in range(x1.shape[0]): # loop through each sample, to keep track of hidden states
246 |
247 | # create empty results for results per sample
248 | outputs = []
249 | spatial_attns = []
250 | temporal_attns = []
251 |
252 | # run the trained encoders on this sample
253 | lstm_enc_output, enc_s_state, enc_c_state = temporal_enc([x1[idx:idx+1], x2[idx:idx+1]])
254 |
255 | if model_type != 'price':
256 | ccn_enc_output = spatial_enc(x1[idx:idx+1])
257 | intial_in = np.average(x1[idx:idx+1], axis=(2,3))
258 | else:
259 | intial_in = x1[idx:idx+1]
260 |
261 | prev_prediction = intial_in[:,-1:,0:1]
262 |
263 | for ts in range(Ty):
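    | # autoregressive decoding: the previous step's prediction (prev_prediction) is fed back into the
    | # decoder together with that step's time (and, for non-price variables, NWP) features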
264 |
265 | if model_type != 'price':
266 | # declare decoder input
267 | if ts > 0:
268 | decoder_input = concatenate([x4[idx:idx+1,ts-1:ts,:], x3[idx:idx+1,ts-1:ts,:]], axis=-1)
269 | else:
270 | decoder_input = concatenate([intial_in[:,-1:,1:], x2[idx:idx+1,-1:,:]], axis=-1)
271 | else:
272 | if ts > 0:
273 | decoder_input = x3[idx:idx+1,ts-1:ts,:]
274 | else:
275 | decoder_input = x2[idx:idx+1,-1:,:]
276 |
277 | if model_type != 'price':
278 | pred, s_state, c_state, attn_weights_temp_test, attn_weights_spat_test = decoder([x2[idx:idx+1,ts:ts+1,:], x3[idx:idx+1,ts:ts+1,:], x4[idx:idx+1,ts:ts+1,:], decoder_input, ccn_enc_output, lstm_enc_output, prev_prediction, s_state, c_state])
279 | spatial_attns.append(attn_weights_spat_test)
280 | else:
281 | pred, s_state, c_state, attn_weights_temp_test = decoder([x2[idx:idx+1,ts:ts+1,:], x3[idx:idx+1,ts:ts+1,:], x4[idx:idx+1,ts:ts+1,:], decoder_input, lstm_enc_output, prev_prediction, s_state, c_state])
282 |
283 | prev_prediction = pred
284 |
285 | outputs.append(pred)
286 | temporal_attns.append(attn_weights_temp_test)
287 |
288 | combined_outputs = np.concatenate(outputs, axis=1)
289 | combined_temp_attn = np.concatenate(temporal_attns, axis=-1)
290 |
291 |
292 | total_pred[idx, : , :] = scaler.inverse_transform(combined_outputs[0,:,:])
293 | total_temp[idx, : , :] = combined_temp_attn
294 |
295 | if model_type != 'price':
296 | combined_spat_attn = np.concatenate(spatial_attns, axis=-1)
297 | total_spat[idx, : , :] = combined_spat_attn
298 |
299 | predictions[f'{q}'] = total_pred
300 | quantile_temporal_attns[f'{q}'] = total_temp
301 | if model_type != 'price': quantile_spatial_attns[f'{q}'] = total_spat
302 |
303 | # plot predictions for specified index
304 | for idx, (key, values) in enumerate(predictions.items()):
305 | plt.plot(values[plot_ref:plot_ref+7,:].flatten(), label=f"prediction_{key}")
306 |
307 | plt.plot(y[plot_ref:plot_ref+7,:,0].flatten(), label="actual")
308 | plt.legend()
309 | plt.show()
310 |
311 |
312 | # plot temporal attention (quantile 0.5)
313 | att_w_temp = np.transpose(quantile_temporal_attns['0.5'][plot_ref])
314 | if model_type != "price":
315 | x = np.average(x1, axis=(2,3))[plot_ref, :]
316 | else:
317 | x = x1[plot_ref, :]
318 |
319 | y_attn = y[plot_ref, :, 0]
320 | y_hat = predictions['0.5'][plot_ref, :]
321 |
322 | # make attention plotting function
323 | def temporal_attention_graph(x, y, att_w_temp):
324 |
325 | fig = plt.figure(figsize=(24, 8))
326 | gs = gridspec.GridSpec(ncols=90, nrows=100)
327 |
328 | upper_axis = fig.add_subplot(gs[0:20, 10:75])
329 | left_axis = fig.add_subplot(gs[25:, 0:8])
330 | atten_axis = fig.add_subplot(gs[25:, 10:])
331 |
332 | upper_axis.plot(x)
333 | upper_axis.set_xlim([0, Tx])
334 | upper_axis.set_ylim([0, 1])
335 | upper_axis.set_xticks(range(0, Tx))
336 | upper_axis.set_xticklabels(range(0, Tx))
337 |
338 | left_axis.plot(y, range(0,Ty), label='True')
339 | left_axis.plot(y_hat, range(0,Ty), label='Prediction')
340 | left_axis.set_ylim([0, Ty])
341 | left_axis.set_yticks(range(0, Ty, 6))
342 | left_axis.set_yticklabels(range(0, Ty, 6))
343 | left_axis.invert_yaxis()
344 |
345 | sns.heatmap(att_w_temp, cmap='flare', ax = atten_axis, vmin=0, vmax=0.001)
346 | atten_axis.set_xticks(range(0, Tx))
347 | atten_axis.set_xticklabels(range(0, Tx))
348 | atten_axis.set_yticks(range(0, Ty, 4))
349 | atten_axis.set_yticklabels(range(0, Ty, 4))
350 |
351 | plt.show()
352 |
353 |
354 | if plot_temporal_attention is True:
355 | temporal_attention_graph(x, y_attn, att_w_temp)
356 |
357 |
358 |
359 | # plot spatial attention
360 | def plot_spatial_predictions(spatial_data, title, height_scale, width_scale, frame_num):
361 |
362 | fig = plt.figure(figsize=[8,10]) # a new figure window
363 | ax_set = fig.add_subplot(1, 1, 1)
364 |
365 | # create baseline map
366 | # spatial data on UK basemap
367 | df = pd.DataFrame({
368 | 'LAT': [49.78, 61.03],
369 | 'LON': [-11.95, 1.55],
370 | })
371 |
372 | geo_df = geopandas.GeoDataFrame(df, crs = {'init': 'epsg:4326'},
373 | geometry=geopandas.points_from_xy(df.LON, df.LAT)).to_crs(epsg=3857)
374 |
375 | ax = geo_df.plot(
376 | figsize= (8,10),
377 | alpha = 0,
378 | ax=ax_set,
379 | )
380 |
381 | plt.title(title)
382 | ax.set_axis_off()
383 |
384 | # add basemap
385 | url = 'http://tile.stamen.com/terrain/{z}/{x}/{y}.png'
386 | zoom = 10
387 | xmin, xmax, ymin, ymax = ax.axis()
388 | basemap, extent = ctx.bounds2img(xmin, ymin, xmax, ymax, zoom=zoom, url=url)
389 | ax.imshow(basemap, extent=extent, interpolation='gaussian')
390 | attn_over = np.resize(spatial_data[0], (height_scale, width_scale))
391 |
392 | gb_shape = geopandas.read_file("./Data/shapefiles/GBR_adm/GBR_adm0.shp").to_crs(epsg=3857)
393 | irl_shape = geopandas.read_file("./Data/shapefiles/IRL_adm/IRL_adm0.shp").to_crs(epsg=3857)
394 | gb_shape.boundary.plot(ax=ax, edgecolor="black", linewidth=0.5, alpha=0.4)
395 | irl_shape.boundary.plot(ax=ax, edgecolor="black", linewidth=0.5, alpha=0.4)
396 | overlay = ax.imshow(attn_over, cmap='viridis', alpha=0.5, extent=extent)
397 | # ax.axis((xmin, xmax, ymin, ymax))
398 | txt = fig.text(.5, 0.09, '', ha='center')
399 |
400 |
401 | def update(i):
402 | spatial_over = np.resize(spatial_data[i], (height_scale, width_scale))
403 | # overlay = ax.imshow(spatial_over, cmap='viridis', alpha=0.5, extent=extent)
404 | overlay.set_data(spatial_over)
405 | txt.set_text(f"Timestep: {i}")
406 | # plt.cla()
407 |
408 | return [overlay, txt]
409 |
410 | animation_ = FuncAnimation(fig, update, frames=frame_num, blit=False, repeat=False)
411 | plt.show(block=True)
412 | # animation_.save(f'{title}_animation.gif', writer='imagemagick')
413 |
414 | if plot_spatial_attention is True:
415 | # transpose spatial attention results
416 | att_w_spat = np.transpose(total_spat[plot_ref])
417 | # plot attention weights
418 | plot_spatial_predictions(att_w_spat, 'Spatial Context', 16, 20, 48)
419 |
420 |
421 |
422 |
423 |
424 | # add date references to result dictionaries
425 | time_refs = {'input_times': times_in, 'output_times': times_out}
426 |
427 | predictions['time_refs'] = time_refs
428 | quantile_temporal_attns['time_refs'] = time_refs
429 |
430 | # add x-input data
431 | quantile_temporal_attns['input_features'] = x1
432 |
433 | # add true value for reference to prediction dictionary
434 | predictions['y_true'] = y
435 |
436 | # performance evaluation
437 | # evaluate_predictions(predictions)
438 |
439 |
440 | # save results - forecasted timeseries matrix
441 | with open(f'../../results/seq2seq+temporal+spatial/{model_type}/forecasted_time_series_{model_type}.pkl', 'wb') as ts_file:
442 | dump(predictions, ts_file)
443 |
444 | # save results - forecasted temporal attention matrix
445 | with open(f'../../results/seq2seq+temporal+spatial/{model_type}/attention_data_{model_type}.pkl', 'wb') as attention_file:
446 | dump(quantile_temporal_attns, attention_file)
447 |
448 | # save results - forecasted spatial attention matrix
449 | with open(f'../../results/seq2seq+temporal+spatial/{model_type}/spatial_attention_data_{model_type}.pkl', 'wb') as spatial_file:
450 | dump(quantile_spatial_attns, spatial_file)
451 |
452 |
453 |
454 |
455 |
456 |
457 |
458 |
459 |
460 |
461 |
462 |
--------------------------------------------------------------------------------
/scripts/models/seq2seq+spatial+temporal_attn.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sys, os
3 | import h5py
4 | import tensorflow as tf
5 | import tensorflow.keras
6 | import tensorflow.keras.backend as K
7 | from tensorflow.keras import Model
8 | from tensorflow.keras.layers import Conv2D, Bidirectional, Dense, TimeDistributed, LSTM, BatchNormalization
9 | from tensorflow.keras.layers import Input, Activation, AveragePooling2D, Lambda, concatenate, Reshape
10 | from keras.backend import sigmoid
11 | from keras.utils.generic_utils import get_custom_objects
12 |
13 | # import custom classes
14 | from _shared.attention_layer import attention
15 | from _shared.timeseries_data_generator import DataGenerator
16 |
17 | np.set_printoptions(threshold=sys.maxsize)
18 | tf.random.set_seed(180)
19 |
20 | ###########################################_____SET_MODEL_PARAMETERS_____############################################
21 | model_type ="solar"
22 |
23 | # declare dataset file
24 | dataset_name = f'dataset_{model_type}.hdf5'
25 |
26 | # declare quantiles for model
27 | quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95]
28 |
29 | # get useful size parameters
30 | f = h5py.File(f"../../data/processed/{model_type}/{dataset_name}", "r")
31 | features = np.empty_like(f['train_set']['X1_train'][0])
32 | times_in = np.empty_like(f['train_set']['X2_train'][0])
33 | times_out = np.empty_like(f['train_set']['X3_train'][0])
34 | labels = np.empty_like(f['train_set']['y_train'][0])
35 | x_len = f['train_set']['X1_train'].shape[0]
36 | y_len = f['train_set']['y_train'].shape[0]
37 | f.close()
38 |
39 | # input / output sequence sizes
40 | input_seq_size = 336
41 | output_seq_size = 48
42 | n_s = 32 # number of hidden states used through model
43 |
44 | ###########################################_____DATA_GENERATOR_____#################################################
45 |
46 | # data generator input parameters - avoid shuffle in this case
47 | params = {'batch_size': 16,
48 | 'shuffle': False }
49 |
50 | # instantiate data generator object
51 | training_generator = DataGenerator(dataset_name = dataset_name, x_length = x_len, y_length = y_len, hidden_states = n_s, **params)
52 |
53 | ###########################################_____MODEL_ARCHITECTURE_____#################################################
54 |
55 | # capture some more useful dimensions
56 | Tx = input_seq_size
57 | Ty = output_seq_size
58 |
59 | if model_type != "price":
60 | height, width, channels = features.shape[0], features.shape[1], features.shape[2]
61 | else:
62 | channels = features.shape[-1]
63 |
64 | times_in_dim = times_in.shape[-1]
65 | times_out_dim = times_out.shape[-1]
66 |
67 | # spatial encoder
68 | def cnn_encoder(ccn_input):
69 | # input shape -> (batch, time, width, height, features)
70 | # output shape -> (batch, time, width x height, embedding_size)
71 |
72 | ccn_enc_output = TimeDistributed(Conv2D(16, kernel_size=3, strides=1, activation="relu"))(ccn_input)
73 | ccn_enc_output = BatchNormalization()(ccn_enc_output)
74 | ccn_enc_output = TimeDistributed(AveragePooling2D(pool_size=(2, 2), data_format="channels_last"))(ccn_enc_output)
75 | ccn_enc_output = TimeDistributed(Conv2D(32, kernel_size=3, strides=1, activation="relu"))(ccn_enc_output)
76 | ccn_enc_output = BatchNormalization()(ccn_enc_output)
77 | ccn_enc_output = TimeDistributed(Conv2D(64, kernel_size=3, strides=1, activation="relu"))(ccn_enc_output)
78 | ccn_enc_output = BatchNormalization()(ccn_enc_output)
79 | ccn_enc_output = TimeDistributed(Conv2D(128, kernel_size=3, strides=1, activation="relu"))(ccn_enc_output)
80 | ccn_enc_output = BatchNormalization()(ccn_enc_output)
81 |
82 | ccn_enc_output = Reshape((ccn_enc_output.shape[1], -1, ccn_enc_output.shape[-1]))(ccn_enc_output)
83 |
84 | ccn_enc_output = K.mean(ccn_enc_output, axis=1)
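    | # average over the time axis, leaving (batch, flattened_positions, 128); the inference script
    | # expects 320 flattened spatial positions at this point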
85 |
86 | return ccn_enc_output
87 |
88 | # temporal encoder layers
89 | lstm_encoder = Bidirectional(LSTM(n_s*2, return_sequences = True, return_state = True))
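    | # with n_s = 32 each direction has 64 units, so the encoder outputs (and the concatenated
    | # hidden/cell states below) are 128-dimensional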
90 |
91 | def encoder(input, times_in):
92 |
93 | # accommodate the case without a 2D (spatial) dataset
94 | if model_type != "price":
95 | enc_output = K.mean(input, axis=(2,3))
96 | else:
97 | enc_output = input
98 |
99 | # concat input time features with input
100 | enc_output = concatenate([enc_output, times_in], axis=-1)
101 |
102 | enc_output, forward_h, forward_c, backward_h, backward_c = lstm_encoder(enc_output)
103 | # enc_output, enc_h, enc_s = lstm_encoder(enc_output)
104 |
105 | enc_h = concatenate([forward_h, backward_h], axis=-1)
106 | enc_s = concatenate([forward_c, backward_c], axis=-1)
107 |
108 | # # concat input time features with input
109 | # enc_output = concatenate([enc_output, times_in], axis=-1)
110 |
111 | return enc_output, enc_h, enc_s
112 |
113 | # declare decoder layer
114 | lstm_decoder = LSTM(n_s, return_sequences = True, return_state = True)
115 |
116 | def decoder(context, h_state, cell_state):
117 |
118 | # concat encoder input and time features
119 | # context = concatenate([context, times_out], axis=-1)
120 |
121 | dec_output, h_state, c_state = lstm_decoder(context, initial_state = [h_state, cell_state])
122 |
123 | return dec_output, h_state, c_state
124 |
125 | # make custom activation - swish
126 | def swish(x, beta = 1):
127 | return (x * sigmoid(beta * x))
128 |
129 | # add swish activation to keras
130 | get_custom_objects().update({'swish': Activation(swish)})
131 |
132 | # define inputs for model
133 | if model_type != "price":
134 | x_input = Input(shape=(Tx, height, width, channels))
135 | else:
136 | x_input = Input(shape=(Tx, channels))
137 |
138 | times_in = Input(shape=(Tx, times_in_dim))
139 | times_out = Input(shape=(Ty, times_out_dim))
140 | out_nwp = Input(shape=(Ty, channels-1))
141 | s_state0 = Input(shape=(32,))
142 | c_state0 = Input(shape=(32,))
143 |
144 | # create empty list for outputs
145 | qunatile_predictions = []
146 | temporal_attns = []
147 | spatial_attns = []
148 |
149 | # call CCN_encoder function
150 | if model_type != "price":
151 | ccn_enc_output = cnn_encoder(x_input)
152 |
153 | # call LSTM_encoder function
154 | lstm_enc_output, enc_s_state, enc_c_state = encoder(x_input, times_in)
155 |
156 | # call decoder
157 | for q in quantiles:
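    | # one attention + decoder branch is built per quantile; all branches share the CNN and LSTM encoders called above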
158 |
159 | # reset model parameters for each quantile prediction
160 | ts_predictions = []
161 | temp_attns = []
162 | spatial_attns = []
163 |
164 | if model_type != "price":
165 | intial_in = K.mean(x_input, axis=(2,3))
166 | prev_prediction = intial_in[:,-1:,0:1]
167 |
168 | decoder = LSTM(32, return_sequences = False, return_state = True, name=f'decoder_q_{q}')
169 | spatial_attention = attention(n_s, name=f"spatial_attention_q_{q}")
170 | temporal_attention = attention(n_s, name=f"temporal_attention_q_{q}")
171 |
172 | output_1 = Dense(32, activation="swish", name=f'dense1_q_{q}')
173 | output_2 = Dense(1, name=f'dense3_q_{q}')
174 | final_act = Activation('relu', name=f'relu_act_q_{q}')
175 |
176 | # reset hidden states
177 | s_state = s_state0
178 | c_state = c_state0
179 |
180 | # make prediction for each output timestep
181 | for ts in range(Ty):
182 |
183 | if model_type != "price":
184 | enc_out = concatenate([out_nwp[:,ts:ts+1,:], times_out[:,ts:ts+1,:]], axis=-1, name=f'concat1_q_{q}_{ts}')
185 | else:
186 | enc_out = times_out[:,ts:ts+1,:]
187 |
188 | # get context matrix (temporal)
189 | attn_weights_temp, context = temporal_attention(lstm_enc_output, enc_out, s_state, c_state)
190 |
191 | # get context matrix (spatial)
192 | if model_type != "price":
193 | attn_weights_spat, context_spat = spatial_attention(ccn_enc_output, enc_out, s_state, c_state)
194 |
195 | # combine spatial and temporal context
196 | context = concatenate([context, context_spat], axis=-1, name=f'concat1.5_q_{q}_{ts}')
197 |
198 | # make decoder input - nwp + time features if not price prediction, otherwise just time features
199 | if ts > 0:
200 | decoder_input = concatenate([out_nwp[:,ts-1:ts,:], times_out[:,ts-1:ts,:]], axis=-1, name=f'concat2_q_{q}_{ts}')
201 | else:
202 | decoder_input = concatenate([intial_in[:,-1:,1:], times_in[:,-1:,:]], axis=-1, name=f'concat3_q_{q}_{ts}')
203 | else:
204 | if ts > 0:
205 | decoder_input = times_out[:,ts-1:ts,:]
206 | else:
207 | decoder_input = times_in[:,-1:,:]
208 |
209 | # call decoder
210 | dec_output, s_state, c_state = decoder(decoder_input, initial_state = [s_state, c_state])
211 |
212 | # combine context with decoder output
213 | prediction = concatenate([context, K.expand_dims(dec_output,axis=1)], axis=-1, name=f'concat5_q_{q}_{ts}')
214 |
215 | # pass through MLP
216 | output = output_1(prediction)
217 | output = output_2(output)
218 |
219 | if model_type == "solar":
220 | output = final_act(output)
221 |
222 | # collect outputs for final predictions
223 | prev_prediction = output
224 | ts_predictions.append(output)
225 | temp_attns.append(attn_weights_temp)
226 |
227 | if model_type != "price":
228 | spatial_attns.append(attn_weights_spat)
229 |
230 | ts_predictions_total = concatenate(ts_predictions, axis = 1)
231 | temp_attns_total = concatenate(temp_attns, axis = -1)
232 |
233 | if model_type != "price":
234 | sptial_attns_total = concatenate(spatial_attns, axis = -1)
235 |
236 | qunatile_predictions.append(ts_predictions_total)
237 |
238 | # append spatial and temporal predictions - if using final model as inference
239 | # qunatile_predictions.extend([temp_attns_total])
240 | # qunatile_predictions.extend([sptial_attns_total])
241 |
242 | # instantiate model
243 | model = Model(inputs = [x_input, times_in, times_out, out_nwp, s_state0, c_state0], outputs = qunatile_predictions)
244 |
245 |
246 | ###########################################_____MODEL_TRAINING_____#################################################
247 |
248 | # define optimiser (a clipvalue can be added here if gradients need clipping)
249 | optimizer = tensorflow.keras.optimizers.Adam(learning_rate = 0.001)
250 |
251 | # define loss for each quantile
252 | q_losses = [lambda y, f, q=q: K.mean(K.maximum(q * (y - f), (q - 1) * (y - f)), axis=-1) for q in quantiles]
253 |
254 | # append additional empty losses for temporal and spatial encoders
255 | # q_losses.append([None,None])
256 |
257 | # compile and train model
258 | model.compile(loss = q_losses, optimizer= optimizer)
259 | print(model.summary())
260 | model.fit(training_generator, epochs = 20)
261 |
262 | # save models - saving encoders individually for inference
263 | os.makedirs(f'../../models/seq2seq+temporal+spatial/{model_type}', exist_ok=True)
264 | model.save(f'../../models/seq2seq+temporal+spatial/{model_type}/{model_type}_main.h5')
265 |
266 | # save some additional models for inference
267 | encoder_temporal_model = Model(inputs = [x_input, times_in], outputs=[lstm_enc_output, enc_s_state, enc_c_state])
268 | encoder_temporal_model.save(f'../../models/seq2seq+temporal+spatial/{model_type}/{model_type}_temporal_enc.h5')
269 |
270 | # save spatial encoders if not price forecasting
271 | if model_type != 'price':
272 | encoder_spatial_model = Model(x_input, ccn_enc_output)
273 | encoder_spatial_model.save(f'../../models/seq2seq+temporal+spatial/{model_type}/{model_type}_spatial_enc.h5')
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
--------------------------------------------------------------------------------
/scripts/models/seq2seq+temporal_attn.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sys, os
3 | import h5py
4 | import tensorflow as tf
5 | import tensorflow.keras
6 | import tensorflow.keras.backend as K
7 | from tensorflow.keras import Model
8 | from tensorflow.keras.layers import Conv2D, Bidirectional, Dense, TimeDistributed, LSTM
9 | from tensorflow.keras.layers import Input, Activation, AveragePooling2D, Lambda, concatenate, Reshape
10 | from keras.backend import sigmoid
11 | from keras.utils.generic_utils import get_custom_objects
12 |
13 | # import custom classes
14 | from _shared.attention_layer import attention
15 | from _shared.timeseries_data_generator import DataGenerator
16 |
17 | np.set_printoptions(threshold=sys.maxsize)
18 | tf.random.set_seed(180)
19 |
20 | ###########################################_____SET_MODEL_PARAMETERS_____############################################
21 | model_type ="solar"
22 |
23 | # declare dataset file
24 | dataset_name = f'dataset_{model_type}.hdf5'
25 |
26 | # declare quantiles for model
27 | quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95]
28 |
29 | # get useful size parameters
30 | f = h5py.File(f"../../data/processed/{model_type}/{dataset_name}", "r")
31 | features = np.empty_like(f['train_set']['X1_train'][0])
32 | times_in = np.empty_like(f['train_set']['X2_train'][0])
33 | times_out = np.empty_like(f['train_set']['X3_train'][0])
34 | labels = np.empty_like(f['train_set']['y_train'][0])
35 | x_len = f['train_set']['X1_train'].shape[0]
36 | y_len = f['train_set']['y_train'].shape[0]
37 | f.close()
38 |
39 | # input / output sequence sizes
40 | input_seq_size = 336
41 | output_seq_size = 48
42 | n_s = 32 # number of hidden states used through model
43 |
44 | ###########################################_____DATA_GENERATOR_____#################################################
45 |
46 | # data generator input parameters - avoid shuffle in this case
47 | params = {'batch_size': 16,
48 | 'shuffle': False }
49 |
50 | # instantiate data generator object
51 | training_generator = DataGenerator(dataset_name = dataset_name, x_length = x_len, y_length = y_len, hidden_states = n_s, **params)
52 |
53 | ###########################################_____MODEL_ARCHITECTURE_____#################################################
54 |
55 | # capture some more useful dimensions
56 | Tx = input_seq_size
57 | Ty = output_seq_size
58 |
59 | if model_type != "price":
60 | height, width, channels = features.shape[0], features.shape[1], features.shape[2]
61 | else:
62 | channels = features.shape[-1]
63 |
64 | times_in_dim = times_in.shape[-1]
65 | times_out_dim = times_out.shape[-1]
66 |
67 |
68 | # temporal encoder layers
69 | lstm_encoder = Bidirectional(LSTM(n_s*2, return_sequences = True, return_state = True))
70 |
71 | def encoder(input, times_in):
72 |
73 | # accommodate the case without a 2D (spatial) dataset
74 | if model_type != "price":
75 | enc_output = K.mean(input, axis=(2,3))
76 | else:
77 | enc_output = input
78 |
79 | # concat input time features with input
80 | enc_output = concatenate([enc_output, times_in], axis=-1)
81 |
82 | enc_output, forward_h, forward_c, backward_h, backward_c = lstm_encoder(enc_output)
83 | # enc_output, enc_h, enc_s = lstm_encoder(enc_output)
84 |
85 | enc_h = concatenate([forward_h, backward_h], axis=-1)
86 | enc_s = concatenate([forward_c, backward_c], axis=-1)
87 |
88 | # concat input time features with input
89 | # enc_output = concatenate([enc_output, times_in], axis=-1)
90 |
91 | return enc_output, enc_h, enc_s
92 |
93 | # declare decoder layer
94 | lstm_decoder = LSTM(n_s, return_sequences = True, return_state = True)
95 |
96 | def decoder(context, h_state, cell_state):
97 |
98 | # concat encoder input and time features
99 | # context = concatenate([context, times_out], axis=-1)
100 |
101 | dec_output, h_state, c_state = lstm_decoder(context, initial_state = [h_state, cell_state])
102 |
103 | return dec_output, h_state, c_state
104 |
105 | # make custom activation - swish
106 | def swish(x, beta = 1):
107 | return (x * sigmoid(beta * x))
108 |
109 | # add swish activation to keras
110 | get_custom_objects().update({'swish': Activation(swish)})
111 |
112 | # define inputs for model
113 | x_input = Input(shape=(Tx, channels))
114 |
115 | times_in = Input(shape=(Tx, times_in_dim))
116 | times_out = Input(shape=(Ty, times_out_dim))
117 | out_nwp = Input(shape=(Ty, channels-1))
118 | s_state0 = Input(shape=(32,))
119 | c_state0 = Input(shape=(32,))
120 |
121 | # create empty list for outputs
122 | qunatile_predictions = []
123 | temporal_attns = []
124 |
125 | # call LSTM_encoder function
126 | lstm_enc_output, enc_s_state, enc_c_state = encoder(x_input, times_in)
127 |
128 | # call decoder
129 | for q in quantiles:
130 |
131 | # reset model parameters for each quantile prediction
132 | ts_predictions = []
133 | temp_attns = []
134 | spatial_attns = []
135 |
136 | if model_type != "price":
137 | intial_in = K.mean(x_input, axis=(2,3))
138 | prev_prediction = intial_in[:,-1:,0:1]
139 |
140 | decoder = LSTM(32, return_sequences = False, return_state = True, name=f'decoder_q_{q}')
141 | spatial_attention = attention(n_s, name=f"spatial_attention_q_{q}")
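    | # (the spatial_attention layer above mirrors the spatial+temporal script but is never called in this temporal-only variant)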
142 | temporal_attention = attention(n_s, name=f"temporal_attention_q_{q}")
143 |
144 | output_1 = Dense(32, activation="swish", name=f'dense1_q_{q}')
145 | output_2 = Dense(1, name=f'dense3_q_{q}')
146 | final_act = Activation('relu', name=f'relu_act_q_{q}')
147 |
148 | # reset hidden states
149 | s_state = s_state0
150 | c_state = c_state0
151 |
152 | # make prediction for each output timestep
153 | for ts in range(Ty):
154 |
155 | if model_type != "price":
156 | enc_out = concatenate([out_nwp[:,ts:ts+1,:], times_out[:,ts:ts+1,:]], axis=-1, name=f'concat1_q_{q}_{ts}')
157 | else:
158 | enc_out = times_out[:,ts:ts+1,:]
159 |
160 | # get context matrix (temporal)
161 | attn_weights_temp, context = temporal_attention(lstm_enc_output, enc_out, s_state, c_state)
162 |
163 | # no spatial context in this variant - go straight to building the decoder input
164 | if model_type != "price":
165 |
166 | # make decoder input - nwp + time features if not price prediction, otherwise just time features
167 | if ts > 0:
168 | decoder_input = concatenate([out_nwp[:,ts-1:ts,:], times_out[:,ts-1:ts,:]], axis=-1, name=f'concat2_q_{q}_{ts}')
169 | else:
170 | decoder_input = concatenate([intial_in[:,-1:,1:], times_in[:,-1:,:]], axis=-1, name=f'concat3_q_{q}_{ts}')
171 | else:
172 | if ts > 0:
173 | decoder_input = times_out[:,ts-1:ts,:]
174 | else:
175 | decoder_input = times_in[:,-1:,:]
176 |
177 | # call decoder
178 | dec_output, s_state, c_state = decoder(decoder_input, initial_state = [s_state, c_state])
179 |
180 | # combine context with decoder output
181 | prediction = concatenate([context, K.expand_dims(dec_output,axis=1)], axis=-1, name=f'concat5_q_{q}_{ts}')
182 |
183 | # pass through MLP
184 | output = output_1(prediction)
185 | output = output_2(output)
186 |
187 | if model_type == "solar":
188 | output = final_act(output)
189 |
190 | # collect outputs for final predictions
191 | prev_prediction = output
192 | ts_predictions.append(output)
193 | temp_attns.append(attn_weights_temp)
194 |
195 | ts_predictions_total = concatenate(ts_predictions, axis = 1)
196 | temp_attns_total = concatenate(temp_attns, axis = -1)
197 |
198 | qunatile_predictions.append(ts_predictions_total)
199 |
200 | # append spatial and temporal predictions - if using final model as inference
201 | # qunatile_predictions.extend([temp_attns_total])
202 | # qunatile_predictions.extend([sptial_attns_total])
203 |
204 | # instantiate model
205 | model = Model(inputs = [x_input, times_in, times_out, out_nwp, s_state0, c_state0], outputs = qunatile_predictions)
206 |
207 |
208 | ###########################################_____MODEL_TRAINING_____#################################################
209 |
210 | # define optimiser (a clipvalue can be added here if gradients need clipping)
211 | optimizer = tensorflow.keras.optimizers.Adam(learning_rate = 0.001)
212 |
213 | # define loss for each quantile
214 | q_losses = [lambda y, f, q=q: K.mean(K.maximum(q * (y - f), (q - 1) * (y - f)), axis=-1) for q in quantiles]
215 |
216 | # append additional empty losses for temporal and spatial encoders
217 | # q_losses.append([None,None])
218 |
219 | # compile and train model
220 | model.compile(loss = q_losses, optimizer= optimizer)
221 | print(model.summary())
222 | model.fit(training_generator, epochs = 20)
223 |
224 | # save models - saving encoders individually for inference
225 | os.makedirs(f'../../models/seq2seq+temporal/{model_type}', exist_ok=True)
226 | model.save(f'../../models/seq2seq+temporal/{model_type}/{model_type}_main.h5')
227 |
228 | # save some additional models for inference
229 | encoder_temporal_model = Model(inputs = [x_input, times_in], outputs=[lstm_enc_output, enc_s_state, enc_c_state])
230 | encoder_temporal_model.save(f'../../models/seq2seq+temporal/{model_type}/{model_type}_temporal_enc.h5')
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
--------------------------------------------------------------------------------
/scripts/models/seq2seq_model.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sys, os
3 | import h5py
4 | import tensorflow as tf
5 | import tensorflow.keras
6 | import tensorflow.keras.backend as K
7 | from tensorflow.keras import Model
8 | from tensorflow.keras.layers import Conv2D, Bidirectional, Dense, TimeDistributed, LSTM
9 | from tensorflow.keras.layers import Input, Activation, AveragePooling2D, Lambda, concatenate, Reshape, RepeatVector
10 | from keras.backend import sigmoid
11 | from keras.utils.generic_utils import get_custom_objects
12 |
13 |
14 |
15 | np.set_printoptions(threshold=sys.maxsize)
16 | tf.random.set_seed(180)
17 |
18 | ###########################################_____SET_MODEL_PARAMETERS_____############################################
19 | model_type ="solar"
20 |
21 | # declare dataset file
22 | dataset_name = f'dataset_{model_type}.hdf5'
23 |
24 | # declare quantiles for model
25 | quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95]
26 |
27 | # get useful size parameters
28 | f = h5py.File(f"../../data/processed/{model_type}/{dataset_name}", "r")
29 | features = np.empty_like(f['train_set']['X1_train'][0])
30 | times_in = np.empty_like(f['train_set']['X2_train'][0])
31 | times_out = np.empty_like(f['train_set']['X3_train'][0])
32 | labels = np.empty_like(f['train_set']['y_train'][0])
33 | x_len = f['train_set']['X1_train'].shape[0]
34 | y_len = f['train_set']['y_train'].shape[0]
35 | f.close()
36 |
37 | # input / output sequence sizes
38 | input_seq_size = 336
39 | output_seq_size = 48
40 | n_s = 32 # number of hidden states used through model
41 |
42 | ###########################################_____DATA_GENERATOR_____#################################################
43 |
44 | # data generator input parameters - avoid shuffle in this case
45 |
46 | params = {'batch_size': 64,
47 | 'shuffle': False }
48 |
49 | class DataGenerator(tensorflow.keras.utils.Sequence):
50 |
51 | def __init__(self, dataset_name, x_length, y_length, batch_size, shuffle):
52 | self.dataset_name = dataset_name
53 | self.batch_size = batch_size
54 | self.shuffle = shuffle
55 | self.xlen = x_length
56 | self.ylen = y_length
57 | self.index_ref = 0
58 | self.on_epoch_end()
59 |
60 | def __len__(self):
61 | # 'number of batches per Epoch'
62 | # return int(np.floor((self.xlen - (input_seq_size-1)) / self.batch_size))
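    | # number of complete input + output windows that fit in the training series, floored to whole batches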
63 | return int(np.floor((self.ylen - input_seq_size - (output_seq_size-1)) / self.batch_size))
64 |
65 | def __getitem__(self, index):
66 |
67 |
68 | input_indexes = self.input_indexes[(index*self.batch_size) : (index*self.batch_size) + (self.batch_size + (input_seq_size-1))]
69 | output_indexes = self.output_indexes[(index*self.batch_size) + input_seq_size : (index*self.batch_size) + input_seq_size + (self.batch_size + (output_seq_size-1))]
70 |
71 | # Generate data
72 | (X_train1, X_train2, X_train3, X_train4), y_train = self.__data_generation(input_indexes, output_indexes)
73 |
74 | y_trues = [y_train for i in quantiles]
75 |
76 | return (X_train1, X_train2, X_train3, X_train4), (y_trues) # one copy of the targets per quantile output head
77 |
78 | def on_epoch_end(self):
79 | # set length of indexes for each epoch
80 | self.input_indexes = np.arange(self.xlen)
81 | self.output_indexes = np.arange(self.ylen)
82 |
83 | if self.shuffle == True:
84 | np.random.shuffle(self.input_indexes)
85 |
86 | def to_sequence(self, x1, x2, x3, x4, y):
87 | # convert timeseries batch in sequences
88 | input_start, output_start = 0, 0
89 |
90 | seqX1, seqX2, seqX3, seqX4, seqY = [], [], [], [], []
91 |
92 | while (input_start + input_seq_size) <= len(x1):
93 | # offset handled during pre-processing
94 | input_end = input_start + input_seq_size
95 | output_end = output_start + output_seq_size
96 |
97 | # inputs
98 | seqX1.append(x1[input_start:input_end])
99 | seqX2.append(x2[input_start:input_end])
100 |
101 | # outputs
102 | seqX3.append(x3[output_start:output_end])
103 | seqX4.append(x4[output_start:output_end])
104 | seqY.append(y[output_start:output_end])
105 |
106 | input_start += 1
107 | output_start += 1
108 |
109 | seqX1, seqX2, seqX3, seqX4, seqY = np.array(seqX1), np.array(seqX2), np.array(seqX3), np.array(seqX4), np.array(seqY)
110 |
111 | return seqX1, seqX2, seqX3, seqX4, seqY
112 |
113 | def __data_generation(self, input_indexes, output_indexes):
114 |
115 | f = h5py.File(f"../../data/processed/{model_type}/{self.dataset_name}", "r")
116 | # X_train1 = f['train_set']['X1_train'][input_indexes]
117 | X_train2 = f['train_set']['X2_train'][input_indexes]
118 | X_train3 = f['train_set']['X3_train'][output_indexes]
119 |
120 | if model_type != 'price':
121 | X_train1 = f['train_set']['X1_train'][input_indexes][:,:,:,:]
122 | X_train1 = np.average(X_train1, axis=(1,2))
123 |
124 | X_train4 = f['train_set']['X1_train'][output_indexes][:,:,:,:]
125 | X_train4 = np.average(X_train4, axis=(1,2))
126 | X_train4 = X_train4[:,1:]
127 |
128 | else:
129 | X_train1 = f['train_set']['X1_train'][input_indexes][:,:]
130 |
131 | X_train4 = f['train_set']['X1_train'][output_indexes][:,:]
132 | X_train4 = X_train4[:,:-1]
133 |
134 |
135 | y_train = f['train_set']['y_train'][output_indexes]
136 | f.close()
137 |
138 | # convert to sequence data
139 | X_train1, X_train2, X_train3, X_train4, y_train = self.to_sequence(X_train1, X_train2, X_train3, X_train4, y_train)
140 |
141 | return (X_train1, X_train2, X_train3, X_train4), y_train
142 |
143 | training_generator = DataGenerator(dataset_name = dataset_name, x_length = x_len, y_length = y_len, **params)
144 |
145 | ###########################################_____MODEL_ARCHITECTURE_____#################################################
146 |
147 | # capture some more useful dimensions
148 | Tx = input_seq_size
149 | Ty = output_seq_size
150 |
151 | channels = features.shape[-1]
152 |
153 | times_in_dim = times_in.shape[-1]
154 | times_out_dim = times_out.shape[-1]
155 |
156 | # make custom activation - swish
157 | def swish(x, beta = 1):
158 | return (x * sigmoid(beta * x))
159 |
160 | # add swish activation to keras
161 | get_custom_objects().update({'swish': Activation(swish)})
162 |
163 | # define inputs for model
164 | x_input = Input(shape=(Tx, channels))
165 |
166 | times_in = Input(shape=(Tx, times_in_dim))
167 | times_out = Input(shape=(Ty, times_out_dim))
168 | out_nwp = Input(shape=(Ty, channels-1))
169 | s_state0 = Input(shape=(32,))
170 | c_state0 = Input(shape=(32,))
171 |
172 | # create empty list for outputs
173 | quantile_predictions = []
174 | temporal_attns = []
175 |
176 | for q in quantiles:
177 |
178 | combined_inputs = concatenate([x_input, times_in], axis=-1, name=f'concat_q_{q}')
179 |
180 | encoder_output, forward_h, forward_c, backward_h, backward_c = Bidirectional(LSTM(32, return_sequences = False, return_state = True), name=f'biLSTM_q_{q}')(combined_inputs)
181 | repeat_layer = RepeatVector(48)(encoder_output)
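    | # the 64-dim encoder summary is repeated across the 48 output steps and concatenated with the
    | # known future time / NWP features below to form the decoder input sequence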
182 |
183 | enc_h = concatenate([forward_h, backward_h])
184 | enc_s = concatenate([forward_c, backward_c])
185 |
186 | decoder_input = concatenate([repeat_layer, times_out])
187 | decoder_input = concatenate([decoder_input, out_nwp])
188 |
189 | decoder_out, _, _ = LSTM(64, return_sequences = True, return_state = True, name=f'decoder_LSTM_q_{q}')(decoder_input, initial_state = [enc_h, enc_s])
190 |
191 | dense_out = TimeDistributed(Dense(1, name=f'dense_q_{q}'))(decoder_out)
192 |
193 | if model_type == 'solar':
194 | dense_out = Activation('relu', name=f'relu_act_q_{q}')(dense_out)
195 |
196 | quantile_predictions.append(dense_out)
197 |
198 | model = Model(inputs = [x_input, times_in, times_out, out_nwp], outputs = quantile_predictions)
199 |
200 |
201 | ###########################################_____MODEL_TRAINING_____#################################################
202 |
203 | # define optimiser (a clipvalue can be added here if gradients need clipping)
204 | optimizer = tensorflow.keras.optimizers.Adam(learning_rate = 0.001)
205 |
206 | # define loss for each quantile
207 | q_losses = [lambda y, f, q=q: K.mean(K.maximum(q * (y - f), (q - 1) * (y - f)), axis=-1) for q in quantiles]
208 |
209 | # append additional empty losses for temporal and spatial encoders
210 | # q_losses.append([None,None])
211 |
212 | # compile and train model
213 | model.compile(loss = q_losses, optimizer= optimizer)
214 | print(model.summary())
215 | model.fit(training_generator, epochs = 20)
216 |
217 | # save model
218 | os.makedirs(f'../../models/seq2seq/{model_type}', exist_ok=True)
219 | model.save(f'../../models/seq2seq/{model_type}/{model_type}_seq2seq.h5')
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
--------------------------------------------------------------------------------
/scripts/postprocessing/d3_scripts/forecasting_graph.js:
--------------------------------------------------------------------------------
1 |
2 |
3 | function prob_forecast(file, ref, color_array) {
4 |
5 |
6 | // set the dimensions and margins of the graph
7 | var margin = {top: 10, right: 0, bottom: 50, left: 80},
8 | width = 1000 - margin.left - margin.right,
9 | height = 600 - margin.top - margin.bottom;
10 |
11 |
12 |
13 | // append the svg object to the body of the page
14 | var svg = d3.select("#my_dataviz_" + ref)
15 | .append("svg")
16 | // .attr("width", width + margin.left + margin.right)
17 | // .attr("height", height + margin.top + margin.bottom)
18 | .attr("width", "100%")
19 | .attr("height", "100%")
20 | .attr("viewBox", "0 0 1000 600")
21 | .attr("preserveAspectRatio", "xMinYMin meet")
22 | .append("g")
23 | .attr("transform",
24 | "translate(" + margin.left + "," + margin.top + ")");
25 |
26 | // var svg = d3.select("#my_dataviz")
27 | // .append("svg")
28 | // .attr("width", "50%")
29 | // .attr("height", "50%")
30 | // .attr("viewBox", "0 0 740 800");
31 |
32 | svg.append("rect")
33 | .attr("x",0)
34 | .attr("y",0)
35 | .attr("height", height)
36 | .attr("width", width)
37 | .style("fill","#DEDEDE") //EBEBEB
38 | .style("stroke","none")
39 | .style("opacity", 0.3)
40 |
41 | // svg.append('text')
42 | // .attr("x",width/2)
43 | // .attr("y",height/2)
44 | // .attr('font-family', 'FontAwesome')
45 | // .attr('font-size', 100)
46 | // .text(function(d) { return '\uf185' })
47 | // .style("fill","white")
48 | // .style("opacity", 0.4) ;
49 |
50 |
51 |
52 |
53 | // Parse the Data
54 | d3.csv(file,
55 |
56 | function(d){
57 | return { date: d3.timeParse("%d/%m/%Y %H:%M")(d.Datetime),
58 | one: d.q_05 = +d.q_05,
59 | second: d.q_15 = +d.q_15,
60 | third: d.q_25 = +d.q_25,
61 | fourth: d.q_35 = +d.q_35,
62 | five: d.q_5 = +d.q_5,
63 | six: d.q_65 = +d.q_65,
64 | seven: d.q_75 = +d.q_75,
65 | eight: d.q_85 = +d.q_85,
66 | nine: d.q_95 = +d.q_95,
67 | actual: d.actual = +d.actual,
68 | }
69 | },
70 |
71 | function(data) {
72 |
73 | // data.forEach(function(d) {
74 | // d.actual= +d.actual;
75 | // d.five= +d.five;
76 | // d.date = +d.date;
77 | // // d.Datetime = d3.timeParse(d.Datetime);
78 | // });
79 |
80 | // declare date parser
81 | var parseDate = d3.timeParse("%A");
82 |
83 |
84 |
85 | // List of groups = header of the csv files
86 | var keys = data.columns.slice(1)
87 |
88 |
89 |
90 | // Add X axis
91 | var x = d3.scaleTime()
92 | .domain(d3.extent(data, function(d) { return d.date; }))
93 | .range([ 0, width ])
94 |
95 | svg.append("g")
96 | .attr("transform", "translate(0," + height + ")")
97 | .call(d3.axisBottom(x).tickFormat(d3.timeFormat(parseDate)).tickSizeInner(-height).tickSizeOuter(0).ticks(7).tickPadding(20)) //.tickFormat(d3.timeFormat(parseDate))
98 | .selectAll(".tick text")
99 | .attr("transform", "translate(" + (width / 7) / 2 + ",0)")
100 | .style("text-transform", "uppercase")
101 | .style("font-size", "16px")
102 | .style("opacity", 0.5)
103 | // .tickArguments([5])
104 | // .tickCenterLabel(true)
105 | .select(".domain").remove()
106 |
107 | svg.append("g")
108 | .attr("transform", "translate(0," + height + ")")
109 | .call(d3.axisBottom(x).tickFormat(d3.timeFormat("(%d/%m/%y)")).tickSizeInner(-height).tickSizeOuter(0).ticks(7).tickPadding(20)) //.tickFormat(d3.timeFormat(parseDate))
110 | .selectAll(".tick text")
111 | .attr("transform", "translate(" + (width / 7) / 2 + ",17)")
112 | .style("text-transform", "uppercase")
113 | .style("font-size", "14px")
114 | .style("font-style", "italic")
115 | .style("opacity", 1)
116 | .select(".domain").remove()
117 |
118 |
119 | // x-axis mini tick marks
120 | // d3.svg.axis()
121 | // .scale()
122 | // .orient('bottom')
123 | // .tickFormat('')
124 | // .tickSize(30)
125 | // .tickPadding(6)
126 |
127 |
128 | // Add X axis label:
129 | svg.append("text")
130 | .attr("text-anchor", "middle")
131 | .attr("x", width/2)
132 | .attr("y", height + margin.top + 30)
133 | // .text("Day")
134 | .style("font", "12px arial")
135 |
136 |
137 | // Add Y axis label:
138 | if (ref === "price") {
139 | svg.append("text")
140 | .attr("text-anchor", "end")
141 | // .attr("y", +margin.left)
142 | // .attr("x", -margin.top + height/2)
143 | .attr("y", -margin.left + 35)
144 | .attr("x", -height/2 + 60)
145 | .text(ref +" (£/MWh)")
146 | .style("font", "14px arial")
147 | .style("text-transform", "uppercase")
148 | // .attr("transform",
149 | // "translate(" + (height/2) + ")")
150 | .attr("transform", "rotate(-90)");
151 | } else if (ref === "demand") {
152 | svg.append("text")
153 | .attr("text-anchor", "end")
154 | // .attr("y", +margin.left)
155 | // .attr("x", -margin.top + height/2)
156 | .attr("y", -margin.left + 35)
157 | .attr("x", -height/2 + 60)
158 | .text(ref +" (GW)")
159 | .style("font", "14px arial")
160 | .style("text-transform", "uppercase")
161 | // .attr("transform",
162 | // "translate(" + (height/2) + ")")
163 | .attr("transform", "rotate(-90)");
164 | } else {
165 | svg.append("text")
166 | .attr("text-anchor", "end")
167 | // .attr("y", +margin.left)
168 | // .attr("x", -margin.top + height/2)
169 | .attr("y", -margin.left + 35)
170 | .attr("x", -height/2 + 95)
171 | .text(ref +" Generation (GW)")
172 | .style("font", "14px arial")
173 | .style("text-transform", "uppercase")
174 | // .attr("transform",
175 | // "translate(" + (height/2) + ")")
176 | .attr("transform", "rotate(-90)");
177 | }
178 |
179 | // Add Y axis
180 | var y = d3.scaleLinear()
181 | .domain([d3.min(data, function(d) { return +d.one; }) * 0.95, d3.max(data, function(d) { return +d.nine; }) * 1.05])
182 | .range([ height, 0 ])
183 | svg.append("g")
184 | .call(d3.axisLeft(y).tickSizeInner(-width).ticks(8).tickPadding(12.5))
185 | .style("font", "15px arial")
186 | .select(".domain").remove();
187 | svg.selectAll(".tick line").attr("stroke", "white").attr('stroke-width',1)
188 |
189 |
190 |
191 | // group the data
192 | var sumstat = d3.nest()
193 | .key(function(d) { return d.name;})
194 | .entries(data);
195 |
196 | //stack the data
197 | var stackedData = d3.stack()
198 | // .offset(d3.stackOffsetSilhouette)
199 | .keys(keys)
200 | // .value(function(d, key){
201 | // return d.values[key]
202 | // })
203 | (data)
204 | console.log(stackedData.keys)
205 |
206 | // create a tooltip
207 | var Tooltip = svg
208 | .select("#my_dataviz_" + ref)
209 | .append("text")
210 | .attr("x", 0)
211 | .attr("y", 0)
212 | .style("opacity", 0)
213 | .style("font-size", 17)
214 |
215 | // Three functions that change the tooltip when the user hovers over / moves across / leaves an area
216 | var mouseover = function(d) {
217 |
218 | Tooltip.style("opacity", 0.5)
219 | d3.selectAll(".myArea").style("opacity", .2)
220 | d3.select(this)
221 | .style("stroke", "black")
222 | .style("opacity", 0.5)
223 | }
224 | var mousemove = function(d,i) {
225 | grp = keys[i]
226 | Tooltip.text(grp)
227 | }
228 |
229 | var mouseleave = function(d) {
230 | Tooltip.style("opacity", 0)
231 | d3.selectAll(".myArea").style("opacity", 0.5).style("stroke", "none")
232 | }
233 |
234 | // Area generator
235 | var area = d3.area()
236 | .curve(d3.curveMonotoneX)
237 | .x(function(d) { return x(d.data.date); })
238 | .y0(function(d) { return y(d.data.one); })
239 | .y1(function(d) { return y(d.data.nine); })
240 |
241 | // Area generator
242 | var area2 = d3.area()
243 | .curve(d3.curveMonotoneX)
244 | .x(function(d) { return x(d.data.date); })
245 | .y0(function(d) { return y(d.data.second); })
246 | .y1(function(d) { return y(d.data.eight); })
247 |
248 | // Area generator
249 | var area3 = d3.area()
250 | .curve(d3.curveMonotoneX)
251 | .x(function(d) { return x(d.data.date); })
252 | .y0(function(d) { return y(d.data.third); })
253 | .y1(function(d) { return y(d.data.seven); })
254 |
255 | // Area generator
256 | var area4 = d3.area()
257 | .curve(d3.curveMonotoneX)
258 | .x(function(d) { return x(d.data.date); })
259 | .y0(function(d) { return y(d.data.fourth); })
260 | .y1(function(d) { return y(d.data.six); })
261 |
262 | // Line generator - actual values
263 | var line = d3.line()
264 | // .curve(d3.curveMonotoneX)
265 | .x(function(d) { return x(d.data.date); })
266 | .y(function(d) { return y(d.data.actual); })
267 |
268 |
269 | // Line generator - median prediction (q_5)
270 | var line2 = d3.line()
271 | .curve(d3.curveMonotoneX)
272 | .x(function(d) { return x(d.data.date); })
273 | .y(function(d) { return y(d.data.five); })
274 |
275 | // Line generator - lower quantile (q_05)
276 | var line3 = d3.line()
277 | .curve(d3.curveMonotoneX)
278 | .x(function(d) { return x(d.data.date); })
279 | .y(function(d) { return y(d.data.one); })
280 |
281 | // Line generator - upper quantile (q_95)
282 | var line4 = d3.line()
283 | .curve(d3.curveMonotoneX)
284 | .x(function(d) { return x(d.data.date); })
285 | .y(function(d) { return y(d.data.nine); })
286 |
287 | // graph colors
288 | var legendColors = d3.scaleOrdinal().range(color_array)
289 |
290 | var areas = [area, area2]
291 |
292 | // var line = d3.svg.line()
293 | // .x(function(d) { return x(d.data.date) })
294 | // .y(function(d) { return y(y(d[0])); });
295 |
296 | // d3.selectAll('.line')
297 | // .attr("d", line)
298 |
299 | // Show the areas
300 | svg
301 | .selectAll("mylayers")
302 | .data(stackedData)
303 | .enter()
304 | .append("path")
305 | .attr("class", "myArea")
306 | .style("fill",legendColors(0))
307 | .attr("d", area)
308 | .attr("clip-path", "url(#clip)");
309 | // .on("mouseover", mouseover)
310 | // // .on("mousemove", mousemove)
311 | // .on("mouseleave", mouseleave)
312 | // .attr("fill-opacity","0.3")
313 |
314 | svg
315 | .selectAll("mylayers")
316 | .data(stackedData)
317 | .enter()
318 | .append("path")
319 | .attr("class", "myArea")
320 | .style("fill" ,legendColors(1))
321 | .attr("d", area2)
322 | .attr("clip-path", "url(#clip)");
323 | // .on("mouseover", mouseover)
324 | // // .on("mousemove", mousemove)
325 | // .on("mouseleave", mouseleave)
326 | // .attr("fill-opacity","0.5")
327 |
328 | svg
329 | .selectAll("mylayers")
330 | .data(stackedData)
331 | .enter()
332 | .append("path")
333 | .attr("class", "myArea")
334 | .style("fill",legendColors(2))
335 | // .attr("fill-opacity","0.9")
336 | .attr("d", area3)
337 | .attr("clip-path", "url(#clip)");
338 | // .on("mouseover", mouseover)
339 | // // .on("mousemove", mousemove)
340 | // .on("mouseleave", mouseleave)
341 |
342 | var area4 = svg
343 | .selectAll("mylayers")
344 | .data(stackedData)
345 | .enter()
346 | .append("path")
347 | .attr("class", "myArea")
348 | .style("fill", legendColors(3))
349 | // .attr("fill-opacity","0.5")
350 | .attr("d", area4)
351 | .attr("clip-path", "url(#clip)");
352 | // .on("mouseover", mouseover)
353 | // // .on("mousemove", mousemove)
354 | // .on("mouseleave", mouseleave)
355 |
356 |
357 |
358 | var totalLength = 50000
359 | var totalLength2 = area4.node().getTotalLength();
360 |
361 | // mean predictions
362 | var path2 = svg
363 | .selectAll("mylayers")
364 | .data(stackedData)
365 | .enter()
366 | .append("path")
367 | .attr("class", "test-line")
368 | .style("fill", 'none')
369 | .attr("stroke", "white")
370 | .attr("stroke-width", 0.05)
371 | .attr("clip-path", "url(#clip)")
372 | .attr("d", line2)
373 |
374 | var path3 = svg
375 | .selectAll("mylayers")
376 | .data(stackedData)
377 | .enter()
378 | .append("path")
379 | .attr("class", "test-line")
380 | .style("fill", 'none')
381 | .attr("stroke", legendColors(4))
382 | .attr("stroke-width", 0)
383 | .attr("clip-path", "url(#clip)")
384 | .attr("d", line3)
385 |
386 | var path4 = svg
387 | .selectAll("mylayers")
388 | .data(stackedData)
389 | .enter()
390 | .append("path")
391 | .attr("class", "test-line")
392 | .style("fill", 'none')
393 | .attr("stroke", legendColors(4))
394 | .attr("stroke-width", 0)
395 | .attr("clip-path", "url(#clip)")
396 | .attr("d", line4)
397 |
398 | // var clip = svg.append("clipPath")
399 | // .attr("id", "clip");
400 | // var clipRect = clip.append("rect")
401 | // .attr("width", 100)
402 | // .attr("height", height)
403 |
404 | // clipRect
405 | // .transition()
406 | // .delay(1000)
407 | // .duration(8000)
408 | // .ease(d3.easeLinear)
409 | // .attr("width", width)
410 |
411 | // path
412 | // .attr("stroke-dasharray", totalLength + " " + totalLength)
413 | // .attr("stroke-dashoffset", totalLength)
414 | // .transition()
415 | // .duration(9000)
416 | // .ease(d3.easeLinear)
417 | // .attr("stroke-dashoffset", 0)
418 | // .on("end")
419 |
420 |
421 | // legend
422 | var count = ['1','2','3','4','5','6']
423 | var legendKeys = d3.scaleOrdinal().range(['Quantile 5 - 95', 'Quantile 15 - 85', 'Quantile 25 - 75', 'Quantile 35 - 65', 'Mean', 'Actual']);
424 |
425 |
426 | // Add one colour swatch in the legend for each key.
427 | var size = 12.5
428 | svg.selectAll("myrects")
429 | .data(count)
430 | .enter()
431 | .append("rect")
432 | .attr("x", width - 150)
433 | .attr("y", function(d,i){ if(i < 4) {return 20 + i*(size+10)}; if(i >= 4) {return 25 + i*(size+10)}; })
434 | .attr("width", size)
435 | .attr("height", function(d,i){ if(i < 4) {return size}; if(i >= 4) {return size/5}; })
436 | .style("fill", function(d, i){ return legendColors(i) })
437 |
438 | // Add one text label in the legend for each key.
439 | svg.selectAll("mylabels")
440 | .data(count)
441 | .enter()
442 | .append("text")
443 | .attr("x", (width - 150) + size*1.5)
444 | .attr("y", function(d,i){ return 20 + i*(size+10.25) + (size/2)})
445 | .style("fill", '#000000')
446 | .text(function(d, i){ return legendKeys(i)})
447 | .style("font", "14px arial")
448 | .style("fill", "grey")
449 | // .style("text-transform", "uppercase")
450 | .attr("text-anchor", "left")
451 | .style("alignment-baseline", "middle")
452 |
453 | // actual, measured data
454 | var path = svg
455 | .selectAll("mylayers")
456 | .data(stackedData)
457 | .enter()
458 | .append("path")
459 | .attr("class", "test-line")
460 | .style("fill", 'none')
461 | .attr("stroke", '#1c2f33') //D21404
462 | .attr("stroke-width", 0.15)
463 | .attr("stroke-opacity", 0.9)
464 | .attr("d", line)
465 |
466 | // create cursor highlight //////////////////////////////////////
467 |
468 | var mouseG = svg
469 | .append("g")
470 | .attr("class", "mouse-over-effects");
471 |
472 | mouseG
473 | .append("path") // this is the black vertical line to follow mouse
474 | .attr("class", "mouse-line")
475 | .style("stroke", "#393B45") //6E7889
476 | .style("stroke-width", "0.5px")
477 | .style("opacity", 0.75)
478 |
479 | mouseG.append("text")
480 | .attr("class", "mouse-text")
481 | // .style("font-size", "200%")
482 | // .text("test")
483 | .style("opacity", 0)
484 |
485 | // var lines = document.getElementsByClassName('line');
486 | var lines = [path, path3, path4]
487 |
488 | var mousePerLine = mouseG.selectAll('.mouse-per-line')
489 | .data(data)
490 | .enter()
491 | .append("g")
492 | .attr("class", "mouse-per-line");
493 |
494 | var res = sumstat.map(function(d){ return d.key })
495 | var color = d3.scaleOrdinal()
496 | .domain(res)
497 | .range(['darkblue','darkblue','darkblue','darkblue'])
498 |
499 |
500 | mousePerLine.append("circle")
501 | .attr("r", 7)
502 | .style("stroke", function(d, i) {
503 | return color(i);
504 | })
505 | .style("fill", "none")
506 | .style("stroke-width", "1px")
507 | .style("opacity", "0");
508 |
509 | mousePerLine.append("text")
510 | .attr("transform", "translate(10,3)");
511 |
512 | mousePerLine.append("text")
513 | .attr("class", "timetext");
514 |
515 | mouseG
516 | .append('svg:rect') // append a rect to catch mouse movements on canvas
517 | .attr('width', width) // can't catch mouse events on a g element
518 | .attr('height', height)
519 | .attr('fill', 'none')
520 | .attr('pointer-events', 'all')
521 | .on('mouseout touchend', function() { // on mouse out / touch end, hide line, circles and text
522 | d3.select("#my_dataviz_" + ref)
523 | .select(".mouse-line ")
524 | .style("opacity", "0" );
525 | d3.select("#my_dataviz_" + ref)
526 | .select(".mouse-text")
527 | .style("opacity", "0");
528 | d3.select("#my_dataviz_" + ref)
529 | .selectAll(".mouse-per-line circle")
530 | .style("opacity", "0");
531 | d3.select("#my_dataviz_" + ref)
532 | .selectAll(".mouse-per-line text")
533 | .style("opacity", "0")
534 | })
535 | .on('mouseover touchstart', function() { // on mouse over / touch start, show line, circles and text
536 | d3.select("#my_dataviz_" + ref)
537 | .select(".mouse-line")
538 | .style("opacity", "1");
539 | d3.select("#my_dataviz_" + ref)
540 | .select(".mouse-text")
541 | .style("opacity", "1");
542 | // d3.selectAll(".mouse-per-line circle")
543 | // .style("opacity", "1");
544 | d3.select("#my_dataviz_" + ref)
545 | .selectAll(".mouse-per-line text" )
546 | .style("opacity", "1");
547 |
548 | })
549 | .on('mousemove touchmove', function() { // mouse moving over canvas
550 | var mouse = d3.mouse(this);
551 | d3.select("#my_dataviz_" + ref)
552 | .select(".mouse-text")
553 | .attr("x", mouse[0])
554 | .attr("transform", "translate(10,30)")
555 | d3.select("#my_dataviz_" + ref)
556 | .select(".mouse-line")
557 | .attr("d", function() {
558 | var d = "M" + mouse[0] + "," + height;
559 | d += " " + mouse[0] + "," + 0;
560 | return d;
561 | })
562 |
563 |
564 | d3.select("#my_dataviz_" + ref)
565 | .selectAll(".mouse-per-line")
566 | .attr("transform", function(d, i) {
567 | if (i >= 4){ return null };
568 |
569 | var xDate = x.invert(mouse[0])
570 | time = d3.timeFormat("%H:%M %p")(xDate)
571 |
572 | // bisect = d3.bisector(function(d) { return d.date; }).left;
573 | // idx = bisect(data, xDate, 1);
574 |
575 | var beginning = 0,
576 | // end = lines[i].node().getTotalLength()
577 | end = totalLength
578 | target = null;
579 |
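// Bisection over the rendered path length: getPointAtLength() is sampled at the midpoint
// of [beginning, end] and the interval is halved until the sampled point's x matches the
// mouse x, giving the path's y-value under the cursor without searching the raw data.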
580 | while (true){
581 |
582 | target = Math.floor((beginning + end) / 2);
583 | pos = lines[i].node().getPointAtLength(target);
584 | // pos = target;
585 | if ((target === end || target === beginning) && pos.x !== mouse[0]) {
586 | break;
587 | }
588 | if (pos.x > mouse[0]) end = target;
589 | else if (pos.x < mouse[0]) beginning = target;
590 |
591 | else break; //position found
592 | }
593 |
594 | if (ref == 'price') {
595 | unit = ' £/MWh'
596 | } else {
597 | unit = ' GW'
598 | }
599 |
600 | if (i === 0) {
601 | d3.select(this).select('text')
602 | .text(y.invert(pos.y).toFixed(1) + unit)
603 | .attr("transform", "translate(10,0)")
604 | .style("font", "18px arial")
605 | .style('fill', 'blue')
606 | } else {
607 | d3.select(this).select('text')
608 | .text(y.invert(pos.y).toFixed(1) + unit)
609 | .attr("transform", "translate(-75,0)")
610 | .style("font", "16px arial")
611 | .style('fill', 'black');
612 | }
613 |
614 | d3.select(this).select('circle')
615 | .style("opacity", 1)
616 | var parseDate = d3.timeParse("%a %d");
617 | var timestamp = d3.select("#my_dataviz_" + ref).select('.mouse-text')
618 | .text(time)
619 | .style("opacity", 0.5)
620 | .style("text-transform", "uppercase")
621 | .style("font", "arial")
622 | .style("font-size", "22.5px")
623 |
624 | return "translate(" + mouse[0] + "," + pos.y +")";
625 | });
626 | })
627 |
628 |
629 | // Add Y line:
630 | svg.append("line")
631 | // .attr("transform", "rotate(-90)")
632 | .attr("y1", height)
633 | .attr("x1", 0)
634 | .style("stroke-width", 1)
635 | .style("stroke", "#263238")
636 |
637 | // Add X line:
638 | svg.append("line")
639 | // .attr("transform", "rotate(-90)")
640 | .attr("y1", height)
641 | .attr("x1", 0)
642 | .attr("y2", height)
643 | .attr("x2", width)
644 | .style("stroke-width", 1)
645 | .style("stroke", "#263238")
646 |
647 |
648 | //add minor tick marks to x-axis
649 | var m
650 | for (m = 0; m < width; ){
651 | svg.append("line")
652 | .attr("y1", height)
653 | .attr("x1", m )
654 | .attr("y2", height + 5)
655 | .attr("x2", m )
656 | .style("stroke-width", 1)
657 | .style("stroke", "#263238")
658 | .style("opacity", 0.5);
659 | m = m + (width / 167.5 )
660 | }
661 |
662 | //add main tick marks to x-axis
663 | var i
664 | for (i = (width / 7); i < width; i++){
665 | svg.append("line")
666 | .attr("y1", height)
667 | .attr("x1", i )
668 | .attr("y2", height + 20)
669 | .attr("x2", i )
670 | .style("stroke-width", 1.5)
671 | .style("stroke", "#263238");
672 | i = i + (width / 7) - 0.5
673 | }
674 |
675 | //add noon tick marks to x-axis
676 | var n
677 | for (n = (width / 14); n < width; n++){
678 | svg.append("line")
679 | .attr("y1", height)
680 | .attr("x1", n )
681 | .attr("y2", height + 12)
682 | .attr("x2", n )
683 | .style("stroke-width", 1.5)
684 | .style("stroke", "#263238");
685 | n = n + (width / 7) - 0.5
686 | }
687 |
688 | //add vertical dashed gridlines at each day boundary
689 | var i
690 | for (i = (width / 7); i < width; i++){
691 | svg.append("line")
692 | .attr("y1", height)
693 | .attr("x1", i )
694 | .attr("y2", 0)
695 | .attr("x2", i )
696 | .style("stroke-width", 0.5)
697 | .style("stroke-dasharray", ("3, 3"))
698 | .style("stroke", "#263238");
699 | i = i + (width / 7) - 0.5
700 | }
701 |
702 |
703 |
704 |
705 | //add y-axis tick marks to y-axis
706 | // var u
707 | // for (u = 0; u < height; u++){
708 | // svg.append("line")
709 | // .attr("y1", u)
710 | // .attr("x1", -5)
711 | // .attr("y2", u)
712 | // .attr("x2", 0)
713 | // .style("stroke-width", 1.0)
714 | // .style("stroke", "#263238");
715 | // u = u + (height / 9) - 1
716 | // }
717 |
718 | })
719 | }
720 |
--------------------------------------------------------------------------------
/scripts/postprocessing/format_results_Qforecast_plot.py:
--------------------------------------------------------------------------------
1 | # format prediction results for quantile forecasting d3 plot
2 | import numpy as np
3 | import pandas as pd
4 | from datetime import datetime, timedelta
5 | import csv
6 | from pickle import load
7 | from sklearn.preprocessing import MinMaxScaler
8 |
9 |
10 |
11 | # declare model type
12 | model_type = 'seq2seq+temporal' # - bilstm, seq2seq, seq2seq+temporal, seq2seq+temporal+spatial
13 |
14 | # forecasting model
15 | forecast_var = 'price'
16 |
17 | # select start example index reference, 7-days plotted from here
18 | ex_idx = 4
19 |
20 | # load prediction data
21 | with open(f'../../results/{forecast_var}/{model_type}/forecasted_time_series_{forecast_var}_{model_type}.pkl', 'rb') as forecast_data:
22 | predictions = load(forecast_data)
23 |
24 | print(len(predictions['0.5']))
25 |
26 | # get start date
27 | out_start_time = predictions['time_refs']['output_times'][ex_idx][0]
28 |
29 | print(out_start_time)
30 |
31 | # produce date range for week-long predictions
32 | output_sequence_len = 336 # (half-hours)
33 | input_num_of_days = output_sequence_len / 48
34 | # start_date = datetime.strptime(str(out_start_time)[:10], "%Y-%m-%d")
35 | # out_date_range = pd.date_range(start=start_date, end=start_date + timedelta(days=input_num_of_days) , freq="30min")[:-1]# remove HH entry form unwanted day
36 |
37 | out_start_time = predictions['time_refs']['output_times'][ex_idx:ex_idx+int(input_num_of_days)]
38 | out_date_range = pd.to_datetime(out_start_time.ravel(), format='%Y-%m-%d')
39 |
40 | # index ref
41 | idx_ref = [x for x in range(1, output_sequence_len+1)]
42 |
43 | # final params for df
44 | final_params = {'year': idx_ref,
45 | 'Datetime': out_date_range }
46 |
47 | # loop through to write results for each quantile
48 | for q in list(predictions.keys())[:-2]:
49 |
50 | final_params[f'q_{q[2:]}'] = predictions[str(q)][ex_idx:ex_idx+7, :, 0].reshape((-1))
51 |
52 | # add actual values for reference
53 | final_params['actual'] = predictions['y_true'][ex_idx:ex_idx+7, :, 0].reshape((-1))
54 |
55 | print(final_params.keys())
56 |
57 | # convert to pandas df
58 | df = pd.DataFrame(dict([(keys ,pd.Series(values, dtype = 'object')) for keys, values in final_params.items()])) # set all as objects to avoid warning on empty cells
59 |
60 | # convert values to GW
61 | if forecast_var != "price":
62 | df.iloc[:,2:] = df.iloc[:,2:] / 1000
63 |
64 | # copy to clipboard
65 | df.to_clipboard()
66 |
67 | # save data to file
68 | df.to_csv(f'../../results/{forecast_var}/{model_type}/quantile_prediction_results_{forecast_var}_{model_type}.csv', index=False)
69 |
70 |
--------------------------------------------------------------------------------
/scripts/postprocessing/format_results_attn_plot.py:
--------------------------------------------------------------------------------
1 | # format attention results for context d3 plot
2 | import numpy as np
3 | import pandas as pd
4 | from datetime import datetime, timedelta
5 | import csv
6 | from pickle import load
7 | from sklearn.preprocessing import MinMaxScaler
8 |
9 | # forecasting model
10 | type = 'solar' # 'wind', 'solar', 'price', 'demand'
11 |
12 | # select example reference
13 | ex_idx = 26
14 |
15 | # load attention data
16 | if type != "price":
17 | with open(f'../../results/{type}/seq2seq+temporal+spatial/attention_data_{type}_seq2seq+temporal+spatial.pkl', 'rb') as attention_data:
18 | attention_results = load(attention_data)
19 |
20 | # load prediction data
21 | with open(f'../../results/{type}/seq2seq+temporal+spatial/forecasted_time_series_{type}_seq2seq+temporal+spatial.pkl', 'rb') as forecast_data:
22 | predictions = load(forecast_data)
23 | else:
24 | with open(f'../../results/{type}/seq2seq+temporal/attention_data_{type}_seq2seq+temporal.pkl', 'rb') as attention_data:
25 | attention_results = load(attention_data)
26 |
27 | # load prediction data
28 | with open(f'../../results/{type}/seq2seq+temporal/forecasted_time_series_{type}_seq2seq+temporal.pkl', 'rb') as forecast_data:
29 | predictions = load(forecast_data)
30 |
31 |
32 | print(attention_results.keys())
33 |
34 | # get start dates for inputs and outputs
35 | in_start_time = attention_results['time_refs']['input_times'][ex_idx][0]
36 | out_start_time = attention_results['time_refs']['output_times'][ex_idx][0]
37 |
38 | # log start date of selected index
39 | print(f'input time start date: {in_start_time}')
40 | print(f'output time start date: {out_start_time}')
41 |
42 | # input data for reference
43 | if type != 'price':
44 | input_data = np.average(attention_results['input_features'][ex_idx, :, :, :, 0], axis=(1,2))
45 | else:
46 | input_data = attention_results['input_features'][ex_idx, :, -1:]
47 |
48 |
49 | # get prediction result for current index
50 | current_prediction = predictions['0.5'][ex_idx, :, 0]
51 |
52 | # attention values for current index
53 | current_attention_vals = attention_results['0.5'][ex_idx]
54 |
55 | attention_vals = np.empty((current_attention_vals.shape[0] * current_attention_vals.shape[1]))
56 |
57 | # flatten the (input_steps, 48) attention matrix row-by-row into a 1-D vector
58 | iidx = 0
59 | for idx in range(current_attention_vals.shape[0]):
60 | attention_vals[iidx:iidx+48] = current_attention_vals[idx, :]
61 | iidx += 48
62 |
63 | # input params
64 | input_sequence_len = 336
65 | input_num_of_days = input_sequence_len / 48
66 | start_date = datetime.strptime(str(in_start_time)[:10], "%Y-%m-%d")
67 | target_data = datetime.strptime(str(out_start_time)[:10], "%Y-%m-%d")
68 | input_date_range = pd.date_range(start=start_date, end=start_date + timedelta(days=input_num_of_days) , freq="30min")[:-1]# remove HH entry from unwanted day
69 |
70 | # out_start_time = predictions['time_refs']['output_times'][ex_idx:ex_idx+int(input_num_of_days)]
71 | # input_date_range = pd.to_datetime(out_start_time.ravel(), format='%Y-%m-%d')
72 |
73 | # create index values
74 | group_index = [48 * [idx] for idx in range(input_sequence_len)]
75 | variable_index = [[idx for idx in range(48)] for iidx in range(input_sequence_len)]
76 |
77 | # flatten lists of lists
78 | group_index = sum(group_index, [])
79 | variable_index = sum(variable_index, [])
80 |
81 | # create data ranges
82 | group = [48 * [date_time] for date_time in input_date_range]
83 | variable = [pd.date_range(start=target_data, end=target_data + timedelta(days=1) , freq="30min").tolist()[:-1] for idx in range(input_sequence_len)] # remove HH entry for next day
84 |
85 | # flatten timestamps into single list
86 | group = sum(group, [])
87 | variable = sum(variable, [])
88 |
89 | # create output time idxs
90 | output_time_ref = [idx for idx in range(48)]
91 |
92 | # create input time idxs
93 | input_time_ref = [idx for idx in range(input_sequence_len)]
94 |
95 | # input times
96 | input_time = [date_time for date_time in input_date_range]
97 |
98 | # output times
99 | output_time = pd.date_range(start=target_data, end=target_data + timedelta(days=1) , freq="30min").tolist()[:-1]
100 |
101 | # load and apply scaler
102 | # load scaler
103 | scaler = load(open(f'../../data/processed/{type}/_scaler/scaler_{type}_v2.pkl', 'rb'))
104 |
105 | input_data = np.squeeze(input_data)
106 |
107 | # transform input data
108 | input_data = scaler.inverse_transform(input_data)
109 |
110 | attention_vals_int = attention_vals
111 |
112 | # optionally rescale attention values (scaling currently disabled; raw values passed through)
113 | # scaler = MinMaxScaler(feature_range = (0, 1))
114 | # attention_vals_scaled = scaler.fit_transform(attention_vals.reshape(-1,1)).reshape(-1)
115 |
116 | # attention_vals_scaled = np.sqrt(attention_vals_scaled)
117 |
118 | attention_vals_scaled = attention_vals
119 |
120 | # get true values for reference
121 | y_true = predictions['y_true'][ex_idx][:,0]
122 |
123 | # final params for df
124 | final_params = {'group_index': group_index,
125 | 'variable_index': variable_index,
126 | 'group': group,
127 | 'variable': variable,
128 | 'value_scaled': attention_vals_scaled,
129 | 'value': attention_vals_int,
130 | 'input_time_ref': input_time_ref,
131 | 'input_time': input_time,
132 | 'input_values': input_data,
133 | 'output_time_ref': output_time_ref,
134 | 'output_time': output_time,
135 | 'prediction': current_prediction,
136 | 'y_true': y_true }
137 |
138 | # convert to pandas df
139 | df = pd.DataFrame(dict([(keys ,pd.Series(values, dtype = 'object')) for keys, values in final_params.items()])) # set all as objects to avoid warning on empty cells
140 |
141 | # copy to clipboard
142 | df.to_clipboard()
143 |
144 | # save data to file
145 | # df.to_csv(f'../../results/{type}/attention_plot_results_{type}.csv', index=False)
146 |
147 |
148 |
--------------------------------------------------------------------------------
/scripts/postprocessing/results_summary.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import scipy
4 | from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
5 | from pickle import load
6 |
7 |
8 | # declare model type
9 | model_type = 'seq2seq+temporal' # - bilstm, seq2seq, seq2seq+temporal, seq2seq+temporal+spatial
10 |
11 | # desired var to run analysis
12 | forecast_var = 'price'
13 |
14 | # load quantile prediction results
15 | with open(f'../../results/{forecast_var}/{model_type}/forecasted_time_series_{forecast_var}_{model_type}.pkl', 'rb') as forecast_data:
16 | results = load(forecast_data)
17 |
18 |
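# note: the local definition below overrides the sklearn import above and returns a percentage (x100) rather than a fraction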
19 | def mean_absolute_percentage_error(y_true, y_pred):
20 |
21 | y_true, y_pred = np.array(y_true), np.array(y_pred)
22 | return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
23 |
24 | def smape(y_true, y_pred):
25 | return 100/len(y_true) * np.sum(2 * np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)))
26 |
27 | # function to evaluate general & quantile performance
28 | def evaluate_predictions(predictions):
29 | '''
30 | Theory from Bazionis & Georgilakis (2021): https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=&ved=2ahUKEwiUprb39qbyAhXNgVwKHWVsA50QFnoECAMQAQ&url=https%3A%2F%2Fwww.mdpi.com%2F2673-4826%2F2%2F1%2F2%2Fpdf&usg=AOvVaw1AWP-zHuNGrw8pgDfUS09e
31 | function to calculate probabilistic forecast performance:
32 | Prediction Interval Coverage Probability (PICP)
33 | Prediction Interval Nominal Coverage (PINC)
34 | Average Coverage Error (ACE) [PICP - PINC]
35 | '''
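# worked example: with lower/upper quantiles 0.05 and 0.95, PINC = (0.95 - 0.05) * 100 = 90;
# if 87% of observations fall inside the predicted interval, PICP = 87 and ACE = 87 - 90 = -3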
36 | test_len = len(predictions['y_true'])
37 |
38 | print(test_len)
39 |
40 | y_true = predictions['y_true'].ravel()
41 | lower_pred = predictions[list(predictions.keys())[0]].ravel()
42 | upper_pred = predictions[list(predictions.keys())[-3]].ravel()
43 | central_case = predictions['0.5'].ravel()
44 |
45 | alpha = float(list(predictions.keys())[-3]) - float(list(predictions.keys())[0])
46 |
47 | # picp_ind = np.sum((y_true > lower_pred) & (y_true <= upper_pred))
48 |
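# PICP: percentage of observed points lying inside the [lower, upper] interval; the denominator assumes test_len days of 48 half-hourly points each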
49 | picp = ((np.sum((y_true >= lower_pred) & (y_true <= upper_pred))) / (test_len * 48) ) * 100
50 |
51 | pinc = alpha * 100
52 |
53 | ace = (picp - pinc) # the closer to 0, the higher the reliability
54 |
55 | r = np.max(y_true) - np.min(y_true)
56 |
57 | # PI normalised width
58 | pinaw = (1 / (test_len * r)) * np.sum((upper_pred - lower_pred))
59 |
60 | # PI normalised root-mean-square width
61 | pinrw = (1/r) * np.sqrt( (1/test_len) * np.sum((upper_pred - lower_pred)**2))
62 |
63 | # calculate MAE & RMSE
64 | mae = mean_absolute_error(y_true, central_case)
65 | mape = mean_absolute_percentage_error(y_true, central_case)
66 | rmse = mean_squared_error(y_true, central_case, squared=False)
67 |
68 | # calculate MAE & RMSE for the persistence baseline (previous day's observations used as the forecast)
69 | persistence_prediction = predictions['y_true'][:-1].ravel()
70 | persistence_true = predictions['y_true'][1:].ravel()
71 |
72 | mae_base = mean_absolute_error(persistence_true, persistence_prediction)
73 | mape_base = mean_absolute_percentage_error(persistence_true, persistence_prediction)
74 | rmse_base = mean_squared_error(persistence_true, persistence_prediction, squared=False)
75 |
76 | # create pandas df
77 | metrics = pd.DataFrame({'PICP': picp, 'PINC': pinc, 'ACE': ace, 'PINAW': pinaw, 'PINRW': pinrw, 'MAE': mae, 'MAPE': mape, 'RMSE': rmse}, index={alpha})
78 | metrics.index.name = 'Prediction_Interval'
79 |
80 | # create pandas df for baseline
81 | metrics_base = pd.DataFrame({'MAE': mae_base, 'MAPE': mape_base, 'RMSE': rmse_base}, index={'baseline_persistence'})
82 |
83 | print(metrics.to_string())
84 | print(metrics_base.to_string())
85 |
86 | # save performance metrics
87 | metrics.to_csv(f'../../results/{forecast_var}/{model_type}/preformance_summary_{forecast_var}_{model_type}.csv', index=False)
88 |
89 | return metrics
90 |
91 |
92 | # function to evaluate trends
93 | def correlation_analysis(X, Y):
94 |
95 | rs = np.empty((X.shape[0], 1))
96 | # calculate 'R^2' for each example (day)
97 | for l in range(X.shape[0]):
98 | slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(X[l,:,0], Y[l,:,0])
99 | rs[l, 0] =r_value**2
100 |
101 | print('mean' + '\n R**2: %s' %rs.mean())
102 | print('max' + '\n R**2: %s' %rs.max())
103 | print('min' + '\n R**2: %s' %rs.min())
104 |
105 | #get best
106 | best_fit = np.argmax(rs, axis=0)
107 | worst_fit = np.argmin(rs, axis=0)
108 | print(best_fit)
109 | print(worst_fit)
110 |
111 | return
112 |
113 | # call evaluate performance
114 | evaluate_predictions(results)
115 |
116 |
117 |
--------------------------------------------------------------------------------
/scripts/postprocessing/spatial_attention_plots.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | import geopandas
5 | import contextily as ctx
6 | from pickle import load
7 | from matplotlib.animation import FuncAnimation
8 |
9 |
10 | # plot spatial attention
11 | def plot_spatial_predictions(spatial_data, title, height_scale, width_scale, frame_num):
12 |
13 | fig = plt.figure(figsize=[8,10]) # a new figure window
14 | ax_set = fig.add_subplot(1, 1, 1)
15 |
16 | # create baseline map
17 | # spatial data on UK basemap
18 | df = pd.DataFrame({
19 | 'LAT': [49.78, 61.03],
20 | 'LON': [-11.95, 1.55],
21 | })
22 |
23 | geo_df = geopandas.GeoDataFrame(df, crs = {'init': 'epsg:4326'},
24 | geometry=geopandas.points_from_xy(df.LON, df.LAT)).to_crs(epsg=3857)
25 |
26 | ax = geo_df.plot(
27 | figsize= (8,10),
28 | alpha = 0,
29 | ax=ax_set,
30 | )
31 |
32 | plt.title(title)
33 | ax.set_axis_off()
34 |
35 | # add basemap
36 | url = 'http://tile.stamen.com/terrain/{z}/{x}/{y}.png'
37 | zoom = 10
38 | xmin, xmax, ymin, ymax = ax.axis()
39 | basemap, extent = ctx.bounds2img(xmin, ymin, xmax, ymax, zoom=zoom, source=url)
40 | ax.imshow(basemap, extent=extent, interpolation='gaussian')
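# reshape the flattened spatial attention vector for the first timestep into a (height, width) grid so it can be overlaid on the basemap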
41 | attn_over = np.resize(spatial_data[0], (height_scale, width_scale))
42 |
43 | gb_shape = geopandas.read_file("../../data/raw/_mapping/shapefiles/GBR_adm/GBR_adm0.shp").to_crs(epsg=3857)
44 | irl_shape = geopandas.read_file("../../data/raw/_mapping/shapefiles/IRL_adm/IRL_adm0.shp").to_crs(epsg=3857)
45 | gb_shape.boundary.plot(ax=ax, edgecolor="black", linewidth=0.5, alpha=0.4)
46 | irl_shape.boundary.plot(ax=ax, edgecolor="black", linewidth=0.5, alpha=0.4)
47 | overlay = ax.imshow(attn_over, cmap='viridis', alpha=0.5, extent=extent)
48 | # ax.axis((xmin, xmax, ymin, ymax))
49 | txt = fig.text(.5, 0.09, '', ha='center')
50 |
51 |
52 | def update(i):
53 | spatial_over = np.resize(spatial_data[i], (height_scale, width_scale))
54 | print(spatial_over.shape)
55 | # overlay = ax.imshow(spatial_over, cmap='viridis', alpha=0.5, extent=extent)
56 | overlay.set_data(spatial_over)
57 | txt.set_text(f"Timestep: {i}")
58 | # plt.cla()
59 |
60 | return [overlay, txt]
61 |
62 |
63 | animation_ = FuncAnimation(fig, update, frames=frame_num, blit=False, repeat=False)
64 | # plt.show(block=True)
65 | animation_.save(f'{title}_animation.gif', writer='imagemagick')
66 |
67 |
68 |
69 | # define model type to plot
70 | model_type = 'solar'
71 |
72 | idx = 0
73 |
74 | # load spatial attention data
75 | # save results - forecasted spatial attention matrix
76 | with open(f'../../results/{model_type}/seq2seq+temporal+spatial/spatial_attention_data_{model_type}.pkl', 'rb') as spatial_file:
77 | spatial_data = load(spatial_file)
78 |
79 |
80 | # grab relevant example
81 | spatial_data = spatial_data['0.5'][idx,:,:]
82 |
83 | spatial_data = np.transpose(spatial_data)
84 |
85 | print(spatial_data.shape)
86 | print(spatial_data[30, :])
87 |
88 | # exit()
89 |
90 |
91 | # call plot function
92 | plot_spatial_predictions(spatial_data=spatial_data, title='Solar Spatial Attention', height_scale=16, width_scale=20, frame_num=48)
--------------------------------------------------------------------------------
/scripts/preprocessing/ERA5_downloader.py:
--------------------------------------------------------------------------------
1 | import cdsapi
2 | import os
3 |
4 |
5 | os.chdir("PATH TO ERA5 DOWNLOADER FILE")
6 | print(os.getcwd())
7 |
8 |
9 | c = cdsapi.Client()
10 |
11 | years = ['2020', '2021']
12 | variables = ['surface_net_solar_radiation']
13 |
14 | for l, var in enumerate(variables):
15 | for i, year in enumerate(years):
16 |
17 | print(f'year:{year}, var:{var}')
18 |
19 | if year == '2021':
20 | c.retrieve(
21 | 'reanalysis-era5-single-levels',
22 | {
23 | 'product_type': 'reanalysis',
24 | 'format': 'netcdf',
25 | 'variable': [
26 | var,
27 | ],
28 | 'year': [
29 | year,
30 | ],
31 | 'month': [
32 | '01', '02', '03',
33 | '04', '05', '06',
34 | # '07', '08', '09',
35 | # '10', '11', '12',
36 | ],
37 | 'day': [
38 | '01', '02', '03',
39 | '04', '05', '06',
40 | '07', '08', '09',
41 | '10', '11', '12',
42 | '13', '14', '15',
43 | '16', '17', '18',
44 | '19', '20', '21',
45 | '22', '23', '24',
46 | '25', '26', '27',
47 | '28', '29', '30',
48 | '31',
49 | ],
50 | 'time': [
51 | '00:00', '01:00', '02:00',
52 | '03:00', '04:00', '05:00',
53 | '06:00', '07:00', '08:00',
54 | '09:00', '10:00', '11:00',
55 | '12:00', '13:00', '14:00',
56 | '15:00', '16:00', '17:00',
57 | '18:00', '19:00', '20:00',
58 | '21:00', '22:00', '23:00',
59 | ],
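# 'area' is specified as [North, West, South, East] in degrees (CDS API convention) - this box covers Great Britain and Ireland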
60 | 'area': [
61 | 61.19, -11.95, 49.78,
62 | 1.76,
63 | ],
64 | },
65 | str(var) + '_' + str(year) + '.nc')
66 |
67 |
68 | else:
69 | c.retrieve(
70 | 'reanalysis-era5-single-levels',
71 | {
72 | 'product_type': 'reanalysis',
73 | 'format': 'netcdf',
74 | 'variable': [
75 | var,
76 | ],
77 | 'year': [
78 | year,
79 | ],
80 | 'month': [
81 | '01', '02', '03',
82 | '04', '05', '06',
83 | '07', '08', '09',
84 | '10', '11', '12',
85 | ],
86 | 'day': [
87 | '01', '02', '03',
88 | '04', '05', '06',
89 | '07', '08', '09',
90 | '10', '11', '12',
91 | '13', '14', '15',
92 | '16', '17', '18',
93 | '19', '20', '21',
94 | '22', '23', '24',
95 | '25', '26', '27',
96 | '28', '29', '30',
97 | '31',
98 | ],
99 | 'time': [
100 | '00:00', '01:00', '02:00',
101 | '03:00', '04:00', '05:00',
102 | '06:00', '07:00', '08:00',
103 | '09:00', '10:00', '11:00',
104 | '12:00', '13:00', '14:00',
105 | '15:00', '16:00', '17:00',
106 | '18:00', '19:00', '20:00',
107 | '21:00', '22:00', '23:00',
108 | ],
109 | 'area': [
110 | 61.19, -11.95, 49.78,
111 | 1.76,
112 | ],
113 | },
114 | str(var) + '_' + str(year) + '.nc')
115 |
116 |
117 |
118 |
119 |
--------------------------------------------------------------------------------
/scripts/preprocessing/__pycache__/preprocessing_funcs.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/scripts/preprocessing/__pycache__/preprocessing_funcs.cpython-38.pyc
--------------------------------------------------------------------------------
/scripts/preprocessing/data_preprocessing_demand.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import sys
4 | import os
5 | from pickle import dump, load
6 | import h5py
7 |
8 | from preprocessing_funcs import demand_data_processing
9 |
10 | np.set_printoptions(threshold=sys.maxsize)
11 |
12 | ###########################################_____LOAD & PRE-PROCESS DATA_____###########################################
13 |
14 | #cache current working directory of main script
15 | workingDir = os.getcwd()
16 |
17 | # paths to nc files for x_value features:
18 | filepaths = {
19 | 'temperature': '../../data/raw/temperature',
20 | }
21 |
22 | # load labels (demand per HH)
23 | demandGenLabels = pd.read_csv('../../data/raw/demand_labels/HH_demand_v2.csv', parse_dates=True, index_col=0, header=0, dayfirst=True)
24 |
25 | # call main pre-processing function - sequence windowing no longer utilised
26 | dataset, time_refs = demand_data_processing(filepaths = filepaths, labels = demandGenLabels, workingDir = workingDir)
27 |
28 | # print data summaries
29 | print(*[f'{key}: {dataset["train_set"][key].shape}' for key in dataset['train_set'].keys()], sep='\n')
30 | print(*[f'{key}: {dataset["test_set"][key].shape}' for key in dataset['test_set'].keys()], sep='\n')
31 |
32 | # save time series references (inputs & outputs)
33 | print('saving data...')
34 | with open("../../data/processed/demand/time_refs_demand_v4.pkl", "wb") as times:
35 | dump(time_refs, times)
36 |
37 | # save training set as dictionary (h5py dump)
38 | f = h5py.File('../../data/processed/demand/dataset_demand_v4.hdf5', 'w')
39 |
40 | for group_name in dataset:
41 | group = f.create_group(group_name)
42 | for dset_name in dataset[group_name]:
43 | dset = group.create_dataset(dset_name, data = dataset[group_name][dset_name])
44 | f.close()
45 |
46 |
--------------------------------------------------------------------------------
/scripts/preprocessing/data_preprocessing_price.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from sklearn.preprocessing import MinMaxScaler, StandardScaler
4 | from pickle import dump
5 | import h5py
6 |
7 | from workalendar.europe import UnitedKingdom
8 | cal = UnitedKingdom()
9 |
10 | # load input data
11 | windGen_data = pd.read_csv('../../data/raw/wind_labels/HH_windGen_v2.csv', parse_dates=True, index_col=0, header=0, dayfirst=True)
12 | solarGen_data = pd.read_csv('../../data/raw/solar_labels/HH_PVGen_v2.csv', parse_dates=True, index_col=0, header=0, dayfirst=True)
13 | demand_data = pd.read_csv('../../data/raw/demand_labels/HH_demand_v2.csv', parse_dates=True, index_col=0, header=0, dayfirst=True)
14 |
15 | # load labels
16 | price_data = pd.read_csv('../../data/raw/price_labels/N2EX_UK_DA_Auction_Hourly_Prices_v2.csv', parse_dates=True, index_col=0, header=0, dayfirst=True)
17 |
18 | # interpolate hourly prices into HH resolution
19 | price_data = price_data.reindex(pd.date_range(start=price_data.index.min(), end=price_data.index.max() + pd.Timedelta(minutes=30), freq='30T'))
20 | price_data = price_data.interpolate()
21 |
22 | # combine vars into feature array
23 | arrays = [windGen_data.values, solarGen_data.values, demand_data.values]
24 |
25 | feature_array = []
26 |
27 | # normalise feature array
28 | for i, array in enumerate(arrays):
29 | scaler = StandardScaler()
30 | feature_array.append(scaler.fit_transform(array))
31 |
32 | # normalise labels
33 | # scaler = MinMaxScaler() #normalise data
34 | scaler = StandardScaler()
35 | price_data = scaler.fit_transform(price_data.values)
36 |
37 | # save price data scaler
38 | dump(scaler, open('../../data/processed/price/_scaler/scaler_price_v2.pkl', 'wb'))
39 |
40 | # stack features
41 | feature_array = np.concatenate(feature_array, axis=-1)
42 |
43 | # mask data (eliminate nans)
44 | wind_mask = windGen_data.iloc[:,-1].isna().groupby(windGen_data.index.normalize()).transform('any')
45 | solar_mask = solarGen_data.iloc[:,-1].isna().groupby(solarGen_data.index.normalize()).transform('any')
46 | demand_mask = demand_data.iloc[:,-1].isna().groupby(demand_data.index.normalize()).transform('any')
47 | price_mask = demand_data.iloc[:,-1].isna().groupby(demand_data.index.normalize()).transform('any')
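# note: price_data was converted to a numpy array by the scaler above, so the demand series is used here as the basis for the price mask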
48 |
49 | # eliminate all missing values with common mask
50 | mask_all = wind_mask | solar_mask | demand_mask | price_mask
51 |
52 | # apply mask, removing days containing any nan values
53 | feature_array = feature_array[~mask_all]
54 |
55 | price_data = price_data[~mask_all]
56 |
57 | # combine price data to other features for complete feature array
58 | feature_array = [feature_array, price_data]
59 | feature_array = np.concatenate(feature_array, axis=-1)
60 |
61 | # time refs
62 | time_refs = windGen_data.index
63 | time_refs = time_refs[~mask_all]
64 |
65 | # time data engineering
66 | df_times_outputs = pd.DataFrame()
67 | df_times_outputs['date'] = time_refs.date
68 | df_times_outputs['hour'] = time_refs.hour
69 | df_times_outputs['month'] = time_refs.month - 1
70 | df_times_outputs['year'] = time_refs.year
71 | df_times_outputs['day_of_week'] = time_refs.dayofweek
72 | df_times_outputs['day_of_year'] = time_refs.dayofyear - 1
73 | df_times_outputs['weekend'] = df_times_outputs['day_of_week'].apply(lambda x: 1 if x>=5 else 0)
74 |
75 | # account for bank / public holidays
76 | start_date = time_refs.min()
77 | end_date = time_refs.max()
78 | start_year = df_times_outputs['year'].min()
79 | end_year = df_times_outputs['year'].max()
80 |
81 | holidays = set(holiday[0]
82 | for year in range(start_year, end_year + 1)
83 | for holiday in cal.holidays(year)
84 | if start_date <= holiday[0] <= end_date)
85 |
86 | df_times_outputs['holiday'] = df_times_outputs['date'].isin(holidays).astype(int)
87 |
88 | # process output times: offset odd (half-hour) rows by 0.5 hours
89 | for idx, row in df_times_outputs.iterrows():
90 | if idx % 2 != 0:
91 | df_times_outputs.iloc[idx, 1] = df_times_outputs.iloc[idx, 1] + 0.5
92 |
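# cyclical (sin/cos) encoding keeps neighbouring times close in feature space, e.g. the last half-hour of the day maps next to hour 0 on the unit circle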
93 | # create sin / cos of output hour
94 | times_out_hour_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1)
95 | times_out_hour_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1)
96 |
97 | # create sin / cos of output month
98 | times_out_month_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1)
99 | times_out_month_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1)
100 |
101 | # min-max normalise output year
102 | times_out_year = np.expand_dims((df_times_outputs['year'].values - np.min(df_times_outputs['year'])) / (np.max(df_times_outputs['year']) - np.min(df_times_outputs['year'])), axis=-1)
103 |
104 | # create sin / cos of output day of week
105 | times_out_DoW_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['day_of_week']/np.max(df_times_outputs['day_of_week'])), axis=-1)
106 | times_out_DoW_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['day_of_week']/np.max(df_times_outputs['day_of_week'])), axis=-1)
107 |
108 | # create sin / cos of output day of year
109 | times_out_DoY_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['day_of_year']/np.max(df_times_outputs['day_of_year'])), axis=-1)
110 | times_out_DoY_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['day_of_year']/np.max(df_times_outputs['day_of_year'])), axis=-1)
111 |
112 | weekends = np.expand_dims(df_times_outputs['weekend'].values, axis =-1)
113 | holidays = np.expand_dims(df_times_outputs['holiday'].values, axis =-1)
114 |
115 | time_features = np.concatenate((times_out_hour_sin, times_out_hour_cos, times_out_month_sin, times_out_month_cos, times_out_DoW_sin, times_out_DoW_cos,
116 | times_out_DoY_sin, times_out_DoY_cos, times_out_year, weekends, holidays), axis=-1)
117 |
118 | # combine demand / solar / wind with time features
119 | # combined_data = np.concatenate([feature_array, output_times], axis=-1)
120 |
121 | test_split_seq = 8544 # larger test set to compensate for the adverse demand profile during COVID
122 |
123 | # split data into train and test sets
124 | dataset = {
125 | 'train_set' : {
126 | 'X1_train': feature_array[:-test_split_seq],
127 | 'X2_train': time_features[:-test_split_seq],
128 | 'X3_train': time_features[:-test_split_seq],
129 | 'y_train': price_data[:-test_split_seq]
130 | },
131 | 'test_set' : {
132 | 'X1_test': feature_array[-test_split_seq:],
133 | 'X2_test': time_features[-test_split_seq:],
134 | 'X3_test': time_features[-test_split_seq:],
135 | 'y_test': price_data[-test_split_seq:]
136 | }
137 | }
138 |
139 | time_refs = {
140 | 'input_times_train': time_refs[:-test_split_seq],
141 | 'input_times_test': time_refs[-test_split_seq:],
142 | 'output_times_train': time_refs[:-test_split_seq],
143 | 'output_times_test': time_refs[-test_split_seq:]
144 | }
145 |
146 | # print data for info
147 | print(*[f'{key}: {dataset["train_set"][key].shape}' for key in dataset['train_set'].keys()], sep='\n')
148 | print(*[f'{key}: {dataset["test_set"][key].shape}' for key in dataset['test_set'].keys()], sep='\n')
149 |
150 | # save dataset
151 | with open("../../data/processed/price/time_refs_price_v2.pkl", "wb") as times:
152 | dump(time_refs, times)
153 |
154 | # save training set as dictionary (h5py dump)
155 | f = h5py.File('../../data/processed/price/dataset_price_v2.hdf5', 'w')
156 |
157 | for group_name in dataset:
158 | group = f.create_group(group_name)
159 | for dset_name in dataset[group_name]:
160 | dset = group.create_dataset(dset_name, data = dataset[group_name][dset_name])
161 | f.close()
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
--------------------------------------------------------------------------------
/scripts/preprocessing/data_preprocessing_solar.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import sys
4 | import os
5 | from pickle import dump, load
6 | import h5py
7 |
8 | from preprocessing_funcs import solar_data_processing
9 |
10 | np.set_printoptions(threshold=sys.maxsize)
11 |
12 | ###########################################_____LOAD & PRE-PROCESS DATA_____###########################################
13 |
14 | #cache current working directory of main script
15 | workingDir = os.getcwd()
16 |
17 |
18 | # paths to nc files for x_value features:
19 | filepaths = {
20 | 'solarRad': '../../data/raw/net_solar_radiation',
21 | 'lowcloudcover': '../../data/raw/low_cloud_Cover',
22 | 'temperature': '../../data/raw/temperature'
23 | }
24 |
25 | # load labels (solar generation per HH)
26 | solarGenLabels = pd.read_csv('../../data/raw/solar_labels/HH_PVGen_v2.csv', parse_dates=True, index_col=0, header=0, dayfirst=True)
27 |
28 | # call main pre-processing function - sequence windowing no longer utilised
29 | dataset, time_refs = solar_data_processing(filepaths = filepaths, labels = solarGenLabels, input_seq_size = 336, output_seq_size = 48, workingDir = workingDir)
30 |
31 | # print data summaries
32 | print(*[f'{key}: {dataset["train_set"][key].shape}' for key in dataset['train_set'].keys()], sep='\n')
33 | print(*[f'{key}: {dataset["test_set"][key].shape}' for key in dataset['test_set'].keys()], sep='\n')
34 |
35 | # save time series references (inputs & outputs)
36 | print('saving data...')
37 | with open("../../data/processed/solar/time_refs_solar_min_v4.pkl", "wb") as times:
38 | dump(time_refs, times)
39 |
40 | # save training set as dictionary (h5py dump)
41 | f = h5py.File('../../data/processed/solar/dataset_solar_min_v4.hdf5', 'w')
42 |
43 | for group_name in dataset:
44 | group = f.create_group(group_name)
45 | for dset_name in dataset[group_name]:
46 | dset = group.create_dataset(dset_name, data = dataset[group_name][dset_name])
47 | f.close()
48 |
49 |
50 |
--------------------------------------------------------------------------------
/scripts/preprocessing/data_preprocessing_wind.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import sys
4 | import os
5 | from pickle import dump, load
6 | import h5py
7 |
8 | from preprocessing_funcs import wind_data_processing
9 |
10 |
11 | np.set_printoptions(threshold=sys.maxsize)
12 |
13 | ###########################################_____LOAD & PRE-PROCESS DATA_____###########################################
14 |
15 | #cache current working directory of main script
16 | workingDir = os.getcwd()
17 |
18 | # paths to nc files for x_value features:
19 | filepaths = {
20 | 'u_wind_component_10': '../../data/raw/10m_u_component_of_wind',
21 | 'v_wind_component_10': '../../data/raw/10m_v_component_of_wind',
22 | 'u_wind_component_100': '../../data/raw/100m_u_component_of_wind',
23 | 'v_wind_component_100': '../../data/raw/100m_v_component_of_wind',
24 | 'instantaneous_10m_wind_gust': '../../data/raw/instantaneous_10m_wind_gust',
25 | 'surface_pressure': '../../data/raw/surface_pressure',
26 | 'temperature': '../../data/raw/temperature'
27 | }
28 |
29 | #load labels (wind generation per HH)
30 | windGenLabels = pd.read_csv('../../data/raw/wind_labels/HH_windGen_v4.csv', parse_dates=True, index_col=0, header=0, dayfirst=True)
31 |
32 | # call main pre-processing function - sequence windowing no longer utilised
33 | dataset, time_refs = wind_data_processing(filepaths = filepaths, labels = windGenLabels, input_seq_size = 336, output_seq_size = 48, workingDir = workingDir)
34 |
35 | # print data summaries
36 | print(*[f'{key}: {dataset["train_set"][key].shape}' for key in dataset['train_set'].keys()], sep='\n')
37 | print(*[f'{key}: {dataset["test_set"][key].shape}' for key in dataset['test_set'].keys()], sep='\n')
38 |
39 | # save time series references (inputs & outputs)
40 | print('saving data...')
41 | with open("../../data/processed/wind/time_refs_wind_v4.pkl", "wb") as times:
42 | dump(time_refs, times)
43 |
44 | # save training set as dictionary (h5py dump)
45 | f = h5py.File('../../data/processed/wind/dataset_wind_v4.hdf5', 'w')
46 |
47 | for group_name in dataset:
48 | group = f.create_group(group_name)
49 | for dset_name in dataset[group_name]:
50 | dset = group.create_dataset(dset_name, data = dataset[group_name][dset_name])
51 | f.close()
52 |
53 |
54 |
55 |
56 |
57 |
--------------------------------------------------------------------------------
/scripts/preprocessing/preprocessing_funcs.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import netCDF4
4 | from netCDF4 import Dataset
5 | import os
6 | import glob
7 | import sys
8 | from datetime import datetime
9 | from sklearn.preprocessing import MinMaxScaler, StandardScaler
10 | import scipy
11 | import matplotlib.pyplot as plt
12 | from pickle import dump, load
13 | import time
14 | import h5py
15 |
16 | import h5py
17 | from workalendar.europe import UnitedKingdom
18 |
19 | # define calendar reference (allows for easy identification of holidays)
20 | cal = UnitedKingdom()
21 |
22 | # function to extract data from yearly .nc files passing directory
23 | def ncExtract(directory, current_wrk_dir): # concatenates data across files if multiple are present
24 |
25 | # initialise parameters
26 | os.chdir(directory)
27 | files = []
28 | readVariables = {}
29 | consistentVars = ['longitude', 'latitude', 'time']
30 |
31 | #read files in directory
32 | for file in glob.glob("*.nc"):
33 | files.append(file)
34 | files.sort()
35 |
36 | for i, file in enumerate(files):
37 | print(file)
38 | #read nc file using netCDF4
39 | ncfile = Dataset(file)
40 | variables = list(ncfile.variables.keys())
41 | #find unique vars
42 | uniqueVars = list(set(variables) - set(consistentVars))
43 |
44 | # iterate and concatenate each unique variable
45 | for variable in uniqueVars:
46 |
47 | if i == 0:
48 | readVariables['data'] = np.empty([0,ncfile.variables['latitude'].shape[0],
49 | ncfile.variables['longitude'].shape[0]])
50 |
51 | readVar = ncfile.variables[variable][:]
52 |
53 | readVariables['data'] = np.concatenate([readVariables['data'],readVar])
54 |
55 | #read & collect time
56 | if i == 0:
57 | readVariables['time'] = np.empty([0])
58 |
59 | timeVar = ncfile.variables['time']
60 | datesVar = netCDF4.num2date(timeVar[:], timeVar.units, timeVar.calendar)
61 | readVariables['time'] = np.concatenate([readVariables['time'],datesVar])
62 |
63 | #read lat and long
64 | readVariables['latitude'] = ncfile.variables['latitude'][:]
65 | readVariables['longitude'] = ncfile.variables['longitude'][:]
66 |
67 | #close ncfile file
68 | Dataset.close(ncfile)
69 |
70 | #change directory back
71 | os.chdir(current_wrk_dir)
72 |
73 | #define name of extracted data
74 | fileNameLoc = directory.rfind('/') + 1
75 | fileName = str(directory[fileNameLoc:])
76 |
77 | return readVariables
78 |
79 |
80 |
81 | # helper function to filter irregular values out
82 | def lv_filter(data):
83 | #define +ve and -ve thresholds
84 | filter_thres_pos = np.mean(np.mean(data)) * (10**(-10))
85 | filter_thres_neg = filter_thres_pos * (-1)
86 |
87 | #filter data relevant to thresholds
88 | data[(filter_thres_neg <= data) & (data <= filter_thres_pos)] = 0
89 |
90 | return data
91 |
92 |
93 | # helper function to interpolate hourly data (24 steps/day) to half-hourly (48 steps/day), averaging adjacent hours for the new points
94 | def interpolate_4d(array):
95 | interp_array = np.empty((array.shape[0]*2 , array.shape[1], array.shape[2], array.shape[3]))
96 | for ivar in range(array.shape[-1]):
97 | for interp_idx in range(interp_array.shape[0]):
98 | if (interp_idx % 2 == 0) or (int(np.ceil(interp_idx/2)) == array.shape[0]):
99 | interp_array[interp_idx, :, :, ivar] = array[int(np.floor(interp_idx/2)), :, :, ivar]
100 | else:
101 | interp_array[interp_idx, :, :, ivar] = (array[int(np.floor(interp_idx/2)), :, :, ivar] + array[int(np.ceil(interp_idx/2)), :, :, ivar]) / 2
102 |
103 | return interp_array
104 |
105 |
106 | # helper function to interpolate time array
107 | def interpolate_time(time_array):
108 | interp_time = np.linspace(time_array[0], time_array[-1], len(time_array)*2)
109 |
110 | return interp_time
111 |
112 |
113 | # helper function to check for nans - if any found, delete that day
114 | def remove_nan_days(x_in, y_out): # assumes both are lists of per-day pandas objects
115 | # check for missing vals in outputs
116 | idx = 0
117 | for i in range(len(y_out)):
118 | if y_out[idx].isnull().values.any() or x_in[idx].isnull().values.any():
119 | del x_in[idx]
120 | del y_out[idx]
121 | idx -= 1
122 | idx += 1
123 |
124 | return x_in, y_out
125 |
126 | # function to window time series data relative to specified input and output sequence lengths
127 | # NO LONGER USED #
128 | def format_data_into_timesteps(X1, X2, X3, Y, input_seq_size, output_seq_size, input_times_reference, output_times_reference):
129 | print('formatting data into timesteps & interpolating input data')
130 |
131 | #number of timesteps to be included in each sequence
132 | seqX1, seqX2, seqX3, seqY_in, seqY, in_times, out_times = [], [], [], [], [], [], []
133 | input_start, input_end = 0, 0
134 | output_start = input_seq_size + output_seq_size
135 |
136 | while (output_start + output_seq_size) < len(X1):
137 |
138 | x1 = np.empty((input_seq_size , X1.shape[1], X1.shape[2], X1.shape[3]))
139 | x2 = np.empty((input_seq_size , X2.shape[1]))
140 | x3 = np.empty((output_seq_size , X3.shape[1]))
141 | y_in = np.empty(((input_seq_size), 1))
142 | y = np.empty((output_seq_size, 1))
143 |
144 | in_time = np.empty(((input_seq_size)), dtype = 'datetime64[ns]')
145 | out_time = np.empty(((output_seq_size)), dtype = 'datetime64[ns]')
146 |
147 | #define sequences
148 | input_end = input_start + input_seq_size
149 | output_end = output_start + output_seq_size
150 |
151 | #add condition to omit any days with nan values
152 | if np.isnan(X1[input_start:input_end]).any() == True or np.isnan(X2[input_start:input_end]).any() == True or np.isnan(Y[input_start:input_end]).any() == True:
153 | input_start += input_seq_size
154 | output_start += input_seq_size
155 | continue
156 | elif np.isnan(X3[output_start:output_end]).any() == True or np.isnan(Y[output_start:output_end]).any() == True:
157 | input_start += output_seq_size
158 | output_start += output_seq_size
159 | continue
160 |
161 | x1[:,:,:,:] = X1[input_start:input_end]
162 | seqX1.append(x1)
163 | x2[:,:] = X2[input_start:input_end]
164 | seqX2.append(x2)
165 | x3[:,:] = X3[output_start:output_end]
166 | seqX3.append(x3)
167 | y_in[:,:] = Y[input_start:input_end]
168 | # y_in[-48:,:] = 0 # eliminate metered output - only NWP available for prediction day
169 | seqY_in.append(y_in)
170 | y[:] = Y[output_start:output_end]
171 | seqY.append(y)
172 |
173 | in_time[:] = np.squeeze(input_times_reference[input_start:input_end])
174 | in_times.append(in_time)
175 | out_time[:] = np.squeeze(output_times_reference[output_start:output_end])
176 | out_times.append(out_time)
177 |
178 | input_start += 1 # slide the window forward by one timestep
179 | output_start += 1
180 |
181 | print('converting to float32 numpy arrays')
182 | seqX1 = np.array(seqX1, dtype=np.float32)
183 | seqX2 = np.array(seqX2, dtype=np.float32)
184 | seqX3 = np.array(seqX3, dtype=np.float32)
185 | seqY_in = np.array(seqY_in, dtype=np.float32)
186 | seqY = np.array(seqY, dtype=np.float32)
187 |
188 |
189 | # stack 'Y_inputs' onto the spatial array
190 | print('combining feature array with lagged outputs')
191 | broadcaster = np.ones((seqX1.shape[0], seqX1.shape[1], seqX1.shape[2], seqX1.shape[3], 1), dtype=np.float32)
192 | broadcaster = broadcaster * np.expand_dims(np.expand_dims(seqY_in, axis =2), axis=2)
193 | seqX1 = np.concatenate((broadcaster, seqX1), axis = -1)
194 |
195 | #split data for train and test sets
196 | test_set_percentage = 0.1
197 | test_split = int(len(seqX1) * (1 - test_set_percentage))
198 |
199 |
200 | dataset = {
201 | 'train_set' : {
202 | 'X1_train': seqX1[:test_split],
203 | 'X2_train': seqX2[:test_split], # input time features
204 | 'X3_train': seqX3[:test_split], # output time features
205 | 'y_train': seqY[:test_split]
206 | },
207 | 'test_set' : {
208 | 'X1_test': seqX1[test_split:],
209 | 'X2_test': seqX2[test_split:],
210 | 'X3_test': seqX3[test_split:],
211 | 'y_test': seqY[test_split:]
212 | }
213 | }
214 |
215 | #create dictionary for time references
216 | time_refs = {
217 | 'input_times_train': in_times[:test_split],
218 | 'input_times_test': in_times[test_split:],
219 | 'output_times_train': out_times[:test_split],
220 | 'output_times_test': out_times[test_split:]
221 | }
222 |
223 | return dataset, time_refs
224 | # train_set, test_set, time_refs
225 |
226 |
227 | ###### WIND ##############################################################################################################################################
228 |
229 | # main function for preprocessing of data - wind specific updates applied
230 | def wind_data_processing(filepaths, labels, input_seq_size, output_seq_size, workingDir):
231 |
232 | #get dictionary keys
233 | keys = list(filepaths.keys())
234 |
235 | #dictionaries for extracted vars
236 | vars_extract = {}
237 | vars_extract_filtered = {}
238 | vars_extract_filtered_masked = {}
239 | vars_extract_filtered_masked_norm = {}
240 |
241 | #define daylight hours mask - relative to total solar radiation
242 | # solar_rad_reference = ncExtract('./Data/solar/Raw_Data/Net_Solar_Radiation')
243 | # solar_rad_reference = lv_filter(solar_rad_reference['data'])
244 | # daylight_hr_mask = solar_rad_reference > 0
245 |
246 | #cache matrix dimensions
247 | # dimensions = [solar_rad_reference.shape[0], solar_rad_reference.shape[1], solar_rad_reference.shape[2]]
248 |
249 | #loop to extract data features
250 | for i, key in enumerate(filepaths):
251 | vars_extract[str(key)] = ncExtract(filepaths[key], workingDir) #extract files
252 |
253 | #on the first iteration, cache time features & matrix dimensions
254 | if i == 0:
255 | times_in = vars_extract[str(key)]['time']
256 | dimensions = [vars_extract[str(key)]['data'].shape[0], vars_extract[str(key)]['data'].shape[1], vars_extract[str(key)]['data'].shape[2]]
257 |
258 | vars_extract_filtered[str(key)] = lv_filter(vars_extract[str(key)]['data']) # filter data
259 | # vars_extract_filtered[str(key)][~daylight_hr_mask] = 0 #mask data
260 | # scaler = MinMaxScaler() #normalise data
261 | # vars_extract_filtered_masked_norm[str(key)] = scaler.fit_transform(vars_extract_filtered[str(key)].reshape(vars_extract_filtered[str(key)].shape[0],-1)).reshape(dimensions[0], dimensions[1], dimensions[2])
262 |
263 | # convert u and v components to wind speed and direction
264 | ws_10 = np.sqrt((vars_extract_filtered['u_wind_component_10']**2) + (vars_extract_filtered['v_wind_component_10']**2))
265 | ws_100 = np.sqrt((vars_extract_filtered['u_wind_component_100']**2) + (vars_extract_filtered['v_wind_component_100']**2))
266 |
267 | wd_10 = np.mod(180+np.rad2deg(np.arctan2(vars_extract_filtered['u_wind_component_10'], vars_extract_filtered['v_wind_component_10'])), 360)
268 | wd_100 = np.mod(180+np.rad2deg(np.arctan2(vars_extract_filtered['u_wind_component_100'], vars_extract_filtered['v_wind_component_100'])), 360)
269 |
270 | # convert ws and wd to float 32
271 | ws_10 = ws_10.astype('float32')
272 | wd_10 = wd_10.astype('float32')
273 | ws_100 = ws_100.astype('float32')
274 | wd_100 = wd_100.astype('float32')
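# Quick sanity check on the u/v -> speed/direction conversion above (numbers illustrative):
#   u = 3.0, v = 4.0  ->  speed = sqrt(3**2 + 4**2) = 5.0 m/s
#   direction = mod(180 + degrees(arctan2(3, 4)), 360) ≈ 216.9 deg,
#   i.e. the meteorological convention of the bearing the wind blows *from*.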
275 |
276 | # combine into an array
277 | feature_array = [ws_10, wd_10, ws_100, wd_100, vars_extract_filtered['temperature'], vars_extract_filtered['surface_pressure']]
278 |
279 | #stack features into one matrix
280 | feature_array = np.stack(feature_array, axis = -1)
281 |
282 | # interpolate feature array from 24hrs to 48hrs
283 | print('interpolating data...')
284 | feature_array = interpolate_4d(feature_array)
285 |
286 | # remove nan values - by day
287 | outputs_mask = labels['MW'].isna().groupby(labels.index.normalize()).transform('any')
288 | # outputs_mask = labels['MW'].isna()
289 |
290 | # apply mask, removing days containing any nan values
291 | feature_array = feature_array[~outputs_mask]
292 | labels = labels[~outputs_mask]
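# Illustrative behaviour of the day-level mask above (dates are made up): if the
# half-hourly 'MW' label at 2018-06-01 02:30 is NaN, groupby(normalize()).transform('any')
# flags every row whose index falls on 2018-06-01, so all 48 half-hours of that day are
# dropped from feature_array and labels together, keeping the two row-aligned.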
293 |
294 | dimensions = feature_array.shape
295 | feature_array_final = np.empty_like(feature_array)
296 |
297 | # normalise features
298 | for i in range(feature_array.shape[-1]):
299 | # scaler = StandardScaler(with_mean=False) #normalise data
300 | scaler = MinMaxScaler()
301 | array = feature_array[:,:,:,i]
302 | feature_array_final[:,:,:,i:i+1] = scaler.fit_transform(array.reshape(array.shape[0],-1)).reshape(dimensions[0], dimensions[1], dimensions[2], 1)
303 |
304 | #Do time feature engineering for input times
305 | times_in = pd.DataFrame({"datetime": times_in})
306 | times_in['datetime'] = times_in['datetime'].astype('str')
307 | times_in['datetime'] = pd.to_datetime(times_in['datetime'])
308 | times_in.set_index('datetime', inplace = True)
309 | in_times = times_in.index
310 |
311 | # get hours and months from datetime
312 | hour_in = times_in.index.hour
313 | hour_in = np.float32(hour_in)
314 |
315 | # add HH to hours
316 | index = 0
317 | for idx, time in enumerate(hour_in):
318 | if time == 24:
319 | index += 1
320 | else:
321 | hour_in = np.insert(hour_in, index+1, time+0.5)
322 | index += 2
323 |
324 | month_in = times_in.index.month - 1
325 | year_in = times_in.index.year
326 |
327 | # duplicate months to compensate for switch from 24hr to 48hr input data
328 | index = 0
329 | for idx, month in enumerate(month_in):
330 | if idx % 24 == 0:
331 | index += 1
332 | else:
333 | month_in = np.insert(month_in, index+1, month)
334 | index += 2
335 |
336 | # create one_hot encoding input times: hour and month
337 | one_hot_months_in = pd.get_dummies(month_in, prefix='month_')
338 | one_hot_hours_in = pd.get_dummies(hour_in, prefix='hour_')
339 |
340 | times_in_df = pd.concat([one_hot_hours_in, one_hot_months_in], axis=1)
341 | times_in = times_in_df.values
342 |
343 | # create sin / cos of input times
344 | times_in_hour_sin = np.expand_dims(np.sin(2*np.pi*hour_in/np.max(hour_in)), axis=-1)
345 | times_in_month_sin = np.expand_dims(np.sin(2*np.pi*month_in/np.max(month_in)), axis=-1)
346 |
347 | times_in_hour_cos = np.expand_dims(np.cos(2*np.pi*hour_in/np.max(hour_in)),axis=-1)
348 | times_in_month_cos = np.expand_dims(np.cos(2*np.pi*month_in/np.max(month_in)), axis=-1)
349 |
350 | times_in_year = (in_times - np.min(in_times)) / (np.max(in_times) - np.min(in_times))
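# Note on the cyclic (sin/cos) encoding used here and for the output times below
# (numbers are illustrative): with half-hourly hours in [0, 23.5],
#   h = 0     -> (sin, cos) = (0,  1)
#   h = 11.75 -> (0, -1)
#   h = 23.5  -> (0,  1), i.e. the end of the day wraps round to sit beside midnight,
# avoiding the artificial jump a raw 0-23.5 feature would introduce (dividing by max(h)
# rather than the full period means 23.5 and 0 share exactly the same point).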
351 |
352 | #Process output times as secondary input for decoder
353 | #cache output times
354 | label_times = labels.index
355 |
356 | #declare 'output' time features
357 | df_times_outputs = pd.DataFrame()
358 | df_times_outputs['hour'] = labels.index.hour
359 | df_times_outputs['month'] = labels.index.month - 1
360 | df_times_outputs['year'] = labels.index.year
361 |
362 | #process output times for half hours
363 | for idx, row in df_times_outputs.iterrows():
364 | if idx % 2 != 0:
365 | df_times_outputs.iloc[idx, 0] = df_times_outputs.iloc[idx, 0] + 0.5
366 |
367 | months_out = pd.get_dummies(df_times_outputs['month'], prefix='month_')
368 | hours_out = pd.get_dummies(df_times_outputs['hour'], prefix='hour_')
369 |
370 | times_out_df = pd.concat([hours_out, months_out], axis=1)
371 | times_out = times_out_df.values
372 |
373 | # create sin / cos of output times
374 | times_out_hour_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1)
375 | times_out_month_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1)
376 |
377 | times_out_hour_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1)
378 | times_out_month_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1)
379 |
380 | times_out_year = np.expand_dims((df_times_outputs['year'].values - np.min(df_times_outputs['year'])) / (np.max(df_times_outputs['year']) - np.min(df_times_outputs['year'])), axis=-1)
381 |
382 | # print(times_out_hour_cos[:50])
383 | labels['MW'] = labels['MW'].astype('float32')
384 |
385 | #normalise labels
386 | scaler = StandardScaler(with_mean=False)
387 | # scaler = MinMaxScaler()
388 | labels[['MW']] = scaler.fit_transform(labels[['MW']])
389 |
390 | # save the scaler for inference
391 | dump(scaler, open('../../data/processed/wind/_scaler/scaler_wind_v3.pkl', 'wb'))
392 |
393 | # collect input and output time references
394 | time_refs = [in_times, label_times]
395 |
396 | # one-hot method
397 | # input_times = times_in_df.values
398 | # output_times = times_out_df.values
399 |
400 | # cyclic method
401 | output_times = np.concatenate((times_out_hour_sin, times_out_hour_cos, times_out_month_sin, times_out_month_cos, times_out_year), axis=-1)
402 |
403 | labels = labels.values
404 |
405 | # inputs are now at 48hr (half-hourly) resolution, so reuse the output time features for X2
406 | input_times = output_times
407 |
408 | # add labels to inputs
409 | broadcaster = np.ones((feature_array_final.shape[0], feature_array_final.shape[1], feature_array_final.shape[2], 1), dtype=np.float32)
410 | broadcaster = broadcaster * np.expand_dims(np.expand_dims(labels, axis =2), axis=2)
411 | feature_array_final = np.concatenate((broadcaster, feature_array_final), axis = -1)
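# Shape sketch for the broadcast above (dimension names are assumptions):
#   labels (T, 1) -> expand_dims twice -> (T, 1, 1, 1)
#   ones (T, lat, lon, 1) * (T, 1, 1, 1) -> (T, lat, lon, 1), the metered MW value
#   tiled over every grid cell of its timestep; after the concatenate,
#   feature_array_final has shape (T, lat, lon, n_features + 1).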
412 |
413 |
414 | # declare train / test split
415 | test_split_seq = 8544 # hold out the last 8544 half-hourly steps (178 days, roughly 10%)
416 |
417 | # create dataset
418 | dataset = {
419 | 'train_set' : {
420 | 'X1_train': feature_array_final[:-test_split_seq],
421 | 'X2_train': input_times[:-test_split_seq], # input time features
422 | 'X3_train': output_times[:-test_split_seq], # output time features
423 | 'y_train': labels[:-test_split_seq]
424 | },
425 | 'test_set' : {
426 | 'X1_test': feature_array_final[-test_split_seq:],
427 | 'X2_test': input_times[-test_split_seq:],
428 | 'X3_test': output_times[-test_split_seq:],
429 | 'y_test': labels[-test_split_seq:]
430 | }
431 | }
432 |
433 | time_refs = {
434 | 'input_times_train': in_times[:-test_split_seq],
435 | 'input_times_test': in_times[-test_split_seq:],
436 | 'output_times_train': label_times[:-test_split_seq],
437 | 'output_times_test': label_times[-test_split_seq:]
438 | }
439 |
440 | return dataset, time_refs
441 |
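# Illustrative call site for wind_data_processing (paths, the labels file and the sequence
# lengths are assumptions, not taken from this repository's run scripts):
#   filepaths = {'u_wind_component_10': 'data/raw/wind/u_10m', 'v_wind_component_10': '...',
#                'u_wind_component_100': '...', 'v_wind_component_100': '...',
#                'temperature': '...', 'surface_pressure': '...'}
#   labels = pd.read_csv('data/raw/wind/metered_mw.csv', index_col=0, parse_dates=True)
#   dataset, time_refs = wind_data_processing(filepaths, labels, 336, 48, os.getcwd())
#   dataset['train_set']['X1_train'].shape   # (timesteps, lat, lon, n_features + 1)
# Note the sequence-length arguments are currently unused here, since the windowing step
# (format_data_into_timesteps) is no longer applied.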
442 |
443 | ###### SOLAR ##############################################################################################################################################
444 |
445 | # function to process data in train and test sets
446 | def solar_data_processing(filepaths, labels, input_seq_size, output_seq_size, workingDir):
447 |
448 | #get dictionary keys
449 | keys = list(filepaths.keys())
450 |
451 | #dictionaries for extracted vars
452 | vars_extract = {}
453 | vars_extract_filtered = {}
454 | vars_extract_filtered_masked = {}
455 | vars_extract_filtered_masked_norm = {}
456 |
457 | #define daylight hours mask - relative to total solar radiation
458 | # solar_rad_reference = ncExtract('./Data/solar/Raw_Data/Net_Solar_Radiation')
459 | # solar_rad_reference = lv_filter(solar_rad_reference['data'])
460 | # daylight_hr_mask = solar_rad_reference > 0
461 |
462 | #cache matrix dimensions
463 | # dimensions = [solar_rad_reference.shape[0], solar_rad_reference.shape[1], solar_rad_reference.shape[2]]
464 |
465 | #loop to extract data features
466 | for i, key in enumerate(filepaths):
467 | vars_extract[str(key)] = ncExtract(filepaths[key], workingDir) #extract files
468 |
469 | #on the first iteration, cache time features & matrix dimensions
470 | if i == 0:
471 | times_in = vars_extract[str(key)]['time']
472 | dimensions = [vars_extract[str(key)]['data'].shape[0], vars_extract[str(key)]['data'].shape[1], vars_extract[str(key)]['data'].shape[2]]
473 |
474 | vars_extract_filtered[str(key)] = lv_filter(vars_extract[str(key)]['data']) # filter data
475 | # vars_extract_filtered[str(key)][~daylight_hr_mask] = 0 #mask data
476 | # scaler = MinMaxScaler() #normalise data
477 | scaler = StandardScaler(with_mean=False)
478 | vars_extract_filtered_masked_norm[str(key)] = scaler.fit_transform(vars_extract_filtered[str(key)].reshape(vars_extract_filtered[str(key)].shape[0],-1)).reshape(dimensions[0], dimensions[1], dimensions[2])
479 |
480 |
481 | #stack features into one matrix
482 | feature_array = [vars_extract_filtered_masked_norm[str(i)] for i in vars_extract_filtered_masked_norm]
483 | feature_array = np.stack(feature_array, axis = -1)
484 |
485 | # interpolate feature array from 24hrs to 48hrs
486 | feature_array = interpolate_4d(feature_array)
487 |
488 | # remove nan values - by day
489 | outputs_mask = labels['MW'].isna().groupby(labels.index.normalize()).transform('any')
490 |
491 |
492 | # apply mask, removing days containing any nan values
493 | feature_array = feature_array[~outputs_mask]
494 | labels = labels[~outputs_mask]
495 |
496 | dimensions = feature_array.shape
497 |
498 | #Do time feature engineering for input times
499 | times_in = pd.DataFrame({"datetime": times_in})
500 | times_in['datetime'] = times_in['datetime'].astype('str')
501 | times_in['datetime'] = pd.to_datetime(times_in['datetime'])
502 | times_in.set_index('datetime', inplace = True)
503 | in_times = times_in.index
504 |
505 | # get hours and months from datetime
506 | hour_in = times_in.index.hour
507 | hour_in = np.float32(hour_in)
508 |
509 | # add HH to hours
510 | index = 0
511 | for idx, time in enumerate(hour_in):
512 | if time == 24:
513 | index += 1
514 | else:
515 | hour_in = np.insert(hour_in, index+1, time+0.5)
516 | index += 2
517 |
518 | month_in = times_in.index.month - 1
519 | year_in = times_in.index.year
520 |
521 | # duplicate months to compensate for switch from 24hr to 48hr input data
522 | index = 0
523 | for idx, month in enumerate(month_in):
524 | if idx % 24 == 0:
525 | index += 1
526 | else:
527 | month_in = np.insert(month_in, index+1, month)
528 | index += 2
529 |
530 | # create one_hot encoding input times: hour and month
531 | one_hot_months_in = pd.get_dummies(month_in, prefix='month_')
532 | one_hot_hours_in = pd.get_dummies(hour_in, prefix='hour_')
533 |
534 | times_in_df = pd.concat([one_hot_hours_in, one_hot_months_in], axis=1)
535 | times_in = times_in_df.values
536 |
537 | # create sin / cos of input times
538 | times_in_hour_sin = np.expand_dims(np.sin(2*np.pi*hour_in/np.max(hour_in)), axis=-1)
539 | times_in_month_sin = np.expand_dims(np.sin(2*np.pi*month_in/np.max(month_in)), axis=-1)
540 |
541 | times_in_hour_cos = np.expand_dims(np.cos(2*np.pi*hour_in/np.max(hour_in)),axis=-1)
542 | times_in_month_cos = np.expand_dims(np.cos(2*np.pi*month_in/np.max(month_in)), axis=-1)
543 |
544 | times_in_year = (in_times - np.min(in_times)) / (np.max(in_times) - np.min(in_times))
545 |
546 | #Process output times as secondary input for decoder
547 | #cache output times
548 | label_times = labels.index
549 |
550 | #declare 'output' time features
551 | df_times_outputs = pd.DataFrame()
552 | df_times_outputs['hour'] = labels.index.hour
553 | df_times_outputs['month'] = labels.index.month - 1
554 | df_times_outputs['year'] = labels.index.year
555 |
556 | #process output times for half hours
557 | for idx, row in df_times_outputs.iterrows():
558 | if idx % 2 != 0:
559 | df_times_outputs.iloc[idx, 0] = df_times_outputs.iloc[idx, 0] + 0.5
560 |
561 | months_out = pd.get_dummies(df_times_outputs['month'], prefix='month_')
562 | hours_out = pd.get_dummies(df_times_outputs['hour'], prefix='hour_')
563 |
564 | times_out_df = pd.concat([hours_out, months_out], axis=1)
565 | times_out = times_out_df.values
566 |
567 | # create sin / cos of output times
568 | times_out_hour_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1)
569 | times_out_month_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1)
570 |
571 | times_out_hour_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1)
572 | times_out_month_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1)
573 |
574 | times_out_year = np.expand_dims((df_times_outputs['year'].values - np.min(df_times_outputs['year'])) / (np.max(df_times_outputs['year']) - np.min(df_times_outputs['year'])), axis=-1)
575 |
576 | # normalise y labels
577 | scaler = StandardScaler(with_mean=False)
578 | # scaler = MinMaxScaler()
579 | labels[['MW']] = scaler.fit_transform(labels[['MW']])
580 |
581 | # save the scaler for inference
582 | dump(scaler, open('../../data/processed/solar/_scaler/scaler_solar_v4.pkl', 'wb'))
583 |
584 | in_times = label_times # inputs interpolated to 48hr resolution, so reuse label times as the input time reference
585 | time_refs = [in_times, label_times]
586 |
587 | # one-hot method
588 | # input_times = times_in_df.values
589 | # output_times = times_out_df.values
590 |
591 | # cyclic method
592 | # input_times = np.concatenate((times_in_hour_sin, times_in_hour_cos, times_in_month_sin, times_in_month_cos), axis=-1) # switched to output times for HH periods
593 | output_times = np.concatenate((times_out_hour_sin, times_out_hour_cos, times_out_month_sin, times_out_month_cos, times_out_year), axis=-1)
594 |
595 | labels = labels.values
596 |
597 | # add labels to inputs
598 | print('combining feature array with lagged outputs')
599 | broadcaster = np.ones((feature_array.shape[0], feature_array.shape[1], feature_array.shape[2], 1), dtype=np.float32)
600 | broadcaster = broadcaster * np.expand_dims(np.expand_dims(labels, axis =2), axis=2)
601 | feature_array = np.concatenate((broadcaster, feature_array), axis = -1)
602 |
603 | # inputs are now at 48hr (half-hourly) resolution, so reuse the output time features for X2
604 | input_times = output_times
605 |
606 | test_split_seq = 8544 # hold out the last 8544 half-hourly steps (178 days, roughly 10%)
607 |
608 | # create dataset
609 | dataset = {
610 | 'train_set' : {
611 | 'X1_train': feature_array[:-test_split_seq],
612 | 'X2_train': input_times[:-test_split_seq], # input time features
613 | 'X3_train': output_times[:-test_split_seq], # output time features
614 | 'y_train': labels[:-test_split_seq]
615 | },
616 | 'test_set' : {
617 | 'X1_test': feature_array[-test_split_seq:],
618 | 'X2_test': input_times[-test_split_seq:],
619 | 'X3_test': output_times[-test_split_seq:],
620 | 'y_test': labels[-test_split_seq:]
621 | }
622 | }
623 |
624 | time_refs = {
625 | 'input_times_train': in_times[:-test_split_seq],
626 | 'input_times_test': in_times[-test_split_seq:],
627 | 'output_times_train': label_times[:-test_split_seq],
628 | 'output_times_test': label_times[-test_split_seq:]
629 | }
630 |
631 | return dataset, time_refs
632 | # return train_set, test_set, time_refs
633 |
634 | ###### DEMAND ##############################################################################################################################################
635 |
636 | #function to process data in train and test sets
637 | def demand_data_processing(filepaths, labels, workingDir):
638 |
639 | #get dictionary keys
640 | keys = list(filepaths.keys())
641 |
642 | #dictionaries for extracted vars
643 | vars_extract = {}
644 | vars_extract_filtered = {}
645 | vars_extract_filtered_masked = {}
646 | vars_extract_filtered_masked_norm = {}
647 |
648 | #define daylight hours mask - relative to total solar radiation
649 | # solar_rad_reference = ncExtract('./Data/solar/Raw_Data/Net_Solar_Radiation')
650 | # solar_rad_reference = lv_filter(solar_rad_reference['data'])
651 | # daylight_hr_mask = solar_rad_reference > 0
652 |
653 | #cache matrix dimensions
654 | # dimensions = [solar_rad_reference.shape[0], solar_rad_reference.shape[1], solar_rad_reference.shape[2]]
655 |
656 | #loop to extract data features
657 | for i, key in enumerate(filepaths):
658 | vars_extract[str(key)] = ncExtract(filepaths[key], workingDir) #extract files
659 |
660 | #on the first iteration, cache time features & matrix dimensions
661 | if i == 0:
662 | times_in = vars_extract[str(key)]['time']
663 | dimensions = [vars_extract[str(key)]['data'].shape[0], vars_extract[str(key)]['data'].shape[1], vars_extract[str(key)]['data'].shape[2]]
664 |
665 | vars_extract_filtered[str(key)] = lv_filter(vars_extract[str(key)]['data']) # filter data
666 | # vars_extract_filtered[str(key)][~daylight_hr_mask] = 0 #mask data
667 | # scaler = MinMaxScaler() #normalise data
668 | scaler = StandardScaler(with_mean=False)
669 | vars_extract_filtered_masked_norm[str(key)] = scaler.fit_transform(vars_extract_filtered[str(key)].reshape(vars_extract_filtered[str(key)].shape[0],-1)).reshape(dimensions[0], dimensions[1], dimensions[2])
670 |
671 | #stack features into one matrix
672 | feature_array = [vars_extract_filtered_masked_norm[str(i)] for i in vars_extract_filtered_masked_norm]
673 | feature_array = np.stack(feature_array, axis = -1)
674 | # feature_array = np.concatenate((feature_array, input_timefeatures), axis = -1)
675 |
676 | # interpolate feature array from 24hrs to 48hrs
677 | feature_array = interpolate_4d(feature_array)
678 |
679 | # remove nan values
680 | outputs_mask = labels['MW'].isna().groupby(labels.index.normalize()).transform('any')
681 |
682 | # apply mask, removing days containing any nan values
683 | feature_array = feature_array[~outputs_mask]
684 | labels = labels[~outputs_mask]
685 |
686 | # do time feature engineering for input times
687 | times_in = pd.DataFrame({"datetime": times_in})
688 | times_in['datetime'] = times_in['datetime'].astype('str')
689 | times_in['datetime'] = pd.to_datetime(times_in['datetime'])
690 | times_in.set_index('datetime', inplace = True)
691 | in_times = times_in.index
692 |
693 | # get hours and months from datetime
694 | hour_in = times_in.index.hour
695 | hour_in = np.float32(hour_in)
696 |
697 | # add HH to hours
698 | index = 0
699 | for idx, time in enumerate(hour_in):
700 | if time == 24:
701 | index += 1
702 | else:
703 | hour_in = np.insert(hour_in, index+1, time+0.5)
704 | index += 2
705 |
706 | month_in = times_in.index.month - 1
707 | year_in = times_in.index.year
708 |
709 | # duplicate months to compensate for switch from 24hr to 48hr input data
710 | index = 0
711 | for idx, month in enumerate(month_in):
712 | if idx % 24 == 0:
713 | index += 1
714 | else:
715 | month_in = np.insert(month_in, index+1, month)
716 | index += 2
717 |
718 | # create one_hot encoding input times: hour and month
719 | one_hot_months_in = pd.get_dummies(month_in, prefix='month_')
720 | one_hot_hours_in = pd.get_dummies(hour_in, prefix='hour_')
721 |
722 | times_in_df = pd.concat([one_hot_hours_in, one_hot_months_in], axis=1)
723 | times_in = times_in_df.values
724 |
725 | # create sin / cos of input times
726 | times_in_hour_sin = np.expand_dims(np.sin(2*np.pi*hour_in/np.max(hour_in)), axis=-1)
727 | times_in_month_sin = np.expand_dims(np.sin(2*np.pi*month_in/np.max(month_in)), axis=-1)
728 |
729 | times_in_hour_cos = np.expand_dims(np.cos(2*np.pi*hour_in/np.max(hour_in)),axis=-1)
730 | times_in_month_cos = np.expand_dims(np.cos(2*np.pi*month_in/np.max(month_in)), axis=-1)
731 |
732 | times_in_year = (in_times - np.min(in_times)) / (np.max(in_times) - np.min(in_times))
733 |
734 | #Process output times as secondary input for decoder
735 | #cache output times
736 | label_times = labels.index
737 |
738 | #declare 'output' time features
739 | df_times_outputs = pd.DataFrame()
740 | df_times_outputs['date'] = labels.index.date
741 | df_times_outputs['hour'] = labels.index.hour
742 | df_times_outputs['month'] = labels.index.month - 1
743 | df_times_outputs['year'] = labels.index.year
744 | df_times_outputs['day_of_week'] = labels.index.dayofweek
745 | df_times_outputs['day_of_year'] = labels.index.dayofyear - 1
746 | df_times_outputs['weekend'] = df_times_outputs['day_of_week'].apply(lambda x: 1 if x>=5 else 0)
747 |
748 |
749 | # account for bank / public holidays
750 | start_date = labels.index.min()
751 | end_date = labels.index.max()
752 | start_year = df_times_outputs['year'].min()
753 | end_year = df_times_outputs['year'].max()
754 |
755 | holidays = set(holiday[0]
756 | for year in range(start_year, end_year + 1)
757 | for holiday in cal.holidays(year)
758 | if start_date <= holiday[0] <= end_date)
759 |
760 | df_times_outputs['holiday'] = df_times_outputs['date'].isin(holidays).astype(int)
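# Illustrative behaviour of the holiday flag above ('cal' is assumed to be a
# workalendar-style calendar instantiated with the module imports): cal.holidays(year)
# yields (date, name) pairs, so the comprehension keeps only the dates; a label
# timestamp falling on e.g. 25 December would then get holiday == 1, an ordinary
# working day 0.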
761 |
762 | #process output times for half hours
763 | for idx, row in df_times_outputs.iterrows():
764 | if idx % 2 != 0:
765 | df_times_outputs.iloc[idx, 1] = df_times_outputs.iloc[idx, 1] + 0.5
766 |
767 | months_out = pd.get_dummies(df_times_outputs['month'], prefix='month_')
768 | hours_out = pd.get_dummies(df_times_outputs['hour'], prefix='hour_')
769 |
770 | times_out_df = pd.concat([hours_out, months_out], axis=1)
771 | times_out = times_out_df.values
772 |
773 | # create sin / cos of output hour
774 | times_out_hour_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1)
775 | times_out_hour_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1)
776 |
777 | # create sin / cos of output month
778 | times_out_month_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1)
779 | times_out_month_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1)
780 |
781 | # create sin / cos of output year
782 | times_out_year = np.expand_dims((df_times_outputs['year'].values - np.min(df_times_outputs['year'])) / (np.max(df_times_outputs['year']) - np.min(df_times_outputs['year'])), axis=-1)
783 |
784 | # create sin / cos of output day of week
785 | times_out_DoW_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['day_of_week']/np.max(df_times_outputs['day_of_week'])), axis=-1)
786 | times_out_DoW_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['day_of_week']/np.max(df_times_outputs['day_of_week'])), axis=-1)
787 |
788 | # create sin / cos of output day of year
789 | times_out_DoY_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['day_of_year']/np.max(df_times_outputs['day_of_year'])), axis=-1)
790 | times_out_DoY_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['day_of_year']/np.max(df_times_outputs['day_of_year'])), axis=-1)
791 |
792 | #normalise labels
793 | scaler = StandardScaler(with_mean=False)
794 | labels[['MW']] = scaler.fit_transform(labels[['MW']])
795 |
796 | # save the scaler for inference
797 | dump(scaler, open('../../data/processed/demand/_scaler/scaler_demand_v2.pkl', 'wb'))
798 |
799 | time_refs = [in_times, label_times]
800 |
801 | # one-hot method
802 | # input_times = times_in_df.values
803 | # output_times = times_out_df.values
804 |
805 | weekends = np.expand_dims(df_times_outputs['weekend'].values, axis =-1)
806 | holidays = np.expand_dims(df_times_outputs['holiday'].values, axis =-1)
807 |
808 | # cyclic method
809 | # input_times = np.concatenate((times_in_hour_sin, times_in_hour_cos, times_in_month_sin, times_in_month_cos), axis=-1) # switched to output times for HH periods
810 | output_times = np.concatenate((times_out_hour_sin, times_out_hour_cos, times_out_month_sin, times_out_month_cos, times_out_DoW_sin, times_out_DoW_cos,
811 | times_out_DoY_sin, times_out_DoY_cos, times_out_year, weekends, holidays), axis=-1)
812 |
813 | labels = labels.values
814 |
815 | # inputs are now at 48hr (half-hourly) resolution, so reuse the output time features for X2
816 | input_times = output_times
817 |
818 | # add labels to inputs
819 | print('combining feature array with lagged outputs')
820 | broadcaster = np.ones((feature_array.shape[0], feature_array.shape[1], feature_array.shape[2], 1), dtype=np.float32)
821 | broadcaster = broadcaster * np.expand_dims(np.expand_dims(labels, axis =2), axis=2)
822 | feature_array = np.concatenate((broadcaster, feature_array), axis = -1)
823 |
824 | #divide into timesteps & train and test sets
825 | # dataset, time_refs = format_data_into_timesteps(X1 = feature_array, X2 = input_times , X3 = output_times, Y = labels, input_seq_size = 240, output_seq_size = 48, input_times_reference = time_refs[1], output_times_reference = time_refs[1]) # converting from 24hr to 48hr inputs hence can use output time references
826 | # train_set, test_set, time_refs
827 |
828 | # def to_float32(input_dict):
829 | # for idx, key in enumerate(input_dict.keys()):
830 | # input_dict[key] = input_dict[key].astype(np.float32)
831 | # return input_dict
832 |
833 | # train_set = to_float32(train_set)
834 | # test_set = to_float32(test_set)
835 |
836 | test_split_seq = 8544 # hold out the last 8544 half-hourly steps (178 days, roughly 10%)
837 |
838 | # input_test_seq = test_split_seq + (input_seq_size - 1)
839 | # output_test_seq = test_split_seq + (output_seq_size - 1)
840 |
841 | # create dataset
842 | dataset = {
843 | 'train_set' : {
844 | 'X1_train': feature_array[:-test_split_seq],
845 | 'X2_train': input_times[:-test_split_seq], # input time features
846 | 'X3_train': output_times[:-test_split_seq], # output time features
847 | 'y_train': labels[:-test_split_seq]
848 | },
849 | 'test_set' : {
850 | 'X1_test': feature_array[-test_split_seq:],
851 | 'X2_test': input_times[-test_split_seq:],
852 | 'X3_test': output_times[-test_split_seq:],
853 | 'y_test': labels[-test_split_seq:]
854 | }
855 | }
856 |
857 | time_refs = {
858 | 'input_times_train': label_times[:-test_split_seq],
859 | 'input_times_test': label_times[-test_split_seq:],
860 | 'output_times_train': label_times[:-test_split_seq],
861 | 'output_times_test': label_times[-test_split_seq:]
862 | }
863 |
864 | # def to_float32(input_dict):
865 | # for idx, key in enumerate(input_dict.keys()):
866 | # input_dict[key] = input_dict[key].astype(np.float32)
867 | # return input_dict
868 |
869 | # train_set = to_float32(train_set)
870 | # test_set = to_float32(test_set)
871 |
872 | return dataset, time_refs
873 | # return train_set, test_set, time_refs
874 |
875 |
876 |
--------------------------------------------------------------------------------
/visualisations/cloud_cover_(input)_animation.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/cloud_cover_(input)_animation.gif
--------------------------------------------------------------------------------
/visualisations/d3_quantile_plot_examples.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/d3_quantile_plot_examples.png
--------------------------------------------------------------------------------
/visualisations/d3_temporal_attention_plot_demand.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/d3_temporal_attention_plot_demand.png
--------------------------------------------------------------------------------
/visualisations/d3_temporal_attention_plot_price.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/d3_temporal_attention_plot_price.png
--------------------------------------------------------------------------------
/visualisations/d3_temporal_attention_plot_solar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/d3_temporal_attention_plot_solar.png
--------------------------------------------------------------------------------
/visualisations/d3_temporal_attention_plot_wind.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/d3_temporal_attention_plot_wind.png
--------------------------------------------------------------------------------
/visualisations/memory_leak_test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/memory_leak_test.png
--------------------------------------------------------------------------------
/visualisations/model_architecture_schematic_markup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/model_architecture_schematic_markup.png
--------------------------------------------------------------------------------
/visualisations/performance_breakdown_markup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/performance_breakdown_markup.png
--------------------------------------------------------------------------------
/visualisations/solar_spatial_attentions_animation.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/solar_spatial_attentions_animation.gif
--------------------------------------------------------------------------------
/visualisations/tabular_performance_breakdown.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/tabular_performance_breakdown.png
--------------------------------------------------------------------------------