├── .gitignore
├── README.md
├── data
│   ├── processed
│   │   └── README.md
│   └── raw
│       └── README.md
├── models
│   ├── bilstm
│   │   ├── demand
│   │   │   └── q_all_bilstm
│   │   │       └── demand_bilstm.h5
│   │   ├── price
│   │   │   └── q_all_bilstm
│   │   │       └── price_bilstm.h5
│   │   ├── solar
│   │   │   └── q_all_bilstm
│   │   │       └── solar_bilstm.h5
│   │   └── wind
│   │       └── q_all_bilstm
│   │           └── wind_bilstm.h5
│   ├── seq2seq+temporal+spatial
│   │   ├── demand
│   │   │   └── q_all_seq2seq+temporal+spatial
│   │   │       ├── demand_main.h5
│   │   │       ├── demand_spatial_enc.h5
│   │   │       └── demand_temporal_enc.h5
│   │   ├── solar
│   │   │   └── q_all_seq2seq+temporal+spatial
│   │   │       ├── solar_main.h5
│   │   │       ├── solar_spatial_enc.h5
│   │   │       └── solar_temporal_enc.h5
│   │   └── wind
│   │       └── q_all_seq2seq+temporal+spatial
│   │           ├── wind_main.h5
│   │           ├── wind_spatial_enc.h5
│   │           └── wind_temporal_enc.h5
│   ├── seq2seq+temporal
│   │   ├── demand
│   │   │   └── q_all_seq2seq+temporal
│   │   │       ├── demand_seq2seq+temporal.h5
│   │   │       └── demand_seq2seq+temporal_enc.h5
│   │   ├── price
│   │   │   └── q_all_seq2seq+temporal
│   │   │       ├── price_seq2seq+temporal.h5
│   │   │       └── price_seq2seq+temporal_enc.h5
│   │   ├── solar
│   │   │   └── q_all_seq2seq+temporal
│   │   │       ├── solar_seq2seq+temporal.h5
│   │   │       └── solar_seq2seq+temporal_enc.h5
│   │   └── wind
│   │       └── q_all_seq2seq+temporal
│   │           ├── wind_seq2seq+temporal.h5
│   │           └── wind_seq2seq+temporal_enc.h5
│   └── seq2seq
│       ├── demand
│       │   └── q_all_seq2seq
│       │       └── demand_seq2seq.h5
│       ├── price
│       │   └── q_all_seq2seq
│       │       └── price_seq2seq.h5
│       ├── solar
│       │   └── q_all_seq2seq
│       │       └── solar_seq2seq.h5
│       └── wind
│           └── q_all_seq2seq
│               └── wind_seq2seq.h5
├── requirements.txt
├── results
│   ├── demand
│   │   ├── attention_plot_results_demand.csv
│   │   ├── bilstm
│   │   │   ├── forecasted_time_series_demand_bilstm.pkl
│   │   │   ├── preformance_summary_demand_bilstm.csv
│   │   │   ├── q_all_bilstm
│   │   │   │   └── demand_bilstm.h5
│   │   │   └── quantile_prediction_results_demand_bilstm.csv
│   │   ├── seq2seq+temporal+spatial
│   │   │   ├── forecasted_time_series_demand_seq2seq+temporal+spatial.pkl
│   │   │   ├── preformance_summary_demand_seq2seq+temporal+spatial.csv
│   │   │   └── quantile_prediction_results_demand_seq2seq+temporal+spatial.csv
│   │   ├── seq2seq+temporal
│   │   │   ├── forecasted_time_series_demand_seq2seq+temporal.pkl
│   │   │   ├── preformance_summary_demand_seq2seq+temporal.csv
│   │   │   └── quantile_prediction_results_demand_seq2seq+temporal.csv
│   │   └── seq2seq
│   │       ├── forecasted_time_series_demand_seq2seq.pkl
│   │       ├── preformance_summary_demand_seq2seq.csv
│   │       └── quantile_prediction_results_demand_seq2seq.csv
│   ├── price
│   │   ├── attention_plot_results_price.csv
│   │   ├── bilstm
│   │   │   ├── forecasted_time_series_price_bilstm.pkl
│   │   │   └── preformance_summary_price_bilstm.csv
│   │   ├── seq2seq+temporal+spatial
│   │   │   ├── forecasted_time_series_price_seq2seq+temporal+spatial.pkl
│   │   │   └── preformance_summary_price_seq2seq+temporal+spatial.csv
│   │   ├── seq2seq+temporal
│   │   │   ├── attention_data_price_seq2seq+temporal.pkl
│   │   │   ├── forecasted_time_series_price_seq2seq+temporal.pkl
│   │   │   ├── preformance_summary_price_seq2seq+temporal.csv
│   │   │   └── quantile_prediction_results_price_seq2seq+temporal.csv
│   │   └── seq2seq
│   │       ├── forecasted_time_series_price_seq2seq.pkl
│   │       ├── preformance_summary_price_seq2seq.csv
│   │       └── quantile_prediction_results_price_seq2seq.csv
│   ├── solar
│   │   ├── attention_plot_results_solar.csv
│   │   ├── bilstm
│   │   │   ├── forecasted_time_series_solar_bilstm.pkl
│   │   │   ├── preformance_summary_solar_bilstm.csv
│   │   │   └── quantile_prediction_results_solar_bilstm.csv
│   │   ├── seq2seq+temporal+spatial
│   │   │   ├── forecasted_time_series_solar_seq2seq+temporal+spatial.pkl
│   │   │   ├── preformance_summary_solar_seq2seq+temporal+spatial.csv
│   │   │   ├── quantile_prediction_results_solar_seq2seq+temporal+spatial.csv
│   │   │   └── spatial_attention_data_solar.pkl
│   │   ├── seq2seq+temporal
│   │   │   ├── forecasted_time_series_solar_seq2seq+temporal.pkl
│   │   │   ├── preformance_summary_solar_seq2seq+temporal.csv
│   │   │   └── quantile_prediction_results_solar_seq2seq+temporal.csv
│   │   └── seq2seq
│   │       ├── forecasted_time_series_solar_seq2seq.pkl
│   │       └── preformance_summary_solar_seq2seq.csv
│   └── wind
│       ├── attention_plot_results_wind.csv
│       ├── bilstm
│       │   ├── forecasted_time_series_wind_bilstm.pkl
│       │   └── preformance_summary_wind_bilstm.csv
│       ├── seq2seq+temporal+spatial
│       │   ├── forecasted_time_series_wind_seq2seq+temporal+spatial.pkl
│       │   ├── preformance_summary_wind_seq2seq+temporal+spatial.csv
│       │   └── quantile_prediction_results_wind_seq2seq+temporal+spatial.csv
│       ├── seq2seq+temporal
│       │   ├── forecasted_time_series_wind_seq2seq+temporal.pkl
│       │   ├── preformance_summary_wind_seq2seq+temporal.csv
│       │   └── quantile_prediction_results_wind_seq2seq+temporal.csv
│       └── seq2seq
│           ├── forecasted_time_series_wind_seq2seq.pkl
│           ├── preformance_summary_wind_seq2seq.csv
│           └── quantile_prediction_results_wind_seq2seq.csv
├── scripts
│   ├── models
│   │   ├── _shared
│   │   │   ├── __pycache__
│   │   │   │   ├── attention_layer.cpython-38.pyc
│   │   │   │   └── timeseries_data_generator.cpython-38.pyc
│   │   │   ├── attention_layer.py
│   │   │   └── timeseries_data_generator.py
│   │   ├── bilstm_model.py
│   │   ├── inference+testing
│   │   │   ├── bilstm_seq2seq_predictions.py
│   │   │   └── inference_model_seq2seq+spatial+temporal_attn.py
│   │   ├── seq2seq+spatial+temporal_attn.py
│   │   ├── seq2seq+temporal_attn.py
│   │   └── seq2seq_model.py
│   ├── postprocessing
│   │   ├── d3_scripts
│   │   │   ├── Context_graph.js
│   │   │   └── forecasting_graph.js
│   │   ├── format_results_Qforecast_plot.py
│   │   ├── format_results_attn_plot.py
│   │   ├── results_summary.py
│   │   └── spatial_attention_plots.py
│   └── preprocessing
│       ├── ERA5_downloader.py
│       ├── __pycache__
│       │   └── preprocessing_funcs.cpython-38.pyc
│       ├── data_preprocessing_demand.py
│       ├── data_preprocessing_price.py
│       ├── data_preprocessing_solar.py
│       ├── data_preprocessing_wind.py
│       └── preprocessing_funcs.py
└── visualisations
    ├── cloud_cover_(input)_animation.gif
    ├── d3_quantile_plot_examples.png
    ├── d3_temporal_attention_plot_demand.png
    ├── d3_temporal_attention_plot_price.png
    ├── d3_temporal_attention_plot_solar.png
    ├── d3_temporal_attention_plot_wind.png
    ├── memory_leak_test.png
    ├── model_architecture_schematic.svg
    ├── model_architecture_schematic_markup.png
    ├── performance_breakdown_markup.png
    ├── solar_spatial_attentions_animation.gif
    └── tabular_performance_breakdown.png
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Probabilistic Forecasting of Renewable Energy Generation and Wholesale Market Prices Using Quantile Regression in Keras
2 | :rocket: Blog post on personal website :link: [Probabilistic Forecasting of Renewable Generation & Wholesale Prices with Quantile-Regression](https://richardfindlay.co.uk/probabilistic-forecasting-of-renewable-generation-and-wholesale-prices-with-quantile-regression-2)
3 |
4 |
5 |
6 | screenshot of interactive d3.js plots illustrating probabilistic forecasting performance
7 |
8 |
9 | ### Project Description :open_book::
10 | This repository demonstrates the use of deep learning techniques in combination with quantile regression to produce probabilistic forecasts. The above figure depicts the consecutive day-ahead (DA) quantile forecasts for each of the investigated variables over one week, with further quantification and discussion of the forecast performance given in the accompanying [blog post](https://richardfindlay.co.uk/probabilistic-forecasting-of-renewable-generation-and-wholesale-prices-with-quantile-regression-2).
11 |
12 | The code investigates the performance of four deep-learning architectures: a Bi-directional LSTM, a Seq-2-Seq model, a Seq-2-Seq model with Temporal Attention, and a Seq-2-Seq model with Temporal and Spatial Attention. For context, comparisons are made to a simplistic daily persistence forecast as well as to the Transmission System Operator's (TSO) forecast. The models are ordered by increasing complexity, with the accompanying hypothesis that performance would improve at each iteration; this was not borne out when test performance was investigated.
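
All architectures output one prediction per quantile and are trained with the pinball (quantile) loss. Below is a minimal Keras sketch of that loss, mirroring the per-quantile losses built in the training scripts (the `pinball_loss` helper name is illustrative, not a function in the repo):

```python
import tensorflow.keras.backend as K

quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95]

def pinball_loss(q):
    # penalise under-prediction by q and over-prediction by (1 - q)
    def loss(y_true, y_pred):
        error = y_true - y_pred
        return K.mean(K.maximum(q * error, (q - 1) * error), axis=-1)
    return loss

# one loss per quantile head, matching the order of the model outputs
q_losses = [pinball_loss(q) for q in quantiles]
```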
13 |
14 |
15 |
16 | model architecture schematic for encoder-decoder with spatial and temporal attention mechanisms as implemented in keras
17 |
18 |
19 | ### Performance Overview :racing_car::
20 | The above figure illustrates the most complex model investigated as part of this project. With both temporal and spatial attention mechanisms, the novel encoder-decoder architecture does not always prevail as the best-performing technique, but it shows encouraging performance and may merit further investigation and fine-tuning.
21 |
22 |
23 |
24 |
25 |
26 | The above plot illustrates the performance of the temporal attention mechanism over the prior 7 days of features inputted to the model. The attention weights show a recognition of temporal patterns within the data, with particular attention paid to the previous day when producing the following forecast. Similarly, the gif below depicts the behaviour of the spatial attention weights in the solar generation forecast, again giving some promising indication that the mechanism recognises the influence of solar irradiance on the forecast.
27 |
28 |
29 |
30 |
31 |
32 | A quantitative performance breakdown of all investigated deep-learning architectures is given below, alongside the TSO and persistence forecasting performances.
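
The `preformance_summary_*.csv` files under `results/` report PICP, PINC, ACE, MAE, MAPE and RMSE for each model and variable. A minimal sketch of how the interval metrics could be derived from the stored quantile predictions (function and column names here are illustrative, not taken from the repo's scripts):

```python
import numpy as np

def interval_metrics(y_true, q_lower, q_upper, pinc=90.0):
    # PICP / ACE for a central interval, e.g. the 0.05-0.95 quantiles -> PINC = 90%
    covered = (y_true >= q_lower) & (y_true <= q_upper)
    picp = 100.0 * covered.mean()   # empirical coverage
    ace = picp - pinc               # average coverage error
    return picp, pinc, ace

def point_metrics(y_true, y_median):
    mae = np.mean(np.abs(y_true - y_median))
    rmse = np.sqrt(np.mean((y_true - y_median) ** 2))
    # MAPE is undefined (inf) when the target crosses zero, as seen for prices
    mape = 100.0 * np.mean(np.abs((y_true - y_median) / y_true))
    return mae, mape, rmse
```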
33 |
34 |
35 |
36 |
37 |
38 | ### Notes on Code :notebook::
39 | Install python dependencies for repository:
40 | ```
41 | $ pip install -r requirements.txt
42 | ```
43 |
44 | :weight_lifting: Training for all models was conducted on a Google Colab Pro+ subscription.
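
Pre-trained weights for each architecture and variable are stored under `models/`. A minimal, hedged sketch for loading one of the saved `.h5` files for inference, assuming it is run from the repository root (the swish registration and custom `attention` layer mirror the repo's inference scripts):

```python
from keras.backend import sigmoid
from keras.utils.generic_utils import get_custom_objects
from tensorflow.keras.layers import Activation
from tensorflow.keras.models import load_model

# custom attention layer shipped with the repository
from scripts.models._shared.attention_layer import attention

# register the swish activation used by some of the saved models
def swish(x, beta=1):
    return x * sigmoid(beta * x)

get_custom_objects().update({'swish': Activation(swish)})

# compile=False skips restoring the per-quantile training losses,
# which are only needed for further training, not for prediction
model = load_model(
    'models/seq2seq+temporal/price/q_all_seq2seq+temporal/price_seq2seq+temporal.h5',
    custom_objects={'attention': attention},
    compile=False,
)
model.summary()
```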
45 |
46 | ### Further Work :telescope::
47 | - [ ] The scope of the study could be broadened by analysing additional ML architectures alongside the RNN variants examined here, particularly XGBoost and transformers.
48 | - [ ] The problem pushes the limits of high-level DL frameworks; adopting PyTorch or lower-level TensorFlow could allow for increased efficiency and performance.
49 |
50 | ### To Do :test_tube::
51 | - [ ] Code links and references to be validated since re-organisation.
52 | - [ ] Clean code, especially interactive d3 plots.
53 | - [ ] Further validate environments and optimisation scripts.
54 |
55 | ### Resources :gem::
56 | + [https://www.elexon.co.uk/documents/training-guidance/bsc-guidance-notes/bmrs-api-and-data-push-user-guide-2/](https://www.elexon.co.uk/documents/training-guidance/bsc-guidance-notes/bmrs-api-and-data-push-user-guide-2/)
57 | + [https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly](https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly)
58 | + [https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-pressure-levels?tab=overview](https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-pressure-levels?tab=overview)
59 | + [https://colah.github.io/posts/2015-08-Understanding-LSTMs](https://colah.github.io/posts/2015-08-Understanding-LSTMs)
60 | + [https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html](https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html)
61 | + [https://colab.research.google.com/github/kmkarakaya/ML_tutorials/blob/master/seq2seq_Part_D_Encoder_Decoder_with_Teacher_Forcing.ipynb](https://colab.research.google.com/github/kmkarakaya/ML_tutorials/blob/master/seq2seq_Part_D_Encoder_Decoder_with_Teacher_Forcing.ipynb)
62 |
--------------------------------------------------------------------------------
/data/processed/README.md:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/data/raw/README.md:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/models/bilstm/demand/q_all_bilstm/demand_bilstm.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/bilstm/demand/q_all_bilstm/demand_bilstm.h5
--------------------------------------------------------------------------------
/models/bilstm/price/q_all_bilstm/price_bilstm.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/bilstm/price/q_all_bilstm/price_bilstm.h5
--------------------------------------------------------------------------------
/models/bilstm/solar/q_all_bilstm/solar_bilstm.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/bilstm/solar/q_all_bilstm/solar_bilstm.h5
--------------------------------------------------------------------------------
/models/bilstm/wind/q_all_bilstm/wind_bilstm.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/bilstm/wind/q_all_bilstm/wind_bilstm.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal+spatial/demand/q_all_seq2seq+temporal+spatial/demand_main.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/demand/q_all_seq2seq+temporal+spatial/demand_main.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal+spatial/demand/q_all_seq2seq+temporal+spatial/demand_spatial_enc.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/demand/q_all_seq2seq+temporal+spatial/demand_spatial_enc.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal+spatial/demand/q_all_seq2seq+temporal+spatial/demand_temporal_enc.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/demand/q_all_seq2seq+temporal+spatial/demand_temporal_enc.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal+spatial/solar/q_all_seq2seq+temporal+spatial/solar_main.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/solar/q_all_seq2seq+temporal+spatial/solar_main.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal+spatial/solar/q_all_seq2seq+temporal+spatial/solar_spatial_enc.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/solar/q_all_seq2seq+temporal+spatial/solar_spatial_enc.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal+spatial/solar/q_all_seq2seq+temporal+spatial/solar_temporal_enc.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/solar/q_all_seq2seq+temporal+spatial/solar_temporal_enc.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal+spatial/wind/q_all_seq2seq+temporal+spatial/wind_main.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/wind/q_all_seq2seq+temporal+spatial/wind_main.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal+spatial/wind/q_all_seq2seq+temporal+spatial/wind_spatial_enc.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/wind/q_all_seq2seq+temporal+spatial/wind_spatial_enc.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal+spatial/wind/q_all_seq2seq+temporal+spatial/wind_temporal_enc.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal+spatial/wind/q_all_seq2seq+temporal+spatial/wind_temporal_enc.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal/demand/q_all_seq2seq+temporal/demand_seq2seq+temporal.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/demand/q_all_seq2seq+temporal/demand_seq2seq+temporal.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal/demand/q_all_seq2seq+temporal/demand_seq2seq+temporal_enc.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/demand/q_all_seq2seq+temporal/demand_seq2seq+temporal_enc.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal/price/q_all_seq2seq+temporal/price_seq2seq+temporal.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/price/q_all_seq2seq+temporal/price_seq2seq+temporal.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal/price/q_all_seq2seq+temporal/price_seq2seq+temporal_enc.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/price/q_all_seq2seq+temporal/price_seq2seq+temporal_enc.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal/solar/q_all_seq2seq+temporal/solar_seq2seq+temporal.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/solar/q_all_seq2seq+temporal/solar_seq2seq+temporal.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal/solar/q_all_seq2seq+temporal/solar_seq2seq+temporal_enc.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/solar/q_all_seq2seq+temporal/solar_seq2seq+temporal_enc.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal/wind/q_all_seq2seq+temporal/wind_seq2seq+temporal.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/wind/q_all_seq2seq+temporal/wind_seq2seq+temporal.h5
--------------------------------------------------------------------------------
/models/seq2seq+temporal/wind/q_all_seq2seq+temporal/wind_seq2seq+temporal_enc.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq+temporal/wind/q_all_seq2seq+temporal/wind_seq2seq+temporal_enc.h5
--------------------------------------------------------------------------------
/models/seq2seq/demand/q_all_seq2seq/demand_seq2seq.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq/demand/q_all_seq2seq/demand_seq2seq.h5
--------------------------------------------------------------------------------
/models/seq2seq/price/q_all_seq2seq/price_seq2seq.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq/price/q_all_seq2seq/price_seq2seq.h5
--------------------------------------------------------------------------------
/models/seq2seq/solar/q_all_seq2seq/solar_seq2seq.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq/solar/q_all_seq2seq/solar_seq2seq.h5
--------------------------------------------------------------------------------
/models/seq2seq/wind/q_all_seq2seq/wind_seq2seq.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/models/seq2seq/wind/q_all_seq2seq/wind_seq2seq.h5
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py==0.11.0
2 | aiohttp==3.7.4
3 | alabaster==0.7.12
4 | alpaca==1.0.0
5 | alpaca-trade-api==1.4.0
6 | appdirs==1.4.4
7 | arrow==0.17.0
8 | astunparse==1.6.3
9 | asv==0.4.2
10 | async-timeout==3.0.1
11 | attrs==20.3.0
12 | Babel==2.9.0
13 | beautifulsoup4==4.9.3
14 | black==20.8b1
15 | boto3==1.16.23
16 | botocore==1.19.23
17 | cachetools==4.1.1
18 | cdsapi==0.5.1
19 | certifi==2020.6.20
20 | cfgv==3.2.0
21 | cftime==1.1.3
22 | chardet==3.0.4
23 | click==7.1.2
24 | cloudpickle==1.6.0
25 | coloredlogs==15.0
26 | configparser==5.0.1
27 | cycler==0.10.0
28 | decorator==4.4.2
29 | distlib==0.3.1
30 | docopt==0.6.2
31 | docutils==0.16
32 | ez-setup==0.9
33 | filelock==3.0.12
34 | flatbuffers==1.12
35 | FLORIS==2.2.3
36 | future==0.18.2
37 | gast==0.3.3
38 | google-auth==1.18.0
39 | google-auth-oauthlib==0.4.1
40 | google-pasta==0.2.0
41 | grpcio==1.32.0
42 | gym==0.18.0
43 | h5py==2.10.0
44 | humanfriendly==9.1
45 | identify==1.5.12
46 | idna==2.10
47 | ImageHash==4.2.0
48 | imageio==2.9.0
49 | imageio-ffmpeg==0.4.3
50 | imagesize==1.2.0
51 | iniconfig==1.1.1
52 | iris==1.0.7
53 | Jinja2==2.11.2
54 | jmespath==0.10.0
55 | joblib==0.16.0
56 | Js2Py==0.70
57 | Keras==2.4.3
58 | Keras-Preprocessing==1.1.2
59 | kiwisolver==1.2.0
60 | lxml==4.6.3
61 | Markdown==3.2.2
62 | MarkupSafe==1.1.1
63 | matplotlib==3.3.0
64 | mdolab-baseclasses==1.4.0
65 | memory-profiler==0.57.0
66 | moviepy==1.0.3
67 | msgpack==1.0.2
68 | multidict==5.2.0
69 | mypy-extensions==0.4.3
70 | netCDF4==1.5.3
71 | nodeenv==1.5.0
72 | nose==1.3.7
73 | numpy==1.21.2
74 | oauthlib==3.1.0
75 | opt-einsum==3.3.0
76 | packaging==20.8
77 | pandas==1.2.3
78 | pathspec==0.8.1
79 | petsc==3.14.3
80 | petsc4py==3.14.1
81 | Pillow==6.2.2
82 | pipwin==0.5.0
83 | pluggy==0.13.1
84 | pockets==0.9.1
85 | pre-commit==2.9.3
86 | proglog==0.1.9
87 | proj==0.2.0
88 | protobuf==3.12.2
89 | psutil==5.7.2
90 | py==1.10.0
91 | pyasn1==0.4.8
92 | pyasn1-modules==0.2.8
93 | pyglet==1.5.0
94 | Pygments==2.7.4
95 | pyjsparser==2.7.1
96 | pyparsing==2.4.7
97 | PyPrind==2.11.2
98 | pyproj==3.0.0.post1
99 | pySmartDL==1.3.4
100 | pytest==6.2.1
101 | python-dateutil==2.8.1
102 | pytz==2020.1
103 | PyWavelets==1.1.1
104 | PyYAML==5.4.1
105 | regex==2020.11.13
106 | requests==2.24.0
107 | requests-oauthlib==1.3.0
108 | rsa==4.6
109 | s3transfer==0.3.3
110 | scikit-learn==0.23.1
111 | scipy==1.6.2
112 | seaborn==0.11.1
113 | six==1.15.0
114 | sklearn==0.0
115 | snowballstemmer==2.0.0
116 | soupsieve==2.1
117 | Sphinx==3.4.3
118 | sphinx-copybutton==0.3.1
119 | sphinx-gallery==0.8.2
120 | sphinx-panels==0.5.2
121 | sphinx-rtd-theme==0.5.1
122 | sphinxcontrib-applehelp==1.0.2
123 | sphinxcontrib-devhelp==1.0.2
124 | sphinxcontrib-htmlhelp==1.0.3
125 | sphinxcontrib-jsmath==1.0.1
126 | sphinxcontrib-napoleon==0.7
127 | sphinxcontrib-qthelp==1.0.3
128 | sphinxcontrib-serializinghtml==1.1.4
129 | sqlitedict==1.7.0
130 | tensorboard==2.4.0
131 | tensorboard-plugin-wit==1.7.0
132 | tensorflow==2.4.1
133 | tensorflow-estimator==2.4.0
134 | termcolor==1.1.0
135 | threadpoolctl==2.1.0
136 | toml==0.10.2
137 | tqdm==4.59.0
138 | typed-ast==1.4.2
139 | typing-extensions==3.7.4.3
140 | tzlocal==2.1
141 | urllib3==1.25.9
142 | virtualenv==20.3.1
143 | websocket-client==1.2.1
144 | websockets==9.1
145 | Werkzeug==1.0.1
146 | wget==3.2
147 | wrapt==1.12.1
148 | yarl==1.7.0
149 |
--------------------------------------------------------------------------------
/results/demand/bilstm/forecasted_time_series_demand_bilstm.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/demand/bilstm/forecasted_time_series_demand_bilstm.pkl
--------------------------------------------------------------------------------
/results/demand/bilstm/preformance_summary_demand_bilstm.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 91.43518519,90,1.435185185,1459.753738,4.733158189,1903.185308
--------------------------------------------------------------------------------
/results/demand/bilstm/q_all_bilstm/demand_bilstm.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/demand/bilstm/q_all_bilstm/demand_bilstm.h5
--------------------------------------------------------------------------------
/results/demand/seq2seq+temporal+spatial/forecasted_time_series_demand_seq2seq+temporal+spatial.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/demand/seq2seq+temporal+spatial/forecasted_time_series_demand_seq2seq+temporal+spatial.pkl
--------------------------------------------------------------------------------
/results/demand/seq2seq+temporal+spatial/preformance_summary_demand_seq2seq+temporal+spatial.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 70.2119883,90,-19.7880117,1519.480105,4.961981752,1903.396953
--------------------------------------------------------------------------------
/results/demand/seq2seq+temporal/forecasted_time_series_demand_seq2seq+temporal.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/demand/seq2seq+temporal/forecasted_time_series_demand_seq2seq+temporal.pkl
--------------------------------------------------------------------------------
/results/demand/seq2seq+temporal/preformance_summary_demand_seq2seq+temporal.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 86.70808967,90,-3.291910331,1457.563216,4.907357214,1890.307257
--------------------------------------------------------------------------------
/results/demand/seq2seq/forecasted_time_series_demand_seq2seq.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/demand/seq2seq/forecasted_time_series_demand_seq2seq.pkl
--------------------------------------------------------------------------------
/results/demand/seq2seq/preformance_summary_demand_seq2seq.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 90.83820663,90,0.838206628,1315.00029,4.395793398,1715.146304
--------------------------------------------------------------------------------
/results/price/bilstm/forecasted_time_series_price_bilstm.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/price/bilstm/forecasted_time_series_price_bilstm.pkl
--------------------------------------------------------------------------------
/results/price/bilstm/preformance_summary_price_bilstm.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 72.4537037,90,-17.5462963,6.375387473,inf,8.02840373
--------------------------------------------------------------------------------
/results/price/seq2seq+temporal+spatial/forecasted_time_series_price_seq2seq+temporal+spatial.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/price/seq2seq+temporal+spatial/forecasted_time_series_price_seq2seq+temporal+spatial.pkl
--------------------------------------------------------------------------------
/results/price/seq2seq+temporal+spatial/preformance_summary_price_seq2seq+temporal+spatial.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 69.57846004,90,-20.42153996,7.507209148,inf,9.576368247
--------------------------------------------------------------------------------
/results/price/seq2seq+temporal/attention_data_price_seq2seq+temporal.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/price/seq2seq+temporal/attention_data_price_seq2seq+temporal.pkl
--------------------------------------------------------------------------------
/results/price/seq2seq+temporal/forecasted_time_series_price_seq2seq+temporal.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/price/seq2seq+temporal/forecasted_time_series_price_seq2seq+temporal.pkl
--------------------------------------------------------------------------------
/results/price/seq2seq+temporal/preformance_summary_price_seq2seq+temporal.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 80.17787524,90,-9.822124756,6.526100553,inf,8.47807083
--------------------------------------------------------------------------------
/results/price/seq2seq/forecasted_time_series_price_seq2seq.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/price/seq2seq/forecasted_time_series_price_seq2seq.pkl
--------------------------------------------------------------------------------
/results/price/seq2seq/preformance_summary_price_seq2seq.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 45.45565302,90,-44.54434698,6.49537643,inf,8.052062264
--------------------------------------------------------------------------------
/results/solar/bilstm/forecasted_time_series_solar_bilstm.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/solar/bilstm/forecasted_time_series_solar_bilstm.pkl
--------------------------------------------------------------------------------
/results/solar/bilstm/preformance_summary_solar_bilstm.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 92.10526316,90,2.105263158,327.3727615,,689.229032
--------------------------------------------------------------------------------
/results/solar/seq2seq+temporal+spatial/forecasted_time_series_solar_seq2seq+temporal+spatial.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/solar/seq2seq+temporal+spatial/forecasted_time_series_solar_seq2seq+temporal+spatial.pkl
--------------------------------------------------------------------------------
/results/solar/seq2seq+temporal+spatial/preformance_summary_solar_seq2seq+temporal+spatial.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 95.27290448,90,5.272904483,270.9945811,,585.2357481
--------------------------------------------------------------------------------
/results/solar/seq2seq+temporal+spatial/spatial_attention_data_solar.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/solar/seq2seq+temporal+spatial/spatial_attention_data_solar.pkl
--------------------------------------------------------------------------------
/results/solar/seq2seq+temporal/forecasted_time_series_solar_seq2seq+temporal.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/solar/seq2seq+temporal/forecasted_time_series_solar_seq2seq+temporal.pkl
--------------------------------------------------------------------------------
/results/solar/seq2seq+temporal/preformance_summary_solar_seq2seq+temporal.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 98.0994152,90,8.099415205,291.0802181,,613.8738376
--------------------------------------------------------------------------------
/results/solar/seq2seq/forecasted_time_series_solar_seq2seq.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/solar/seq2seq/forecasted_time_series_solar_seq2seq.pkl
--------------------------------------------------------------------------------
/results/solar/seq2seq/preformance_summary_solar_seq2seq.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,PINAW,PINRW,MAE,MAPE,RMSE
2 | 96.0891812865497,89.99999999999999,6.089181286549717,7.249654247663978,1.6922971719864661,300.5354471646511,,655.2354701170318
3 |
--------------------------------------------------------------------------------
/results/wind/bilstm/forecasted_time_series_wind_bilstm.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/wind/bilstm/forecasted_time_series_wind_bilstm.pkl
--------------------------------------------------------------------------------
/results/wind/bilstm/preformance_summary_wind_bilstm.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 87.8411306,90,-2.158869396,1318.7222,30.59062064,1760.6124
--------------------------------------------------------------------------------
/results/wind/seq2seq+temporal+spatial/forecasted_time_series_wind_seq2seq+temporal+spatial.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/wind/seq2seq+temporal+spatial/forecasted_time_series_wind_seq2seq+temporal+spatial.pkl
--------------------------------------------------------------------------------
/results/wind/seq2seq+temporal+spatial/preformance_summary_wind_seq2seq+temporal+spatial.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 85.47758285,90,-4.522417154,1062.046295,21.25483009,1383.115011
--------------------------------------------------------------------------------
/results/wind/seq2seq+temporal/forecasted_time_series_wind_seq2seq+temporal.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/wind/seq2seq+temporal/forecasted_time_series_wind_seq2seq+temporal.pkl
--------------------------------------------------------------------------------
/results/wind/seq2seq+temporal/preformance_summary_wind_seq2seq+temporal.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 90.09502924,90,0.09502924,998.0828,19.72910166,1320.7335
--------------------------------------------------------------------------------
/results/wind/seq2seq/forecasted_time_series_wind_seq2seq.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/results/wind/seq2seq/forecasted_time_series_wind_seq2seq.pkl
--------------------------------------------------------------------------------
/results/wind/seq2seq/preformance_summary_wind_seq2seq.csv:
--------------------------------------------------------------------------------
1 | PICP,PINC,ACE,MAE,MAPE,RMSE
2 | 94.79775828,90,4.797758285,937.0275,19.33659166,1251.3384
--------------------------------------------------------------------------------
/scripts/models/_shared/__pycache__/attention_layer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/scripts/models/_shared/__pycache__/attention_layer.cpython-38.pyc
--------------------------------------------------------------------------------
/scripts/models/_shared/__pycache__/timeseries_data_generator.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/scripts/models/_shared/__pycache__/timeseries_data_generator.cpython-38.pyc
--------------------------------------------------------------------------------
/scripts/models/_shared/attention_layer.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorflow.keras
3 | import tensorflow.keras.backend as K
4 | from tensorflow.keras import Model
5 | from tensorflow.keras.layers import Dense
6 | from tensorflow.keras.layers import Activation, concatenate, Dot
7 |
8 |
9 |
10 | class attention(tf.keras.layers.Layer):
11 |
12 | def __init__(self, hidden_units, **kwargs):
13 | # super(attention, self).__init__(hidden_units)
14 | self.hidden_units = hidden_units
15 | super(attention, self).__init__(**kwargs)
16 |
17 |
18 | def build(self, input_shape):
19 |
20 | input_dim = int(input_shape[-1])
21 |
22 | self.attention_score_vec = Dense(64, name='attention_score_vec')
23 | self.h_t = Dense(64, name='ht')
24 | self.attention_score = Dot(axes=[1, 2], name='attention_score')
25 | self.attention_weight = Activation('softmax', name='attention_weight')
26 | self.context_vector = Dot(axes=[1, 1], name='context_vector')
27 | self.attention_vector = Dense(self.hidden_units, activation='tanh', name='attention_vector')
28 |
29 | super(attention, self).build(input_shape)
30 |
31 | def call(self, enc_output, enc_out, h_state, c_state):
32 |
33 |
34 | score_first_part = self.attention_score_vec(enc_output)
35 | # score_first_part dot last_hidden_state => attention_weights
36 | # (batch_size, time_steps, hidden_size) dot (batch_size, hidden_size) => (batch_size, time_steps)
37 | h_t = concatenate([h_state, enc_out[:,0,:]])
38 | h_t = self.h_t(h_t)
39 |
40 | score = self.attention_score([h_t, score_first_part])
41 |
42 | attention_weights = self.attention_weight(score)
43 | # (batch_size, time_steps, hidden_size) dot (batch_size, time_steps) => (batch_size, hidden_size)
44 | context_vector = self.context_vector([enc_output, attention_weights])
45 | pre_activation = concatenate([context_vector, h_t])
46 | attention_vector = self.attention_vector(pre_activation)
47 |
48 | attention_weights = K.expand_dims(attention_weights, axis=-1)
49 | attention_vector = K.expand_dims(attention_vector, axis=1)
50 |
51 | return [attention_weights, attention_vector]
52 |
53 | def compute_output_shape(self, input_shape):
54 | # shapes of [attention_weights, attention_vector] returned by call()
55 | return [(input_shape[0], input_shape[1], 1), (input_shape[0], 1, self.hidden_units)]
55 |
56 | def get_config(self):
57 | config = super(attention, self).get_config()
58 | config.update({"hidden_units": self.hidden_units})
59 | return config
60 |
--------------------------------------------------------------------------------
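
For context, a hedged sketch of how this layer might be wired into a single decoder step; the shapes, names and constants below are illustrative assumptions, and the actual wiring lives in scripts/models/seq2seq+temporal_attn.py (not reproduced in this dump):

```python
from tensorflow.keras.layers import Input, LSTM

from _shared.attention_layer import attention

n_s = 32    # decoder hidden units (assumed)
Tx = 336    # encoder timesteps (assumed)

enc_output = Input(shape=(Tx, 2 * n_s))   # full bidirectional encoder sequence
enc_out = Input(shape=(1, 2 * n_s))       # final encoder output step
s0 = Input(shape=(n_s,))                  # initial decoder hidden state
c0 = Input(shape=(n_s,))                  # initial decoder cell state

# attend over the encoder sequence, then feed the resulting attention
# vector into a decoder LSTM cell for one output step
attn_weights, attn_vector = attention(n_s)(enc_output, enc_out, s0, c0)
_, s, c = LSTM(n_s, return_state=True)(attn_vector, initial_state=[s0, c0])
```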
/scripts/models/_shared/timeseries_data_generator.py:
--------------------------------------------------------------------------------
1 | import h5py
2 | import numpy as np
3 | import tensorflow
4 | # as adapted from: https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly (note: input_seq_size, output_seq_size, quantiles and model_type are expected to be defined by the importing training script)
5 | class DataGenerator(tensorflow.keras.utils.Sequence):
6 |
7 | def __init__(self, dataset_name, x_length, y_length, hidden_states, batch_size, shuffle):
8 | self.dataset_name = dataset_name
9 | self.batch_size = batch_size
10 | self.shuffle = shuffle
11 | self.n_s = hidden_states
12 | self.xlen = x_length
13 | self.ylen = y_length
14 | self.index_ref = 0
15 | self.on_epoch_end()
16 |
17 | def __len__(self):
18 | # 'number of batches per Epoch'
19 | return int(np.floor((self.ylen - input_seq_size - (output_seq_size-1)) / self.batch_size))
20 |
21 | def __getitem__(self, index):
22 |
23 | # input and output indexes relative current batch size and data generator index reference
24 | input_indexes = self.input_indexes[(index*self.batch_size) : (index*self.batch_size) + (self.batch_size + (input_seq_size-1))]
25 | output_indexes = self.output_indexes[(index*self.batch_size) + input_seq_size : (index*self.batch_size) + input_seq_size + (self.batch_size + (output_seq_size-1))]
26 |
27 | # Generate data
28 | (X_train1, X_train2, X_train3, X_train4, s0, c0), y_train = self.__data_generation(input_indexes, output_indexes)
29 |
30 | # replicate labels for each quantile
31 | y_trues = [y_train for i in quantiles]
32 |
33 | # extend true values for spatial and temporal attention (only relevant if compiled model used for inference)
34 | # y_trues.extend([[], []])
35 |
36 | return (X_train1, X_train2, X_train3, X_train4, s0, c0), (y_trues) # pass empty training outputs to extract attentions
37 |
38 | def on_epoch_end(self):
39 | # set length of indexes for each epoch
40 | self.input_indexes = np.arange(self.xlen)
41 | self.output_indexes = np.arange(self.ylen)
42 |
43 | if self.shuffle == True:
44 | np.random.shuffle(self.input_indexes)
45 |
46 | def to_sequence(self, x1, x2, x3, x4, y):
47 | # convert timeseries batch into sequences
48 | input_start, output_start = 0, 0
49 |
50 | seqX1, seqX2, seqX3, seqX4, seqY = [], [], [], [], []
51 |
52 | while (input_start + input_seq_size) <= len(x1):
53 | # offset handled during pre-processing
54 | input_end = input_start + input_seq_size
55 | output_end = output_start + output_seq_size
56 |
57 | # inputs
58 | seqX1.append(x1[input_start:input_end])
59 | seqX2.append(x2[input_start:input_end])
60 |
61 | # outputs
62 | seqX3.append(x3[output_start:output_end])
63 | seqX4.append(x4[output_start:output_end])
64 | seqY.append(y[output_start:output_end])
65 |
66 | input_start += 1
67 | output_start += 1
68 |
69 | # convert to numpy arrays
70 | seqX1, seqX2, seqX3, seqX4, seqY = np.array(seqX1), np.array(seqX2), np.array(seqX3), np.array(seqX4), np.array(seqY)
71 |
72 | return seqX1, seqX2, seqX3, seqX4, seqY
73 |
74 | def __data_generation(self, input_indexes, output_indexes):
75 |
76 | # load data for current batch
77 | f = h5py.File(f"../../data/processed/{model_type}/{self.dataset_name}", "r")
78 | X_train1 = f['train_set']['X1_train'][input_indexes] # main feature array
79 | X_train2 = f['train_set']['X2_train'][input_indexes] # input time features from feature engineering
80 | X_train3 = f['train_set']['X3_train'][output_indexes] # output time features from feature engineering
81 |
82 | # no spatial data if model is training for price forecasting
83 | if model_type != 'price':
84 | X_train4 = f['train_set']['X1_train'][output_indexes][:,:,:,1:] # all nwp features apart from the generation itself
85 | X_train4 = np.average(X_train4, axis=(1,2))
86 | else:
87 | X_train4 = f['train_set']['X1_train'][output_indexes][:,1:]
88 |
89 | y_train = f['train_set']['y_train'][output_indexes]
90 |
91 | f.close()
92 |
93 | # convert to sequence data
94 | X_train1, X_train2, X_train3, X_train4, y_train = self.to_sequence(X_train1, X_train2, X_train3, X_train4, y_train)
95 |
96 | s0 = np.zeros((self.batch_size, self.n_s))
97 | c0 = np.zeros((self.batch_size, self.n_s))
98 |
99 | return (X_train1, X_train2, X_train3, X_train4, s0, c0), y_train
--------------------------------------------------------------------------------
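
A hedged usage sketch for this generator, assuming it is imported as a module from scripts/models/ in the same way the repo's training scripts consume it; the constants mirror those set in bilstm_model.py and the HDF5 layout follows the repo's data/processed convention:

```python
import h5py

from _shared import timeseries_data_generator as tsg

# the generator reads these as module-level globals, so inject them into its namespace
tsg.model_type = 'solar'
tsg.input_seq_size = 336    # one week of half-hourly inputs
tsg.output_seq_size = 48    # day-ahead horizon, half-hourly
tsg.quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95]

# dataset sizes, read the same way as in bilstm_model.py (path relative to scripts/models/)
with h5py.File(f"../../data/processed/{tsg.model_type}/dataset_{tsg.model_type}.hdf5", "r") as f:
    x_len = f['train_set']['X1_train'].shape[0]
    y_len = f['train_set']['y_train'].shape[0]

training_generator = tsg.DataGenerator(
    dataset_name=f'dataset_{tsg.model_type}.hdf5',
    x_length=x_len,
    y_length=y_len,
    hidden_states=32,
    batch_size=64,
    shuffle=False,
)

# model.fit(training_generator, epochs=20)
```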
/scripts/models/bilstm_model.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sys, os
3 | import h5py
4 | import tensorflow as tf
5 | import tensorflow.keras
6 | import tensorflow.keras.backend as K
7 | from tensorflow.keras import Model
8 | from tensorflow.keras.layers import Conv2D, Bidirectional, Dense, TimeDistributed, LSTM
9 | from tensorflow.keras.layers import Input, Activation, AveragePooling2D, Lambda, concatenate, Reshape
10 | from keras.backend import sigmoid
11 | from keras.utils.generic_utils import get_custom_objects
12 |
13 | np.set_printoptions(threshold=sys.maxsize)
14 | tf.random.set_seed(180)
15 |
16 | ###########################################_____SET_MODEL_PARAMETERS_____############################################
17 | model_type ="solar"
18 |
19 | # declare dataset file
20 | dataset_name = f'dataset_{model_type}.hdf5'
21 |
22 | # declare quantiles for model
23 | quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95]
24 |
25 | # get useful size parameters
26 | f = h5py.File(f"../../data/processed/{model_type}/{dataset_name}", "r")
27 | features = np.empty_like(f['train_set']['X1_train'][0])
28 | times_in = np.empty_like(f['train_set']['X2_train'][0])
29 | times_out = np.empty_like(f['train_set']['X3_train'][0])
30 | labels = np.empty_like(f['train_set']['y_train'][0])
31 | x_len = f['train_set']['X1_train'].shape[0]
32 | y_len = f['train_set']['y_train'].shape[0]
33 | f.close()
34 |
35 | # input / output sequence sizes
36 | input_seq_size = 336
37 | output_seq_size = 48
38 | n_s = 32 # number of hidden states used through model
39 |
40 | ###########################################_____DATA_GENERATOR_____#################################################
41 |
42 | # data generator input parameters - avoid shuffle in this case
43 | params = {'batch_size': 64,
44 | 'shuffle': False }
45 |
46 | class DataGenerator(tensorflow.keras.utils.Sequence):
47 |
48 | def __init__(self, dataset_name, x_length, y_length, batch_size, shuffle):
49 | self.dataset_name = dataset_name
50 | self.batch_size = batch_size
51 | self.shuffle = shuffle
52 | self.xlen = x_length
53 | self.ylen = y_length
54 | self.index_ref = 0
55 | self.on_epoch_end()
56 |
57 | def __len__(self):
58 | # 'number of batches per Epoch'
59 | # return int(np.floor((self.xlen - (input_seq_size-1)) / self.batch_size))
60 | return int(np.floor((self.ylen - input_seq_size - (output_seq_size-1)) / self.batch_size))
61 |
62 | def __getitem__(self, index):
63 |
64 | input_indexes = self.input_indexes[(index*self.batch_size) : (index*self.batch_size) + (self.batch_size + (input_seq_size-1))]
65 | output_indexes = self.output_indexes[(index*self.batch_size) + input_seq_size : (index*self.batch_size) + input_seq_size + (self.batch_size + (output_seq_size-1))]
66 |
67 | # Generate data
68 | (X_train1, X_train2), y_train = self.__data_generation(input_indexes, output_indexes)
69 |
70 | y_trues = [y_train for i in quantiles]
71 |
72 | return (X_train1, X_train2), (y_trues) # replicate targets so each quantile output has its own label
73 |
74 | def on_epoch_end(self):
75 | # set length of indexes for each epoch
76 | self.input_indexes = np.arange(self.xlen)
77 | self.output_indexes = np.arange(self.ylen)
78 |
79 | if self.shuffle == True:
80 | np.random.shuffle(self.input_indexes)
81 |
82 | def to_sequence(self, x1, x2, y):
83 | # convert timeseries batch into sequences
84 | input_start, output_start = 0, 0
85 |
86 | seqX1, seqX2, seqY = [], [], [] # only the two input sequences and the target are needed for the Bi-LSTM
87 |
88 | while (input_start + input_seq_size) <= len(x1):
89 | # offset handled during pre-processing
90 | input_end = input_start + input_seq_size
91 | output_end = output_start + output_seq_size
92 |
93 | # inputs
94 | seqX1.append(x1[input_start:input_end])
95 | seqX2.append(x2[input_start:input_end])
96 |
97 | # outputs
98 | seqY.append(y[output_start:output_end])
99 |
100 | input_start += 1
101 | output_start += 1
102 |
103 | seqX1, seqX2, seqY = np.array(seqX1), np.array(seqX2), np.array(seqY)
104 |
105 | return seqX1, seqX2, seqY
106 |
107 | def __data_generation(self, input_indexes, output_indexes):
108 |
109 | f = h5py.File(f"../../data/processed/{model_type}/{self.dataset_name}", "r")
110 |
111 | X_train2 = f['train_set']['X2_train'][input_indexes]
112 |
113 | if model_type != 'price':
114 | X_train1 = f['train_set']['X1_train'][input_indexes][:,:,:,:]
115 | X_train1 = np.average(X_train1, axis=(1,2))
116 | else:
117 | X_train1 = f['train_set']['X1_train'][input_indexes][:,:]
118 |
119 |
120 | y_train = f['train_set']['y_train'][output_indexes]
121 | # decoder_input = f['train_set']['y_train'][output_indexes]
122 | f.close()
123 |
124 | # convert to sequence data
125 | X_train1, X_train2, y_train = self.to_sequence(X_train1, X_train2, y_train)
126 |
127 |
128 | return (X_train1, X_train2), y_train
129 |
130 | training_generator = DataGenerator(dataset_name = dataset_name, x_length = x_len, y_length = y_len, **params)
131 |
132 | ###########################################_____MODEL_ARCHITECTURE_____#################################################
133 |
134 | # capture some more useful dimensions
135 | Tx = input_seq_size
136 | Ty = output_seq_size
137 |
138 | channels = features.shape[-1]
139 |
140 | times_in_dim = times_in.shape[-1]
141 | times_out_dim = times_out.shape[-1]
142 |
143 | # make custom activation - swish
144 | def swish(x, beta = 1):
145 | return (x * sigmoid(beta * x))
146 |
147 | # add swish activation to keras
148 | get_custom_objects().update({'swish': Activation(swish)})
149 |
150 | # define inputs for model
151 | x_input = Input(shape=(Tx, channels))
152 |
153 | times_in = Input(shape=(Tx, times_in_dim))
154 | times_out = Input(shape=(Ty, times_out_dim))
155 | out_nwp = Input(shape=(Ty, channels-1))
156 | s_state0 = Input(shape=(32,))
157 | c_state0 = Input(shape=(32,))
158 |
159 | # create empty list for outputs
160 | quantile_predictions = []
161 |
162 | for q in quantiles:
163 |
164 | combined_inputs = concatenate([x_input, times_in], axis=-1, name=f'concat_q_{q}')
165 |
166 | layer1, _, _, _, _ = Bidirectional(LSTM(32, return_sequences = False, return_state = True), name=f'biLSTM_q_{q}')(combined_inputs)
167 | layer2 = Dense(48, name=f'dense1_q_{q}')(layer1)
168 |
169 | if model_type == 'solar':
170 | layer2 = Activation('relu', name=f'relu_act_q_{q}')(layer2)
171 |
172 | quantile_predictions.append(layer2)
173 |
174 | model = Model(inputs = [x_input, times_in], outputs = quantile_predictions)
175 |
176 |
177 | ###########################################_____MODEL_TRAINING_____#################################################
178 |
179 | # include clipvalue in optimiser if needed
180 | optimizer = tensorflow.keras.optimizers.Adam(learning_rate = 0.001)
181 |
182 | # define loss for each quantile
183 | q_losses = [lambda y, f, q=q: K.mean(K.maximum(q * (y - f), (q - 1) * (y - f)), axis=-1) for q in quantiles] # bind q as a default argument so each quantile gets its own loss (avoids late binding to the last q)
184 |
185 | # append additional empty losses for temporal and spatial encoders
186 | # q_losses.append([None,None])
187 |
188 | # compile and train model
189 | model.compile(loss = q_losses, optimizer= optimizer)
190 | print(model.summary())
191 | model.fit(training_generator, epochs = 20)
192 |
193 | # save models - saving encoders individually for inference
194 | os.makedirs(f'../../models/bilstm/{model_type}', exist_ok=True) # avoid failure if the directory already exists
195 | model.save(f'../../models/bilstm/{model_type}/{model_type}_bilstm.h5')
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
--------------------------------------------------------------------------------
/scripts/models/inference+testing/bilstm_seq2seq_predictions.py:
--------------------------------------------------------------------------------
1 | import keras
2 | from keras.models import load_model, model_from_json
3 | from keras.backend import sigmoid
4 | from tensorflow.keras.layers import Input, Activation, concatenate, Lambda
5 | import numpy as np
6 | import h5py
7 | from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
8 | from pickle import load, dump
9 | import matplotlib.pyplot as plt
10 | from keras.utils.generic_utils import get_custom_objects
11 |
12 |
13 | # import custom classes
14 | from _shared.attention_layer import attention
15 |
16 | # script to produce test-set predictions for the bilstm, seq2seq and seq2seq+temporal models
17 |
18 | # declare model type
19 | model_type = 'seq2seq+temporal' # - bilstm, seq2seq, seq2seq+temporal
20 |
21 | # indicate forecast variable
22 | forecast_var = 'price'
23 |
24 | # quantiles - needed for key references - ensure aligns with trained model
25 | quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95]
26 |
27 | # define swish activation for use within the compiled model
28 | def swish(x, beta = 1):
29 | return (x * sigmoid(beta * x))
30 |
31 | # register swish as a custom activation (any custom key can be used for the name)
32 | get_custom_objects().update({'swish': Activation(swish)})
33 |
34 | # load trained model - the loss entry in custom_objects is only a placeholder, since the model is used purely for inference
35 | if model_type != 'seq2seq+temporal':
36 | model = load_model(f'../../../models/{model_type}/{forecast_var}/q_all_{model_type}/{forecast_var}_{model_type}.h5', custom_objects = {'': lambda y,f: defined_loss(q,y,f)})
37 | else:
38 | model = load_model(f'../../../models/{model_type}/{forecast_var}/q_all_{model_type}/{forecast_var}_{model_type}.h5', custom_objects = {'': lambda y,f: defined_loss(q,y,f), 'attention': attention, 'Activation': Activation(swish)})
39 |
40 | # load time references
41 | with open(f'../../../data/processed/{forecast_var}/time_refs_{forecast_var}_v2.pkl', 'rb') as time_file:
42 | time_refs = load(time_file)
43 |
44 | input_times = time_refs[f'input_times_test']
45 | output_times = time_refs[f'output_times_test']
46 |
47 | time_file.close()
48 |
49 | # load and process data
50 | f = h5py.File(f"../../../data/processed/{forecast_var}/dataset_{forecast_var}.hdf5", "r")
51 |
52 | set_type = 'test'
53 | X_train1 = f[f'{set_type}_set'][f'X1_{set_type}']
54 | X_train2 = f[f'{set_type}_set'][f'X2_{set_type}']
55 | X_train3 = f[f'{set_type}_set'][f'X3_{set_type}']
56 | X_train4 = f[f'{set_type}_set'][f'X1_{set_type}']
57 | y_train = f[f'{set_type}_set'][f'y_{set_type}']
58 |
59 | input_seq_size = 336
60 | output_seq_size = 48
61 |
62 | input_start, output_start = 0, input_seq_size
63 |
64 | seqX1, seqX2, seqX3, seqX4, seqY = [], [], [], [], []
65 |
66 | times_in, times_out = [], []
67 |
68 | # sequence the data
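    | # both indices advance by output_seq_size, so successive forecast windows are contiguous and
    | # non-overlapping while the 336-step input windows overlap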
69 | while (output_start + output_seq_size) <= len(y_train):
70 | # offset handled during pre-processing
71 | input_end = input_start + input_seq_size
72 | output_end = output_start + output_seq_size
73 |
74 | # inputs
75 | seqX1.append(X_train1[input_start:input_end])
76 | seqX2.append(X_train2[input_start:input_end])
77 |
78 | times_in.append(input_times[input_start:input_end])
79 |
80 | # outputs
81 | seqY.append(y_train[output_start:output_end])
82 | seqX3.append(X_train3[output_start:output_end])
83 | seqX4.append(X_train4[output_start:output_end])
84 | times_out.append(output_times[output_start:output_end])
85 |
86 | input_start += output_seq_size
87 | output_start += output_seq_size
88 |
89 | x1, x2, x3, x4, y = np.array(seqX1), np.array(seqX2), np.array(seqX3), np.array(seqX4), np.array(seqY)
90 | times_in, times_out = np.array(times_in), np.array(times_out)
91 |
92 | f.close()
93 |
94 | # load scaler
95 | scaler = load(open(f'../../../data/processed/{forecast_var}/_scaler/scaler_{forecast_var}.pkl', 'rb'))
96 |
97 | # average inputs over spatial dimensions
98 | if forecast_var != 'price':
99 | if model_type != 'seq2seq+temporal':
100 | x1 = np.average(x1, axis=(2,3))
101 |
102 | x4 = np.average(x4, axis=(2,3))
103 | x4 = x4[:,:,1:]
104 | else:
105 | x4 = x4[:,:,:-1]
106 |
107 | # cache test set length
108 | test_len = y.shape[0]
109 |
110 | # declare initial hidden states
111 | s0 = np.zeros((y.shape[0], 32,))
112 | c0 = np.zeros((y.shape[0], 32,))
113 |
114 |
115 | print('predicting')
116 | if model_type == 'bilstm':
117 | results = model.predict([x1, x2])
118 | elif model_type == 'seq2seq+temporal':
119 | results = model.predict([x1, x2, x3, x4, s0, c0])
120 | quantile_temporal_attns = results[-1]
121 | else:
122 | results = model.predict([x1, x2, x3, x4])
123 |
124 |
125 | results_dict = {}
126 |
127 | # inverse transform predictions + transfer to dictionary
128 | for idx in range(len(quantiles)):
129 | results_dict[str(quantiles[idx])] = scaler.inverse_transform(results[idx].reshape(-1,1)).reshape(test_len, output_seq_size, 1)
130 |
131 | # inverse transform true values
132 | y_true = scaler.inverse_transform(y.reshape(-1,1)).reshape(test_len, output_seq_size, 1)
133 |
134 | # create time_refs dictionary
135 | times_refs = {'input_times': times_in, 'output_times': times_out}
136 |
137 | # create results dictionary for performance analysis / plotting
138 | results_dict['time_refs'] = times_refs
139 | results_dict['y_true'] = y_true
140 |
141 | print(results_dict.keys())
142 |
143 | # save results - forecasted timeseries matrix
144 | with open(f'../../../results/{forecast_var}/{model_type}/forecasted_time_series_{forecast_var}_{model_type}.pkl', 'wb') as ts_file:
145 | dump(results_dict, ts_file)
146 |
147 | # save results - forecasted temporal attention matrix
148 | if (model_type == 'seq2seq+temporal') and (forecast_var == 'price'):
149 |
150 | # construct attention results dictionary
151 | attention_results = {}
152 | attention_results['0.5'] = quantile_temporal_attns
153 | attention_results['time_refs'] = times_refs
154 | attention_results['input_features'] = x1
155 |
156 | with open(f'../../../results/{forecast_var}/{model_type}/attention_data_{forecast_var}_seq2seq+temporal.pkl', 'wb') as attention_file:
157 | dump(attention_results, attention_file)
158 |
159 |
160 |
161 |
162 |
163 |
--------------------------------------------------------------------------------
/scripts/models/inference+testing/inference_model_seq2seq+spatial+temporal_attn.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import os
4 | import sys
5 | from sklearn.preprocessing import MinMaxScaler
6 | import tensorflow as tf
7 | from keras.models import load_model
8 | from keras import Model
9 | import tensorflow.keras
10 | import tensorflow.keras.backend as K
11 | from tensorflow.keras.layers import Input, Activation, concatenate, Lambda
12 | from tensorflow.keras.layers import Reshape
13 | from keras.callbacks import ModelCheckpoint
14 | from keras.backend import sigmoid
15 | from keras.utils.generic_utils import get_custom_objects
16 | from pickle import load
17 | import matplotlib.pyplot as plt
18 | import scipy
19 | from sklearn.metrics import mean_absolute_error, mean_squared_error
20 | import h5py
21 |
22 | import matplotlib.pyplot as plt
23 | import matplotlib.gridspec as gridspec
24 | from matplotlib.animation import FuncAnimation
25 | import seaborn as sns
26 | from pickle import dump, load
27 |
28 | import geopandas
29 | import contextily as ctx
30 |
31 | # import custom classes
32 | from _shared.attention_layer import attention
33 |
34 |
35 |
36 | # choose model type to run test for
37 | model_type ="solar"
38 |
39 | # declare dataset file
40 | dataset_name = f'dataset_{model_type}.hdf5'
41 |
42 | # choose to activate plot functions
43 | plot_temporal_attention = False
44 | plot_spatial_attention = False
45 |
46 | # declare quantiles
47 | quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95]
48 |
49 | # index to declare which test result to plot
50 | plot_ref = 0
51 |
52 | # load scaler
53 | scaler = load(open(f'../../data/processed/{model_type}/_scaler/scaler_{model_type}.pkl', 'rb'))
54 |
55 | # collect param sizes
56 | f = h5py.File(f"../../data/processed/{model_type}/{dataset_name}", "r")
57 | features = np.empty_like(f['train_set']['X1_train'][0])
58 | times_in = np.empty_like(f['train_set']['X2_train'][0])
59 | times_out = np.empty_like(f['train_set']['X3_train'][0])
60 | labels = np.empty_like(f['train_set']['y_train'][0])
61 | x_len = f['train_set']['X1_train'].shape[0]
62 | y_len = f['train_set']['y_train'].shape[0]
63 | print('size parameters loaded')
64 |
65 | # additional params dependent on whether spatial data is present
66 | if model_type != "price":
67 | height, width, channels = features.shape[0], features.shape[1], features.shape[2]
68 | else:
69 | channels = features.shape[-1]
70 |
71 | times_in_dim = times_in.shape[-1]
72 | times_out_dim = times_out.shape[-1]
73 |
74 | # declare additional useful params
75 | Tx = 336
76 | Ty = 48
77 | n_s = 32
78 | input_seq_size = Tx
79 | output_seq_size = Ty
80 |
81 | # define swish activation for use within the compiled model
82 | def swish(x, beta = 1):
83 | return (x * sigmoid(beta * x))
84 |
85 | # register swish as a custom activation (any custom key can be used for the name)
86 | get_custom_objects().update({'swish': Activation(swish)})
87 |
88 | # load main model
89 | model = load_model(f'../../models/seq2seq+temporal+spatial/{model_type}/{model_type}_main.h5', custom_objects = {'': lambda y,f: defined_loss(q,y,f), 'attention': attention, 'Activation': Activation(swish)})
90 |
91 | # read encoder models - ignoring the spatial encoder in the price forecasting case
92 | temporal_enc = load_model(f'../../models/seq2seq+temporal+spatial/{model_type}/{model_type}_temporal_enc.h5')
93 |
94 | if model_type != "price":
95 | spatial_enc = load_model(f'../../models/seq2seq+temporal+spatial/{model_type}/{model_type}_spatial_enc.h5')
96 |
97 | # load and process data
98 | f = h5py.File(f"../../data/processed/{model_type}/{dataset_name}", "r")
99 |
100 | # load test or train data - loading everything would exceed memory, so only the required split is read
101 | set_type = 'test'
102 | X_train1 = f[f'{set_type}_set'][f'X1_{set_type}']
103 | X_train2 = f[f'{set_type}_set'][f'X2_{set_type}']
104 | X_train3 = f[f'{set_type}_set'][f'X3_{set_type}']
105 | X_train4 = f[f'{set_type}_set'][f'X1_{set_type}']
106 | y_train = f[f'{set_type}_set'][f'y_{set_type}']
107 |
108 | # get relevant time references
109 | with open(f'../../data/processed/{model_type}/time_refs_{model_type}.pkl', 'rb') as time_file:
110 | time_refs = load(time_file)
111 |
112 | input_times = time_refs[f'input_times_{set_type}']
113 | output_times = time_refs[f'output_times_{set_type}']
114 |
115 | time_file.close()
116 |
117 | # begin sequencing of data
118 | input_start, output_start = 0, input_seq_size
119 |
120 | seqX1, seqX2, seqX3, seqX4, seqY = [], [], [], [], []
121 |
122 | times_in, times_out = [], []
123 |
124 | while (output_start + output_seq_size) <= len(y_train):
125 | # increment indexes for windowing of data
126 | input_end = input_start + input_seq_size
127 | output_end = output_start + output_seq_size
128 |
129 | # inputs
130 | seqX1.append(X_train1[input_start:input_end])
131 | seqX2.append(X_train2[input_start:input_end])
132 | times_in.append(input_times[input_start:input_end])
133 |
134 | # outputs
135 | seqX3.append(X_train3[output_start:output_end])
136 | if model_type != 'price':
137 | nwp_data = X_train4[output_start:output_end][:,:,:,1:]
138 | nwp_data = np.average(nwp_data, axis=(1,2))
139 | else:
140 | nwp_data = X_train4[output_start:output_end][:,1:]
141 | seqX4.append(nwp_data)
142 | seqY.append(y_train[output_start:output_end])
143 | times_out.append(output_times[output_start:output_end])
144 |
145 | input_start += output_seq_size
146 | output_start += output_seq_size
147 |
148 | # make sure all are numpy arrays
149 | x1, x2, x3, x4, y = np.array(seqX1), np.array(seqX2), np.array(seqX3), np.array(seqX4), np.array(seqY)
150 | times_in, times_out = np.array(times_in), np.array(times_out)
151 | f.close()
152 |
153 | # inverse-transform actual values back to their original units
154 | y_idx = y.shape[0]
155 | y = scaler.inverse_transform(y.reshape(-1,1)).reshape(y_idx, Ty, 1)
156 |
157 | # declare initial hidden states
158 | s0 = np.zeros((1, n_s))
159 | c0 = np.zeros((1, n_s))
160 |
161 | # function for inference decoder model - one for each quantile
162 | def inference_dec_model(quantile):
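    | # rebuilds a single-step decoder for one quantile from the trained model's layers (via model.get_layer):
    | # given the encoder outputs, the previous prediction and the previous LSTM states, it returns
    | # one forecast step plus the updated states and attention weights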
163 |
164 | # Encoder outputs for setup
165 | ccn_enc_output_test = Input(shape=(320, 128))
166 | lstm_enc_output_test = Input(shape=(Tx, n_s*2)) #+ times_in_dim
167 | prev_prediction = Input(shape=(1, 1))
168 |
169 | # Decoder Input
170 | times_in = Input(shape=(1, times_in_dim))
171 | times_out = Input(shape=(1, times_out_dim))
172 | out_nwp = Input(shape=(1, channels-1))
173 | s_state0 = Input(shape=(32,))
174 | c_state0 = Input(shape=(32,))
175 | if model_type != "price":
176 | decoder_input = Input(shape=(1, times_out_dim + (channels-1)))
177 | else:
178 | decoder_input = Input(shape=(1, times_out_dim))
179 |
180 | # define input for encoder
181 | if model_type != 'price':
182 | enc_in = concatenate([out_nwp, times_out], axis=-1)
183 | else:
184 | enc_in = times_out
185 |
186 | # context and previous output
187 | attn_weights_temp_test, context = model.get_layer(f'temporal_attention_q_{quantile}')(lstm_enc_output_test, enc_in, s_state0, c_state0)
188 |
189 | if model_type != 'price':
190 | attn_weights_spat_test, context_spat_test = model.get_layer(f'spatial_attention_q_{quantile}')(ccn_enc_output_test, enc_in, s_state0, c_state0)
191 |
192 | # context & previous output combine
193 | context = concatenate([context, context_spat_test], axis=-1)
194 |
195 | decoder_input_with_prev = concatenate([decoder_input, prev_prediction])
196 |
197 | # Decoder inference
198 | dec_output, s_state, c_state = model.get_layer(f'decoder_q_{quantile}')(decoder_input_with_prev, initial_state=[s_state0, c_state0])
199 |
200 | # combine context and prediction
201 | prediction = concatenate([context, K.expand_dims(dec_output,axis=1)])
202 |
203 | # final dense layer
204 | pred_test = model.get_layer(f'dense1_q_{quantile}')(prediction)
205 | pred_test = model.get_layer(f'dense3_q_{quantile}')(pred_test)
206 |
207 | if model_type == "solar":
208 | pred_test = model.get_layer(f'relu_act_q_{quantile}')(pred_test)
209 |
210 | # Inference Model
211 | if model_type != 'price':
212 | deoceder_test_model = Model(inputs=[times_in, times_out, out_nwp, decoder_input, ccn_enc_output_test, lstm_enc_output_test, prev_prediction, s_state0, c_state0], outputs=[pred_test, s_state, c_state, attn_weights_temp_test, attn_weights_spat_test])
213 | else:
214 | deoceder_test_model = Model(inputs=[times_in, times_out, out_nwp, decoder_input, lstm_enc_output_test, prev_prediction, s_state0, c_state0], outputs=[pred_test, s_state, c_state, attn_weights_temp_test])
215 | return deoceder_test_model
216 |
217 | # dictionary to store decoder models
218 | decoder_models = {}
219 |
220 | # instantiate model for each quantile
221 | for q in quantiles:
222 | decoder_models[f'{q}'] = inference_dec_model(q)
223 |
224 | # store predictions
225 | predictions = {}
226 | quantile_temporal_attns = {}
227 | quantile_spatial_attns = {}
228 |
229 | # loop through each sample, passing individually to model
230 | for q in quantiles:
231 | print(q)
232 |
233 | # set hidden states to zero
234 | s_state, c_state = s0, c0
235 |
236 | # empty arrays to store all results
237 | total_pred = np.empty((x1.shape[0], Ty, 1))
238 | total_temp = np.empty((x1.shape[0], Tx, Ty))
239 |
240 | if model_type != 'price':
241 | total_spat = np.empty((x1.shape[0], 320, Ty)) # 320 is the fixed spatial attention res
242 |
243 | decoder = decoder_models[f'{q}']
244 |
245 | for idx in range(x1.shape[0]): # loop through each sample, to keep track of hidden states
246 |
247 | # create empty results for results per sample
248 | outputs = []
249 | spatial_attns = []
250 | temporal_attns = []
251 |
252 | # run the trained encoders on this sample
253 | lstm_enc_output, enc_s_state, enc_c_state = temporal_enc([x1[idx:idx+1], x2[idx:idx+1]])
254 |
255 | if model_type != 'price':
256 | ccn_enc_output = spatial_enc(x1[idx:idx+1])
257 | intial_in = np.average(x1[idx:idx+1], axis=(2,3))
258 | else:
259 | intial_in = x1[idx:idx+1]
260 |
261 | prev_prediction = intial_in[:,-1:,0:1]
262 |
263 | for ts in range(Ty):
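    | # autoregressive decoding: the previous step's prediction (prev_prediction) is fed back into the
    | # decoder together with that step's time (and, for non-price variables, NWP) features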
264 |
265 | if model_type != 'price':
266 | # declare decoder input
267 | if ts > 0:
268 | decoder_input = concatenate([x4[idx:idx+1,ts-1:ts,:], x3[idx:idx+1,ts-1:ts,:]], axis=-1)
269 | else:
270 | decoder_input = concatenate([intial_in[:,-1:,1:], x2[idx:idx+1,-1:,:]], axis=-1)
271 | else:
272 | if ts > 0:
273 | decoder_input = x3[idx:idx+1,ts-1:ts,:]
274 | else:
275 | decoder_input = x2[idx:idx+1,-1:,:]
276 |
277 | if model_type != 'price':
278 | pred, s_state, c_state, attn_weights_temp_test, attn_weights_spat_test = decoder([x2[idx:idx+1,ts:ts+1,:], x3[idx:idx+1,ts:ts+1,:], x4[idx:idx+1,ts:ts+1,:], decoder_input, ccn_enc_output, lstm_enc_output, prev_prediction, s_state, c_state])
279 | spatial_attns.append(attn_weights_spat_test)
280 | else:
281 | pred, s_state, c_state, attn_weights_temp_test = decoder([x2[idx:idx+1,ts:ts+1,:], x3[idx:idx+1,ts:ts+1,:], x4[idx:idx+1,ts:ts+1,:], decoder_input, lstm_enc_output, prev_prediction, s_state, c_state])
282 |
283 | prev_prediction = pred
284 |
285 | outputs.append(pred)
286 | temporal_attns.append(attn_weights_temp_test)
287 |
288 | combined_outputs = np.concatenate(outputs, axis=1)
289 | combined_temp_attn = np.concatenate(temporal_attns, axis=-1)
290 |
291 |
292 | total_pred[idx, : , :] = scaler.inverse_transform(combined_outputs[0,:,:])
293 | total_temp[idx, : , :] = combined_temp_attn
294 |
295 | if model_type != 'price':
296 | combined_spat_attn = np.concatenate(spatial_attns, axis=-1)
297 | total_spat[idx, : , :] = combined_spat_attn
298 |
299 | predictions[f'{q}'] = total_pred
300 | quantile_temporal_attns[f'{q}'] = total_temp
301 | if model_type != 'price': quantile_spatial_attns[f'{q}'] = total_spat
302 |
303 | # plot predictions for specified index
304 | for idx, (key, values) in enumerate(predictions.items()):
305 | plt.plot(values[plot_ref:plot_ref+7,:].flatten(), label=f"prediction_{key}")
306 |
307 | plt.plot(y[plot_ref:plot_ref+7,:,0].flatten(), label="actual")
308 | plt.legend()
309 | plt.show()
310 |
311 |
312 | # plot temporal attention (quantile 0.5)
313 | att_w_temp = np.transpose(quantile_temporal_attns['0.5'][plot_ref])
314 | if model_type != "price":
315 | x = np.average(x1, axis=(2,3))[plot_ref, :]
316 | else:
317 | x = x1[plot_ref, :]
318 |
319 | y_attn = y[plot_ref, :, 0]
320 | y_hat = predictions['0.5'][plot_ref, :]
321 |
322 | # make attention plotting function
323 | def temporal_attention_graph(x, y, att_w_temp):
324 |
325 | fig = plt.figure(figsize=(24, 8))
326 | gs = gridspec.GridSpec(ncols=90, nrows=100)
327 |
328 | upper_axis = fig.add_subplot(gs[0:20, 10:75])
329 | left_axis = fig.add_subplot(gs[25:, 0:8])
330 | atten_axis = fig.add_subplot(gs[25:, 10:])
331 |
332 | upper_axis.plot(x)
333 | upper_axis.set_xlim([0, Tx])
334 | upper_axis.set_ylim([0, 1])
335 | upper_axis.set_xticks(range(0, Tx))
336 | upper_axis.set_xticklabels(range(0, Tx))
337 |
338 | left_axis.plot(y, range(0,Ty), label='True')
339 | left_axis.plot(y_hat, range(0,Ty), label='Prediction')
340 | left_axis.set_ylim([0, Ty])
341 | left_axis.set_yticks(range(0, Ty, 6))
342 | left_axis.set_yticklabels(range(0, Ty, 6))
343 | left_axis.invert_yaxis()
344 |
345 | sns.heatmap(att_w_temp, cmap='flare', ax = atten_axis, vmin=0, vmax=0.001)
346 | atten_axis.set_xticks(range(0, Tx))
347 | atten_axis.set_xticklabels(range(0, Tx))
348 | atten_axis.set_yticks(range(0, Ty, 4))
349 | atten_axis.set_yticklabels(range(0, Ty, 4))
350 |
351 | plt.show()
352 |
353 |
354 | if plot_temporal_attention is True:
355 | temporal_attention_graph(x, y_attn, att_w_temp)
356 |
357 |
358 |
359 | # plot spatial attention
360 | def plot_spatial_predictions(spatial_data, title, height_scale, width_scale, frame_num):
361 |
362 | fig = plt.figure(figsize=[8,10]) # a new figure window
363 | ax_set = fig.add_subplot(1, 1, 1)
364 |
365 | # create baseline map
366 | # spatial data on UK basemap
367 | df = pd.DataFrame({
368 | 'LAT': [49.78, 61.03],
369 | 'LON': [-11.95, 1.55],
370 | })
371 |
372 | geo_df = geopandas.GeoDataFrame(df, crs = {'init': 'epsg:4326'},
373 | geometry=geopandas.points_from_xy(df.LON, df.LAT)).to_crs(epsg=3857)
374 |
375 | ax = geo_df.plot(
376 | figsize= (8,10),
377 | alpha = 0,
378 | ax=ax_set,
379 | )
380 |
381 | plt.title(title)
382 | ax.set_axis_off()
383 |
384 | # add basemap
385 | url = 'http://tile.stamen.com/terrain/{z}/{x}/{y}.png'
386 | zoom = 10
387 | xmin, xmax, ymin, ymax = ax.axis()
388 | basemap, extent = ctx.bounds2img(xmin, ymin, xmax, ymax, zoom=zoom, url=url)
389 | ax.imshow(basemap, extent=extent, interpolation='gaussian')
390 | attn_over = np.resize(spatial_data[0], (height_scale, width_scale))
391 |
392 | gb_shape = geopandas.read_file("./Data/shapefiles/GBR_adm/GBR_adm0.shp").to_crs(epsg=3857)
393 | irl_shape = geopandas.read_file("./Data/shapefiles/IRL_adm/IRL_adm0.shp").to_crs(epsg=3857)
394 | gb_shape.boundary.plot(ax=ax, edgecolor="black", linewidth=0.5, alpha=0.4)
395 | irl_shape.boundary.plot(ax=ax, edgecolor="black", linewidth=0.5, alpha=0.4)
396 | overlay = ax.imshow(attn_over, cmap='viridis', alpha=0.5, extent=extent)
397 | # ax.axis((xmin, xmax, ymin, ymax))
398 | txt = fig.text(.5, 0.09, '', ha='center')
399 |
400 |
401 | def update(i):
402 | spatial_over = np.resize(spatial_data[i], (height_scale, width_scale))
403 | # overlay = ax.imshow(spatial_over, cmap='viridis', alpha=0.5, extent=extent)
404 | overlay.set_data(spatial_over)
405 | txt.set_text(f"Timestep: {i}")
406 | # plt.cla()
407 |
408 | return [overlay, txt]
409 |
410 | animation_ = FuncAnimation(fig, update, frames=frame_num, blit=False, repeat=False)
411 | plt.show(block=True)
412 | # animation_.save(f'{title}_animation.gif', writer='imagemagick')
413 |
414 | if plot_spatial_attention is True:
415 | # transpose spatial attention results
416 | att_w_spat = np.transpose(total_spat[plot_ref])
417 | # plot attention weights
418 | plot_spatial_predictions(att_w_spat, 'Spatial Context', 16, 20, 48)
419 |
420 |
421 |
422 |
423 |
424 | # add date references to result dictionaries
425 | time_refs = {'input_times': times_in, 'output_times': times_out}
426 |
427 | predictions['time_refs'] = time_refs
428 | quantile_temporal_attns['time_refs'] = time_refs
429 |
430 | # add x-input data
431 | quantile_temporal_attns['input_features'] = x1
432 |
433 | # add true value for reference to prediction dictionary
434 | predictions['y_true'] = y
435 |
436 | # performance evaluation
437 | # evaluate_predictions(predictions)
438 |
439 |
440 | # save results - forecasted timeseries matrix
441 | with open(f'../../results/seq2seq+temporal+spatial/{model_type}/forecasted_time_series_{model_type}.pkl', 'wb') as ts_file:
442 | dump(predictions, ts_file)
443 |
444 | # save results - forecasted temporal attention matrix
445 | with open(f'../../results/seq2seq+temporal+spatial/{model_type}/attention_data_{model_type}.pkl', 'wb') as attention_file:
446 | dump(quantile_temporal_attns, attention_file)
447 |
448 | # save results - forecasted spatial attention matrix
449 | with open(f'../../results/seq2seq+temporal+spatial/{model_type}/spatial_attention_data_{model_type}.pkl', 'wb') as spatial_file:
450 | dump(quantile_spatial_attns, spatial_file)
451 |
452 |
453 |
454 |
455 |
456 |
457 |
458 |
459 |
460 |
461 |
462 |
--------------------------------------------------------------------------------
/scripts/models/seq2seq+spatial+temporal_attn.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sys, os
3 | import h5py
4 | import tensorflow as tf
5 | import tensorflow.keras
6 | import tensorflow.keras.backend as K
7 | from tensorflow.keras import Model
8 | from tensorflow.keras.layers import Conv2D, Bidirectional, Dense, TimeDistributed, LSTM, BatchNormalization
9 | from tensorflow.keras.layers import Input, Activation, AveragePooling2D, Lambda, concatenate, Reshape
10 | from keras.backend import sigmoid
11 | from keras.utils.generic_utils import get_custom_objects
12 |
13 | # import custom classes
14 | from _shared.attention_layer import attention
15 | from _shared.timeseries_data_generator import DataGenerator
16 |
17 | np.set_printoptions(threshold=sys.maxsize)
18 | tf.random.set_seed(180)
19 |
20 | ###########################################_____SET_MODEL_PARAMETERS_____############################################
21 | model_type ="solar"
22 |
23 | # declare dataset file
24 | dataset_name = f'dataset_{model_type}.hdf5'
25 |
26 | # declare quantiles for model
27 | quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95]
28 |
29 | # get useful size parameters
30 | f = h5py.File(f"../../data/processed/{model_type}/{dataset_name}", "r")
31 | features = np.empty_like(f['train_set']['X1_train'][0])
32 | times_in = np.empty_like(f['train_set']['X2_train'][0])
33 | times_out = np.empty_like(f['train_set']['X3_train'][0])
34 | labels = np.empty_like(f['train_set']['y_train'][0])
35 | x_len = f['train_set']['X1_train'].shape[0]
36 | y_len = f['train_set']['y_train'].shape[0]
37 | f.close()
38 |
39 | # input / output sequence sizes
40 | input_seq_size = 336
41 | output_seq_size = 48
42 | n_s = 32 # number of hidden states used through model
43 |
44 | ###########################################_____DATA_GENERATOR_____#################################################
45 |
46 | # data generator input parameters - avoid shuffle in this case
47 | params = {'batch_size': 16,
48 | 'shuffle': False }
49 |
50 | # instantiate data generator object
51 | training_generator = DataGenerator(dataset_name = dataset_name, x_length = x_len, y_length = y_len, hidden_states = n_s, **params)
52 |
53 | ###########################################_____MODEL_ARCHITECTURE_____#################################################
54 |
55 | # capture some more useful dimensions
56 | Tx = input_seq_size
57 | Ty = output_seq_size
58 |
59 | if model_type != "price":
60 | height, width, channels = features.shape[0], features.shape[1], features.shape[2]
61 | else:
62 | channels = features.shape[-1]
63 |
64 | times_in_dim = times_in.shape[-1]
65 | times_out_dim = times_out.shape[-1]
66 |
67 | # spatial encoder
68 | def cnn_encoder(ccn_input):
69 | # input shape -> (batch, time, width, height, features)
70 | # output shape -> (batch, time, width x height, embedding_size)
71 |
72 | ccn_enc_output = TimeDistributed(Conv2D(16, kernel_size=3, strides=1, activation="relu"))(ccn_input)
73 | ccn_enc_output = BatchNormalization()(ccn_enc_output)
74 | ccn_enc_output = TimeDistributed(AveragePooling2D(pool_size=(2, 2), data_format="channels_last"))(ccn_enc_output)
75 | ccn_enc_output = TimeDistributed(Conv2D(32, kernel_size=3, strides=1, activation="relu"))(ccn_enc_output)
76 | ccn_enc_output = BatchNormalization()(ccn_enc_output)
77 | ccn_enc_output = TimeDistributed(Conv2D(64, kernel_size=3, strides=1, activation="relu"))(ccn_enc_output)
78 | ccn_enc_output = BatchNormalization()(ccn_enc_output)
79 | ccn_enc_output = TimeDistributed(Conv2D(128, kernel_size=3, strides=1, activation="relu"))(ccn_enc_output)
80 | ccn_enc_output = BatchNormalization()(ccn_enc_output)
81 |
82 | ccn_enc_output = Reshape((ccn_enc_output.shape[1], -1, ccn_enc_output.shape[-1]))(ccn_enc_output)
83 |
84 | ccn_enc_output = K.mean(ccn_enc_output, axis=1)
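    | # average over the time axis, leaving (batch, flattened_positions, 128); the inference script
    | # expects 320 flattened spatial positions at this point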
85 |
86 | return ccn_enc_output
87 |
88 | # temporal encoder layers
89 | lstm_encoder = Bidirectional(LSTM(n_s*2, return_sequences = True, return_state = True))
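    | # with n_s = 32 each direction has 64 units, so the encoder outputs (and the concatenated
    | # hidden/cell states below) are 128-dimensional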
90 |
91 | def encoder(input, times_in):
92 |
93 | # accommodate the case without a 2D (spatial) dataset
94 | if model_type != "price":
95 | enc_output = K.mean(input, axis=(2,3))
96 | else:
97 | enc_output = input
98 |
99 | # concat input time features with input
100 | enc_output = concatenate([enc_output, times_in], axis=-1)
101 |
102 | enc_output, forward_h, forward_c, backward_h, backward_c = lstm_encoder(enc_output)
103 | # enc_output, enc_h, enc_s = lstm_encoder(enc_output)
104 |
105 | enc_h = concatenate([forward_h, backward_h], axis=-1)
106 | enc_s = concatenate([forward_c, backward_c], axis=-1)
107 |
108 | # # concat input time features with input
109 | # enc_output = concatenate([enc_output, times_in], axis=-1)
110 |
111 | return enc_output, enc_h, enc_s
112 |
113 | # declare decoder layer
114 | lstm_decoder = LSTM(n_s, return_sequences = True, return_state = True)
115 |
116 | def decoder(context, h_state, cell_state):
117 |
118 | # concat encoder input and time features
119 | # context = concatenate([context, times_out], axis=-1)
120 |
121 | dec_output, h_state, c_state = lstm_decoder(context, initial_state = [h_state, cell_state])
122 |
123 | return dec_output, h_state, c_state
124 |
125 | # make custom activation - swish
126 | def swish(x, beta = 1):
127 | return (x * sigmoid(beta * x))
128 |
129 | # add swish activation to keras
130 | get_custom_objects().update({'swish': Activation(swish)})
131 |
132 | # define inputs for model
133 | if model_type != "price":
134 | x_input = Input(shape=(Tx, height, width, channels))
135 | else:
136 | x_input = Input(shape=(Tx, channels))
137 |
138 | times_in = Input(shape=(Tx, times_in_dim))
139 | times_out = Input(shape=(Ty, times_out_dim))
140 | out_nwp = Input(shape=(Ty, channels-1))
141 | s_state0 = Input(shape=(32,))
142 | c_state0 = Input(shape=(32,))
143 |
144 | # create empty list for outputs
145 | qunatile_predictions = []
146 | temporal_attns = []
147 | spatial_attns = []
148 |
149 | # call CCN_encoder function
150 | if model_type != "price":
151 | ccn_enc_output = cnn_encoder(x_input)
152 |
153 | # call LSTM_encoder function
154 | lstm_enc_output, enc_s_state, enc_c_state = encoder(x_input, times_in)
155 |
156 | # call decoder
157 | for q in quantiles:
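    | # one attention + decoder branch is built per quantile; all branches share the CNN and LSTM encoders called above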
158 |
159 | # reset model parameters for each quantile prediction
160 | ts_predictions = []
161 | temp_attns = []
162 | spatial_attns = []
163 |
164 | if model_type != "price":
165 | intial_in = K.mean(x_input, axis=(2,3))
166 | prev_prediction = intial_in[:,-1:,0:1]
167 |
168 | decoder = LSTM(32, return_sequences = False, return_state = True, name=f'decoder_q_{q}')
169 | spatial_attention = attention(n_s, name=f"spatial_attention_q_{q}")
170 | temporal_attention = attention(n_s, name=f"temporal_attention_q_{q}")
171 |
172 | output_1 = Dense(32, activation="swish", name=f'dense1_q_{q}')
173 | output_2 = Dense(1, name=f'dense3_q_{q}')
174 | final_act = Activation('relu', name=f'relu_act_q_{q}')
175 |
176 | # reset hidden states
177 | s_state = s_state0
178 | c_state = c_state0
179 |
180 | # make prediction for each output timestep
181 | for ts in range(Ty):
182 |
183 | if model_type != "price":
184 | enc_out = concatenate([out_nwp[:,ts:ts+1,:], times_out[:,ts:ts+1,:]], axis=-1, name=f'concat1_q_{q}_{ts}')
185 | else:
186 | enc_out = times_out[:,ts:ts+1,:]
187 |
188 | # get context matrix (temporal)
189 | attn_weights_temp, context = temporal_attention(lstm_enc_output, enc_out, s_state, c_state)
190 |
191 | # get context matrix (spatial)
192 | if model_type != "price":
193 | attn_weights_spat, context_spat = spatial_attention(ccn_enc_output, enc_out, s_state, c_state)
194 |
195 | # combine spatial and temporal context
196 | context = concatenate([context, context_spat], axis=-1, name=f'concat1.5_q_{q}_{ts}')
197 |
198 | # make decoder input - nwp + time features if not price prediction, otherwise just time features
199 | if ts > 0:
200 | decoder_input = concatenate([out_nwp[:,ts-1:ts,:], times_out[:,ts-1:ts,:]], axis=-1, name=f'concat2_q_{q}_{ts}')
201 | else:
202 | decoder_input = concatenate([intial_in[:,-1:,1:], times_in[:,-1:,:]], axis=-1, name=f'concat3_q_{q}_{ts}')
203 | else:
204 | if ts > 0:
205 | decoder_input = times_out[:,ts-1:ts,:]
206 | else:
207 | decoder_input = times_in[:,-1:,:]
208 |
209 | # call decoder
210 | dec_output, s_state, c_state = decoder(decoder_input, initial_state = [s_state, c_state])
211 |
212 | # combine context with decoder output
213 | prediction = concatenate([context, K.expand_dims(dec_output,axis=1)], axis=-1, name=f'concat5_q_{q}_{ts}')
214 |
215 | # pass through MLP
216 | output = output_1(prediction)
217 | output = output_2(output)
218 |
219 | if model_type == "solar":
220 | output = final_act(output)
221 |
222 | # collect outputs for final predictions
223 | prev_prediction = output
224 | ts_predictions.append(output)
225 | temp_attns.append(attn_weights_temp)
226 |
227 | if model_type != "price":
228 | spatial_attns.append(attn_weights_spat)
229 |
230 | ts_predictions_total = concatenate(ts_predictions, axis = 1)
231 | temp_attns_total = concatenate(temp_attns, axis = -1)
232 |
233 | if model_type != "price":
234 | sptial_attns_total = concatenate(spatial_attns, axis = -1)
235 |
236 | qunatile_predictions.append(ts_predictions_total)
237 |
238 | # append spatial and temporal predictions - if using final model as inference
239 | # qunatile_predictions.extend([temp_attns_total])
240 | # qunatile_predictions.extend([sptial_attns_total])
241 |
242 | # instantiate model
243 | model = Model(inputs = [x_input, times_in, times_out, out_nwp, s_state0, c_state0], outputs = qunatile_predictions)
244 |
245 |
246 | ###########################################_____MODEL_TRAINING_____#################################################
247 |
248 | # define optimiser (a clipvalue can be added here if gradients need clipping)
249 | optimizer = tensorflow.keras.optimizers.Adam(learning_rate = 0.001)
250 |
251 | # define loss for each quantile
252 | q_losses = [lambda y, f, q=q: K.mean(K.maximum(q * (y - f), (q - 1) * (y - f)), axis=-1) for q in quantiles]
253 |
254 | # append additional empty losses for temporal and spatial encoders
255 | # q_losses.append([None,None])
256 |
257 | # compile and train model
258 | model.compile(loss = q_losses, optimizer= optimizer)
259 | print(model.summary())
260 | model.fit(training_generator, epochs = 20)
261 |
262 | # save models - saving encoders individually for inference
263 | os.makedirs(f'../../models/seq2seq+temporal+spatial/{model_type}', exist_ok=True)
264 | model.save(f'../../models/seq2seq+temporal+spatial/{model_type}/{model_type}_main.h5')
265 |
266 | # save some additional models for inference
267 | encoder_temporal_model = Model(inputs = [x_input, times_in], outputs=[lstm_enc_output, enc_s_state, enc_c_state])
268 | encoder_temporal_model.save(f'../../models/seq2seq+temporal+spatial/{model_type}/{model_type}_temporal_enc.h5')
269 |
270 | # save spatial encoders if not price forecasting
271 | if model_type != 'price':
272 | encoder_spatial_model = Model(x_input, ccn_enc_output)
273 | encoder_spatial_model.save(f'../../models/seq2seq+temporal+spatial/{model_type}/{model_type}_spatial_enc.h5')
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
--------------------------------------------------------------------------------
/scripts/models/seq2seq+temporal_attn.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sys, os
3 | import h5py
4 | import tensorflow as tf
5 | import tensorflow.keras
6 | import tensorflow.keras.backend as K
7 | from tensorflow.keras import Model
8 | from tensorflow.keras.layers import Conv2D, Bidirectional, Dense, TimeDistributed, LSTM
9 | from tensorflow.keras.layers import Input, Activation, AveragePooling2D, Lambda, concatenate, Reshape
10 | from keras.backend import sigmoid
11 | from keras.utils.generic_utils import get_custom_objects
12 |
13 | # import custom classes
14 | from _shared.attention_layer import attention
15 | from _shared.timeseries_data_generator import DataGenerator
16 |
17 | np.set_printoptions(threshold=sys.maxsize)
18 | tf.random.set_seed(180)
19 |
20 | ###########################################_____SET_MODEL_PARAMETERS_____############################################
21 | model_type ="solar"
22 |
23 | # declare dataset file
24 | dataset_name = f'dataset_{model_type}.hdf5'
25 |
26 | # declare quantiles for model
27 | quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95]
28 |
29 | # get useful size parameters
30 | f = h5py.File(f"../../data/processed/{model_type}/{dataset_name}", "r")
31 | features = np.empty_like(f['train_set']['X1_train'][0])
32 | times_in = np.empty_like(f['train_set']['X2_train'][0])
33 | times_out = np.empty_like(f['train_set']['X3_train'][0])
34 | labels = np.empty_like(f['train_set']['y_train'][0])
35 | x_len = f['train_set']['X1_train'].shape[0]
36 | y_len = f['train_set']['y_train'].shape[0]
37 | f.close()
38 |
39 | # input / output sequence sizes
40 | input_seq_size = 336
41 | output_seq_size = 48
42 | n_s = 32 # number of hidden states used through model
43 |
44 | ###########################################_____DATA_GENERATOR_____#################################################
45 |
46 | # data generator input parameters - avoid shuffle in this case
47 | params = {'batch_size': 16,
48 | 'shuffle': False }
49 |
50 | # instantiate data generator object
51 | training_generator = DataGenerator(dataset_name = dataset_name, x_length = x_len, y_length = y_len, hidden_states = n_s, **params)
52 |
53 | ###########################################_____MODEL_ARCHITECTURE_____#################################################
54 |
55 | # capture some more useful dimensions
56 | Tx = input_seq_size
57 | Ty = output_seq_size
58 |
59 | if model_type != "price":
60 | height, width, channels = features.shape[0], features.shape[1], features.shape[2]
61 | else:
62 | channels = features.shape[-1]
63 |
64 | times_in_dim = times_in.shape[-1]
65 | times_out_dim = times_out.shape[-1]
66 |
67 |
68 | # temporal encoder layers
69 | lstm_encoder = Bidirectional(LSTM(n_s*2, return_sequences = True, return_state = True))
70 |
71 | def encoder(input, times_in):
72 |
73 | # accommodate the case without a 2D (spatial) dataset
74 | if model_type != "price":
75 | enc_output = K.mean(input, axis=(2,3))
76 | else:
77 | enc_output = input
78 |
79 | # concat input time features with input
80 | enc_output = concatenate([enc_output, times_in], axis=-1)
81 |
82 | enc_output, forward_h, forward_c, backward_h, backward_c = lstm_encoder(enc_output)
83 | # enc_output, enc_h, enc_s = lstm_encoder(enc_output)
84 |
85 | enc_h = concatenate([forward_h, backward_h], axis=-1)
86 | enc_s = concatenate([forward_c, backward_c], axis=-1)
87 |
88 | # concat input time features with input
89 | # enc_output = concatenate([enc_output, times_in], axis=-1)
90 |
91 | return enc_output, enc_h, enc_s
92 |
93 | # declare decoder layer
94 | lstm_decoder = LSTM(n_s, return_sequences = True, return_state = True)
95 |
96 | def decoder(context, h_state, cell_state):
97 |
98 | # concat encoder input and time features
99 | # context = concatenate([context, times_out], axis=-1)
100 |
101 | dec_output, h_state, c_state = lstm_decoder(context, initial_state = [h_state, cell_state])
102 |
103 | return dec_output, h_state, c_state
104 |
105 | # make custom activation - swish
106 | def swish(x, beta = 1):
107 | return (x * sigmoid(beta * x))
108 |
109 | # add swish activation to keras
110 | get_custom_objects().update({'swish': Activation(swish)})
111 |
112 | # define inputs for model
113 | x_input = Input(shape=(Tx, channels))
114 |
115 | times_in = Input(shape=(Tx, times_in_dim))
116 | times_out = Input(shape=(Ty, times_out_dim))
117 | out_nwp = Input(shape=(Ty, channels-1))
118 | s_state0 = Input(shape=(32,))
119 | c_state0 = Input(shape=(32,))
120 |
121 | # create empty list for outputs
122 | qunatile_predictions = []
123 | temporal_attns = []
124 |
125 | # call LSTM_encoder function
126 | lstm_enc_output, enc_s_state, enc_c_state = encoder(x_input, times_in)
127 |
128 | # call decoder
129 | for q in quantiles:
130 |
131 | # reset model parameters for each quantile prediction
132 | ts_predictions = []
133 | temp_attns = []
134 | spatial_attns = []
135 |
136 | if model_type != "price":
137 | intial_in = K.mean(x_input, axis=(2,3))
138 | prev_prediction = intial_in[:,-1:,0:1]
139 |
140 | decoder = LSTM(32, return_sequences = False, return_state = True, name=f'decoder_q_{q}')
141 | spatial_attention = attention(n_s, name=f"spatial_attention_q_{q}")
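    | # (the spatial_attention layer above mirrors the spatial+temporal script but is never called in this temporal-only variant)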
142 | temporal_attention = attention(n_s, name=f"temporal_attention_q_{q}")
143 |
144 | output_1 = Dense(32, activation="swish", name=f'dense1_q_{q}')
145 | output_2 = Dense(1, name=f'dense3_q_{q}')
146 | final_act = Activation('relu', name=f'relu_act_q_{q}')
147 |
148 | # reset hidden states
149 | s_state = s_state0
150 | c_state = c_state0
151 |
152 | # make prediction for each output timestep
153 | for ts in range(Ty):
154 |
155 | if model_type != "price":
156 | enc_out = concatenate([out_nwp[:,ts:ts+1,:], times_out[:,ts:ts+1,:]], axis=-1, name=f'concat1_q_{q}_{ts}')
157 | else:
158 | enc_out = times_out[:,ts:ts+1,:]
159 |
160 | # get context matrix (temporal)
161 | attn_weights_temp, context = temporal_attention(lstm_enc_output, enc_out, s_state, c_state)
162 |
163 | # no spatial context in this variant - go straight to building the decoder input
164 | if model_type != "price":
165 |
166 | # make decoder input - nwp + time features if not price prediction, otherwise just time features
167 | if ts > 0:
168 | decoder_input = concatenate([out_nwp[:,ts-1:ts,:], times_out[:,ts-1:ts,:]], axis=-1, name=f'concat2_q_{q}_{ts}')
169 | else:
170 | decoder_input = concatenate([intial_in[:,-1:,1:], times_in[:,-1:,:]], axis=-1, name=f'concat3_q_{q}_{ts}')
171 | else:
172 | if ts > 0:
173 | decoder_input = times_out[:,ts-1:ts,:]
174 | else:
175 | decoder_input = times_in[:,-1:,:]
176 |
177 | # call decoder
178 | dec_output, s_state, c_state = decoder(decoder_input, initial_state = [s_state, c_state])
179 |
180 | # combine context with decoder output
181 | prediction = concatenate([context, K.expand_dims(dec_output,axis=1)], axis=-1, name=f'concat5_q_{q}_{ts}')
182 |
183 | # pass through MLP
184 | output = output_1(prediction)
185 | output = output_2(output)
186 |
187 | if model_type == "solar":
188 | output = final_act(output)
189 |
190 | # collect outputs for final predictions
191 | prev_prediction = output
192 | ts_predictions.append(output)
193 | temp_attns.append(attn_weights_temp)
194 |
195 | ts_predictions_total = concatenate(ts_predictions, axis = 1)
196 | temp_attns_total = concatenate(temp_attns, axis = -1)
197 |
198 | qunatile_predictions.append(ts_predictions_total)
199 |
200 | # append spatial and temporal predictions - if using final model as inference
201 | # qunatile_predictions.extend([temp_attns_total])
202 | # qunatile_predictions.extend([sptial_attns_total])
203 |
204 | # instantiate model
205 | model = Model(inputs = [x_input, times_in, times_out, out_nwp, s_state0, c_state0], outputs = qunatile_predictions)
206 |
207 |
208 | ###########################################_____MODEL_TRAINING_____#################################################
209 |
210 | # define optimiser (a clipvalue can be added here if gradients need clipping)
211 | optimizer = tensorflow.keras.optimizers.Adam(learning_rate = 0.001)
212 |
213 | # define loss for each quantile
214 | q_losses = [lambda y, f, q=q: K.mean(K.maximum(q * (y - f), (q - 1) * (y - f)), axis=-1) for q in quantiles]
215 |
216 | # append additional empty losses for temporal and spatial encoders
217 | # q_losses.append([None,None])
218 |
219 | # compile and train model
220 | model.compile(loss = q_losses, optimizer= optimizer)
221 | print(model.summary())
222 | model.fit(training_generator, epochs = 20)
223 |
224 | # save models - saving encoders individually for inference
225 | os.makedirs(f'../../models/seq2seq+temporal/{model_type}', exist_ok=True)
226 | model.save(f'../../models/seq2seq+temporal/{model_type}/{model_type}_main.h5')
227 |
228 | # save some additional models for inference
229 | encoder_temporal_model = Model(inputs = [x_input, times_in], outputs=[lstm_enc_output, enc_s_state, enc_c_state])
230 | encoder_temporal_model.save(f'../../models/seq2seq+temporal/{model_type}/{model_type}_temporal_enc.h5')
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
--------------------------------------------------------------------------------
/scripts/models/seq2seq_model.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sys, os
3 | import h5py
4 | import tensorflow as tf
5 | import tensorflow.keras
6 | import tensorflow.keras.backend as K
7 | from tensorflow.keras import Model
8 | from tensorflow.keras.layers import Conv2D, Bidirectional, Dense, TimeDistributed, LSTM
9 | from tensorflow.keras.layers import Input, Activation, AveragePooling2D, Lambda, concatenate, Reshape, RepeatVector
10 | from keras.backend import sigmoid
11 | from keras.utils.generic_utils import get_custom_objects
12 |
13 |
14 |
15 | np.set_printoptions(threshold=sys.maxsize)
16 | tf.random.set_seed(180)
17 |
18 | ###########################################_____SET_MODEL_PARAMETERS_____############################################
19 | model_type ="solar"
20 |
21 | # declare dataset file
22 | dataset_name = f'dataset_{model_type}.hdf5'
23 |
24 | # declare quantiles for model
25 | quantiles = [0.05, 0.15, 0.25, 0.35, 0.5, 0.65, 0.75, 0.85, 0.95]
26 |
27 | # get useful size parameters
28 | f = h5py.File(f"../../data/processed/{model_type}/{dataset_name}", "r")
29 | features = np.empty_like(f['train_set']['X1_train'][0])
30 | times_in = np.empty_like(f['train_set']['X2_train'][0])
31 | times_out = np.empty_like(f['train_set']['X3_train'][0])
32 | labels = np.empty_like(f['train_set']['y_train'][0])
33 | x_len = f['train_set']['X1_train'].shape[0]
34 | y_len = f['train_set']['y_train'].shape[0]
35 | f.close()
36 |
37 | # input / output sequence sizes
38 | input_seq_size = 336
39 | output_seq_size = 48
40 | n_s = 32 # number of hidden states used through model
41 |
42 | ###########################################_____DATA_GENERATOR_____#################################################
43 |
44 | # data generator input parameters - avoid shuffle in this case
45 |
46 | params = {'batch_size': 64,
47 | 'shuffle': False }
48 |
49 | class DataGenerator(tensorflow.keras.utils.Sequence):
50 |
51 | def __init__(self, dataset_name, x_length, y_length, batch_size, shuffle):
52 | self.dataset_name = dataset_name
53 | self.batch_size = batch_size
54 | self.shuffle = shuffle
55 | self.xlen = x_length
56 | self.ylen = y_length
57 | self.index_ref = 0
58 | self.on_epoch_end()
59 |
60 | def __len__(self):
61 | # 'number of batches per Epoch'
62 | # return int(np.floor((self.xlen - (input_seq_size-1)) / self.batch_size))
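    | # number of complete input + output windows that fit in the training series, floored to whole batches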
63 | return int(np.floor((self.ylen - input_seq_size - (output_seq_size-1)) / self.batch_size))
64 |
65 | def __getitem__(self, index):
66 |
67 |
68 | input_indexes = self.input_indexes[(index*self.batch_size) : (index*self.batch_size) + (self.batch_size + (input_seq_size-1))]
69 | output_indexes = self.output_indexes[(index*self.batch_size) + input_seq_size : (index*self.batch_size) + input_seq_size + (self.batch_size + (output_seq_size-1))]
70 |
71 | # Generate data
72 | (X_train1, X_train2, X_train3, X_train4), y_train = self.__data_generation(input_indexes, output_indexes)
73 |
74 | y_trues = [y_train for i in quantiles]
75 |
76 | return (X_train1, X_train2, X_train3, X_train4), (y_trues) # one copy of the targets per quantile output head
77 |
78 | def on_epoch_end(self):
79 | # set length of indexes for each epoch
80 | self.input_indexes = np.arange(self.xlen)
81 | self.output_indexes = np.arange(self.ylen)
82 |
83 | if self.shuffle == True:
84 | np.random.shuffle(self.input_indexes)
85 |
86 | def to_sequence(self, x1, x2, x3, x4, y):
87 | # convert timeseries batch in sequences
88 | input_start, output_start = 0, 0
89 |
90 | seqX1, seqX2, seqX3, seqX4, seqY = [], [], [], [], []
91 |
92 | while (input_start + input_seq_size) <= len(x1):
93 | # offset handled during pre-processing
94 | input_end = input_start + input_seq_size
95 | output_end = output_start + output_seq_size
96 |
97 | # inputs
98 | seqX1.append(x1[input_start:input_end])
99 | seqX2.append(x2[input_start:input_end])
100 |
101 | # outputs
102 | seqX3.append(x3[output_start:output_end])
103 | seqX4.append(x4[output_start:output_end])
104 | seqY.append(y[output_start:output_end])
105 |
106 | input_start += 1
107 | output_start += 1
108 |
109 | seqX1, seqX2, seqX3, seqX4, seqY = np.array(seqX1), np.array(seqX2), np.array(seqX3), np.array(seqX4), np.array(seqY)
110 |
111 | return seqX1, seqX2, seqX3, seqX4, seqY
112 |
113 | def __data_generation(self, input_indexes, output_indexes):
114 |
115 | f = h5py.File(f"../../data/processed/{model_type}/{self.dataset_name}", "r")
116 | # X_train1 = f['train_set']['X1_train'][input_indexes]
117 | X_train2 = f['train_set']['X2_train'][input_indexes]
118 | X_train3 = f['train_set']['X3_train'][output_indexes]
119 |
120 | if model_type != 'price':
121 | X_train1 = f['train_set']['X1_train'][input_indexes][:,:,:,:]
122 | X_train1 = np.average(X_train1, axis=(1,2))
123 |
124 | X_train4 = f['train_set']['X1_train'][output_indexes][:,:,:,:]
125 | X_train4 = np.average(X_train4, axis=(1,2))
126 | X_train4 = X_train4[:,1:]
127 |
128 | else:
129 | X_train1 = f['train_set']['X1_train'][input_indexes][:,:]
130 |
131 | X_train4 = f['train_set']['X1_train'][output_indexes][:,:]
132 | X_train4 = X_train4[:,:-1]
133 |
134 |
135 | y_train = f['train_set']['y_train'][output_indexes]
136 | f.close()
137 |
138 | # convert to sequence data
139 | X_train1, X_train2, X_train3, X_train4, y_train = self.to_sequence(X_train1, X_train2, X_train3, X_train4, y_train)
140 |
141 | return (X_train1, X_train2, X_train3, X_train4), y_train
142 |
143 | training_generator = DataGenerator(dataset_name = dataset_name, x_length = x_len, y_length = y_len, **params)
144 |
145 | ###########################################_____MODEL_ARCHITECTURE_____#################################################
146 |
147 | # capture some more useful dimensions
148 | Tx = input_seq_size
149 | Ty = output_seq_size
150 |
151 | channels = features.shape[-1]
152 |
153 | times_in_dim = times_in.shape[-1]
154 | times_out_dim = times_out.shape[-1]
155 |
156 | # make custom activation - swish
157 | def swish(x, beta = 1):
158 | return (x * sigmoid(beta * x))
159 |
160 | # add swish activation to keras
161 | get_custom_objects().update({'swish': Activation(swish)})
162 |
163 | # define inputs for model
164 | x_input = Input(shape=(Tx, channels))
165 |
166 | times_in = Input(shape=(Tx, times_in_dim))
167 | times_out = Input(shape=(Ty, times_out_dim))
168 | out_nwp = Input(shape=(Ty, channels-1))
169 | s_state0 = Input(shape=(32,))
170 | c_state0 = Input(shape=(32,))
171 |
172 | # create empty list for outputs
173 | quantile_predictions = []
174 | temporal_attns = []
175 |
176 | for q in quantiles:
177 |
178 | combined_inputs = concatenate([x_input, times_in], axis=-1, name=f'concat_q_{q}')
179 |
180 | encoder_output, forward_h, forward_c, backward_h, backward_c = Bidirectional(LSTM(32, return_sequences = False, return_state = True), name=f'biLSTM_q_{q}')(combined_inputs)
181 | repeat_layer = RepeatVector(48)(encoder_output)
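    | # the 64-dim encoder summary is repeated across the 48 output steps and concatenated with the
    | # known future time / NWP features below to form the decoder input sequence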
182 |
183 | enc_h = concatenate([forward_h, backward_h])
184 | enc_s = concatenate([forward_c, backward_c])
185 |
186 | decoder_input = concatenate([repeat_layer, times_out])
187 | decoder_input = concatenate([decoder_input, out_nwp])
188 |
189 | decoder_out, _, _ = LSTM(64, return_sequences = True, return_state = True, name=f'decoder_LSTM_q_{q}')(decoder_input, initial_state = [enc_h, enc_s])
190 |
191 | dense_out = TimeDistributed(Dense(1, name=f'dense_q_{q}'))(decoder_out)
192 |
193 | if model_type == 'solar':
194 | dense_out = Activation('relu', name=f'relu_act_q_{q}')(dense_out)
195 |
196 | quantile_predictions.append(dense_out)
197 |
198 | model = Model(inputs = [x_input, times_in, times_out, out_nwp], outputs = quantile_predictions)
199 |
200 |
201 | ###########################################_____MODEL_TRAINING_____#################################################
202 |
203 | # define optimiser (a clipvalue can be added here if gradients need clipping)
204 | optimizer = tensorflow.keras.optimizers.Adam(learning_rate = 0.001)
205 |
206 | # define loss for each quantile
207 | q_losses = [lambda y, f, q=q: K.mean(K.maximum(q * (y - f), (q - 1) * (y - f)), axis=-1) for q in quantiles]
208 |
209 | # append additional empty losses for temporal and spatial encoders
210 | # q_losses.append([None,None])
211 |
212 | # compile and train model
213 | model.compile(loss = q_losses, optimizer= optimizer)
214 | print(model.summary())
215 | model.fit(training_generator, epochs = 20)
216 |
217 | # save model
218 | os.makedirs(f'../../models/seq2seq/{model_type}', exist_ok=True)
219 | model.save(f'../../models/seq2seq/{model_type}/{model_type}_seq2seq.h5')
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
--------------------------------------------------------------------------------
/scripts/postprocessing/d3_scripts/forecasting_graph.js:
--------------------------------------------------------------------------------
1 |
2 |
3 | function prob_forecast(file, ref, color_array) {
4 |
5 |
6 | // set the dimensions and margins of the graph
7 | var margin = {top: 10, right: 0, bottom: 50, left: 80},
8 | width = 1000 - margin.left - margin.right,
9 | height = 600 - margin.top - margin.bottom;
10 |
11 |
12 |
13 | // append the svg object to the body of the page
14 | var svg = d3.select("#my_dataviz_" + ref)
15 | .append("svg")
16 | // .attr("width", width + margin.left + margin.right)
17 | // .attr("height", height + margin.top + margin.bottom)
18 | .attr("width", "100%")
19 | .attr("height", "100%")
20 | .attr("viewBox", "0 0 1000 600")
21 | .attr("preserveAspectRatio", "xMinYMin meet")
22 | .append("g")
23 | .attr("transform",
24 | "translate(" + margin.left + "," + margin.top + ")");
25 |
26 | // var svg = d3.select("#my_dataviz")
27 | // .append("svg")
28 | // .attr("width", "50%")
29 | // .attr("height", "50%")
30 | // .attr("viewBox", "0 0 740 800");
31 |
32 | svg.append("rect")
33 | .attr("x",0)
34 | .attr("y",0)
35 | .attr("height", height)
36 | .attr("width", width)
37 | .style("fill","#DEDEDE") //EBEBEB
38 | .style("stroke","none")
39 | .style("opacity", 0.3)
40 |
41 | // svg.append('text')
42 | // .attr("x",width/2)
43 | // .attr("y",height/2)
44 | // .attr('font-family', 'FontAwesome')
45 | // .attr('font-size', 100)
46 | // .text(function(d) { return '\uf185' })
47 | // .style("fill","white")
48 | // .style("opacity", 0.4) ;
49 |
50 |
51 |
52 |
53 | // Parse the Data
54 | d3.csv(file,
55 |
56 | function(d){
57 | return { date: d3.timeParse("%d/%m/%Y %H:%M")(d.Datetime),
58 | one: d.q_05 = +d.q_05,
59 | second: d.q_15 = +d.q_15,
60 | third: d.q_25 = +d.q_25,
61 | fourth: d.q_35 = +d.q_35,
62 | five: d.q_5 = +d.q_5,
63 | six: d.q_65 = +d.q_65,
64 | seven: d.q_75 = +d.q_75,
65 | eight: d.q_85 = +d.q_85,
66 | nine: d.q_95 = +d.q_95,
67 | actual: d.actual = +d.actual,
68 | }
69 | },
70 |
71 | function(data) {
72 |
73 | // data.forEach(function(d) {
74 | // d.actual= +d.actual;
75 | // d.five= +d.five;
76 | // d.date = +d.date;
77 | // // d.Datetime = d3.timeParse(d.Datetime);
78 | // });
79 |
80 | // declare date parser
81 | var parseDate = d3.timeParse("%A");
82 |
83 |
84 |
85 | // List of groups = header of the csv files
86 | var keys = data.columns.slice(1)
87 |
88 |
89 |
90 | // Add X axis
91 | var x = d3.scaleTime()
92 | .domain(d3.extent(data, function(d) { return d.date; }))
93 | .range([ 0, width ])
94 |
95 | svg.append("g")
96 | .attr("transform", "translate(0," + height + ")")
97 | .call(d3.axisBottom(x).tickFormat(d3.timeFormat(parseDate)).tickSizeInner(-height).tickSizeOuter(0).ticks(7).tickPadding(20)) //.tickFormat(d3.timeFormat(parseDate))
98 | .selectAll(".tick text")
99 | .attr("transform", "translate(" + (width / 7) / 2 + ",0)")
100 | .style("text-transform", "uppercase")
101 | .style("font-size", "16px")
102 | .style("opacity", 0.5)
103 | // .tickArguments([5])
104 | // .tickCenterLabel(true)
105 | .select(".domain").remove()
106 |
107 | svg.append("g")
108 | .attr("transform", "translate(0," + height + ")")
109 | .call(d3.axisBottom(x).tickFormat(d3.timeFormat("(%d/%m/%y)")).tickSizeInner(-height).tickSizeOuter(0).ticks(7).tickPadding(20)) //.tickFormat(d3.timeFormat(parseDate))
110 | .selectAll(".tick text")
111 | .attr("transform", "translate(" + (width / 7) / 2 + ",17)")
112 | .style("text-transform", "uppercase")
113 | .style("font-size", "14px")
114 | .style("font-style", "italic")
115 | .style("opacity", 1)
116 | .select(".domain").remove()
117 |
118 |
119 | // x-axis mini tick marks
120 | // d3.svg.axis()
121 | // .scale()
122 | // .orient('bottom')
123 | // .tickFormat('')
124 | // .tickSize(30)
125 | // .tickPadding(6)
126 |
127 |
128 | // Add X axis label:
129 | svg.append("text")
130 | .attr("text-anchor", "middle")
131 | .attr("x", width/2)
132 | .attr("y", height + margin.top + 30)
133 | // .text("Day")
134 | .style("font", "12px arial")
135 |
136 |
137 | // Add Y axis label:
138 | if (ref === "price") {
139 | svg.append("text")
140 | .attr("text-anchor", "end")
141 | // .attr("y", +margin.left)
142 | // .attr("x", -margin.top + height/2)
143 | .attr("y", -margin.left + 35)
144 | .attr("x", -height/2 + 60)
145 | .text(ref +" (£/MWh)")
146 | .style("font", "14px arial")
147 | .style("text-transform", "uppercase")
148 | // .attr("transform",
149 | // "translate(" + (height/2) + ")")
150 | .attr("transform", "rotate(-90)");
151 | } else if (ref === "demand") {
152 | svg.append("text")
153 | .attr("text-anchor", "end")
154 | // .attr("y", +margin.left)
155 | // .attr("x", -margin.top + height/2)
156 | .attr("y", -margin.left + 35)
157 | .attr("x", -height/2 + 60)
158 | .text(ref +" (GW)")
159 | .style("font", "14px arial")
160 | .style("text-transform", "uppercase")
161 | // .attr("transform",
162 | // "translate(" + (height/2) + ")")
163 | .attr("transform", "rotate(-90)");
164 | } else {
165 | svg.append("text")
166 | .attr("text-anchor", "end")
167 | // .attr("y", +margin.left)
168 | // .attr("x", -margin.top + height/2)
169 | .attr("y", -margin.left + 35)
170 | .attr("x", -height/2 + 95)
171 | .text(ref +" Generation (GW)")
172 | .style("font", "14px arial")
173 | .style("text-transform", "uppercase")
174 | // .attr("transform",
175 | // "translate(" + (height/2) + ")")
176 | .attr("transform", "rotate(-90)");
177 | }
178 |
179 | // Add Y axis
180 | var y = d3.scaleLinear()
181 | .domain([d3.min(data, function(d) { return +d.one; }) * 0.95, d3.max(data, function(d) { return +d.nine; }) * 1.05])
182 | .range([ height, 0 ])
183 | svg.append("g")
184 | .call(d3.axisLeft(y).tickSizeInner(-width).ticks(8).tickPadding(12.5))
185 | .style("font", "15px arial")
186 | .select(".domain").remove();
187 | svg.selectAll(".tick line").attr("stroke", "white").attr('stroke-width',1)
188 |
189 |
190 |
191 | // group the data
192 | var sumstat = d3.nest()
193 | .key(function(d) { return d.name;})
194 | .entries(data);
195 |
196 | //stack the data
197 | var stackedData = d3.stack()
198 | // .offset(d3.stackOffsetSilhouette)
199 | .keys(keys)
200 | // .value(function(d, key){
201 | // return d.values[key]
202 | // })
203 | (data)
204 | console.log(stackedData.keys)
205 |
206 | // create a tooltip
207 | var Tooltip = svg
208 | .select("#my_dataviz_" + ref)
209 | .append("text")
210 | .attr("x", 0)
211 | .attr("y", 0)
212 | .style("opacity", 0)
213 | .style("font-size", 17)
214 |
215 | // Three functions that change the tooltip when the user hovers over / moves across / leaves an area
216 | var mouseover = function(d) {
217 |
218 | Tooltip.style("opacity", 0.5)
219 | d3.selectAll(".myArea").style("opacity", .2)
220 | d3.select(this)
221 | .style("stroke", "black")
222 | .style("opacity", 0.5)
223 | }
224 | var mousemove = function(d,i) {
225 | grp = keys[i]
226 | Tooltip.text(grp)
227 | }
228 |
229 | var mouseleave = function(d) {
230 | Tooltip.style("opacity", 0)
231 | d3.selectAll(".myArea").style("opacity", 0.5).style("stroke", "none")
232 | }
233 |
234 | // Area generator
235 | var area = d3.area()
236 | .curve(d3.curveMonotoneX)
237 | .x(function(d) { return x(d.data.date); })
238 | .y0(function(d) { return y(d.data.one); })
239 | .y1(function(d) { return y(d.data.nine); })
240 |
241 | // Area generator
242 | var area2 = d3.area()
243 | .curve(d3.curveMonotoneX)
244 | .x(function(d) { return x(d.data.date); })
245 | .y0(function(d) { return y(d.data.second); })
246 | .y1(function(d) { return y(d.data.eight); })
247 |
248 | // Area generator
249 | var area3 = d3.area()
250 | .curve(d3.curveMonotoneX)
251 | .x(function(d) { return x(d.data.date); })
252 | .y0(function(d) { return y(d.data.third); })
253 | .y1(function(d) { return y(d.data.seven); })
254 |
255 | // Area generator
256 | var area4 = d3.area()
257 | .curve(d3.curveMonotoneX)
258 | .x(function(d) { return x(d.data.date); })
259 | .y0(function(d) { return y(d.data.fourth); })
260 | .y1(function(d) { return y(d.data.six); })
261 |
262 | // Line generator - actual values
263 | var line = d3.line()
264 | // .curve(d3.curveMonotoneX)
265 | .x(function(d) { return x(d.data.date); })
266 | .y(function(d) { return y(d.data.actual); })
267 |
268 |
269 | // Line generator - median prediction (q_5)
270 | var line2 = d3.line()
271 | .curve(d3.curveMonotoneX)
272 | .x(function(d) { return x(d.data.date); })
273 | .y(function(d) { return y(d.data.five); })
274 |
275 | // Line generator - lower quantile (q_05)
276 | var line3 = d3.line()
277 | .curve(d3.curveMonotoneX)
278 | .x(function(d) { return x(d.data.date); })
279 | .y(function(d) { return y(d.data.one); })
280 |
281 | // Line generator - upper quantile (q_95)
282 | var line4 = d3.line()
283 | .curve(d3.curveMonotoneX)
284 | .x(function(d) { return x(d.data.date); })
285 | .y(function(d) { return y(d.data.nine); })
286 |
287 | // graph colors
288 | var legendColors = d3.scaleOrdinal().range(color_array)
289 |
290 | var areas = [area, area2]
291 |
292 | // var line = d3.svg.line()
293 | // .x(function(d) { return x(d.data.date) })
294 | // .y(function(d) { return y(y(d[0])); });
295 |
296 | // d3.selectAll('.line')
297 | // .attr("d", line)
298 |
299 | // Show the areas
300 | svg
301 | .selectAll("mylayers")
302 | .data(stackedData)
303 | .enter()
304 | .append("path")
305 | .attr("class", "myArea")
306 | .style("fill",legendColors(0))
307 | .attr("d", area)
308 | .attr("clip-path", "url(#clip)");
309 | // .on("mouseover", mouseover)
310 | // // .on("mousemove", mousemove)
311 | // .on("mouseleave", mouseleave)
312 | // .attr("fill-opacity","0.3")
313 |
314 | svg
315 | .selectAll("mylayers")
316 | .data(stackedData)
317 | .enter()
318 | .append("path")
319 | .attr("class", "myArea")
320 | .style("fill" ,legendColors(1))
321 | .attr("d", area2)
322 | .attr("clip-path", "url(#clip)");
323 | // .on("mouseover", mouseover)
324 | // // .on("mousemove", mousemove)
325 | // .on("mouseleave", mouseleave)
326 | // .attr("fill-opacity","0.5")
327 |
328 | svg
329 | .selectAll("mylayers")
330 | .data(stackedData)
331 | .enter()
332 | .append("path")
333 | .attr("class", "myArea")
334 | .style("fill",legendColors(2))
335 | // .attr("fill-opacity","0.9")
336 | .attr("d", area3)
337 | .attr("clip-path", "url(#clip)");
338 | // .on("mouseover", mouseover)
339 | // // .on("mousemove", mousemove)
340 | // .on("mouseleave", mouseleave)
341 |
342 | var area4 = svg
343 | .selectAll("mylayers")
344 | .data(stackedData)
345 | .enter()
346 | .append("path")
347 | .attr("class", "myArea")
348 | .style("fill", legendColors(3))
349 | // .attr("fill-opacity","0.5")
350 | .attr("d", area4)
351 | .attr("clip-path", "url(#clip)");
352 | // .on("mouseover", mouseover)
353 | // // .on("mousemove", mousemove)
354 | // .on("mouseleave", mouseleave)
355 |
356 |
357 |
358 | var totalLength = 50000
359 | var totalLength2 = area4.node().getTotalLength();
360 |
361 | // mean predictions
362 | var path2 = svg
363 | .selectAll("mylayers")
364 | .data(stackedData)
365 | .enter()
366 | .append("path")
367 | .attr("class", "test-line")
368 | .style("fill", 'none')
369 | .attr("stroke", "white")
370 | .attr("stroke-width", 0.05)
371 | .attr("clip-path", "url(#clip)")
372 | .attr("d", line2)
373 |
374 | var path3 = svg
375 | .selectAll("mylayers")
376 | .data(stackedData)
377 | .enter()
378 | .append("path")
379 | .attr("class", "test-line")
380 | .style("fill", 'none')
381 | .attr("stroke", legendColors(4))
382 | .attr("stroke-width", 0)
383 | .attr("clip-path", "url(#clip)")
384 | .attr("d", line3)
385 |
386 | var path4 = svg
387 | .selectAll("mylayers")
388 | .data(stackedData)
389 | .enter()
390 | .append("path")
391 | .attr("class", "test-line")
392 | .style("fill", 'none')
393 | .attr("stroke", legendColors(4))
394 | .attr("stroke-width", 0)
395 | .attr("clip-path", "url(#clip)")
396 | .attr("d", line4)
397 |
398 | // var clip = svg.append("clipPath")
399 | // .attr("id", "clip");
400 | // var clipRect = clip.append("rect")
401 | // .attr("width", 100)
402 | // .attr("height", height)
403 |
404 | // clipRect
405 | // .transition()
406 | // .delay(1000)
407 | // .duration(8000)
408 | // .ease(d3.easeLinear)
409 | // .attr("width", width)
410 |
411 | // path
412 | // .attr("stroke-dasharray", totalLength + " " + totalLength)
413 | // .attr("stroke-dashoffset", totalLength)
414 | // .transition()
415 | // .duration(9000)
416 | // .ease(d3.easeLinear)
417 | // .attr("stroke-dashoffset", 0)
418 | // .on("end")
419 |
420 |
421 | // legend
422 | var count = ['1','2','3','4','5','6']
423 | var legendKeys = d3.scaleOrdinal().range(['Quantile 5 - 95', 'Quantile 15 - 85', 'Quantile 25 - 75', 'Quantile 35 - 65', 'Mean', 'Actual']);
424 |
425 |
426 | // Add one colour swatch in the legend for each key.
427 | var size = 12.5
428 | svg.selectAll("myrects")
429 | .data(count)
430 | .enter()
431 | .append("rect")
432 | .attr("x", width - 150)
433 | .attr("y", function(d,i){ if(i < 4) {return 20 + i*(size+10)}; if(i >= 4) {return 25 + i*(size+10)}; })
434 | .attr("width", size)
435 | .attr("height", function(d,i){ if(i < 4) {return size}; if(i >= 4) {return size/5}; })
436 | .style("fill", function(d, i){ return legendColors(i) })
437 |
438 | // Add one text label in the legend for each key.
439 | svg.selectAll("mylabels")
440 | .data(count)
441 | .enter()
442 | .append("text")
443 | .attr("x", (width - 150) + size*1.5)
444 | .attr("y", function(d,i){ return 20 + i*(size+10.25) + (size/2)})
445 | .style("fill", '#000000')
446 | .text(function(d, i){ return legendKeys(i)})
447 | .style("font", "14px arial")
448 | .style("fill", "grey")
449 | // .style("text-transform", "uppercase")
450 | .attr("text-anchor", "left")
451 | .style("alignment-baseline", "middle")
452 |
453 | // actual, measured data
454 | var path = svg
455 | .selectAll("mylayers")
456 | .data(stackedData)
457 | .enter()
458 | .append("path")
459 | .attr("class", "test-line")
460 | .style("fill", 'none')
461 | .attr("stroke", '#1c2f33') //D21404
462 | .attr("stroke-width", 0.15)
463 | .attr("stroke-opacity", 0.9)
464 | .attr("d", line)
465 |
466 | // create cursor highlight //////////////////////////////////////
467 |
468 | var mouseG = svg
469 | .append("g")
470 | .attr("class", "mouse-over-effects");
471 |
472 | mouseG
473 | .append("path") // this is the black vertical line to follow mouse
474 | .attr("class", "mouse-line")
475 | .style("stroke", "#393B45") //6E7889
476 | .style("stroke-width", "0.5px")
477 | .style("opacity", 0.75)
478 |
479 | mouseG.append("text")
480 | .attr("class", "mouse-text")
481 | // .style("font-size", "200%")
482 | // .text("test")
483 | .style("opacity", 0)
484 |
485 | // var lines = document.getElementsByClassName('line');
486 | var lines = [path, path3, path4]
487 |
488 | var mousePerLine = mouseG.selectAll('.mouse-per-line')
489 | .data(data)
490 | .enter()
491 | .append("g")
492 | .attr("class", "mouse-per-line");
493 |
494 | var res = sumstat.map(function(d){ return d.key })
495 | var color = d3.scaleOrdinal()
496 | .domain(res)
497 | .range(['darkblue','darkblue','darkblue','darkblue'])
498 |
499 |
500 | mousePerLine.append("circle")
501 | .attr("r", 7)
502 | .style("stroke", function(d, i) {
503 | return color(i);
504 | })
505 | .style("fill", "none")
506 | .style("stroke-width", "1px")
507 | .style("opacity", "0");
508 |
509 | mousePerLine.append("text")
510 | .attr("transform", "translate(10,3)");
511 |
512 | mousePerLine.append("text")
513 | .attr("class", "timetext");
514 |
515 | mouseG
516 | .append('svg:rect') // append a rect to catch mouse movements on canvas
517 | .attr('width', width) // can't catch mouse events on a g element
518 | .attr('height', height)
519 | .attr('fill', 'none')
520 | .attr('pointer-events', 'all')
521 | .on('mouseout touchend', function() { // on mouse out / touch end, hide line, circles and text
522 | d3.select("#my_dataviz_" + ref)
523 | .select(".mouse-line ")
524 | .style("opacity", "0" );
525 | d3.select("#my_dataviz_" + ref)
526 | .select(".mouse-text")
527 | .style("opacity", "0");
528 | d3.select("#my_dataviz_" + ref)
529 | .selectAll(".mouse-per-line circle")
530 | .style("opacity", "0");
531 | d3.select("#my_dataviz_" + ref)
532 | .selectAll(".mouse-per-line text")
533 | .style("opacity", "0")
534 | })
535 | .on('mouseover touchstart', function() { // on mouse over / touch start, show line, circles and text
536 | d3.select("#my_dataviz_" + ref)
537 | .select(".mouse-line")
538 | .style("opacity", "1");
539 | d3.select("#my_dataviz_" + ref)
540 | .select(".mouse-text")
541 | .style("opacity", "1");
542 | // d3.selectAll(".mouse-per-line circle")
543 | // .style("opacity", "1");
544 | d3.select("#my_dataviz_" + ref)
545 | .selectAll(".mouse-per-line text" )
546 | .style("opacity", "1");
547 |
548 | })
549 | .on('mousemove touchmove', function() { // mouse moving over canvas
550 | var mouse = d3.mouse(this);
551 | d3.select("#my_dataviz_" + ref)
552 | .select(".mouse-text")
553 | .attr("x", mouse[0])
554 | .attr("transform", "translate(10,30)")
555 | d3.select("#my_dataviz_" + ref)
556 | .select(".mouse-line")
557 | .attr("d", function() {
558 | var d = "M" + mouse[0] + "," + height;
559 | d += " " + mouse[0] + "," + 0;
560 | return d;
561 | })
562 |
563 |
564 | d3.select("#my_dataviz_" + ref)
565 | .selectAll(".mouse-per-line")
566 | .attr("transform", function(d, i) {
567 | if (i >= 4){ return null };
568 |
569 | var xDate = x.invert(mouse[0])
570 | time = d3.timeFormat("%H:%M %p")(xDate)
571 |
572 | // bisect = d3.bisector(function(d) { return d.date; }).left;
573 | // idx = bisect(data, xDate, 1);
574 |
575 | var beginning = 0,
576 | // end = lines[i].node().getTotalLength()
577 | end = totalLength
578 | target = null;
579 |
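// Bisection over the rendered path length: getPointAtLength() is sampled at the midpoint
// of [beginning, end] and the interval is halved until the sampled point's x matches the
// mouse x, giving the path's y-value under the cursor without searching the raw data.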
580 | while (true){
581 |
582 | target = Math.floor((beginning + end) / 2);
583 | pos = lines[i].node().getPointAtLength(target);
584 | // pos = target;
585 | if ((target === end || target === beginning) && pos.x !== mouse[0]) {
586 | break;
587 | }
588 | if (pos.x > mouse[0]) end = target;
589 | else if (pos.x < mouse[0]) beginning = target;
590 |
591 | else break; //position found
592 | }
593 |
594 | if (ref == 'price') {
595 | unit = ' £/MWh'
596 | } else {
597 | unit = ' GW'
598 | }
599 |
600 | if (i === 0) {
601 | d3.select(this).select('text')
602 | .text(y.invert(pos.y).toFixed(1) + unit)
603 | .attr("transform", "translate(10,0)")
604 | .style("font", "18px arial")
605 | .style('fill', 'blue')
606 | } else {
607 | d3.select(this).select('text')
608 | .text(y.invert(pos.y).toFixed(1) + unit)
609 | .attr("transform", "translate(-75,0)")
610 | .style("font", "16px arial")
611 | .style('fill', 'black');
612 | }
613 |
614 | d3.select(this).select('circle')
615 | .style("opacity", 1)
616 | var parseDate = d3.timeParse("%a %d");
617 | var timestamp = d3.select("#my_dataviz_" + ref).select('.mouse-text')
618 | .text(time)
619 | .style("opacity", 0.5)
620 | .style("text-transform", "uppercase")
621 | .style("font", "arial")
622 | .style("font-size", "22.5px")
623 |
624 | return "translate(" + mouse[0] + "," + pos.y +")";
625 | });
626 | })
627 |
628 |
629 | // Add Y line:
630 | svg.append("line")
631 | // .attr("transform", "rotate(-90)")
632 | .attr("y1", height)
633 | .attr("x1", 0)
634 | .style("stroke-width", 1)
635 | .style("stroke", "#263238")
636 |
637 | // Add X line:
638 | svg.append("line")
639 | // .attr("transform", "rotate(-90)")
640 | .attr("y1", height)
641 | .attr("x1", 0)
642 | .attr("y2", height)
643 | .attr("x2", width)
644 | .style("stroke-width", 1)
645 | .style("stroke", "#263238")
646 |
647 |
648 | //add minor tick marks to x-axis
649 | var m
650 | for (m = 0; m < width; ){
651 | svg.append("line")
652 | .attr("y1", height)
653 | .attr("x1", m )
654 | .attr("y2", height + 5)
655 | .attr("x2", m )
656 | .style("stroke-width", 1)
657 | .style("stroke", "#263238")
658 | .style("opacity", 0.5);
659 | m = m + (width / 167.5 )
660 | }
661 |
662 | //add main tick marks to x-axis
663 | var i
664 | for (i = (width / 7); i < width; i++){
665 | svg.append("line")
666 | .attr("y1", height)
667 | .attr("x1", i )
668 | .attr("y2", height + 20)
669 | .attr("x2", i )
670 | .style("stroke-width", 1.5)
671 | .style("stroke", "#263238");
672 | i = i + (width / 7) - 0.5
673 | }
674 |
675 | //add noon tick marks to x-axis
676 | var n
677 | for (n = (width / 14); n < width; n++){
678 | svg.append("line")
679 | .attr("y1", height)
680 | .attr("x1", n )
681 | .attr("y2", height + 12)
682 | .attr("x2", n )
683 | .style("stroke-width", 1.5)
684 | .style("stroke", "#263238");
685 | n = n + (width / 7) - 0.5
686 | }
687 |
688 | //add vertical dashed gridlines at each day boundary
689 | var i
690 | for (i = (width / 7); i < width; i++){
691 | svg.append("line")
692 | .attr("y1", height)
693 | .attr("x1", i )
694 | .attr("y2", 0)
695 | .attr("x2", i )
696 | .style("stroke-width", 0.5)
697 | .style("stroke-dasharray", ("3, 3"))
698 | .style("stroke", "#263238");
699 | i = i + (width / 7) - 0.5
700 | }
701 |
702 |
703 |
704 |
705 | //add y-axis tick marks to y-axis
706 | // var u
707 | // for (u = 0; u < height; u++){
708 | // svg.append("line")
709 | // .attr("y1", u)
710 | // .attr("x1", -5)
711 | // .attr("y2", u)
712 | // .attr("x2", 0)
713 | // .style("stroke-width", 1.0)
714 | // .style("stroke", "#263238");
715 | // u = u + (height / 9) - 1
716 | // }
717 |
718 | })
719 | }
720 |
--------------------------------------------------------------------------------
/scripts/postprocessing/format_results_Qforecast_plot.py:
--------------------------------------------------------------------------------
1 | # format prediction results for quantile forecasting d3 plot
2 | import numpy as np
3 | import pandas as pd
4 | from datetime import datetime, timedelta
5 | import csv
6 | from pickle import load
7 | from sklearn.preprocessing import MinMaxScaler
8 |
9 |
10 |
11 | # declare model type
12 | model_type = 'seq2seq+temporal' # - bilstm, seq2seq, seq2seq+temporal, seq2seq+temporal+spatial
13 |
14 | # forecasting model
15 | forecast_var = 'price'
16 |
17 | # select start example index reference, 7-days plotted from here
18 | ex_idx = 4
19 |
20 | # load prediction data
21 | with open(f'../../results/{forecast_var}/{model_type}/forecasted_time_series_{forecast_var}_{model_type}.pkl', 'rb') as forecast_data:
22 | predictions = load(forecast_data)
23 |
24 | print(len(predictions['0.5']))
25 |
26 | # get start date
27 | out_start_time = predictions['time_refs']['output_times'][ex_idx][0]
28 |
29 | print(out_start_time)
30 |
31 | # produce date range for week-long predictions
32 | output_sequence_len = 336 # (half-hours)
33 | input_num_of_days = output_sequence_len / 48
34 | # start_date = datetime.strptime(str(out_start_time)[:10], "%Y-%m-%d")
35 | # out_date_range = pd.date_range(start=start_date, end=start_date + timedelta(days=input_num_of_days) , freq="30min")[:-1]# remove HH entry form unwanted day
36 |
37 | out_start_time = predictions['time_refs']['output_times'][ex_idx:ex_idx+int(input_num_of_days)]
38 | out_date_range = pd.to_datetime(out_start_time.ravel(), format='%Y-%m-%d')
39 |
40 | # index ref
41 | idx_ref = [x for x in range(1, output_sequence_len+1)]
42 |
43 | # final params for df
44 | final_params = {'year': idx_ref,
45 | 'Datetime': out_date_range }
46 |
47 | # loop through to write results for each quantile
48 | for q in list(predictions.keys())[:-2]:
49 |
50 | final_params[f'q_{q[2:]}'] = predictions[str(q)][ex_idx:ex_idx+7, :, 0].reshape((-1))
51 |
52 | # add actual values for reference
53 | final_params['actual'] = predictions['y_true'][ex_idx:ex_idx+7, :, 0].reshape((-1))
54 |
55 | print(final_params.keys())
56 |
57 | # convert to pandas df
58 | df = pd.DataFrame(dict([(keys ,pd.Series(values, dtype = 'object')) for keys, values in final_params.items()])) # set all as objects to avoid warning on empty cells
59 |
60 | # convert values to GW
61 | if forecast_var != "price":
62 | df.iloc[:,2:] = df.iloc[:,2:] / 1000
63 |
64 | # copy to clipboard
65 | df.to_clipboard()
66 |
67 | # save data to file
68 | df.to_csv(f'../../results/{forecast_var}/{model_type}/quantile_prediction_results_{forecast_var}_{model_type}.csv', index=False)
69 |
70 |
--------------------------------------------------------------------------------
/scripts/postprocessing/format_results_attn_plot.py:
--------------------------------------------------------------------------------
1 | # format attention results for context d3 plot
2 | import numpy as np
3 | import pandas as pd
4 | from datetime import datetime, timedelta
5 | import csv
6 | from pickle import load
7 | from sklearn.preprocessing import MinMaxScaler
8 |
9 | # forecasting model
10 | type = 'solar' # 'wind', 'solar', 'price', 'demand'
11 |
12 | # select example reference
13 | ex_idx = 26
14 |
15 | # load attention data
16 | if type != "price":
17 | with open(f'../../results/{type}/seq2seq+temporal+spatial/attention_data_{type}_seq2seq+temporal+spatial.pkl', 'rb') as attention_data:
18 | attention_results = load(attention_data)
19 |
20 | # load prediction data
21 | with open(f'../../results/{type}/seq2seq+temporal+spatial/forecasted_time_series_{type}_seq2seq+temporal+spatial.pkl', 'rb') as forecast_data:
22 | predictions = load(forecast_data)
23 | else:
24 | with open(f'../../results/{type}/seq2seq+temporal/attention_data_{type}_seq2seq+temporal.pkl', 'rb') as attention_data:
25 | attention_results = load(attention_data)
26 |
27 | # load prediction data
28 | with open(f'../../results/{type}/seq2seq+temporal/forecasted_time_series_{type}_seq2seq+temporal.pkl', 'rb') as forecast_data:
29 | predictions = load(forecast_data)
30 |
31 |
32 | print(attention_results.keys())
33 |
34 | # get start dates for inputs and outputs
35 | in_start_time = attention_results['time_refs']['input_times'][ex_idx][0]
36 | out_start_time = attention_results['time_refs']['output_times'][ex_idx][0]
37 |
38 | # log start date of selected index
39 | print(f'input time start date: {in_start_time}')
40 | print(f'output time start date: {out_start_time}')
41 |
42 | # input data for reference
43 | if type != 'price':
44 | input_data = np.average(attention_results['input_features'][ex_idx, :, :, :, 0], axis=(1,2))
45 | else:
46 | input_data = attention_results['input_features'][ex_idx, :, -1:]
47 |
48 |
49 | # get prediction result for current index
50 | current_prediction = predictions['0.5'][ex_idx, :, 0]
51 |
52 | # attention values for current index
53 | current_attention_vals = attention_results['0.5'][ex_idx]
54 |
55 | attention_vals = np.empty((current_attention_vals.shape[0] * current_attention_vals.shape[1]))
56 |
57 | # flatten the (input_steps, 48) attention matrix row-by-row into a 1-D vector
58 | iidx = 0
59 | for idx in range(current_attention_vals.shape[0]):
60 | attention_vals[iidx:iidx+48] = current_attention_vals[idx, :]
61 | iidx += 48
62 |
63 | # input params
64 | input_sequence_len = 336
65 | input_num_of_days = input_sequence_len / 48
66 | start_date = datetime.strptime(str(in_start_time)[:10], "%Y-%m-%d")
67 | target_data = datetime.strptime(str(out_start_time)[:10], "%Y-%m-%d")
68 | input_date_range = pd.date_range(start=start_date, end=start_date + timedelta(days=input_num_of_days) , freq="30min")[:-1]# remove HH entry from unwanted day
69 |
70 | # out_start_time = predictions['time_refs']['output_times'][ex_idx:ex_idx+int(input_num_of_days)]
71 | # input_date_range = pd.to_datetime(out_start_time.ravel(), format='%Y-%m-%d')
72 |
73 | # create index values
74 | group_index = [48 * [idx] for idx in range(input_sequence_len)]
75 | variable_index = [[idx for idx in range(48)] for iidx in range(input_sequence_len)]
76 |
77 | # flatten lists of lists
78 | group_index = sum(group_index, [])
79 | variable_index = sum(variable_index, [])
80 |
81 | # create data ranges
82 | group = [48 * [date_time] for date_time in input_date_range]
83 | variable = [pd.date_range(start=target_data, end=target_data + timedelta(days=1) , freq="30min").tolist()[:-1] for idx in range(input_sequence_len)] # remove HH entry for next day
84 |
85 | # flatten timestamps into single list
86 | group = sum(group, [])
87 | variable = sum(variable, [])
88 |
89 | # create output time idxs
90 | output_time_ref = [idx for idx in range(48)]
91 |
92 | # create input time idxs
93 | input_time_ref = [idx for idx in range(input_sequence_len)]
94 |
95 | # input times
96 | input_time = [date_time for date_time in input_date_range]
97 |
98 | # output times
99 | output_time = pd.date_range(start=target_data, end=target_data + timedelta(days=1) , freq="30min").tolist()[:-1]
100 |
101 | # load and apply scaler
102 | # load scaler
103 | scaler = load(open(f'../../data/processed/{type}/_scaler/scaler_{type}_v2.pkl', 'rb'))
104 |
105 | input_data = np.squeeze(input_data)
106 |
107 | # transform input data
108 | input_data = scaler.inverse_transform(input_data)
109 |
110 | attention_vals_int = attention_vals
111 |
112 | # optionally rescale attention values (scaling currently disabled; raw values passed through)
113 | # scaler = MinMaxScaler(feature_range = (0, 1))
114 | # attention_vals_scaled = scaler.fit_transform(attention_vals.reshape(-1,1)).reshape(-1)
115 |
116 | # attention_vals_scaled = np.sqrt(attention_vals_scaled)
117 |
118 | attention_vals_scaled = attention_vals
119 |
120 | # get true values for reference
121 | y_true = predictions['y_true'][ex_idx][:,0]
122 |
123 | # final params for df
124 | final_params = {'group_index': group_index,
125 | 'variable_index': variable_index,
126 | 'group': group,
127 | 'variable': variable,
128 | 'value_scaled': attention_vals_scaled,
129 | 'value': attention_vals_int,
130 | 'input_time_ref': input_time_ref,
131 | 'input_time': input_time,
132 | 'input_values': input_data,
133 | 'output_time_ref': output_time_ref,
134 | 'output_time': output_time,
135 | 'prediction': current_prediction,
136 | 'y_true': y_true }
137 |
138 | # convert to pandas df
139 | df = pd.DataFrame(dict([(keys ,pd.Series(values, dtype = 'object')) for keys, values in final_params.items()])) # set all as objects to avoid warning on empty cells
140 |
141 | # copy to clipboard
142 | df.to_clipboard()
143 |
144 | # save data to file
145 | # df.to_csv(f'../../results/{type}/attention_plot_results_{type}.csv', index=False)
146 |
147 |
148 |
--------------------------------------------------------------------------------
/scripts/postprocessing/results_summary.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import scipy
4 | from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
5 | from pickle import load
6 |
7 |
8 | # declare model type
9 | model_type = 'seq2seq+temporal' # - bilstm, seq2seq, seq2seq+temporal, seq2seq+temporal+spatial
10 |
11 | # desired var to run analysis
12 | forecast_var = 'price'
13 |
14 | # load quantile prediction results
15 | with open(f'../../results/{forecast_var}/{model_type}/forecasted_time_series_{forecast_var}_{model_type}.pkl', 'rb') as forecast_data:
16 | results = load(forecast_data)
17 |
18 |
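# note: the local definition below overrides the sklearn import above and returns a percentage (x100) rather than a fraction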
19 | def mean_absolute_percentage_error(y_true, y_pred):
20 |
21 | y_true, y_pred = np.array(y_true), np.array(y_pred)
22 | return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
23 |
24 | def smape(y_true, y_pred):
25 | return 100/len(y_true) * np.sum(2 * np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)))
26 |
27 | # function to evaluate general & quantile performance
28 | def evaluate_predictions(predictions):
29 | '''
30 | Theory from Bazionis & Georgilakis (2021): https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=&ved=2ahUKEwiUprb39qbyAhXNgVwKHWVsA50QFnoECAMQAQ&url=https%3A%2F%2Fwww.mdpi.com%2F2673-4826%2F2%2F1%2F2%2Fpdf&usg=AOvVaw1AWP-zHuNGrw8pgDfUS09e
31 | function to calculate probabilistic forecast performance:
32 | Prediction Interval Coverage Probability (PICP)
33 | Prediction Interval Nominal Coverage (PINC)
34 | Average Coverage Error (ACE) [PICP - PINC]
35 | '''
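# worked example: with lower/upper quantiles 0.05 and 0.95, PINC = (0.95 - 0.05) * 100 = 90;
# if 87% of observations fall inside the predicted interval, PICP = 87 and ACE = 87 - 90 = -3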
36 | test_len = len(predictions['y_true'])
37 |
38 | print(test_len)
39 |
40 | y_true = predictions['y_true'].ravel()
41 | lower_pred = predictions[list(predictions.keys())[0]].ravel()
42 | upper_pred = predictions[list(predictions.keys())[-3]].ravel()
43 | central_case = predictions['0.5'].ravel()
44 |
45 | alpha = float(list(predictions.keys())[-3]) - float(list(predictions.keys())[0])
46 |
47 | # picp_ind = np.sum((y_true > lower_pred) & (y_true <= upper_pred))
48 |
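# PICP: percentage of observed points lying inside the [lower, upper] interval; the denominator assumes test_len days of 48 half-hourly points each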
49 | picp = ((np.sum((y_true >= lower_pred) & (y_true <= upper_pred))) / (test_len * 48) ) * 100
50 |
51 | pinc = alpha * 100
52 |
53 | ace = (picp - pinc) # the closer to 0, the higher the reliability
54 |
55 | r = np.max(y_true) - np.min(y_true)
56 |
57 | # PI normalised width
58 | pinaw = (1 / (test_len * r)) * np.sum((upper_pred - lower_pred))
59 |
60 | # PI normalised root-mean-square width
61 | pinrw = (1/r) * np.sqrt( (1/test_len) * np.sum((upper_pred - lower_pred)**2))
62 |
63 | # calculate MAE & RMSE
64 | mae = mean_absolute_error(y_true, central_case)
65 | mape = mean_absolute_percentage_error(y_true, central_case)
66 | rmse = mean_squared_error(y_true, central_case, squared=False)
67 |
68 | # calculate MAE & RMSE for the persistence baseline (previous day's observations used as the forecast)
69 | persistence_prediction = predictions['y_true'][:-1].ravel()
70 | persistence_true = predictions['y_true'][1:].ravel()
71 |
72 | mae_base = mean_absolute_error(persistence_true, persistence_prediction)
73 | mape_base = mean_absolute_percentage_error(persistence_true, persistence_prediction)
74 | rmse_base = mean_squared_error(persistence_true, persistence_prediction, squared=False)
75 |
76 | # create pandas df
77 | metrics = pd.DataFrame({'PICP': picp, 'PINC': pinc, 'ACE': ace, 'PINAW': pinaw, 'PINRW': pinrw, 'MAE': mae, 'MAPE': mape, 'RMSE': rmse}, index={alpha})
78 | metrics.index.name = 'Prediction_Interval'
79 |
80 | # create pandas df for baseline
81 | metrics_base = pd.DataFrame({'MAE': mae_base, 'MAPE': mape_base, 'RMSE': rmse_base}, index={'baseline_persistence'})
82 |
83 | print(metrics.to_string())
84 | print(metrics_base.to_string())
85 |
86 | # save performance metrics
87 | metrics.to_csv(f'../../results/{forecast_var}/{model_type}/preformance_summary_{forecast_var}_{model_type}.csv', index=False)
88 |
89 | return metrics
90 |
91 |
92 | # function to evaluate trends
93 | def correlation_analysis(X, Y):
94 |
95 | rs = np.empty((X.shape[0], 1))
96 | # calculate 'R^2' for each example (day)
97 | for l in range(X.shape[0]):
98 | slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(X[l,:,0], Y[l,:,0])
99 | rs[l, 0] =r_value**2
100 |
101 | print('mean' + '\n R**2: %s' %rs.mean())
102 | print('max' + '\n R**2: %s' %rs.max())
103 | print('min' + '\n R**2: %s' %rs.min())
104 |
105 | #get best
106 | best_fit = np.argmax(rs, axis=0)
107 | worst_fit = np.argmin(rs, axis=0)
108 | print(best_fit)
109 | print(worst_fit)
110 |
111 | return
112 |
113 | # call evaluate performance
114 | evaluate_predictions(results)
115 |
116 |
117 |
--------------------------------------------------------------------------------
/scripts/postprocessing/spatial_attention_plots.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | import geopandas
5 | import contextily as ctx
6 | from pickle import load
7 | from matplotlib.animation import FuncAnimation
8 |
9 |
10 | # plot spatial attention
11 | def plot_spatial_predictions(spatial_data, title, height_scale, width_scale, frame_num):
12 |
13 | fig = plt.figure(figsize=[8,10]) # a new figure window
14 | ax_set = fig.add_subplot(1, 1, 1)
15 |
16 | # create baseline map
17 | # spatial data on UK basemap
18 | df = pd.DataFrame({
19 | 'LAT': [49.78, 61.03],
20 | 'LON': [-11.95, 1.55],
21 | })
22 |
23 | geo_df = geopandas.GeoDataFrame(df, crs = {'init': 'epsg:4326'},
24 | geometry=geopandas.points_from_xy(df.LON, df.LAT)).to_crs(epsg=3857)
25 |
26 | ax = geo_df.plot(
27 | figsize= (8,10),
28 | alpha = 0,
29 | ax=ax_set,
30 | )
31 |
32 | plt.title(title)
33 | ax.set_axis_off()
34 |
35 | # add basemap
36 | url = 'http://tile.stamen.com/terrain/{z}/{x}/{y}.png'
37 | zoom = 10
38 | xmin, xmax, ymin, ymax = ax.axis()
39 | basemap, extent = ctx.bounds2img(xmin, ymin, xmax, ymax, zoom=zoom, source=url)
40 | ax.imshow(basemap, extent=extent, interpolation='gaussian')
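# reshape the flattened spatial attention vector for the first timestep into a (height, width) grid so it can be overlaid on the basemap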
41 | attn_over = np.resize(spatial_data[0], (height_scale, width_scale))
42 |
43 | gb_shape = geopandas.read_file("../../data/raw/_mapping/shapefiles/GBR_adm/GBR_adm0.shp").to_crs(epsg=3857)
44 | irl_shape = geopandas.read_file("../../data/raw/_mapping/shapefiles/IRL_adm/IRL_adm0.shp").to_crs(epsg=3857)
45 | gb_shape.boundary.plot(ax=ax, edgecolor="black", linewidth=0.5, alpha=0.4)
46 | irl_shape.boundary.plot(ax=ax, edgecolor="black", linewidth=0.5, alpha=0.4)
47 | overlay = ax.imshow(attn_over, cmap='viridis', alpha=0.5, extent=extent)
48 | # ax.axis((xmin, xmax, ymin, ymax))
49 | txt = fig.text(.5, 0.09, '', ha='center')
50 |
51 |
52 | def update(i):
53 | spatial_over = np.resize(spatial_data[i], (height_scale, width_scale))
54 | print(spatial_over.shape)
55 | # overlay = ax.imshow(spatial_over, cmap='viridis', alpha=0.5, extent=extent)
56 | overlay.set_data(spatial_over)
57 | txt.set_text(f"Timestep: {i}")
58 | # plt.cla()
59 |
60 | return [overlay, txt]
61 |
62 |
63 | animation_ = FuncAnimation(fig, update, frames=frame_num, blit=False, repeat=False)
64 | # plt.show(block=True)
65 | animation_.save(f'{title}_animation.gif', writer='imagemagick')
66 |
67 |
68 |
69 | # define model type to plot
70 | model_type = 'solar'
71 |
72 | idx = 0
73 |
74 | # load spatial attention data
75 | # save results - forecasted spatial attention matrix
76 | with open(f'../../results/{model_type}/seq2seq+temporal+spatial/spatial_attention_data_{model_type}.pkl', 'rb') as spatial_file:
77 | spatial_data = load(spatial_file)
78 |
79 |
80 | # grab relevant example
81 | spatial_data = spatial_data['0.5'][idx,:,:]
82 |
83 | spatial_data = np.transpose(spatial_data)
84 |
85 | print(spatial_data.shape)
86 | print(spatial_data[30, :])
87 |
88 | # exit()
89 |
90 |
91 | # call plot function
92 | plot_spatial_predictions(spatial_data=spatial_data, title='Solar Spatial Attention', height_scale=16, width_scale=20, frame_num=48)
--------------------------------------------------------------------------------
/scripts/preprocessing/ERA5_downloader.py:
--------------------------------------------------------------------------------
1 | import cdsapi
2 | import os
3 |
4 |
5 | os.chdir("PATH TO ERA5 DOWNLOADER FILE")
6 | print(os.getcwd())
7 |
8 |
9 | c = cdsapi.Client()
10 |
11 | years = ['2020', '2021']
12 | variables = ['surface_net_solar_radiation']
13 |
14 | for l, var in enumerate(variables):
15 | for i, year in enumerate(years):
16 |
17 | print(f'year:{year}, var:{var}')
18 |
19 | if year == '2021':
20 | c.retrieve(
21 | 'reanalysis-era5-single-levels',
22 | {
23 | 'product_type': 'reanalysis',
24 | 'format': 'netcdf',
25 | 'variable': [
26 | var,
27 | ],
28 | 'year': [
29 | year,
30 | ],
31 | 'month': [
32 | '01', '02', '03',
33 | '04', '05', '06',
34 | # '07', '08', '09',
35 | # '10', '11', '12',
36 | ],
37 | 'day': [
38 | '01', '02', '03',
39 | '04', '05', '06',
40 | '07', '08', '09',
41 | '10', '11', '12',
42 | '13', '14', '15',
43 | '16', '17', '18',
44 | '19', '20', '21',
45 | '22', '23', '24',
46 | '25', '26', '27',
47 | '28', '29', '30',
48 | '31',
49 | ],
50 | 'time': [
51 | '00:00', '01:00', '02:00',
52 | '03:00', '04:00', '05:00',
53 | '06:00', '07:00', '08:00',
54 | '09:00', '10:00', '11:00',
55 | '12:00', '13:00', '14:00',
56 | '15:00', '16:00', '17:00',
57 | '18:00', '19:00', '20:00',
58 | '21:00', '22:00', '23:00',
59 | ],
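# 'area' is specified as [North, West, South, East] in degrees (CDS API convention) - this box covers Great Britain and Ireland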
60 | 'area': [
61 | 61.19, -11.95, 49.78,
62 | 1.76,
63 | ],
64 | },
65 | str(var) + '_' + str(year) + '.nc')
66 |
67 |
68 | else:
69 | c.retrieve(
70 | 'reanalysis-era5-single-levels',
71 | {
72 | 'product_type': 'reanalysis',
73 | 'format': 'netcdf',
74 | 'variable': [
75 | var,
76 | ],
77 | 'year': [
78 | year,
79 | ],
80 | 'month': [
81 | '01', '02', '03',
82 | '04', '05', '06',
83 | '07', '08', '09',
84 | '10', '11', '12',
85 | ],
86 | 'day': [
87 | '01', '02', '03',
88 | '04', '05', '06',
89 | '07', '08', '09',
90 | '10', '11', '12',
91 | '13', '14', '15',
92 | '16', '17', '18',
93 | '19', '20', '21',
94 | '22', '23', '24',
95 | '25', '26', '27',
96 | '28', '29', '30',
97 | '31',
98 | ],
99 | 'time': [
100 | '00:00', '01:00', '02:00',
101 | '03:00', '04:00', '05:00',
102 | '06:00', '07:00', '08:00',
103 | '09:00', '10:00', '11:00',
104 | '12:00', '13:00', '14:00',
105 | '15:00', '16:00', '17:00',
106 | '18:00', '19:00', '20:00',
107 | '21:00', '22:00', '23:00',
108 | ],
109 | 'area': [
110 | 61.19, -11.95, 49.78,
111 | 1.76,
112 | ],
113 | },
114 | str(var) + '_' + str(year) + '.nc')
115 |
116 |
117 |
118 |
119 |
--------------------------------------------------------------------------------
/scripts/preprocessing/__pycache__/preprocessing_funcs.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/scripts/preprocessing/__pycache__/preprocessing_funcs.cpython-38.pyc
--------------------------------------------------------------------------------
/scripts/preprocessing/data_preprocessing_demand.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import sys
4 | import os
5 | from pickle import dump, load
6 | import h5py
7 |
8 | from preprocessing_funcs import demand_data_processing
9 |
10 | np.set_printoptions(threshold=sys.maxsize)
11 |
12 | ###########################################_____LOAD & PRE-PROCESS DATA_____###########################################
13 |
14 | #cache current working directory of main script
15 | workingDir = os.getcwd()
16 |
17 | # paths to nc files for x_value features:
18 | filepaths = {
19 | 'temperature': '../../data/raw/temperature',
20 | }
21 |
22 | # load labels (demand per HH)
23 | demandGenLabels = pd.read_csv('../../data/raw/demand_labels/HH_demand_v2.csv', parse_dates=True, index_col=0, header=0, dayfirst=True)
24 |
25 | # call main pre-processing function - sequence windowing no longer utilised
26 | dataset, time_refs = demand_data_processing(filepaths = filepaths, labels = demandGenLabels, workingDir = workingDir)
27 |
28 | # print data summaries
29 | print(*[f'{key}: {dataset["train_set"][key].shape}' for key in dataset['train_set'].keys()], sep='\n')
30 | print(*[f'{key}: {dataset["test_set"][key].shape}' for key in dataset['test_set'].keys()], sep='\n')
31 |
32 | # save time series references (inputs & outputs)
33 | print('saving data...')
34 | with open("../../data/processed/demand/time_refs_demand_v4.pkl", "wb") as times:
35 | dump(time_refs, times)
36 |
37 | # save training set as dictionary (h5py dump)
38 | f = h5py.File('../../data/processed/demand/dataset_demand_v4.hdf5', 'w')
39 |
40 | for group_name in dataset:
41 | group = f.create_group(group_name)
42 | for dset_name in dataset[group_name]:
43 | dset = group.create_dataset(dset_name, data = dataset[group_name][dset_name])
44 | f.close()
45 |
46 |
--------------------------------------------------------------------------------
/scripts/preprocessing/data_preprocessing_price.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from sklearn.preprocessing import MinMaxScaler, StandardScaler
4 | from pickle import dump
5 | import h5py
6 |
7 | from workalendar.europe import UnitedKingdom
8 | cal = UnitedKingdom()
9 |
10 | # load input data
11 | windGen_data = pd.read_csv('../../data/raw/wind_labels/HH_windGen_v2.csv', parse_dates=True, index_col=0, header=0, dayfirst=True)
12 | solarGen_data = pd.read_csv('../../data/raw/solar_labels/HH_PVGen_v2.csv', parse_dates=True, index_col=0, header=0, dayfirst=True)
13 | demand_data = pd.read_csv('../../data/raw/demand_labels/HH_demand_v2.csv', parse_dates=True, index_col=0, header=0, dayfirst=True)
14 |
15 | # load labels
16 | price_data = pd.read_csv('../../data/raw/price_labels/N2EX_UK_DA_Auction_Hourly_Prices_v2.csv', parse_dates=True, index_col=0, header=0, dayfirst=True)
17 |
18 | # interpolate hourly prices into HH resolution
19 | price_data = price_data.reindex(pd.date_range(start=price_data.index.min(), end=price_data.index.max() + pd.Timedelta(minutes=30), freq='30T'))
20 | price_data = price_data.interpolate()
21 |
22 | # combine vars into feature array
23 | arrays = [windGen_data.values, solarGen_data.values, demand_data.values]
24 |
25 | feature_array = []
26 |
27 | # normalise feature array
28 | for i, array in enumerate(arrays):
29 | scaler = StandardScaler()
30 | feature_array.append(scaler.fit_transform(array))
31 |
32 | # normalise labels
33 | # scaler = MinMaxScaler() #normalise data
34 | scaler = StandardScaler()
35 | price_data = scaler.fit_transform(price_data.values)
36 |
37 | # save price data scaler
38 | dump(scaler, open('../../data/processed/price/_scaler/scaler_price_v2.pkl', 'wb'))
39 |
40 | # stack features
41 | feature_array = np.concatenate(feature_array, axis=-1)
42 |
43 | # mask data (eliminate nans)
44 | wind_mask = windGen_data.iloc[:,-1].isna().groupby(windGen_data.index.normalize()).transform('any')
45 | solar_mask = solarGen_data.iloc[:,-1].isna().groupby(solarGen_data.index.normalize()).transform('any')
46 | demand_mask = demand_data.iloc[:,-1].isna().groupby(demand_data.index.normalize()).transform('any')
47 | price_mask = demand_data.iloc[:,-1].isna().groupby(demand_data.index.normalize()).transform('any')
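# note: price_data was converted to a numpy array by the scaler above, so the demand series is used here as the basis for the price mask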
48 |
49 | # eliminate all missing values with common mask
50 | mask_all = wind_mask | solar_mask | demand_mask | price_mask
51 |
52 | # apply mask, removing days containing any nan values
53 | feature_array = feature_array[~mask_all]
54 |
55 | price_data = price_data[~mask_all]
56 |
57 | # combine price data to other features for complete feature array
58 | feature_array = [feature_array, price_data]
59 | feature_array = np.concatenate(feature_array, axis=-1)
60 |
61 | # time refs
62 | time_refs = windGen_data.index
63 | time_refs = time_refs[~mask_all]
64 |
65 | # time data engineering
66 | df_times_outputs = pd.DataFrame()
67 | df_times_outputs['date'] = time_refs.date
68 | df_times_outputs['hour'] = time_refs.hour
69 | df_times_outputs['month'] = time_refs.month - 1
70 | df_times_outputs['year'] = time_refs.year
71 | df_times_outputs['day_of_week'] = time_refs.dayofweek
72 | df_times_outputs['day_of_year'] = time_refs.dayofyear - 1
73 | df_times_outputs['weekend'] = df_times_outputs['day_of_week'].apply(lambda x: 1 if x>=5 else 0)
74 |
75 | # account for bank / public holidays
76 | start_date = time_refs.min()
77 | end_date = time_refs.max()
78 | start_year = df_times_outputs['year'].min()
79 | end_year = df_times_outputs['year'].max()
80 |
81 | holidays = set(holiday[0]
82 | for year in range(start_year, end_year + 1)
83 | for holiday in cal.holidays(year)
84 | if start_date <= holiday[0] <= end_date)
85 |
86 | df_times_outputs['holiday'] = df_times_outputs['date'].isin(holidays).astype(int)
87 |
88 | # process output times: offset odd (half-hour) rows by 0.5 hours
89 | for idx, row in df_times_outputs.iterrows():
90 | if idx % 2 != 0:
91 | df_times_outputs.iloc[idx, 1] = df_times_outputs.iloc[idx, 1] + 0.5
92 |
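# cyclical (sin/cos) encoding keeps neighbouring times close in feature space, e.g. the last half-hour of the day maps next to hour 0 on the unit circle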
93 | # create sin / cos of output hour
94 | times_out_hour_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1)
95 | times_out_hour_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1)
96 |
97 | # create sin / cos of output month
98 | times_out_month_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1)
99 | times_out_month_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1)
100 |
101 | # min-max normalise output year
102 | times_out_year = np.expand_dims((df_times_outputs['year'].values - np.min(df_times_outputs['year'])) / (np.max(df_times_outputs['year']) - np.min(df_times_outputs['year'])), axis=-1)
103 |
104 | # create sin / cos of output day of week
105 | times_out_DoW_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['day_of_week']/np.max(df_times_outputs['day_of_week'])), axis=-1)
106 | times_out_DoW_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['day_of_week']/np.max(df_times_outputs['day_of_week'])), axis=-1)
107 |
108 | # create sin / cos of output day of year
109 | times_out_DoY_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['day_of_year']/np.max(df_times_outputs['day_of_year'])), axis=-1)
110 | times_out_DoY_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['day_of_year']/np.max(df_times_outputs['day_of_year'])), axis=-1)
111 |
112 | weekends = np.expand_dims(df_times_outputs['weekend'].values, axis =-1)
113 | holidays = np.expand_dims(df_times_outputs['holiday'].values, axis =-1)
114 |
115 | time_features = np.concatenate((times_out_hour_sin, times_out_hour_cos, times_out_month_sin, times_out_month_cos, times_out_DoW_sin, times_out_DoW_cos,
116 | times_out_DoY_sin, times_out_DoY_cos, times_out_year, weekends, holidays), axis=-1)
117 |
118 | # combine demand / solar / wind with time features
119 | # combined_data = np.concatenate([feature_array, output_times], axis=-1)
120 |
121 | test_split_seq = 8544 # larger test set to compensate for the adverse demand profile during COVID
122 |
123 | # split data into train and test sets
124 | dataset = {
125 | 'train_set' : {
126 | 'X1_train': feature_array[:-test_split_seq],
127 | 'X2_train': time_features[:-test_split_seq],
128 | 'X3_train': time_features[:-test_split_seq],
129 | 'y_train': price_data[:-test_split_seq]
130 | },
131 | 'test_set' : {
132 | 'X1_test': feature_array[-test_split_seq:],
133 | 'X2_test': time_features[-test_split_seq:],
134 | 'X3_test': time_features[-test_split_seq:],
135 | 'y_test': price_data[-test_split_seq:]
136 | }
137 | }
138 |
139 | time_refs = {
140 | 'input_times_train': time_refs[:-test_split_seq],
141 | 'input_times_test': time_refs[-test_split_seq:],
142 | 'output_times_train': time_refs[:-test_split_seq],
143 | 'output_times_test': time_refs[-test_split_seq:]
144 | }
145 |
146 | # print data for info
147 | print(*[f'{key}: {dataset["train_set"][key].shape}' for key in dataset['train_set'].keys()], sep='\n')
148 | print(*[f'{key}: {dataset["test_set"][key].shape}' for key in dataset['test_set'].keys()], sep='\n')
149 |
150 | # save dataset
151 | with open("../../data/processed/price/time_refs_price_v2.pkl", "wb") as times:
152 | dump(time_refs, times)
153 |
154 | # save training set as dictionary (h5py dump)
155 | f = h5py.File('../../data/processed/price/dataset_price_v2.hdf5', 'w')
156 |
157 | for group_name in dataset:
158 | group = f.create_group(group_name)
159 | for dset_name in dataset[group_name]:
160 | dset = group.create_dataset(dset_name, data = dataset[group_name][dset_name])
161 | f.close()
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
--------------------------------------------------------------------------------
/scripts/preprocessing/data_preprocessing_solar.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import sys
4 | import os
5 | from pickle import dump, load
6 | import h5py
7 |
8 | from preprocessing_funcs import solar_data_processing
9 |
10 | np.set_printoptions(threshold=sys.maxsize)
11 |
12 | ###########################################_____LOAD & PRE-PROCESS DATA_____###########################################
13 |
14 | #cache current working directory of main script
15 | workingDir = os.getcwd()
16 |
17 |
18 | # paths to nc files for x_value features:
19 | filepaths = {
20 | 'solarRad': '../../data/raw/net_solar_radiation',
21 | 'lowcloudcover': '../../data/raw/low_cloud_Cover',
22 | 'temperature': '../../data/raw/temperature'
23 | }
24 |
25 | # load labels (solar generation per HH)
26 | solarGenLabels = pd.read_csv('../../data/raw/solar_labels/HH_PVGen_v2.csv', parse_dates=True, index_col=0, header=0, dayfirst=True)
27 |
28 | # call main pre-processing function - sequence windowing no longer utilised
29 | dataset, time_refs = solar_data_processing(filepaths = filepaths, labels = solarGenLabels, input_seq_size = 336, output_seq_size = 48, workingDir = workingDir)
30 |
31 | # print data summaries
32 | print(*[f'{key}: {dataset["train_set"][key].shape}' for key in dataset['train_set'].keys()], sep='\n')
33 | print(*[f'{key}: {dataset["test_set"][key].shape}' for key in dataset['test_set'].keys()], sep='\n')
34 |
35 | # save time series references (inputs & outputs)
36 | print('saving data...')
37 | with open("../../data/processed/solar/time_refs_solar_min_v4.pkl", "wb") as times:
38 | dump(time_refs, times)
39 |
40 | # save training set as dictionary (h5py dump)
41 | f = h5py.File('../../data/processed/solar/dataset_solar_min_v4.hdf5', 'w')
42 |
43 | for group_name in dataset:
44 | group = f.create_group(group_name)
45 | for dset_name in dataset[group_name]:
46 | dset = group.create_dataset(dset_name, data = dataset[group_name][dset_name])
47 | f.close()
48 |
49 |
50 |
--------------------------------------------------------------------------------
/scripts/preprocessing/data_preprocessing_wind.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import sys
4 | import os
5 | from pickle import dump, load
6 | import h5py
7 |
8 | from preprocessing_funcs import wind_data_processing
9 |
10 |
11 | np.set_printoptions(threshold=sys.maxsize)
12 |
13 | ###########################################_____LOAD & PRE-PROCESS DATA_____###########################################
14 |
15 | #cache current working directory of main script
16 | workingDir = os.getcwd()
17 |
18 | # paths to nc files for x_value features:
19 | filepaths = {
20 | 'u_wind_component_10': '../../data/raw/10m_u_component_of_wind',
21 | 'v_wind_component_10': '../../data/raw/10m_v_component_of_wind',
22 | 'u_wind_component_100': '../../data/raw/100m_u_component_of_wind',
23 | 'v_wind_component_100': '../../data/raw/100m_v_component_of_wind',
24 | 'instantaneous_10m_wind_gust': '../../data/raw/instantaneous_10m_wind_gust',
25 | 'surface_pressure': '../../data/raw/surface_pressure',
26 | 'temperature': '../../data/raw/temperature'
27 | }
28 |
29 | #load labels (wind generation per HH)
30 | windGenLabels = pd.read_csv('../../data/raw/wind_labels/HH_windGen_v4.csv', parse_dates=True, index_col=0, header=0, dayfirst=True)
31 |
32 | # call main pre-processing function - sequence windowing no longer utilised
33 | dataset, time_refs = wind_data_processing(filepaths = filepaths, labels = windGenLabels, input_seq_size = 336, output_seq_size = 48, workingDir = workingDir)
34 |
35 | # print data summaries
36 | print(*[f'{key}: {dataset["train_set"][key].shape}' for key in dataset['train_set'].keys()], sep='\n')
37 | print(*[f'{key}: {dataset["test_set"][key].shape}' for key in dataset['test_set'].keys()], sep='\n')
38 |
39 | # save time series references (inputs & outputs)
40 | print('saving data...')
41 | with open("../../data/processed/wind/time_refs_wind_v4.pkl", "wb") as times:
42 | dump(time_refs, times)
43 |
44 | # save training set as dictionary (h5py dump)
45 | f = h5py.File('../../data/processed/wind/dataset_wind_v4.hdf5', 'w')
46 |
47 | for group_name in dataset:
48 | group = f.create_group(group_name)
49 | for dset_name in dataset[group_name]:
50 | dset = group.create_dataset(dset_name, data = dataset[group_name][dset_name])
51 | f.close()
52 |
53 |
54 |
55 |
56 |
57 |
--------------------------------------------------------------------------------
/scripts/preprocessing/preprocessing_funcs.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import netCDF4
4 | from netCDF4 import Dataset
5 | import os
6 | import glob
7 | import sys
8 | from datetime import datetime
9 | from sklearn.preprocessing import MinMaxScaler, StandardScaler
10 | import scipy
11 | import matplotlib.pyplot as plt
12 | from pickle import dump, load
13 | import time
14 | import h5py
15 |
16 | import h5py
17 | from workalendar.europe import UnitedKingdom
18 |
19 | # define calendar reference (allows for easy identification of holidays)
20 | cal = UnitedKingdom()
21 |
22 | # function to extract data from yearly .nc files passing directory
23 | def ncExtract(directory, current_wrk_dir): # concatenates data across files if multiple are present
24 |
25 | # initialise parameters
26 | os.chdir(directory)
27 | files = []
28 | readVariables = {}
29 | consistentVars = ['longitude', 'latitude', 'time']
30 |
31 | #read files in directory
32 | for file in glob.glob("*.nc"):
33 | files.append(file)
34 | files.sort()
35 |
36 | for i, file in enumerate(files):
37 | print(file)
38 | #read nc file using netCDF4
39 | ncfile = Dataset(file)
40 | variables = list(ncfile.variables.keys())
41 | #find unique vars
42 | uniqueVars = list(set(variables) - set(consistentVars))
43 |
44 | # iterate and concatenate each unique variable
45 | for variable in uniqueVars:
46 |
47 | if i == 0:
48 | readVariables['data'] = np.empty([0,ncfile.variables['latitude'].shape[0],
49 | ncfile.variables['longitude'].shape[0]])
50 |
51 | readVar = ncfile.variables[variable][:]
52 |
53 | readVariables['data'] = np.concatenate([readVariables['data'],readVar])
54 |
55 | #read & collect time
56 | if i == 0:
57 | readVariables['time'] = np.empty([0])
58 |
59 | timeVar = ncfile.variables['time']
60 | datesVar = netCDF4.num2date(timeVar[:], timeVar.units, timeVar.calendar)
61 | readVariables['time'] = np.concatenate([readVariables['time'],datesVar])
62 |
63 | #read lat and long
64 | readVariables['latitude'] = ncfile.variables['latitude'][:]
65 | readVariables['longitude'] = ncfile.variables['longitude'][:]
66 |
67 | #close ncfile file
68 | Dataset.close(ncfile)
69 |
70 | #change directory back
71 | os.chdir(current_wrk_dir)
72 |
73 | #define name of extracted data
74 | fileNameLoc = directory.rfind('/') + 1
75 | fileName = str(directory[fileNameLoc:])
76 |
77 | return readVariables
78 |
79 |
80 |
81 | # helper function to filter irregular values out
82 | def lv_filter(data):
83 | #define +ve and -ve thresholds
84 | filter_thres_pos = np.mean(np.mean(data)) * (10**(-10))
85 | filter_thres_neg = filter_thres_pos * (-1)
86 |
87 | #filter data relevant to thresholds
88 | data[(filter_thres_neg <= data) & (data <= filter_thres_pos)] = 0
89 |
90 | return data
91 |
92 |
93 | # helper function to interpolate hourly data (24 steps/day) to half-hourly (48 steps/day), averaging adjacent hours for the new points
94 | def interpolate_4d(array):
95 | interp_array = np.empty((array.shape[0]*2 , array.shape[1], array.shape[2], array.shape[3]))
96 | for ivar in range(array.shape[-1]):
97 | for interp_idx in range(interp_array.shape[0]):
98 | if (interp_idx % 2 == 0) or (int(np.ceil(interp_idx/2)) == array.shape[0]):
99 | interp_array[interp_idx, :, :, ivar] = array[int(np.floor(interp_idx/2)), :, :, ivar]
100 | else:
101 | interp_array[interp_idx, :, :, ivar] = (array[int(np.floor(interp_idx/2)), :, :, ivar] + array[int(np.ceil(interp_idx/2)), :, :, ivar]) / 2
102 |
103 | return interp_array
104 |
105 |
106 | # helper function to interpolate time array
107 | def interpolate_time(time_array):
108 | interp_time = np.linspace(time_array[0], time_array[-1], len(time_array)*2)
109 |
110 | return interp_time
111 |
112 |
113 | # helper function to check for nans - if any found, delete that day
114 | def remove_nan_days(x_in, y_out): # assumes both are lists of per-day pandas objects
115 | # check for missing vals in outputs
116 | idx = 0
117 | for i in range(len(y_out)):
118 | if y_out[idx].isnull().values.any() or x_in[idx].isnull().values.any():
119 | del x_in[idx]
120 | del y_out[idx]
121 | idx -= 1
122 | idx += 1
123 |
124 | return x_in, y_out
125 |
126 | # function to window time series data relative to specified input and output sequence lengths
127 | # NO LONGER USED #
128 | def format_data_into_timesteps(X1, X2, X3, Y, input_seq_size, output_seq_size, input_times_reference, output_times_reference):
129 | print('formatting data into timesteps & interpolating input data')
130 |
131 | #number of timesteps to be included in each sequence
132 | seqX1, seqX2, seqX3, seqY_in, seqY, in_times, out_times = [], [], [], [], [], [], []
133 | input_start, input_end = 0, 0
134 | output_start = input_seq_size + output_seq_size
135 |
136 | while (output_start + output_seq_size) < len(X1):
137 |
138 | x1 = np.empty((input_seq_size , X1.shape[1], X1.shape[2], X1.shape[3]))
139 | x2 = np.empty((input_seq_size , X2.shape[1]))
140 | x3 = np.empty((output_seq_size , X3.shape[1]))
141 | y_in = np.empty(((input_seq_size), 1))
142 | y = np.empty((output_seq_size, 1))
143 |
144 | in_time = np.empty(((input_seq_size)), dtype = 'datetime64[ns]')
145 | out_time = np.empty(((output_seq_size)), dtype = 'datetime64[ns]')
146 |
147 | #define sequences
148 | input_end = input_start + input_seq_size
149 | output_end = output_start + output_seq_size
150 |
151 | #add condition to omit any days with nan values
152 | if np.isnan(X1[input_start:input_end]).any() == True or np.isnan(X2[input_start:input_end]).any() == True or np.isnan(Y[input_start:input_end]).any() == True:
153 | input_start += input_seq_size
154 | output_start += input_seq_size
155 | continue
156 | elif np.isnan(X3[output_start:output_end]).any() == True or np.isnan(Y[output_start:output_end]).any() == True:
157 | input_start += output_seq_size
158 | output_start += output_seq_size
159 | continue
160 |
161 | x1[:,:,:,:] = X1[input_start:input_end]
162 | seqX1.append(x1)
163 | x2[:,:] = X2[input_start:input_end]
164 | seqX2.append(x2)
165 | x3[:,:] = X3[output_start:output_end]
166 | seqX3.append(x3)
167 | y_in[:,:] = Y[input_start:input_end]
168 | # y_in[-48:,:] = 0 # eliminate metered output - only NWP available for prediction day
169 | seqY_in.append(y_in)
170 | y[:] = Y[output_start:output_end]
171 | seqY.append(y)
172 |
173 | in_time[:] = np.squeeze(input_times_reference[input_start:input_end])
174 | in_times.append(in_time)
175 | out_time[:] = np.squeeze(output_times_reference[output_start:output_end])
176 | out_times.append(out_time)
177 |
178 | input_start += 1 # slide the window forward by one timestep
179 | output_start += 1
180 |
181 | print('converting to float32 numpy arrays')
182 | seqX1 = np.array(seqX1, dtype=np.float32)
183 | seqX2 = np.array(seqX2, dtype=np.float32)
184 | seqX3 = np.array(seqX3, dtype=np.float32)
185 | seqY_in = np.array(seqY_in, dtype=np.float32)
186 | seqY = np.array(seqY, dtype=np.float32)
187 |
188 |
189 | # stack 'Y_inputs' onto the spatial array
190 | print('combining feature array with lagged outputs')
191 | broadcaster = np.ones((seqX1.shape[0], seqX1.shape[1], seqX1.shape[2], seqX1.shape[3], 1), dtype=np.float32)
192 | broadcaster = broadcaster * np.expand_dims(np.expand_dims(seqY_in, axis =2), axis=2)
193 | seqX1 = np.concatenate((broadcaster, seqX1), axis = -1)
194 |
195 | #split data for train and test sets
196 | test_set_percentage = 0.1
197 | test_split = int(len(seqX1) * (1 - test_set_percentage))
198 |
199 |
200 | dataset = {
201 | 'train_set' : {
202 | 'X1_train': seqX1[:test_split],
203 | 'X2_train': seqX2[:test_split], # input time features
204 | 'X3_train': seqX3[:test_split], # output time features
205 | 'y_train': seqY[:test_split]
206 | },
207 | 'test_set' : {
208 | 'X1_test': seqX1[test_split:],
209 | 'X2_test': seqX2[test_split:],
210 | 'X3_test': seqX3[test_split:],
211 | 'y_test': seqY[test_split:]
212 | }
213 | }
214 |
215 | #create dictionary for time references
216 | time_refs = {
217 | 'input_times_train': in_times[:test_split],
218 | 'input_times_test': in_times[test_split:],
219 | 'output_times_train': out_times[:test_split],
220 | 'output_times_test': out_times[test_split:]
221 | }
222 |
223 | return dataset, time_refs
224 | # train_set, test_set, time_refs
225 |
226 |
227 | ###### WIND ##############################################################################################################################################
228 |
229 | # main function for preprocessing of data - wind specific updates applied
230 | def wind_data_processing(filepaths, labels, input_seq_size, output_seq_size, workingDir):
231 |
232 | #get dictionary keys
233 | keys = list(filepaths.keys())
234 |
235 | #dictionaries for extracted vars
236 | vars_extract = {}
237 | vars_extract_filtered = {}
238 | vars_extract_filtered_masked = {}
239 | vars_extract_filtered_masked_norm = {}
240 |
241 | #define daylight hours mask - relative to total solar radiation
242 | # solar_rad_reference = ncExtract('./Data/solar/Raw_Data/Net_Solar_Radiation')
243 | # solar_rad_reference = lv_filter(solar_rad_reference['data'])
244 | # daylight_hr_mask = solar_rad_reference > 0
245 |
246 | #cache matrix dimensions
247 | # dimensions = [solar_rad_reference.shape[0], solar_rad_reference.shape[1], solar_rad_reference.shape[2]]
248 |
249 | #loop to extract data features
250 | for i, key in enumerate(filepaths):
251 | vars_extract[str(key)] = ncExtract(filepaths[key], workingDir) #extract files
252 |
253 | #on the first iteration, cache time features & matrix dimensions
254 | if i == 0:
255 | times_in = vars_extract[str(key)]['time']
256 | dimensions = [vars_extract[str(key)]['data'].shape[0], vars_extract[str(key)]['data'].shape[1], vars_extract[str(key)]['data'].shape[2]]
257 |
258 | vars_extract_filtered[str(key)] = lv_filter(vars_extract[str(key)]['data']) # filter data
259 | # vars_extract_filtered[str(key)][~daylight_hr_mask] = 0 #mask data
260 | # scaler = MinMaxScaler() #normalise data
261 | # vars_extract_filtered_masked_norm[str(key)] = scaler.fit_transform(vars_extract_filtered[str(key)].reshape(vars_extract_filtered[str(key)].shape[0],-1)).reshape(dimensions[0], dimensions[1], dimensions[2])
262 |
263 | # convert u and v components to wind speed and direction
264 | ws_10 = np.sqrt((vars_extract_filtered['u_wind_component_10']**2) + (vars_extract_filtered['v_wind_component_10']**2))
265 | ws_100 = np.sqrt((vars_extract_filtered['u_wind_component_100']**2) + (vars_extract_filtered['v_wind_component_100']**2))
266 |
267 | wd_10 = np.mod(180+np.rad2deg(np.arctan2(vars_extract_filtered['u_wind_component_10'], vars_extract_filtered['v_wind_component_10'])), 360)
268 | wd_100 = np.mod(180+np.rad2deg(np.arctan2(vars_extract_filtered['u_wind_component_100'], vars_extract_filtered['v_wind_component_100'])), 360)
269 |
270 | # convert ws and wd to float 32
271 | ws_10 = ws_10.astype('float32')
272 | wd_10 = wd_10.astype('float32')
273 | ws_100 = ws_100.astype('float32')
274 | wd_100 = wd_100.astype('float32')
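# Quick sanity check on the u/v -> speed/direction conversion above (numbers illustrative):
#   u = 3.0, v = 4.0  ->  speed = sqrt(3**2 + 4**2) = 5.0 m/s
#   direction = mod(180 + degrees(arctan2(3, 4)), 360) ≈ 216.9 deg,
#   i.e. the meteorological convention of the bearing the wind blows *from*.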
275 |
276 | # combine into an array
277 | feature_array = [ws_10, wd_10, ws_100, wd_100, vars_extract_filtered['temperature'], vars_extract_filtered['surface_pressure']]
278 |
279 | #stack features into one matrix
280 | feature_array = np.stack(feature_array, axis = -1)
281 |
282 | # interpolate feature array from 24hrs to 48hrs
283 | print('interpolating data...')
284 | feature_array = interpolate_4d(feature_array)
285 |
286 | # remove nan values - by day
287 | outputs_mask = labels['MW'].isna().groupby(labels.index.normalize()).transform('any')
288 | # outputs_mask = labels['MW'].isna()
289 |
290 | # apply mask, removing days containing any nan values
291 | feature_array = feature_array[~outputs_mask]
292 | labels = labels[~outputs_mask]
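# Illustrative behaviour of the day-level mask above (dates are made up): if the
# half-hourly 'MW' label at 2018-06-01 02:30 is NaN, groupby(normalize()).transform('any')
# flags every row whose index falls on 2018-06-01, so all 48 half-hours of that day are
# dropped from feature_array and labels together, keeping the two row-aligned.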
293 |
294 | dimensions = feature_array.shape
295 | feature_array_final = np.empty_like(feature_array)
296 |
297 | # normalise features
298 | for i in range(feature_array.shape[-1]):
299 | # scaler = StandardScaler(with_mean=False) #normalise data
300 | scaler = MinMaxScaler()
301 | array = feature_array[:,:,:,i]
302 | feature_array_final[:,:,:,i:i+1] = scaler.fit_transform(array.reshape(array.shape[0],-1)).reshape(dimensions[0], dimensions[1], dimensions[2], 1)
303 |
304 | #Do time feature engineering for input times
305 | times_in = pd.DataFrame({"datetime": times_in})
306 | times_in['datetime'] = times_in['datetime'].astype('str')
307 | times_in['datetime'] = pd.to_datetime(times_in['datetime'])
308 | times_in.set_index('datetime', inplace = True)
309 | in_times = times_in.index
310 |
311 | # get hours and months from datetime
312 | hour_in = times_in.index.hour
313 | hour_in = np.float32(hour_in)
314 |
315 | # add HH to hours
316 | index = 0
317 | for idx, time in enumerate(hour_in):
318 | if time == 24:
319 | index += 1
320 | else:
321 | hour_in = np.insert(hour_in, index+1, time+0.5)
322 | index += 2
323 |
324 | month_in = times_in.index.month - 1
325 | year_in = times_in.index.year
326 |
327 | # duplicate months to compensate for switch from 24hr to 48hr input data
328 | index = 0
329 | for idx, month in enumerate(month_in):
330 | if idx % 24 == 0:
331 | index += 1
332 | else:
333 | month_in = np.insert(month_in, index+1, month)
334 | index += 2
335 |
336 | # create one_hot encoding input times: hour and month
337 | one_hot_months_in = pd.get_dummies(month_in, prefix='month_')
338 | one_hot_hours_in = pd.get_dummies(hour_in, prefix='hour_')
339 |
340 | times_in_df = pd.concat([one_hot_hours_in, one_hot_months_in], axis=1)
341 | times_in = times_in_df.values
342 |
343 | # create sin / cos of input times
344 | times_in_hour_sin = np.expand_dims(np.sin(2*np.pi*hour_in/np.max(hour_in)), axis=-1)
345 | times_in_month_sin = np.expand_dims(np.sin(2*np.pi*month_in/np.max(month_in)), axis=-1)
346 |
347 | times_in_hour_cos = np.expand_dims(np.cos(2*np.pi*hour_in/np.max(hour_in)),axis=-1)
348 | times_in_month_cos = np.expand_dims(np.cos(2*np.pi*month_in/np.max(month_in)), axis=-1)
349 |
350 | times_in_year = (in_times - np.min(in_times)) / (np.max(in_times) - np.min(in_times))
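# Note on the cyclic (sin/cos) encoding used here and for the output times below
# (numbers are illustrative): with half-hourly hours in [0, 23.5],
#   h = 0     -> (sin, cos) = (0,  1)
#   h = 11.75 -> (0, -1)
#   h = 23.5  -> (0,  1), i.e. the end of the day wraps round to sit beside midnight,
# avoiding the artificial jump a raw 0-23.5 feature would introduce (dividing by max(h)
# rather than the full period means 23.5 and 0 share exactly the same point).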
351 |
352 | #Process output times as secondary input for decoder
353 | #cache output times
354 | label_times = labels.index
355 |
356 | #declare 'output' time features
357 | df_times_outputs = pd.DataFrame()
358 | df_times_outputs['hour'] = labels.index.hour
359 | df_times_outputs['month'] = labels.index.month - 1
360 | df_times_outputs['year'] = labels.index.year
361 |
362 | #process output times for half hours
363 | for idx, row in df_times_outputs.iterrows():
364 | if idx % 2 != 0:
365 | df_times_outputs.iloc[idx, 0] = df_times_outputs.iloc[idx, 0] + 0.5
366 |
367 | months_out = pd.get_dummies(df_times_outputs['month'], prefix='month_')
368 | hours_out = pd.get_dummies(df_times_outputs['hour'], prefix='hour_')
369 |
370 | times_out_df = pd.concat([hours_out, months_out], axis=1)
371 | times_out = times_out_df.values
372 |
373 | # create sin / cos of output times
374 | times_out_hour_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1)
375 | times_out_month_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1)
376 |
377 | times_out_hour_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1)
378 | times_out_month_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1)
379 |
380 | times_out_year = np.expand_dims((df_times_outputs['year'].values - np.min(df_times_outputs['year'])) / (np.max(df_times_outputs['year']) - np.min(df_times_outputs['year'])), axis=-1)
381 |
382 | # print(times_out_hour_cos[:50])
383 | labels['MW'] = labels['MW'].astype('float32')
384 |
385 | #normalise labels
386 | scaler = StandardScaler(with_mean=False)
387 | # scaler = MinMaxScaler()
388 | labels[['MW']] = scaler.fit_transform(labels[['MW']])
389 |
390 | # save the scaler for inference
391 | dump(scaler, open('../../data/processed/wind/_scaler/scaler_wind_v3.pkl', 'wb'))
392 |
393 | # collect input and output time references
394 | time_refs = [in_times, label_times]
395 |
396 | # one-hot method
397 | # input_times = times_in_df.values
398 | # output_times = times_out_df.values
399 |
400 | # cyclic method
401 | output_times = np.concatenate((times_out_hour_sin, times_out_hour_cos, times_out_month_sin, times_out_month_cos, times_out_year), axis=-1)
402 |
403 | labels = labels.values
404 |
405 | # inputs are now at 48hr (half-hourly) resolution, so reuse the output time features for X2
406 | input_times = output_times
407 |
408 | # add labels to inputs
409 | broadcaster = np.ones((feature_array_final.shape[0], feature_array_final.shape[1], feature_array_final.shape[2], 1), dtype=np.float32)
410 | broadcaster = broadcaster * np.expand_dims(np.expand_dims(labels, axis =2), axis=2)
411 | feature_array_final = np.concatenate((broadcaster, feature_array_final), axis = -1)
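# Shape sketch for the broadcast above (dimension names are assumptions):
#   labels (T, 1) -> expand_dims twice -> (T, 1, 1, 1)
#   ones (T, lat, lon, 1) * (T, 1, 1, 1) -> (T, lat, lon, 1), the metered MW value
#   tiled over every grid cell of its timestep; after the concatenate,
#   feature_array_final has shape (T, lat, lon, n_features + 1).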
412 |
413 |
414 | # declare train / test split
415 | test_split_seq = 8544 # hold out the last 8544 half-hourly steps (178 days, roughly 10%)
416 |
417 | # create dataset
418 | dataset = {
419 | 'train_set' : {
420 | 'X1_train': feature_array_final[:-test_split_seq],
421 | 'X2_train': input_times[:-test_split_seq], # input time features
422 | 'X3_train': output_times[:-test_split_seq], # output time features
423 | 'y_train': labels[:-test_split_seq]
424 | },
425 | 'test_set' : {
426 | 'X1_test': feature_array_final[-test_split_seq:],
427 | 'X2_test': input_times[-test_split_seq:],
428 | 'X3_test': output_times[-test_split_seq:],
429 | 'y_test': labels[-test_split_seq:]
430 | }
431 | }
432 |
433 | time_refs = {
434 | 'input_times_train': in_times[:-test_split_seq],
435 | 'input_times_test': in_times[-test_split_seq:],
436 | 'output_times_train': label_times[:-test_split_seq],
437 | 'output_times_test': label_times[-test_split_seq:]
438 | }
439 |
440 | return dataset, time_refs
441 |
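# Illustrative call site for wind_data_processing (paths, the labels file and the sequence
# lengths are assumptions, not taken from this repository's run scripts):
#   filepaths = {'u_wind_component_10': 'data/raw/wind/u_10m', 'v_wind_component_10': '...',
#                'u_wind_component_100': '...', 'v_wind_component_100': '...',
#                'temperature': '...', 'surface_pressure': '...'}
#   labels = pd.read_csv('data/raw/wind/metered_mw.csv', index_col=0, parse_dates=True)
#   dataset, time_refs = wind_data_processing(filepaths, labels, 336, 48, os.getcwd())
#   dataset['train_set']['X1_train'].shape   # (timesteps, lat, lon, n_features + 1)
# Note the sequence-length arguments are currently unused here, since the windowing step
# (format_data_into_timesteps) is no longer applied.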
442 |
443 | ###### SOLAR ##############################################################################################################################################
444 |
445 | # function to process data in train and test sets
446 | def solar_data_processing(filepaths, labels, input_seq_size, output_seq_size, workingDir):
447 |
448 | #get dictionary keys
449 | keys = list(filepaths.keys())
450 |
451 | #dictionaries for extracted vars
452 | vars_extract = {}
453 | vars_extract_filtered = {}
454 | vars_extract_filtered_masked = {}
455 | vars_extract_filtered_masked_norm = {}
456 |
457 | #define daylight hours mask - relative to total solar radiation
458 | # solar_rad_reference = ncExtract('./Data/solar/Raw_Data/Net_Solar_Radiation')
459 | # solar_rad_reference = lv_filter(solar_rad_reference['data'])
460 | # daylight_hr_mask = solar_rad_reference > 0
461 |
462 | #cache matrix dimensions
463 | # dimensions = [solar_rad_reference.shape[0], solar_rad_reference.shape[1], solar_rad_reference.shape[2]]
464 |
465 | #loop to extract data features
466 | for i, key in enumerate(filepaths):
467 | vars_extract[str(key)] = ncExtract(filepaths[key], workingDir) #extract files
468 |
469 | #on the first iteration, cache time features & matrix dimensions
470 | if i == 0:
471 | times_in = vars_extract[str(key)]['time']
472 | dimensions = [vars_extract[str(key)]['data'].shape[0], vars_extract[str(key)]['data'].shape[1], vars_extract[str(key)]['data'].shape[2]]
473 |
474 | vars_extract_filtered[str(key)] = lv_filter(vars_extract[str(key)]['data']) # filter data
475 | # vars_extract_filtered[str(key)][~daylight_hr_mask] = 0 #mask data
476 | # scaler = MinMaxScaler() #normalise data
477 | scaler = StandardScaler(with_mean=False)
478 | vars_extract_filtered_masked_norm[str(key)] = scaler.fit_transform(vars_extract_filtered[str(key)].reshape(vars_extract_filtered[str(key)].shape[0],-1)).reshape(dimensions[0], dimensions[1], dimensions[2])
479 |
480 |
481 | #stack features into one matrix
482 | feature_array = [vars_extract_filtered_masked_norm[str(i)] for i in vars_extract_filtered_masked_norm]
483 | feature_array = np.stack(feature_array, axis = -1)
484 |
485 | # interpolate feature array from 24hrs to 48hrs
486 | feature_array = interpolate_4d(feature_array)
487 |
488 | # remove nan values - by day
489 | outputs_mask = labels['MW'].isna().groupby(labels.index.normalize()).transform('any')
490 |
491 |
492 | # apply mask, removing days containing any nan values
493 | feature_array = feature_array[~outputs_mask]
494 | labels = labels[~outputs_mask]
495 |
496 | dimensions = feature_array.shape
497 |
498 | #Do time feature engineering for input times
499 | times_in = pd.DataFrame({"datetime": times_in})
500 | times_in['datetime'] = times_in['datetime'].astype('str')
501 | times_in['datetime'] = pd.to_datetime(times_in['datetime'])
502 | times_in.set_index('datetime', inplace = True)
503 | in_times = times_in.index
504 |
505 | # get hours and months from datetime
506 | hour_in = times_in.index.hour
507 | hour_in = np.float32(hour_in)
508 |
509 | # add HH to hours
510 | index = 0
511 | for idx, time in enumerate(hour_in):
512 | if time == 24:
513 | index += 1
514 | else:
515 | hour_in = np.insert(hour_in, index+1, time+0.5)
516 | index += 2
517 |
518 | month_in = times_in.index.month - 1
519 | year_in = times_in.index.year
520 |
521 | # duplicate months to compensate for switch from 24hr to 48hr input data
522 | index = 0
523 | for idx, month in enumerate(month_in):
524 | if idx % 24 == 0:
525 | index += 1
526 | else:
527 | month_in = np.insert(month_in, index+1, month)
528 | index += 2
529 |
530 | # create one_hot encoding input times: hour and month
531 | one_hot_months_in = pd.get_dummies(month_in, prefix='month_')
532 | one_hot_hours_in = pd.get_dummies(hour_in, prefix='hour_')
533 |
534 | times_in_df = pd.concat([one_hot_hours_in, one_hot_months_in], axis=1)
535 | times_in = times_in_df.values
536 |
537 | # create sin / cos of input times
538 | times_in_hour_sin = np.expand_dims(np.sin(2*np.pi*hour_in/np.max(hour_in)), axis=-1)
539 | times_in_month_sin = np.expand_dims(np.sin(2*np.pi*month_in/np.max(month_in)), axis=-1)
540 |
541 | times_in_hour_cos = np.expand_dims(np.cos(2*np.pi*hour_in/np.max(hour_in)),axis=-1)
542 | times_in_month_cos = np.expand_dims(np.cos(2*np.pi*month_in/np.max(month_in)), axis=-1)
543 |
544 | times_in_year = (in_times - np.min(in_times)) / (np.max(in_times) - np.min(in_times))
545 |
546 | #Process output times as secondary input for decoder
547 | #cache output times
548 | label_times = labels.index
549 |
550 | #declare 'output' time features
551 | df_times_outputs = pd.DataFrame()
552 | df_times_outputs['hour'] = labels.index.hour
553 | df_times_outputs['month'] = labels.index.month - 1
554 | df_times_outputs['year'] = labels.index.year
555 |
556 | #process output times for half hours
557 | for idx, row in df_times_outputs.iterrows():
558 | if idx % 2 != 0:
559 | df_times_outputs.iloc[idx, 0] = df_times_outputs.iloc[idx, 0] + 0.5
560 |
561 | months_out = pd.get_dummies(df_times_outputs['month'], prefix='month_')
562 | hours_out = pd.get_dummies(df_times_outputs['hour'], prefix='hour_')
563 |
564 | times_out_df = pd.concat([hours_out, months_out], axis=1)
565 | times_out = times_out_df.values
566 |
567 | # create sin / cos of output times
568 | times_out_hour_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1)
569 | times_out_month_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1)
570 |
571 | times_out_hour_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1)
572 | times_out_month_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1)
573 |
574 | times_out_year = np.expand_dims((df_times_outputs['year'].values - np.min(df_times_outputs['year'])) / (np.max(df_times_outputs['year']) - np.min(df_times_outputs['year'])), axis=-1)
575 |
576 | # normalise y labels
577 | scaler = StandardScaler(with_mean=False)
578 | # scaler = MinMaxScaler()
579 | labels[['MW']] = scaler.fit_transform(labels[['MW']])
580 |
581 | # save the scaler for inference
582 | dump(scaler, open('../../data/processed/solar/_scaler/scaler_solar_v4.pkl', 'wb'))
583 |
584 | in_times = label_times # inputs interpolated to 48hr resolution, so reuse label times as the input time reference
585 | time_refs = [in_times, label_times]
586 |
587 | # one-hot method
588 | # input_times = times_in_df.values
589 | # output_times = times_out_df.values
590 |
591 | # cyclic method
592 | # input_times = np.concatenate((times_in_hour_sin, times_in_hour_cos, times_in_month_sin, times_in_month_cos), axis=-1) # switched to output times for HH periods
593 | output_times = np.concatenate((times_out_hour_sin, times_out_hour_cos, times_out_month_sin, times_out_month_cos, times_out_year), axis=-1)
594 |
595 | labels = labels.values
596 |
597 | # add labels to inputs
598 | print('combining feature array with lagged outputs')
599 | broadcaster = np.ones((feature_array.shape[0], feature_array.shape[1], feature_array.shape[2], 1), dtype=np.float32)
600 | broadcaster = broadcaster * np.expand_dims(np.expand_dims(labels, axis =2), axis=2)
601 | feature_array = np.concatenate((broadcaster, feature_array), axis = -1)
602 |
603 | # inputs are now at 48hr (half-hourly) resolution, so reuse the output time features for X2
604 | input_times = output_times
605 |
606 | test_split_seq = 8544 # hold out the last 8544 half-hourly steps (178 days, roughly 10%)
607 |
608 | # create dataset
609 | dataset = {
610 | 'train_set' : {
611 | 'X1_train': feature_array[:-test_split_seq],
612 | 'X2_train': input_times[:-test_split_seq], # input time features
613 | 'X3_train': output_times[:-test_split_seq], # output time features
614 | 'y_train': labels[:-test_split_seq]
615 | },
616 | 'test_set' : {
617 | 'X1_test': feature_array[-test_split_seq:],
618 | 'X2_test': input_times[-test_split_seq:],
619 | 'X3_test': output_times[-test_split_seq:],
620 | 'y_test': labels[-test_split_seq:]
621 | }
622 | }
623 |
624 | time_refs = {
625 | 'input_times_train': in_times[:-test_split_seq],
626 | 'input_times_test': in_times[-test_split_seq:],
627 | 'output_times_train': label_times[:-test_split_seq],
628 | 'output_times_test': label_times[-test_split_seq:]
629 | }
630 |
631 | return dataset, time_refs
632 | # return train_set, test_set, time_refs
633 |
634 | ###### DEMAND ##############################################################################################################################################
635 |
636 | #function to process data in train and test sets
637 | def demand_data_processing(filepaths, labels, workingDir):
638 |
639 | #get dictionary keys
640 | keys = list(filepaths.keys())
641 |
642 | #dictionaries for extracted vars
643 | vars_extract = {}
644 | vars_extract_filtered = {}
645 | vars_extract_filtered_masked = {}
646 | vars_extract_filtered_masked_norm = {}
647 |
648 | #define daylight hours mask - relative to total solar radiation
649 | # solar_rad_reference = ncExtract('./Data/solar/Raw_Data/Net_Solar_Radiation')
650 | # solar_rad_reference = lv_filter(solar_rad_reference['data'])
651 | # daylight_hr_mask = solar_rad_reference > 0
652 |
653 | #cache matrix dimensions
654 | # dimensions = [solar_rad_reference.shape[0], solar_rad_reference.shape[1], solar_rad_reference.shape[2]]
655 |
656 | #loop to extract data features
657 | for i, key in enumerate(filepaths):
658 | vars_extract[str(key)] = ncExtract(filepaths[key], workingDir) #extract files
659 |
660 | #on the first iteration, cache time features & matrix dimensions
661 | if i == 0:
662 | times_in = vars_extract[str(key)]['time']
663 | dimensions = [vars_extract[str(key)]['data'].shape[0], vars_extract[str(key)]['data'].shape[1], vars_extract[str(key)]['data'].shape[2]]
664 |
665 | vars_extract_filtered[str(key)] = lv_filter(vars_extract[str(key)]['data']) # filter data
666 | # vars_extract_filtered[str(key)][~daylight_hr_mask] = 0 #mask data
667 | # scaler = MinMaxScaler() #normalise data
668 | scaler = StandardScaler(with_mean=False)
669 | vars_extract_filtered_masked_norm[str(key)] = scaler.fit_transform(vars_extract_filtered[str(key)].reshape(vars_extract_filtered[str(key)].shape[0],-1)).reshape(dimensions[0], dimensions[1], dimensions[2])
670 |
671 | #stack features into one matrix
672 | feature_array = [vars_extract_filtered_masked_norm[str(i)] for i in vars_extract_filtered_masked_norm]
673 | feature_array = np.stack(feature_array, axis = -1)
674 | # feature_array = np.concatenate((feature_array, input_timefeatures), axis = -1)
675 |
676 | # interpolate feature array from 24hrs to 48hrs
677 | feature_array = interpolate_4d(feature_array)
678 |
679 | # remove nan values
680 | outputs_mask = labels['MW'].isna().groupby(labels.index.normalize()).transform('any')
681 |
682 | # apply mask, removing days containing any nan values
683 | feature_array = feature_array[~outputs_mask]
684 | labels = labels[~outputs_mask]
685 |
686 | # do time feature engineering for input times
687 | times_in = pd.DataFrame({"datetime": times_in})
688 | times_in['datetime'] = times_in['datetime'].astype('str')
689 | times_in['datetime'] = pd.to_datetime(times_in['datetime'])
690 | times_in.set_index('datetime', inplace = True)
691 | in_times = times_in.index
692 |
693 | # get hours and months from datetime
694 | hour_in = times_in.index.hour
695 | hour_in = np.float32(hour_in)
696 |
697 | # add HH to hours
698 | index = 0
699 | for idx, time in enumerate(hour_in):
700 | if time == 24:
701 | index += 1
702 | else:
703 | hour_in = np.insert(hour_in, index+1, time+0.5)
704 | index += 2
705 |
706 | month_in = times_in.index.month - 1
707 | year_in = times_in.index.year
708 |
709 | # duplicate months to compensate for switch from 24hr to 48hr input data
710 | index = 0
711 | for idx, month in enumerate(month_in):
712 | if idx % 24 == 0:
713 | index += 1
714 | else:
715 | month_in = np.insert(month_in, index+1, month)
716 | index += 2
717 |
718 | # create one_hot encoding input times: hour and month
719 | one_hot_months_in = pd.get_dummies(month_in, prefix='month_')
720 | one_hot_hours_in = pd.get_dummies(hour_in, prefix='hour_')
721 |
722 | times_in_df = pd.concat([one_hot_hours_in, one_hot_months_in], axis=1)
723 | times_in = times_in_df.values
724 |
725 | # create sin / cos of input times
726 | times_in_hour_sin = np.expand_dims(np.sin(2*np.pi*hour_in/np.max(hour_in)), axis=-1)
727 | times_in_month_sin = np.expand_dims(np.sin(2*np.pi*month_in/np.max(month_in)), axis=-1)
728 |
729 | times_in_hour_cos = np.expand_dims(np.cos(2*np.pi*hour_in/np.max(hour_in)),axis=-1)
730 | times_in_month_cos = np.expand_dims(np.cos(2*np.pi*month_in/np.max(month_in)), axis=-1)
731 |
732 | times_in_year = (in_times - np.min(in_times)) / (np.max(in_times) - np.min(in_times))
733 |
734 | #Process output times as secondary input for decoder
735 | #cache output times
736 | label_times = labels.index
737 |
738 | #declare 'output' time features
739 | df_times_outputs = pd.DataFrame()
740 | df_times_outputs['date'] = labels.index.date
741 | df_times_outputs['hour'] = labels.index.hour
742 | df_times_outputs['month'] = labels.index.month - 1
743 | df_times_outputs['year'] = labels.index.year
744 | df_times_outputs['day_of_week'] = labels.index.dayofweek
745 | df_times_outputs['day_of_year'] = labels.index.dayofyear - 1
746 | df_times_outputs['weekend'] = df_times_outputs['day_of_week'].apply(lambda x: 1 if x>=5 else 0)
747 |
748 |
749 | # account for bank / public holidays
750 | start_date = labels.index.min()
751 | end_date = labels.index.max()
752 | start_year = df_times_outputs['year'].min()
753 | end_year = df_times_outputs['year'].max()
754 |
755 | holidays = set(holiday[0]
756 | for year in range(start_year, end_year + 1)
757 | for holiday in cal.holidays(year)
758 | if start_date <= holiday[0] <= end_date)
759 |
760 | df_times_outputs['holiday'] = df_times_outputs['date'].isin(holidays).astype(int)
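# Illustrative behaviour of the holiday flag above ('cal' is assumed to be a
# workalendar-style calendar instantiated with the module imports): cal.holidays(year)
# yields (date, name) pairs, so the comprehension keeps only the dates; a label
# timestamp falling on e.g. 25 December would then get holiday == 1, an ordinary
# working day 0.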
761 |
762 | #process output times for half hours
763 | for idx, row in df_times_outputs.iterrows():
764 | if idx % 2 != 0:
765 | df_times_outputs.iloc[idx, 1] = df_times_outputs.iloc[idx, 1] + 0.5
766 |
767 | months_out = pd.get_dummies(df_times_outputs['month'], prefix='month_')
768 | hours_out = pd.get_dummies(df_times_outputs['hour'], prefix='hour_')
769 |
770 | times_out_df = pd.concat([hours_out, months_out], axis=1)
771 | times_out = times_out_df.values
772 |
773 | # create sin / cos of output hour
774 | times_out_hour_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1)
775 | times_out_hour_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['hour']/np.max(df_times_outputs['hour'])), axis=-1)
776 |
777 | # create sin / cos of output month
778 | times_out_month_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1)
779 | times_out_month_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['month']/np.max(df_times_outputs['month'])), axis=-1)
780 |
781 | # create sin / cos of output year
782 | times_out_year = np.expand_dims((df_times_outputs['year'].values - np.min(df_times_outputs['year'])) / (np.max(df_times_outputs['year']) - np.min(df_times_outputs['year'])), axis=-1)
783 |
784 | # create sin / cos of output day of week
785 | times_out_DoW_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['day_of_week']/np.max(df_times_outputs['day_of_week'])), axis=-1)
786 | times_out_DoW_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['day_of_week']/np.max(df_times_outputs['day_of_week'])), axis=-1)
787 |
788 | # create sin / cos of output day of year
789 | times_out_DoY_sin = np.expand_dims(np.sin(2*np.pi*df_times_outputs['day_of_year']/np.max(df_times_outputs['day_of_year'])), axis=-1)
790 | times_out_DoY_cos = np.expand_dims(np.cos(2*np.pi*df_times_outputs['day_of_year']/np.max(df_times_outputs['day_of_year'])), axis=-1)
791 |
792 | #normalise labels
793 | scaler = StandardScaler(with_mean=False)
794 | labels[['MW']] = scaler.fit_transform(labels[['MW']])
795 |
796 | # save the scaler for inference
797 | dump(scaler, open('../../data/processed/demand/_scaler/scaler_demand_v2.pkl', 'wb'))
798 |
799 | time_refs = [in_times, label_times]
800 |
801 | # one-hot method
802 | # input_times = times_in_df.values
803 | # output_times = times_out_df.values
804 |
805 | weekends = np.expand_dims(df_times_outputs['weekend'].values, axis =-1)
806 | holidays = np.expand_dims(df_times_outputs['holiday'].values, axis =-1)
807 |
808 | # cyclic method
809 | # input_times = np.concatenate((times_in_hour_sin, times_in_hour_cos, times_in_month_sin, times_in_month_cos), axis=-1) # switched to output times for HH periods
810 | output_times = np.concatenate((times_out_hour_sin, times_out_hour_cos, times_out_month_sin, times_out_month_cos, times_out_DoW_sin, times_out_DoW_cos,
811 | times_out_DoY_sin, times_out_DoY_cos, times_out_year, weekends, holidays), axis=-1)
812 |
813 | labels = labels.values
814 |
815 | # inputs are now at 48hr (half-hourly) resolution, so reuse the output time features for X2
816 | input_times = output_times
817 |
818 | # add labels to inputs
819 | print('combining feature array with lagged outputs')
820 | broadcaster = np.ones((feature_array.shape[0], feature_array.shape[1], feature_array.shape[2], 1), dtype=np.float32)
821 | broadcaster = broadcaster * np.expand_dims(np.expand_dims(labels, axis =2), axis=2)
822 | feature_array = np.concatenate((broadcaster, feature_array), axis = -1)
823 |
824 | #divide into timesteps & train and test sets
825 | # dataset, time_refs = format_data_into_timesteps(X1 = feature_array, X2 = input_times , X3 = output_times, Y = labels, input_seq_size = 240, output_seq_size = 48, input_times_reference = time_refs[1], output_times_reference = time_refs[1]) # converting from 24hr to 48hr inputs hence can use output time references
826 | # train_set, test_set, time_refs
827 |
828 | # def to_float32(input_dict):
829 | # for idx, key in enumerate(input_dict.keys()):
830 | # input_dict[key] = input_dict[key].astype(np.float32)
831 | # return input_dict
832 |
833 | # train_set = to_float32(train_set)
834 | # test_set = to_float32(test_set)
835 |
836 | test_split_seq = 8544 # hold out the last 8544 half-hourly steps (178 days, roughly 10%)
837 |
838 | # input_test_seq = test_split_seq + (input_seq_size - 1)
839 | # output_test_seq = test_split_seq + (output_seq_size - 1)
840 |
841 | # create dataset
842 | dataset = {
843 | 'train_set' : {
844 | 'X1_train': feature_array[:-test_split_seq],
845 | 'X2_train': input_times[:-test_split_seq], # input time features
846 | 'X3_train': output_times[:-test_split_seq], # output time features
847 | 'y_train': labels[:-test_split_seq]
848 | },
849 | 'test_set' : {
850 | 'X1_test': feature_array[-test_split_seq:],
851 | 'X2_test': input_times[-test_split_seq:],
852 | 'X3_test': output_times[-test_split_seq:],
853 | 'y_test': labels[-test_split_seq:]
854 | }
855 | }
856 |
857 | time_refs = {
858 | 'input_times_train': label_times[:-test_split_seq],
859 | 'input_times_test': label_times[-test_split_seq:],
860 | 'output_times_train': label_times[:-test_split_seq],
861 | 'output_times_test': label_times[-test_split_seq:]
862 | }
863 |
864 | # def to_float32(input_dict):
865 | # for idx, key in enumerate(input_dict.keys()):
866 | # input_dict[key] = input_dict[key].astype(np.float32)
867 | # return input_dict
868 |
869 | # train_set = to_float32(train_set)
870 | # test_set = to_float32(test_set)
871 |
872 | return dataset, time_refs
873 | # return train_set, test_set, time_refs
874 |
875 |
876 |
--------------------------------------------------------------------------------
/visualisations/cloud_cover_(input)_animation.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/cloud_cover_(input)_animation.gif
--------------------------------------------------------------------------------
/visualisations/d3_quantile_plot_examples.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/d3_quantile_plot_examples.png
--------------------------------------------------------------------------------
/visualisations/d3_temporal_attention_plot_demand.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/d3_temporal_attention_plot_demand.png
--------------------------------------------------------------------------------
/visualisations/d3_temporal_attention_plot_price.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/d3_temporal_attention_plot_price.png
--------------------------------------------------------------------------------
/visualisations/d3_temporal_attention_plot_solar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/d3_temporal_attention_plot_solar.png
--------------------------------------------------------------------------------
/visualisations/d3_temporal_attention_plot_wind.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/d3_temporal_attention_plot_wind.png
--------------------------------------------------------------------------------
/visualisations/memory_leak_test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/memory_leak_test.png
--------------------------------------------------------------------------------
/visualisations/model_architecture_schematic_markup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/model_architecture_schematic_markup.png
--------------------------------------------------------------------------------
/visualisations/performance_breakdown_markup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/performance_breakdown_markup.png
--------------------------------------------------------------------------------
/visualisations/solar_spatial_attentions_animation.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/solar_spatial_attentions_animation.gif
--------------------------------------------------------------------------------
/visualisations/tabular_performance_breakdown.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RichardFindlay/day-ahead-probablistic-forecasting-with-quantile-regression/32b87582f94ffac2d97f263ae00a4523e880cb5e/visualisations/tabular_performance_breakdown.png
--------------------------------------------------------------------------------