├── data
    └── figures
    │   ├── comparison.png
    │   └── comparison_table.png
├── requirements.txt
├── src
    ├── plotting.py
    └── scores.py
├── .gitignore
└── README.md


/data/figures/comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/giotto-ai/time_series_energy_demand/master/data/figures/comparison.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | giotto-time==0.2.0
2 | pandas==0.25.3
3 | plotly==4.1.0
4 | scikit-learn==0.22
5 | numpy==1.18.0
6 | jupyter
7 | 


--------------------------------------------------------------------------------
/data/figures/comparison_table.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/giotto-ai/time_series_energy_demand/master/data/figures/comparison_table.png


--------------------------------------------------------------------------------
/src/plotting.py:
--------------------------------------------------------------------------------
 1 | import plotly.graph_objects as go
 2 | from plotly.subplots import make_subplots
 3 | 
 4 | 
 5 | def plot_time_series(df, y_columns, title='', x_axis_title='', y_axis_title = '', fontsize=None):
 6 |     """Plot different time series with the same y-axis.
 7 |     
 8 |     Parameters
 9 |     ----------
10 |     df : pandas DataFrame
11 |         The dataframe with the data to plot (incl. the x-values as an index)
12 |     y_columns : list
13 |         The columns of the dataframe to use. Maximal length = 2
14 |     title : str, optional, default: ''
15 |         Title to put above the plot
16 |     x_axis_title : str, optional, default: ''
17 |         Title of the x-axis
18 |     y_axis_titles : list, optional, default: ['', '']
19 |         List of y_axis_titles to use. Maximal length = 2
20 |     fontsize : int, optional, default: None
21 |         Fontsize to use
22 |     """
23 |     x = df.index
24 |     
25 |     fig = make_subplots(specs=[[{"secondary_y": False}]])
26 | 
27 |     for i in y_columns:
28 |         # Add traces
29 |         fig.add_trace(
30 |             go.Scatter(x=x, y=df[i].values, name=i),
31 |         )
32 | 
33 |     # Add figure title
34 |     fig.update_layout(
35 |         title={
36 |         'text':title,
37 |         'y':0.9,
38 |         'x':0.5,
39 |         }
40 |     )
41 | 
42 |     if fontsize != None:
43 |         fig.update_layout(
44 |             title={
45 |                 'text':title,
46 |                 'y':0.9,
47 |                 'x':0.5,
48 |             },
49 |             font=dict(
50 |                 size=fontsize)
51 |     )
52 | 
53 |     # Set x-axis title
54 |     fig.update_xaxes(title_text=x_axis_title)
55 | 
56 |     # Set y-axes titles
57 |     fig.update_yaxes(title_text=y_axis_title)
58 | 
59 |     return fig


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | 
  5 | # C extensions
  6 | *.so
  7 | 
  8 | # Distribution / packaging
  9 | .Python
 10 | env/
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | notebooks/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | 
 27 | # PyInstaller
 28 | #  Usually these files are written by a python script from a template
 29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 30 | *.manifest
 31 | *.spec
 32 | 
 33 | # Installer logs
 34 | pip-log.txt
 35 | pip-delete-this-directory.txt
 36 | 
 37 | # Unit test / coverage reports
 38 | htmlcov/
 39 | .tox/
 40 | .coverage
 41 | .coverage.*
 42 | .cache
 43 | nosetests.xml
 44 | coverage.xml
 45 | *.cover
 46 | 
 47 | # Translations
 48 | *.mo
 49 | *.pot
 50 | 
 51 | # Django stuff:
 52 | *.log
 53 | 
 54 | # Sphinx documentation
 55 | docs/_build/
 56 | doc/build/
 57 | doc/generated/
 58 | doc/reference/generated/
 59 | 
 60 | # PyBuilder
 61 | target/
 62 | 
 63 | # DotEnv configuration
 64 | .env
 65 | 
 66 | # Database
 67 | *.db
 68 | *.rdb
 69 | 
 70 | # Pycharm
 71 | .idea
 72 | 
 73 | # VS Code
 74 | .vscode/
 75 | 
 76 | # Spyder
 77 | .spyproject/
 78 | 
 79 | # Jupyter NB Checkpoints
 80 | .ipynb_checkpoints/
 81 | Untitled*
 82 | 
 83 | # exclude data from source control by default
 84 | /data/
 85 | 
 86 | # Mac OS-specific storage files
 87 | .DS_Store
 88 | 
 89 | # vim
 90 | *.swp
 91 | *.swo
 92 | 
 93 | # Mypy cache
 94 | .mypy_cache/
 95 | 
 96 | # ignore huge time_series_models
 97 | models/*.joblib
 98 | 
 99 | # Hypothesis
100 | .hypothesis/
101 | 
102 | # PyTest
103 | .pytest_cache/
104 | 
105 | # Excel temporary
106 | ~$*.xls*


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ![logo](https://raw.githubusercontent.com/giotto-ai/giotto-tda/master/doc/images/tda_logo.svg)
 2 | 
 3 | # Energy Demand Prediction in Switzerland with giotto-time
 4 | 
 5 | ## What is it?
 6 | This repository contains the code for the [blog post](https://towardsdatascience.com/getting-started-with-giotto-time-d9b2088d60ca) 'Energy Demand Prediction in Switzerland with giotto-time' where we use the Python time series library [giotto-time](https://github.com/giotto-ai/giotto-time) to predict the mean daily energy demand (in megawatts) in Switzerland 21 days ahead using generalized autoregression models and linear regression with custom loss functions, both of which are provided by giotto-time. 
 7 | 
 8 | The 'energy_demand_time_series.ipynb' showcases the most important functionalities of giotto-time and how to use them to:
 9 | * remove trends and deal with seasonalities,
10 | * make a causality test and thereby find the ideal shift between one time series and another to make predictions,
11 | * easily create a range of different features,
12 | * use generalized autoregression models and linear regression with custom loss functions to make predictions using the 'fit/predict' methods.
13 | 
14 | ## Getting started
15 | You want to start right away? The easiest way to get started is to create a conda environment as follows:
16 | ```
17 | conda create python=3.7 --name time -y
18 | conda activate time
19 | pip install -r requirements.txt
20 | ```
21 | Then the notebook 'energy_demand_time_series.ipynb' will walk you through the analysis and the prediction steps.
22 | 
23 | ## Data
24 | The data used for this project was collected by swissgrid, a Swiss transmission grid operator, and can be found here: https://www.swissgrid.ch/en/home/operation/grid-data/generation.html. Hourly data for the years 2016 up to 2019 was used and collected in a file located in the data/raw directory.
25 | 
26 | ## Results
27 | In this section we present the results. The figure below shows the reference values as well as the predictions for one of the models shown in the notebook. 
28 | 
29 | ![alt text](data/figures/comparison.png)
30 | 
31 | 
 32 | An important goal of this tutorial is to show the different models that giotto-time has to offer. The table below lists the results for the different models and metrics. The best result in each column is highlighted in yellow.
33 | 
34 | ![alt text](data/figures/comparison_table.png)
35 | 


--------------------------------------------------------------------------------
/src/scores.py:
--------------------------------------------------------------------------------
 1 | from sklearn.metrics import mean_absolute_error, make_scorer
 2 | import numpy as np
 3 | import pandas as pd
 4 | 
 5 | 
 6 | def relative_mean_absolute_error(y_true, y_pred):
 7 |     """Calculate the relative mean absolute error
 8 |     Parameters
 9 |     ----------
10 |     y_true : array-like object
11 |         The reference values
12 |     y_pred : array-like object
13 |         The predicted values
14 |     """
15 |     diff = np.abs(y_true-y_pred)
16 |     return np.mean(diff/y_true)
17 | 
def relative_mean_squared_error(y_true, y_pred):
    """Calculate the relative mean squared error

    The error is ``mean((y_true - y_pred)**2 / y_true**2)``. Values are
    compared position by position (any pandas index is ignored), in the same
    way as the scikit-learn metrics.

    Parameters
    ----------
    y_true : array-like object
        The reference values. Entries equal to zero lead to a division by
        zero (inf/nan in the result).
    y_pred : array-like object
        The predicted values

    Returns
    -------
    float
        The mean of the squared errors relative to the squared references.
    """
    # Coerce up front so that plain lists/tuples work as well as arrays/Series.
    reference = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    squared_error = np.square(reference - predicted)
    return np.mean(squared_error / np.square(reference))
29 | 
def calculate_score(y_true, y_pred, metric=mean_absolute_error):
    """Score the predictions of the GAR model against the reference data.

    Reference and prediction are matched on their index, rows in which either
    value is missing are discarded, and the remaining pairs are passed to
    ``metric``.

    Parameters
    ----------
    y_true : pandas DataFrame
        The dataframe with the reference data. It must provide exactly one
        column of values; rows containing NaN are dropped.
    y_pred : pandas DataFrame
        The dataframe with the predicted values, i.e. the output of the GAR
        model. It must provide exactly one column of values.
    metric : object, optional, default: mean_absolute_error (from scikit-learn)
        A function that calculates a score and takes as input y_true and
        y_pred (e.g. from scikit-learn)

    Returns
    -------
    score
        Whatever ``metric`` returns for the aligned, NaN-free value pairs.
    """
    # Rebuild both inputs from their raw values so that the original column
    # labels cannot clash in the join below.
    reference = pd.DataFrame(y_true.values, index=y_true.index, columns=['left'])
    predicted = pd.DataFrame(y_pred.values, index=y_pred.index)

    # Left join on the index; reference rows without a prediction (and any
    # row holding a NaN) are removed.
    paired = reference.join(predicted).dropna()
    paired.columns = ['y_true', 'y_pred']
    paired = paired.dropna(axis='rows')

    return metric(paired['y_true'], paired['y_pred'])
51 | 
def highlight_top(data, color='yellow', greater_is_better=False):
    """Highlight the top value of the score table

    Parameters
    ----------
    data : pandas Series
        The series (column of the dataframe) handed over by df.apply()
    color : str, default: 'yellow'
        Color to use to mark the top value for each column
    greater_is_better : boolean, default: False
        If False the smallest value counts as the top value (error metrics),
        if True the largest one does. A series named
        'coeff. of determination' always uses the largest value.

    Returns
    -------
    list of str
        One CSS string per entry of ``data``: the background-color rule for
        every entry equal to the top value (ties are all marked) and '' for
        the rest.
    """
    attr = 'background-color: {}'.format(color)

    # The coefficient of determination is the one score where larger is
    # better, regardless of the flag.
    use_max = greater_is_better or data.name == 'coeff. of determination'
    top_value = data.max() if use_max else data.min()

    return [attr if is_top else '' for is_top in data == top_value]


--------------------------------------------------------------------------------