├── data └── figures │ ├── comparison.png │ └── comparison_table.png ├── requirements.txt ├── src ├── plotting.py └── scores.py ├── .gitignore └── README.md /data/figures/comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/giotto-ai/time_series_energy_demand/master/data/figures/comparison.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | giotto-time==0.2.0 2 | pandas==0.25.3 3 | plotly==4.1.0 4 | scikit-learn==0.22 5 | numpy==1.18.0 6 | jupyter 7 | -------------------------------------------------------------------------------- /data/figures/comparison_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/giotto-ai/time_series_energy_demand/master/data/figures/comparison_table.png -------------------------------------------------------------------------------- /src/plotting.py: -------------------------------------------------------------------------------- 1 | import plotly.graph_objects as go 2 | from plotly.subplots import make_subplots 3 | 4 | 5 | def plot_time_series(df, y_columns, title='', x_axis_title='', y_axis_title = '', fontsize=None): 6 | """Plot different time series with the same y-axis. 7 | 8 | Parameters 9 | ---------- 10 | df : pandas DataFrame 11 | The dataframe with the data to plot (incl. the x-values as an index) 12 | y_columns : list 13 | The columns of the dataframe to use. Maximal length = 2 14 | title : str, optional, default: '' 15 | Title to put above the plot 16 | x_axis_title : str, optional, default: '' 17 | Title of the x-axis 18 | y_axis_titles : list, optional, default: ['', ''] 19 | List of y_axis_titles to use. Maximal length = 2 20 | fontsize : int, optional, default: None 21 | Fontsize to use 22 | """ 23 | x = df.index 24 | 25 | fig = make_subplots(specs=[[{"secondary_y": False}]]) 26 | 27 | for i in y_columns: 28 | # Add traces 29 | fig.add_trace( 30 | go.Scatter(x=x, y=df[i].values, name=i), 31 | ) 32 | 33 | # Add figure title 34 | fig.update_layout( 35 | title={ 36 | 'text':title, 37 | 'y':0.9, 38 | 'x':0.5, 39 | } 40 | ) 41 | 42 | if fontsize != None: 43 | fig.update_layout( 44 | title={ 45 | 'text':title, 46 | 'y':0.9, 47 | 'x':0.5, 48 | }, 49 | font=dict( 50 | size=fontsize) 51 | ) 52 | 53 | # Set x-axis title 54 | fig.update_xaxes(title_text=x_axis_title) 55 | 56 | # Set y-axes titles 57 | fig.update_yaxes(title_text=y_axis_title) 58 | 59 | return fig -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | notebooks/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *.cover 46 | 47 | # Translations 48 | *.mo 49 | *.pot 50 | 51 | # Django stuff: 52 | *.log 53 | 54 | # Sphinx documentation 55 | docs/_build/ 56 | doc/build/ 57 | doc/generated/ 58 | doc/reference/generated/ 59 | 60 | # PyBuilder 61 | target/ 62 | 63 | # DotEnv configuration 64 | .env 65 | 66 | # Database 67 | *.db 68 | *.rdb 69 | 70 | # Pycharm 71 | .idea 72 | 73 | # VS Code 74 | .vscode/ 75 | 76 | # Spyder 77 | .spyproject/ 78 | 79 | # Jupyter NB Checkpoints 80 | .ipynb_checkpoints/ 81 | Untitled* 82 | 83 | # exclude data from source control by default 84 | /data/ 85 | 86 | # Mac OS-specific storage files 87 | .DS_Store 88 | 89 | # vim 90 | *.swp 91 | *.swo 92 | 93 | # Mypy cache 94 | .mypy_cache/ 95 | 96 | # ignore huge time_series_models 97 | models/*.joblib 98 | 99 | # Hypothesis 100 | .hypothesis/ 101 | 102 | # PyTest 103 | .pytest_cache/ 104 | 105 | # Excel temporary 106 | ~$*.xls* -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![logo](https://raw.githubusercontent.com/giotto-ai/giotto-tda/master/doc/images/tda_logo.svg) 2 | 3 | # Energy Demand Prediction in Switzerland with giotto-time 4 | 5 | ## What is it? 6 | This repository contains the code for the [blog post](https://towardsdatascience.com/getting-started-with-giotto-time-d9b2088d60ca) 'Energy Demand Prediction in Switzerland with giotto-time' where we use the Python time series library [giotto-time](https://github.com/giotto-ai/giotto-time) to predict the mean daily energy demand (in megawatts) in Switzerland 21 days ahead using generalized autoregression models and linear regression with custom loss functions, both of which are provided by giotto-time. 7 | 8 | The 'energy_demand_time_series.ipynb' showcases the most important functionalities of giotto-time and how to use them to: 9 | * remove trends and deal with seasonalities, 10 | * make a causality test and thereby find the ideal shift between one time series and another to make predictions, 11 | * easily create a range of different features, 12 | * use generalized autoregression models and linear regression with custom loss functions to make predictions using the 'fit/predict' methods. 13 | 14 | ## Getting started 15 | You want to start right away? The easiest way to get started is to create a conda environment as follows: 16 | ``` 17 | conda create python=3.7 --name time -y 18 | conda activate time 19 | pip install -r requirements.txt 20 | ``` 21 | Then the notebook 'energy_demand_time_series.ipynb' will walk you through the analysis and the prediction steps. 22 | 23 | ## Data 24 | The data used for this project was collected by swissgrid, a Swiss transmission grid operator, and can be found here: https://www.swissgrid.ch/en/home/operation/grid-data/generation.html. Hourly data for the years 2016 up to 2019 was used and collected in a file located in the data/raw directory. 25 | 26 | ## Results 27 | In this section we present the results. The figure below shows the reference values as well as the predictions for one of the models shown in the notebook. 28 | 29 | ![alt text](data/figures/comparison.png) 30 | 31 | 32 | An important point for this tutorial is to show different models giotto-time has to offer. In the table below we list the results for different models and with different metrics. The best results per column are marked in yellow. 33 | 34 | ![alt text](data/figures/comparison_table.png) 35 | -------------------------------------------------------------------------------- /src/scores.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics import mean_absolute_error, make_scorer 2 | import numpy as np 3 | import pandas as pd 4 | 5 | 6 | def relative_mean_absolute_error(y_true, y_pred): 7 | """Calculate the relative mean absolute error 8 | Parameters 9 | ---------- 10 | y_true : array-like object 11 | The reference values 12 | y_pred : array-like object 13 | The predicted values 14 | """ 15 | diff = np.abs(y_true-y_pred) 16 | return np.mean(diff/y_true) 17 | 18 | def relative_mean_squared_error(y_true, y_pred): 19 | """Calculate the relative mean squared error 20 | Parameters 21 | ---------- 22 | y_true : array-like object 23 | The reference values 24 | y_pred : array-like object 25 | The predicted values 26 | """ 27 | diff_squared = np.abs(y_true-y_pred)**2 28 | return np.mean(diff_squared/(y_true**2)) 29 | 30 | def calculate_score(y_true, y_pred, metric=mean_absolute_error): 31 | """Function to calculate a score with a given metric for the output of the GAR model 32 | 33 | Parameters 34 | ---------- 35 | y_true : pandas DataFrame 36 | The dataframe with the reference data (has NaNs in the lower right half) 37 | y_pred : pandas DataFrame 38 | The dataframe with the predicted values, i.e. the output of the GAR model 39 | metric : object, optional, default: mean_absolute_error (from scikit-learn) 40 | A function that calculates a score and takes as input y_true and y_pred (e.g. 41 | from scikit-learn) 42 | """ 43 | df_results = pd.DataFrame(y_true.values, 44 | index=y_true.index, 45 | columns=['left']).join(pd.DataFrame(y_pred.values, 46 | index=y_pred.index)).dropna() 47 | df_results.columns = ['y_true', 'y_pred'] 48 | df_results.dropna(axis='rows', inplace=True) 49 | score = metric(df_results['y_true'], df_results['y_pred']) 50 | return score 51 | 52 | def highlight_top(data, color='yellow', greater_is_better=False): 53 | """Highlight the top value of the score table 54 | Parameters 55 | ---------- 56 | data : pandas Series 57 | The series (columns of the dataframe) return by df.apply() 58 | color : str, default: 'yellow' 59 | Color to use to mark the top value for each column 60 | greater_is_better : boolean, default: False 61 | For the correlation test, greater is better (and don't highlight the diagonal) 62 | """ 63 | attr = 'background-color: {}'.format(color) 64 | 65 | if data.name == 'coeff. of determination': 66 | is_max = data == data.max() # because top value is 1.0 (larger is better) 67 | else: 68 | if greater_is_better==False: 69 | is_max = data == data.min() # others are error functions (smaller is better) 70 | else: 71 | is_max = data == data.max() 72 | return [attr if v else '' for v in is_max] --------------------------------------------------------------------------------