├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── images ├── hierarchy.png ├── sktime-logo-text-horizontal.png ├── tabularization.png ├── tracking_artifact_ui.png └── ts_quiz.png ├── notebooks ├── 01_introduction.ipynb ├── 02_timeseries.ipynb ├── 03_forecasting.ipynb ├── 04_feateng_pipe_tune.ipynb ├── 05_panel_tasks.ipynb ├── 06_distances_kernels_alignment.ipynb ├── 07_MLengineering.ipynb ├── 08_mlflow.ipynb ├── 09_outro.ipynb ├── hierarchical_demo_utils.py └── img │ ├── ask_chatgpt.png │ ├── estimator-conceptual-model.jpg │ ├── implementing_estimators.jpg │ ├── implementing_estimators.png │ ├── sklearn-unified-interface.jpg │ ├── sktime-logo-text-horizontal.jpg │ ├── tasks-forecasting.jpg │ ├── tasks-forecasting.png │ ├── tasks-tsc-large.png │ ├── tasks-tsc.png │ ├── ts-tasks.jpg │ ├── ts-tasks.png │ ├── unified_framework.png │ └── verdena_shapelet.png └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # IDE 132 | .vscode/ 133 | 134 | # MacOS 135 | .DS_Store 136 | *.csv 137 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | 3 | - repo: https://github.com/pre-commit/pre-commit-hooks 4 | rev: v4.0.1 5 | hooks: 6 | - id: check-added-large-files 7 | args: ['--maxkb=1000'] 8 | - id: check-case-conflict 9 | - id: check-merge-conflict 10 | - id: check-symlinks 11 | - id: check-yaml 12 | - id: debug-statements 13 | - id: end-of-file-fixer 14 | - id: fix-encoding-pragma 15 | - id: requirements-txt-fixer 16 | - id: trailing-whitespace 17 | 18 | - repo: https://github.com/pycqa/isort 19 | rev: 5.8.0 20 | hooks: 21 | - id: isort 22 | name: isort (python) 23 | 24 | - repo: https://github.com/psf/black 25 | rev: 21.5b1 26 | hooks: 27 | - id: black 28 | language_version: python3 29 | # args: [--line-length 79] 30 | 31 | - repo: https://github.com/pycqa/flake8 32 | rev: 3.9.2 33 | hooks: 34 | - id: flake8 35 | exclude: docs/conf.py 36 | additional_dependencies: [flake8-bugbear, flake8-print] 37 | args: ["--max-line-length=88"] 38 | 39 | - repo: https://github.com/nbQA-dev/nbQA 40 | rev: 0.13.0 41 | hooks: 42 | - id: nbqa-black 43 | args: [--nbqa-mutate, --nbqa-dont-skip-bad-cells] 44 | additional_dependencies: [black==20.8b1] 45 | # - id: 
nbqa-isort 46 | # args: [--nbqa-mutate, --nbqa-dont-skip-bad-cells] 47 | # additional_dependencies: [isort==5.6.4] 48 | - id: nbqa-flake8 49 | args: [--nbqa-dont-skip-bad-cells, "--extend-ignore=E402,E203", "--max-line-length=88"] 50 | additional_dependencies: [flake8==3.8.3] 51 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2023 The sktime developers. 4 | 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | * Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived from 19 | this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![](images/team.jpg) 2 | 3 | Welcome to the sktime tutorial at Europython 2023 4 | ================================================= 5 | 6 | This tutorial is about [sktime] - a unified framework for machine learning with time series. sktime contains algorithms and tools for building, applying, evaluating modular pipelines and composites for a variety of time series learning tasks, including forecasting, classification, regression. 7 | 8 | `sktime` is easily extensible by anyone, and interoperable with the python data science stack. 
9 | 10 | This is an introductory `sktime` half-day tutorial with: 11 | 12 | * a general introduction to `sktime` 13 | * forecasting with `sktime` - uni/multivariate, hierarchical/global, probabilistic 14 | * feature extraction, transformation pipelines, parameter tuning, autoML 15 | * time series classification, regression, and clustering with `sktime` 16 | * customizing time series distances, kernels, time series aligners and alignment distances 17 | * engineering topics: 18 | * APIs, estimator and dependency management 19 | * writing `sktime` compatible 3rd party estimators 20 | * deploying `sktime` in production using `mlflow` with the `mlflavors` plugin 21 | 22 | [sktime]: https://sktime.net 23 | 24 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/sktime/sktime-tutorial-europython-2023/main?filepath=notebooks) [![!discord](https://img.shields.io/static/v1?logo=discord&label=discord&message=chat&color=lightgreen)](https://discord.com/invite/54ACzaFsn7) [![!linkedin](https://img.shields.io/static/v1?logo=linkedin&label=LinkedIn&message=news&color=lightblue)](https://www.linkedin.com/company/scikit-time/) 25 | 26 | ## :rocket: How to get started 27 | 28 | In the tutorial, we will move through notebooks section by section. 29 | 30 | You have several options for running the tutorial notebooks: 31 | 32 | * Run the notebooks in the cloud on [Binder] - for this you don't have to install anything! 33 | * Run the notebooks on your machine. [Clone] this repository, get [conda], install the required packages (`sktime`, `seaborn`, `jupyter`) in an environment, and open the notebooks with that environment. For detailed instructions, see below. For troubleshooting, see sktime's more detailed [installation instructions]. 34 | * or, use python venv, and/or an editable install of this repo as a package. Instructions below. 
35 | 36 | [Binder]: https://mybinder.org/v2/gh/sktime/sktime-tutorial-europython-2023/main?filepath=notebooks 37 | [clone]: https://help.github.com/en/github/creating-cloning-and-archiving-repositories/cloning-a-repository 38 | [conda]: https://docs.conda.io/en/latest/ 39 | [installation instructions]: https://www.sktime.net/en/latest/installation.html 40 | 41 | Please let us know on the [sktime discord](https://discord.com/invite/54ACzaFsn7) if you have any issues during the conference, or join to ask for help anytime. 42 | 43 | ## :bulb: Description 44 | 45 | This tutorial presents [sktime] - a unified framework for machine learning with time series. sktime covers multiple time series learning problems, including time series transformation, classification and forecasting, among others. `sktime` allows you to easily apply an algorithm for one task to solve another (e.g. a scikit-learn regressor to solve a forecasting problem). In the tutorial, you will learn about how you can identify these problems, what their key differences are and how they are related. 46 | 47 | `sktime` provides various time series algorithms and modular composition tools for pipelining, ensembling and tuning. 48 | `sktime` also provides API compatible interfaces to many popular libraries, such as `statsmodels`, `prophet`, `statsforecast`, `tslearn`, `tsfresh`, etc, 49 | which can be readily combined using `sktime` composition patterns. 50 | 51 | In this tutorial, you will learn how to use, combine, tune and evaluate different algorithms on real-world data sets. 52 | The tutorial consists of step-by-step walkthroughs using Jupyter Notebooks. 53 | 54 | `sktime` is not just a package, but also an active community which aims to be welcoming to new joiners. 55 | We invite anyone to get involved as a developer, user, supporter (or any combination of these). 
56 | 57 | ## :movie_camera: Other Tutorials: 58 | 59 | - [Pydata Berlin 2022 - Advanced Forecasting Tutorial](https://www.youtube.com/watch?v=4Rf9euAhjNc) 60 | 61 | - [Pydata London 2022 - How to implement your own estimator in sktime](https://www.youtube.com/watch?v=S_3ewcvs_pg) 62 | 63 | - [Pydata Global 2022 - Feature extraction, Pipelines, Tuning](https://github.com/sktime/sktime-tutorial-pydata-global-2022) 64 | 65 | - [Pydata London 2023 - Time Series Classification, Regression, Distances & Kernels](https://github.com/sktime/sktime-tutorial-pydata-london-2023) 66 | 67 | ## :wave: How to contribute 68 | 69 | If you're interested in contributing to sktime, you can find out more how to get involved [here](https://www.sktime.net/en/latest/get_involved.html). 70 | 71 | Any contributions are welcome, not just code! 72 | 73 | We also invite everyone to the "getting started with contributions" onboarding feature at the community sprint at EuroPython (July 22-23)! 74 | 75 | ## Installation instructions for local use 76 | 77 | To run the notebooks locally, you will need: 78 | 79 | * a local repository clone 80 | * a python environment with required packages installed 81 | 82 | ### Cloning the repository 83 | 84 | To clone the repository locally: 85 | 86 | `git clone https://github.com/sktime/sktime-tutorial-europython-2023.git` 87 | 88 | ### Using conda env 89 | 90 | 1. Create a python virtual environment: 91 | `conda create -y -n europython_sktime python=3.9` 92 | 2. Install required packages: 93 | `conda install -y -n europython_sktime pip sktime seaborn jupyter pmdarima statsmodels dtw-python` 94 | 3. Activate your environment: 95 | `conda activate europython_sktime` 96 | 4. If using jupyter: make the environment available in jupyter: 97 | `python -m ipykernel install --user --name=europython_sktime` 98 | 99 | ### Using python venv 100 | 101 | 1. Create a python virtual environment: 102 | `python -m venv europython_sktime` 103 | 2. 
Activate your environment: 104 | `source europython_sktime/bin/activate` 105 | 3. Install the requirements: 106 | `pip install sktime seaborn jupyter pmdarima statsmodels dtw-python` 107 | 4. If using jupyter: make the environment available in jupyter: 108 | `python -m ipykernel install --user --name=europython_sktime` 109 | -------------------------------------------------------------------------------- /images/hierarchy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/images/hierarchy.png -------------------------------------------------------------------------------- /images/sktime-logo-text-horizontal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/images/sktime-logo-text-horizontal.png -------------------------------------------------------------------------------- /images/tabularization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/images/tabularization.png -------------------------------------------------------------------------------- /images/tracking_artifact_ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/images/tracking_artifact_ui.png -------------------------------------------------------------------------------- /images/ts_quiz.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/images/ts_quiz.png 
-------------------------------------------------------------------------------- /notebooks/01_introduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "![](./img/sktime-logo-text-horizontal.jpg)" 9 | ] 10 | }, 11 | { 12 | "attachments": {}, 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "### Agenda for today\n", 17 | "\n", 18 | "1. General introduction to `sktime` & `sklearn`\n", 19 | "\n", 20 | "2. forecasting with `sktime`\n", 21 | "\n", 22 | "3. feature extraction, tuning, autoML\n", 23 | "\n", 24 | "4. time series classification, regression, and clustering with `sktime`\n", 25 | "\n", 26 | "5. customizing time series distances, kernels, time series aligners and alignment distances\n", 27 | "\n", 28 | "6. overview from ML eng & API perspective - estimators modules, learning tasks, library\n", 29 | "\n", 30 | "7. 
deployment with `mlflow` / `mlflavors`" 31 | ] 32 | }, 33 | { 34 | "attachments": {}, 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "### Running the notebooks" 39 | ] 40 | }, 41 | { 42 | "attachments": {}, 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "all notebooks available on `github.com/sktime`\n", 47 | "\n", 48 | "repository: `github.com/sktime/sktime-tutorial-europython-2023`\n", 49 | "\n", 50 | "* README instructions to run notebooks locally\n", 51 | "* binder to run notebooks in the cloud (if wifi allows)\n", 52 | "\n", 53 | "help, Q&A, developer chat in EuroPython [`sktime` tutorial discord thread](https://discord.com/channels/1120766458528542794/1130170803137282118)" 54 | ] 55 | }, 56 | { 57 | "attachments": {}, 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## 1 - Introduction to ``sktime``\n", 62 | "\n", 63 | "### 1.1 What is ``sktime``?\n", 64 | "\n", 65 | "- `sktime` is a python library for time series learning tasks!\n", 66 | " - check [our website](https://www.sktime.net/en/latest/index.html)!\n", 67 | " - integrative framework layer in the time series space" 68 | ] 69 | }, 70 | { 71 | "attachments": {}, 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "- `sklearn` / `sktime` interface:\n", 76 | " - unified interface for objects/estimators\n", 77 | " - modular design, strategy pattern\n", 78 | " - composable, composites are interface homogenous\n", 79 | " - simple specification language and parameter interface" 80 | ] 81 | }, 82 | { 83 | "attachments": {}, 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "- `sktime` is a vibrant, welcoming community with mentoring opportunities!\n", 88 | " - We *love* new contributors. Especially if you are new to open source!\n", 89 | " - join the ``sktime`` sprint this week-end at EuroPython! 
Gentle intro to contributing!\n", 90 | " - Check out the ``sktime`` [new contributors guide](https://www.sktime.net/en/latest/get_involved/contributing.html)\n", 91 | " - join our [discord](https://discord.com/invite/54ACzaFsn7) and/or one of our regular meetups!\n", 92 | " - follow us on [LinkedIn](https://www.linkedin.com/company/scikit-time/)!\n", 93 | " - star us on [GitHub](https://github.com/sktime/sktime)!" 94 | ] 95 | }, 96 | { 97 | "attachments": {}, 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "### 1.2 sklearn unified interface - the strategy pattern\n", 102 | "\n", 103 | "`sklearn` provides a unified interface to multiple learning tasks including classification, regression.\n", 104 | "\n", 105 | "any (supervised) estimator has the following interface points\n", 106 | "\n", 107 | "1. **Instantiate** your model of choice, with parameter settings\n", 108 | "2. **Fit** the instance of your model\n", 109 | "3. Use that fitted instance to **predict** new data!\n", 110 | "\n", 111 | "![](./img/estimator-conceptual-model.jpg)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 1, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "# get data to use the model on\n", 121 | "from sklearn.datasets import load_iris\n", 122 | "from sklearn.model_selection import train_test_split\n", 123 | "\n", 124 | "X, y = load_iris(return_X_y=True, as_frame=True)\n", 125 | "X_train, X_test, y_train, y_test = train_test_split(X, y)\n", 126 | "\n", 127 | "# data are pd.DataFrame/pd.Series\n", 128 | "# X_train.shape = (112, 4) - 112 train flowers times 4 features\n", 129 | "# sepal length, sepal width, petal length, petal width\n", 130 | "# y_train.shape = (112,) - 112 categorical labels - one of 3 flower types 0, 1, 2\n", 131 | "# X_test.shape = (38, 4) - 38 test flowers times 4 features\n", 132 | "# y_test.shape = (38,) - 38 categorical labels - one of 3 flower types\n" 133 | ] 134 | }, 135 | { 136 | 
"cell_type": "code", 137 | "execution_count": 2, 138 | "metadata": {}, 139 | "outputs": [ 140 | { 141 | "data": { 142 | "text/plain": [ 143 | "array([2, 1, 2, 0, 1, 2, 1, 1, 2, 0, 2, 0, 2, 0, 1, 1, 0, 2, 0, 0, 2, 2,\n", 144 | " 0, 1, 1, 0, 2, 0, 0, 0, 1, 0, 0, 1, 1, 2, 0, 2])" 145 | ] 146 | }, 147 | "execution_count": 2, 148 | "metadata": {}, 149 | "output_type": "execute_result" 150 | } 151 | ], 152 | "source": [ 153 | "from sklearn.svm import SVC\n", 154 | "\n", 155 | "# 1. Instantiate SVC with parameters gamma, C\n", 156 | "clf = SVC(gamma=0.001, C=100.)\n", 157 | "# clf is an instance of SVC now\n", 158 | "\n", 159 | "# 2. Fit clf to training data = 112 feature/label pairs\n", 160 | "clf.fit(X_train, y_train)\n", 161 | "# clf changes state to \"fitted\", computes model\n", 162 | "\n", 163 | "# 3. Predict labels on test data = 38 feature vectors\n", 164 | "y_test_pred = clf.predict(X_test)\n", 165 | "# produces predictions for the test data, 38 labels\n", 166 | "\n", 167 | "y_test_pred\n", 168 | "# y_test_pred.shape = (38,)" 169 | ] 170 | }, 171 | { 172 | "attachments": {}, 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "IMPORTANT: to use another classifier, only the specification line, part 1 changes!\n", 177 | "\n", 178 | "`SVC` could have been `RandomForest`, steps 2 and 3 remain the same - unified interface:" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 3, 184 | "metadata": {}, 185 | "outputs": [ 186 | { 187 | "data": { 188 | "text/plain": [ 189 | "array([2, 1, 2, 0, 1, 2, 1, 1, 2, 0, 2, 0, 2, 0, 1, 1, 0, 2, 0, 0, 2, 2,\n", 190 | " 0, 1, 1, 0, 2, 0, 0, 0, 1, 0, 0, 1, 1, 2, 0, 2])" 191 | ] 192 | }, 193 | "execution_count": 3, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "from sklearn.ensemble import RandomForestClassifier\n", 200 | "\n", 201 | "# 1. 
Instantiate RandomForestClassifier with parameter n_estimators\n", 202 | "clf = RandomForestClassifier(n_estimators=100)\n", 203 | "\n", 204 | "# 2. Fit clf to training data\n", 205 | "clf.fit(X_train, y_train)\n", 206 | "\n", 207 | "# 3. Predict labels on test data\n", 208 | "y_test_pred = clf.predict(X_test)\n", 209 | "\n", 210 | "y_test_pred" 211 | ] 212 | }, 213 | { 214 | "attachments": {}, 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "in object oriented design terminology, this is called **\"strategy pattern\"**\n", 219 | "\n", 220 | "= different estimators can be switched out without change to the interface\n", 221 | "\n", 222 | "= like a power plug adapter, it's plug&play if it conforms with the interface\n", 223 | "\n", 224 | "Pictorial summary:\n", 225 | "![](./img/sklearn-unified-interface.jpg)" 226 | ] 227 | }, 228 | { 229 | "attachments": {}, 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "`sklearn` estimators are parametric:\n", 234 | "\n", 235 | "all parameters in the \"blueprint\" are accessed and set via `get_params`, `set_params`:" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 4, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "data": { 245 | "text/plain": [ 246 | "{'bootstrap': True,\n", 247 | " 'ccp_alpha': 0.0,\n", 248 | " 'class_weight': None,\n", 249 | " 'criterion': 'gini',\n", 250 | " 'max_depth': None,\n", 251 | " 'max_features': 'sqrt',\n", 252 | " 'max_leaf_nodes': None,\n", 253 | " 'max_samples': None,\n", 254 | " 'min_impurity_decrease': 0.0,\n", 255 | " 'min_samples_leaf': 1,\n", 256 | " 'min_samples_split': 2,\n", 257 | " 'min_weight_fraction_leaf': 0.0,\n", 258 | " 'n_estimators': 100,\n", 259 | " 'n_jobs': None,\n", 260 | " 'oob_score': False,\n", 261 | " 'random_state': None,\n", 262 | " 'verbose': 0,\n", 263 | " 'warm_start': False}" 264 | ] 265 | }, 266 | "execution_count": 4, 267 | "metadata": {}, 268 | "output_type": "execute_result" 269 | } 270 | ], 271 
| "source": [ 272 | "clf.get_params()" 273 | ] 274 | }, 275 | { 276 | "attachments": {}, 277 | "cell_type": "markdown", 278 | "metadata": {}, 279 | "source": [ 280 | "### 1.3 `sktime` is devoted to time-series data analysis\n", 281 | "\n", 282 | "Richer space of time series tasks, compared to \"tabular\":\n", 283 | "\n", 284 | "- **Forecasting** - predict energy consumption tomorrow, based on past weeks\n", 285 | "- **Classification** - classify electrocardiograms to healthy/sick, based on prior examples\n", 286 | "- **Regression** - predict compound purity in bioreactor based on temperature/pressure profile\n", 287 | "- **Clustering** - sort outlines of tree leaves into a small number of similar classes\n", 288 | "- **Annotation** - identify jumps, anomalies, events in a data stream" 289 | ] 290 | }, 291 | { 292 | "attachments": {}, 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "`sktime` aims to provide `sklearn`-like, modular, composable, interfaces for these!" 
297 | ] 298 | }, 299 | { 300 | "attachments": {}, 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "| Task | Status | Links |\n", 305 | "|---|---|---|\n", 306 | "| **Forecasting** | stable | [Tutorial](https://www.sktime.net/en/latest/examples/01_forecasting.html) · [API Reference](https://www.sktime.net/en/latest/api_reference/forecasting.html) · [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/forecasting.py) |\n", 307 | "| **Time Series Classification** | stable | [Tutorial](https://github.com/sktime/sktime/blob/main/examples/02_classification.ipynb) · [API Reference](https://www.sktime.net/en/latest/api_reference/classification.html) · [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/classification.py) |\n", 308 | "| **Time Series Regression** | stable | [API Reference](https://www.sktime.net/en/latest/api_reference/regression.html) |\n", 309 | "| **Transformations** | stable | [Tutorial](https://github.com/sktime/sktime/blob/main/examples/03_transformers.ipynb) · [API Reference](https://www.sktime.net/en/latest/api_reference/transformations.html) · [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/transformer.py) |\n", 310 | "| **Parameter fitting** | maturing | [API Reference](https://www.sktime.net/en/latest/api_reference/param_est.html) · [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/transformer.py) |\n", 311 | "| **Time Series Clustering** | maturing | [API Reference](https://www.sktime.net/en/latest/api_reference/clustering.html) · [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/clustering.py) |\n", 312 | "| **Time Series Distances/Kernels** | maturing | [Tutorial](https://github.com/sktime/sktime/blob/main/examples/03_transformers.ipynb) · [API Reference](https://www.sktime.net/en/latest/api_reference/dists_kernels.html) · [Extension 
Template](https://github.com/sktime/sktime/blob/main/extension_templates/dist_kern_panel.py) |\n", 313 | "| **Annotation** | experimental | [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/annotation.py) |\n", 314 | "| **Distributions and simulation** | experimental | |" 315 | ] 316 | }, 317 | { 318 | "attachments": {}, 319 | "cell_type": "markdown", 320 | "metadata": {}, 321 | "source": [ 322 | "Example - forecasting" 323 | ] 324 | }, 325 | { 326 | "attachments": {}, 327 | "cell_type": "markdown", 328 | "metadata": {}, 329 | "source": [ 330 | "![](./img/tasks-forecasting.png)" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 5, 336 | "metadata": {}, 337 | "outputs": [], 338 | "source": [ 339 | "from sktime.datasets import load_airline\n", 340 | "from sktime.forecasting.naive import NaiveForecaster\n", 341 | "import numpy as np\n", 342 | "\n", 343 | "# step 1: data specification\n", 344 | "y = load_airline()\n", 345 | "# y = pandas.Series, y.shape = (144,)\n", 346 | "# 144 months of monthly observations\n", 347 | "\n", 348 | "# step 2: specifying forecasting horizon\n", 349 | "fh = np.arange(1, 37)\n", 350 | "# we want to forecast 36 months = 3 years\n", 351 | "\n", 352 | "# step 3: specifying the forecasting algorithm\n", 353 | "forecaster = NaiveForecaster(strategy=\"last\", sp=12)\n", 354 | "\n", 355 | "# step 4: fitting the forecaster\n", 356 | "forecaster.fit(y)\n", 357 | "\n", 358 | "# step 5: querying predictions\n", 359 | "y_pred = forecaster.predict(fh)\n", 360 | "# y_pred is a \"continuation\" of y\n", 361 | "# y_pred.shape = (36,) = forecasts for 36 months" 362 | ] 363 | }, 364 | { 365 | "attachments": {}, 366 | "cell_type": "markdown", 367 | "metadata": {}, 368 | "source": [ 369 | "Example - classification" 370 | ] 371 | }, 372 | { 373 | "attachments": {}, 374 | "cell_type": "markdown", 375 | "metadata": {}, 376 | "source": [ 377 | "![](./img/tasks-tsc.png)" 378 | ] 379 | }, 380 | { 381 | 
"cell_type": "code", 382 | "execution_count": 6, 383 | "metadata": {}, 384 | "outputs": [], 385 | "source": [ 386 | "from sktime.datasets import load_osuleaf\n", 387 | "from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier\n", 388 | "from sktime.dists_kernels.compose_tab_to_panel import FlatDist\n", 389 | "from sktime.dists_kernels import ScipyDist\n", 390 | "\n", 391 | "# step 1 - specify training data\n", 392 | "X_train, y_train = load_osuleaf(split=\"train\", return_type=\"numpy3D\")\n", 393 | "\n", 394 | "# step 2 - specify data to predict labels for\n", 395 | "X_new, _ = load_osuleaf(split=\"test\", return_type=\"numpy3D\")\n", 396 | "X_new = X_new[:2]\n", 397 | "\n", 398 | "# step 3 - specify the classifier\n", 399 | "mean_eucl_dist = FlatDist(ScipyDist())\n", 400 | "clf = KNeighborsTimeSeriesClassifier(n_neighbors=3, distance=mean_eucl_dist)\n", 401 | "\n", 402 | "# step 4 - fitting the classifier\n", 403 | "clf.fit(X_train, y_train)\n", 404 | "\n", 405 | "# step 5 - predict labels on new data\n", 406 | "y_pred = clf.predict(X_new)" 407 | ] 408 | }, 409 | { 410 | "attachments": {}, 411 | "cell_type": "markdown", 412 | "metadata": {}, 413 | "source": [ 414 | "### 1.4 `sktime` integrates the time series modelling ecosystem!\n", 415 | "\n", 416 | "the package space for time series is highly fragmented:\n", 417 | "\n", 418 | "* lots of great implementations and methods out there!\n", 419 | "* but many different interfaces, not composable like `sklearn`\n", 420 | "\n", 421 | "`sktime` integrates the ecosystem - in friendly collaboration with all the packages out there!\n", 422 | "\n", 423 | "* unified interface standard\n", 424 | "* highly composable\n", 425 | "* mini-package manager on estimator/module level\n", 426 | "* easily extensible - 3rd party plugins, other packages\n", 427 | "\n", 428 | "![](./img/unified_framework.png)" 429 | ] 430 | }, 431 | { 432 | "attachments": {}, 433 | "cell_type": "markdown", 434 | "metadata": {}, 435 | 
"source": [ 436 | "### 1.5 Summary/What is next!\n", 437 | "\n", 438 | "- `sklearn` interface: unified interface (strategy pattern), modular, composition stable, easy specification language\n", 439 | "- `sktime` evolves the interface for time series learning tasks\n", 440 | "- `sktime` integrates a fragmented ecosystem with interface, composability, dependency management\n", 441 | "\n", 442 | "- today:\n", 443 | " * deep dive forecasting\n", 444 | " * feature extraction, tuning, autoML\n", 445 | " * deep dive classification and panel tasks regression, clustering\n", 446 | " * time series distances, kernels, alignment\n", 447 | " * engineering/API perspective, deployment" 448 | ] 449 | }, 450 | { 451 | "attachments": {}, 452 | "cell_type": "markdown", 453 | "metadata": {}, 454 | "source": [ 455 | "---\n", 456 | "### Credits: notebook 1 - `sktime` intro\n", 457 | "\n", 458 | "notebook creation: fkiraly, marrov\n", 459 | "\n", 460 | "some vignettes based on existing `sktime` tutorials, credit: fkiraly, miraep8\n", 461 | "\n", 462 | "slides (png/jpg):\n", 463 | "\n", 464 | "* from fkiraly's postgraduate course at UCL, Principles and Patterns in Data Scientific Software Engineering\n", 465 | "* ecosystem slide: fkiraly, mloning\n", 466 | "* learning tasks: fkiraly, mloning\n", 467 | "\n", 468 | "General credit also to `sklearn` and `sktime` contributors" 469 | ] 470 | } 471 | ], 472 | "metadata": { 473 | "kernelspec": { 474 | "display_name": "Python 3.8.13 ('pydata22')", 475 | "language": "python", 476 | "name": "python3" 477 | }, 478 | "language_info": { 479 | "codemirror_mode": { 480 | "name": "ipython", 481 | "version": 3 482 | }, 483 | "file_extension": ".py", 484 | "mimetype": "text/x-python", 485 | "name": "python", 486 | "nbconvert_exporter": "python", 487 | "pygments_lexer": "ipython3", 488 | "version": "3.11.3" 489 | }, 490 | "orig_nbformat": 4, 491 | "vscode": { 492 | "interpreter": { 493 | "hash": 
"e61b44dca3bf47c8973c8cd627825697e2dad493e19dd6592afda0a0a3c312a0" 494 | } 495 | } 496 | }, 497 | "nbformat": 4, 498 | "nbformat_minor": 2 499 | } 500 | -------------------------------------------------------------------------------- /notebooks/05_panel_tasks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "### Overview of this notebook\n", 9 | "\n", 10 | "* Introduction to time series classification, regression, clustering\n", 11 | "* `sktime` data format for \"time series panels\" = collections of time series\n", 12 | "* Basic vignettes for TSC, TSR, TSCl\n", 13 | "* Advanced vignettes - pipelines, ensembles, tuning" 14 | ] 15 | }, 16 | { 17 | "attachments": {}, 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# 5. Learning tasks - Classification, Regression, Clustering & more \n", 22 | "\n", 23 | "deal with *collections of time series* = \"panel data\"\n", 24 | "\n", 25 | "Classification = try to assign one *category* per time series, after training on time series/category examples\n", 26 | "\n", 27 | "Example: daily energy consumption profile over time - Predict season, e.g., winter/summer, or type of consumer\n", 28 | "\n", 29 | "Regression = try to assign one *numerical value* per time series, after training on time series/value examples\n", 30 | "\n", 31 | "Example: temperature/pressure/time profile of chemical reactor. 
Predict total purity (fraction of 1)\n", 32 | "\n", 33 | "Clustering = put different time series in a small number of similarity buckets" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 1, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "import numpy as np\n", 43 | "import pandas as pd\n", 44 | "\n", 45 | "# Increase display width\n", 46 | "pd.set_option('display.width', 1000)" 47 | ] 48 | }, 49 | { 50 | "attachments": {}, 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "## 5.1 Panel data - `sktime` data formats \n", 55 | "\n", 56 | "`Panel` abstract data type = values observed for:\n", 57 | "\n", 58 | "* `instance`, e.g., patient\n", 59 | "* `variable`, e.g., blood pressure, body temperature\n", 60 | "* `time`/`index`, e.g., January 12, 2023 (usually but not necessarily a time index!)\n", 61 | "\n", 62 | "One value X is: \"patient A had blood pressure X on January 12, 2023\"\n", 63 | "\n", 64 | "time series classification, regression, clustering: slices `Panel` data by instance" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "\n", 72 | "Preferred format 1: `pd.DataFrame` with 2-level `MultiIndex`, (instance, time), cols= variables\n", 73 | "\n", 74 | "Preferred format 2: 3D `np.ndarray` with index (instance, variable, time)\n", 75 | "\n", 76 | "* `sktime` supports and recognizes multiple data formats for convenience and internal use, e.g., `dask`, `xarray`\n", 77 | "* abstract data type = \"scitype\"; in-memory specification = \"mtype\"\n", 78 | "* More information in tutorial on [in-memory data representations and data loading](https://www.sktime.net/en/latest/examples/AA_datatypes_and_datasets.html#In-memory-data-representations-and-data-loading)" 79 | ] 80 | }, 81 | { 82 | "attachments": {}, 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "### 5.1.1 preferred format 1 - `pd-multiindex` specification" 87 | ] 88 | }, 89 | { 90 | 
"attachments": {}, 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "`pd-multiindex` = `pd.DataFrame` with 2-level `MultiIndex`, (instance, time), cols= variables" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 2, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "from sktime.datasets import load_italy_power_demand\n", 104 | "\n", 105 | "# load an example time series panel in pd-multiindex mtype\n", 106 | "X, _ = load_italy_power_demand(return_type=\"pd-multiindex\")\n", 107 | "\n", 108 | "# renaming columns for illustrative purposes\n", 109 | "X.columns = [\"total_power_demand\"]\n", 110 | "X.index.names = [\"day_ID\", \"hour_of_day\"]" 111 | ] 112 | }, 113 | { 114 | "attachments": {}, 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "The Italy power demand dataset has:\n", 119 | "\n", 120 | "* 1096 individual time series instances = single days of total power demand (mean subtracted)\n", 121 | "* one single variable per time series instances, `total_power_demand`\n", 122 | " * total power demand on that day, in that hourly period\n", 123 | " * hence a univariate dataset\n", 124 | "* individual time series are observed at 24 time (period) points (the same number for all instances)\n", 125 | "\n", 126 | "In the dataset, days are jumbled and of different scope (independent sampling).\n", 127 | "* considered independent\n", 128 | "* for task, e.g., \"identify season or weekday/week-end from pattern\"" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 3, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "data": { 138 | "text/html": [ 139 | "
\n", 140 | "\n", 153 | "\n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | "
total_power_demand
day_IDhour_of_day
00-0.710518
1-1.183320
2-1.372442
3-1.593083
4-1.467002
.........
1095190.180490
20-0.094058
210.729587
220.210995
23-0.002542
\n", 216 | "

26304 rows × 1 columns

\n", 217 | "
" 218 | ], 219 | "text/plain": [ 220 | " total_power_demand\n", 221 | "day_ID hour_of_day \n", 222 | "0 0 -0.710518\n", 223 | " 1 -1.183320\n", 224 | " 2 -1.372442\n", 225 | " 3 -1.593083\n", 226 | " 4 -1.467002\n", 227 | "... ...\n", 228 | "1095 19 0.180490\n", 229 | " 20 -0.094058\n", 230 | " 21 0.729587\n", 231 | " 22 0.210995\n", 232 | " 23 -0.002542\n", 233 | "\n", 234 | "[26304 rows x 1 columns]" 235 | ] 236 | }, 237 | "execution_count": 3, 238 | "metadata": {}, 239 | "output_type": "execute_result" 240 | } 241 | ], 242 | "source": [ 243 | "X" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 4, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "from sktime.datasets import load_basic_motions\n", 253 | "\n", 254 | "# load an example time series panel in pd-multiindex mtype\n", 255 | "X, _ = load_basic_motions(return_type=\"pd-multiindex\")\n", 256 | "\n", 257 | "# renaming columns for illustrative purposes\n", 258 | "X.columns = [\"accel_1\", \"accel_2\", \"accel_3\", \"gyro_1\", \"gyro_2\", \"gyro_3\"]\n", 259 | "X.index.names = [\"trial_no\", \"timepoint\"]" 260 | ] 261 | }, 262 | { 263 | "attachments": {}, 264 | "cell_type": "markdown", 265 | "metadata": {}, 266 | "source": [ 267 | "The basic motions dataset has:\n", 268 | "\n", 269 | "* 80 individual time series instances = trials = person engaging in activity (running, badminton, etc)\n", 270 | "* six variables per time series instance, `dim_0` to `dim_5`\n", 271 | " * 3 accelerometer and 3 gyrometer measurements\n", 272 | " * hence a multivariate dataset\n", 273 | "* individual time series are observed at 100 time points (the same number for all instances)" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 5, 279 | "metadata": {}, 280 | "outputs": [ 281 | { 282 | "data": { 283 | "text/html": [ 284 | "
\n", 285 | "\n", 298 | "\n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | "
accel_1accel_2accel_3gyro_1gyro_2gyro_3
trial_notimepoint
000.0791060.3940320.5514440.3515650.0239700.633883
10.0791060.3940320.5514440.3515650.0239700.633883
2-0.903497-3.666397-0.282844-0.095881-0.3196050.972131
31.116125-0.6561010.3331181.624657-0.5699621.209171
41.6382001.4051350.3938751.187864-0.2716641.739182
........................
799528.459024-16.6337703.6318698.978229-3.611533-1.491489
9610.2600940.1027751.269261-1.645964-3.3771571.283746
974.316471-3.5743192.063831-1.717875-1.8430540.484734
980.704446-4.9204442.851857-2.982977-0.809665-0.721774
99-2.074749-6.8923774.848379-1.350330-1.203844-1.776470
\n", 426 | "

8000 rows × 6 columns

\n", 427 | "
" 428 | ], 429 | "text/plain": [ 430 | " accel_1 accel_2 accel_3 gyro_1 gyro_2 gyro_3\n", 431 | "trial_no timepoint \n", 432 | "0 0 0.079106 0.394032 0.551444 0.351565 0.023970 0.633883\n", 433 | " 1 0.079106 0.394032 0.551444 0.351565 0.023970 0.633883\n", 434 | " 2 -0.903497 -3.666397 -0.282844 -0.095881 -0.319605 0.972131\n", 435 | " 3 1.116125 -0.656101 0.333118 1.624657 -0.569962 1.209171\n", 436 | " 4 1.638200 1.405135 0.393875 1.187864 -0.271664 1.739182\n", 437 | "... ... ... ... ... ... ...\n", 438 | "79 95 28.459024 -16.633770 3.631869 8.978229 -3.611533 -1.491489\n", 439 | " 96 10.260094 0.102775 1.269261 -1.645964 -3.377157 1.283746\n", 440 | " 97 4.316471 -3.574319 2.063831 -1.717875 -1.843054 0.484734\n", 441 | " 98 0.704446 -4.920444 2.851857 -2.982977 -0.809665 -0.721774\n", 442 | " 99 -2.074749 -6.892377 4.848379 -1.350330 -1.203844 -1.776470\n", 443 | "\n", 444 | "[8000 rows x 6 columns]" 445 | ] 446 | }, 447 | "execution_count": 5, 448 | "metadata": {}, 449 | "output_type": "execute_result" 450 | } 451 | ], 452 | "source": [ 453 | "# The outermost index represents the instance number\n", 454 | "# whereas the inner index represents the time point\n", 455 | "# within that instance.\n", 456 | "X" 457 | ] 458 | }, 459 | { 460 | "attachments": {}, 461 | "cell_type": "markdown", 462 | "metadata": {}, 463 | "source": [ 464 | "pandas provides a simple way to access a range of values in the multi-indexed dataframe:" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": 6, 470 | "metadata": {}, 471 | "outputs": [ 472 | { 473 | "data": { 474 | "text/plain": [ 475 | "timepoint\n", 476 | "0 0.351565\n", 477 | "1 0.351565\n", 478 | "2 -0.095881\n", 479 | "3 1.624657\n", 480 | "4 1.187864\n", 481 | " ... 
\n", 482 | "95 0.039951\n", 483 | "96 -0.029297\n", 484 | "97 0.000000\n", 485 | "98 0.000000\n", 486 | "99 -0.007990\n", 487 | "Name: gyro_1, Length: 100, dtype: float64" 488 | ] 489 | }, 490 | "execution_count": 6, 491 | "metadata": {}, 492 | "output_type": "execute_result" 493 | } 494 | ], 495 | "source": [ 496 | "# Select:\n", 497 | "# * the fourth variable (gyroscope 1)\n", 498 | "# * of the first instance (trial 1 = 0 in python)\n", 499 | "# * values at all 100 timestamps\n", 500 | "#\n", 501 | "X.loc[0, \"gyro_1\"]" 502 | ] 503 | }, 504 | { 505 | "attachments": {}, 506 | "cell_type": "markdown", 507 | "metadata": {}, 508 | "source": [ 509 | "Or if you want to access the individual values:" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": 7, 515 | "metadata": {}, 516 | "outputs": [ 517 | { 518 | "data": { 519 | "text/plain": [ 520 | "-1.27952" 521 | ] 522 | }, 523 | "execution_count": 7, 524 | "metadata": {}, 525 | "output_type": "execute_result" 526 | } 527 | ], 528 | "source": [ 529 | "# Select:\n", 530 | "# * the fifth time point (5 = 4 in python)\n", 531 | "# * the third variable (accelerometer 3)\n", 532 | "# * of the forty-third instance (trial 43 = 42 in python)\n", 533 | "\n", 534 | "X.loc[(42, 4), \"accel_3\"]" 535 | ] 536 | }, 537 | { 538 | "attachments": {}, 539 | "cell_type": "markdown", 540 | "metadata": {}, 541 | "source": [ 542 | "### 5.1.2 preferred format 2 - `numpy3D` specification" 543 | ] 544 | }, 545 | { 546 | "attachments": {}, 547 | "cell_type": "markdown", 548 | "metadata": {}, 549 | "source": [ 550 | "`numpy3D` = 3D `np.ndarray` with index (instance, variable, time)\n", 551 | "\n", 552 | "instance/time index is interpreted as integer\n", 553 | "\n", 554 | "IMPORTANT: unlike `pd-multiindex`, this assumes:\n", 555 | "\n", 556 | "* all individual series have the same length\n", 557 | "* all individual series have the same index" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": 
8, 563 | "metadata": {}, 564 | "outputs": [], 565 | "source": [ 566 | "from sktime.datasets import load_basic_motions\n", 567 | "\n", 568 | "# load an example time series panel in numpy mtype\n", 569 | "X, _ = load_basic_motions(return_type=\"numpy3D\")" 570 | ] 571 | }, 572 | { 573 | "attachments": {}, 574 | "cell_type": "markdown", 575 | "metadata": {}, 576 | "source": [ 577 | "The Italy power demand dataset has:\n", 578 | "\n", 579 | "* 1096 individual time series instances = single days of total power demand (mean subtracted)\n", 580 | "* one single variable per time series instances, unnamed in numpy\n", 581 | "* individual time series are observed at 24 time (period) points (the same number for all instances)" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": 9, 587 | "metadata": {}, 588 | "outputs": [ 589 | { 590 | "data": { 591 | "text/plain": [ 592 | "(80, 6, 100)" 593 | ] 594 | }, 595 | "execution_count": 9, 596 | "metadata": {}, 597 | "output_type": "execute_result" 598 | } 599 | ], 600 | "source": [ 601 | "# (num_instances, num_variables, length)\n", 602 | "X.shape" 603 | ] 604 | }, 605 | { 606 | "cell_type": "code", 607 | "execution_count": 10, 608 | "metadata": {}, 609 | "outputs": [], 610 | "source": [ 611 | "from sktime.datasets import load_basic_motions\n", 612 | "\n", 613 | "# load an example time series panel in numpy mtype\n", 614 | "X, _ = load_basic_motions(return_type=\"numpy3D\")" 615 | ] 616 | }, 617 | { 618 | "attachments": {}, 619 | "cell_type": "markdown", 620 | "metadata": {}, 621 | "source": [ 622 | "The basic motions dataset has:\n", 623 | "\n", 624 | "* 80 individual time series instances = trials = person engaging in activity (running, badminton, etc)\n", 625 | "* six variables per time series instance, unnamed in numpy\n", 626 | "* individual time series are observed at 100 time points (the same number for all instances)" 627 | ] 628 | }, 629 | { 630 | "cell_type": "code", 631 | "execution_count": 11, 632 
| "metadata": {}, 633 | "outputs": [ 634 | { 635 | "data": { 636 | "text/plain": [ 637 | "(80, 6, 100)" 638 | ] 639 | }, 640 | "execution_count": 11, 641 | "metadata": {}, 642 | "output_type": "execute_result" 643 | } 644 | ], 645 | "source": [ 646 | "X.shape" 647 | ] 648 | }, 649 | { 650 | "attachments": {}, 651 | "cell_type": "markdown", 652 | "metadata": {}, 653 | "source": [ 654 | "## 5.2 Time Series Classification, Regression, Clustering - Basic Vignettes\n", 655 | "\n", 656 | "Above tasks are very similar to \"tabular\" classification, regression, clustering, as in `sklearn`\n", 657 | "\n", 658 | "Main distinction:\n", 659 | "* in \"tabular\" classification etc, one (feature) instance is a row vector of features\n", 660 | "* in TSC, one (feature) instance is a full time series, possibly unequal length, distinct index set" 661 | ] 662 | }, 663 | { 664 | "attachments": {}, 665 | "cell_type": "markdown", 666 | "metadata": {}, 667 | "source": [ 668 | "![](./img/tasks-tsc.png)" 669 | ] 670 | }, 671 | { 672 | "attachments": {}, 673 | "cell_type": "markdown", 674 | "metadata": {}, 675 | "source": [ 676 | "\n", 677 | "More formally:\n", 678 | "\n", 679 | "* \"tabular\" classification:\n", 680 | " * training pairs $(x_1, y_1), \\dots, (x_n, y_n)$\n", 681 | " * where $x_i$ are rows of a `pd.DataFrame` (same col types)\n", 682 | " * and $y_i \\in \\mathcal{C}$ for a finite set $\\mathcal{C}$\n", 683 | " * used to train a classifier that\n", 684 | " * for a new `pd.DataFrame` row $x_*$\n", 685 | " * predicts $y_* \\in \\mathcal{C}$" 686 | ] 687 | }, 688 | { 689 | "cell_type": "markdown", 690 | "metadata": {}, 691 | "source": [ 692 | "\n", 693 | "* time series classification:\n", 694 | " * training pairs $(x_1, y_1), \\dots, (x_n, y_n)$\n", 695 | " * where $x_i$ are time series instances, from a certain domain\n", 696 | " * and $y_i \\in \\mathcal{C}$ for a finite set $\\mathcal{C}$\n", 697 | " * used to train a classifier that\n", 698 | " * for a new time series instance 
$x_*$\n", 699 | " * predicts $y_* \\in \\mathcal{C}$" 700 | ] 701 | }, 702 | { 703 | "attachments": {}, 704 | "cell_type": "markdown", 705 | "metadata": {}, 706 | "source": [ 707 | "very similar for time series regression, clustering - exercise left to reader :-)" 708 | ] 709 | }, 710 | { 711 | "attachments": {}, 712 | "cell_type": "markdown", 713 | "metadata": {}, 714 | "source": [ 715 | "`sktime` design implications:\n", 716 | "\n", 717 | "* need representation of collections of time series (panels), see Section 5.1\n", 718 | " * same as in \"adjacent\" learning tasks, e.g., panel forecasting\n", 719 | " * same as for transformation estimators\n", 720 | "* algorithms that use sequentiality, can deal with unequal length, missing values etc \n", 721 | "* algorithms usually based on distances or kernels between time series - need to cover that in framework\n", 722 | "* but we can use familiar `fit` / `predict` and `scikit-learn` / `scikit-base` interface!" 723 | ] 724 | }, 725 | { 726 | "attachments": {}, 727 | "cell_type": "markdown", 728 | "metadata": {}, 729 | "source": [ 730 | "### 5.2.3 Time Series Classification - deployment vignette" 731 | ] 732 | }, 733 | { 734 | "attachments": {}, 735 | "cell_type": "markdown", 736 | "metadata": {}, 737 | "source": [ 738 | "Basic deployment vignette for TSC:\n", 739 | "\n", 740 | "1. load/setup training data, `X` in a `Panel` (more specifically `numpy3D`) format, `y` as 1D `np.ndarray`\n", 741 | "2. load/setup new data for prediction (can also be done later, any time before step 5)\n", 742 | "3. specify the classifier using `sklearn`-like syntax\n", 743 | "4. fit classifier to training data, `fit(X, y)`\n", 744 | "5. 
predict labels on new data, `predict(X_new)`" 745 | ] 746 | }, 747 | { 748 | "cell_type": "code", 749 | "execution_count": 12, 750 | "metadata": {}, 751 | "outputs": [], 752 | "source": [ 753 | "# steps 1, 2 - prepare Italy power demand dataset (train and new)\n", 754 | "from sktime.datasets import load_italy_power_demand\n", 755 | "\n", 756 | "X_train, y_train = load_italy_power_demand(split=\"train\", return_type=\"numpy3D\")\n", 757 | "X_new, _ = load_italy_power_demand(split=\"test\", return_type=\"numpy3D\")" 758 | ] 759 | }, 760 | { 761 | "cell_type": "code", 762 | "execution_count": 13, 763 | "metadata": {}, 764 | "outputs": [ 765 | { 766 | "data": { 767 | "text/plain": [ 768 | "(67, 1, 24)" 769 | ] 770 | }, 771 | "execution_count": 13, 772 | "metadata": {}, 773 | "output_type": "execute_result" 774 | } 775 | ], 776 | "source": [ 777 | "# this is in numpy3D format, but could also be pd-multiindex or other\n", 778 | "X_train.shape" 779 | ] 780 | }, 781 | { 782 | "cell_type": "code", 783 | "execution_count": 14, 784 | "metadata": {}, 785 | "outputs": [ 786 | { 787 | "data": { 788 | "text/plain": [ 789 | "(67,)" 790 | ] 791 | }, 792 | "execution_count": 14, 793 | "metadata": {}, 794 | "output_type": "execute_result" 795 | } 796 | ], 797 | "source": [ 798 | "# y is a 1D np.ndarray of labels - same length as number of instances in X_train\n", 799 | "y_train.shape" 800 | ] 801 | }, 802 | { 803 | "cell_type": "code", 804 | "execution_count": 15, 805 | "metadata": {}, 806 | "outputs": [], 807 | "source": [ 808 | "# step 3 - specify the classifier\n", 809 | "from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier\n", 810 | "\n", 811 | "# example 1 - 3-NN with simple dynamic time warping distance (requires numba)\n", 812 | "clf = KNeighborsTimeSeriesClassifier(n_neighbors=3)\n", 813 | "\n", 814 | "# example 2 - custom distance:\n", 815 | "# 3-nearest neighbour classifier with Euclidean distance (on flattened time series)\n", 816 | "# (requires scipy)\n", 
817 | "from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier\n", 818 | "from sktime.dists_kernels.compose_tab_to_panel import FlatDist\n", 819 | "from sktime.dists_kernels import ScipyDist\n", 820 | "\n", 821 | "eucl_dist = FlatDist(ScipyDist())\n", 822 | "clf = KNeighborsTimeSeriesClassifier(n_neighbors=3, distance=eucl_dist)" 823 | ] 824 | }, 825 | { 826 | "attachments": {}, 827 | "cell_type": "markdown", 828 | "metadata": {}, 829 | "source": [ 830 | "we could specify any `sktime` classifier here - the rest remains the same!" 831 | ] 832 | }, 833 | { 834 | "cell_type": "code", 835 | "execution_count": 16, 836 | "metadata": {}, 837 | "outputs": [ 838 | { 839 | "data": { 840 | "text/plain": [ 841 | "{'algorithm': 'brute',\n", 842 | " 'distance': FlatDist(transformer=ScipyDist()),\n", 843 | " 'distance_mtype': None,\n", 844 | " 'distance_params': None,\n", 845 | " 'leaf_size': 30,\n", 846 | " 'n_jobs': None,\n", 847 | " 'n_neighbors': 3,\n", 848 | " 'pass_train_distances': False,\n", 849 | " 'weights': 'uniform',\n", 850 | " 'distance__transformer': ScipyDist(),\n", 851 | " 'distance__transformer__colalign': 'intersect',\n", 852 | " 'distance__transformer__metric': 'euclidean',\n", 853 | " 'distance__transformer__metric_kwargs': None,\n", 854 | " 'distance__transformer__p': 2,\n", 855 | " 'distance__transformer__var_weights': None}" 856 | ] 857 | }, 858 | "execution_count": 16, 859 | "metadata": {}, 860 | "output_type": "execute_result" 861 | } 862 | ], 863 | "source": [ 864 | "# all classifiers are scikit-learn / scikit-base compatible!\n", 865 | "# nested parameter interface via get_params, set_params\n", 866 | "clf.get_params()" 867 | ] 868 | }, 869 | { 870 | "cell_type": "code", 871 | "execution_count": 17, 872 | "metadata": {}, 873 | "outputs": [ 874 | { 875 | "data": { 876 | "text/html": [ 877 | "
KNeighborsTimeSeriesClassifier(distance=FlatDist(transformer=ScipyDist()),\n",
 878 |        "                               n_neighbors=3)
Please rerun this cell to show the HTML repr or trust the notebook.
" 880 | ], 881 | "text/plain": [ 882 | "KNeighborsTimeSeriesClassifier(distance=FlatDist(transformer=ScipyDist()),\n", 883 | " n_neighbors=3)" 884 | ] 885 | }, 886 | "execution_count": 17, 887 | "metadata": {}, 888 | "output_type": "execute_result" 889 | } 890 | ], 891 | "source": [ 892 | "# step 4 - fit/train the classifier\n", 893 | "clf.fit(X_train, y_train)" 894 | ] 895 | }, 896 | { 897 | "cell_type": "code", 898 | "execution_count": 18, 899 | "metadata": {}, 900 | "outputs": [ 901 | { 902 | "data": { 903 | "text/plain": [ 904 | "True" 905 | ] 906 | }, 907 | "execution_count": 18, 908 | "metadata": {}, 909 | "output_type": "execute_result" 910 | } 911 | ], 912 | "source": [ 913 | "# the classifier is now fitted\n", 914 | "clf.is_fitted" 915 | ] 916 | }, 917 | { 918 | "cell_type": "code", 919 | "execution_count": 19, 920 | "metadata": {}, 921 | "outputs": [ 922 | { 923 | "data": { 924 | "text/plain": [ 925 | "{'classes': array(['1', '2'], dtype='\n", 1289 | "\n", 1302 | "\n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " \n", 1358 | " \n", 1359 | " \n", 1360 | " \n", 1361 | " \n", 1362 | " \n", 1363 | " \n", 1364 | " \n", 1365 | " \n", 1366 | " \n", 1367 | " \n", 1368 | " \n", 1369 | " \n", 1370 | "
namescitypetypedescription
0capability:contractableclassifierboolcontract time setting, does the estimator supp...
1capability:missing_values[classifier, early_classifier, param_est, regr...boolcan the classifier handle missing data (NA, np...
2capability:multithreading[classifier, early_classifier]boolcan the classifier set n_jobs to use multiple ...
3capability:multivariate[classifier, early_classifier, param_est, regr...boolcan the classifier classify time series with 2...
4capability:predict_probaclassifierbooldoes the classifier implement a non-default pr...
5capability:train_estimateclassifierboolcan the classifier estimate its performance on...
6capability:unequal_length[classifier, early_classifier, regressor, tran...boolcan the estimator handle unequal length time s...
7classifier_typeclassifier(list, [dictionary, distance, feature, hybrid,...which type the classifier falls under in the t...
\n", 1371 | "" 1372 | ], 1373 | "text/plain": [ 1374 | " name scitype type description\n", 1375 | "0 capability:contractable classifier bool contract time setting, does the estimator supp...\n", 1376 | "1 capability:missing_values [classifier, early_classifier, param_est, regr... bool can the classifier handle missing data (NA, np...\n", 1377 | "2 capability:multithreading [classifier, early_classifier] bool can the classifier set n_jobs to use multiple ...\n", 1378 | "3 capability:multivariate [classifier, early_classifier, param_est, regr... bool can the classifier classify time series with 2...\n", 1379 | "4 capability:predict_proba classifier bool does the classifier implement a non-default pr...\n", 1380 | "5 capability:train_estimate classifier bool can the classifier estimate its performance on...\n", 1381 | "6 capability:unequal_length [classifier, early_classifier, regressor, tran... bool can the estimator handle unequal length time s...\n", 1382 | "7 classifier_type classifier (list, [dictionary, distance, feature, hybrid,... which type the classifier falls under in the t..." 
1383 | ] 1384 | }, 1385 | "execution_count": 28, 1386 | "metadata": {}, 1387 | "output_type": "execute_result" 1388 | } 1389 | ], 1390 | "source": [ 1391 | "from sktime.registry import all_tags\n", 1392 | "\n", 1393 | "all_tags(\"classifier\", as_dataframe=True)" 1394 | ] 1395 | }, 1396 | { 1397 | "attachments": {}, 1398 | "cell_type": "markdown", 1399 | "metadata": {}, 1400 | "source": [ 1401 | "valid estimator types are listed in the `all_tags` docstring, or `sktime.registry.BASE_CLASS_REGISTER`" 1402 | ] 1403 | }, 1404 | { 1405 | "cell_type": "code", 1406 | "execution_count": 29, 1407 | "metadata": {}, 1408 | "outputs": [ 1409 | { 1410 | "data": { 1411 | "text/plain": [ 1412 | "('object',\n", 1413 | " 'estimator',\n", 1414 | " 'aligner',\n", 1415 | " 'classifier',\n", 1416 | " 'clusterer',\n", 1417 | " 'early_classifier',\n", 1418 | " 'forecaster',\n", 1419 | " 'metric',\n", 1420 | " 'network',\n", 1421 | " 'param_est',\n", 1422 | " 'regressor',\n", 1423 | " 'series-annotator',\n", 1424 | " 'splitter',\n", 1425 | " 'transformer',\n", 1426 | " 'transformer-pairwise',\n", 1427 | " 'transformer-pairwise-panel',\n", 1428 | " 'distribution')" 1429 | ] 1430 | }, 1431 | "execution_count": 29, 1432 | "metadata": {}, 1433 | "output_type": "execute_result" 1434 | } 1435 | ], 1436 | "source": [ 1437 | "from sktime.registry import BASE_CLASS_REGISTER\n", 1438 | "\n", 1439 | "# get only first table column, the list of types\n", 1440 | "list(zip(*BASE_CLASS_REGISTER))[0]" 1441 | ] 1442 | }, 1443 | { 1444 | "attachments": {}, 1445 | "cell_type": "markdown", 1446 | "metadata": {}, 1447 | "source": [ 1448 | "to find all estimators of a certain type, use `sktime.registry.all_estimators`" 1449 | ] 1450 | }, 1451 | { 1452 | "cell_type": "code", 1453 | "execution_count": 30, 1454 | "metadata": {}, 1455 | "outputs": [ 1456 | { 1457 | "data": { 1458 | "text/html": [ 1459 | "
\n", 1460 | "\n", 1473 | "\n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | " \n", 1515 | " \n", 1516 | " \n", 1517 | " \n", 1518 | " \n", 1519 | " \n", 1520 | " \n", 1521 | " \n", 1522 | " \n", 1523 | " \n", 1524 | " \n", 1525 | " \n", 1526 | " \n", 1527 | " \n", 1528 | " \n", 1529 | " \n", 1530 | " \n", 1531 | " \n", 1532 | " \n", 1533 | " \n", 1534 | " \n", 1535 | " \n", 1536 | " \n", 1537 | " \n", 1538 | " \n", 1539 | " \n", 1540 | " \n", 1541 | " \n", 1542 | " \n", 1543 | " \n", 1544 | " \n", 1545 | " \n", 1546 | " \n", 1547 | " \n", 1548 | " \n", 1549 | " \n", 1550 | " \n", 1551 | " \n", 1552 | " \n", 1553 | " \n", 1554 | " \n", 1555 | " \n", 1556 | " \n", 1557 | " \n", 1558 | " \n", 1559 | " \n", 1560 | " \n", 1561 | " \n", 1562 | " \n", 1563 | " \n", 1564 | " \n", 1565 | " \n", 1566 | " \n", 1567 | " \n", 1568 | " \n", 1569 | " \n", 1570 | " \n", 1571 | " \n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | " \n", 1578 | " \n", 1579 | " \n", 1580 | " \n", 1581 | " \n", 1582 | " \n", 1583 | " \n", 1584 | " \n", 1585 | " \n", 1586 | " \n", 1587 | " \n", 1588 | " \n", 1589 | " \n", 1590 | " \n", 1591 | " \n", 1592 | " \n", 1593 | " \n", 1594 | " \n", 1595 | " \n", 1596 | " \n", 1597 | " \n", 1598 | " \n", 1599 | " \n", 1600 | " \n", 1601 | " \n", 1602 | " \n", 1603 | " \n", 1604 | " \n", 1605 | " \n", 1606 | " \n", 1607 | " \n", 1608 | " \n", 1609 | " \n", 1610 | " \n", 1611 | " \n", 1612 | " \n", 1613 | " \n", 1614 | " 
\n", 1615 | " \n", 1616 | " \n", 1617 | " \n", 1618 | " \n", 1619 | " \n", 1620 | " \n", 1621 | " \n", 1622 | " \n", 1623 | " \n", 1624 | " \n", 1625 | " \n", 1626 | " \n", 1627 | " \n", 1628 | " \n", 1629 | " \n", 1630 | " \n", 1631 | " \n", 1632 | " \n", 1633 | " \n", 1634 | " \n", 1635 | " \n", 1636 | " \n", 1637 | " \n", 1638 | " \n", 1639 | " \n", 1640 | " \n", 1641 | " \n", 1642 | " \n", 1643 | " \n", 1644 | " \n", 1645 | " \n", 1646 | " \n", 1647 | " \n", 1648 | " \n", 1649 | " \n", 1650 | " \n", 1651 | " \n", 1652 | " \n", 1653 | " \n", 1654 | " \n", 1655 | " \n", 1656 | " \n", 1657 | " \n", 1658 | " \n", 1659 | " \n", 1660 | " \n", 1661 | " \n", 1662 | " \n", 1663 | " \n", 1664 | " \n", 1665 | " \n", 1666 | " \n", 1667 | " \n", 1668 | " \n", 1669 | " \n", 1670 | " \n", 1671 | " \n", 1672 | " \n", 1673 | " \n", 1674 | " \n", 1675 | " \n", 1676 | " \n", 1677 | " \n", 1678 | " \n", 1679 | " \n", 1680 | " \n", 1681 | " \n", 1682 | " \n", 1683 | " \n", 1684 | " \n", 1685 | " \n", 1686 | " \n", 1687 | " \n", 1688 | " \n", 1689 | " \n", 1690 | " \n", 1691 | " \n", 1692 | " \n", 1693 | " \n", 1694 | " \n", 1695 | " \n", 1696 | " \n", 1697 | " \n", 1698 | " \n", 1699 | " \n", 1700 | " \n", 1701 | " \n", 1702 | " \n", 1703 | " \n", 1704 | " \n", 1705 | " \n", 1706 | " \n", 1707 | " \n", 1708 | " \n", 1709 | " \n", 1710 | " \n", 1711 | " \n", 1712 | " \n", 1713 | " \n", 1714 | " \n", 1715 | " \n", 1716 | " \n", 1717 | " \n", 1718 | " \n", 1719 | " \n", 1720 | " \n", 1721 | " \n", 1722 | " \n", 1723 | " \n", 1724 | " \n", 1725 | " \n", 1726 | " \n", 1727 | " \n", 1728 | "
nameobject
0Arsenal<class 'sktime.classification.kernel_based._ar...
1BOSSEnsemble<class 'sktime.classification.dictionary_based...
2BaggingClassifier<class 'sktime.classification.ensemble._baggin...
3CNNClassifier<class 'sktime.classification.deep_learning.cn...
4CanonicalIntervalForest<class 'sktime.classification.interval_based._...
5Catch22Classifier<class 'sktime.classification.feature_based._c...
6ClassifierPipeline<class 'sktime.classification.compose._pipelin...
7ColumnEnsembleClassifier<class 'sktime.classification.compose._column_...
8ComposableTimeSeriesForestClassifier<class 'sktime.classification.ensemble._ctsf.C...
9ContractableBOSS<class 'sktime.classification.dictionary_based...
10DrCIF<class 'sktime.classification.interval_based._...
11DummyClassifier<class 'sktime.classification.dummy._dummy.Dum...
12ElasticEnsemble<class 'sktime.classification.distance_based._...
13FCNClassifier<class 'sktime.classification.deep_learning.fc...
14FreshPRINCE<class 'sktime.classification.feature_based._f...
15HIVECOTEV1<class 'sktime.classification.hybrid._hivecote...
16HIVECOTEV2<class 'sktime.classification.hybrid._hivecote...
17InceptionTimeClassifier<class 'sktime.classification.deep_learning.in...
18IndividualBOSS<class 'sktime.classification.dictionary_based...
19IndividualTDE<class 'sktime.classification.dictionary_based...
20KNeighborsTimeSeriesClassifier<class 'sktime.classification.distance_based._...
21LSTMFCNClassifier<class 'sktime.classification.deep_learning.ls...
22MACNNClassifier<class 'sktime.classification.deep_learning.ma...
23MLPClassifier<class 'sktime.classification.deep_learning.ml...
24MUSE<class 'sktime.classification.dictionary_based...
25MatrixProfileClassifier<class 'sktime.classification.feature_based._m...
26MrSQM<class 'sktime.classification.shapelet_based._...
27ProbabilityThresholdEarlyClassifier<class 'sktime.classification.early_classifica...
28ProximityForest<class 'sktime.classification.distance_based._...
29ProximityStump<class 'sktime.classification.distance_based._...
30ProximityTree<class 'sktime.classification.distance_based._...
31RandomIntervalClassifier<class 'sktime.classification.feature_based._r...
32RandomIntervalSpectralEnsemble<class 'sktime.classification.interval_based._...
33ResNetClassifier<class 'sktime.classification.deep_learning.re...
34RocketClassifier<class 'sktime.classification.kernel_based._ro...
35ShapeDTW<class 'sktime.classification.distance_based._...
36ShapeletTransformClassifier<class 'sktime.classification.shapelet_based._...
37SignatureClassifier<class 'sktime.classification.feature_based._s...
38SimpleRNNClassifier<class 'sktime.classification.deep_learning.rn...
39SklearnClassifierPipeline<class 'sktime.classification.compose._pipelin...
40SummaryClassifier<class 'sktime.classification.feature_based._s...
41SupervisedTimeSeriesForest<class 'sktime.classification.interval_based._...
42TSFreshClassifier<class 'sktime.classification.feature_based._t...
43TapNetClassifier<class 'sktime.classification.deep_learning.ta...
44TemporalDictionaryEnsemble<class 'sktime.classification.dictionary_based...
45TimeSeriesForestClassifier<class 'sktime.classification.interval_based._...
46TimeSeriesSVC<class 'sktime.classification.kernel_based._sv...
47WEASEL<class 'sktime.classification.dictionary_based...
48WeightedEnsembleClassifier<class 'sktime.classification.ensemble._weight...
\n", 1729 | "
" 1730 | ], 1731 | "text/plain": [ 1732 | " name object\n", 1733 | "0 Arsenal \n", 1814 | "\n", 1827 | "\n", 1828 | " \n", 1829 | " \n", 1830 | " \n", 1831 | " \n", 1832 | " \n", 1833 | " \n", 1834 | " \n", 1835 | " \n", 1836 | " \n", 1837 | " \n", 1838 | " \n", 1839 | " \n", 1840 | " \n", 1841 | " \n", 1842 | " \n", 1843 | " \n", 1844 | " \n", 1845 | " \n", 1846 | " \n", 1847 | " \n", 1848 | " \n", 1849 | " \n", 1850 | " \n", 1851 | " \n", 1852 | " \n", 1853 | " \n", 1854 | " \n", 1855 | " \n", 1856 | " \n", 1857 | " \n", 1858 | " \n", 1859 | " \n", 1860 | " \n", 1861 | " \n", 1862 | " \n", 1863 | " \n", 1864 | " \n", 1865 | " \n", 1866 | " \n", 1867 | "
nameobject
0BaggingClassifier<class 'sktime.classification.ensemble._baggin...
1DummyClassifier<class 'sktime.classification.dummy._dummy.Dum...
2KNeighborsTimeSeriesClassifier<class 'sktime.classification.distance_based._...
3SklearnClassifierPipeline<class 'sktime.classification.compose._pipelin...
4TimeSeriesSVC<class 'sktime.classification.kernel_based._sv...
5WeightedEnsembleClassifier<class 'sktime.classification.ensemble._weight...
\n", 1868 | "" 1869 | ], 1870 | "text/plain": [ 1871 | " name object\n", 1872 | "0 BaggingClassifier \n", 1957 | "\n", 1970 | "\n", 1971 | " \n", 1972 | " \n", 1973 | " \n", 1974 | " \n", 1975 | " \n", 1976 | " \n", 1977 | " \n", 1978 | " \n", 1979 | " \n", 1980 | " \n", 1981 | " \n", 1982 | " \n", 1983 | " \n", 1984 | " \n", 1985 | " \n", 1986 | " \n", 1987 | " \n", 1988 | " \n", 1989 | " \n", 1990 | " \n", 1991 | " \n", 1992 | " \n", 1993 | " \n", 1994 | " \n", 1995 | " \n", 1996 | " \n", 1997 | " \n", 1998 | " \n", 1999 | " \n", 2000 | " \n", 2001 | " \n", 2002 | " \n", 2003 | " \n", 2004 | " \n", 2005 | " \n", 2006 | " \n", 2007 | " \n", 2008 | " \n", 2009 | " \n", 2010 | " \n", 2011 | " \n", 2012 | " \n", 2013 | " \n", 2014 | " \n", 2015 | " \n", 2016 | " \n", 2017 | " \n", 2018 | " \n", 2019 | " \n", 2020 | " \n", 2021 | " \n", 2022 | " \n", 2023 | " \n", 2024 | " \n", 2025 | " \n", 2026 | " \n", 2027 | " \n", 2028 | " \n", 2029 | " \n", 2030 | " \n", 2031 | " \n", 2032 | "
dim_0
timepoints
000.267711
1-0.290155
2-0.564339
3-0.870044
4-0.829027
.........
109519-0.425904
20-0.781304
21-0.038512
22-0.637956
23-0.932346
\n", 2033 | "

26304 rows × 1 columns

\n", 2034 | "" 2035 | ], 2036 | "text/plain": [ 2037 | " dim_0\n", 2038 | " timepoints \n", 2039 | "0 0 0.267711\n", 2040 | " 1 -0.290155\n", 2041 | " 2 -0.564339\n", 2042 | " 3 -0.870044\n", 2043 | " 4 -0.829027\n", 2044 | "... ...\n", 2045 | "1095 19 -0.425904\n", 2046 | " 20 -0.781304\n", 2047 | " 21 -0.038512\n", 2048 | " 22 -0.637956\n", 2049 | " 23 -0.932346\n", 2050 | "\n", 2051 | "[26304 rows x 1 columns]" 2052 | ] 2053 | }, 2054 | "execution_count": 32, 2055 | "metadata": {}, 2056 | "output_type": "execute_result" 2057 | } 2058 | ], 2059 | "source": [ 2060 | "from sktime.datasets import load_italy_power_demand\n", 2061 | "from sktime.transformations.series.detrend import Detrender\n", 2062 | "\n", 2063 | "# load some panel data\n", 2064 | "X, _ = load_italy_power_demand(return_type=\"pd-multiindex\")\n", 2065 | "\n", 2066 | "# specify a linear detrender\n", 2067 | "detrender = Detrender()\n", 2068 | "\n", 2069 | "# detrend X by removing linear trend from each instance\n", 2070 | "X_detrended = detrender.fit_transform(X)\n", 2071 | "X_detrended" 2072 | ] 2073 | }, 2074 | { 2075 | "attachments": {}, 2076 | "cell_type": "markdown", 2077 | "metadata": {}, 2078 | "source": [ 2079 | "for panel tasks such as TSC, TSR, clustering, there are two distinctions to be aware of:\n", 2080 | "\n", 2081 | "* series-to-series transformers transform individual series to series, panels to panels. E.g., instance-wise detrender above\n", 2082 | "* series-to-primitive transformers transform individual series to a set of tabular features. E.g., summary feature extractor\n", 2083 | "\n", 2084 | "either type of transform can be instance-wise:\n", 2085 | "\n", 2086 | "* instance-wise transforms use only the i-th series to transform the i-th series. E.g., instance-wise detrender\n", 2087 | "* non-instance-wise transforms train on all series to transform the i-th series. 
E.g., PCA, overall mean detrender" 2088 | ] 2089 | }, 2090 | { 2091 | "cell_type": "code", 2092 | "execution_count": 33, 2093 | "metadata": {}, 2094 | "outputs": [ 2095 | { 2096 | "data": { 2097 | "text/html": [ 2098 | "
\n", 2099 | "\n", 2112 | "\n", 2113 | " \n", 2114 | " \n", 2115 | " \n", 2116 | " \n", 2117 | " \n", 2118 | " \n", 2119 | " \n", 2120 | " \n", 2121 | " \n", 2122 | " \n", 2123 | " \n", 2124 | " \n", 2125 | " \n", 2126 | " \n", 2127 | " \n", 2128 | " \n", 2129 | " \n", 2130 | " \n", 2131 | " \n", 2132 | " \n", 2133 | " \n", 2134 | " \n", 2135 | " \n", 2136 | " \n", 2137 | " \n", 2138 | " \n", 2139 | " \n", 2140 | " \n", 2141 | " \n", 2142 | " \n", 2143 | " \n", 2144 | " \n", 2145 | " \n", 2146 | " \n", 2147 | " \n", 2148 | " \n", 2149 | " \n", 2150 | " \n", 2151 | " \n", 2152 | " \n", 2153 | " \n", 2154 | " \n", 2155 | " \n", 2156 | " \n", 2157 | " \n", 2158 | " \n", 2159 | " \n", 2160 | " \n", 2161 | " \n", 2162 | " \n", 2163 | " \n", 2164 | " \n", 2165 | " \n", 2166 | " \n", 2167 | " \n", 2168 | " \n", 2169 | " \n", 2170 | " \n", 2171 | " \n", 2172 | " \n", 2173 | " \n", 2174 | " \n", 2175 | " \n", 2176 | " \n", 2177 | " \n", 2178 | " \n", 2179 | " \n", 2180 | " \n", 2181 | " \n", 2182 | " \n", 2183 | " \n", 2184 | " \n", 2185 | " \n", 2186 | " \n", 2187 | " \n", 2188 | " \n", 2189 | " \n", 2190 | " \n", 2191 | " \n", 2192 | " \n", 2193 | " \n", 2194 | " \n", 2195 | " \n", 2196 | " \n", 2197 | " \n", 2198 | " \n", 2199 | " \n", 2200 | " \n", 2201 | " \n", 2202 | " \n", 2203 | " \n", 2204 | " \n", 2205 | " \n", 2206 | " \n", 2207 | " \n", 2208 | " \n", 2209 | " \n", 2210 | " \n", 2211 | " \n", 2212 | " \n", 2213 | " \n", 2214 | " \n", 2215 | " \n", 2216 | " \n", 2217 | " \n", 2218 | " \n", 2219 | " \n", 2220 | " \n", 2221 | " \n", 2222 | " \n", 2223 | " \n", 2224 | " \n", 2225 | " \n", 2226 | " \n", 2227 | " \n", 2228 | " \n", 2229 | " \n", 2230 | " \n", 2231 | " \n", 2232 | " \n", 2233 | " \n", 2234 | " \n", 2235 | " \n", 2236 | " \n", 2237 | " \n", 2238 | " \n", 2239 | " \n", 2240 | " \n", 2241 | " \n", 2242 | " \n", 2243 | " \n", 2244 | " \n", 2245 | " \n", 2246 | " \n", 2247 | " \n", 2248 | " \n", 2249 | " \n", 2250 | " \n", 2251 | " \n", 2252 | " \n", 2253 | " 
\n", 2254 | " \n", 2255 | " \n", 2256 | " \n", 2257 | " \n", 2258 | " \n", 2259 | " \n", 2260 | " \n", 2261 | "
meanstdminmax0.10.250.50.750.9
0-1.041667e-091.0-1.5930831.464375-1.372442-0.8050780.0302070.9364121.218518
1-1.958333e-091.0-1.6309171.201393-1.533955-0.9993880.3848710.7357201.084018
2-1.775000e-091.0-1.3971182.349344-1.003740-0.741487-0.1326870.2653741.515756
3-8.541667e-101.0-1.6464581.344487-1.476779-0.8987220.2660220.7764951.039641
4-3.416667e-091.0-1.6202401.303502-1.511644-0.9780610.4054950.6926481.061249
..............................
1091-1.041667e-091.0-1.8177991.630397-1.323058-0.6434140.0812080.5684531.390523
1092-4.166666e-101.0-1.5500771.513605-1.343747-0.7685260.0755500.8571011.276013
10934.166667e-091.0-1.7069921.052255-1.498879-1.1399430.4676690.7131950.993797
10941.583333e-091.0-1.6738572.420163-0.744173-0.479768-0.2665380.1599231.550184
10953.495833e-091.0-1.6803371.461716-1.488154-0.8109340.2415010.6456971.184117
\n", 2262 | "

1096 rows × 9 columns

\n", 2263 | "
" 2264 | ], 2265 | "text/plain": [ 2266 | " mean std min max 0.1 0.25 0.5 0.75 0.9\n", 2267 | "0 -1.041667e-09 1.0 -1.593083 1.464375 -1.372442 -0.805078 0.030207 0.936412 1.218518\n", 2268 | "1 -1.958333e-09 1.0 -1.630917 1.201393 -1.533955 -0.999388 0.384871 0.735720 1.084018\n", 2269 | "2 -1.775000e-09 1.0 -1.397118 2.349344 -1.003740 -0.741487 -0.132687 0.265374 1.515756\n", 2270 | "3 -8.541667e-10 1.0 -1.646458 1.344487 -1.476779 -0.898722 0.266022 0.776495 1.039641\n", 2271 | "4 -3.416667e-09 1.0 -1.620240 1.303502 -1.511644 -0.978061 0.405495 0.692648 1.061249\n", 2272 | "... ... ... ... ... ... ... ... ... ...\n", 2273 | "1091 -1.041667e-09 1.0 -1.817799 1.630397 -1.323058 -0.643414 0.081208 0.568453 1.390523\n", 2274 | "1092 -4.166666e-10 1.0 -1.550077 1.513605 -1.343747 -0.768526 0.075550 0.857101 1.276013\n", 2275 | "1093 4.166667e-09 1.0 -1.706992 1.052255 -1.498879 -1.139943 0.467669 0.713195 0.993797\n", 2276 | "1094 1.583333e-09 1.0 -1.673857 2.420163 -0.744173 -0.479768 -0.266538 0.159923 1.550184\n", 2277 | "1095 3.495833e-09 1.0 -1.680337 1.461716 -1.488154 -0.810934 0.241501 0.645697 1.184117\n", 2278 | "\n", 2279 | "[1096 rows x 9 columns]" 2280 | ] 2281 | }, 2282 | "execution_count": 33, 2283 | "metadata": {}, 2284 | "output_type": "execute_result" 2285 | } 2286 | ], 2287 | "source": [ 2288 | "# example of a series-to-primitive transformer\n", 2289 | "from sktime.transformations.series.summarize import SummaryTransformer\n", 2290 | "\n", 2291 | "# specify summary transformer\n", 2292 | "summary_trafo = SummaryTransformer()\n", 2293 | "\n", 2294 | "# extract summary features - one per instance in the panel\n", 2295 | "X_summaries = summary_trafo.fit_transform(X)\n", 2296 | "X_summaries" 2297 | ] 2298 | }, 2299 | { 2300 | "attachments": {}, 2301 | "cell_type": "markdown", 2302 | "metadata": {}, 2303 | "source": [ 2304 | "just like classifiers, we can search for transformers of either type via the right tag:\n", 2305 | "\n", 2306 | "* 
`\"scitype:transform-input\"` and `\"scitype:transform-output\"` define input and output, e.g., \"series-to-series\" (both are scitype strings)\n", 2307 | "* `\"scitype:instancewise\"` is boolean and tells us whether the transform is instance-wise" 2308 | ] 2309 | }, 2310 | { 2311 | "cell_type": "code", 2312 | "execution_count": 34, 2313 | "metadata": {}, 2314 | "outputs": [ 2315 | { 2316 | "data": { 2317 | "text/html": [ 2318 | "
\n", 2319 | "\n", 2332 | "\n", 2333 | " \n", 2334 | " \n", 2335 | " \n", 2336 | " \n", 2337 | " \n", 2338 | " \n", 2339 | " \n", 2340 | " \n", 2341 | " \n", 2342 | " \n", 2343 | " \n", 2344 | " \n", 2345 | " \n", 2346 | " \n", 2347 | " \n", 2348 | " \n", 2349 | " \n", 2350 | " \n", 2351 | " \n", 2352 | " \n", 2353 | " \n", 2354 | " \n", 2355 | " \n", 2356 | " \n", 2357 | " \n", 2358 | " \n", 2359 | " \n", 2360 | " \n", 2361 | " \n", 2362 | " \n", 2363 | " \n", 2364 | " \n", 2365 | " \n", 2366 | " \n", 2367 | " \n", 2368 | " \n", 2369 | " \n", 2370 | " \n", 2371 | " \n", 2372 | " \n", 2373 | " \n", 2374 | " \n", 2375 | " \n", 2376 | " \n", 2377 | " \n", 2378 | " \n", 2379 | " \n", 2380 | " \n", 2381 | " \n", 2382 | " \n", 2383 | " \n", 2384 | " \n", 2385 | " \n", 2386 | " \n", 2387 | " \n", 2388 | " \n", 2389 | " \n", 2390 | " \n", 2391 | " \n", 2392 | " \n", 2393 | " \n", 2394 | " \n", 2395 | " \n", 2396 | " \n", 2397 | "
nameobject
0Catch22<class 'sktime.transformations.panel.catch22.C...
1Catch22Wrapper<class 'sktime.transformations.panel.catch22wr...
2FittedParamExtractor<class 'sktime.transformations.panel.summarize...
3RandomIntervalFeatureExtractor<class 'sktime.transformations.panel.summarize...
4RandomIntervals<class 'sktime.transformations.panel.random_in...
5RandomShapeletTransform<class 'sktime.transformations.panel.shapelet_...
6SignatureTransformer<class 'sktime.transformations.panel.signature...
7SummaryTransformer<class 'sktime.transformations.series.summariz...
8TSFreshFeatureExtractor<class 'sktime.transformations.panel.tsfresh.T...
9Tabularizer<class 'sktime.transformations.panel.reduce.Ta...
10TimeBinner<class 'sktime.transformations.panel.reduce.Ti...
\n", 2398 | "
" 2399 | ], 2400 | "text/plain": [ 2401 | " name object\n", 2402 | "0 Catch22 #sk-6e583018-f2e4-47c7-a55a-f306e319e22c {color: black;background-color: white;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c pre{padding: 0;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-toggleable {background-color: white;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 
1px;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-estimator:hover {background-color: #d4ebff;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-item {z-index: 1;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-parallel-item:only-child::after {width: 0;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: 
relative;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-label label {font-family: monospace;font-weight: bold;background-color: white;display: inline-block;line-height: 1.2em;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-text-repr-fallback {display: none;}
ClassifierPipeline(classifier=KNeighborsTimeSeriesClassifier(),\n",
2477 |        "                   transformers=[ExponentTransformer()])
Please rerun this cell to show the HTML repr or trust the notebook.
" 2479 | ], 2480 | "text/plain": [ 2481 | "ClassifierPipeline(classifier=KNeighborsTimeSeriesClassifier(),\n", 2482 | " transformers=[ExponentTransformer()])" 2483 | ] 2484 | }, 2485 | "execution_count": 35, 2486 | "metadata": {}, 2487 | "output_type": "execute_result" 2488 | } 2489 | ], 2490 | "source": [ 2491 | "from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier\n", 2492 | "from sktime.transformations.series.exponent import ExponentTransformer\n", 2493 | "\n", 2494 | "pipe = ExponentTransformer() * KNeighborsTimeSeriesClassifier()\n", 2495 | "\n", 2496 | "# this constructs a ClassifierPipeline, which is also a classifier\n", 2497 | "pipe" 2498 | ] 2499 | }, 2500 | { 2501 | "cell_type": "code", 2502 | "execution_count": 36, 2503 | "metadata": {}, 2504 | "outputs": [], 2505 | "source": [ 2506 | "# alternative to construct:\n", 2507 | "from sktime.pipeline import make_pipeline\n", 2508 | "\n", 2509 | "pipe = make_pipeline(ExponentTransformer(), KNeighborsTimeSeriesClassifier())" 2510 | ] 2511 | }, 2512 | { 2513 | "cell_type": "code", 2514 | "execution_count": 37, 2515 | "metadata": {}, 2516 | "outputs": [ 2517 | { 2518 | "data": { 2519 | "text/html": [ 2520 | "
ClassifierPipeline(classifier=KNeighborsTimeSeriesClassifier(),\n",
2521 |        "                   transformers=[ExponentTransformer()])
Please rerun this cell to show the HTML repr or trust the notebook.
" 2523 | ], 2524 | "text/plain": [ 2525 | "ClassifierPipeline(classifier=KNeighborsTimeSeriesClassifier(),\n", 2526 | " transformers=[ExponentTransformer()])" 2527 | ] 2528 | }, 2529 | "execution_count": 37, 2530 | "metadata": {}, 2531 | "output_type": "execute_result" 2532 | } 2533 | ], 2534 | "source": [ 2535 | "from sktime.datasets import load_unit_test\n", 2536 | "\n", 2537 | "X_train, y_train = load_unit_test(split=\"TRAIN\")\n", 2538 | "X_test, _ = load_unit_test(split=\"TEST\")\n", 2539 | "\n", 2540 | "# this is a classifier with the same interface as knn-classifier\n", 2541 | "# first applies exponent transform, then knn-classifier\n", 2542 | "pipe.fit(X_train, y_train)" 2543 | ] 2544 | }, 2545 | { 2546 | "attachments": {}, 2547 | "cell_type": "markdown", 2548 | "metadata": {}, 2549 | "source": [ 2550 | "`sktime` transformers pipeline with `sklearn` classifiers!\n", 2551 | "\n", 2552 | "This allows building \"time series feature extraction then `sklearn` classify\" pipelines:" 2553 | ] 2554 | }, 2555 | { 2556 | "cell_type": "code", 2557 | "execution_count": 38, 2558 | "metadata": {}, 2559 | "outputs": [ 2560 | { 2561 | "data": { 2562 | "text/html": [ 2563 | "
SklearnClassifierPipeline(classifier=RandomForestClassifier(),\n",
2564 |        "                          transformers=[SummaryTransformer()])
Please rerun this cell to show the HTML repr or trust the notebook.
" 2566 | ], 2567 | "text/plain": [ 2568 | "SklearnClassifierPipeline(classifier=RandomForestClassifier(),\n", 2569 | " transformers=[SummaryTransformer()])" 2570 | ] 2571 | }, 2572 | "execution_count": 38, 2573 | "metadata": {}, 2574 | "output_type": "execute_result" 2575 | } 2576 | ], 2577 | "source": [ 2578 | "from sklearn.ensemble import RandomForestClassifier\n", 2579 | "from sktime.transformations.series.summarize import SummaryTransformer\n", 2580 | "\n", 2581 | "# specify summary transformer\n", 2582 | "summary_rf = SummaryTransformer() * RandomForestClassifier()\n", 2583 | "\n", 2584 | "summary_rf.fit(X_train, y_train)" 2585 | ] 2586 | }, 2587 | { 2588 | "attachments": {}, 2589 | "cell_type": "markdown", 2590 | "metadata": {}, 2591 | "source": [ 2592 | "### 5.4.3 Using transformers to deal with unequal length or missing values" 2593 | ] 2594 | }, 2595 | { 2596 | "attachments": {}, 2597 | "cell_type": "markdown", 2598 | "metadata": {}, 2599 | "source": [ 2600 | "pro tip: useful transformers to pipeline are those that \"improve\" capabilities!\n", 2601 | "\n", 2602 | "Search for these transformer tags:\n", 2603 | "\n", 2604 | "* `\"capability:unequal_length:removes\"` - ensures all instances in the panel have equal length afterwards. Examples: padding, cutting, resampling.\n", 2605 | "* `\"capability:missing_values:removes\"` - removes all missing values from the data (e.g., series, panel) passed to it. Example: mean imputation" 2606 | ] 2607 | }, 2608 | { 2609 | "cell_type": "code", 2610 | "execution_count": 39, 2611 | "metadata": {}, 2612 | "outputs": [ 2613 | { 2614 | "data": { 2615 | "text/html": [ 2616 | "
\n", 2617 | "\n", 2630 | "\n", 2631 | " \n", 2632 | " \n", 2633 | " \n", 2634 | " \n", 2635 | " \n", 2636 | " \n", 2637 | " \n", 2638 | " \n", 2639 | " \n", 2640 | " \n", 2641 | " \n", 2642 | " \n", 2643 | " \n", 2644 | " \n", 2645 | " \n", 2646 | " \n", 2647 | " \n", 2648 | " \n", 2649 | " \n", 2650 | " \n", 2651 | " \n", 2652 | " \n", 2653 | " \n", 2654 | " \n", 2655 | " \n", 2656 | " \n", 2657 | " \n", 2658 | " \n", 2659 | " \n", 2660 | " \n", 2661 | " \n", 2662 | " \n", 2663 | " \n", 2664 | " \n", 2665 | " \n", 2666 | " \n", 2667 | " \n", 2668 | " \n", 2669 | " \n", 2670 | " \n", 2671 | " \n", 2672 | " \n", 2673 | " \n", 2674 | " \n", 2675 | "
nameobject
0ClearSky<class 'sktime.transformations.series.clear_sk...
1IntervalSegmenter<class 'sktime.transformations.panel.segment.I...
2PaddingTransformer<class 'sktime.transformations.panel.padder.Pa...
3RandomIntervalSegmenter<class 'sktime.transformations.panel.segment.R...
4SlopeTransformer<class 'sktime.transformations.panel.slope.Slo...
5TimeBinAggregate<class 'sktime.transformations.series.binning....
6TruncationTransformer<class 'sktime.transformations.panel.truncatio...
\n", 2676 | "
" 2677 | ], 2678 | "text/plain": [ 2679 | " name object\n", 2680 | "0 ClearSky \n", 2710 | "\n", 2723 | "\n", 2724 | " \n", 2725 | " \n", 2726 | " \n", 2727 | " \n", 2728 | " \n", 2729 | " \n", 2730 | " \n", 2731 | " \n", 2732 | " \n", 2733 | " \n", 2734 | " \n", 2735 | " \n", 2736 | " \n", 2737 | " \n", 2738 | " \n", 2739 | " \n", 2740 | " \n", 2741 | " \n", 2742 | " \n", 2743 | "
nameobject
0ClearSky<class 'sktime.transformations.series.clear_sk...
1Imputer<class 'sktime.transformations.series.impute.I...
\n", 2744 | "" 2745 | ], 2746 | "text/plain": [ 2747 | " name object\n", 2748 | "0 ClearSky /model --env-manager local --host 127.0.0.1\n", 300 | "```\n", 301 | "\n", 302 | "with `run_id` as obtained in the \"save\" step.\n", 303 | "\n", 304 | "Then, run the below model scoring script to request a prediction from the served model." 305 | ] 306 | }, 307 | { 308 | "attachments": {}, 309 | "cell_type": "markdown", 310 | "id": "d889b702", 311 | "metadata": {}, 312 | "source": [ 313 | "for serving the model to an **endpoint in the cloud** (e.g. Azure ML, AWS SageMaker, etc.):\n", 314 | "\n", 315 | "use [MLflow deployment tools](https://mlflow.org/docs/latest/models.html#built-in-deployment-tools):" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "id": "58c11c04", 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "import pandas as pd\n", 326 | "import requests\n", 327 | "from sktime.datasets import load_longley\n", 328 | "from sktime.forecasting.model_selection import temporal_train_test_split\n", 329 | "\n", 330 | "y, X = load_longley()\n", 331 | "y_train, y_test, X_train, X_test = temporal_train_test_split(y, X)\n", 332 | "\n", 333 | "# Define local host and endpoint url\n", 334 | "host = \"127.0.0.1\"\n", 335 | "url = f\"http://{host}:5000/invocations\"\n", 336 | "\n", 337 | "# Model scoring via REST API requires transforming the configuration DataFrame\n", 338 | "# into JSON format. As numpy ndarray type is not JSON serializable we need to\n", 339 | "# convert the exogenous regressor into a list. The wrapper instance will convert\n", 340 | "# the list back to ndarray type as required by sktime predict methods. 
For more\n", 341 | "# details read the MLflow deployment API reference.\n", 342 | "# (https://mlflow.org/docs/latest/models.html#deploy-mlflow-models)\n", 343 | "X_test_list = X_test.to_numpy().tolist()\n", 344 | "predict_conf = pd.DataFrame(\n", 345 | " [\n", 346 | " {\n", 347 | " \"fh\": [1, 2, 3],\n", 348 | " \"predict_method\": \"predict_interval\",\n", 349 | " \"coverage\": [0.9, 0.95],\n", 350 | " \"X\": X_test_list,\n", 351 | " }\n", 352 | " ]\n", 353 | ")\n", 354 | "\n", 355 | "# Create dictionary with pandas DataFrame in the split orientation\n", 356 | "json_data = {\"dataframe_split\": predict_conf.to_dict(orient=\"split\")}\n", 357 | "\n", 358 | "# Score model\n", 359 | "response = requests.post(url, json=json_data)\n", 360 | "print(response.json())" 361 | ] 362 | }, 363 | { 364 | "attachments": {}, 365 | "cell_type": "markdown", 366 | "id": "2d1c59a2", 367 | "metadata": {}, 368 | "source": [ 369 | "---\n", 370 | "### Credits: notebook 6 - deploy to production with mlflow / mlflavors\n", 371 | "\n", 372 | "notebook creation: benjaminbluhm\n", 373 | "\n", 374 | "minor rearranging by fkiraly\n", 375 | "\n", 376 | "mlflavors, `sktime` mlflow interface: benjaminbluhm" 377 | ] 378 | } 379 | ], 380 | "metadata": { 381 | "kernelspec": { 382 | "display_name": "Python 3 (ipykernel)", 383 | "language": "python", 384 | "name": "python3" 385 | }, 386 | "language_info": { 387 | "codemirror_mode": { 388 | "name": "ipython", 389 | "version": 3 390 | }, 391 | "file_extension": ".py", 392 | "mimetype": "text/x-python", 393 | "name": "python", 394 | "nbconvert_exporter": "python", 395 | "pygments_lexer": "ipython3", 396 | "version": "3.10.11" 397 | } 398 | }, 399 | "nbformat": 4, 400 | "nbformat_minor": 5 401 | } 402 | -------------------------------------------------------------------------------- /notebooks/09_outro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": 
"markdown", 6 | "id": "37997396", 7 | "metadata": {}, 8 | "source": [ 9 | "# Summary & get involved!" 10 | ] 11 | }, 12 | { 13 | "attachments": {}, 14 | "cell_type": "markdown", 15 | "id": "9936be02", 16 | "metadata": {}, 17 | "source": [ 18 | "- `sklearn` / `sktime` interface:\n", 19 | " - unified interface for objects/estimators\n", 20 | " - modular design, strategy pattern\n", 21 | " - composable, composites are interface homogeneous\n", 22 | " - simple specification language and parameter interface\n", 23 | " - visually informative pretty printing" 24 | ] 25 | }, 26 | { 27 | "attachments": {}, 28 | "cell_type": "markdown", 29 | "id": "7820cc3c", 30 | "metadata": {}, 31 | "source": [ 32 | "* Forecasting module:\n", 33 | " - Univariate/multivariate forecasting (stats and ML)\n", 34 | " - Use of exogenous data\n", 35 | " - Probabilistic forecasting\n", 36 | " - Hierarchical forecasting" 37 | ] 38 | }, 39 | { 40 | "attachments": {}, 41 | "cell_type": "markdown", 42 | "id": "cd081e8c", 43 | "metadata": {}, 44 | "source": [ 45 | "* Transformers & pipelines\n", 46 | "\n", 47 | " * sequential pipelines\n", 48 | " * feature engineering, postproc\n", 49 | " * dunders `*`, `+` etc\n", 50 | " * tuning: parameter est., backtest-gridsearch, autoML" 51 | ] 52 | }, 53 | { 54 | "attachments": {}, 55 | "cell_type": "markdown", 56 | "id": "4a55d803", 57 | "metadata": {}, 58 | "source": [ 59 | "* APIs, engineering & deployment\n", 60 | "\n", 61 | " * modular unified framework interface\n", 62 | " * multiple learning tasks, \"what is my task\" guide\n", 63 | " * dependency management at estimator level\n", 64 | " * 3rd party extensible via templates & test suite\n", 65 | " * deploy via `mlflow` / `mlflavors`" 66 | ] 67 | }, 68 | { 69 | "attachments": {}, 70 | "cell_type": "markdown", 71 | "id": "fbb8d3c5", 72 | "metadata": {}, 73 | "source": [ 74 | "Further reading:\n", 75 | "\n", 76 | "* main `sktime` [tutorials on 
binder](https://mybinder.org/v2/gh/sktime/sktime/main?filepath=examples)\n", 77 | "* recorded [video tutorials](https://www.youtube.com/playlist?list=PLKs3UgGjlWHqNzu0LEOeLKvnjvvest2d0)\n", 78 | "* find a bug or typo? [tutorial feedback thread](https://github.com/sktime/sktime/issues/1447)" 79 | ] 80 | }, 81 | { 82 | "attachments": {}, 83 | "cell_type": "markdown", 84 | "id": "b66e01a8", 85 | "metadata": {}, 86 | "source": [ 87 | "---" 88 | ] 89 | }, 90 | { 91 | "attachments": {}, 92 | "cell_type": "markdown", 93 | "id": "b6da2ae3", 94 | "metadata": {}, 95 | "source": [ 96 | "## Join sktime!\n", 97 | "\n", 98 | "### Vision statement\n", 99 | "\n", 100 | "* an easy-to-use, easy-to-extend, comprehensive **python framework** for ML and AI with time series\n", 101 | "* **open source, permissive license, free to use**\n", 102 | "* **openly and transparently governed**\n", 103 | "* **friendly, responsive, kind and inclusive** community, with an active commitment to ensure fairness and equal opportunity\n", 104 | "* an academically and commercially **neutral space**, with an **ecosystem integration** ambition and neutral point of view\n", 105 | "* an **educational platform**, providing mentoring and upskilling opportunities for all career stages, especially early career\n", 106 | "\n", 107 | "https://opendatascience.com/sktime-python-toolbox-for-machine-learning-with-time-series/\n", 108 | "\n", 109 | "**EVERYONE CAN JOIN! 
EVERYONE CAN BECOME A COMMUNITY LEADER!**\n", 110 | "\n", 111 | "* join our community discord ([join link](https://discord.com/invite/54ACzaFsn7))!\n", 112 | " * **help-desk for Q&A** and getting started as a user!\n", 113 | " * **dev-chat** for help getting started with open source!\n", 114 | " * contributor [getting started guide](https://github.com/sktime/sktime/issues/1147)\n", 115 | " * [good first issues](https://github.com/sktime/sktime/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22)\n", 116 | "* join `sktime`'s user representatives or governance working group\n", 117 | " * register as a user ([form](https://forms.gle/eVuzrCjKDRupxawL7)) - roadmap, bugfix prio, elections\n", 118 | " * [roadmap 2023-2024 planning](https://github.com/sktime/sktime/issues/4691)\n", 119 | " * join [council sessions](https://github.com/sktime/community-org/tree/main/community_council/previous_meetings) and give input\n", 120 | "\n", 121 | "Opportunities:\n", 122 | "\n", 123 | "* regular **job opportunities**, watch the jobs channel on discord\n", 124 | "* sktime **mentoring programme**: [link](https://github.com/sktime/mentoring)\n", 125 | " * applications on a rolling basis\n", 126 | " * summer programme is starting over the next weeks!\n", 127 | "\n", 128 | "Events & meetups:\n", 129 | "\n", 130 | "* regular **community collaboration sessions**\n", 131 | " * meet-ups Fri 3pm UTC on [discord](https://discord.com/invite/54ACzaFsn7)\n", 132 | "* multiple **Sprints and Dev Days** per year\n", 133 | " * [EuroPython 2023, Prague](https://ep2023.europython.eu/), 22-23 July week-end:\n", 134 | " * new contributor **onboarding sprint**!\n", 135 | " * **user feedback session**!\n", 136 | " * **developer meet-up**!\n", 137 | "\n", 138 | "Support us if `sktime` has generated value for you!\n", 139 | "\n", 140 | "* star us on [GitHub](https://github.com/sktime/sktime)\n", 141 | "* follow us on [LinkedIn](https://www.linkedin.com/company/scikit-time/)\n", 142 | "* donate!
Every cent helps the time series ecosystem ([GitHub sponsors](https://github.com/sponsors/sktime))" 143 | ] 144 | }, 145 | { 146 | "attachments": {}, 147 | "cell_type": "markdown", 148 | "id": "adfa5420", 149 | "metadata": {}, 150 | "source": [ 151 | "---" 152 | ] 153 | }, 154 | { 155 | "attachments": {}, 156 | "cell_type": "markdown", 157 | "id": "286d4c8d", 158 | "metadata": {}, 159 | "source": [ 160 | "\n", 161 | "## Thank you for your attention\n", 162 | "\n", 163 | "\"Sktime" 164 | ] 165 | }, 166 | { 167 | "attachments": {}, 168 | "cell_type": "markdown", 169 | "id": "2d1c59a2", 170 | "metadata": {}, 171 | "source": [ 172 | "---\n", 173 | "### Credits: sktime\n", 174 | "\n", 175 | "#### many thanks to [all `sktime` contributors!](https://www.sktime.net/en/latest/about/contributors.html)\n", 176 | "\n", 177 | "Citations & credits in academic research papers:\n", 178 | "\n", 179 | "`sktime` toolbox:\n", 180 | " [sktime: A unified interface for machine learning with time series](https://arxiv.org/abs/1909.07872)\n", 181 | "\n", 182 | "`sktime` design principles: [Designing machine learning toolboxes: Concepts, principles and patterns](https://arxiv.org/abs/2101.04938)" 183 | ] 184 | } 185 | ], 186 | "metadata": { 187 | "kernelspec": { 188 | "display_name": "Python 3 (ipykernel)", 189 | "language": "python", 190 | "name": "python3" 191 | }, 192 | "language_info": { 193 | "codemirror_mode": { 194 | "name": "ipython", 195 | "version": 3 196 | }, 197 | "file_extension": ".py", 198 | "mimetype": "text/x-python", 199 | "name": "python", 200 | "nbconvert_exporter": "python", 201 | "pygments_lexer": "ipython3", 202 | "version": "3.10.11" 203 | } 204 | }, 205 | "nbformat": 4, 206 | "nbformat_minor": 5 207 | } 208 | -------------------------------------------------------------------------------- /notebooks/hierarchical_demo_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from 
def load_product_hierarchy():
    """Build the synthetic product-hierarchy sales table used in the demo.

    The data is generated with ``_make_hierarchical`` (fixed ``random_state``),
    trimmed so that each product line keeps two product groups, relabelled
    with readable names, aggregated per calendar month and finally perturbed
    with multiplicative Gaussian noise from a fixed seed, so repeated calls
    use identical generator settings.

    Returns
    -------
    y : pandas.DataFrame
        Frame indexed by ``("Product line", "Product group", "Date")`` where
        ``Date`` is a monthly period; values are the noisy monthly sums
        rounded to zero decimals.
    """
    years = 5
    # 365 * years time points per series; the original comment describes these
    # as daily observations -- presumably the generator's default frequency.
    n_points = 365 * years

    generated = _make_hierarchical(
        hierarchy_levels=(2, 4),
        min_timepoints=n_points,
        max_timepoints=n_points,
        random_state=0,
    )

    # Remove the (line, group) combinations that are not part of the demo
    # hierarchy, leaving two groups under each product line.
    excluded_pairs = [
        ("h0_0", "h1_2"),
        ("h0_0", "h1_3"),
        ("h0_1", "h1_0"),
        ("h0_1", "h1_1"),
    ]
    trimmed = generated.drop(index=excluded_pairs)

    # Swap the generic node labels for human-readable product names.
    readable_labels = {
        "h0_0": "Food preparation",
        "h0_1": "Food preservation",
        "h1_0": "Hobs",
        "h1_1": "Ovens",
        "h1_2": "Fridges",
        "h1_3": "Freezers",
    }
    relabelled = trimmed.rename(index=readable_labels)

    # Flatten the index into columns and give those columns readable names.
    readable_columns = {
        "h0": "Product line",
        "h1": "Product group",
        "time": "Date",
        "c0": "Sales",
    }
    y = relabelled.reset_index().rename(columns=readable_columns)

    # Collapse timestamps to monthly periods, then sum within each month.
    y["Date"] = y["Date"].dt.to_period("M")
    monthly = y.groupby(by=["Product line", "Product group", "Date"]).sum()

    # Multiplicative noise (mean 1, std 0.3) so the series differ from each
    # other; the seed is fixed so the noise draw itself is repeatable.
    rng = np.random.RandomState(seed=0)
    noise = rng.normal(1, 0.3, np.shape(monthly))

    return (monthly * noise).round(0)
-------------------------------------------------------------------------------- /notebooks/img/implementing_estimators.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/implementing_estimators.jpg -------------------------------------------------------------------------------- /notebooks/img/implementing_estimators.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/implementing_estimators.png -------------------------------------------------------------------------------- /notebooks/img/sklearn-unified-interface.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/sklearn-unified-interface.jpg -------------------------------------------------------------------------------- /notebooks/img/sktime-logo-text-horizontal.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/sktime-logo-text-horizontal.jpg -------------------------------------------------------------------------------- /notebooks/img/tasks-forecasting.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/tasks-forecasting.jpg -------------------------------------------------------------------------------- /notebooks/img/tasks-forecasting.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/tasks-forecasting.png -------------------------------------------------------------------------------- /notebooks/img/tasks-tsc-large.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/tasks-tsc-large.png -------------------------------------------------------------------------------- /notebooks/img/tasks-tsc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/tasks-tsc.png -------------------------------------------------------------------------------- /notebooks/img/ts-tasks.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/ts-tasks.jpg -------------------------------------------------------------------------------- /notebooks/img/ts-tasks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/ts-tasks.png -------------------------------------------------------------------------------- /notebooks/img/unified_framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/unified_framework.png 
-------------------------------------------------------------------------------- /notebooks/img/verdena_shapelet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/verdena_shapelet.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | sktime[all_extras]==0.20.1 2 | mlflavors==0.1.0 3 | --------------------------------------------------------------------------------