├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── images
    ├── hierarchy.png
    ├── sktime-logo-text-horizontal.png
    ├── tabularization.png
    ├── tracking_artifact_ui.png
    └── ts_quiz.png
├── notebooks
    ├── 01_introduction.ipynb
    ├── 02_timeseries.ipynb
    ├── 03_forecasting.ipynb
    ├── 04_feateng_pipe_tune.ipynb
    ├── 05_panel_tasks.ipynb
    ├── 06_distances_kernels_alignment.ipynb
    ├── 07_MLengineering.ipynb
    ├── 08_mlflow.ipynb
    ├── 09_outro.ipynb
    ├── hierarchical_demo_utils.py
    └── img
    │   ├── ask_chatgpt.png
    │   ├── estimator-conceptual-model.jpg
    │   ├── implementing_estimators.jpg
    │   ├── implementing_estimators.png
    │   ├── sklearn-unified-interface.jpg
    │   ├── sktime-logo-text-horizontal.jpg
    │   ├── tasks-forecasting.jpg
    │   ├── tasks-forecasting.png
    │   ├── tasks-tsc-large.png
    │   ├── tasks-tsc.png
    │   ├── ts-tasks.jpg
    │   ├── ts-tasks.png
    │   ├── unified_framework.png
    │   └── verdena_shapelet.png
└── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | # IDE
132 | .vscode/
133 | 
134 | # MacOS
135 | .DS_Store
136 | *.csv
137 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | 
 3 | - repo: https://github.com/pre-commit/pre-commit-hooks
 4 |   rev: v4.0.1
 5 |   hooks:
 6 |   - id: check-added-large-files
 7 |     args: ['--maxkb=1000']
 8 |   - id: check-case-conflict
 9 |   - id: check-merge-conflict
10 |   - id: check-symlinks
11 |   - id: check-yaml
12 |   - id: debug-statements
13 |   - id: end-of-file-fixer
14 |   - id: fix-encoding-pragma
15 |   - id: requirements-txt-fixer
16 |   - id: trailing-whitespace
17 | 
18 | - repo: https://github.com/pycqa/isort
19 |   rev: 5.8.0
20 |   hooks:
21 |     - id: isort
22 |       name: isort (python)
23 | 
24 | - repo: https://github.com/psf/black
25 |   rev: 21.5b1
26 |   hooks:
27 |   - id: black
28 |     language_version: python3
29 |     # args: [--line-length 79]
30 | 
31 | - repo: https://github.com/pycqa/flake8
32 |   rev: 3.9.2
33 |   hooks:
34 |   - id: flake8
35 |     exclude: docs/conf.py
36 |     additional_dependencies: [flake8-bugbear, flake8-print]
37 |     args: ["--max-line-length=88"]
38 | 
39 | - repo: https://github.com/nbQA-dev/nbQA
40 |   rev: 0.13.0
41 |   hooks:
42 |   - id: nbqa-black
43 |     args: [--nbqa-mutate, --nbqa-dont-skip-bad-cells]
44 |     additional_dependencies: [black==20.8b1]
45 |   # - id: nbqa-isort
46 |   #   args: [--nbqa-mutate, --nbqa-dont-skip-bad-cells]
47 |   #   additional_dependencies: [isort==5.6.4]
48 |   - id: nbqa-flake8
49 |     args: [--nbqa-dont-skip-bad-cells, "--extend-ignore=E402,E203", "--max-line-length=88"]
50 |     additional_dependencies: [flake8==3.8.3]
51 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2023 The sktime developers.
 4 | 
 5 | All rights reserved.
 6 | 
 7 | Redistribution and use in source and binary forms, with or without
 8 | modification, are permitted provided that the following conditions are met:
 9 | 
10 | * Redistributions of source code must retain the above copyright notice, this
11 |   list of conditions and the following disclaimer.
12 | 
13 | * Redistributions in binary form must reproduce the above copyright notice,
14 |   this list of conditions and the following disclaimer in the documentation
15 |   and/or other materials provided with the distribution.
16 | 
17 | * Neither the name of the copyright holder nor the names of its
18 |   contributors may be used to endorse or promote products derived from
19 |   this software without specific prior written permission.
20 | 
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ![sktime logo](images/sktime-logo-text-horizontal.png)
  2 | 
  3 | Welcome to the sktime tutorial at EuroPython 2023
  4 | =================================================
  5 | 
  6 | This tutorial is about [sktime] - a unified framework for machine learning with time series. sktime contains algorithms and tools for building, applying, evaluating modular pipelines and composites for a variety of time series learning tasks, including forecasting, classification, regression.
  7 | 
  8 | `sktime` is easily extensible by anyone, and interoperable with the python data science stack.
  9 | 
 10 | This is an introductory `sktime` half-day tutorial with:
 11 | 
 12 | * a general introduction to `sktime`
 13 | * forecasting with `sktime` - uni/multivariate, hierarchical/global, probabilistic
 14 | * feature extraction, transformation pipelines, parameter tuning, autoML
 15 | * time series classification, regression, and clustering with `sktime`
 16 | * customizing time series distances, kernels, time series aligners and alignment distances
 17 | * engineering topics:
 18 |     * APIs, estimator and dependency management
 19 |     * writing `sktime` compatible 3rd party estimators
 20 | * deploying `sktime` in production using `mlflow` with the `mlflavors` plugin
 21 | 
 22 | [sktime]: https://sktime.net
 23 | 
 24 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/sktime/sktime-tutorial-europython-2023/main?filepath=notebooks) [![!discord](https://img.shields.io/static/v1?logo=discord&label=discord&message=chat&color=lightgreen)](https://discord.com/invite/54ACzaFsn7) [![!linkedin](https://img.shields.io/static/v1?logo=linkedin&label=LinkedIn&message=news&color=lightblue)](https://www.linkedin.com/company/scikit-time/)
 25 | 
 26 | ## :rocket: How to get started
 27 | 
 28 | In the tutorial, we will move through notebooks section by section.
 29 | 
 30 | You have different options for running the tutorial notebooks:
 31 | 
 32 | * Run the notebooks in the cloud on [Binder] - for this you don't have to install anything!
 33 | * Run the notebooks on your machine. [Clone] this repository, get [conda], install the required packages (`sktime`, `seaborn`, `jupyter`) in an environment, and open the notebooks with that environment. For detailed instructions, see below. For troubleshooting, see sktime's more detailed [installation instructions].
 34 | * or, use python venv, and/or an editable install of this repo as a package. Instructions below.
 35 | 
 36 | [Binder]: https://mybinder.org/v2/gh/sktime/sktime-tutorial-europython-2023/main?filepath=notebooks
 37 | [clone]: https://help.github.com/en/github/creating-cloning-and-archiving-repositories/cloning-a-repository
 38 | [conda]: https://docs.conda.io/en/latest/
 39 | [installation instructions]: https://www.sktime.net/en/latest/installation.html
 40 | 
 41 | Please let us know on the [sktime discord](https://discord.com/invite/54ACzaFsn7) if you have any issues during the conference, or join to ask for help anytime.
 42 | 
 43 | ## :bulb: Description
 44 | 
 45 | This tutorial presents [sktime] - a unified framework for machine learning with time series. sktime covers multiple time series learning problems, including time series transformation, classification and forecasting, among others. `sktime` allows you to easily apply an algorithm for one task to solve another (e.g. a scikit-learn regressor to solve a forecasting problem). In the tutorial, you will learn about how you can identify these problems, what their key differences are and how they are related.
 46 | 
 47 | `sktime` provides various time series algorithms and modular composition tools for pipelining, ensembling and tuning.
 48 | `sktime` also provides API compatible interfaces to many popular libraries, such as `statsmodels`, `prophet`, `statsforecast`, `tslearn`, `tsfresh`, etc,
 49 | which can be readily combined using `sktime` composition patterns.
 50 | 
 51 | In this tutorial, you will learn how to use, combine, tune and evaluate different algorithms on real-world data sets.
 52 | The tutorial consists of step-by-step Jupyter Notebooks.
 53 | 
 54 | `sktime` is not just a package, but also an active community which aims to be welcoming to new joiners.
 55 | We invite anyone to get involved as a developer, user, supporter (or any combination of these).
 56 | 
 57 | ## :movie_camera: Other Tutorials:
 58 | 
 59 | - [Pydata Berlin 2022 - Advanced Forecasting Tutorial](https://www.youtube.com/watch?v=4Rf9euAhjNc)
 60 | 
 61 | - [Pydata London 2022 - How to implement your own estimator in sktime](https://www.youtube.com/watch?v=S_3ewcvs_pg)
 62 | 
 63 | - [Pydata Global 2022 - Feature extraction, Pipelines, Tuning](https://github.com/sktime/sktime-tutorial-pydata-global-2022)
 64 | 
 65 | - [Pydata London 2023 - Time Series Classification, Regression, Distances & Kernels](https://github.com/sktime/sktime-tutorial-pydata-london-2023)
 66 | 
 67 | ## :wave: How to contribute
 68 | 
 69 | If you're interested in contributing to sktime, you can find out more about how to get involved [here](https://www.sktime.net/en/latest/get_involved.html).
 70 | 
 71 | Any contributions are welcome, not just code!
 72 | 
 73 | We also invite everyone to the "getting started with contributions" onboarding feature at the community sprint at EuroPython (July 22-23)!
 74 | 
 75 | ## Installation instructions for local use
 76 | 
 77 | To run the notebooks locally, you will need:
 78 | 
 79 | * a local repository clone
 80 | * a python environment with required packages installed
 81 | 
 82 | ### Cloning the repository
 83 | 
 84 | To clone the repository locally:
 85 | 
 86 | `git clone https://github.com/sktime/sktime-tutorial-europython-2023.git`
 87 | 
 88 | ### Using conda env
 89 | 
 90 | 1. Create a python virtual environment:
 91 | `conda create -y -n europython_sktime python=3.9`
 92 | 2. Install required packages:
 93 | `conda install -y -n europython_sktime -c conda-forge pip sktime seaborn jupyter pmdarima statsmodels dtw-python`
 94 | 3. Activate your environment:
 95 | `conda activate europython_sktime`
 96 | 4. If using jupyter: make the environment available in jupyter:
 97 | `python -m ipykernel install --user --name=europython_sktime`
 98 | 
 99 | ### Using python venv
100 | 
101 | 1. Create a python virtual environment:
102 | `python -m venv europython_sktime`
103 | 2. Activate your environment:
104 | `source europython_sktime/bin/activate`
105 | 3. Install the requirements:
106 | `pip install sktime seaborn jupyter pmdarima statsmodels dtw-python`
107 | 4. If using jupyter: make the environment available in jupyter:
108 | `python -m ipykernel install --user --name=europython_sktime`
109 | 


--------------------------------------------------------------------------------
/images/hierarchy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/images/hierarchy.png


--------------------------------------------------------------------------------
/images/sktime-logo-text-horizontal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/images/sktime-logo-text-horizontal.png


--------------------------------------------------------------------------------
/images/tabularization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/images/tabularization.png


--------------------------------------------------------------------------------
/images/tracking_artifact_ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/images/tracking_artifact_ui.png


--------------------------------------------------------------------------------
/images/ts_quiz.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/images/ts_quiz.png


--------------------------------------------------------------------------------
/notebooks/01_introduction.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "attachments": {},
  5 |    "cell_type": "markdown",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "![](./img/sktime-logo-text-horizontal.jpg)"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "attachments": {},
 13 |    "cell_type": "markdown",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "### Agenda for today\n",
 17 |     "\n",
 18 |     "1. General introduction to `sktime` & `sklearn`\n",
 19 |     "\n",
 20 |     "2. forecasting with `sktime`\n",
 21 |     "\n",
 22 |     "3. feature extraction, tuning, autoML\n",
 23 |     "\n",
 24 |     "4. time series classification, regression, and clustering with `sktime`\n",
 25 |     "\n",
 26 |     "5. customizing time series distances, kernels, time series aligners and alignment distances\n",
 27 |     "\n",
 28 |     "6. overview from ML eng & API perspective - estimators modules, learning tasks, library\n",
 29 |     "\n",
 30 |     "7. deployment with `mlflow` / `mlflavors`"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "attachments": {},
 35 |    "cell_type": "markdown",
 36 |    "metadata": {},
 37 |    "source": [
 38 |     "### Running the notebooks"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "attachments": {},
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "all notebooks available on `github.com/sktime`\n",
 47 |     "\n",
 48 |     "repository: `github.com/sktime/sktime-tutorial-europython-2023`\n",
 49 |     "\n",
 50 |     "* README instructions to run notebooks locally\n",
 51 |     "* binder to run notebooks in the cloud (if wifi allows)\n",
 52 |     "\n",
 53 |     "help, Q&A, developer chat in EuroPython [`sktime` tutorial discord thread](https://discord.com/channels/1120766458528542794/1130170803137282118)"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "attachments": {},
 58 |    "cell_type": "markdown",
 59 |    "metadata": {},
 60 |    "source": [
 61 |     "## 1 - Introduction to ``sktime``\n",
 62 |     "\n",
 63 |     "### 1.1 What is ``sktime``?\n",
 64 |     "\n",
 65 |     "- `sktime` is a python library for time series learning tasks!\n",
 66 |     "  - check [our website](https://www.sktime.net/en/latest/index.html)!\n",
 67 |     "  - integrative framework layer in the time series space"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "attachments": {},
 72 |    "cell_type": "markdown",
 73 |    "metadata": {},
 74 |    "source": [
 75 |     "- `sklearn` / `sktime` interface:\n",
 76 |     "  - unified interface for objects/estimators\n",
 77 |     "  - modular design, strategy pattern\n",
 78 |     "  - composable, composites are interface homogeneous\n",
 79 |     "  - simple specification language and parameter interface"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "attachments": {},
 84 |    "cell_type": "markdown",
 85 |    "metadata": {},
 86 |    "source": [
 87 |     "- `sktime` is a vibrant, welcoming community with mentoring opportunities!\n",
 88 |     "  - We *love* new contributors. Especially if you are new to open source!\n",
 89 |     "      - join the ``sktime`` sprint this week-end at EuroPython! Gentle intro to contributing!\n",
 90 |     "  - Check out the ``sktime`` [new contributors guide](https://www.sktime.net/en/latest/get_involved/contributing.html)\n",
 91 |     "  - join our [discord](https://discord.com/invite/54ACzaFsn7) and/or one of our regular meetups!\n",
 92 |     "  - follow us on [LinkedIn](https://www.linkedin.com/company/scikit-time/)!\n",
 93 |     "  - star us on [GitHub](https://github.com/sktime/sktime)!"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "attachments": {},
 98 |    "cell_type": "markdown",
 99 |    "metadata": {},
100 |    "source": [
101 |     "### 1.2 sklearn unified interface - the strategy pattern\n",
102 |     "\n",
103 |     "`sklearn` provides a unified interface to multiple learning tasks including classification, regression.\n",
104 |     "\n",
105 |     "any (supervised) estimator has the following interface points\n",
106 |     "\n",
107 |     "1. **Instantiate** your model of choice, with parameter settings\n",
108 |     "2. **Fit** the instance of your model\n",
109 |     "3. Use that fitted instance to **predict** new data!\n",
110 |     "\n",
111 |     "![](./img/estimator-conceptual-model.jpg)"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": 1,
117 |    "metadata": {},
118 |    "outputs": [],
119 |    "source": [
120 |     "# get data to use the model on\n",
121 |     "from sklearn.datasets import load_iris\n",
122 |     "from sklearn.model_selection import train_test_split\n",
123 |     "\n",
124 |     "X, y = load_iris(return_X_y=True, as_frame=True)\n",
125 |     "X_train, X_test, y_train, y_test = train_test_split(X, y)\n",
126 |     "\n",
127 |     "# data are pd.DataFrame/pd.Series\n",
128 |     "# X_train.shape = (112, 4)  - 112 train flowers times 4 features\n",
129 |     "#                             sepal length, sepal width, petal length, petal width\n",
130 |     "# y_train.shape = (112,)    - 112 categorical labels  - one of 3 flower types 0, 1, 2\n",
131 |     "# X_test.shape = (38, 4)  - 38 test flowers times 4 features\n",
132 |     "# y_test.shape = (38,)    - 38 categorical labels - one of 3 flower types\n"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": 2,
138 |    "metadata": {},
139 |    "outputs": [
140 |     {
141 |      "data": {
142 |       "text/plain": [
143 |        "array([2, 1, 2, 0, 1, 2, 1, 1, 2, 0, 2, 0, 2, 0, 1, 1, 0, 2, 0, 0, 2, 2,\n",
144 |        "       0, 1, 1, 0, 2, 0, 0, 0, 1, 0, 0, 1, 1, 2, 0, 2])"
145 |       ]
146 |      },
147 |      "execution_count": 2,
148 |      "metadata": {},
149 |      "output_type": "execute_result"
150 |     }
151 |    ],
152 |    "source": [
153 |     "from sklearn.svm import SVC\n",
154 |     "\n",
155 |     "# 1. Instantiate SVC with parameters gamma, C\n",
156 |     "clf = SVC(gamma=0.001, C=100.)\n",
157 |     "# clf is an instance of SVC now\n",
158 |     "\n",
159 |     "# 2. Fit clf to training data = 112 feature/label pairs\n",
160 |     "clf.fit(X_train, y_train)\n",
161 |     "# clf changes state to \"fitted\", computes model\n",
162 |     "\n",
163 |     "# 3. Predict labels on test data = 38 feature vectors\n",
164 |     "y_test_pred = clf.predict(X_test)\n",
165 |     "# produces predictions for the test data, 38 labels\n",
166 |     "\n",
167 |     "y_test_pred\n",
168 |     "# y_test_pred.shape = (38,)"
169 |    ]
170 |   },
171 |   {
172 |    "attachments": {},
173 |    "cell_type": "markdown",
174 |    "metadata": {},
175 |    "source": [
176 |     "IMPORTANT: to use another classifier, only the specification line, part 1 changes!\n",
177 |     "\n",
178 |     "`SVC` could have been `RandomForestClassifier`, steps 2 and 3 remain the same - unified interface:"
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "code",
183 |    "execution_count": 3,
184 |    "metadata": {},
185 |    "outputs": [
186 |     {
187 |      "data": {
188 |       "text/plain": [
189 |        "array([2, 1, 2, 0, 1, 2, 1, 1, 2, 0, 2, 0, 2, 0, 1, 1, 0, 2, 0, 0, 2, 2,\n",
190 |        "       0, 1, 1, 0, 2, 0, 0, 0, 1, 0, 0, 1, 1, 2, 0, 2])"
191 |       ]
192 |      },
193 |      "execution_count": 3,
194 |      "metadata": {},
195 |      "output_type": "execute_result"
196 |     }
197 |    ],
198 |    "source": [
199 |     "from sklearn.ensemble import RandomForestClassifier\n",
200 |     "\n",
201 |     "# 1. Instantiate RandomForestClassifier with parameter n_estimators\n",
202 |     "clf = RandomForestClassifier(n_estimators=100)\n",
203 |     "\n",
204 |     "# 2. Fit clf to training data\n",
205 |     "clf.fit(X_train, y_train)\n",
206 |     "\n",
207 |     "# 3. Predict labels on test data\n",
208 |     "y_test_pred = clf.predict(X_test)\n",
209 |     "\n",
210 |     "y_test_pred"
211 |    ]
212 |   },
213 |   {
214 |    "attachments": {},
215 |    "cell_type": "markdown",
216 |    "metadata": {},
217 |    "source": [
218 |     "in object oriented design terminology, this is called **\"strategy pattern\"**\n",
219 |     "\n",
220 |     "= different estimators can be switched out without change to the interface\n",
221 |     "\n",
222 |     "= like a power plug adapter, it's plug&play if it conforms with the interface\n",
223 |     "\n",
224 |     "Pictorial summary:\n",
225 |     "![](./img/sklearn-unified-interface.jpg)"
226 |    ]
227 |   },
228 |   {
229 |    "attachments": {},
230 |    "cell_type": "markdown",
231 |    "metadata": {},
232 |    "source": [
233 |     "`sklearn` estimators are parametric:\n",
234 |     "\n",
235 |     "all parameters in the \"blueprint\" accessed and set via `get_params`, `set_params`:"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "code",
240 |    "execution_count": 4,
241 |    "metadata": {},
242 |    "outputs": [
243 |     {
244 |      "data": {
245 |       "text/plain": [
246 |        "{'bootstrap': True,\n",
247 |        " 'ccp_alpha': 0.0,\n",
248 |        " 'class_weight': None,\n",
249 |        " 'criterion': 'gini',\n",
250 |        " 'max_depth': None,\n",
251 |        " 'max_features': 'sqrt',\n",
252 |        " 'max_leaf_nodes': None,\n",
253 |        " 'max_samples': None,\n",
254 |        " 'min_impurity_decrease': 0.0,\n",
255 |        " 'min_samples_leaf': 1,\n",
256 |        " 'min_samples_split': 2,\n",
257 |        " 'min_weight_fraction_leaf': 0.0,\n",
258 |        " 'n_estimators': 100,\n",
259 |        " 'n_jobs': None,\n",
260 |        " 'oob_score': False,\n",
261 |        " 'random_state': None,\n",
262 |        " 'verbose': 0,\n",
263 |        " 'warm_start': False}"
264 |       ]
265 |      },
266 |      "execution_count": 4,
267 |      "metadata": {},
268 |      "output_type": "execute_result"
269 |     }
270 |    ],
271 |    "source": [
272 |     "clf.get_params()"
273 |    ]
274 |   },
275 |   {
276 |    "attachments": {},
277 |    "cell_type": "markdown",
278 |    "metadata": {},
279 |    "source": [
280 |     "### 1.3 `sktime` is devoted to time-series data analysis\n",
281 |     "\n",
282 |     "Richer space of time series tasks, compared to \"tabular\":\n",
283 |     "\n",
284 |     "- **Forecasting** - predict energy consumption tomorrow, based on past weeks\n",
285 |     "- **Classification** - classify electrocardiograms to healthy/sick, based on prior examples\n",
286 |     "- **Regression** - predict compound purity in bioreactor based on temperature/pressure profile\n",
287 |     "- **Clustering** - sort outlines of tree leaves into a small number of similar classes\n",
288 |     "- **Annotation** - identify jumps, anomalies, events in a data stream"
289 |    ]
290 |   },
291 |   {
292 |    "attachments": {},
293 |    "cell_type": "markdown",
294 |    "metadata": {},
295 |    "source": [
296 |     "`sktime` aims to provide `sklearn`-like, modular, composable, interfaces for these!"
297 |    ]
298 |   },
299 |   {
300 |    "attachments": {},
301 |    "cell_type": "markdown",
302 |    "metadata": {},
303 |    "source": [
304 |     "| Task | Status | Links |\n",
305 |     "|---|---|---|\n",
306 |     "| **Forecasting** | stable | [Tutorial](https://www.sktime.net/en/latest/examples/01_forecasting.html) · [API Reference](https://www.sktime.net/en/latest/api_reference/forecasting.html) · [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/forecasting.py)  |\n",
307 |     "| **Time Series Classification** | stable | [Tutorial](https://github.com/sktime/sktime/blob/main/examples/02_classification.ipynb) · [API Reference](https://www.sktime.net/en/latest/api_reference/classification.html) · [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/classification.py) |\n",
308 |     "| **Time Series Regression** | stable | [API Reference](https://www.sktime.net/en/latest/api_reference/regression.html) |\n",
309 |     "| **Transformations** | stable | [Tutorial](https://github.com/sktime/sktime/blob/main/examples/03_transformers.ipynb) · [API Reference](https://www.sktime.net/en/latest/api_reference/transformations.html) · [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/transformer.py)  |\n",
310 |     "| **Parameter fitting** | maturing | [API Reference](https://www.sktime.net/en/latest/api_reference/param_est.html) · [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/transformer.py)  |\n",
311 |     "| **Time Series Clustering** | maturing | [API Reference](https://www.sktime.net/en/latest/api_reference/clustering.html) ·  [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/clustering.py) |\n",
312 |     "| **Time Series Distances/Kernels** | maturing | [Tutorial](https://github.com/sktime/sktime/blob/main/examples/03_transformers.ipynb) · [API Reference](https://www.sktime.net/en/latest/api_reference/dists_kernels.html) · [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/dist_kern_panel.py) |\n",
313 |     "| **Annotation** | experimental | [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/annotation.py) |\n",
314 |     "| **Distributions and simulation** | experimental |  |"
315 |    ]
316 |   },
317 |   {
318 |    "attachments": {},
319 |    "cell_type": "markdown",
320 |    "metadata": {},
321 |    "source": [
322 |     "Example - forecasting"
323 |    ]
324 |   },
325 |   {
326 |    "attachments": {},
327 |    "cell_type": "markdown",
328 |    "metadata": {},
329 |    "source": [
330 |     "![](./img/tasks-forecasting.png)"
331 |    ]
332 |   },
333 |   {
334 |    "cell_type": "code",
335 |    "execution_count": 5,
336 |    "metadata": {},
337 |    "outputs": [],
338 |    "source": [
339 |     "from sktime.datasets import load_airline\n",
340 |     "from sktime.forecasting.naive import NaiveForecaster\n",
341 |     "import numpy as np\n",
342 |     "\n",
343 |     "# step 1: data specification\n",
344 |     "y = load_airline()\n",
345 |     "# y = pandas.Series, y.shape = (144,)\n",
346 |     "# 144 months of monthly observations\n",
347 |     "\n",
348 |     "# step 2: specifying forecasting horizon\n",
349 |     "fh = np.arange(1, 37)\n",
350 |     "# we want to forecast 36 months = 3 years\n",
351 |     "\n",
352 |     "# step 3: specifying the forecasting algorithm\n",
353 |     "forecaster = NaiveForecaster(strategy=\"last\", sp=12)\n",
354 |     "\n",
355 |     "# step 4: fitting the forecaster\n",
356 |     "forecaster.fit(y)\n",
357 |     "\n",
358 |     "# step 5: querying predictions\n",
359 |     "y_pred = forecaster.predict(fh)\n",
360 |     "# y_pred is a \"continuation\" of y\n",
361 |     "# y_pred.shape = (36,) = forecasts for 36 months"
362 |    ]
363 |   },
364 |   {
365 |    "attachments": {},
366 |    "cell_type": "markdown",
367 |    "metadata": {},
368 |    "source": [
369 |     "Example - classification"
370 |    ]
371 |   },
372 |   {
373 |    "attachments": {},
374 |    "cell_type": "markdown",
375 |    "metadata": {},
376 |    "source": [
377 |     "![](./img/tasks-tsc.png)"
378 |    ]
379 |   },
380 |   {
381 |    "cell_type": "code",
382 |    "execution_count": 6,
383 |    "metadata": {},
384 |    "outputs": [],
385 |    "source": [
386 |     "from sktime.datasets import load_osuleaf\n",
387 |     "from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier\n",
388 |     "from sktime.dists_kernels.compose_tab_to_panel import FlatDist\n",
389 |     "from sktime.dists_kernels import ScipyDist\n",
390 |     "\n",
391 |     "# step 1 - specify training data\n",
392 |     "X_train, y_train = load_osuleaf(split=\"train\", return_type=\"numpy3D\")\n",
393 |     "\n",
394 |     "# step 2 - specify data to predict labels for\n",
395 |     "X_new, _ = load_osuleaf(split=\"test\", return_type=\"numpy3D\")\n",
396 |     "X_new = X_new[:2]\n",
397 |     "\n",
398 |     "# step 3 - specify the classifier\n",
399 |     "mean_eucl_dist = FlatDist(ScipyDist())\n",
400 |     "clf = KNeighborsTimeSeriesClassifier(n_neighbors=3, distance=mean_eucl_dist)\n",
401 |     "\n",
402 |     "# step 4 - fitting the classifier\n",
403 |     "clf.fit(X_train, y_train)\n",
404 |     "\n",
405 |     "# step 5 - predict labels on new data\n",
406 |     "y_pred = clf.predict(X_new)"
407 |    ]
408 |   },
409 |   {
410 |    "attachments": {},
411 |    "cell_type": "markdown",
412 |    "metadata": {},
413 |    "source": [
414 |     "### 1.4 `sktime` integrates the time series modelling ecosystem!\n",
415 |     "\n",
416 |     "the package space for time series is highly fragmented:\n",
417 |     "\n",
418 |     "* lots of great implementations and methods out there!\n",
419 |     "* but many different interfaces, not composable like `sklearn`\n",
420 |     "\n",
421 |     "`sktime` integrates the ecosystem - in friendly collaboration with all the packages out there!\n",
422 |     "\n",
423 |     "* unified interface standard\n",
424 |     "* highly composable\n",
425 |     "* mini-package manager on estimator/module level\n",
426 |     "* easily extensible - 3rd party plugins, other packages\n",
427 |     "\n",
428 |     "![](./img/unified_framework.png)"
429 |    ]
430 |   },
431 |   {
432 |    "attachments": {},
433 |    "cell_type": "markdown",
434 |    "metadata": {},
435 |    "source": [
436 |     "### 1.5 Summary/What is next!\n",
437 |     "\n",
438 |     "- `sklearn` interface: unified interface (strategy pattern), modular, composition stable, easy specification language\n",
439 |     "- `sktime` evolves the interface for time series learning tasks\n",
440 |     "- `sktime` integrates a fragmented ecosystem with interface, composability, dependency management\n",
441 |     "\n",
442 |     "- today:\n",
443 |     "    * deep dive forecasting\n",
444 |     "    * feature extraction, tuning, autoML\n",
445 |     "    * deep dive classification and panel tasks regression, clustering\n",
446 |     "    * time series distances, kernels, alignment\n",
447 |     "    * engineering/API perspective, deployment"
448 |    ]
449 |   },
450 |   {
451 |    "attachments": {},
452 |    "cell_type": "markdown",
453 |    "metadata": {},
454 |    "source": [
455 |     "---\n",
456 |     "### Credits: notebook 1 - `sktime` intro\n",
457 |     "\n",
458 |     "notebook creation: fkiraly, marrov\n",
459 |     "\n",
460 |     "some vignettes based on existing `sktime` tutorials, credit: fkiraly, miraep8\n",
461 |     "\n",
462 |     "slides (png/jpg):\n",
463 |     "\n",
464 |     "* from fkiraly's postgraduate course at UCL, Principles and Patterns in Data Scientific Software Engineering\n",
465 |     "* ecosystem slide: fkiraly, mloning\n",
466 |     "* learning tasks: fkiraly, mloning\n",
467 |     "\n",
468 |     "General credit also to `sklearn` and `sktime` contributors"
469 |    ]
470 |   }
471 |  ],
472 |  "metadata": {
473 |   "kernelspec": {
474 |    "display_name": "Python 3.8.13 ('pydata22')",
475 |    "language": "python",
476 |    "name": "python3"
477 |   },
478 |   "language_info": {
479 |    "codemirror_mode": {
480 |     "name": "ipython",
481 |     "version": 3
482 |    },
483 |    "file_extension": ".py",
484 |    "mimetype": "text/x-python",
485 |    "name": "python",
486 |    "nbconvert_exporter": "python",
487 |    "pygments_lexer": "ipython3",
488 |    "version": "3.11.3"
489 |   },
490 |   "orig_nbformat": 4,
491 |   "vscode": {
492 |    "interpreter": {
493 |     "hash": "e61b44dca3bf47c8973c8cd627825697e2dad493e19dd6592afda0a0a3c312a0"
494 |    }
495 |   }
496 |  },
497 |  "nbformat": 4,
498 |  "nbformat_minor": 2
499 | }
500 | 


--------------------------------------------------------------------------------
/notebooks/05_panel_tasks.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "attachments": {},
   5 |    "cell_type": "markdown",
   6 |    "metadata": {},
   7 |    "source": [
   8 |     "### Overview of this notebook\n",
   9 |     "\n",
  10 |     "* Introduction to time series classification, regression, clustering\n",
  11 |     "* `sktime` data format for \"time series panels\" = collections of time series\n",
  12 |     "* Basic vignettes for TSC, TSR, TSCl\n",
  13 |     "* Advanced vignettes - pipelines, ensembles, tuning"
  14 |    ]
  15 |   },
  16 |   {
  17 |    "attachments": {},
  18 |    "cell_type": "markdown",
  19 |    "metadata": {},
  20 |    "source": [
  21 |     "# 5. Learning tasks - Classification, Regression, Clustering & more <a name=\"top-2\"></a>\n",
  22 |     "\n",
  23 |     "deal with *collections of time series* = \"panel data\"\n",
  24 |     "\n",
  25 |     "Classification = try to assign one *category* per time series, after training on time series/category examples\n",
  26 |     "\n",
  27 |     "Example: daily energy consumption profile over time - Predict season, e.g., winter/summer, or type of consumer\n",
  28 |     "\n",
  29 |     "Regression = try to assign one *numerical value* per time series, after training on time series/value examples\n",
  30 |     "\n",
  31 |     "Example: temperature/pressure/time profile of chemical reactor. Predict total purity (fraction of 1)\n",
  32 |     "\n",
  33 |     "Clustering = put different time series in a small number of similarity buckets"
  34 |    ]
  35 |   },
  36 |   {
  37 |    "cell_type": "code",
  38 |    "execution_count": 1,
  39 |    "metadata": {},
  40 |    "outputs": [],
  41 |    "source": [
  42 |     "import numpy as np\n",
  43 |     "import pandas as pd\n",
  44 |     "\n",
  45 |     "# Increase display width\n",
  46 |     "pd.set_option('display.width', 1000)"
  47 |    ]
  48 |   },
  49 |   {
  50 |    "attachments": {},
  51 |    "cell_type": "markdown",
  52 |    "metadata": {},
  53 |    "source": [
  54 |     "## 5.1 Panel data - `sktime` data formats <a name=\"panel\"></a>\n",
  55 |     "\n",
  56 |     "`Panel` abstract data type = values observed for:\n",
  57 |     "\n",
  58 |     "* `instance`, e.g., patient\n",
  59 |     "* `variable`, e.g., blood pressure, body temperature\n",
  60 |     "* `time`/`index`, e.g., January 12, 2023 (usually but not necessarily a time index!)\n",
  61 |     "\n",
  62 |     "One value X is: \"patient A had blood pressure X on January 12, 2023\"\n",
  63 |     "\n",
  64 |     "time series classification, regression, clustering: slices `Panel` data by instance"
  65 |    ]
  66 |   },
  67 |   {
  68 |    "cell_type": "markdown",
  69 |    "metadata": {},
  70 |    "source": [
  71 |     "\n",
  72 |     "Preferred format 1: `pd.DataFrame` with 2-level `MultiIndex`, (instance, time), cols= variables\n",
  73 |     "\n",
  74 |     "Preferred format 2: 3D `np.ndarray` with index (instance, variable, time)\n",
  75 |     "\n",
  76 |     "* `sktime` supports and recognizes multiple data formats for convenience and internal use, e.g., `dask`, `xarray`\n",
  77 |     "* abstract data type = \"scitype\"; in-memory specification = \"mtype\"\n",
  78 |     "* More information in tutorial on [in-memory data representations and data loading](https://www.sktime.net/en/latest/examples/AA_datatypes_and_datasets.html#In-memory-data-representations-and-data-loading)"
  79 |    ]
  80 |   },
  81 |   {
  82 |    "attachments": {},
  83 |    "cell_type": "markdown",
  84 |    "metadata": {},
  85 |    "source": [
  86 |     "### 5.1.1 preferred format 1 - `pd-multiindex` specification"
  87 |    ]
  88 |   },
  89 |   {
  90 |    "attachments": {},
  91 |    "cell_type": "markdown",
  92 |    "metadata": {},
  93 |    "source": [
  94 |     "`pd-multiindex` = `pd.DataFrame` with 2-level `MultiIndex`, (instance, time), cols= variables"
  95 |    ]
  96 |   },
  97 |   {
  98 |    "cell_type": "code",
  99 |    "execution_count": 2,
 100 |    "metadata": {},
 101 |    "outputs": [],
 102 |    "source": [
 103 |     "from sktime.datasets import load_italy_power_demand\n",
 104 |     "\n",
 105 |     "# load an example time series panel in pd-multiindex mtype\n",
 106 |     "X, _ = load_italy_power_demand(return_type=\"pd-multiindex\")\n",
 107 |     "\n",
 108 |     "# renaming columns for illustrative purposes\n",
 109 |     "X.columns = [\"total_power_demand\"]\n",
 110 |     "X.index.names = [\"day_ID\", \"hour_of_day\"]"
 111 |    ]
 112 |   },
 113 |   {
 114 |    "attachments": {},
 115 |    "cell_type": "markdown",
 116 |    "metadata": {},
 117 |    "source": [
 118 |     "The Italy power demand dataset has:\n",
 119 |     "\n",
 120 |     "* 1096 individual time series instances = single days of total power demand (mean subtracted)\n",
 121 |     "* one single variable per time series instance, `total_power_demand`\n",
 122 |     "    * total power demand on that day, in that hourly period\n",
 123 |     "    * hence a univariate dataset\n",
 124 |     "* individual time series are observed at 24 time (period) points (the same number for all instances)\n",
 125 |     "\n",
 126 |     "In the dataset, days are jumbled and of different scope (independent sampling).\n",
 127 |     "* considered independent\n",
 128 |     "* for task, e.g., \"identify season or weekday/week-end from pattern\""
 129 |    ]
 130 |   },
 131 |   {
 132 |    "cell_type": "code",
 133 |    "execution_count": 3,
 134 |    "metadata": {},
 135 |    "outputs": [
 136 |     {
 137 |      "data": {
 138 |       "text/html": [
 139 |        "<div>\n",
 140 |        "<style scoped>\n",
 141 |        "    .dataframe tbody tr th:only-of-type {\n",
 142 |        "        vertical-align: middle;\n",
 143 |        "    }\n",
 144 |        "\n",
 145 |        "    .dataframe tbody tr th {\n",
 146 |        "        vertical-align: top;\n",
 147 |        "    }\n",
 148 |        "\n",
 149 |        "    .dataframe thead th {\n",
 150 |        "        text-align: right;\n",
 151 |        "    }\n",
 152 |        "</style>\n",
 153 |        "<table border=\"1\" class=\"dataframe\">\n",
 154 |        "  <thead>\n",
 155 |        "    <tr style=\"text-align: right;\">\n",
 156 |        "      <th></th>\n",
 157 |        "      <th></th>\n",
 158 |        "      <th>total_power_demand</th>\n",
 159 |        "    </tr>\n",
 160 |        "    <tr>\n",
 161 |        "      <th>day_ID</th>\n",
 162 |        "      <th>hour_of_day</th>\n",
 163 |        "      <th></th>\n",
 164 |        "    </tr>\n",
 165 |        "  </thead>\n",
 166 |        "  <tbody>\n",
 167 |        "    <tr>\n",
 168 |        "      <th rowspan=\"5\" valign=\"top\">0</th>\n",
 169 |        "      <th>0</th>\n",
 170 |        "      <td>-0.710518</td>\n",
 171 |        "    </tr>\n",
 172 |        "    <tr>\n",
 173 |        "      <th>1</th>\n",
 174 |        "      <td>-1.183320</td>\n",
 175 |        "    </tr>\n",
 176 |        "    <tr>\n",
 177 |        "      <th>2</th>\n",
 178 |        "      <td>-1.372442</td>\n",
 179 |        "    </tr>\n",
 180 |        "    <tr>\n",
 181 |        "      <th>3</th>\n",
 182 |        "      <td>-1.593083</td>\n",
 183 |        "    </tr>\n",
 184 |        "    <tr>\n",
 185 |        "      <th>4</th>\n",
 186 |        "      <td>-1.467002</td>\n",
 187 |        "    </tr>\n",
 188 |        "    <tr>\n",
 189 |        "      <th>...</th>\n",
 190 |        "      <th>...</th>\n",
 191 |        "      <td>...</td>\n",
 192 |        "    </tr>\n",
 193 |        "    <tr>\n",
 194 |        "      <th rowspan=\"5\" valign=\"top\">1095</th>\n",
 195 |        "      <th>19</th>\n",
 196 |        "      <td>0.180490</td>\n",
 197 |        "    </tr>\n",
 198 |        "    <tr>\n",
 199 |        "      <th>20</th>\n",
 200 |        "      <td>-0.094058</td>\n",
 201 |        "    </tr>\n",
 202 |        "    <tr>\n",
 203 |        "      <th>21</th>\n",
 204 |        "      <td>0.729587</td>\n",
 205 |        "    </tr>\n",
 206 |        "    <tr>\n",
 207 |        "      <th>22</th>\n",
 208 |        "      <td>0.210995</td>\n",
 209 |        "    </tr>\n",
 210 |        "    <tr>\n",
 211 |        "      <th>23</th>\n",
 212 |        "      <td>-0.002542</td>\n",
 213 |        "    </tr>\n",
 214 |        "  </tbody>\n",
 215 |        "</table>\n",
 216 |        "<p>26304 rows × 1 columns</p>\n",
 217 |        "</div>"
 218 |       ],
 219 |       "text/plain": [
 220 |        "                    total_power_demand\n",
 221 |        "day_ID hour_of_day                    \n",
 222 |        "0      0                     -0.710518\n",
 223 |        "       1                     -1.183320\n",
 224 |        "       2                     -1.372442\n",
 225 |        "       3                     -1.593083\n",
 226 |        "       4                     -1.467002\n",
 227 |        "...                                ...\n",
 228 |        "1095   19                     0.180490\n",
 229 |        "       20                    -0.094058\n",
 230 |        "       21                     0.729587\n",
 231 |        "       22                     0.210995\n",
 232 |        "       23                    -0.002542\n",
 233 |        "\n",
 234 |        "[26304 rows x 1 columns]"
 235 |       ]
 236 |      },
 237 |      "execution_count": 3,
 238 |      "metadata": {},
 239 |      "output_type": "execute_result"
 240 |     }
 241 |    ],
 242 |    "source": [
 243 |     "X"
 244 |    ]
 245 |   },
 246 |   {
 247 |    "cell_type": "code",
 248 |    "execution_count": 4,
 249 |    "metadata": {},
 250 |    "outputs": [],
 251 |    "source": [
 252 |     "from sktime.datasets import load_basic_motions\n",
 253 |     "\n",
 254 |     "# load an example time series panel in pd-multiindex mtype\n",
 255 |     "X, _ = load_basic_motions(return_type=\"pd-multiindex\")\n",
 256 |     "\n",
 257 |     "# renaming columns for illustrative purposes\n",
 258 |     "X.columns = [\"accel_1\", \"accel_2\", \"accel_3\", \"gyro_1\", \"gyro_2\", \"gyro_3\"]\n",
 259 |     "X.index.names = [\"trial_no\", \"timepoint\"]"
 260 |    ]
 261 |   },
 262 |   {
 263 |    "attachments": {},
 264 |    "cell_type": "markdown",
 265 |    "metadata": {},
 266 |    "source": [
 267 |     "The basic motions dataset has:\n",
 268 |     "\n",
 269 |     "* 80 individual time series instances = trials = person engaging in activity (running, badminton, etc)\n",
 270 |     "* six variables per time series instance, `dim_0` to `dim_5` (renamed above to `accel_1`, ..., `gyro_3`)\n",
 271 |     "    * 3 accelerometer and 3 gyrometer measurements\n",
 272 |     "    * hence a multivariate dataset\n",
 273 |     "* individual time series are observed at 100 time points (the same number for all instances)"
 274 |    ]
 275 |   },
 276 |   {
 277 |    "cell_type": "code",
 278 |    "execution_count": 5,
 279 |    "metadata": {},
 280 |    "outputs": [
 281 |     {
 282 |      "data": {
 283 |       "text/html": [
 284 |        "<div>\n",
 285 |        "<style scoped>\n",
 286 |        "    .dataframe tbody tr th:only-of-type {\n",
 287 |        "        vertical-align: middle;\n",
 288 |        "    }\n",
 289 |        "\n",
 290 |        "    .dataframe tbody tr th {\n",
 291 |        "        vertical-align: top;\n",
 292 |        "    }\n",
 293 |        "\n",
 294 |        "    .dataframe thead th {\n",
 295 |        "        text-align: right;\n",
 296 |        "    }\n",
 297 |        "</style>\n",
 298 |        "<table border=\"1\" class=\"dataframe\">\n",
 299 |        "  <thead>\n",
 300 |        "    <tr style=\"text-align: right;\">\n",
 301 |        "      <th></th>\n",
 302 |        "      <th></th>\n",
 303 |        "      <th>accel_1</th>\n",
 304 |        "      <th>accel_2</th>\n",
 305 |        "      <th>accel_3</th>\n",
 306 |        "      <th>gyro_1</th>\n",
 307 |        "      <th>gyro_2</th>\n",
 308 |        "      <th>gyro_3</th>\n",
 309 |        "    </tr>\n",
 310 |        "    <tr>\n",
 311 |        "      <th>trial_no</th>\n",
 312 |        "      <th>timepoint</th>\n",
 313 |        "      <th></th>\n",
 314 |        "      <th></th>\n",
 315 |        "      <th></th>\n",
 316 |        "      <th></th>\n",
 317 |        "      <th></th>\n",
 318 |        "      <th></th>\n",
 319 |        "    </tr>\n",
 320 |        "  </thead>\n",
 321 |        "  <tbody>\n",
 322 |        "    <tr>\n",
 323 |        "      <th rowspan=\"5\" valign=\"top\">0</th>\n",
 324 |        "      <th>0</th>\n",
 325 |        "      <td>0.079106</td>\n",
 326 |        "      <td>0.394032</td>\n",
 327 |        "      <td>0.551444</td>\n",
 328 |        "      <td>0.351565</td>\n",
 329 |        "      <td>0.023970</td>\n",
 330 |        "      <td>0.633883</td>\n",
 331 |        "    </tr>\n",
 332 |        "    <tr>\n",
 333 |        "      <th>1</th>\n",
 334 |        "      <td>0.079106</td>\n",
 335 |        "      <td>0.394032</td>\n",
 336 |        "      <td>0.551444</td>\n",
 337 |        "      <td>0.351565</td>\n",
 338 |        "      <td>0.023970</td>\n",
 339 |        "      <td>0.633883</td>\n",
 340 |        "    </tr>\n",
 341 |        "    <tr>\n",
 342 |        "      <th>2</th>\n",
 343 |        "      <td>-0.903497</td>\n",
 344 |        "      <td>-3.666397</td>\n",
 345 |        "      <td>-0.282844</td>\n",
 346 |        "      <td>-0.095881</td>\n",
 347 |        "      <td>-0.319605</td>\n",
 348 |        "      <td>0.972131</td>\n",
 349 |        "    </tr>\n",
 350 |        "    <tr>\n",
 351 |        "      <th>3</th>\n",
 352 |        "      <td>1.116125</td>\n",
 353 |        "      <td>-0.656101</td>\n",
 354 |        "      <td>0.333118</td>\n",
 355 |        "      <td>1.624657</td>\n",
 356 |        "      <td>-0.569962</td>\n",
 357 |        "      <td>1.209171</td>\n",
 358 |        "    </tr>\n",
 359 |        "    <tr>\n",
 360 |        "      <th>4</th>\n",
 361 |        "      <td>1.638200</td>\n",
 362 |        "      <td>1.405135</td>\n",
 363 |        "      <td>0.393875</td>\n",
 364 |        "      <td>1.187864</td>\n",
 365 |        "      <td>-0.271664</td>\n",
 366 |        "      <td>1.739182</td>\n",
 367 |        "    </tr>\n",
 368 |        "    <tr>\n",
 369 |        "      <th>...</th>\n",
 370 |        "      <th>...</th>\n",
 371 |        "      <td>...</td>\n",
 372 |        "      <td>...</td>\n",
 373 |        "      <td>...</td>\n",
 374 |        "      <td>...</td>\n",
 375 |        "      <td>...</td>\n",
 376 |        "      <td>...</td>\n",
 377 |        "    </tr>\n",
 378 |        "    <tr>\n",
 379 |        "      <th rowspan=\"5\" valign=\"top\">79</th>\n",
 380 |        "      <th>95</th>\n",
 381 |        "      <td>28.459024</td>\n",
 382 |        "      <td>-16.633770</td>\n",
 383 |        "      <td>3.631869</td>\n",
 384 |        "      <td>8.978229</td>\n",
 385 |        "      <td>-3.611533</td>\n",
 386 |        "      <td>-1.491489</td>\n",
 387 |        "    </tr>\n",
 388 |        "    <tr>\n",
 389 |        "      <th>96</th>\n",
 390 |        "      <td>10.260094</td>\n",
 391 |        "      <td>0.102775</td>\n",
 392 |        "      <td>1.269261</td>\n",
 393 |        "      <td>-1.645964</td>\n",
 394 |        "      <td>-3.377157</td>\n",
 395 |        "      <td>1.283746</td>\n",
 396 |        "    </tr>\n",
 397 |        "    <tr>\n",
 398 |        "      <th>97</th>\n",
 399 |        "      <td>4.316471</td>\n",
 400 |        "      <td>-3.574319</td>\n",
 401 |        "      <td>2.063831</td>\n",
 402 |        "      <td>-1.717875</td>\n",
 403 |        "      <td>-1.843054</td>\n",
 404 |        "      <td>0.484734</td>\n",
 405 |        "    </tr>\n",
 406 |        "    <tr>\n",
 407 |        "      <th>98</th>\n",
 408 |        "      <td>0.704446</td>\n",
 409 |        "      <td>-4.920444</td>\n",
 410 |        "      <td>2.851857</td>\n",
 411 |        "      <td>-2.982977</td>\n",
 412 |        "      <td>-0.809665</td>\n",
 413 |        "      <td>-0.721774</td>\n",
 414 |        "    </tr>\n",
 415 |        "    <tr>\n",
 416 |        "      <th>99</th>\n",
 417 |        "      <td>-2.074749</td>\n",
 418 |        "      <td>-6.892377</td>\n",
 419 |        "      <td>4.848379</td>\n",
 420 |        "      <td>-1.350330</td>\n",
 421 |        "      <td>-1.203844</td>\n",
 422 |        "      <td>-1.776470</td>\n",
 423 |        "    </tr>\n",
 424 |        "  </tbody>\n",
 425 |        "</table>\n",
 426 |        "<p>8000 rows × 6 columns</p>\n",
 427 |        "</div>"
 428 |       ],
 429 |       "text/plain": [
 430 |        "                      accel_1    accel_2   accel_3    gyro_1    gyro_2    gyro_3\n",
 431 |        "trial_no timepoint                                                              \n",
 432 |        "0        0           0.079106   0.394032  0.551444  0.351565  0.023970  0.633883\n",
 433 |        "         1           0.079106   0.394032  0.551444  0.351565  0.023970  0.633883\n",
 434 |        "         2          -0.903497  -3.666397 -0.282844 -0.095881 -0.319605  0.972131\n",
 435 |        "         3           1.116125  -0.656101  0.333118  1.624657 -0.569962  1.209171\n",
 436 |        "         4           1.638200   1.405135  0.393875  1.187864 -0.271664  1.739182\n",
 437 |        "...                       ...        ...       ...       ...       ...       ...\n",
 438 |        "79       95         28.459024 -16.633770  3.631869  8.978229 -3.611533 -1.491489\n",
 439 |        "         96         10.260094   0.102775  1.269261 -1.645964 -3.377157  1.283746\n",
 440 |        "         97          4.316471  -3.574319  2.063831 -1.717875 -1.843054  0.484734\n",
 441 |        "         98          0.704446  -4.920444  2.851857 -2.982977 -0.809665 -0.721774\n",
 442 |        "         99         -2.074749  -6.892377  4.848379 -1.350330 -1.203844 -1.776470\n",
 443 |        "\n",
 444 |        "[8000 rows x 6 columns]"
 445 |       ]
 446 |      },
 447 |      "execution_count": 5,
 448 |      "metadata": {},
 449 |      "output_type": "execute_result"
 450 |     }
 451 |    ],
 452 |    "source": [
 453 |     "# The outermost index represents the instance number\n",
 454 |     "# whereas the inner index represents the index of the particular time point\n",
 455 |     "# within that instance.\n",
 456 |     "X"
 457 |    ]
 458 |   },
 459 |   {
 460 |    "attachments": {},
 461 |    "cell_type": "markdown",
 462 |    "metadata": {},
 463 |    "source": [
 464 |     "pandas provides a simple way to access a range of values in the multi-indexed dataframe:"
 465 |    ]
 466 |   },
 467 |   {
 468 |    "cell_type": "code",
 469 |    "execution_count": 6,
 470 |    "metadata": {},
 471 |    "outputs": [
 472 |     {
 473 |      "data": {
 474 |       "text/plain": [
 475 |        "timepoint\n",
 476 |        "0     0.351565\n",
 477 |        "1     0.351565\n",
 478 |        "2    -0.095881\n",
 479 |        "3     1.624657\n",
 480 |        "4     1.187864\n",
 481 |        "        ...   \n",
 482 |        "95    0.039951\n",
 483 |        "96   -0.029297\n",
 484 |        "97    0.000000\n",
 485 |        "98    0.000000\n",
 486 |        "99   -0.007990\n",
 487 |        "Name: gyro_1, Length: 100, dtype: float64"
 488 |       ]
 489 |      },
 490 |      "execution_count": 6,
 491 |      "metadata": {},
 492 |      "output_type": "execute_result"
 493 |     }
 494 |    ],
 495 |    "source": [
 496 |     "# Select:\n",
 497 |     "# * the fourth variable (gyroscope 1)\n",
 498 |     "# * of the first instance (trial 1 = 0 in python)\n",
 499 |     "# * values at all 100 timestamps\n",
 500 |     "#\n",
 501 |     "X.loc[0, \"gyro_1\"]"
 502 |    ]
 503 |   },
 504 |   {
 505 |    "attachments": {},
 506 |    "cell_type": "markdown",
 507 |    "metadata": {},
 508 |    "source": [
 509 |     "Or if you want to access the individual values:"
 510 |    ]
 511 |   },
 512 |   {
 513 |    "cell_type": "code",
 514 |    "execution_count": 7,
 515 |    "metadata": {},
 516 |    "outputs": [
 517 |     {
 518 |      "data": {
 519 |       "text/plain": [
 520 |        "-1.27952"
 521 |       ]
 522 |      },
 523 |      "execution_count": 7,
 524 |      "metadata": {},
 525 |      "output_type": "execute_result"
 526 |     }
 527 |    ],
 528 |    "source": [
 529 |     "# Select:\n",
 530 |     "# * the fifth time point (5 = 4 in python)\n",
 531 |     "# * the third variable (accelerometer 3)\n",
 532 |     "# * of the forty-third instance (trial 43 = 42 in python)\n",
 533 |     "\n",
 534 |     "X.loc[(42, 4), \"accel_3\"]"
 535 |    ]
 536 |   },
 537 |   {
 538 |    "attachments": {},
 539 |    "cell_type": "markdown",
 540 |    "metadata": {},
 541 |    "source": [
 542 |     "### 5.1.2 preferred format 2 - `numpy3D` specification"
 543 |    ]
 544 |   },
 545 |   {
 546 |    "attachments": {},
 547 |    "cell_type": "markdown",
 548 |    "metadata": {},
 549 |    "source": [
 550 |     "`numpy3D` = 3D `np.ndarray` with index (instance, variable, time)\n",
 551 |     "\n",
 552 |     "instance/time index is interpreted as integer\n",
 553 |     "\n",
 554 |     "IMPORTANT: unlike `pd-multiindex`, this assumes:\n",
 555 |     "\n",
 556 |     "* all individual series have the same length\n",
 557 |     "* all individual series have the same index"
 558 |    ]
 559 |   },
 560 |   {
 561 |    "cell_type": "code",
 562 |    "execution_count": 8,
 563 |    "metadata": {},
 564 |    "outputs": [],
 565 |    "source": [
 566 |     "from sktime.datasets import load_basic_motions\n",
 567 |     "\n",
 568 |     "# load an example time series panel in numpy mtype\n",
 569 |     "X, _ = load_basic_motions(return_type=\"numpy3D\")"
 570 |    ]
 571 |   },
 572 |   {
 573 |    "attachments": {},
 574 |    "cell_type": "markdown",
 575 |    "metadata": {},
 576 |    "source": [
 577 |     "The Italy power demand dataset has:\n",
 578 |     "\n",
 579 |     "* 1096 individual time series instances = single days of total power demand (mean subtracted)\n",
 580 |     "* one single variable per time series instance, unnamed in numpy\n",
 581 |     "* individual time series are observed at 24 time (period) points (the same number for all instances)"
 582 |    ]
 583 |   },
 584 |   {
 585 |    "cell_type": "code",
 586 |    "execution_count": 9,
 587 |    "metadata": {},
 588 |    "outputs": [
 589 |     {
 590 |      "data": {
 591 |       "text/plain": [
 592 |        "(80, 6, 100)"
 593 |       ]
 594 |      },
 595 |      "execution_count": 9,
 596 |      "metadata": {},
 597 |      "output_type": "execute_result"
 598 |     }
 599 |    ],
 600 |    "source": [
 601 |     "# (num_instances, num_variables, length)\n",
 602 |     "X.shape"
 603 |    ]
 604 |   },
 605 |   {
 606 |    "cell_type": "code",
 607 |    "execution_count": 10,
 608 |    "metadata": {},
 609 |    "outputs": [],
 610 |    "source": [
 611 |     "from sktime.datasets import load_basic_motions\n",
 612 |     "\n",
 613 |     "# load an example time series panel in numpy mtype\n",
 614 |     "X, _ = load_basic_motions(return_type=\"numpy3D\")"
 615 |    ]
 616 |   },
 617 |   {
 618 |    "attachments": {},
 619 |    "cell_type": "markdown",
 620 |    "metadata": {},
 621 |    "source": [
 622 |     "The basic motions dataset has:\n",
 623 |     "\n",
 624 |     "* 80 individual time series instances = trials = person engaging in activity (running, badminton, etc)\n",
 625 |     "* six variables per time series instance, unnamed in numpy\n",
 626 |     "* individual time series are observed at 100 time points (the same number for all instances)"
 627 |    ]
 628 |   },
 629 |   {
 630 |    "cell_type": "code",
 631 |    "execution_count": 11,
 632 |    "metadata": {},
 633 |    "outputs": [
 634 |     {
 635 |      "data": {
 636 |       "text/plain": [
 637 |        "(80, 6, 100)"
 638 |       ]
 639 |      },
 640 |      "execution_count": 11,
 641 |      "metadata": {},
 642 |      "output_type": "execute_result"
 643 |     }
 644 |    ],
 645 |    "source": [
 646 |     "X.shape"
 647 |    ]
 648 |   },
 649 |   {
 650 |    "attachments": {},
 651 |    "cell_type": "markdown",
 652 |    "metadata": {},
 653 |    "source": [
 654 |     "## 5.2 Time Series Classification, Regression, Clustering - Basic Vignettes\n",
 655 |     "\n",
 656 |     "Above tasks are very similar to \"tabular\" classification, regression, clustering, as in `sklearn`\n",
 657 |     "\n",
 658 |     "Main distinction:\n",
 659 |     "* in \"tabular\" classification etc, one (feature) instance is a row vector of features\n",
 660 |     "* in TSC, one (feature) instance is a full time series, possibly unequal length, distinct index set"
 661 |    ]
 662 |   },
 663 |   {
 664 |    "attachments": {},
 665 |    "cell_type": "markdown",
 666 |    "metadata": {},
 667 |    "source": [
 668 |     "![](./img/tasks-tsc.png)"
 669 |    ]
 670 |   },
 671 |   {
 672 |    "attachments": {},
 673 |    "cell_type": "markdown",
 674 |    "metadata": {},
 675 |    "source": [
 676 |     "\n",
 677 |     "More formally:\n",
 678 |     "\n",
 679 |     "* \"tabular\" classification:\n",
 680 |     "    * training pairs $(x_1, y_1), \\dots, (x_n, y_n)$\n",
 681 |     "        * where $x_i$ are rows of a `pd.DataFrame` (same col types)\n",
 682 |     "        * and $y_i \\in \\mathcal{C}$ for a finite set $\\mathcal{C}$\n",
 683 |     "    * used to train a classifier that\n",
 684 |     "        * for a new `pd.DataFrame` row $x_*$\n",
 685 |     "        * predicts $y_* \\in \\mathcal{C}$"
 686 |    ]
 687 |   },
 688 |   {
 689 |    "cell_type": "markdown",
 690 |    "metadata": {},
 691 |    "source": [
 692 |     "\n",
 693 |     "* time series classification:\n",
 694 |     "    * training pairs $(x_1, y_1), \\dots, (x_n, y_n)$\n",
 695 |     "        * where $x_i$ are time series instances, from a certain domain\n",
 696 |     "        * and $y_i \\in \\mathcal{C}$ for a finite set $\\mathcal{C}$\n",
 697 |     "    * used to train a classifier that\n",
 698 |     "        * for a new time series instance $x_*$\n",
 699 |     "        * predicts $y_* \\in \\mathcal{C}$"
 700 |    ]
 701 |   },
 702 |   {
 703 |    "attachments": {},
 704 |    "cell_type": "markdown",
 705 |    "metadata": {},
 706 |    "source": [
 707 |     "very similar for time series regression, clustering - exercise left to reader :-)"
 708 |    ]
 709 |   },
 710 |   {
 711 |    "attachments": {},
 712 |    "cell_type": "markdown",
 713 |    "metadata": {},
 714 |    "source": [
 715 |     "`sktime` design implications:\n",
 716 |     "\n",
 717 |     "* need representation of collections of time series (panels), see Section 5.1\n",
 718 |     "    * same as in \"adjacent\" learning tasks, e.g., panel forecasting\n",
 719 |     "    * same as for transformation estimators\n",
 720 |     "* algorithms that use sequentiality, can deal with unequal length, missing values etc \n",
 721 |     "* algorithms usually based on distances or kernels between time series - need to cover that in framework\n",
 722 |     "* but we can use familiar `fit` / `predict` and `scikit-learn` / `scikit-base` interface!"
 723 |    ]
 724 |   },
 725 |   {
 726 |    "attachments": {},
 727 |    "cell_type": "markdown",
 728 |    "metadata": {},
 729 |    "source": [
 730 |     "### 5.2.3 Time Series Classification - deployment vignette"
 731 |    ]
 732 |   },
 733 |   {
 734 |    "attachments": {},
 735 |    "cell_type": "markdown",
 736 |    "metadata": {},
 737 |    "source": [
 738 |     "Basic deployment vignette for TSC:\n",
 739 |     "\n",
 740 |     "1. load/setup training data, `X` in a `Panel` (more specifically `numpy3D`) format, `y` as 1D `np.ndarray`\n",
 741 |     "2. load/setup new data for prediction (can also be done after step 4)\n",
 742 |     "3. specify the classifier using `sklearn`-like syntax\n",
 743 |     "4. fit classifier to training data, `fit(X, y)`\n",
 744 |     "5. predict labels on new data, `predict(X_new)`"
 745 |    ]
 746 |   },
 747 |   {
 748 |    "cell_type": "code",
 749 |    "execution_count": 12,
 750 |    "metadata": {},
 751 |    "outputs": [],
 752 |    "source": [
 753 |     "# steps 1, 2 - prepare italy power demand dataset (train and new)\n",
 754 |     "from sktime.datasets import load_italy_power_demand\n",
 755 |     "\n",
 756 |     "X_train, y_train = load_italy_power_demand(split=\"train\", return_type=\"numpy3D\")\n",
 757 |     "X_new, _ = load_italy_power_demand(split=\"test\", return_type=\"numpy3D\")"
 758 |    ]
 759 |   },
 760 |   {
 761 |    "cell_type": "code",
 762 |    "execution_count": 13,
 763 |    "metadata": {},
 764 |    "outputs": [
 765 |     {
 766 |      "data": {
 767 |       "text/plain": [
 768 |        "(67, 1, 24)"
 769 |       ]
 770 |      },
 771 |      "execution_count": 13,
 772 |      "metadata": {},
 773 |      "output_type": "execute_result"
 774 |     }
 775 |    ],
 776 |    "source": [
 777 |     "# this is in numpy3D format, but could also be pd-multiindex or other\n",
 778 |     "X_train.shape"
 779 |    ]
 780 |   },
 781 |   {
 782 |    "cell_type": "code",
 783 |    "execution_count": 14,
 784 |    "metadata": {},
 785 |    "outputs": [
 786 |     {
 787 |      "data": {
 788 |       "text/plain": [
 789 |        "(67,)"
 790 |       ]
 791 |      },
 792 |      "execution_count": 14,
 793 |      "metadata": {},
 794 |      "output_type": "execute_result"
 795 |     }
 796 |    ],
 797 |    "source": [
 798 |     "# y is a 1D np.ndarray of labels - same length as number of instances in X_train\n",
 799 |     "y_train.shape"
 800 |    ]
 801 |   },
 802 |   {
 803 |    "cell_type": "code",
 804 |    "execution_count": 15,
 805 |    "metadata": {},
 806 |    "outputs": [],
 807 |    "source": [
 808 |     "# step 3 - specify the classifier\n",
 809 |     "from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier\n",
 810 |     "\n",
 811 |     "# example 1 - 3-NN with simple dynamic time warping distance (requires numba)\n",
 812 |     "clf = KNeighborsTimeSeriesClassifier(n_neighbors=3)\n",
 813 |     "\n",
 814 |     "# example 2 - custom distance:\n",
 815 |     "# 3-nearest neighbour classifier with Euclidean distance (on flattened time series)\n",
 816 |     "# (requires scipy)\n",
 817 |     "from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier\n",
 818 |     "from sktime.dists_kernels.compose_tab_to_panel import FlatDist\n",
 819 |     "from sktime.dists_kernels import ScipyDist\n",
 820 |     "\n",
 821 |     "eucl_dist = FlatDist(ScipyDist())\n",
 822 |     "clf = KNeighborsTimeSeriesClassifier(n_neighbors=3, distance=eucl_dist)"
 823 |    ]
 824 |   },
 825 |   {
 826 |    "attachments": {},
 827 |    "cell_type": "markdown",
 828 |    "metadata": {},
 829 |    "source": [
 830 |     "we could specify any `sktime` classifier here - the rest remains the same!"
 831 |    ]
 832 |   },
 833 |   {
 834 |    "cell_type": "code",
 835 |    "execution_count": 16,
 836 |    "metadata": {},
 837 |    "outputs": [
 838 |     {
 839 |      "data": {
 840 |       "text/plain": [
 841 |        "{'algorithm': 'brute',\n",
 842 |        " 'distance': FlatDist(transformer=ScipyDist()),\n",
 843 |        " 'distance_mtype': None,\n",
 844 |        " 'distance_params': None,\n",
 845 |        " 'leaf_size': 30,\n",
 846 |        " 'n_jobs': None,\n",
 847 |        " 'n_neighbors': 3,\n",
 848 |        " 'pass_train_distances': False,\n",
 849 |        " 'weights': 'uniform',\n",
 850 |        " 'distance__transformer': ScipyDist(),\n",
 851 |        " 'distance__transformer__colalign': 'intersect',\n",
 852 |        " 'distance__transformer__metric': 'euclidean',\n",
 853 |        " 'distance__transformer__metric_kwargs': None,\n",
 854 |        " 'distance__transformer__p': 2,\n",
 855 |        " 'distance__transformer__var_weights': None}"
 856 |       ]
 857 |      },
 858 |      "execution_count": 16,
 859 |      "metadata": {},
 860 |      "output_type": "execute_result"
 861 |     }
 862 |    ],
 863 |    "source": [
 864 |     "# all classifiers are scikit-learn / scikit-base compatible!\n",
 865 |     "# nested parameter interface via get_params, set_params\n",
 866 |     "clf.get_params()"
 867 |    ]
 868 |   },
 869 |   {
 870 |    "cell_type": "code",
 871 |    "execution_count": 17,
 872 |    "metadata": {},
 873 |    "outputs": [
 874 |     {
 875 |      "data": {
 876 |       "text/html": [
 877 |        "<style>#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 {color: black;background-color: white;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 pre{padding: 0;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-toggleable {background-color: white;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-estimator {font-family: 
monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-estimator:hover {background-color: #d4ebff;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-item {z-index: 1;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-parallel-item:only-child::after {width: 0;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-label label {font-family: monospace;font-weight: 
bold;background-color: white;display: inline-block;line-height: 1.2em;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-24c8c972-b2a8-490b-a300-f0767be7ebb7 div.sk-text-repr-fallback {display: none;}</style><div id='sk-24c8c972-b2a8-490b-a300-f0767be7ebb7' class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>KNeighborsTimeSeriesClassifier(distance=FlatDist(transformer=ScipyDist()),\n",
 878 |        "                               n_neighbors=3)</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class='sk-label-container'><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('56613fbf-364a-4f8a-a52f-ab4d8e751895') type=\"checkbox\" ><label for=UUID('56613fbf-364a-4f8a-a52f-ab4d8e751895') class='sk-toggleable__label sk-toggleable__label-arrow'>KNeighborsTimeSeriesClassifier</label><div class=\"sk-toggleable__content\"><pre>KNeighborsTimeSeriesClassifier(distance=FlatDist(transformer=ScipyDist()),\n",
 879 |        "                               n_neighbors=3)</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('fd8b05a3-94f9-4197-b082-6838df2d8991') type=\"checkbox\" ><label for=UUID('fd8b05a3-94f9-4197-b082-6838df2d8991') class='sk-toggleable__label sk-toggleable__label-arrow'>ScipyDist</label><div class=\"sk-toggleable__content\"><pre>ScipyDist()</pre></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div>"
 880 |       ],
 881 |       "text/plain": [
 882 |        "KNeighborsTimeSeriesClassifier(distance=FlatDist(transformer=ScipyDist()),\n",
 883 |        "                               n_neighbors=3)"
 884 |       ]
 885 |      },
 886 |      "execution_count": 17,
 887 |      "metadata": {},
 888 |      "output_type": "execute_result"
 889 |     }
 890 |    ],
 891 |    "source": [
 892 |     "# step 4 - fit/train the classifier\n",
 893 |     "clf.fit(X_train, y_train)"
 894 |    ]
 895 |   },
 896 |   {
 897 |    "cell_type": "code",
 898 |    "execution_count": 18,
 899 |    "metadata": {},
 900 |    "outputs": [
 901 |     {
 902 |      "data": {
 903 |       "text/plain": [
 904 |        "True"
 905 |       ]
 906 |      },
 907 |      "execution_count": 18,
 908 |      "metadata": {},
 909 |      "output_type": "execute_result"
 910 |     }
 911 |    ],
 912 |    "source": [
 913 |     "# the classifier is now fitted\n",
 914 |     "clf.is_fitted"
 915 |    ]
 916 |   },
 917 |   {
 918 |    "cell_type": "code",
 919 |    "execution_count": 19,
 920 |    "metadata": {},
 921 |    "outputs": [
 922 |     {
 923 |      "data": {
 924 |       "text/plain": [
 925 |        "{'classes': array(['1', '2'], dtype='<U1'),\n",
 926 |        " 'fit_time': 1,\n",
 927 |        " 'knn_estimator': KNeighborsClassifier(algorithm='brute', metric='precomputed', n_neighbors=3),\n",
 928 |        " 'n_classes': 2,\n",
 929 |        " 'knn_estimator__classes': array(['1', '2'], dtype='<U1'),\n",
 930 |        " 'knn_estimator__effective_metric': 'precomputed',\n",
 931 |        " 'knn_estimator__effective_metric_params': {},\n",
 932 |        " 'knn_estimator__n_features_in': 67,\n",
 933 |        " 'knn_estimator__n_samples_fit': 67,\n",
 934 |        " 'knn_estimator__outputs_2d': False}"
 935 |       ]
 936 |      },
 937 |      "execution_count": 19,
 938 |      "metadata": {},
 939 |      "output_type": "execute_result"
 940 |     }
 941 |    ],
 942 |    "source": [
 943 |     "# and we can inspect fitted parameters if we like\n",
 944 |     "clf.get_fitted_params()"
 945 |    ]
 946 |   },
 947 |   {
 948 |    "cell_type": "code",
 949 |    "execution_count": 20,
 950 |    "metadata": {},
 951 |    "outputs": [],
 952 |    "source": [
 953 |     "# step 5 - predict labels on new data\n",
 954 |     "y_pred = clf.predict(X_new)"
 955 |    ]
 956 |   },
 957 |   {
 958 |    "cell_type": "code",
 959 |    "execution_count": 21,
 960 |    "metadata": {},
 961 |    "outputs": [
 962 |     {
 963 |      "data": {
 964 |       "text/plain": [
 965 |        "array(['2', '2', '2', ..., '2', '2', '2'], dtype='<U1')"
 966 |       ]
 967 |      },
 968 |      "execution_count": 21,
 969 |      "metadata": {},
 970 |      "output_type": "execute_result"
 971 |     }
 972 |    ],
 973 |    "source": [
 974 |     "# y_pred is a 1D np.ndarray, similar to sklearn classification output\n",
 975 |     "y_pred"
 976 |    ]
 977 |   },
 978 |   {
 979 |    "cell_type": "code",
 980 |    "execution_count": 22,
 981 |    "metadata": {},
 982 |    "outputs": [
 983 |     {
 984 |      "data": {
 985 |       "text/plain": [
 986 |        "(array(['1', '2'], dtype='<U1'), array([510, 519], dtype=int64))"
 987 |       ]
 988 |      },
 989 |      "execution_count": 22,
 990 |      "metadata": {},
 991 |      "output_type": "execute_result"
 992 |     }
 993 |    ],
 994 |    "source": [
 995 |     "# predictions and unique counts, for illustration\n",
 996 |     "unique, counts = np.unique(y_pred, return_counts=True)\n",
 997 |     "unique, counts"
 998 |    ]
 999 |   },
1000 |   {
1001 |    "attachments": {},
1002 |    "cell_type": "markdown",
1003 |    "metadata": {},
1004 |    "source": [
1005 |     "all together in one cell:"
1006 |    ]
1007 |   },
1008 |   {
1009 |    "cell_type": "code",
1010 |    "execution_count": 23,
1011 |    "metadata": {},
1012 |    "outputs": [],
1013 |    "source": [
1014 |     "# steps 1, 2 - prepare italy power demand dataset (train and new)\n",
1015 |     "from sktime.datasets import load_italy_power_demand\n",
1016 |     "\n",
1017 |     "X_train, y_train = load_italy_power_demand(split=\"train\", return_type=\"numpy3D\")\n",
1018 |     "X_new, _ = load_italy_power_demand(split=\"test\", return_type=\"numpy3D\")\n",
1019 |     "\n",
1020 |     "# step 3 - specify the classifier\n",
1021 |     "from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier\n",
1022 |     "from sktime.dists_kernels.compose_tab_to_panel import FlatDist\n",
1023 |     "from sktime.dists_kernels import ScipyDist\n",
1024 |     "\n",
1025 |     "eucl_dist = FlatDist(ScipyDist())\n",
1026 |     "clf = KNeighborsTimeSeriesClassifier(n_neighbors=3, distance=eucl_dist)\n",
1027 |     "\n",
1028 |     "# step 4 - fit/train the classifier\n",
1029 |     "clf.fit(X_train, y_train)\n",
1030 |     "\n",
1031 |     "# step 5 - predict labels on new data\n",
1032 |     "y_pred = clf.predict(X_new)"
1033 |    ]
1034 |   },
1035 |   {
1036 |    "attachments": {},
1037 |    "cell_type": "markdown",
1038 |    "metadata": {},
1039 |    "source": [
1040 |     "### 5.2.4 Time Series Classification - simple evaluation vignette"
1041 |    ]
1042 |   },
1043 |   {
1044 |    "attachments": {},
1045 |    "cell_type": "markdown",
1046 |    "metadata": {},
1047 |    "source": [
1048 |     "Evaluation is similar to `sklearn` classifiers - we split a dataset and evaluate performance on the test set.\n",
1049 |     "\n",
1050 |     "This includes as additional steps:\n",
1051 |     "\n",
1052 |     "* splitting the initial, historical data, e.g., using `train_test_split`\n",
1053 |     "* comparing predictions with a held out data set"
1054 |    ]
1055 |   },
1056 |   {
1057 |    "cell_type": "code",
1058 |    "execution_count": 24,
1059 |    "metadata": {},
1060 |    "outputs": [
1061 |     {
1062 |      "data": {
1063 |       "text/plain": [
1064 |        "0.956268221574344"
1065 |       ]
1066 |      },
1067 |      "execution_count": 24,
1068 |      "metadata": {},
1069 |      "output_type": "execute_result"
1070 |     }
1071 |    ],
1072 |    "source": [
1073 |     "from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier\n",
1074 |     "from sktime.datasets import load_italy_power_demand\n",
1075 |     "\n",
1076 |     "# data should be split into train/test\n",
1077 |     "X_train, y_train = load_italy_power_demand(split=\"train\", return_type=\"numpy3D\")\n",
1078 |     "X_test, y_test = load_italy_power_demand(split=\"test\", return_type=\"numpy3D\")\n",
1079 |     "\n",
1080 |     "# step 3-5 are the same\n",
1081 |     "from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier\n",
1082 |     "from sktime.dists_kernels.compose_tab_to_panel import FlatDist\n",
1083 |     "from sktime.dists_kernels import ScipyDist\n",
1084 |     "\n",
1085 |     "eucl_dist = FlatDist(ScipyDist())\n",
1086 |     "clf = KNeighborsTimeSeriesClassifier(n_neighbors=3, distance=eucl_dist)\n",
1087 |     "\n",
1088 |     "clf.fit(X_train, y_train)\n",
1089 |     "y_pred = clf.predict(X_test)\n",
1090 |     "\n",
1091 |     "# for simplest evaluation, compare ground truth to predictions\n",
1092 |     "from sklearn.metrics import accuracy_score\n",
1093 |     "\n",
1094 |     "accuracy_score(y_test, y_pred)"
1095 |    ]
1096 |   },
1097 |   {
1098 |    "attachments": {},
1099 |    "cell_type": "markdown",
1100 |    "metadata": {},
1101 |    "source": [
1102 |     "### 5.2.5 Time Series Regression - basic vignettes"
1103 |    ]
1104 |   },
1105 |   {
1106 |    "attachments": {},
1107 |    "cell_type": "markdown",
1108 |    "metadata": {},
1109 |    "source": [
1110 |     "TSR vignettes are exactly the same as TSC, except that:\n",
1111 |     "\n",
1112 |     "* `y` in `fit` input and `predict` output should be float 1D `np.ndarray`, not categorical\n",
1113 |     "* other algorithms are commonly used and/or performant"
1114 |    ]
1115 |   },
1116 |   {
1117 |    "cell_type": "code",
1118 |    "execution_count": 25,
1119 |    "metadata": {},
1120 |    "outputs": [],
1121 |    "source": [
1122 |     "# steps 1, 2 - prepare dataset (train and new)\n",
1123 |     "from sktime.datasets import load_covid_3month\n",
1124 |     "\n",
1125 |     "X_train, y_train = load_covid_3month(split=\"train\")\n",
1126 |     "y_train = y_train.astype(\"float\")\n",
1127 |     "X_new, _ = load_covid_3month(split=\"test\")\n",
1128 |     "X_new = X_new.loc[:2]  # smaller dataset for faster notebook runtime\n",
1129 |     "\n",
1130 |     "# step 3 - specify the regressor\n",
1131 |     "from sktime.regression.distance_based import KNeighborsTimeSeriesRegressor\n",
1132 |     "\n",
1133 |     "clf = KNeighborsTimeSeriesRegressor(n_neighbors=3, distance=eucl_dist)\n",
1134 |     "\n",
1135 |     "# step 4 - fit/train the regressor\n",
1136 |     "clf.fit(X_train, y_train)\n",
1137 |     "\n",
1138 |     "# step 5 - predict target values on new data\n",
1139 |     "y_pred = clf.predict(X_new)"
1140 |    ]
1141 |   },
1142 |   {
1143 |    "cell_type": "code",
1144 |    "execution_count": 26,
1145 |    "metadata": {},
1146 |    "outputs": [
1147 |     {
1148 |      "data": {
1149 |       "text/plain": [
1150 |        "array([0.02957762, 0.0065062 , 0.00183655])"
1151 |       ]
1152 |      },
1153 |      "execution_count": 26,
1154 |      "metadata": {},
1155 |      "output_type": "execute_result"
1156 |     }
1157 |    ],
1158 |    "source": [
1159 |     "y_pred  # predictions are array of float"
1160 |    ]
1161 |   },
1162 |   {
1163 |    "attachments": {},
1164 |    "cell_type": "markdown",
1165 |    "metadata": {},
1166 |    "source": [
1167 |     "### 5.2.6 Time Series Clustering - basic vignettes"
1168 |    ]
1169 |   },
1170 |   {
1171 |    "attachments": {},
1172 |    "cell_type": "markdown",
1173 |    "metadata": {},
1174 |    "source": [
1175 |     "TS clustering is similar - 1st step is also `fit`, but unsupervised\n",
1176 |     "\n",
1177 |     "i.e., no labels `y`, and next step is inspecting clusters"
1178 |    ]
1179 |   },
1180 |   {
1181 |    "cell_type": "code",
1182 |    "execution_count": 27,
1183 |    "metadata": {},
1184 |    "outputs": [
1185 |     {
1186 |      "data": {
1187 |       "text/plain": [
1188 |        "{'core_sample_indices': array([ 0,  1,  3,  4,  6,  7,  8,  9, 10, 11, 12, 13, 14, 16, 17, 18, 19,\n",
1189 |        "        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 37,\n",
1190 |        "        38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 50, 52, 53, 54, 55, 56, 57,\n",
1191 |        "        58, 60, 61, 62, 63, 64, 65], dtype=int64),\n",
1192 |        " 'dbscan': DBSCAN(eps=2, metric='precomputed'),\n",
1193 |        " 'fit_time': 2,\n",
1194 |        " 'labels': array([ 0,  0, -1,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,\n",
1195 |        "         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,\n",
1196 |        "         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  1,  0,  0,\n",
1197 |        "         0,  0,  0,  0,  0,  0,  0,  0, -1,  0,  0,  0,  0,  0,  0, -1],\n",
1198 |        "       dtype=int64),\n",
1199 |        " 'dbscan__components': array([[0.        , 2.21059984, 7.22653506, ..., 2.43397663, 3.42512865,\n",
1200 |        "         5.77701453],\n",
1201 |        "        [2.21059984, 0.        , 7.31863575, ..., 0.8952782 , 2.01224344,\n",
1202 |        "         5.73199202],\n",
1203 |        "        [2.98199582, 1.8413087 , 7.5785501 , ..., 1.5676963 , 1.41086552,\n",
1204 |        "         5.96418696],\n",
1205 |        "        ...,\n",
1206 |        "        [3.78429193, 2.68599227, 6.32367754, ..., 2.71202763, 1.36130647,\n",
1207 |        "         4.47124464],\n",
1208 |        "        [2.43397663, 0.8952782 , 7.59888847, ..., 0.        , 1.98453315,\n",
1209 |        "         5.99830821],\n",
1210 |        "        [3.42512865, 2.01224344, 7.02761342, ..., 1.98453315, 0.        ,\n",
1211 |        "         5.27610504]]),\n",
1212 |        " 'dbscan__core_sample_indices': array([ 0,  1,  3,  4,  6,  7,  8,  9, 10, 11, 12, 13, 14, 16, 17, 18, 19,\n",
1213 |        "        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 37,\n",
1214 |        "        38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 50, 52, 53, 54, 55, 56, 57,\n",
1215 |        "        58, 60, 61, 62, 63, 64, 65], dtype=int64),\n",
1216 |        " 'dbscan__labels': array([ 0,  0, -1,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,\n",
1217 |        "         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,\n",
1218 |        "         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  1,  0,  0,\n",
1219 |        "         0,  0,  0,  0,  0,  0,  0,  0, -1,  0,  0,  0,  0,  0,  0, -1],\n",
1220 |        "       dtype=int64),\n",
1221 |        " 'dbscan__n_features_in': 67}"
1222 |       ]
1223 |      },
1224 |      "execution_count": 27,
1225 |      "metadata": {},
1226 |      "output_type": "execute_result"
1227 |     }
1228 |    ],
1229 |    "source": [
1230 |     "# step 1 - prepare dataset (training split only, labels discarded)\n",
1231 |     "from sktime.datasets import load_italy_power_demand\n",
1232 |     "\n",
1233 |     "X, _ = load_italy_power_demand(split=\"train\", return_type=\"numpy3D\")\n",
1234 |     "\n",
1235 |     "# step 2 - specify the clusterer\n",
1236 |     "from sktime.clustering.dbscan import TimeSeriesDBSCAN\n",
1237 |     "from sktime.dists_kernels.compose_tab_to_panel import FlatDist\n",
1238 |     "from sktime.dists_kernels import ScipyDist\n",
1239 |     "\n",
1240 |     "eucl_dist = FlatDist(ScipyDist())\n",
1241 |     "clst = TimeSeriesDBSCAN(distance=eucl_dist, eps=2)\n",
1242 |     "\n",
1243 |     "# step 3 - fit the clusterer to the data\n",
1244 |     "clst.fit(X)\n",
1245 |     "\n",
1246 |     "# step 4 - inspect the clustering\n",
1247 |     "clst.get_fitted_params()"
1248 |    ]
1249 |   },
1250 |   {
1251 |    "attachments": {},
1252 |    "cell_type": "markdown",
1253 |    "metadata": {},
1254 |    "source": [
1255 |     "## 5.3 Searching for estimators, estimator tags"
1256 |    ]
1257 |   },
1258 |   {
1259 |    "attachments": {},
1260 |    "cell_type": "markdown",
1261 |    "metadata": {},
1262 |    "source": [
1263 |     "Estimators in `sktime` are tagged.\n",
1264 |     "\n",
1265 |     "Tags starting with \"capability\" indicate things the estimator can or cannot do, e.g.,\n",
1266 |     "\n",
1267 |     "* `\"capability:missing_values\"` - dealing with missing values\n",
1268 |     "* `\"capability:multivariate\"` - dealing with multivariate input\n",
1269 |     "* `\"capability:unequal_length\"` - dealing with time series panels where the individual time series have unequal length and/or unequal index"
1270 |    ]
1271 |   },
1272 |   {
1273 |    "attachments": {},
1274 |    "cell_type": "markdown",
1275 |    "metadata": {},
1276 |    "source": [
1277 |     "all tags for an estimator scitype (e.g., classifier, regressor) can be inspected by `sktime.registry.all_tags`:"
1278 |    ]
1279 |   },
1280 |   {
1281 |    "cell_type": "code",
1282 |    "execution_count": 28,
1283 |    "metadata": {},
1284 |    "outputs": [
1285 |     {
1286 |      "data": {
1287 |       "text/html": [
1288 |        "<div>\n",
1289 |        "<style scoped>\n",
1290 |        "    .dataframe tbody tr th:only-of-type {\n",
1291 |        "        vertical-align: middle;\n",
1292 |        "    }\n",
1293 |        "\n",
1294 |        "    .dataframe tbody tr th {\n",
1295 |        "        vertical-align: top;\n",
1296 |        "    }\n",
1297 |        "\n",
1298 |        "    .dataframe thead th {\n",
1299 |        "        text-align: right;\n",
1300 |        "    }\n",
1301 |        "</style>\n",
1302 |        "<table border=\"1\" class=\"dataframe\">\n",
1303 |        "  <thead>\n",
1304 |        "    <tr style=\"text-align: right;\">\n",
1305 |        "      <th></th>\n",
1306 |        "      <th>name</th>\n",
1307 |        "      <th>scitype</th>\n",
1308 |        "      <th>type</th>\n",
1309 |        "      <th>description</th>\n",
1310 |        "    </tr>\n",
1311 |        "  </thead>\n",
1312 |        "  <tbody>\n",
1313 |        "    <tr>\n",
1314 |        "      <th>0</th>\n",
1315 |        "      <td>capability:contractable</td>\n",
1316 |        "      <td>classifier</td>\n",
1317 |        "      <td>bool</td>\n",
1318 |        "      <td>contract time setting, does the estimator supp...</td>\n",
1319 |        "    </tr>\n",
1320 |        "    <tr>\n",
1321 |        "      <th>1</th>\n",
1322 |        "      <td>capability:missing_values</td>\n",
1323 |        "      <td>[classifier, early_classifier, param_est, regr...</td>\n",
1324 |        "      <td>bool</td>\n",
1325 |        "      <td>can the classifier handle missing data (NA, np...</td>\n",
1326 |        "    </tr>\n",
1327 |        "    <tr>\n",
1328 |        "      <th>2</th>\n",
1329 |        "      <td>capability:multithreading</td>\n",
1330 |        "      <td>[classifier, early_classifier]</td>\n",
1331 |        "      <td>bool</td>\n",
1332 |        "      <td>can the classifier set n_jobs to use multiple ...</td>\n",
1333 |        "    </tr>\n",
1334 |        "    <tr>\n",
1335 |        "      <th>3</th>\n",
1336 |        "      <td>capability:multivariate</td>\n",
1337 |        "      <td>[classifier, early_classifier, param_est, regr...</td>\n",
1338 |        "      <td>bool</td>\n",
1339 |        "      <td>can the classifier classify time series with 2...</td>\n",
1340 |        "    </tr>\n",
1341 |        "    <tr>\n",
1342 |        "      <th>4</th>\n",
1343 |        "      <td>capability:predict_proba</td>\n",
1344 |        "      <td>classifier</td>\n",
1345 |        "      <td>bool</td>\n",
1346 |        "      <td>does the classifier implement a non-default pr...</td>\n",
1347 |        "    </tr>\n",
1348 |        "    <tr>\n",
1349 |        "      <th>5</th>\n",
1350 |        "      <td>capability:train_estimate</td>\n",
1351 |        "      <td>classifier</td>\n",
1352 |        "      <td>bool</td>\n",
1353 |        "      <td>can the classifier estimate its performance on...</td>\n",
1354 |        "    </tr>\n",
1355 |        "    <tr>\n",
1356 |        "      <th>6</th>\n",
1357 |        "      <td>capability:unequal_length</td>\n",
1358 |        "      <td>[classifier, early_classifier, regressor, tran...</td>\n",
1359 |        "      <td>bool</td>\n",
1360 |        "      <td>can the estimator handle unequal length time s...</td>\n",
1361 |        "    </tr>\n",
1362 |        "    <tr>\n",
1363 |        "      <th>7</th>\n",
1364 |        "      <td>classifier_type</td>\n",
1365 |        "      <td>classifier</td>\n",
1366 |        "      <td>(list, [dictionary, distance, feature, hybrid,...</td>\n",
1367 |        "      <td>which type the classifier falls under in the t...</td>\n",
1368 |        "    </tr>\n",
1369 |        "  </tbody>\n",
1370 |        "</table>\n",
1371 |        "</div>"
1372 |       ],
1373 |       "text/plain": [
1374 |        "                        name                                            scitype                                               type                                        description\n",
1375 |        "0    capability:contractable                                         classifier                                               bool  contract time setting, does the estimator supp...\n",
1376 |        "1  capability:missing_values  [classifier, early_classifier, param_est, regr...                                               bool  can the classifier handle missing data (NA, np...\n",
1377 |        "2  capability:multithreading                     [classifier, early_classifier]                                               bool  can the classifier set n_jobs to use multiple ...\n",
1378 |        "3    capability:multivariate  [classifier, early_classifier, param_est, regr...                                               bool  can the classifier classify time series with 2...\n",
1379 |        "4   capability:predict_proba                                         classifier                                               bool  does the classifier implement a non-default pr...\n",
1380 |        "5  capability:train_estimate                                         classifier                                               bool  can the classifier estimate its performance on...\n",
1381 |        "6  capability:unequal_length  [classifier, early_classifier, regressor, tran...                                               bool  can the estimator handle unequal length time s...\n",
1382 |        "7            classifier_type                                         classifier  (list, [dictionary, distance, feature, hybrid,...  which type the classifier falls under in the t..."
1383 |       ]
1384 |      },
1385 |      "execution_count": 28,
1386 |      "metadata": {},
1387 |      "output_type": "execute_result"
1388 |     }
1389 |    ],
1390 |    "source": [
1391 |     "from sktime.registry import all_tags\n",
1392 |     "\n",
1393 |     "all_tags(\"classifier\", as_dataframe=True)"
1394 |    ]
1395 |   },
1396 |   {
1397 |    "attachments": {},
1398 |    "cell_type": "markdown",
1399 |    "metadata": {},
1400 |    "source": [
1401 |     "valid estimator types are listed in the `all_tags` docstring, or `sktime.registry.BASE_CLASS_REGISTER`"
1402 |    ]
1403 |   },
1404 |   {
1405 |    "cell_type": "code",
1406 |    "execution_count": 29,
1407 |    "metadata": {},
1408 |    "outputs": [
1409 |     {
1410 |      "data": {
1411 |       "text/plain": [
1412 |        "('object',\n",
1413 |        " 'estimator',\n",
1414 |        " 'aligner',\n",
1415 |        " 'classifier',\n",
1416 |        " 'clusterer',\n",
1417 |        " 'early_classifier',\n",
1418 |        " 'forecaster',\n",
1419 |        " 'metric',\n",
1420 |        " 'network',\n",
1421 |        " 'param_est',\n",
1422 |        " 'regressor',\n",
1423 |        " 'series-annotator',\n",
1424 |        " 'splitter',\n",
1425 |        " 'transformer',\n",
1426 |        " 'transformer-pairwise',\n",
1427 |        " 'transformer-pairwise-panel',\n",
1428 |        " 'distribution')"
1429 |       ]
1430 |      },
1431 |      "execution_count": 29,
1432 |      "metadata": {},
1433 |      "output_type": "execute_result"
1434 |     }
1435 |    ],
1436 |    "source": [
1437 |     "from sktime.registry import BASE_CLASS_REGISTER\n",
1438 |     "\n",
1439 |     "# get only first table column, the list of types\n",
1440 |     "list(zip(*BASE_CLASS_REGISTER))[0]"
1441 |    ]
1442 |   },
1443 |   {
1444 |    "attachments": {},
1445 |    "cell_type": "markdown",
1446 |    "metadata": {},
1447 |    "source": [
1448 |     "to find all estimators of a certain type, use `sktime.registry.all_estimators`"
1449 |    ]
1450 |   },
1451 |   {
1452 |    "cell_type": "code",
1453 |    "execution_count": 30,
1454 |    "metadata": {},
1455 |    "outputs": [
1456 |     {
1457 |      "data": {
1458 |       "text/html": [
1459 |        "<div>\n",
1460 |        "<style scoped>\n",
1461 |        "    .dataframe tbody tr th:only-of-type {\n",
1462 |        "        vertical-align: middle;\n",
1463 |        "    }\n",
1464 |        "\n",
1465 |        "    .dataframe tbody tr th {\n",
1466 |        "        vertical-align: top;\n",
1467 |        "    }\n",
1468 |        "\n",
1469 |        "    .dataframe thead th {\n",
1470 |        "        text-align: right;\n",
1471 |        "    }\n",
1472 |        "</style>\n",
1473 |        "<table border=\"1\" class=\"dataframe\">\n",
1474 |        "  <thead>\n",
1475 |        "    <tr style=\"text-align: right;\">\n",
1476 |        "      <th></th>\n",
1477 |        "      <th>name</th>\n",
1478 |        "      <th>object</th>\n",
1479 |        "    </tr>\n",
1480 |        "  </thead>\n",
1481 |        "  <tbody>\n",
1482 |        "    <tr>\n",
1483 |        "      <th>0</th>\n",
1484 |        "      <td>Arsenal</td>\n",
1485 |        "      <td>&lt;class 'sktime.classification.kernel_based._ar...</td>\n",
1486 |        "    </tr>\n",
1487 |        "    <tr>\n",
1488 |        "      <th>1</th>\n",
1489 |        "      <td>BOSSEnsemble</td>\n",
1490 |        "      <td>&lt;class 'sktime.classification.dictionary_based...</td>\n",
1491 |        "    </tr>\n",
1492 |        "    <tr>\n",
1493 |        "      <th>2</th>\n",
1494 |        "      <td>BaggingClassifier</td>\n",
1495 |        "      <td>&lt;class 'sktime.classification.ensemble._baggin...</td>\n",
1496 |        "    </tr>\n",
1497 |        "    <tr>\n",
1498 |        "      <th>3</th>\n",
1499 |        "      <td>CNNClassifier</td>\n",
1500 |        "      <td>&lt;class 'sktime.classification.deep_learning.cn...</td>\n",
1501 |        "    </tr>\n",
1502 |        "    <tr>\n",
1503 |        "      <th>4</th>\n",
1504 |        "      <td>CanonicalIntervalForest</td>\n",
1505 |        "      <td>&lt;class 'sktime.classification.interval_based._...</td>\n",
1506 |        "    </tr>\n",
1507 |        "    <tr>\n",
1508 |        "      <th>5</th>\n",
1509 |        "      <td>Catch22Classifier</td>\n",
1510 |        "      <td>&lt;class 'sktime.classification.feature_based._c...</td>\n",
1511 |        "    </tr>\n",
1512 |        "    <tr>\n",
1513 |        "      <th>6</th>\n",
1514 |        "      <td>ClassifierPipeline</td>\n",
1515 |        "      <td>&lt;class 'sktime.classification.compose._pipelin...</td>\n",
1516 |        "    </tr>\n",
1517 |        "    <tr>\n",
1518 |        "      <th>7</th>\n",
1519 |        "      <td>ColumnEnsembleClassifier</td>\n",
1520 |        "      <td>&lt;class 'sktime.classification.compose._column_...</td>\n",
1521 |        "    </tr>\n",
1522 |        "    <tr>\n",
1523 |        "      <th>8</th>\n",
1524 |        "      <td>ComposableTimeSeriesForestClassifier</td>\n",
1525 |        "      <td>&lt;class 'sktime.classification.ensemble._ctsf.C...</td>\n",
1526 |        "    </tr>\n",
1527 |        "    <tr>\n",
1528 |        "      <th>9</th>\n",
1529 |        "      <td>ContractableBOSS</td>\n",
1530 |        "      <td>&lt;class 'sktime.classification.dictionary_based...</td>\n",
1531 |        "    </tr>\n",
1532 |        "    <tr>\n",
1533 |        "      <th>10</th>\n",
1534 |        "      <td>DrCIF</td>\n",
1535 |        "      <td>&lt;class 'sktime.classification.interval_based._...</td>\n",
1536 |        "    </tr>\n",
1537 |        "    <tr>\n",
1538 |        "      <th>11</th>\n",
1539 |        "      <td>DummyClassifier</td>\n",
1540 |        "      <td>&lt;class 'sktime.classification.dummy._dummy.Dum...</td>\n",
1541 |        "    </tr>\n",
1542 |        "    <tr>\n",
1543 |        "      <th>12</th>\n",
1544 |        "      <td>ElasticEnsemble</td>\n",
1545 |        "      <td>&lt;class 'sktime.classification.distance_based._...</td>\n",
1546 |        "    </tr>\n",
1547 |        "    <tr>\n",
1548 |        "      <th>13</th>\n",
1549 |        "      <td>FCNClassifier</td>\n",
1550 |        "      <td>&lt;class 'sktime.classification.deep_learning.fc...</td>\n",
1551 |        "    </tr>\n",
1552 |        "    <tr>\n",
1553 |        "      <th>14</th>\n",
1554 |        "      <td>FreshPRINCE</td>\n",
1555 |        "      <td>&lt;class 'sktime.classification.feature_based._f...</td>\n",
1556 |        "    </tr>\n",
1557 |        "    <tr>\n",
1558 |        "      <th>15</th>\n",
1559 |        "      <td>HIVECOTEV1</td>\n",
1560 |        "      <td>&lt;class 'sktime.classification.hybrid._hivecote...</td>\n",
1561 |        "    </tr>\n",
1562 |        "    <tr>\n",
1563 |        "      <th>16</th>\n",
1564 |        "      <td>HIVECOTEV2</td>\n",
1565 |        "      <td>&lt;class 'sktime.classification.hybrid._hivecote...</td>\n",
1566 |        "    </tr>\n",
1567 |        "    <tr>\n",
1568 |        "      <th>17</th>\n",
1569 |        "      <td>InceptionTimeClassifier</td>\n",
1570 |        "      <td>&lt;class 'sktime.classification.deep_learning.in...</td>\n",
1571 |        "    </tr>\n",
1572 |        "    <tr>\n",
1573 |        "      <th>18</th>\n",
1574 |        "      <td>IndividualBOSS</td>\n",
1575 |        "      <td>&lt;class 'sktime.classification.dictionary_based...</td>\n",
1576 |        "    </tr>\n",
1577 |        "    <tr>\n",
1578 |        "      <th>19</th>\n",
1579 |        "      <td>IndividualTDE</td>\n",
1580 |        "      <td>&lt;class 'sktime.classification.dictionary_based...</td>\n",
1581 |        "    </tr>\n",
1582 |        "    <tr>\n",
1583 |        "      <th>20</th>\n",
1584 |        "      <td>KNeighborsTimeSeriesClassifier</td>\n",
1585 |        "      <td>&lt;class 'sktime.classification.distance_based._...</td>\n",
1586 |        "    </tr>\n",
1587 |        "    <tr>\n",
1588 |        "      <th>21</th>\n",
1589 |        "      <td>LSTMFCNClassifier</td>\n",
1590 |        "      <td>&lt;class 'sktime.classification.deep_learning.ls...</td>\n",
1591 |        "    </tr>\n",
1592 |        "    <tr>\n",
1593 |        "      <th>22</th>\n",
1594 |        "      <td>MACNNClassifier</td>\n",
1595 |        "      <td>&lt;class 'sktime.classification.deep_learning.ma...</td>\n",
1596 |        "    </tr>\n",
1597 |        "    <tr>\n",
1598 |        "      <th>23</th>\n",
1599 |        "      <td>MLPClassifier</td>\n",
1600 |        "      <td>&lt;class 'sktime.classification.deep_learning.ml...</td>\n",
1601 |        "    </tr>\n",
1602 |        "    <tr>\n",
1603 |        "      <th>24</th>\n",
1604 |        "      <td>MUSE</td>\n",
1605 |        "      <td>&lt;class 'sktime.classification.dictionary_based...</td>\n",
1606 |        "    </tr>\n",
1607 |        "    <tr>\n",
1608 |        "      <th>25</th>\n",
1609 |        "      <td>MatrixProfileClassifier</td>\n",
1610 |        "      <td>&lt;class 'sktime.classification.feature_based._m...</td>\n",
1611 |        "    </tr>\n",
1612 |        "    <tr>\n",
1613 |        "      <th>26</th>\n",
1614 |        "      <td>MrSQM</td>\n",
1615 |        "      <td>&lt;class 'sktime.classification.shapelet_based._...</td>\n",
1616 |        "    </tr>\n",
1617 |        "    <tr>\n",
1618 |        "      <th>27</th>\n",
1619 |        "      <td>ProbabilityThresholdEarlyClassifier</td>\n",
1620 |        "      <td>&lt;class 'sktime.classification.early_classifica...</td>\n",
1621 |        "    </tr>\n",
1622 |        "    <tr>\n",
1623 |        "      <th>28</th>\n",
1624 |        "      <td>ProximityForest</td>\n",
1625 |        "      <td>&lt;class 'sktime.classification.distance_based._...</td>\n",
1626 |        "    </tr>\n",
1627 |        "    <tr>\n",
1628 |        "      <th>29</th>\n",
1629 |        "      <td>ProximityStump</td>\n",
1630 |        "      <td>&lt;class 'sktime.classification.distance_based._...</td>\n",
1631 |        "    </tr>\n",
1632 |        "    <tr>\n",
1633 |        "      <th>30</th>\n",
1634 |        "      <td>ProximityTree</td>\n",
1635 |        "      <td>&lt;class 'sktime.classification.distance_based._...</td>\n",
1636 |        "    </tr>\n",
1637 |        "    <tr>\n",
1638 |        "      <th>31</th>\n",
1639 |        "      <td>RandomIntervalClassifier</td>\n",
1640 |        "      <td>&lt;class 'sktime.classification.feature_based._r...</td>\n",
1641 |        "    </tr>\n",
1642 |        "    <tr>\n",
1643 |        "      <th>32</th>\n",
1644 |        "      <td>RandomIntervalSpectralEnsemble</td>\n",
1645 |        "      <td>&lt;class 'sktime.classification.interval_based._...</td>\n",
1646 |        "    </tr>\n",
1647 |        "    <tr>\n",
1648 |        "      <th>33</th>\n",
1649 |        "      <td>ResNetClassifier</td>\n",
1650 |        "      <td>&lt;class 'sktime.classification.deep_learning.re...</td>\n",
1651 |        "    </tr>\n",
1652 |        "    <tr>\n",
1653 |        "      <th>34</th>\n",
1654 |        "      <td>RocketClassifier</td>\n",
1655 |        "      <td>&lt;class 'sktime.classification.kernel_based._ro...</td>\n",
1656 |        "    </tr>\n",
1657 |        "    <tr>\n",
1658 |        "      <th>35</th>\n",
1659 |        "      <td>ShapeDTW</td>\n",
1660 |        "      <td>&lt;class 'sktime.classification.distance_based._...</td>\n",
1661 |        "    </tr>\n",
1662 |        "    <tr>\n",
1663 |        "      <th>36</th>\n",
1664 |        "      <td>ShapeletTransformClassifier</td>\n",
1665 |        "      <td>&lt;class 'sktime.classification.shapelet_based._...</td>\n",
1666 |        "    </tr>\n",
1667 |        "    <tr>\n",
1668 |        "      <th>37</th>\n",
1669 |        "      <td>SignatureClassifier</td>\n",
1670 |        "      <td>&lt;class 'sktime.classification.feature_based._s...</td>\n",
1671 |        "    </tr>\n",
1672 |        "    <tr>\n",
1673 |        "      <th>38</th>\n",
1674 |        "      <td>SimpleRNNClassifier</td>\n",
1675 |        "      <td>&lt;class 'sktime.classification.deep_learning.rn...</td>\n",
1676 |        "    </tr>\n",
1677 |        "    <tr>\n",
1678 |        "      <th>39</th>\n",
1679 |        "      <td>SklearnClassifierPipeline</td>\n",
1680 |        "      <td>&lt;class 'sktime.classification.compose._pipelin...</td>\n",
1681 |        "    </tr>\n",
1682 |        "    <tr>\n",
1683 |        "      <th>40</th>\n",
1684 |        "      <td>SummaryClassifier</td>\n",
1685 |        "      <td>&lt;class 'sktime.classification.feature_based._s...</td>\n",
1686 |        "    </tr>\n",
1687 |        "    <tr>\n",
1688 |        "      <th>41</th>\n",
1689 |        "      <td>SupervisedTimeSeriesForest</td>\n",
1690 |        "      <td>&lt;class 'sktime.classification.interval_based._...</td>\n",
1691 |        "    </tr>\n",
1692 |        "    <tr>\n",
1693 |        "      <th>42</th>\n",
1694 |        "      <td>TSFreshClassifier</td>\n",
1695 |        "      <td>&lt;class 'sktime.classification.feature_based._t...</td>\n",
1696 |        "    </tr>\n",
1697 |        "    <tr>\n",
1698 |        "      <th>43</th>\n",
1699 |        "      <td>TapNetClassifier</td>\n",
1700 |        "      <td>&lt;class 'sktime.classification.deep_learning.ta...</td>\n",
1701 |        "    </tr>\n",
1702 |        "    <tr>\n",
1703 |        "      <th>44</th>\n",
1704 |        "      <td>TemporalDictionaryEnsemble</td>\n",
1705 |        "      <td>&lt;class 'sktime.classification.dictionary_based...</td>\n",
1706 |        "    </tr>\n",
1707 |        "    <tr>\n",
1708 |        "      <th>45</th>\n",
1709 |        "      <td>TimeSeriesForestClassifier</td>\n",
1710 |        "      <td>&lt;class 'sktime.classification.interval_based._...</td>\n",
1711 |        "    </tr>\n",
1712 |        "    <tr>\n",
1713 |        "      <th>46</th>\n",
1714 |        "      <td>TimeSeriesSVC</td>\n",
1715 |        "      <td>&lt;class 'sktime.classification.kernel_based._sv...</td>\n",
1716 |        "    </tr>\n",
1717 |        "    <tr>\n",
1718 |        "      <th>47</th>\n",
1719 |        "      <td>WEASEL</td>\n",
1720 |        "      <td>&lt;class 'sktime.classification.dictionary_based...</td>\n",
1721 |        "    </tr>\n",
1722 |        "    <tr>\n",
1723 |        "      <th>48</th>\n",
1724 |        "      <td>WeightedEnsembleClassifier</td>\n",
1725 |        "      <td>&lt;class 'sktime.classification.ensemble._weight...</td>\n",
1726 |        "    </tr>\n",
1727 |        "  </tbody>\n",
1728 |        "</table>\n",
1729 |        "</div>"
1730 |       ],
1731 |       "text/plain": [
1732 |        "                                    name                                             object\n",
1733 |        "0                                Arsenal  <class 'sktime.classification.kernel_based._ar...\n",
1734 |        "1                           BOSSEnsemble  <class 'sktime.classification.dictionary_based...\n",
1735 |        "2                      BaggingClassifier  <class 'sktime.classification.ensemble._baggin...\n",
1736 |        "3                          CNNClassifier  <class 'sktime.classification.deep_learning.cn...\n",
1737 |        "4                CanonicalIntervalForest  <class 'sktime.classification.interval_based._...\n",
1738 |        "5                      Catch22Classifier  <class 'sktime.classification.feature_based._c...\n",
1739 |        "6                     ClassifierPipeline  <class 'sktime.classification.compose._pipelin...\n",
1740 |        "7               ColumnEnsembleClassifier  <class 'sktime.classification.compose._column_...\n",
1741 |        "8   ComposableTimeSeriesForestClassifier  <class 'sktime.classification.ensemble._ctsf.C...\n",
1742 |        "9                       ContractableBOSS  <class 'sktime.classification.dictionary_based...\n",
1743 |        "10                                 DrCIF  <class 'sktime.classification.interval_based._...\n",
1744 |        "11                       DummyClassifier  <class 'sktime.classification.dummy._dummy.Dum...\n",
1745 |        "12                       ElasticEnsemble  <class 'sktime.classification.distance_based._...\n",
1746 |        "13                         FCNClassifier  <class 'sktime.classification.deep_learning.fc...\n",
1747 |        "14                           FreshPRINCE  <class 'sktime.classification.feature_based._f...\n",
1748 |        "15                            HIVECOTEV1  <class 'sktime.classification.hybrid._hivecote...\n",
1749 |        "16                            HIVECOTEV2  <class 'sktime.classification.hybrid._hivecote...\n",
1750 |        "17               InceptionTimeClassifier  <class 'sktime.classification.deep_learning.in...\n",
1751 |        "18                        IndividualBOSS  <class 'sktime.classification.dictionary_based...\n",
1752 |        "19                         IndividualTDE  <class 'sktime.classification.dictionary_based...\n",
1753 |        "20        KNeighborsTimeSeriesClassifier  <class 'sktime.classification.distance_based._...\n",
1754 |        "21                     LSTMFCNClassifier  <class 'sktime.classification.deep_learning.ls...\n",
1755 |        "22                       MACNNClassifier  <class 'sktime.classification.deep_learning.ma...\n",
1756 |        "23                         MLPClassifier  <class 'sktime.classification.deep_learning.ml...\n",
1757 |        "24                                  MUSE  <class 'sktime.classification.dictionary_based...\n",
1758 |        "25               MatrixProfileClassifier  <class 'sktime.classification.feature_based._m...\n",
1759 |        "26                                 MrSQM  <class 'sktime.classification.shapelet_based._...\n",
1760 |        "27   ProbabilityThresholdEarlyClassifier  <class 'sktime.classification.early_classifica...\n",
1761 |        "28                       ProximityForest  <class 'sktime.classification.distance_based._...\n",
1762 |        "29                        ProximityStump  <class 'sktime.classification.distance_based._...\n",
1763 |        "30                         ProximityTree  <class 'sktime.classification.distance_based._...\n",
1764 |        "31              RandomIntervalClassifier  <class 'sktime.classification.feature_based._r...\n",
1765 |        "32        RandomIntervalSpectralEnsemble  <class 'sktime.classification.interval_based._...\n",
1766 |        "33                      ResNetClassifier  <class 'sktime.classification.deep_learning.re...\n",
1767 |        "34                      RocketClassifier  <class 'sktime.classification.kernel_based._ro...\n",
1768 |        "35                              ShapeDTW  <class 'sktime.classification.distance_based._...\n",
1769 |        "36           ShapeletTransformClassifier  <class 'sktime.classification.shapelet_based._...\n",
1770 |        "37                   SignatureClassifier  <class 'sktime.classification.feature_based._s...\n",
1771 |        "38                   SimpleRNNClassifier  <class 'sktime.classification.deep_learning.rn...\n",
1772 |        "39             SklearnClassifierPipeline  <class 'sktime.classification.compose._pipelin...\n",
1773 |        "40                     SummaryClassifier  <class 'sktime.classification.feature_based._s...\n",
1774 |        "41            SupervisedTimeSeriesForest  <class 'sktime.classification.interval_based._...\n",
1775 |        "42                     TSFreshClassifier  <class 'sktime.classification.feature_based._t...\n",
1776 |        "43                      TapNetClassifier  <class 'sktime.classification.deep_learning.ta...\n",
1777 |        "44            TemporalDictionaryEnsemble  <class 'sktime.classification.dictionary_based...\n",
1778 |        "45            TimeSeriesForestClassifier  <class 'sktime.classification.interval_based._...\n",
1779 |        "46                         TimeSeriesSVC  <class 'sktime.classification.kernel_based._sv...\n",
1780 |        "47                                WEASEL  <class 'sktime.classification.dictionary_based...\n",
1781 |        "48            WeightedEnsembleClassifier  <class 'sktime.classification.ensemble._weight..."
1782 |       ]
1783 |      },
1784 |      "execution_count": 30,
1785 |      "metadata": {},
1786 |      "output_type": "execute_result"
1787 |     }
1788 |    ],
1789 |    "source": [
1790 |     "# list all classifiers in sktime\n",
1791 |     "from sktime.registry import all_estimators\n",
1792 |     "\n",
1793 |     "all_estimators(\"classifier\", as_dataframe=True)"
1794 |    ]
1795 |   },
1796 |   {
1797 |    "attachments": {},
1798 |    "cell_type": "markdown",
1799 |    "metadata": {},
1800 |    "source": [
1801 |     "for listing all estimators of a certain type with a certain capability,\n",
1802 |     "use the `filter_tags` argument of `all_estimators`:"
1803 |    ]
1804 |   },
1805 |   {
1806 |    "cell_type": "code",
1807 |    "execution_count": 31,
1808 |    "metadata": {},
1809 |    "outputs": [
1810 |     {
1811 |      "data": {
1812 |       "text/html": [
1813 |        "<div>\n",
1814 |        "<style scoped>\n",
1815 |        "    .dataframe tbody tr th:only-of-type {\n",
1816 |        "        vertical-align: middle;\n",
1817 |        "    }\n",
1818 |        "\n",
1819 |        "    .dataframe tbody tr th {\n",
1820 |        "        vertical-align: top;\n",
1821 |        "    }\n",
1822 |        "\n",
1823 |        "    .dataframe thead th {\n",
1824 |        "        text-align: right;\n",
1825 |        "    }\n",
1826 |        "</style>\n",
1827 |        "<table border=\"1\" class=\"dataframe\">\n",
1828 |        "  <thead>\n",
1829 |        "    <tr style=\"text-align: right;\">\n",
1830 |        "      <th></th>\n",
1831 |        "      <th>name</th>\n",
1832 |        "      <th>object</th>\n",
1833 |        "    </tr>\n",
1834 |        "  </thead>\n",
1835 |        "  <tbody>\n",
1836 |        "    <tr>\n",
1837 |        "      <th>0</th>\n",
1838 |        "      <td>BaggingClassifier</td>\n",
1839 |        "      <td>&lt;class 'sktime.classification.ensemble._baggin...</td>\n",
1840 |        "    </tr>\n",
1841 |        "    <tr>\n",
1842 |        "      <th>1</th>\n",
1843 |        "      <td>DummyClassifier</td>\n",
1844 |        "      <td>&lt;class 'sktime.classification.dummy._dummy.Dum...</td>\n",
1845 |        "    </tr>\n",
1846 |        "    <tr>\n",
1847 |        "      <th>2</th>\n",
1848 |        "      <td>KNeighborsTimeSeriesClassifier</td>\n",
1849 |        "      <td>&lt;class 'sktime.classification.distance_based._...</td>\n",
1850 |        "    </tr>\n",
1851 |        "    <tr>\n",
1852 |        "      <th>3</th>\n",
1853 |        "      <td>SklearnClassifierPipeline</td>\n",
1854 |        "      <td>&lt;class 'sktime.classification.compose._pipelin...</td>\n",
1855 |        "    </tr>\n",
1856 |        "    <tr>\n",
1857 |        "      <th>4</th>\n",
1858 |        "      <td>TimeSeriesSVC</td>\n",
1859 |        "      <td>&lt;class 'sktime.classification.kernel_based._sv...</td>\n",
1860 |        "    </tr>\n",
1861 |        "    <tr>\n",
1862 |        "      <th>5</th>\n",
1863 |        "      <td>WeightedEnsembleClassifier</td>\n",
1864 |        "      <td>&lt;class 'sktime.classification.ensemble._weight...</td>\n",
1865 |        "    </tr>\n",
1866 |        "  </tbody>\n",
1867 |        "</table>\n",
1868 |        "</div>"
1869 |       ],
1870 |       "text/plain": [
1871 |        "                             name                                             object\n",
1872 |        "0               BaggingClassifier  <class 'sktime.classification.ensemble._baggin...\n",
1873 |        "1                 DummyClassifier  <class 'sktime.classification.dummy._dummy.Dum...\n",
1874 |        "2  KNeighborsTimeSeriesClassifier  <class 'sktime.classification.distance_based._...\n",
1875 |        "3       SklearnClassifierPipeline  <class 'sktime.classification.compose._pipelin...\n",
1876 |        "4                   TimeSeriesSVC  <class 'sktime.classification.kernel_based._sv...\n",
1877 |        "5      WeightedEnsembleClassifier  <class 'sktime.classification.ensemble._weight..."
1878 |       ]
1879 |      },
1880 |      "execution_count": 31,
1881 |      "metadata": {},
1882 |      "output_type": "execute_result"
1883 |     }
1884 |    ],
1885 |    "source": [
1886 |     "# list all classifiers in sktime\n",
1887 |     "# that can classify panels of time series containing missing data\n",
1888 |     "from sktime.registry import all_estimators\n",
1889 |     "\n",
1890 |     "all_estimators(\"classifier\", as_dataframe=True, filter_tags={\"capability:missing_values\": True})"
1891 |    ]
1892 |   },
1893 |   {
1894 |    "attachments": {},
1895 |    "cell_type": "markdown",
1896 |    "metadata": {},
1897 |    "source": [
1898 |     "side note:\n",
1899 |     "\n",
1900 |     "don't worry about how short the list is - when in doubt, it is always possible to pipeline with `Imputer`\n",
1901 |     "\n",
1902 |     "as in the next section :-)"
1903 |    ]
1904 |   },
1905 |   {
1906 |    "attachments": {},
1907 |    "cell_type": "markdown",
1908 |    "metadata": {},
1909 |    "source": [
1910 |     "## 5.4 Pipelines, Feature Extraction, Tuning, Composition\n"
1911 |    ]
1912 |   },
1913 |   {
1914 |    "attachments": {},
1915 |    "cell_type": "markdown",
1916 |    "metadata": {},
1917 |    "source": [
1918 |     "similar to `sklearn` for \"tabular\" classification, regression, etc,\n",
1919 |     "\n",
1920 |     "`sktime` has a rich set of tools for:\n",
1921 |     "\n",
1922 |     "* feature extraction via transformers\n",
1923 |     "* pipelining transformers with any estimator\n",
1924 |     "* tuning individual estimators or pipelines via grid search and similar\n",
1925 |     "* building ensembles out of individual estimators, or other composites\n",
1926 |     "\n",
1927 |     "`sktime` is also fully interoperable with the `sklearn` interface if `numpy`-based data mtypes are used\n",
1928 |     "\n",
1929 |     "(although this loses support for unequal length time series)"
1930 |    ]
1931 |   },
1932 |   {
1933 |    "attachments": {},
1934 |    "cell_type": "markdown",
1935 |    "metadata": {},
1936 |    "source": [
1937 |     "### 5.4.1 Primer on `sktime` transformers for feature extraction"
1938 |    ]
1939 |   },
1940 |   {
1941 |    "attachments": {},
1942 |    "cell_type": "markdown",
1943 |    "metadata": {},
1944 |    "source": [
1945 |     "all `sktime` transformers work natively with panel data:"
1946 |    ]
1947 |   },
1948 |   {
1949 |    "cell_type": "code",
1950 |    "execution_count": 32,
1951 |    "metadata": {},
1952 |    "outputs": [
1953 |     {
1954 |      "data": {
1955 |       "text/html": [
1956 |        "<div>\n",
1957 |        "<style scoped>\n",
1958 |        "    .dataframe tbody tr th:only-of-type {\n",
1959 |        "        vertical-align: middle;\n",
1960 |        "    }\n",
1961 |        "\n",
1962 |        "    .dataframe tbody tr th {\n",
1963 |        "        vertical-align: top;\n",
1964 |        "    }\n",
1965 |        "\n",
1966 |        "    .dataframe thead th {\n",
1967 |        "        text-align: right;\n",
1968 |        "    }\n",
1969 |        "</style>\n",
1970 |        "<table border=\"1\" class=\"dataframe\">\n",
1971 |        "  <thead>\n",
1972 |        "    <tr style=\"text-align: right;\">\n",
1973 |        "      <th></th>\n",
1974 |        "      <th></th>\n",
1975 |        "      <th>dim_0</th>\n",
1976 |        "    </tr>\n",
1977 |        "    <tr>\n",
1978 |        "      <th></th>\n",
1979 |        "      <th>timepoints</th>\n",
1980 |        "      <th></th>\n",
1981 |        "    </tr>\n",
1982 |        "  </thead>\n",
1983 |        "  <tbody>\n",
1984 |        "    <tr>\n",
1985 |        "      <th rowspan=\"5\" valign=\"top\">0</th>\n",
1986 |        "      <th>0</th>\n",
1987 |        "      <td>0.267711</td>\n",
1988 |        "    </tr>\n",
1989 |        "    <tr>\n",
1990 |        "      <th>1</th>\n",
1991 |        "      <td>-0.290155</td>\n",
1992 |        "    </tr>\n",
1993 |        "    <tr>\n",
1994 |        "      <th>2</th>\n",
1995 |        "      <td>-0.564339</td>\n",
1996 |        "    </tr>\n",
1997 |        "    <tr>\n",
1998 |        "      <th>3</th>\n",
1999 |        "      <td>-0.870044</td>\n",
2000 |        "    </tr>\n",
2001 |        "    <tr>\n",
2002 |        "      <th>4</th>\n",
2003 |        "      <td>-0.829027</td>\n",
2004 |        "    </tr>\n",
2005 |        "    <tr>\n",
2006 |        "      <th>...</th>\n",
2007 |        "      <th>...</th>\n",
2008 |        "      <td>...</td>\n",
2009 |        "    </tr>\n",
2010 |        "    <tr>\n",
2011 |        "      <th rowspan=\"5\" valign=\"top\">1095</th>\n",
2012 |        "      <th>19</th>\n",
2013 |        "      <td>-0.425904</td>\n",
2014 |        "    </tr>\n",
2015 |        "    <tr>\n",
2016 |        "      <th>20</th>\n",
2017 |        "      <td>-0.781304</td>\n",
2018 |        "    </tr>\n",
2019 |        "    <tr>\n",
2020 |        "      <th>21</th>\n",
2021 |        "      <td>-0.038512</td>\n",
2022 |        "    </tr>\n",
2023 |        "    <tr>\n",
2024 |        "      <th>22</th>\n",
2025 |        "      <td>-0.637956</td>\n",
2026 |        "    </tr>\n",
2027 |        "    <tr>\n",
2028 |        "      <th>23</th>\n",
2029 |        "      <td>-0.932346</td>\n",
2030 |        "    </tr>\n",
2031 |        "  </tbody>\n",
2032 |        "</table>\n",
2033 |        "<p>26304 rows × 1 columns</p>\n",
2034 |        "</div>"
2035 |       ],
2036 |       "text/plain": [
2037 |        "                    dim_0\n",
2038 |        "     timepoints          \n",
2039 |        "0    0           0.267711\n",
2040 |        "     1          -0.290155\n",
2041 |        "     2          -0.564339\n",
2042 |        "     3          -0.870044\n",
2043 |        "     4          -0.829027\n",
2044 |        "...                   ...\n",
2045 |        "1095 19         -0.425904\n",
2046 |        "     20         -0.781304\n",
2047 |        "     21         -0.038512\n",
2048 |        "     22         -0.637956\n",
2049 |        "     23         -0.932346\n",
2050 |        "\n",
2051 |        "[26304 rows x 1 columns]"
2052 |       ]
2053 |      },
2054 |      "execution_count": 32,
2055 |      "metadata": {},
2056 |      "output_type": "execute_result"
2057 |     }
2058 |    ],
2059 |    "source": [
2060 |     "from sktime.datasets import load_italy_power_demand\n",
2061 |     "from sktime.transformations.series.detrend import Detrender\n",
2062 |     "\n",
2063 |     "# load some panel data\n",
2064 |     "X, _ = load_italy_power_demand(return_type=\"pd-multiindex\")\n",
2065 |     "\n",
2066 |     "# specify a linear detrender\n",
2067 |     "detrender = Detrender()\n",
2068 |     "\n",
2069 |     "# detrend X by removing linear trend from each instance\n",
2070 |     "X_detrended = detrender.fit_transform(X)\n",
2071 |     "X_detrended"
2072 |    ]
2073 |   },
2074 |   {
2075 |    "attachments": {},
2076 |    "cell_type": "markdown",
2077 |    "metadata": {},
2078 |    "source": [
2079 |     "for panel tasks such as TSC, TSR, clustering, there are two distinctions to be aware of:\n",
2080 |     "\n",
2081 |     "* series-to-series transformers transform individual series to series, panels to panels. E.g., instance-wise detrender above\n",
2082 |     "* series-to-primitive transformers transform individual series to a set of tabular features. E.g., summary feature extractor\n",
2083 |     "\n",
2084 |     "either type of transform can be instance-wise or non-instance-wise:\n",
2085 |     "\n",
2086 |     "* instance-wise transforms use only the i-th series to transform the i-th series. E.g., instance-wise detrender\n",
2087 |     "* non-instance-wise transforms train on all series to transform the i-th series. E.g., PCA, overall mean detrender"
2088 |    ]
2089 |   },
2090 |   {
2091 |    "cell_type": "code",
2092 |    "execution_count": 33,
2093 |    "metadata": {},
2094 |    "outputs": [
2095 |     {
2096 |      "data": {
2097 |       "text/html": [
2098 |        "<div>\n",
2099 |        "<style scoped>\n",
2100 |        "    .dataframe tbody tr th:only-of-type {\n",
2101 |        "        vertical-align: middle;\n",
2102 |        "    }\n",
2103 |        "\n",
2104 |        "    .dataframe tbody tr th {\n",
2105 |        "        vertical-align: top;\n",
2106 |        "    }\n",
2107 |        "\n",
2108 |        "    .dataframe thead th {\n",
2109 |        "        text-align: right;\n",
2110 |        "    }\n",
2111 |        "</style>\n",
2112 |        "<table border=\"1\" class=\"dataframe\">\n",
2113 |        "  <thead>\n",
2114 |        "    <tr style=\"text-align: right;\">\n",
2115 |        "      <th></th>\n",
2116 |        "      <th>mean</th>\n",
2117 |        "      <th>std</th>\n",
2118 |        "      <th>min</th>\n",
2119 |        "      <th>max</th>\n",
2120 |        "      <th>0.1</th>\n",
2121 |        "      <th>0.25</th>\n",
2122 |        "      <th>0.5</th>\n",
2123 |        "      <th>0.75</th>\n",
2124 |        "      <th>0.9</th>\n",
2125 |        "    </tr>\n",
2126 |        "  </thead>\n",
2127 |        "  <tbody>\n",
2128 |        "    <tr>\n",
2129 |        "      <th>0</th>\n",
2130 |        "      <td>-1.041667e-09</td>\n",
2131 |        "      <td>1.0</td>\n",
2132 |        "      <td>-1.593083</td>\n",
2133 |        "      <td>1.464375</td>\n",
2134 |        "      <td>-1.372442</td>\n",
2135 |        "      <td>-0.805078</td>\n",
2136 |        "      <td>0.030207</td>\n",
2137 |        "      <td>0.936412</td>\n",
2138 |        "      <td>1.218518</td>\n",
2139 |        "    </tr>\n",
2140 |        "    <tr>\n",
2141 |        "      <th>1</th>\n",
2142 |        "      <td>-1.958333e-09</td>\n",
2143 |        "      <td>1.0</td>\n",
2144 |        "      <td>-1.630917</td>\n",
2145 |        "      <td>1.201393</td>\n",
2146 |        "      <td>-1.533955</td>\n",
2147 |        "      <td>-0.999388</td>\n",
2148 |        "      <td>0.384871</td>\n",
2149 |        "      <td>0.735720</td>\n",
2150 |        "      <td>1.084018</td>\n",
2151 |        "    </tr>\n",
2152 |        "    <tr>\n",
2153 |        "      <th>2</th>\n",
2154 |        "      <td>-1.775000e-09</td>\n",
2155 |        "      <td>1.0</td>\n",
2156 |        "      <td>-1.397118</td>\n",
2157 |        "      <td>2.349344</td>\n",
2158 |        "      <td>-1.003740</td>\n",
2159 |        "      <td>-0.741487</td>\n",
2160 |        "      <td>-0.132687</td>\n",
2161 |        "      <td>0.265374</td>\n",
2162 |        "      <td>1.515756</td>\n",
2163 |        "    </tr>\n",
2164 |        "    <tr>\n",
2165 |        "      <th>3</th>\n",
2166 |        "      <td>-8.541667e-10</td>\n",
2167 |        "      <td>1.0</td>\n",
2168 |        "      <td>-1.646458</td>\n",
2169 |        "      <td>1.344487</td>\n",
2170 |        "      <td>-1.476779</td>\n",
2171 |        "      <td>-0.898722</td>\n",
2172 |        "      <td>0.266022</td>\n",
2173 |        "      <td>0.776495</td>\n",
2174 |        "      <td>1.039641</td>\n",
2175 |        "    </tr>\n",
2176 |        "    <tr>\n",
2177 |        "      <th>4</th>\n",
2178 |        "      <td>-3.416667e-09</td>\n",
2179 |        "      <td>1.0</td>\n",
2180 |        "      <td>-1.620240</td>\n",
2181 |        "      <td>1.303502</td>\n",
2182 |        "      <td>-1.511644</td>\n",
2183 |        "      <td>-0.978061</td>\n",
2184 |        "      <td>0.405495</td>\n",
2185 |        "      <td>0.692648</td>\n",
2186 |        "      <td>1.061249</td>\n",
2187 |        "    </tr>\n",
2188 |        "    <tr>\n",
2189 |        "      <th>...</th>\n",
2190 |        "      <td>...</td>\n",
2191 |        "      <td>...</td>\n",
2192 |        "      <td>...</td>\n",
2193 |        "      <td>...</td>\n",
2194 |        "      <td>...</td>\n",
2195 |        "      <td>...</td>\n",
2196 |        "      <td>...</td>\n",
2197 |        "      <td>...</td>\n",
2198 |        "      <td>...</td>\n",
2199 |        "    </tr>\n",
2200 |        "    <tr>\n",
2201 |        "      <th>1091</th>\n",
2202 |        "      <td>-1.041667e-09</td>\n",
2203 |        "      <td>1.0</td>\n",
2204 |        "      <td>-1.817799</td>\n",
2205 |        "      <td>1.630397</td>\n",
2206 |        "      <td>-1.323058</td>\n",
2207 |        "      <td>-0.643414</td>\n",
2208 |        "      <td>0.081208</td>\n",
2209 |        "      <td>0.568453</td>\n",
2210 |        "      <td>1.390523</td>\n",
2211 |        "    </tr>\n",
2212 |        "    <tr>\n",
2213 |        "      <th>1092</th>\n",
2214 |        "      <td>-4.166666e-10</td>\n",
2215 |        "      <td>1.0</td>\n",
2216 |        "      <td>-1.550077</td>\n",
2217 |        "      <td>1.513605</td>\n",
2218 |        "      <td>-1.343747</td>\n",
2219 |        "      <td>-0.768526</td>\n",
2220 |        "      <td>0.075550</td>\n",
2221 |        "      <td>0.857101</td>\n",
2222 |        "      <td>1.276013</td>\n",
2223 |        "    </tr>\n",
2224 |        "    <tr>\n",
2225 |        "      <th>1093</th>\n",
2226 |        "      <td>4.166667e-09</td>\n",
2227 |        "      <td>1.0</td>\n",
2228 |        "      <td>-1.706992</td>\n",
2229 |        "      <td>1.052255</td>\n",
2230 |        "      <td>-1.498879</td>\n",
2231 |        "      <td>-1.139943</td>\n",
2232 |        "      <td>0.467669</td>\n",
2233 |        "      <td>0.713195</td>\n",
2234 |        "      <td>0.993797</td>\n",
2235 |        "    </tr>\n",
2236 |        "    <tr>\n",
2237 |        "      <th>1094</th>\n",
2238 |        "      <td>1.583333e-09</td>\n",
2239 |        "      <td>1.0</td>\n",
2240 |        "      <td>-1.673857</td>\n",
2241 |        "      <td>2.420163</td>\n",
2242 |        "      <td>-0.744173</td>\n",
2243 |        "      <td>-0.479768</td>\n",
2244 |        "      <td>-0.266538</td>\n",
2245 |        "      <td>0.159923</td>\n",
2246 |        "      <td>1.550184</td>\n",
2247 |        "    </tr>\n",
2248 |        "    <tr>\n",
2249 |        "      <th>1095</th>\n",
2250 |        "      <td>3.495833e-09</td>\n",
2251 |        "      <td>1.0</td>\n",
2252 |        "      <td>-1.680337</td>\n",
2253 |        "      <td>1.461716</td>\n",
2254 |        "      <td>-1.488154</td>\n",
2255 |        "      <td>-0.810934</td>\n",
2256 |        "      <td>0.241501</td>\n",
2257 |        "      <td>0.645697</td>\n",
2258 |        "      <td>1.184117</td>\n",
2259 |        "    </tr>\n",
2260 |        "  </tbody>\n",
2261 |        "</table>\n",
2262 |        "<p>1096 rows × 9 columns</p>\n",
2263 |        "</div>"
2264 |       ],
2265 |       "text/plain": [
2266 |        "              mean  std       min       max       0.1      0.25       0.5      0.75       0.9\n",
2267 |        "0    -1.041667e-09  1.0 -1.593083  1.464375 -1.372442 -0.805078  0.030207  0.936412  1.218518\n",
2268 |        "1    -1.958333e-09  1.0 -1.630917  1.201393 -1.533955 -0.999388  0.384871  0.735720  1.084018\n",
2269 |        "2    -1.775000e-09  1.0 -1.397118  2.349344 -1.003740 -0.741487 -0.132687  0.265374  1.515756\n",
2270 |        "3    -8.541667e-10  1.0 -1.646458  1.344487 -1.476779 -0.898722  0.266022  0.776495  1.039641\n",
2271 |        "4    -3.416667e-09  1.0 -1.620240  1.303502 -1.511644 -0.978061  0.405495  0.692648  1.061249\n",
2272 |        "...            ...  ...       ...       ...       ...       ...       ...       ...       ...\n",
2273 |        "1091 -1.041667e-09  1.0 -1.817799  1.630397 -1.323058 -0.643414  0.081208  0.568453  1.390523\n",
2274 |        "1092 -4.166666e-10  1.0 -1.550077  1.513605 -1.343747 -0.768526  0.075550  0.857101  1.276013\n",
2275 |        "1093  4.166667e-09  1.0 -1.706992  1.052255 -1.498879 -1.139943  0.467669  0.713195  0.993797\n",
2276 |        "1094  1.583333e-09  1.0 -1.673857  2.420163 -0.744173 -0.479768 -0.266538  0.159923  1.550184\n",
2277 |        "1095  3.495833e-09  1.0 -1.680337  1.461716 -1.488154 -0.810934  0.241501  0.645697  1.184117\n",
2278 |        "\n",
2279 |        "[1096 rows x 9 columns]"
2280 |       ]
2281 |      },
2282 |      "execution_count": 33,
2283 |      "metadata": {},
2284 |      "output_type": "execute_result"
2285 |     }
2286 |    ],
2287 |    "source": [
2288 |     "# example of a series-to-primitive transformer\n",
2289 |     "from sktime.transformations.series.summarize import SummaryTransformer\n",
2290 |     "\n",
2291 |     "# specify summary transformer\n",
2292 |     "summary_trafo = SummaryTransformer()\n",
2293 |     "\n",
2294 |     "# extract summary features - one per instance in the panel\n",
2295 |     "X_summaries = summary_trafo.fit_transform(X)\n",
2296 |     "X_summaries"
2297 |    ]
2298 |   },
2299 |   {
2300 |    "attachments": {},
2301 |    "cell_type": "markdown",
2302 |    "metadata": {},
2303 |    "source": [
2304 |     "just like classifiers, we can search for transformers of either type via the right tag:\n",
2305 |     "\n",
2306 |     "* `\"scitype:transform-input\"` and `\"scitype:transform-output\"` define the input and output scitype, e.g., `\"Series\"` and `\"Primitives\"` for a series-to-primitive transformer (both are scitype strings)\n",
2307 |     "* `\"scitype:instancewise\"` is boolean and tells us whether the transform is instance-wise"
2308 |    ]
2309 |   },
2310 |   {
2311 |    "cell_type": "code",
2312 |    "execution_count": 34,
2313 |    "metadata": {},
2314 |    "outputs": [
2315 |     {
2316 |      "data": {
2317 |       "text/html": [
2318 |        "<div>\n",
2319 |        "<style scoped>\n",
2320 |        "    .dataframe tbody tr th:only-of-type {\n",
2321 |        "        vertical-align: middle;\n",
2322 |        "    }\n",
2323 |        "\n",
2324 |        "    .dataframe tbody tr th {\n",
2325 |        "        vertical-align: top;\n",
2326 |        "    }\n",
2327 |        "\n",
2328 |        "    .dataframe thead th {\n",
2329 |        "        text-align: right;\n",
2330 |        "    }\n",
2331 |        "</style>\n",
2332 |        "<table border=\"1\" class=\"dataframe\">\n",
2333 |        "  <thead>\n",
2334 |        "    <tr style=\"text-align: right;\">\n",
2335 |        "      <th></th>\n",
2336 |        "      <th>name</th>\n",
2337 |        "      <th>object</th>\n",
2338 |        "    </tr>\n",
2339 |        "  </thead>\n",
2340 |        "  <tbody>\n",
2341 |        "    <tr>\n",
2342 |        "      <th>0</th>\n",
2343 |        "      <td>Catch22</td>\n",
2344 |        "      <td>&lt;class 'sktime.transformations.panel.catch22.C...</td>\n",
2345 |        "    </tr>\n",
2346 |        "    <tr>\n",
2347 |        "      <th>1</th>\n",
2348 |        "      <td>Catch22Wrapper</td>\n",
2349 |        "      <td>&lt;class 'sktime.transformations.panel.catch22wr...</td>\n",
2350 |        "    </tr>\n",
2351 |        "    <tr>\n",
2352 |        "      <th>2</th>\n",
2353 |        "      <td>FittedParamExtractor</td>\n",
2354 |        "      <td>&lt;class 'sktime.transformations.panel.summarize...</td>\n",
2355 |        "    </tr>\n",
2356 |        "    <tr>\n",
2357 |        "      <th>3</th>\n",
2358 |        "      <td>RandomIntervalFeatureExtractor</td>\n",
2359 |        "      <td>&lt;class 'sktime.transformations.panel.summarize...</td>\n",
2360 |        "    </tr>\n",
2361 |        "    <tr>\n",
2362 |        "      <th>4</th>\n",
2363 |        "      <td>RandomIntervals</td>\n",
2364 |        "      <td>&lt;class 'sktime.transformations.panel.random_in...</td>\n",
2365 |        "    </tr>\n",
2366 |        "    <tr>\n",
2367 |        "      <th>5</th>\n",
2368 |        "      <td>RandomShapeletTransform</td>\n",
2369 |        "      <td>&lt;class 'sktime.transformations.panel.shapelet_...</td>\n",
2370 |        "    </tr>\n",
2371 |        "    <tr>\n",
2372 |        "      <th>6</th>\n",
2373 |        "      <td>SignatureTransformer</td>\n",
2374 |        "      <td>&lt;class 'sktime.transformations.panel.signature...</td>\n",
2375 |        "    </tr>\n",
2376 |        "    <tr>\n",
2377 |        "      <th>7</th>\n",
2378 |        "      <td>SummaryTransformer</td>\n",
2379 |        "      <td>&lt;class 'sktime.transformations.series.summariz...</td>\n",
2380 |        "    </tr>\n",
2381 |        "    <tr>\n",
2382 |        "      <th>8</th>\n",
2383 |        "      <td>TSFreshFeatureExtractor</td>\n",
2384 |        "      <td>&lt;class 'sktime.transformations.panel.tsfresh.T...</td>\n",
2385 |        "    </tr>\n",
2386 |        "    <tr>\n",
2387 |        "      <th>9</th>\n",
2388 |        "      <td>Tabularizer</td>\n",
2389 |        "      <td>&lt;class 'sktime.transformations.panel.reduce.Ta...</td>\n",
2390 |        "    </tr>\n",
2391 |        "    <tr>\n",
2392 |        "      <th>10</th>\n",
2393 |        "      <td>TimeBinner</td>\n",
2394 |        "      <td>&lt;class 'sktime.transformations.panel.reduce.Ti...</td>\n",
2395 |        "    </tr>\n",
2396 |        "  </tbody>\n",
2397 |        "</table>\n",
2398 |        "</div>"
2399 |       ],
2400 |       "text/plain": [
2401 |        "                              name                                             object\n",
2402 |        "0                          Catch22  <class 'sktime.transformations.panel.catch22.C...\n",
2403 |        "1                   Catch22Wrapper  <class 'sktime.transformations.panel.catch22wr...\n",
2404 |        "2             FittedParamExtractor  <class 'sktime.transformations.panel.summarize...\n",
2405 |        "3   RandomIntervalFeatureExtractor  <class 'sktime.transformations.panel.summarize...\n",
2406 |        "4                  RandomIntervals  <class 'sktime.transformations.panel.random_in...\n",
2407 |        "5          RandomShapeletTransform  <class 'sktime.transformations.panel.shapelet_...\n",
2408 |        "6             SignatureTransformer  <class 'sktime.transformations.panel.signature...\n",
2409 |        "7               SummaryTransformer  <class 'sktime.transformations.series.summariz...\n",
2410 |        "8          TSFreshFeatureExtractor  <class 'sktime.transformations.panel.tsfresh.T...\n",
2411 |        "9                      Tabularizer  <class 'sktime.transformations.panel.reduce.Ta...\n",
2412 |        "10                      TimeBinner  <class 'sktime.transformations.panel.reduce.Ti..."
2413 |       ]
2414 |      },
2415 |      "execution_count": 34,
2416 |      "metadata": {},
2417 |      "output_type": "execute_result"
2418 |     }
2419 |    ],
2420 |    "source": [
2421 |     "# example: looking for all series-to-primitive transformers that are instance-wise\n",
2422 |     "from sktime.registry import all_estimators\n",
2423 |     "\n",
2424 |     "all_estimators(\n",
2425 |     "    \"transformer\",\n",
2426 |     "    as_dataframe=True,\n",
2427 |     "    filter_tags={\n",
2428 |     "        \"scitype:transform-input\": \"Series\",\n",
2429 |     "        \"scitype:transform-output\": \"Primitives\",\n",
2430 |     "        \"scitype:instancewise\": True,\n",
2431 |     "    },\n",
2432 |     ")"
2433 |    ]
2434 |   },
2435 |   {
2436 |    "attachments": {},
2437 |    "cell_type": "markdown",
2438 |    "metadata": {},
2439 |    "source": [
2440 |     "Further details on transformations and feature extraction can be found in tutorial 3, transformers.\n",
2441 |     "\n",
2442 |     "All composition steps therein (e.g., chaining, column subsetting) work together with all estimator types in `sktime`, including classifiers, regressors, clusterers."
2443 |    ]
2444 |   },
2445 |   {
2446 |    "attachments": {},
2447 |    "cell_type": "markdown",
2448 |    "metadata": {},
2449 |    "source": [
2450 |     "### 5.4.2 Pipelines for time series panel tasks"
2451 |    ]
2452 |   },
2453 |   {
2454 |    "attachments": {},
2455 |    "cell_type": "markdown",
2456 |    "metadata": {},
2457 |    "source": [
2458 |     "all panel estimators pipeline with `sktime` transformers, via the `*` dunder or `make_pipeline`.\n",
2459 |     "\n",
2460 |     "The pipeline does the following:\n",
2461 |     "\n",
2462 |     "* in `fit`: runs the transformers' `fit_transform` in sequence, then `fit` of the panel estimator\n",
2463 |     "* in `predict`: runs the fitted transformers' `transform` in sequence, then `predict` of the panel estimator\n",
2464 |     "\n",
2465 |     "(same logic as for `sklearn` pipelines)"
2466 |    ]
2467 |   },
2468 |   {
2469 |    "cell_type": "code",
2470 |    "execution_count": 35,
2471 |    "metadata": {},
2472 |    "outputs": [
2473 |     {
2474 |      "data": {
2475 |       "text/html": [
2476 |        "<style>#sk-6e583018-f2e4-47c7-a55a-f306e319e22c {color: black;background-color: white;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c pre{padding: 0;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-toggleable {background-color: white;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-estimator {font-family: 
monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-estimator:hover {background-color: #d4ebff;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-item {z-index: 1;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-parallel-item:only-child::after {width: 0;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-label label {font-family: monospace;font-weight: 
bold;background-color: white;display: inline-block;line-height: 1.2em;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-text-repr-fallback {display: none;}</style><div id='sk-6e583018-f2e4-47c7-a55a-f306e319e22c' class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>ClassifierPipeline(classifier=KNeighborsTimeSeriesClassifier(),\n",
2477 |        "                   transformers=[ExponentTransformer()])</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class='sk-label-container'><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('fb50cf60-146b-423a-b0ab-3b6323d1020f') type=\"checkbox\" ><label for=UUID('fb50cf60-146b-423a-b0ab-3b6323d1020f') class='sk-toggleable__label sk-toggleable__label-arrow'>ClassifierPipeline</label><div class=\"sk-toggleable__content\"><pre>ClassifierPipeline(classifier=KNeighborsTimeSeriesClassifier(),\n",
2478 |        "                   transformers=[ExponentTransformer()])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('9ab34dbf-f1b7-45ae-acfc-b3ec943b0235') type=\"checkbox\" ><label for=UUID('9ab34dbf-f1b7-45ae-acfc-b3ec943b0235') class='sk-toggleable__label sk-toggleable__label-arrow'>KNeighborsTimeSeriesClassifier</label><div class=\"sk-toggleable__content\"><pre>KNeighborsTimeSeriesClassifier()</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('6125b755-4ebb-4230-a39a-a4254f6d7c23') type=\"checkbox\" ><label for=UUID('6125b755-4ebb-4230-a39a-a4254f6d7c23') class='sk-toggleable__label sk-toggleable__label-arrow'>ExponentTransformer</label><div class=\"sk-toggleable__content\"><pre>ExponentTransformer()</pre></div></div></div></div></div></div></div></div></div></div>"
2479 |       ],
2480 |       "text/plain": [
2481 |        "ClassifierPipeline(classifier=KNeighborsTimeSeriesClassifier(),\n",
2482 |        "                   transformers=[ExponentTransformer()])"
2483 |       ]
2484 |      },
2485 |      "execution_count": 35,
2486 |      "metadata": {},
2487 |      "output_type": "execute_result"
2488 |     }
2489 |    ],
2490 |    "source": [
2491 |     "from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier\n",
2492 |     "from sktime.transformations.series.exponent import ExponentTransformer\n",
2493 |     "\n",
2494 |     "pipe = ExponentTransformer() * KNeighborsTimeSeriesClassifier()\n",
2495 |     "\n",
2496 |     "# this constructs a ClassifierPipeline, which is also a classifier\n",
2497 |     "pipe"
2498 |    ]
2499 |   },
2500 |   {
2501 |    "cell_type": "code",
2502 |    "execution_count": 36,
2503 |    "metadata": {},
2504 |    "outputs": [],
2505 |    "source": [
2506 |     "# alternative to construct:\n",
2507 |     "from sktime.pipeline import make_pipeline\n",
2508 |     "\n",
2509 |     "pipe = make_pipeline(ExponentTransformer(), KNeighborsTimeSeriesClassifier())"
2510 |    ]
2511 |   },
2512 |   {
2513 |    "cell_type": "code",
2514 |    "execution_count": 37,
2515 |    "metadata": {},
2516 |    "outputs": [
2517 |     {
2518 |      "data": {
2519 |       "text/html": [
2520 |        "<style>#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 {color: black;background-color: white;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 pre{padding: 0;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-toggleable {background-color: white;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-estimator {font-family: 
monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-item {z-index: 1;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-parallel-item:only-child::after {width: 0;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-label label {font-family: monospace;font-weight: 
bold;background-color: white;display: inline-block;line-height: 1.2em;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-472a23fd-2ae2-47cc-8c6b-77238204fec1 div.sk-text-repr-fallback {display: none;}</style><div id='sk-472a23fd-2ae2-47cc-8c6b-77238204fec1' class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>ClassifierPipeline(classifier=KNeighborsTimeSeriesClassifier(),\n",
2521 |        "                   transformers=[ExponentTransformer()])</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class='sk-label-container'><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('8e433c3d-d3a4-49f7-a85c-690d90fb6f6c') type=\"checkbox\" ><label for=UUID('8e433c3d-d3a4-49f7-a85c-690d90fb6f6c') class='sk-toggleable__label sk-toggleable__label-arrow'>ClassifierPipeline</label><div class=\"sk-toggleable__content\"><pre>ClassifierPipeline(classifier=KNeighborsTimeSeriesClassifier(),\n",
2522 |        "                   transformers=[ExponentTransformer()])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('8b38211e-42d6-4550-8bec-8f7b83ab5d2e') type=\"checkbox\" ><label for=UUID('8b38211e-42d6-4550-8bec-8f7b83ab5d2e') class='sk-toggleable__label sk-toggleable__label-arrow'>KNeighborsTimeSeriesClassifier</label><div class=\"sk-toggleable__content\"><pre>KNeighborsTimeSeriesClassifier()</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('88a184f7-a03b-43e5-b698-221f49c95f74') type=\"checkbox\" ><label for=UUID('88a184f7-a03b-43e5-b698-221f49c95f74') class='sk-toggleable__label sk-toggleable__label-arrow'>ExponentTransformer</label><div class=\"sk-toggleable__content\"><pre>ExponentTransformer()</pre></div></div></div></div></div></div></div></div></div></div>"
2523 |       ],
2524 |       "text/plain": [
2525 |        "ClassifierPipeline(classifier=KNeighborsTimeSeriesClassifier(),\n",
2526 |        "                   transformers=[ExponentTransformer()])"
2527 |       ]
2528 |      },
2529 |      "execution_count": 37,
2530 |      "metadata": {},
2531 |      "output_type": "execute_result"
2532 |     }
2533 |    ],
2534 |    "source": [
2535 |     "from sktime.datasets import load_unit_test\n",
2536 |     "\n",
2537 |     "X_train, y_train = load_unit_test(split=\"TRAIN\")\n",
2538 |     "X_test, _ = load_unit_test(split=\"TEST\")\n",
2539 |     "\n",
2540 |     "# this is a classifier with the same interface as knn-classifier\n",
2541 |     "# first applies exponent transform, then knn-classifier\n",
2542 |     "pipe.fit(X_train, y_train)"
2543 |    ]
2544 |   },
2545 |   {
2546 |    "attachments": {},
2547 |    "cell_type": "markdown",
2548 |    "metadata": {},
2549 |    "source": [
2550 |     "`sktime` transformers pipeline with `sklearn` classifiers!\n",
2551 |     "\n",
2552 |     "This allows building \"time series feature extraction, then `sklearn` classify\" pipelines:"
2553 |    ]
2554 |   },
2555 |   {
2556 |    "cell_type": "code",
2557 |    "execution_count": 38,
2558 |    "metadata": {},
2559 |    "outputs": [
2560 |     {
2561 |      "data": {
2562 |       "text/html": [
2563 |        "<style>#sk-f972d941-041b-4b35-84dc-7569b5c10931 {color: black;background-color: white;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 pre{padding: 0;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-toggleable {background-color: white;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-estimator {font-family: 
monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-estimator:hover {background-color: #d4ebff;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-item {z-index: 1;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-parallel-item:only-child::after {width: 0;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-label label {font-family: monospace;font-weight: 
bold;background-color: white;display: inline-block;line-height: 1.2em;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-f972d941-041b-4b35-84dc-7569b5c10931 div.sk-text-repr-fallback {display: none;}</style><div id='sk-f972d941-041b-4b35-84dc-7569b5c10931' class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>SklearnClassifierPipeline(classifier=RandomForestClassifier(),\n",
2564 |        "                          transformers=[SummaryTransformer()])</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class='sk-label-container'><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('15a0748d-fcf1-4338-a5f8-c3d49c551288') type=\"checkbox\" ><label for=UUID('15a0748d-fcf1-4338-a5f8-c3d49c551288') class='sk-toggleable__label sk-toggleable__label-arrow'>SklearnClassifierPipeline</label><div class=\"sk-toggleable__content\"><pre>SklearnClassifierPipeline(classifier=RandomForestClassifier(),\n",
2565 |        "                          transformers=[SummaryTransformer()])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('18cba1d9-515c-4e69-86c6-7308242bac1d') type=\"checkbox\" ><label for=UUID('18cba1d9-515c-4e69-86c6-7308242bac1d') class='sk-toggleable__label sk-toggleable__label-arrow'>RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier()</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('faf1e5c0-a948-42d5-8b29-78966f222038') type=\"checkbox\" ><label for=UUID('faf1e5c0-a948-42d5-8b29-78966f222038') class='sk-toggleable__label sk-toggleable__label-arrow'>SummaryTransformer</label><div class=\"sk-toggleable__content\"><pre>SummaryTransformer()</pre></div></div></div></div></div></div></div></div></div></div>"
2566 |       ],
2567 |       "text/plain": [
2568 |        "SklearnClassifierPipeline(classifier=RandomForestClassifier(),\n",
2569 |        "                          transformers=[SummaryTransformer()])"
2570 |       ]
2571 |      },
2572 |      "execution_count": 38,
2573 |      "metadata": {},
2574 |      "output_type": "execute_result"
2575 |     }
2576 |    ],
2577 |    "source": [
2578 |     "from sklearn.ensemble import RandomForestClassifier\n",
2579 |     "from sktime.transformations.series.summarize import SummaryTransformer\n",
2580 |     "\n",
2581 |     "# pipeline: summary transformer, then sklearn random forest classifier\n",
2582 |     "summary_rf = SummaryTransformer() * RandomForestClassifier()\n",
2583 |     "\n",
2584 |     "summary_rf.fit(X_train, y_train)"
2585 |    ]
2586 |   },
2587 |   {
2588 |    "attachments": {},
2589 |    "cell_type": "markdown",
2590 |    "metadata": {},
2591 |    "source": [
2592 |     "### 5.4.3 Using transformers to deal with unequal length or missing values"
2593 |    ]
2594 |   },
2595 |   {
2596 |    "attachments": {},
2597 |    "cell_type": "markdown",
2598 |    "metadata": {},
2599 |    "source": [
2600 |     "pro tip: useful transformers to pipeline are those that \"improve\" capabilities!\n",
2601 |     "\n",
2602 |     "Search for these transformer tags:\n",
2603 |     "\n",
2604 |     "* `\"capability:unequal_length:removes\"` - ensures all instances in the panel have equal length afterwards. Examples: padding, cutting, resampling.\n",
2605 |     "* `\"capability:missing_values:removes\"` - removes all missing values from the data (e.g., series, panel) passed to it. Example: mean imputation"
2606 |    ]
2607 |   },
2608 |   {
2609 |    "cell_type": "code",
2610 |    "execution_count": 39,
2611 |    "metadata": {},
2612 |    "outputs": [
2613 |     {
2614 |      "data": {
2615 |       "text/html": [
2616 |        "<div>\n",
2617 |        "<style scoped>\n",
2618 |        "    .dataframe tbody tr th:only-of-type {\n",
2619 |        "        vertical-align: middle;\n",
2620 |        "    }\n",
2621 |        "\n",
2622 |        "    .dataframe tbody tr th {\n",
2623 |        "        vertical-align: top;\n",
2624 |        "    }\n",
2625 |        "\n",
2626 |        "    .dataframe thead th {\n",
2627 |        "        text-align: right;\n",
2628 |        "    }\n",
2629 |        "</style>\n",
2630 |        "<table border=\"1\" class=\"dataframe\">\n",
2631 |        "  <thead>\n",
2632 |        "    <tr style=\"text-align: right;\">\n",
2633 |        "      <th></th>\n",
2634 |        "      <th>name</th>\n",
2635 |        "      <th>object</th>\n",
2636 |        "    </tr>\n",
2637 |        "  </thead>\n",
2638 |        "  <tbody>\n",
2639 |        "    <tr>\n",
2640 |        "      <th>0</th>\n",
2641 |        "      <td>ClearSky</td>\n",
2642 |        "      <td>&lt;class 'sktime.transformations.series.clear_sk...</td>\n",
2643 |        "    </tr>\n",
2644 |        "    <tr>\n",
2645 |        "      <th>1</th>\n",
2646 |        "      <td>IntervalSegmenter</td>\n",
2647 |        "      <td>&lt;class 'sktime.transformations.panel.segment.I...</td>\n",
2648 |        "    </tr>\n",
2649 |        "    <tr>\n",
2650 |        "      <th>2</th>\n",
2651 |        "      <td>PaddingTransformer</td>\n",
2652 |        "      <td>&lt;class 'sktime.transformations.panel.padder.Pa...</td>\n",
2653 |        "    </tr>\n",
2654 |        "    <tr>\n",
2655 |        "      <th>3</th>\n",
2656 |        "      <td>RandomIntervalSegmenter</td>\n",
2657 |        "      <td>&lt;class 'sktime.transformations.panel.segment.R...</td>\n",
2658 |        "    </tr>\n",
2659 |        "    <tr>\n",
2660 |        "      <th>4</th>\n",
2661 |        "      <td>SlopeTransformer</td>\n",
2662 |        "      <td>&lt;class 'sktime.transformations.panel.slope.Slo...</td>\n",
2663 |        "    </tr>\n",
2664 |        "    <tr>\n",
2665 |        "      <th>5</th>\n",
2666 |        "      <td>TimeBinAggregate</td>\n",
2667 |        "      <td>&lt;class 'sktime.transformations.series.binning....</td>\n",
2668 |        "    </tr>\n",
2669 |        "    <tr>\n",
2670 |        "      <th>6</th>\n",
2671 |        "      <td>TruncationTransformer</td>\n",
2672 |        "      <td>&lt;class 'sktime.transformations.panel.truncatio...</td>\n",
2673 |        "    </tr>\n",
2674 |        "  </tbody>\n",
2675 |        "</table>\n",
2676 |        "</div>"
2677 |       ],
2678 |       "text/plain": [
2679 |        "                      name                                             object\n",
2680 |        "0                 ClearSky  <class 'sktime.transformations.series.clear_sk...\n",
2681 |        "1        IntervalSegmenter  <class 'sktime.transformations.panel.segment.I...\n",
2682 |        "2       PaddingTransformer  <class 'sktime.transformations.panel.padder.Pa...\n",
2683 |        "3  RandomIntervalSegmenter  <class 'sktime.transformations.panel.segment.R...\n",
2684 |        "4         SlopeTransformer  <class 'sktime.transformations.panel.slope.Slo...\n",
2685 |        "5         TimeBinAggregate  <class 'sktime.transformations.series.binning....\n",
2686 |        "6    TruncationTransformer  <class 'sktime.transformations.panel.truncatio..."
2687 |       ]
2688 |      },
2689 |      "execution_count": 39,
2690 |      "metadata": {},
2691 |      "output_type": "execute_result"
2692 |     }
2693 |    ],
2694 |    "source": [
2695 |     "# all transformers that guarantee that the output is equal length and equal index\n",
2696 |     "from sktime.registry import all_estimators\n",
2697 |     "\n",
2698 |     "all_estimators(\"transformer\", as_dataframe=True, filter_tags={\"capability:unequal_length:removes\": True })"
2699 |    ]
2700 |   },
2701 |   {
2702 |    "cell_type": "code",
2703 |    "execution_count": 40,
2704 |    "metadata": {},
2705 |    "outputs": [
2706 |     {
2707 |      "data": {
2708 |       "text/html": [
2709 |        "<div>\n",
2710 |        "<style scoped>\n",
2711 |        "    .dataframe tbody tr th:only-of-type {\n",
2712 |        "        vertical-align: middle;\n",
2713 |        "    }\n",
2714 |        "\n",
2715 |        "    .dataframe tbody tr th {\n",
2716 |        "        vertical-align: top;\n",
2717 |        "    }\n",
2718 |        "\n",
2719 |        "    .dataframe thead th {\n",
2720 |        "        text-align: right;\n",
2721 |        "    }\n",
2722 |        "</style>\n",
2723 |        "<table border=\"1\" class=\"dataframe\">\n",
2724 |        "  <thead>\n",
2725 |        "    <tr style=\"text-align: right;\">\n",
2726 |        "      <th></th>\n",
2727 |        "      <th>name</th>\n",
2728 |        "      <th>object</th>\n",
2729 |        "    </tr>\n",
2730 |        "  </thead>\n",
2731 |        "  <tbody>\n",
2732 |        "    <tr>\n",
2733 |        "      <th>0</th>\n",
2734 |        "      <td>ClearSky</td>\n",
2735 |        "      <td>&lt;class 'sktime.transformations.series.clear_sk...</td>\n",
2736 |        "    </tr>\n",
2737 |        "    <tr>\n",
2738 |        "      <th>1</th>\n",
2739 |        "      <td>Imputer</td>\n",
2740 |        "      <td>&lt;class 'sktime.transformations.series.impute.I...</td>\n",
2741 |        "    </tr>\n",
2742 |        "  </tbody>\n",
2743 |        "</table>\n",
2744 |        "</div>"
2745 |       ],
2746 |       "text/plain": [
2747 |        "       name                                             object\n",
2748 |        "0  ClearSky  <class 'sktime.transformations.series.clear_sk...\n",
2749 |        "1   Imputer  <class 'sktime.transformations.series.impute.I..."
2750 |       ]
2751 |      },
2752 |      "execution_count": 40,
2753 |      "metadata": {},
2754 |      "output_type": "execute_result"
2755 |     }
2756 |    ],
2757 |    "source": [
2758 |     "# all transformers that guarantee the output has no missing values\n",
2759 |     "from sktime.registry import all_estimators\n",
2760 |     "\n",
2761 |     "all_estimators(\"transformer\", as_dataframe=True, filter_tags={\"capability:missing_values:removes\": True })"
2762 |    ]
2763 |   },
2764 |   {
2765 |    "attachments": {},
2766 |    "cell_type": "markdown",
2767 |    "metadata": {},
2768 |    "source": [
2769 |     "minor note:\n",
2770 |     "\n",
2771 |     "some transformers guarantee \"no missing values\" under some conditions but not always, e.g., `TimeBinAggregate`"
2772 |    ]
2773 |   },
2774 |   {
2775 |    "attachments": {},
2776 |    "cell_type": "markdown",
2777 |    "metadata": {},
2778 |    "source": [
2779 |     "let's check the tags in one example"
2780 |    ]
2781 |   },
2782 |   {
2783 |    "cell_type": "code",
2784 |    "execution_count": 41,
2785 |    "metadata": {},
2786 |    "outputs": [
2787 |     {
2788 |      "data": {
2789 |       "text/plain": [
2790 |        "{'python_dependencies_alias': {'scikit-learn': 'sklearn'},\n",
2791 |        " 'X_inner_mtype': 'numpy3D',\n",
2792 |        " 'capability:multivariate': False,\n",
2793 |        " 'capability:unequal_length': False,\n",
2794 |        " 'capability:missing_values': False,\n",
2795 |        " 'capability:train_estimate': False,\n",
2796 |        " 'capability:contractable': False,\n",
2797 |        " 'capability:multithreading': True,\n",
2798 |        " 'capability:predict_proba': True,\n",
2799 |        " 'python_version': None,\n",
2800 |        " 'requires_cython': False,\n",
2801 |        " 'classifier_type': 'distance',\n",
2802 |        " 'python_dependencies': 'scikit-learn!=1.3.0'}"
2803 |       ]
2804 |      },
2805 |      "execution_count": 41,
2806 |      "metadata": {},
2807 |      "output_type": "execute_result"
2808 |     }
2809 |    ],
2810 |    "source": [
2811 |     "# inspect the tags of a classifier without missing value support\n",
2812 |     "from sktime.classification.feature_based import MatrixProfileClassifier\n",
2813 |     "\n",
2814 |     "no_missing_clf = MatrixProfileClassifier()\n",
2815 |     "\n",
2816 |     "no_missing_clf.get_tags()"
2817 |    ]
2818 |   },
2819 |   {
2820 |    "cell_type": "code",
2821 |    "execution_count": 42,
2822 |    "metadata": {},
2823 |    "outputs": [
2824 |     {
2825 |      "data": {
2826 |       "text/plain": [
2827 |        "{'python_dependencies_alias': {'scikit-learn': 'sklearn'},\n",
2828 |        " 'X_inner_mtype': 'pd-multiindex',\n",
2829 |        " 'capability:multivariate': False,\n",
2830 |        " 'capability:unequal_length': False,\n",
2831 |        " 'capability:missing_values': True,\n",
2832 |        " 'capability:train_estimate': False,\n",
2833 |        " 'capability:contractable': False,\n",
2834 |        " 'capability:multithreading': False,\n",
2835 |        " 'capability:predict_proba': True,\n",
2836 |        " 'python_version': None,\n",
2837 |        " 'requires_cython': False}"
2838 |       ]
2839 |      },
2840 |      "execution_count": 42,
2841 |      "metadata": {},
2842 |      "output_type": "execute_result"
2843 |     }
2844 |    ],
2845 |    "source": [
2846 |     "from sktime.transformations.series.impute import Imputer\n",
2847 |     "\n",
2848 |     "clf_can_do_missing = Imputer() * MatrixProfileClassifier()\n",
2849 |     "\n",
2850 |     "clf_can_do_missing.get_tags()"
2851 |    ]
2852 |   },
2853 |   {
2854 |    "attachments": {},
2855 |    "cell_type": "markdown",
2856 |    "metadata": {},
2857 |    "source": [
2858 |     "### 5.4.4 Tuning and model selection"
2859 |    ]
2860 |   },
2861 |   {
2862 |    "attachments": {},
2863 |    "cell_type": "markdown",
2864 |    "metadata": {},
2865 |    "source": [
2866 |     "`sktime` classifiers are compatible with `sklearn` model selection and composition tools using `sktime` data formats.\n",
2867 |     "\n",
2868 |     "This extends to grid tuning and cross-validation, as long as `numpy` based formats or length/instance indexed formats are used."
2869 |    ]
2870 |   },
2871 |   {
2872 |    "cell_type": "code",
2873 |    "execution_count": 43,
2874 |    "metadata": {},
2875 |    "outputs": [],
2876 |    "source": [
2877 |     "from sktime.datasets import load_unit_test\n",
2878 |     "\n",
2879 |     "X_train, y_train = load_unit_test(split=\"TRAIN\")\n",
2880 |     "X_test, _ = load_unit_test(split=\"TEST\")\n"
2881 |    ]
2882 |   },
2883 |   {
2884 |    "attachments": {},
2885 |    "cell_type": "markdown",
2886 |    "metadata": {},
2887 |    "source": [
2888 |     "Cross-validation using the `sklearn` `cross_val_score` and `KFold` functionality:"
2889 |    ]
2890 |   },
2891 |   {
2892 |    "cell_type": "code",
2893 |    "execution_count": 44,
2894 |    "metadata": {},
2895 |    "outputs": [
2896 |     {
2897 |      "data": {
2898 |       "text/plain": [
2899 |        "array([1. , 0.8, 0.6, 0.8])"
2900 |       ]
2901 |      },
2902 |      "execution_count": 44,
2903 |      "metadata": {},
2904 |      "output_type": "execute_result"
2905 |     }
2906 |    ],
2907 |    "source": [
2908 |     "from sklearn.model_selection import KFold, cross_val_score\n",
2909 |     "from sktime.classification.feature_based import MatrixProfileClassifier\n",
2910 |     "\n",
2911 |     "clf = MatrixProfileClassifier()\n",
2912 |     "\n",
2913 |     "cross_val_score(clf, X_train, y=y_train, cv=KFold(n_splits=4))"
2914 |    ]
2915 |   },
2916 |   {
2917 |    "attachments": {},
2918 |    "cell_type": "markdown",
2919 |    "metadata": {},
2920 |    "source": [
2921 |     "Parameter tuning using `sklearn` `GridSearchCV`, we tune the _k_ and distance measure for a K-NN classifier:"
2922 |    ]
2923 |   },
2924 |   {
2925 |    "cell_type": "code",
2926 |    "execution_count": 45,
2927 |    "metadata": {},
2928 |    "outputs": [],
2929 |    "source": [
2930 |     "from sklearn.model_selection import GridSearchCV\n",
2931 |     "\n",
2932 |     "from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier\n",
2933 |     "\n",
2934 |     "knn = KNeighborsTimeSeriesClassifier()\n",
2935 |     "param_grid = {\"n_neighbors\": [1, 5], \"distance\": [\"euclidean\", \"dtw\"]}\n",
2936 |     "parameter_tuning_method = GridSearchCV(knn, param_grid, cv=KFold(n_splits=4))\n",
2937 |     "\n",
2938 |     "parameter_tuning_method.fit(X_train, y_train)\n",
2939 |     "y_pred = parameter_tuning_method.predict(X_test)"
2940 |    ]
2941 |   },
2942 |   {
2943 |    "attachments": {},
2944 |    "cell_type": "markdown",
2945 |    "metadata": {},
2946 |    "source": [
2947 |     "### 5.4.5 Advanced Composition cheat sheet - AutoML, bagging, ensembles"
2948 |    ]
2949 |   },
2950 |   {
2951 |    "attachments": {},
2952 |    "cell_type": "markdown",
2953 |    "metadata": {},
2954 |    "source": [
2955 |     "* common ensembling patterns: `BaggingClassifier`, `WeightedEnsembleClassifier`\n",
2956 |     "* composability with `sklearn` classifier, regressor building blocks still applies\n",
2957 |     "* AutoML can be achieved by combining tuning with `MultiplexClassifier` or `MultiplexTransformer`"
2958 |    ]
2959 |   },
2960 |   {
2961 |    "attachments": {},
2962 |    "cell_type": "markdown",
2963 |    "metadata": {},
2964 |    "source": [
2965 |     "pro tip: bagging with a fixed single column subset can be used to turn a univariate classifier into a multivariate classifier!"
2966 |    ]
2967 |   },
2968 |   {
2969 |    "attachments": {},
2970 |    "cell_type": "markdown",
2971 |    "metadata": {},
2972 |    "source": [
2973 |     "## 5.5 Appendix - Extension guide"
2974 |    ]
2975 |   },
2976 |   {
2977 |    "attachments": {},
2978 |    "cell_type": "markdown",
2979 |    "metadata": {},
2980 |    "source": [
2981 |     "`sktime` is meant to be easily extensible, for direct contribution to `sktime` as well as for local/private extension with custom methods.\n",
2982 |     "\n",
2983 |     "To extend `sktime` with a new local or contributed estimator, a good workflow to follow is:\n",
2984 |     "\n",
2985 |     "0. find the right extension template for the type of estimator you want to add - e.g., classifier, regressor, clusterer, etc. The extension templates are located in the [`extension_templates`](https://github.com/sktime/sktime/blob/main/extension_templates) directory\n",
2986 |     "1. read through the extension template - this is a `python` file with `todo` blocks that mark the places in which changes need to be added.\n",
2987 |     "2. optionally, if you are planning any major surgeries to the interface: look at the base class - note that \"ordinary\" extension (e.g., new algorithm) should be easily doable without this.\n",
2988 |     "3. copy the extension template to a local folder in your own repository (local/private extension), or to a suitable location in your clone of the `sktime` or affiliated repository (if contributed extension), inside `sktime.[name_of_task]`; rename the file and update the file docstring appropriately.\n",
2989 |     "4. address the \"todo\" parts. Usually, this means: changing the name of the class, setting the tag values, specifying hyper-parameters, filling in `__init__`, `_fit`, `_predict` and/or other methods (for details see the extension template). You can add private methods as long as they do not override the default public interface. For more details, see the extension template.\n",
2990 |     "5. to test your estimator manually: import your estimator and run it in the basic vignettes above.\n",
2991 |     "6. to test your estimator automatically: call `sktime.utils.estimator_checks.check_estimator` on your estimator. You can call this on a class or object instance. Ensure you have specified test parameters in the `get_test_params` method, according to the extension template.\n",
2992 |     "\n",
2993 |     "In case of direct contribution to `sktime` or one of its affiliated packages, additionally:\n",
2994 |     "* add yourself as an author to the code, and to the `CODEOWNERS` for the new estimator file(s).\n",
2995 |     "* create a pull request that contains only the new estimators (and their inheritance tree, if it's not just one class), as well as the automated tests as described above.\n",
2996 |     "* in the pull request, describe the estimator and optimally provide a publication or other technical reference for the strategy it implements.\n",
2997 |     "* before making the pull request, ensure that you have all necessary permissions to contribute the code to a permissive license (BSD-3) open source project."
2998 |    ]
2999 |   },
3000 |   {
3001 |    "attachments": {},
3002 |    "cell_type": "markdown",
3003 |    "metadata": {},
3004 |    "source": [
3005 |     "---\n",
3006 |     "\n",
3007 |     "### Credits: notebook 5 - time series panel tasks - classification, regression, clustering\n",
3008 |     "\n",
3009 |     "notebook creation: fkiraly, achieveordie"
3010 |    ]
3011 |   }
3012 |  ],
3013 |  "metadata": {
3014 |   "kernelspec": {
3015 |    "display_name": "sktime-dl",
3016 |    "language": "python",
3017 |    "name": "python3"
3018 |   },
3019 |   "language_info": {
3020 |    "codemirror_mode": {
3021 |     "name": "ipython",
3022 |     "version": 3
3023 |    },
3024 |    "file_extension": ".py",
3025 |    "mimetype": "text/x-python",
3026 |    "name": "python",
3027 |    "nbconvert_exporter": "python",
3028 |    "pygments_lexer": "ipython3",
3029 |    "version": "3.11.3"
3030 |   }
3031 |  },
3032 |  "nbformat": 4,
3033 |  "nbformat_minor": 2
3034 | }
3035 | 


--------------------------------------------------------------------------------
/notebooks/08_mlflow.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "attachments": {},
  5 |    "cell_type": "markdown",
  6 |    "id": "37997396",
  7 |    "metadata": {},
  8 |    "source": [
  9 |     "# Deploying `sktime` via `MLflow` & `MLflavors`"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "attachments": {},
 14 |    "cell_type": "markdown",
 15 |    "id": "f990e6e0",
 16 |    "metadata": {},
 17 |    "source": [
 18 |     "requires MLflow using the [MLflavors library](https://github.com/ml-toolkits/mlflavors).\n",
 19 |     "\n",
 20 |     "**Notebook contents**:\n",
 21 |     "\n",
 22 |     "- saving `sktime` models as MLflow artifacts.\n",
 23 |     "- loading `sktime` models from MLflow for batch inference.\n",
 24 |     "- deploying `sktime` models to a serving endpoint using MLflow"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "attachments": {},
 29 |    "cell_type": "markdown",
 30 |    "id": "35906f49",
 31 |    "metadata": {},
 32 |    "source": [
 33 |     "Design summary:\n",
 34 |     "\n",
 35 |     "* uses `pyfunc` based custom flavor similar to `sktime` example in [MLflow documentation](https://mlflow.org/docs/latest/models.html#custom-flavors)\n",
 36 |     "* single-row `pandas` `DataFrame` configuration arguments to address the `sktime` prediction API in inference step"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "attachments": {},
 41 |    "cell_type": "markdown",
 42 |    "id": "ae526e0a",
 43 |    "metadata": {},
 44 |    "source": [
 45 |     "## Saving `sktime` model as an MLflow artifact"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "attachments": {},
 50 |    "cell_type": "markdown",
 51 |    "id": "600f758c",
 52 |    "metadata": {},
 53 |    "source": [
 54 |     "Example: save fitted model, model parameters, and results of this experiment to server\n",
 55 |     "\n",
 56 |     "* fit `NaiveForecaster` on longley data (with exogenous vars)\n",
 57 |     "* evaluate via MAE and MAPE"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "attachments": {},
 62 |    "cell_type": "markdown",
 63 |    "id": "62662cad",
 64 |    "metadata": {},
 65 |    "source": [
 66 |     "first without mlflow"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": 1,
 72 |    "id": "9f739fd6",
 73 |    "metadata": {},
 74 |    "outputs": [],
 75 |    "source": [
 76 |     "from sktime.datasets import load_longley\n",
 77 |     "from sktime.forecasting.model_selection import temporal_train_test_split\n",
 78 |     "from sktime.forecasting.naive import NaiveForecaster\n",
 79 |     "from sktime.performance_metrics.forecasting import (\n",
 80 |     "    mean_absolute_error,\n",
 81 |     "    mean_absolute_percentage_error,\n",
 82 |     ")\n",
 83 |     "\n",
 84 |     "y, X = load_longley()\n",
 85 |     "y_train, y_test, X_train, X_test = temporal_train_test_split(y, X)\n",
 86 |     "\n",
 87 |     "forecaster = NaiveForecaster()\n",
 88 |     "forecaster.fit(\n",
 89 |     "    y_train,\n",
 90 |     "    X=X_train,\n",
 91 |     "    fh=[1, 2, 3, 4],\n",
 92 |     ")\n",
 93 |     "\n",
 94 |     "# Extract parameters\n",
 95 |     "parameters = forecaster.get_params()\n",
 96 |     "\n",
 97 |     "# Evaluate model\n",
 98 |     "y_pred = forecaster.predict(X=X_test)\n",
 99 |     "metrics = {\n",
100 |     "    \"mae\": mean_absolute_error(y_test, y_pred),\n",
101 |     "    \"mape\": mean_absolute_percentage_error(y_test, y_pred),\n",
102 |     "}\n"
103 |    ]
104 |   },
105 |   {
106 |    "attachments": {},
107 |    "cell_type": "markdown",
108 |    "id": "1fcf7200",
109 |    "metadata": {},
110 |    "source": [
111 |     "with `mlflow` / `mlflavors`:\n",
112 |     "\n",
113 |     "* use `mlflow` context manager `start_run`\n",
114 |     "* results are logged/saved using standard `mlflow.log_params`, `log_metrics`\n",
115 |     "* model is logged/saved using `mlflavors.sktime.log_model`\n",
116 |     "\n",
117 |     "for further use (load), get artefact URI using `get_artifact_uri`"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": null,
123 |    "id": "a25fb2d2",
124 |    "metadata": {},
125 |    "outputs": [],
126 |    "source": [
127 |     "import json\n",
128 |     "\n",
129 |     "import mlflavors\n",
130 |     "import mlflow\n",
131 |     "from sktime.datasets import load_longley\n",
132 |     "from sktime.forecasting.model_selection import temporal_train_test_split\n",
133 |     "from sktime.forecasting.naive import NaiveForecaster\n",
134 |     "from sktime.performance_metrics.forecasting import (\n",
135 |     "    mean_absolute_error,\n",
136 |     "    mean_absolute_percentage_error,\n",
137 |     ")\n",
138 |     "\n",
139 |     "\n",
140 |     "ARTIFACT_PATH = \"model\"\n",
141 |     "\n",
142 |     "with mlflow.start_run() as run:\n",
143 |     "    y, X = load_longley()\n",
144 |     "    y_train, y_test, X_train, X_test = temporal_train_test_split(y, X)\n",
145 |     "\n",
146 |     "    forecaster = NaiveForecaster()\n",
147 |     "    forecaster.fit(\n",
148 |     "        y_train,\n",
149 |     "        X=X_train,\n",
150 |     "        fh=[1, 2, 3, 4],\n",
151 |     "    )\n",
152 |     "\n",
153 |     "    # Extract parameters\n",
154 |     "    parameters = forecaster.get_params()\n",
155 |     "\n",
156 |     "    # Evaluate model\n",
157 |     "    y_pred = forecaster.predict(X=X_test)\n",
158 |     "    metrics = {\n",
159 |     "        \"mae\": mean_absolute_error(y_test, y_pred),\n",
160 |     "        \"mape\": mean_absolute_percentage_error(y_test, y_pred),\n",
161 |     "    }\n",
162 |     "\n",
163 |     "    print(f\"Parameters: \\n{json.dumps(parameters, indent=2)}\")\n",
164 |     "    print(f\"\\nMetrics: \\n{json.dumps(metrics, indent=2)}\")\n",
165 |     "\n",
166 |     "    # Log parameters and metrics\n",
167 |     "    mlflow.log_params(parameters)\n",
168 |     "    mlflow.log_metrics(metrics)\n",
169 |     "\n",
170 |     "    # Log model to MLflow tracking server\n",
171 |     "    mlflavors.sktime.log_model(\n",
172 |     "        sktime_model=forecaster,\n",
173 |     "        artifact_path=ARTIFACT_PATH,\n",
174 |     "    )\n",
175 |     "    \n",
176 |     "    # Return model uri from the current run\n",
177 |     "    model_uri = mlflow.get_artifact_uri(ARTIFACT_PATH)\n",
178 |     "    \n",
179 |     "# Print the run id which is used below for serving the model to a local REST API endpoint\n",
180 |     "print(f\"\\nMLflow run id:\\n{run.info.run_id}\")"
181 |    ]
182 |   },
183 |   {
184 |    "attachments": {},
185 |    "cell_type": "markdown",
186 |    "id": "3df09b27",
187 |    "metadata": {},
188 |    "source": [
189 |     "## Viewing the model in the MLflow UI\n",
190 |     "To view the run output in the MLflow UI run the following command:\n",
191 |     "\n",
192 |     "```bash\n",
193 |     "mlflow ui\n",
194 |     "```\n",
195 |     "\n",
196 |     "When opening the MLflow runs detail page the serialized model artifact will show up, such as:\n",
197 |     "\n",
198 |     "![title](../images/tracking_artifact_ui.png)"
199 |    ]
200 |   },
201 |   {
202 |    "attachments": {},
203 |    "cell_type": "markdown",
204 |    "id": "c2f2f7c6",
205 |    "metadata": {},
206 |    "source": [
207 |     "## Loading the model from MLflow"
208 |    ]
209 |   },
210 |   {
211 |    "attachments": {},
212 |    "cell_type": "markdown",
213 |    "id": "d36fdfbf",
214 |    "metadata": {},
215 |    "source": [
216 |     "two options to load and predict:\n",
217 |     "\n",
218 |     "* load in native format using `load_model`, then call method directly\n",
219 |     "* using `pyfunc.load_model` and `predict` with a `DataFrame` configuration to address method"
220 |    ]
221 |   },
222 |   {
223 |    "attachments": {},
224 |    "cell_type": "markdown",
225 |    "id": "7b9c715c",
226 |    "metadata": {},
227 |    "source": [
228 |     "option 1: `load_model`, native"
229 |    ]
230 |   },
231 |   {
232 |    "cell_type": "code",
233 |    "execution_count": null,
234 |    "id": "b310fcb8",
235 |    "metadata": {},
236 |    "outputs": [],
237 |    "source": [
238 |     "loaded_model = mlflavors.sktime.load_model(model_uri=model_uri)\n",
239 |     "print(loaded_model.predict_interval(fh=[1, 2, 3], X=X_test, coverage=[0.9, 0.95]))"
240 |    ]
241 |   },
242 |   {
243 |    "attachments": {},
244 |    "cell_type": "markdown",
245 |    "id": "6e5ec99a",
246 |    "metadata": {},
247 |    "source": [
248 |     "option 2: `pyfunc` based"
249 |    ]
250 |   },
251 |   {
252 |    "cell_type": "code",
253 |    "execution_count": null,
254 |    "id": "ea02a4a5",
255 |    "metadata": {},
256 |    "outputs": [],
257 |    "source": [
258 |     "import pandas as pd\n",
259 |     "\n",
260 |     "# Convert test data to numpy array so it can be passed to pyfunc predict using\n",
261 |     "# a single-row Pandas DataFrame configuration argument\n",
262 |     "X_test_array = X_test.to_numpy()\n",
263 |     "\n",
264 |     "# Create configuration DataFrame for interval forecast with nominal coverage\n",
265 |     "# value [0.9,0.95], future forecast horizon of 3 periods, and exogenous regressor.\n",
266 |     "predict_conf = pd.DataFrame(\n",
267 |     "    [\n",
268 |     "        {\n",
269 |     "            \"fh\": [1, 2, 3],\n",
270 |     "            \"predict_method\": \"predict_interval\",\n",
271 |     "            \"coverage\": [0.9, 0.95],\n",
272 |     "            \"X\": X_test_array,\n",
273 |     "        }\n",
274 |     "    ]\n",
275 |     ")\n",
276 |     "\n",
277 |     "loaded_pyfunc = mlflavors.sktime.pyfunc.load_model(model_uri=model_uri)\n",
278 |     "print(loaded_pyfunc.predict(predict_conf))"
279 |    ]
280 |   },
281 |   {
282 |    "attachments": {},
283 |    "cell_type": "markdown",
284 |    "id": "a07ccb6a",
285 |    "metadata": {},
286 |    "source": [
287 |     "# Serving the model to an endpoint"
288 |    ]
289 |   },
290 |   {
291 |    "attachments": {},
292 |    "cell_type": "markdown",
293 |    "id": "f3949845",
294 |    "metadata": {},
295 |    "source": [
296 |     "for serving at **local REST API endpoint**:\n",
297 |     "\n",
298 |     "```bash\n",
299 |     "mlflow models serve -m runs:/<run_id>/model --env-manager local --host 127.0.0.1\n",
300 |     "```\n",
301 |     "\n",
302 |     "with `run_id` as obtained in the \"save\" step.\n",
303 |     "\n",
304 |     "Then, run the below model scoring script to request a prediction from the served model."
305 |    ]
306 |   },
307 |   {
308 |    "attachments": {},
309 |    "cell_type": "markdown",
310 |    "id": "d889b702",
311 |    "metadata": {},
312 |    "source": [
313 |     "for serving the model to an **endpoint in the cloud** (e.g. Azure ML, AWS SageMaker, etc.):\n",
314 |     "\n",
315 |     "use [MLflow deployment tools](https://mlflow.org/docs/latest/models.html#built-in-deployment-tools)."
316 |    ]
317 |   },
318 |   {
319 |    "cell_type": "code",
320 |    "execution_count": null,
321 |    "id": "58c11c04",
322 |    "metadata": {},
323 |    "outputs": [],
324 |    "source": [
325 |     "import pandas as pd\n",
326 |     "import requests\n",
327 |     "from sktime.datasets import load_longley\n",
328 |     "from sktime.forecasting.model_selection import temporal_train_test_split\n",
329 |     "\n",
330 |     "y, X = load_longley()\n",
331 |     "y_train, y_test, X_train, X_test = temporal_train_test_split(y, X)\n",
332 |     "\n",
333 |     "# Define local host and endpoint url\n",
334 |     "host = \"127.0.0.1\"\n",
335 |     "url = f\"http://{host}:5000/invocations\"\n",
336 |     "\n",
337 |     "# Model scoring via REST API requires transforming the configuration DataFrame\n",
338 |     "# into JSON format. As numpy ndarray type is not JSON serializable we need to\n",
339 |     "# convert the exogenous regressor into a list. The wrapper instance will convert\n",
340 |     "# the list back to ndarray type as required by sktime predict methods. For more\n",
341 |     "# details read the MLflow deployment API reference.\n",
342 |     "# (https://mlflow.org/docs/latest/models.html#deploy-mlflow-models)\n",
343 |     "X_test_list = X_test.to_numpy().tolist()\n",
344 |     "predict_conf = pd.DataFrame(\n",
345 |     "    [\n",
346 |     "        {\n",
347 |     "            \"fh\": [1, 2, 3],\n",
348 |     "            \"predict_method\": \"predict_interval\",\n",
349 |     "            \"coverage\": [0.9, 0.95],\n",
350 |     "            \"X\": X_test_list,\n",
351 |     "        }\n",
352 |     "    ]\n",
353 |     ")\n",
354 |     "\n",
355 |     "# Create dictionary with pandas DataFrame in the split orientation\n",
356 |     "json_data = {\"dataframe_split\": predict_conf.to_dict(orient=\"split\")}\n",
357 |     "\n",
358 |     "# Score model\n",
359 |     "response = requests.post(url, json=json_data)\n",
360 |     "print(response.json())"
361 |    ]
362 |   },
363 |   {
364 |    "attachments": {},
365 |    "cell_type": "markdown",
366 |    "id": "2d1c59a2",
367 |    "metadata": {},
368 |    "source": [
369 |     "---\n",
370 |     "### Credits: notebook 8 - deploy to production with mlflow / mlflavors\n",
371 |     "\n",
372 |     "notebook creation: benjaminbluhm\n",
373 |     "\n",
374 |     "minor rearranging by fkiraly\n",
375 |     "\n",
376 |     "mlflavors, `sktime` mlflow interface: benjaminbluhm"
377 |    ]
378 |   }
379 |  ],
380 |  "metadata": {
381 |   "kernelspec": {
382 |    "display_name": "Python 3 (ipykernel)",
383 |    "language": "python",
384 |    "name": "python3"
385 |   },
386 |   "language_info": {
387 |    "codemirror_mode": {
388 |     "name": "ipython",
389 |     "version": 3
390 |    },
391 |    "file_extension": ".py",
392 |    "mimetype": "text/x-python",
393 |    "name": "python",
394 |    "nbconvert_exporter": "python",
395 |    "pygments_lexer": "ipython3",
396 |    "version": "3.10.11"
397 |   }
398 |  },
399 |  "nbformat": 4,
400 |  "nbformat_minor": 5
401 | }
402 | 


--------------------------------------------------------------------------------
/notebooks/09_outro.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "attachments": {},
  5 |    "cell_type": "markdown",
  6 |    "id": "37997396",
  7 |    "metadata": {},
  8 |    "source": [
  9 |     "# Summary & get involved!"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "attachments": {},
 14 |    "cell_type": "markdown",
 15 |    "id": "9936be02",
 16 |    "metadata": {},
 17 |    "source": [
 18 |     "- `sklearn` / `sktime` interface:\n",
 19 |     "  - unified interface for objects/estimators\n",
 20 |     "  - modular design, strategy pattern\n",
 21 |     "  - composable, composites are interface homogeneous\n",
 22 |     "  - simple specification language and parameter interface\n",
 23 |     "  - visually informative pretty printing"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "attachments": {},
 28 |    "cell_type": "markdown",
 29 |    "id": "7820cc3c",
 30 |    "metadata": {},
 31 |    "source": [
 32 |     "* Forecasting module:\n",
 33 |     "    - Univariate/multivariate forecasting (stats and ML)\n",
 34 |     "    - Use of exogenous data\n",
 35 |     "    - Probabilistic forecasting\n",
 36 |     "    - Hierarchical forecasting"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "attachments": {},
 41 |    "cell_type": "markdown",
 42 |    "id": "cd081e8c",
 43 |    "metadata": {},
 44 |    "source": [
 45 |     "* Transformers & pipelines\n",
 46 |     "\n",
 47 |     "    * sequential pipelines\n",
 48 |     "    * feature engineering, postproc\n",
 49 |     "    * dunders `*`, `+` etc\n",
 50 |     "    * tuning: parameter est., backtest-gridsearch, autoML"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "attachments": {},
 55 |    "cell_type": "markdown",
 56 |    "id": "4a55d803",
 57 |    "metadata": {},
 58 |    "source": [
 59 |     "* APIs, engineering & deployment\n",
 60 |     "\n",
 61 |     "    * modular unified framework interface\n",
 62 |     "    * multiple learning tasks, \"what is my task\" guide\n",
 63 |     "    * dependency management at estimator level\n",
 64 |     "    * 3rd party extensible via templates & test suite\n",
 65 |     "    * deploy via `mlflow` / `mlflavors`"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "attachments": {},
 70 |    "cell_type": "markdown",
 71 |    "id": "fbb8d3c5",
 72 |    "metadata": {},
 73 |    "source": [
 74 |     "Further reading:\n",
 75 |     "\n",
 76 |     "* main `sktime` [tutorials on binder](https://mybinder.org/v2/gh/sktime/sktime/main?filepath=examples)\n",
 77 |     "* recorded [video tutorials](https://www.youtube.com/playlist?list=PLKs3UgGjlWHqNzu0LEOeLKvnjvvest2d0)\n",
 78 |     "* find a bug or typo? [tutorial feedback thread](https://github.com/sktime/sktime/issues/1447)"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "attachments": {},
 83 |    "cell_type": "markdown",
 84 |    "id": "b66e01a8",
 85 |    "metadata": {},
 86 |    "source": [
 87 |     "---"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "attachments": {},
 92 |    "cell_type": "markdown",
 93 |    "id": "b6da2ae3",
 94 |    "metadata": {},
 95 |    "source": [
 96 |     "## Join sktime!\n",
 97 |     "\n",
 98 |     "### Vision statement\n",
 99 |     "\n",
100 |     "* an easy-to-use, easy-to-extend, comprehensive **python framework** for ML and AI with time series\n",
101 |     "* **open source, permissive license, free to use**\n",
102 |     "* **openly and transparently governed**\n",
103 |     "* **friendly, responsive, kind and inclusive** community, with an active commitment to ensure fairness and equal opportunity\n",
104 |     "* an academically and commercially **neutral space**, with an **ecosystem integration** ambition and neutral point of view\n",
105 |     "* an **educational platform**, providing mentoring and upskilling opportunities for all career stages, especially early career\n",
106 |     "\n",
107 |     "https://opendatascience.com/sktime-python-toolbox-for-machine-learning-with-time-series/\n",
108 |     "\n",
109 |     "**EVERYONE CAN JOIN! EVERYONE CAN BECOME A COMMUNITY LEADER!**\n",
110 |     "\n",
111 |     "* join our community discord ([join link](https://discord.com/invite/54ACzaFsn7))!\n",
112 |     "    * **help-desk for Q&A** and getting started as a user!\n",
113 |     "    * **dev-chat** for help getting started with open source!\n",
114 |     "        * contributor [getting started guide](https://github.com/sktime/sktime/issues/1147)\n",
115 |     "        * [good first issues](https://github.com/sktime/sktime/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22)\n",
116 |     "* join `sktime`'s user representatives or governance working group\n",
117 |     "    * register as a user ([form](https://forms.gle/eVuzrCjKDRupxawL7)) - roadmap, bugfix prio, elections\n",
118 |     "        * [roadmap 2023-2024 planning](https://github.com/sktime/sktime/issues/4691)\n",
119 |     "    * join [council sessions](https://github.com/sktime/community-org/tree/main/community_council/previous_meetings) and give input\n",
120 |     "\n",
121 |     "Opportunities:\n",
122 |     "\n",
123 |     "* regular **job opportunities**, watch the jobs channel on discord\n",
124 |     "* sktime **mentoring programme**: [link](https://github.com/sktime/mentoring)\n",
125 |     "    * applications on a rolling basis\n",
126 |     "    * summer programme is starting over the coming weeks!\n",
127 |     "\n",
128 |     "Events & meetups:\n",
129 |     "\n",
130 |     "* regular **community collaboration sessions**\n",
131 |     "    * meet-ups Fri 3pm UTC on [discord](https://discord.com/invite/54ACzaFsn7)\n",
132 |     "* multiple **Sprints and Dev Days** per year\n",
133 |     "    * [EuroPython 2023, Prague](https://ep2023.europython.eu/), 22-23 July week-end:\n",
134 |     "        * new contributor **onboarding sprint**!\n",
135 |     "        * **user feedback session**!\n",
136 |     "        * **developer meet-up**!\n",
137 |     "\n",
138 |     "Support us if `sktime` has generated value for you!\n",
139 |     "\n",
140 |     "* star us on [GitHub](https://github.com/sktime/sktime)\n",
141 |     "* follow us on [LinkedIn](https://www.linkedin.com/company/scikit-time/)\n",
142 |     "* donate! Every cent helps the time series ecosystem ([GitHub sponsors](https://github.com/sponsors/sktime))"
143 |    ]
144 |   },
145 |   {
146 |    "attachments": {},
147 |    "cell_type": "markdown",
148 |    "id": "adfa5420",
149 |    "metadata": {},
150 |    "source": [
151 |     "---"
152 |    ]
153 |   },
154 |   {
155 |    "attachments": {},
156 |    "cell_type": "markdown",
157 |    "id": "286d4c8d",
158 |    "metadata": {},
159 |    "source": [
160 |     "\n",
161 |     "## Thank you for your attention\n",
162 |     "\n",
163 |     "<img src=\"./img/sktime-logo-text-horizontal.jpg\" alt=\"Sktime Logo\" width=\"1000\"/>"
164 |    ]
165 |   },
166 |   {
167 |    "attachments": {},
168 |    "cell_type": "markdown",
169 |    "id": "2d1c59a2",
170 |    "metadata": {},
171 |    "source": [
172 |     "---\n",
173 |     "### Credits: sktime\n",
174 |     "\n",
175 |     "#### many thanks to [all `sktime` contributors!](https://www.sktime.net/en/latest/about/contributors.html)\n",
176 |     "\n",
177 |     "Citations & credits in academic research papers:\n",
178 |     "\n",
179 |     "`sktime` toolbox:\n",
180 |     " [sktime: A unified interface for machine learning with time series](https://arxiv.org/abs/1909.07872)\n",
181 |     "\n",
182 |     "`sktime` design principles: [Designing machine learning toolboxes: Concepts, principles and patterns](https://arxiv.org/abs/2101.04938)"
183 |    ]
184 |   }
185 |  ],
186 |  "metadata": {
187 |   "kernelspec": {
188 |    "display_name": "Python 3 (ipykernel)",
189 |    "language": "python",
190 |    "name": "python3"
191 |   },
192 |   "language_info": {
193 |    "codemirror_mode": {
194 |     "name": "ipython",
195 |     "version": 3
196 |    },
197 |    "file_extension": ".py",
198 |    "mimetype": "text/x-python",
199 |    "name": "python",
200 |    "nbconvert_exporter": "python",
201 |    "pygments_lexer": "ipython3",
202 |    "version": "3.10.11"
203 |   }
204 |  },
205 |  "nbformat": 4,
206 |  "nbformat_minor": 5
207 | }
208 | 


--------------------------------------------------------------------------------
/notebooks/hierarchical_demo_utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sktime.utils._testing.hierarchical import _make_hierarchical
 3 | 
 4 | 
 5 | def load_product_hierarchy():
 6 |     # Generate 5 years of daily historic sales (fixed seed), keep two product groups per product line, then relabel index values and columns with the product hierarchy names
 7 |     n_years = 5
 8 |     y = (
 9 |         _make_hierarchical(
10 |             hierarchy_levels=(2, 4),
11 |             min_timepoints=365 * n_years,
12 |             max_timepoints=365 * n_years,
13 |             random_state=0,
14 |         )
15 |         .drop(
16 |             index=[
17 |                 ("h0_0", "h1_2"),
18 |                 ("h0_0", "h1_3"),
19 |                 ("h0_1", "h1_0"),
20 |                 ("h0_1", "h1_1"),
21 |             ]
22 |         )
23 |         .rename(
24 |             index={
25 |                 "h0_0": "Food preparation",
26 |                 "h0_1": "Food preservation",
27 |                 "h1_0": "Hobs",
28 |                 "h1_1": "Ovens",
29 |                 "h1_2": "Fridges",
30 |                 "h1_3": "Freezers",
31 |             }
32 |         )
33 |         .reset_index()
34 |         .rename(
35 |             columns={
36 |                 "h0": "Product line",
37 |                 "h1": "Product group",
38 |                 "time": "Date",
39 |                 "c0": "Sales",
40 |             }
41 |         )
42 |     )
43 | 
44 |     # Convert dates to monthly periods and sum sales within each product line / product group / month
45 |     y["Date"] = y["Date"].dt.to_period("M")
46 |     y = y.groupby(by=["Product line", "Product group", "Date"]).sum()
47 | 
48 |     # Multiply by seeded Gaussian noise (mean 1, std 0.3) so the individual series differ, then round to whole units
49 |     noise = np.random.RandomState(seed=0).normal(1, 0.3, np.shape(y))
50 |     y = (y * noise).round(0)
51 | 
52 |     return y
53 | 


--------------------------------------------------------------------------------
/notebooks/img/ask_chatgpt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/ask_chatgpt.png


--------------------------------------------------------------------------------
/notebooks/img/estimator-conceptual-model.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/estimator-conceptual-model.jpg


--------------------------------------------------------------------------------
/notebooks/img/implementing_estimators.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/implementing_estimators.jpg


--------------------------------------------------------------------------------
/notebooks/img/implementing_estimators.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/implementing_estimators.png


--------------------------------------------------------------------------------
/notebooks/img/sklearn-unified-interface.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/sklearn-unified-interface.jpg


--------------------------------------------------------------------------------
/notebooks/img/sktime-logo-text-horizontal.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/sktime-logo-text-horizontal.jpg


--------------------------------------------------------------------------------
/notebooks/img/tasks-forecasting.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/tasks-forecasting.jpg


--------------------------------------------------------------------------------
/notebooks/img/tasks-forecasting.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/tasks-forecasting.png


--------------------------------------------------------------------------------
/notebooks/img/tasks-tsc-large.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/tasks-tsc-large.png


--------------------------------------------------------------------------------
/notebooks/img/tasks-tsc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/tasks-tsc.png


--------------------------------------------------------------------------------
/notebooks/img/ts-tasks.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/ts-tasks.jpg


--------------------------------------------------------------------------------
/notebooks/img/ts-tasks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/ts-tasks.png


--------------------------------------------------------------------------------
/notebooks/img/unified_framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/unified_framework.png


--------------------------------------------------------------------------------
/notebooks/img/verdena_shapelet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/verdena_shapelet.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | sktime[all_extras]==0.20.1
2 | mlflavors==0.1.0
3 | 


--------------------------------------------------------------------------------