├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── images
├── hierarchy.png
├── sktime-logo-text-horizontal.png
├── tabularization.png
├── tracking_artifact_ui.png
└── ts_quiz.png
├── notebooks
├── 01_introduction.ipynb
├── 02_timeseries.ipynb
├── 03_forecasting.ipynb
├── 04_feateng_pipe_tune.ipynb
├── 05_panel_tasks.ipynb
├── 06_distances_kernels_alignment.ipynb
├── 07_MLengineering.ipynb
├── 08_mlflow.ipynb
├── 09_outro.ipynb
├── hierarchical_demo_utils.py
└── img
│ ├── ask_chatgpt.png
│ ├── estimator-conceptual-model.jpg
│ ├── implementing_estimators.jpg
│ ├── implementing_estimators.png
│ ├── sklearn-unified-interface.jpg
│ ├── sktime-logo-text-horizontal.jpg
│ ├── tasks-forecasting.jpg
│ ├── tasks-forecasting.png
│ ├── tasks-tsc-large.png
│ ├── tasks-tsc.png
│ ├── ts-tasks.jpg
│ ├── ts-tasks.png
│ ├── unified_framework.png
│ └── verdena_shapelet.png
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # IDE
132 | .vscode/
133 |
134 | # MacOS
135 | .DS_Store
136 | *.csv
137 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 |
3 | - repo: https://github.com/pre-commit/pre-commit-hooks
4 | rev: v4.0.1
5 | hooks:
6 | - id: check-added-large-files
7 | args: ['--maxkb=1000']
8 | - id: check-case-conflict
9 | - id: check-merge-conflict
10 | - id: check-symlinks
11 | - id: check-yaml
12 | - id: debug-statements
13 | - id: end-of-file-fixer
14 | - id: fix-encoding-pragma
15 | - id: requirements-txt-fixer
16 | - id: trailing-whitespace
17 |
18 | - repo: https://github.com/pycqa/isort
19 | rev: 5.8.0
20 | hooks:
21 | - id: isort
22 | name: isort (python)
23 |
24 | - repo: https://github.com/psf/black
25 | rev: 21.5b1
26 | hooks:
27 | - id: black
28 | language_version: python3
29 | # args: [--line-length 79]
30 |
31 | - repo: https://github.com/pycqa/flake8
32 | rev: 3.9.2
33 | hooks:
34 | - id: flake8
35 | exclude: docs/conf.py
36 | additional_dependencies: [flake8-bugbear, flake8-print]
37 | args: ["--max-line-length=88"]
38 |
39 | - repo: https://github.com/nbQA-dev/nbQA
40 | rev: 0.13.0
41 | hooks:
42 | - id: nbqa-black
43 | args: [--nbqa-mutate, --nbqa-dont-skip-bad-cells]
44 | additional_dependencies: [black==20.8b1]
45 | # - id: nbqa-isort
46 | # args: [--nbqa-mutate, --nbqa-dont-skip-bad-cells]
47 | # additional_dependencies: [isort==5.6.4]
48 | - id: nbqa-flake8
49 | args: [--nbqa-dont-skip-bad-cells, "--extend-ignore=E402,E203", "--max-line-length=88"]
50 | additional_dependencies: [flake8==3.8.3]
51 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2023 The sktime developers.
4 |
5 | All rights reserved.
6 |
7 | Redistribution and use in source and binary forms, with or without
8 | modification, are permitted provided that the following conditions are met:
9 |
10 | * Redistributions of source code must retain the above copyright notice, this
11 | list of conditions and the following disclaimer.
12 |
13 | * Redistributions in binary form must reproduce the above copyright notice,
14 | this list of conditions and the following disclaimer in the documentation
15 | and/or other materials provided with the distribution.
16 |
17 | * Neither the name of the copyright holder nor the names of its
18 | contributors may be used to endorse or promote products derived from
19 | this software without specific prior written permission.
20 |
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 |
3 | Welcome to the sktime tutorial at Europython 2023
4 | =================================================
5 |
6 | This tutorial is about [sktime] - a unified framework for machine learning with time series. sktime contains algorithms and tools for building, applying, evaluating modular pipelines and composites for a variety of time series learning tasks, including forecasting, classification, regression.
7 |
8 | `sktime` is easily extensible by anyone, and interoperable with the python data science stack.
9 |
10 | This is an introductory `sktime` half-day tutorial with:
11 |
12 | * a general introduction to `sktime`
13 | * forecasting with `sktime` - uni/multivariate, hierarchical/global, probabilistic
14 | * feature extraction, transformation pipelines, parameter tuning, autoML
15 | * time series classification, regression, and clustering with `sktime`
16 | * customizing time series distances, kernels, time series aligners and alignment distances
17 | * engineering topics:
18 | * APIs, estimator and dependency management
19 | * writing `sktime` compatible 3rd party estimators
20 | * deploying `sktime` in production using `mlflow` with the `mlflavors` plugin
21 |
22 | [sktime]: https://sktime.net
23 |
24 | [](https://mybinder.org/v2/gh/sktime/sktime-tutorial-europython-2023/main?filepath=notebooks) [](https://discord.com/invite/54ACzaFsn7) [](https://www.linkedin.com/company/scikit-time/)
25 |
26 | ## :rocket: How to get started
27 |
28 | In the tutorial, we will move through notebooks section by section.
29 |
30 | You have different options how to run the tutorial notebooks:
31 |
32 | * Run the notebooks in the cloud on [Binder] - for this you don't have to install anything!
33 | * Run the notebooks on your machine. [Clone] this repository, get [conda], install the required packages (`sktime`, `seaborn`, `jupyter`) in an environment, and open the notebooks with that environment. For detailed instructions, see below. For troubleshooting, see sktime's more detailed [installation instructions].
34 | * or, use python venv, and/or an editable install of this repo as a package. Instructions below.
35 |
36 | [Binder]: https://mybinder.org/v2/gh/sktime/sktime-tutorial-europython-2023/main?filepath=notebooks
37 | [clone]: https://help.github.com/en/github/creating-cloning-and-archiving-repositories/cloning-a-repository
38 | [conda]: https://docs.conda.io/en/latest/
39 | [installation instructions]: https://www.sktime.net/en/latest/installation.html
40 |
41 | Please let us know on the [sktime discord](https://discord.com/invite/54ACzaFsn7) if you have any issues during the conference, or join to ask for help anytime.
42 |
43 | ## :bulb: Description
44 |
45 | This tutorial presents [sktime] - a unified framework for machine learning with time series. sktime covers multiple time series learning problems, including time series transformation, classification and forecasting, among others. `sktime` allows you to easily apply an algorithm for one task to solve another (e.g. a scikit-learn regressor to solve a forecasting problem). In the tutorial, you will learn about how you can identify these problems, what their key differences are and how they are related.
46 |
47 | `sktime` provides various time series algorithms and modular composition tools for pipelining, ensembling and tuning.
48 | `sktime` also provides API compatible interfaces to many popular libraries, such as `statsmodels`, `prophet`, `statsforecast`, `tslearn`, `tsfresh`, etc,
49 | which can be readily combined using `sktime` composition patterns.
50 |
51 | In this tutorial, you will learn how to use, combine, tune and evaluate different algorithms on real-world data sets.
52 | The tutorial consists of step-by-step walkthroughs using Jupyter Notebooks.
53 |
54 | `sktime` is not just a package, but also an active community which aims to be welcoming to new joiners.
55 | We invite anyone to get involved as a developer, user, supporter (or any combination of these).
56 |
57 | ## :movie_camera: Other Tutorials:
58 |
59 | - [Pydata Berlin 2022 - Advanced Forecasting Tutorial](https://www.youtube.com/watch?v=4Rf9euAhjNc)
60 |
61 | - [Pydata London 2022 - How to implement your own estimator in sktime](https://www.youtube.com/watch?v=S_3ewcvs_pg)
62 |
63 | - [Pydata Global 2022 - Feature extraction, Pipelines, Tuning](https://github.com/sktime/sktime-tutorial-pydata-global-2022)
64 |
65 | - [Pydata London 2023 - Time Series Classification, Regression, Distances & Kernels](https://github.com/sktime/sktime-tutorial-pydata-london-2023)
66 |
67 | ## :wave: How to contribute
68 |
69 | If you're interested in contributing to sktime, you can find out more about how to get involved [here](https://www.sktime.net/en/latest/get_involved.html).
70 |
71 | Any contributions are welcome, not just code!
72 |
73 | We also invite everyone to the "getting started with contributions" onboarding feature at the community sprint at EuroPython (July 22-23)!
74 |
75 | ## Installation instructions for local use
76 |
77 | To run the notebooks locally, you will need:
78 |
79 | * a local repository clone
80 | * a python environment with required packages installed
81 |
82 | ### Cloning the repository
83 |
84 | To clone the repository locally:
85 |
86 | `git clone https://github.com/sktime/sktime-tutorial-europython-2023.git`
87 |
88 | ### Using conda env
89 |
90 | 1. Create a python virtual environment:
91 | `conda create -y -n europython_sktime python=3.9`
92 | 2. Install required packages:
93 | `conda install -y -n europython_sktime pip sktime seaborn jupyter pmdarima statsmodels dtw-python`
94 | 3. Activate your environment:
95 | `conda activate europython_sktime`
96 | 4. If using jupyter: make the environment available in jupyter:
97 | `python -m ipykernel install --user --name=europython_sktime`
98 |
99 | ### Using python venv
100 |
101 | 1. Create a python virtual environment:
102 | `python -m venv europython_sktime`
103 | 2. Activate your environment:
104 | `source europython_sktime/bin/activate`
105 | 3. Install the requirements:
106 | `pip install sktime seaborn jupyter pmdarima statsmodels dtw-python`
107 | 4. If using jupyter: make the environment available in jupyter:
108 | `python -m ipykernel install --user --name=europython_sktime`
109 |
--------------------------------------------------------------------------------
/images/hierarchy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/images/hierarchy.png
--------------------------------------------------------------------------------
/images/sktime-logo-text-horizontal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/images/sktime-logo-text-horizontal.png
--------------------------------------------------------------------------------
/images/tabularization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/images/tabularization.png
--------------------------------------------------------------------------------
/images/tracking_artifact_ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/images/tracking_artifact_ui.png
--------------------------------------------------------------------------------
/images/ts_quiz.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/images/ts_quiz.png
--------------------------------------------------------------------------------
/notebooks/01_introduction.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "attachments": {},
5 | "cell_type": "markdown",
6 | "metadata": {},
7 | "source": [
8 | ""
9 | ]
10 | },
11 | {
12 | "attachments": {},
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "### Agenda for today\n",
17 | "\n",
18 | "1. General introduction to `sktime` & `sklearn`\n",
19 | "\n",
20 | "2. forecasting with `sktime`\n",
21 | "\n",
22 | "3. feature extraction, tuning, autoML\n",
23 | "\n",
24 | "4. time series classification, regression, and clustering with `sktime`\n",
25 | "\n",
26 | "5. customizing time series distances, kernels, time series aligners and alignment distances\n",
27 | "\n",
28 | "6. overview from ML eng & API perspective - estimators modules, learning tasks, library\n",
29 | "\n",
30 | "7. deployment with `mlflow` / `mlflavors`"
31 | ]
32 | },
33 | {
34 | "attachments": {},
35 | "cell_type": "markdown",
36 | "metadata": {},
37 | "source": [
38 | "### Running the notebooks"
39 | ]
40 | },
41 | {
42 | "attachments": {},
43 | "cell_type": "markdown",
44 | "metadata": {},
45 | "source": [
46 | "all notebooks available on `github.com/sktime`\n",
47 | "\n",
48 | "repository: `github.com/sktime/sktime-tutorial-europython-2023`\n",
49 | "\n",
50 | "* README instructions to run notebooks locally\n",
51 | "* binder to run notebooks in the cloud (if wifi allows)\n",
52 | "\n",
53 | "help, Q&A, developer chat in EuroPython [`sktime` tutorial discord thread](https://discord.com/channels/1120766458528542794/1130170803137282118)"
54 | ]
55 | },
56 | {
57 | "attachments": {},
58 | "cell_type": "markdown",
59 | "metadata": {},
60 | "source": [
61 | "## 1 - Introduction to ``sktime``\n",
62 | "\n",
63 | "### 1.1 What is ``sktime``?\n",
64 | "\n",
65 | "- `sktime` is a python library for time series learning tasks!\n",
66 | " - check [our website](https://www.sktime.net/en/latest/index.html)!\n",
67 | " - integrative framework layer in the time series space"
68 | ]
69 | },
70 | {
71 | "attachments": {},
72 | "cell_type": "markdown",
73 | "metadata": {},
74 | "source": [
75 | "- `sklearn` / `sktime` interface:\n",
76 | " - unified interface for objects/estimators\n",
77 | " - modular design, strategy pattern\n",
78 | " - composable, composites are interface homogeneous\n",
79 | " - simple specification language and parameter interface"
80 | ]
81 | },
82 | {
83 | "attachments": {},
84 | "cell_type": "markdown",
85 | "metadata": {},
86 | "source": [
87 | "- `sktime` is a vibrant, welcoming community with mentoring opportunities!\n",
88 | " - We *love* new contributors. Especially if you are new to open source!\n",
89 | " - join the ``sktime`` sprint this week-end at EuroPython! Gentle intro to contributing!\n",
90 | " - Check out the ``sktime`` [new contributors guide](https://www.sktime.net/en/latest/get_involved/contributing.html)\n",
91 | " - join our [discord](https://discord.com/invite/54ACzaFsn7) and/or one of our regular meetups!\n",
92 | " - follow us on [LinkedIn](https://www.linkedin.com/company/scikit-time/)!\n",
93 | " - star us on [GitHub](https://github.com/sktime/sktime)!"
94 | ]
95 | },
96 | {
97 | "attachments": {},
98 | "cell_type": "markdown",
99 | "metadata": {},
100 | "source": [
101 | "### 1.2 sklearn unified interface - the strategy pattern\n",
102 | "\n",
103 | "`sklearn` provides a unified interface to multiple learning tasks including classification, regression.\n",
104 | "\n",
105 | "any (supervised) estimator has the following interface points\n",
106 | "\n",
107 | "1. **Instantiate** your model of choice, with parameter settings\n",
108 | "2. **Fit** the instance of your model\n",
109 | "3. Use that fitted instance to **predict** new data!\n",
110 | "\n",
111 | ""
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": 1,
117 | "metadata": {},
118 | "outputs": [],
119 | "source": [
120 | "# get data to use the model on\n",
121 | "from sklearn.datasets import load_iris\n",
122 | "from sklearn.model_selection import train_test_split\n",
123 | "\n",
124 | "X, y = load_iris(return_X_y=True, as_frame=True)\n",
125 | "X_train, X_test, y_train, y_test = train_test_split(X, y)\n",
126 | "\n",
127 | "# data are pd.DataFrame/pd.Series\n",
128 | "# X_train.shape = (112, 4) - 112 train flowers times 4 features\n",
129 | "# sepal length, sepal width, petal length, petal width\n",
130 | "# y_train.shape = (112,) - 112 categorical labels - one of 3 flower types 0, 1, 2\n",
131 | "# X_test.shape = (38, 4) - 38 test flowers times 4 features\n",
132 | "# y_test.shape = (38,) - 38 categorical labels - one of 3 flower types\n"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": 2,
138 | "metadata": {},
139 | "outputs": [
140 | {
141 | "data": {
142 | "text/plain": [
143 | "array([2, 1, 2, 0, 1, 2, 1, 1, 2, 0, 2, 0, 2, 0, 1, 1, 0, 2, 0, 0, 2, 2,\n",
144 | " 0, 1, 1, 0, 2, 0, 0, 0, 1, 0, 0, 1, 1, 2, 0, 2])"
145 | ]
146 | },
147 | "execution_count": 2,
148 | "metadata": {},
149 | "output_type": "execute_result"
150 | }
151 | ],
152 | "source": [
153 | "from sklearn.svm import SVC\n",
154 | "\n",
155 | "# 1. Instantiate SVC with parameters gamma, C\n",
156 | "clf = SVC(gamma=0.001, C=100.)\n",
157 | "# clf is an instance of SVC now\n",
158 | "\n",
159 | "# 2. Fit clf to training data = 112 feature/label pairs\n",
160 | "clf.fit(X_train, y_train)\n",
161 | "# clf changes state to \"fitted\", computes model\n",
162 | "\n",
163 | "# 3. Predict labels on test data = 38 feature vectors\n",
164 | "y_test_pred = clf.predict(X_test)\n",
165 | "# produces predictions for the test data, 38 labels\n",
166 | "\n",
167 | "y_test_pred\n",
168 | "# y_test_pred.shape = (38,)"
169 | ]
170 | },
171 | {
172 | "attachments": {},
173 | "cell_type": "markdown",
174 | "metadata": {},
175 | "source": [
176 | "IMPORTANT: to use another classifier, only the specification line, part 1 changes!\n",
177 | "\n",
178 | "`SVC` could have been `RandomForest`, steps 2 and 3 remain the same - unified interface:"
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": 3,
184 | "metadata": {},
185 | "outputs": [
186 | {
187 | "data": {
188 | "text/plain": [
189 | "array([2, 1, 2, 0, 1, 2, 1, 1, 2, 0, 2, 0, 2, 0, 1, 1, 0, 2, 0, 0, 2, 2,\n",
190 | " 0, 1, 1, 0, 2, 0, 0, 0, 1, 0, 0, 1, 1, 2, 0, 2])"
191 | ]
192 | },
193 | "execution_count": 3,
194 | "metadata": {},
195 | "output_type": "execute_result"
196 | }
197 | ],
198 | "source": [
199 | "from sklearn.ensemble import RandomForestClassifier\n",
200 | "\n",
201 | "# 1. Instantiate RandomForestClassifier with parameter n_estimators\n",
202 | "clf = RandomForestClassifier(n_estimators=100)\n",
203 | "\n",
204 | "# 2. Fit clf to training data\n",
205 | "clf.fit(X_train, y_train)\n",
206 | "\n",
207 | "# 3. Predict labels on test data\n",
208 | "y_test_pred = clf.predict(X_test)\n",
209 | "\n",
210 | "y_test_pred"
211 | ]
212 | },
213 | {
214 | "attachments": {},
215 | "cell_type": "markdown",
216 | "metadata": {},
217 | "source": [
218 | "in object oriented design terminology, this is called **\"strategy pattern\"**\n",
219 | "\n",
220 | "= different estimators can be switched out without change to the interface\n",
221 | "\n",
222 | "= like a power plug adapter, it's plug&play if it conforms with the interface\n",
223 | "\n",
224 | "Pictorial summary:\n",
225 | ""
226 | ]
227 | },
228 | {
229 | "attachments": {},
230 | "cell_type": "markdown",
231 | "metadata": {},
232 | "source": [
233 | "`sklearn` estimators are parametric:\n",
234 | "\n",
235 | "all parameters in the \"blueprint\" accessed and set via `get_params`, `set_params`:"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": 4,
241 | "metadata": {},
242 | "outputs": [
243 | {
244 | "data": {
245 | "text/plain": [
246 | "{'bootstrap': True,\n",
247 | " 'ccp_alpha': 0.0,\n",
248 | " 'class_weight': None,\n",
249 | " 'criterion': 'gini',\n",
250 | " 'max_depth': None,\n",
251 | " 'max_features': 'sqrt',\n",
252 | " 'max_leaf_nodes': None,\n",
253 | " 'max_samples': None,\n",
254 | " 'min_impurity_decrease': 0.0,\n",
255 | " 'min_samples_leaf': 1,\n",
256 | " 'min_samples_split': 2,\n",
257 | " 'min_weight_fraction_leaf': 0.0,\n",
258 | " 'n_estimators': 100,\n",
259 | " 'n_jobs': None,\n",
260 | " 'oob_score': False,\n",
261 | " 'random_state': None,\n",
262 | " 'verbose': 0,\n",
263 | " 'warm_start': False}"
264 | ]
265 | },
266 | "execution_count": 4,
267 | "metadata": {},
268 | "output_type": "execute_result"
269 | }
270 | ],
271 | "source": [
272 | "clf.get_params()"
273 | ]
274 | },
275 | {
276 | "attachments": {},
277 | "cell_type": "markdown",
278 | "metadata": {},
279 | "source": [
280 | "### 1.3 `sktime` is devoted to time-series data analysis\n",
281 | "\n",
282 | "Richer space of time series tasks, compared to \"tabular\":\n",
283 | "\n",
284 | "- **Forecasting** - predict energy consumption tomorrow, based on past weeks\n",
285 | "- **Classification** - classify electrocardiograms to healthy/sick, based on prior examples\n",
286 | "- **Regression** - predict compound purity in bioreactor based on temperature/pressure profile\n",
287 | "- **Clustering** - sort outlines of tree leaves into a small number of similar classes\n",
288 | "- **Annotation** - identify jumps, anomalies, events in a data stream"
289 | ]
290 | },
291 | {
292 | "attachments": {},
293 | "cell_type": "markdown",
294 | "metadata": {},
295 | "source": [
296 | "`sktime` aims to provide `sklearn`-like, modular, composable, interfaces for these!"
297 | ]
298 | },
299 | {
300 | "attachments": {},
301 | "cell_type": "markdown",
302 | "metadata": {},
303 | "source": [
304 | "| Task | Status | Links |\n",
305 | "|---|---|---|\n",
306 | "| **Forecasting** | stable | [Tutorial](https://www.sktime.net/en/latest/examples/01_forecasting.html) · [API Reference](https://www.sktime.net/en/latest/api_reference/forecasting.html) · [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/forecasting.py) |\n",
307 | "| **Time Series Classification** | stable | [Tutorial](https://github.com/sktime/sktime/blob/main/examples/02_classification.ipynb) · [API Reference](https://www.sktime.net/en/latest/api_reference/classification.html) · [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/classification.py) |\n",
308 | "| **Time Series Regression** | stable | [API Reference](https://www.sktime.net/en/latest/api_reference/regression.html) |\n",
309 | "| **Transformations** | stable | [Tutorial](https://github.com/sktime/sktime/blob/main/examples/03_transformers.ipynb) · [API Reference](https://www.sktime.net/en/latest/api_reference/transformations.html) · [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/transformer.py) |\n",
310 | "| **Parameter fitting** | maturing | [API Reference](https://www.sktime.net/en/latest/api_reference/param_est.html) · [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/transformer.py) |\n",
311 | "| **Time Series Clustering** | maturing | [API Reference](https://www.sktime.net/en/latest/api_reference/clustering.html) · [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/clustering.py) |\n",
312 | "| **Time Series Distances/Kernels** | maturing | [Tutorial](https://github.com/sktime/sktime/blob/main/examples/03_transformers.ipynb) · [API Reference](https://www.sktime.net/en/latest/api_reference/dists_kernels.html) · [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/dist_kern_panel.py) |\n",
313 | "| **Annotation** | experimental | [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/annotation.py) |\n",
314 | "| **Distributions and simulation** | experimental | |"
315 | ]
316 | },
317 | {
318 | "attachments": {},
319 | "cell_type": "markdown",
320 | "metadata": {},
321 | "source": [
322 | "Example - forecasting"
323 | ]
324 | },
325 | {
326 | "attachments": {},
327 | "cell_type": "markdown",
328 | "metadata": {},
329 | "source": [
330 | ""
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": 5,
336 | "metadata": {},
337 | "outputs": [],
338 | "source": [
339 | "from sktime.datasets import load_airline\n",
340 | "from sktime.forecasting.naive import NaiveForecaster\n",
341 | "import numpy as np\n",
342 | "\n",
343 | "# step 1: data specification\n",
344 | "y = load_airline()\n",
345 | "# y = pandas.Series, y.shape = (144,)\n",
346 | "# 144 months of monthly observations\n",
347 | "\n",
348 | "# step 2: specifying forecasting horizon\n",
349 | "fh = np.arange(1, 37)\n",
350 | "# we want to forecast 36 months = 3 years\n",
351 | "\n",
352 | "# step 3: specifying the forecasting algorithm\n",
353 | "forecaster = NaiveForecaster(strategy=\"last\", sp=12)\n",
354 | "\n",
355 | "# step 4: fitting the forecaster\n",
356 | "forecaster.fit(y)\n",
357 | "\n",
358 | "# step 5: querying predictions\n",
359 | "y_pred = forecaster.predict(fh)\n",
360 | "# y_pred is a \"continuation\" of y\n",
361 | "# y_pred.shape = (36,) = forecasts for 36 months"
362 | ]
363 | },
364 | {
365 | "attachments": {},
366 | "cell_type": "markdown",
367 | "metadata": {},
368 | "source": [
369 | "Example - classification"
370 | ]
371 | },
372 | {
373 | "attachments": {},
374 | "cell_type": "markdown",
375 | "metadata": {},
376 | "source": [
377 | ""
378 | ]
379 | },
380 | {
381 | "cell_type": "code",
382 | "execution_count": 6,
383 | "metadata": {},
384 | "outputs": [],
385 | "source": [
386 | "from sktime.datasets import load_osuleaf\n",
387 | "from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier\n",
388 | "from sktime.dists_kernels.compose_tab_to_panel import FlatDist\n",
389 | "from sktime.dists_kernels import ScipyDist\n",
390 | "\n",
391 | "# step 1 - specify training data\n",
392 | "X_train, y_train = load_osuleaf(split=\"train\", return_type=\"numpy3D\")\n",
393 | "\n",
394 | "# step 2 - specify data to predict labels for\n",
395 | "X_new, _ = load_osuleaf(split=\"test\", return_type=\"numpy3D\")\n",
396 | "X_new = X_new[:2]\n",
397 | "\n",
398 | "# step 3 - specify the classifier\n",
399 | "mean_eucl_dist = FlatDist(ScipyDist())\n",
400 | "clf = KNeighborsTimeSeriesClassifier(n_neighbors=3, distance=mean_eucl_dist)\n",
401 | "\n",
402 | "# step 4 - fitting the classifier\n",
403 | "clf.fit(X_train, y_train)\n",
404 | "\n",
405 | "# step 5 - predict labels on new data\n",
406 | "y_pred = clf.predict(X_new)"
407 | ]
408 | },
409 | {
410 | "attachments": {},
411 | "cell_type": "markdown",
412 | "metadata": {},
413 | "source": [
414 | "### 1.4 `sktime` integrates the time series modelling ecosystem!\n",
415 | "\n",
416 | "the package space for time series is highly fragmented:\n",
417 | "\n",
418 | "* lots of great implementations and methods out there!\n",
419 | "* but many different interfaces, not composable like `sklearn`\n",
420 | "\n",
421 | "`sktime` integrates the ecosystem - in friendly collaboration with all the packages out there!\n",
422 | "\n",
423 | "* unified interface standard\n",
424 | "* highly composable\n",
425 | "* mini-package manager on estimator/module level\n",
426 | "* easily extensible - 3rd party plugins, other packages\n",
427 | "\n",
428 | ""
429 | ]
430 | },
431 | {
432 | "attachments": {},
433 | "cell_type": "markdown",
434 | "metadata": {},
435 | "source": [
436 | "### 1.5 Summary/What is next!\n",
437 | "\n",
438 | "- `sklearn` interface: unified interface (strategy pattern), modular, composition stable, easy specification language\n",
439 | "- `sktime` evolves the interface for time series learning tasks\n",
440 | "- `sktime` integrates a fragmented ecosystem with interface, composability, dependency management\n",
441 | "\n",
442 | "- today:\n",
443 | " * deep dive forecasting\n",
444 | " * feature extraction, tuning, autoML\n",
445 | " * deep dive classification and panel tasks regression, clustering\n",
446 | " * time series distances, kernels, alignment\n",
447 | " * engineering/API perspective, deployment"
448 | ]
449 | },
450 | {
451 | "attachments": {},
452 | "cell_type": "markdown",
453 | "metadata": {},
454 | "source": [
455 | "---\n",
456 | "### Credits: notebook 1 - `sktime` intro\n",
457 | "\n",
458 | "notebook creation: fkiraly, marrov\n",
459 | "\n",
460 | "some vignettes based on existing `sktime` tutorials, credit: fkiraly, miraep8\n",
461 | "\n",
462 | "slides (png/jpg):\n",
463 | "\n",
464 | "* from fkiraly's postgraduate course at UCL, Principles and Patterns in Data Scientific Software Engineering\n",
465 | "* ecosystem slide: fkiraly, mloning\n",
466 | "* learning tasks: fkiraly, mloning\n",
467 | "\n",
468 | "General credit also to `sklearn` and `sktime` contributors"
469 | ]
470 | }
471 | ],
472 | "metadata": {
473 | "kernelspec": {
474 | "display_name": "Python 3.8.13 ('pydata22')",
475 | "language": "python",
476 | "name": "python3"
477 | },
478 | "language_info": {
479 | "codemirror_mode": {
480 | "name": "ipython",
481 | "version": 3
482 | },
483 | "file_extension": ".py",
484 | "mimetype": "text/x-python",
485 | "name": "python",
486 | "nbconvert_exporter": "python",
487 | "pygments_lexer": "ipython3",
488 | "version": "3.11.3"
489 | },
490 | "orig_nbformat": 4,
491 | "vscode": {
492 | "interpreter": {
493 | "hash": "e61b44dca3bf47c8973c8cd627825697e2dad493e19dd6592afda0a0a3c312a0"
494 | }
495 | }
496 | },
497 | "nbformat": 4,
498 | "nbformat_minor": 2
499 | }
500 |
--------------------------------------------------------------------------------
/notebooks/05_panel_tasks.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "attachments": {},
5 | "cell_type": "markdown",
6 | "metadata": {},
7 | "source": [
8 | "### Overview of this notebook\n",
9 | "\n",
10 | "* Introduction to time series classification, regression, clustering\n",
11 | "* `sktime` data format for \"time series panels\" = collections of time series\n",
12 | "* Basic vignettes for TSC, TSR, TSCl\n",
13 | "* Advanced vignettes - pipelines, ensembles, tuning"
14 | ]
15 | },
16 | {
17 | "attachments": {},
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "# 5. Learning tasks - Classification, Regression, Clustering & more \n",
22 | "\n",
23 | "deal with *collections of time series* = \"panel data\"\n",
24 | "\n",
25 | "Classification = try to assign one *category* per time series, after training on time series/category examples\n",
26 | "\n",
27 | "Example: daily energy consumption profile over time - Predict season, e.g., winter/summer, or type of consumer\n",
28 | "\n",
29 | "Regression = try to assign one *numerical value* per time series, after training on time series/value examples\n",
30 | "\n",
31 | "Example: temperature/pressure/time profile of chemical reactor. Predict total purity (fraction of 1)\n",
32 | "\n",
33 | "Clustering = put different time series in a small number of similarity buckets"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 1,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "import numpy as np\n",
43 | "import pandas as pd\n",
44 | "\n",
45 | "# Increase display width\n",
46 | "pd.set_option('display.width', 1000)"
47 | ]
48 | },
49 | {
50 | "attachments": {},
51 | "cell_type": "markdown",
52 | "metadata": {},
53 | "source": [
54 | "## 5.1 Panel data - `sktime` data formats \n",
55 | "\n",
56 | "`Panel` abstract data type = values observed for:\n",
57 | "\n",
58 | "* `instance`, e.g., patient\n",
59 | "* `variable`, e.g., blood pressure, body temperature\n",
60 | "* `time`/`index`, e.g., January 12, 2023 (usually but not necessarily a time index!)\n",
61 | "\n",
62 | "One value X is: \"patient A had blood pressure X on January 12, 2023\"\n",
63 | "\n",
64 | "time series classification, regression, clustering: slices `Panel` data by instance"
65 | ]
66 | },
67 | {
68 | "cell_type": "markdown",
69 | "metadata": {},
70 | "source": [
71 | "\n",
72 | "Preferred format 1: `pd.DataFrame` with 2-level `MultiIndex`, (instance, time), cols= variables\n",
73 | "\n",
74 | "Preferred format 2: 3D `np.ndarray` with index (instance, variable, time)\n",
75 | "\n",
76 | "* `sktime` supports and recognizes multiple data formats for convenience and internal use, e.g., `dask`, `xarray`\n",
77 | "* abstract data type = \"scitype\"; in-memory specification = \"mtype\"\n",
78 | "* More information in tutorial on [in-memory data representations and data loading](https://www.sktime.net/en/latest/examples/AA_datatypes_and_datasets.html#In-memory-data-representations-and-data-loading)"
79 | ]
80 | },
81 | {
82 | "attachments": {},
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "### 5.1.1 preferred format 1 - `pd-multiindex` specification"
87 | ]
88 | },
89 | {
90 | "attachments": {},
91 | "cell_type": "markdown",
92 | "metadata": {},
93 | "source": [
94 | "`pd-multiindex` = `pd.DataFrame` with 2-level `MultiIndex`, (instance, time), cols= variables"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 2,
100 | "metadata": {},
101 | "outputs": [],
102 | "source": [
103 | "from sktime.datasets import load_italy_power_demand\n",
104 | "\n",
105 | "# load an example time series panel in pd-multiindex mtype\n",
106 | "X, _ = load_italy_power_demand(return_type=\"pd-multiindex\")\n",
107 | "\n",
108 | "# renaming columns for illustrative purposes\n",
109 | "X.columns = [\"total_power_demand\"]\n",
110 | "X.index.names = [\"day_ID\", \"hour_of_day\"]"
111 | ]
112 | },
113 | {
114 | "attachments": {},
115 | "cell_type": "markdown",
116 | "metadata": {},
117 | "source": [
118 | "The Italy power demand dataset has:\n",
119 | "\n",
120 | "* 1096 individual time series instances = single days of total power demand (mean subtracted)\n",
121 | "* one single variable per time series instance, `total_power_demand`\n",
122 | " * total power demand on that day, in that hourly period\n",
123 | " * hence a univariate dataset\n",
124 | "* individual time series are observed at 24 time (period) points (the same number for all instances)\n",
125 | "\n",
126 | "In the dataset, days are jumbled and of different scope (independent sampling).\n",
127 | "* considered independent\n",
128 | "* for task, e.g., \"identify season or weekday/week-end from pattern\""
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": 3,
134 | "metadata": {},
135 | "outputs": [
136 | {
137 | "data": {
138 | "text/html": [
139 | "
\n",
140 | "\n",
153 | "
\n",
154 | " \n",
155 | " \n",
156 | " | \n",
157 | " | \n",
158 | " total_power_demand | \n",
159 | "
\n",
160 | " \n",
161 | " day_ID | \n",
162 | " hour_of_day | \n",
163 | " | \n",
164 | "
\n",
165 | " \n",
166 | " \n",
167 | " \n",
168 | " 0 | \n",
169 | " 0 | \n",
170 | " -0.710518 | \n",
171 | "
\n",
172 | " \n",
173 | " 1 | \n",
174 | " -1.183320 | \n",
175 | "
\n",
176 | " \n",
177 | " 2 | \n",
178 | " -1.372442 | \n",
179 | "
\n",
180 | " \n",
181 | " 3 | \n",
182 | " -1.593083 | \n",
183 | "
\n",
184 | " \n",
185 | " 4 | \n",
186 | " -1.467002 | \n",
187 | "
\n",
188 | " \n",
189 | " ... | \n",
190 | " ... | \n",
191 | " ... | \n",
192 | "
\n",
193 | " \n",
194 | " 1095 | \n",
195 | " 19 | \n",
196 | " 0.180490 | \n",
197 | "
\n",
198 | " \n",
199 | " 20 | \n",
200 | " -0.094058 | \n",
201 | "
\n",
202 | " \n",
203 | " 21 | \n",
204 | " 0.729587 | \n",
205 | "
\n",
206 | " \n",
207 | " 22 | \n",
208 | " 0.210995 | \n",
209 | "
\n",
210 | " \n",
211 | " 23 | \n",
212 | " -0.002542 | \n",
213 | "
\n",
214 | " \n",
215 | "
\n",
216 | "
26304 rows × 1 columns
\n",
217 | "
"
218 | ],
219 | "text/plain": [
220 | " total_power_demand\n",
221 | "day_ID hour_of_day \n",
222 | "0 0 -0.710518\n",
223 | " 1 -1.183320\n",
224 | " 2 -1.372442\n",
225 | " 3 -1.593083\n",
226 | " 4 -1.467002\n",
227 | "... ...\n",
228 | "1095 19 0.180490\n",
229 | " 20 -0.094058\n",
230 | " 21 0.729587\n",
231 | " 22 0.210995\n",
232 | " 23 -0.002542\n",
233 | "\n",
234 | "[26304 rows x 1 columns]"
235 | ]
236 | },
237 | "execution_count": 3,
238 | "metadata": {},
239 | "output_type": "execute_result"
240 | }
241 | ],
242 | "source": [
243 | "X"
244 | ]
245 | },
246 | {
247 | "cell_type": "code",
248 | "execution_count": 4,
249 | "metadata": {},
250 | "outputs": [],
251 | "source": [
252 | "from sktime.datasets import load_basic_motions\n",
253 | "\n",
254 | "# load an example time series panel in pd-multiindex mtype\n",
255 | "X, _ = load_basic_motions(return_type=\"pd-multiindex\")\n",
256 | "\n",
257 | "# renaming columns for illustrative purposes\n",
258 | "X.columns = [\"accel_1\", \"accel_2\", \"accel_3\", \"gyro_1\", \"gyro_2\", \"gyro_3\"]\n",
259 | "X.index.names = [\"trial_no\", \"timepoint\"]"
260 | ]
261 | },
262 | {
263 | "attachments": {},
264 | "cell_type": "markdown",
265 | "metadata": {},
266 | "source": [
267 | "The basic motions dataset has:\n",
268 | "\n",
269 | "* 80 individual time series instances = trials = person engaging in activity (running, badminton, etc)\n",
270 | "* six variables per time series instance, `dim_0` to `dim_5` (renamed above to `accel_1`, ..., `gyro_3`)\n",
271 | " * 3 accelerometer and 3 gyrometer measurements\n",
272 | " * hence a multivariate dataset\n",
273 | "* individual time series are observed at 100 time points (the same number for all instances)"
274 | ]
275 | },
276 | {
277 | "cell_type": "code",
278 | "execution_count": 5,
279 | "metadata": {},
280 | "outputs": [
281 | {
282 | "data": {
283 | "text/html": [
284 | "\n",
285 | "\n",
298 | "
\n",
299 | " \n",
300 | " \n",
301 | " | \n",
302 | " | \n",
303 | " accel_1 | \n",
304 | " accel_2 | \n",
305 | " accel_3 | \n",
306 | " gyro_1 | \n",
307 | " gyro_2 | \n",
308 | " gyro_3 | \n",
309 | "
\n",
310 | " \n",
311 | " trial_no | \n",
312 | " timepoint | \n",
313 | " | \n",
314 | " | \n",
315 | " | \n",
316 | " | \n",
317 | " | \n",
318 | " | \n",
319 | "
\n",
320 | " \n",
321 | " \n",
322 | " \n",
323 | " 0 | \n",
324 | " 0 | \n",
325 | " 0.079106 | \n",
326 | " 0.394032 | \n",
327 | " 0.551444 | \n",
328 | " 0.351565 | \n",
329 | " 0.023970 | \n",
330 | " 0.633883 | \n",
331 | "
\n",
332 | " \n",
333 | " 1 | \n",
334 | " 0.079106 | \n",
335 | " 0.394032 | \n",
336 | " 0.551444 | \n",
337 | " 0.351565 | \n",
338 | " 0.023970 | \n",
339 | " 0.633883 | \n",
340 | "
\n",
341 | " \n",
342 | " 2 | \n",
343 | " -0.903497 | \n",
344 | " -3.666397 | \n",
345 | " -0.282844 | \n",
346 | " -0.095881 | \n",
347 | " -0.319605 | \n",
348 | " 0.972131 | \n",
349 | "
\n",
350 | " \n",
351 | " 3 | \n",
352 | " 1.116125 | \n",
353 | " -0.656101 | \n",
354 | " 0.333118 | \n",
355 | " 1.624657 | \n",
356 | " -0.569962 | \n",
357 | " 1.209171 | \n",
358 | "
\n",
359 | " \n",
360 | " 4 | \n",
361 | " 1.638200 | \n",
362 | " 1.405135 | \n",
363 | " 0.393875 | \n",
364 | " 1.187864 | \n",
365 | " -0.271664 | \n",
366 | " 1.739182 | \n",
367 | "
\n",
368 | " \n",
369 | " ... | \n",
370 | " ... | \n",
371 | " ... | \n",
372 | " ... | \n",
373 | " ... | \n",
374 | " ... | \n",
375 | " ... | \n",
376 | " ... | \n",
377 | "
\n",
378 | " \n",
379 | " 79 | \n",
380 | " 95 | \n",
381 | " 28.459024 | \n",
382 | " -16.633770 | \n",
383 | " 3.631869 | \n",
384 | " 8.978229 | \n",
385 | " -3.611533 | \n",
386 | " -1.491489 | \n",
387 | "
\n",
388 | " \n",
389 | " 96 | \n",
390 | " 10.260094 | \n",
391 | " 0.102775 | \n",
392 | " 1.269261 | \n",
393 | " -1.645964 | \n",
394 | " -3.377157 | \n",
395 | " 1.283746 | \n",
396 | "
\n",
397 | " \n",
398 | " 97 | \n",
399 | " 4.316471 | \n",
400 | " -3.574319 | \n",
401 | " 2.063831 | \n",
402 | " -1.717875 | \n",
403 | " -1.843054 | \n",
404 | " 0.484734 | \n",
405 | "
\n",
406 | " \n",
407 | " 98 | \n",
408 | " 0.704446 | \n",
409 | " -4.920444 | \n",
410 | " 2.851857 | \n",
411 | " -2.982977 | \n",
412 | " -0.809665 | \n",
413 | " -0.721774 | \n",
414 | "
\n",
415 | " \n",
416 | " 99 | \n",
417 | " -2.074749 | \n",
418 | " -6.892377 | \n",
419 | " 4.848379 | \n",
420 | " -1.350330 | \n",
421 | " -1.203844 | \n",
422 | " -1.776470 | \n",
423 | "
\n",
424 | " \n",
425 | "
\n",
426 | "
8000 rows × 6 columns
\n",
427 | "
"
428 | ],
429 | "text/plain": [
430 | " accel_1 accel_2 accel_3 gyro_1 gyro_2 gyro_3\n",
431 | "trial_no timepoint \n",
432 | "0 0 0.079106 0.394032 0.551444 0.351565 0.023970 0.633883\n",
433 | " 1 0.079106 0.394032 0.551444 0.351565 0.023970 0.633883\n",
434 | " 2 -0.903497 -3.666397 -0.282844 -0.095881 -0.319605 0.972131\n",
435 | " 3 1.116125 -0.656101 0.333118 1.624657 -0.569962 1.209171\n",
436 | " 4 1.638200 1.405135 0.393875 1.187864 -0.271664 1.739182\n",
437 | "... ... ... ... ... ... ...\n",
438 | "79 95 28.459024 -16.633770 3.631869 8.978229 -3.611533 -1.491489\n",
439 | " 96 10.260094 0.102775 1.269261 -1.645964 -3.377157 1.283746\n",
440 | " 97 4.316471 -3.574319 2.063831 -1.717875 -1.843054 0.484734\n",
441 | " 98 0.704446 -4.920444 2.851857 -2.982977 -0.809665 -0.721774\n",
442 | " 99 -2.074749 -6.892377 4.848379 -1.350330 -1.203844 -1.776470\n",
443 | "\n",
444 | "[8000 rows x 6 columns]"
445 | ]
446 | },
447 | "execution_count": 5,
448 | "metadata": {},
449 | "output_type": "execute_result"
450 | }
451 | ],
452 | "source": [
453 | "# The outermost index represents the instance number\n",
454 | "# whereas the inner index represents the time point\n",
455 | "# within that instance.\n",
456 | "X"
457 | ]
458 | },
459 | {
460 | "attachments": {},
461 | "cell_type": "markdown",
462 | "metadata": {},
463 | "source": [
464 | "pandas provides a simple way to access a range of values in the multi-indexed dataframe:"
465 | ]
466 | },
467 | {
468 | "cell_type": "code",
469 | "execution_count": 6,
470 | "metadata": {},
471 | "outputs": [
472 | {
473 | "data": {
474 | "text/plain": [
475 | "timepoint\n",
476 | "0 0.351565\n",
477 | "1 0.351565\n",
478 | "2 -0.095881\n",
479 | "3 1.624657\n",
480 | "4 1.187864\n",
481 | " ... \n",
482 | "95 0.039951\n",
483 | "96 -0.029297\n",
484 | "97 0.000000\n",
485 | "98 0.000000\n",
486 | "99 -0.007990\n",
487 | "Name: gyro_1, Length: 100, dtype: float64"
488 | ]
489 | },
490 | "execution_count": 6,
491 | "metadata": {},
492 | "output_type": "execute_result"
493 | }
494 | ],
495 | "source": [
496 | "# Select:\n",
497 | "# * the fourth variable (gyroscope 1)\n",
498 | "# * of the first instance (trial 1 = 0 in python)\n",
499 | "# * values at all 100 timestamps\n",
500 | "#\n",
501 | "X.loc[0, \"gyro_1\"]"
502 | ]
503 | },
504 | {
505 | "attachments": {},
506 | "cell_type": "markdown",
507 | "metadata": {},
508 | "source": [
509 | "Or if you want to access the individual values:"
510 | ]
511 | },
512 | {
513 | "cell_type": "code",
514 | "execution_count": 7,
515 | "metadata": {},
516 | "outputs": [
517 | {
518 | "data": {
519 | "text/plain": [
520 | "-1.27952"
521 | ]
522 | },
523 | "execution_count": 7,
524 | "metadata": {},
525 | "output_type": "execute_result"
526 | }
527 | ],
528 | "source": [
529 | "# Select:\n",
530 | "# * the fifth time point (5 = 4 in python)\n",
531 | "# * the third variable (accelerometer 3)\n",
532 | "# * of the forty-third instance (trial 43 = 42 in python)\n",
533 | "\n",
534 | "X.loc[(42, 4), \"accel_3\"]"
535 | ]
536 | },
537 | {
538 | "attachments": {},
539 | "cell_type": "markdown",
540 | "metadata": {},
541 | "source": [
542 | "### 5.1.2 preferred format 2 - `numpy3D` specification"
543 | ]
544 | },
545 | {
546 | "attachments": {},
547 | "cell_type": "markdown",
548 | "metadata": {},
549 | "source": [
550 | "`numpy3D` = 3D `np.ndarray` with index (instance, variable, time)\n",
551 | "\n",
552 | "instance/time index is interpreted as integer\n",
553 | "\n",
554 | "IMPORTANT: unlike `pd-multiindex`, this assumes:\n",
555 | "\n",
556 | "* all individual series have the same length\n",
557 | "* all individual series have the same index"
558 | ]
559 | },
560 | {
561 | "cell_type": "code",
562 | "execution_count": 8,
563 | "metadata": {},
564 | "outputs": [],
565 | "source": [
566 | "from sktime.datasets import load_basic_motions\n",
567 | "\n",
568 | "# load an example time series panel in numpy mtype\n",
569 | "X, _ = load_basic_motions(return_type=\"numpy3D\")"
570 | ]
571 | },
572 | {
573 | "attachments": {},
574 | "cell_type": "markdown",
575 | "metadata": {},
576 | "source": [
577 | "The Italy power demand dataset has:\n",
578 | "\n",
579 | "* 1096 individual time series instances = single days of total power demand (mean subtracted)\n",
580 | "* one single variable per time series instances, unnamed in numpy\n",
581 | "* individual time series are observed at 24 time (period) points (the same number for all instances)"
582 | ]
583 | },
584 | {
585 | "cell_type": "code",
586 | "execution_count": 9,
587 | "metadata": {},
588 | "outputs": [
589 | {
590 | "data": {
591 | "text/plain": [
592 | "(80, 6, 100)"
593 | ]
594 | },
595 | "execution_count": 9,
596 | "metadata": {},
597 | "output_type": "execute_result"
598 | }
599 | ],
600 | "source": [
601 | "# (num_instances, num_variables, length)\n",
602 | "X.shape"
603 | ]
604 | },
605 | {
606 | "cell_type": "code",
607 | "execution_count": 10,
608 | "metadata": {},
609 | "outputs": [],
610 | "source": [
611 | "from sktime.datasets import load_basic_motions\n",
612 | "\n",
613 | "# load an example time series panel in numpy mtype\n",
614 | "X, _ = load_basic_motions(return_type=\"numpy3D\")"
615 | ]
616 | },
617 | {
618 | "attachments": {},
619 | "cell_type": "markdown",
620 | "metadata": {},
621 | "source": [
622 | "The basic motions dataset has:\n",
623 | "\n",
624 | "* 80 individual time series instances = trials = person engaging in activity (running, badminton, etc)\n",
625 | "* six variables per time series instance, unnamed in numpy\n",
626 | "* individual time series are observed at 100 time points (the same number for all instances)"
627 | ]
628 | },
629 | {
630 | "cell_type": "code",
631 | "execution_count": 11,
632 | "metadata": {},
633 | "outputs": [
634 | {
635 | "data": {
636 | "text/plain": [
637 | "(80, 6, 100)"
638 | ]
639 | },
640 | "execution_count": 11,
641 | "metadata": {},
642 | "output_type": "execute_result"
643 | }
644 | ],
645 | "source": [
646 | "X.shape"
647 | ]
648 | },
649 | {
650 | "attachments": {},
651 | "cell_type": "markdown",
652 | "metadata": {},
653 | "source": [
654 | "## 5.2 Time Series Classification, Regression, Clustering - Basic Vignettes\n",
655 | "\n",
656 | "Above tasks are very similar to \"tabular\" classification, regression, clustering, as in `sklearn`\n",
657 | "\n",
658 | "Main distinction:\n",
659 | "* in \"tabular\" classification etc, one (feature) instance is a row vector of features\n",
660 | "* in TSC, one (feature) instance is a full time series, possibly unequal length, distinct index set"
661 | ]
662 | },
663 | {
664 | "attachments": {},
665 | "cell_type": "markdown",
666 | "metadata": {},
667 | "source": [
668 | ""
669 | ]
670 | },
671 | {
672 | "attachments": {},
673 | "cell_type": "markdown",
674 | "metadata": {},
675 | "source": [
676 | "\n",
677 | "More formally:\n",
678 | "\n",
679 | "* \"tabular\" classification:\n",
680 | " * training pairs $(x_1, y_1), \\dots, (x_n, y_n)$\n",
681 | " * where $x_i$ are rows of a `pd.DataFrame` (same col types)\n",
682 | " * and $y_i \\in \\mathcal{C}$ for a finite set $\\mathcal{C}$\n",
683 | " * used to train a classifier that\n",
684 | " * for a new `pd.DataFrame` row $x_*$\n",
685 | " * predicts $y_* \\in \\mathcal{C}$"
686 | ]
687 | },
688 | {
689 | "cell_type": "markdown",
690 | "metadata": {},
691 | "source": [
692 | "\n",
693 | "* time series classification:\n",
694 | " * training pairs $(x_1, y_1), \\dots, (x_n, y_n)$\n",
695 | " * where $x_i$ are time series instances, from a certain domain\n",
696 | " * and $y_i \\in \\mathcal{C}$ for a finite set $\\mathcal{C}$\n",
697 | " * used to train a classifier that\n",
698 | " * for a new time series instance $x_*$\n",
699 | " * predicts $y_* \\in \\mathcal{C}$"
700 | ]
701 | },
702 | {
703 | "attachments": {},
704 | "cell_type": "markdown",
705 | "metadata": {},
706 | "source": [
707 | "very similar for time series regression, clustering - exercise left to reader :-)"
708 | ]
709 | },
710 | {
711 | "attachments": {},
712 | "cell_type": "markdown",
713 | "metadata": {},
714 | "source": [
715 | "`sktime` design implications:\n",
716 | "\n",
717 | "* need representation of collections of time series (panels), see Section 5.1\n",
718 | " * same as in \"adjacent\" learning tasks, e.g., panel forecasting\n",
719 | " * same as for transformation estimators\n",
720 | "* algorithms that use sequentiality, can deal with unequal length, missing values etc \n",
721 | "* algorithms usually based on distances or kernels between time series - need to cover that in framework\n",
722 | "* but we can use familiar `fit` / `predict` and `scikit-learn` / `scikit-base` interface!"
723 | ]
724 | },
725 | {
726 | "attachments": {},
727 | "cell_type": "markdown",
728 | "metadata": {},
729 | "source": [
730 | "### 5.2.3 Time Series Classification - deployment vignette"
731 | ]
732 | },
733 | {
734 | "attachments": {},
735 | "cell_type": "markdown",
736 | "metadata": {},
737 | "source": [
738 | "Basic deployment vignette for TSC:\n",
739 | "\n",
740 | "1. load/setup training data, `X` in a `Panel` (more specifically `numpy3D`) format, `y` as 1D `np.ndarray`\n",
741 | "2. load/setup new data for prediction (can also be done after step 4)\n",
742 | "3. specify the classifier using `sklearn`-like syntax\n",
743 | "4. fit classifier to training data, `fit(X, y)`\n",
744 | "5. predict labels on new data, `predict(X_new)`"
745 | ]
746 | },
747 | {
748 | "cell_type": "code",
749 | "execution_count": 12,
750 | "metadata": {},
751 | "outputs": [],
752 | "source": [
753 | "# steps 1, 2 - prepare Italy power demand dataset (train and new)\n",
754 | "from sktime.datasets import load_italy_power_demand\n",
755 | "\n",
756 | "X_train, y_train = load_italy_power_demand(split=\"train\", return_type=\"numpy3D\")\n",
757 | "X_new, _ = load_italy_power_demand(split=\"test\", return_type=\"numpy3D\")"
758 | ]
759 | },
760 | {
761 | "cell_type": "code",
762 | "execution_count": 13,
763 | "metadata": {},
764 | "outputs": [
765 | {
766 | "data": {
767 | "text/plain": [
768 | "(67, 1, 24)"
769 | ]
770 | },
771 | "execution_count": 13,
772 | "metadata": {},
773 | "output_type": "execute_result"
774 | }
775 | ],
776 | "source": [
777 | "# this is in numpy3D format, but could also be pd-multiindex or other\n",
778 | "X_train.shape"
779 | ]
780 | },
781 | {
782 | "cell_type": "code",
783 | "execution_count": 14,
784 | "metadata": {},
785 | "outputs": [
786 | {
787 | "data": {
788 | "text/plain": [
789 | "(67,)"
790 | ]
791 | },
792 | "execution_count": 14,
793 | "metadata": {},
794 | "output_type": "execute_result"
795 | }
796 | ],
797 | "source": [
798 | "# y is a 1D np.ndarray of labels - same length as number of instances in X_train\n",
799 | "y_train.shape"
800 | ]
801 | },
802 | {
803 | "cell_type": "code",
804 | "execution_count": 15,
805 | "metadata": {},
806 | "outputs": [],
807 | "source": [
808 | "# step 3 - specify the classifier\n",
809 | "from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier\n",
810 | "\n",
811 | "# example 1 - 3-NN with simple dynamic time warping distance (requires numba)\n",
812 | "clf = KNeighborsTimeSeriesClassifier(n_neighbors=3)\n",
813 | "\n",
814 | "# example 2 - custom distance:\n",
815 | "# 3-nearest neighbour classifier with Euclidean distance (on flattened time series)\n",
816 | "# (requires scipy)\n",
817 | "from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier\n",
818 | "from sktime.dists_kernels.compose_tab_to_panel import FlatDist\n",
819 | "from sktime.dists_kernels import ScipyDist\n",
820 | "\n",
821 | "eucl_dist = FlatDist(ScipyDist())\n",
822 | "clf = KNeighborsTimeSeriesClassifier(n_neighbors=3, distance=eucl_dist)"
823 | ]
824 | },
825 | {
826 | "attachments": {},
827 | "cell_type": "markdown",
828 | "metadata": {},
829 | "source": [
830 | "we could specify any `sktime` classifier here - the rest remains the same!"
831 | ]
832 | },
833 | {
834 | "cell_type": "code",
835 | "execution_count": 16,
836 | "metadata": {},
837 | "outputs": [
838 | {
839 | "data": {
840 | "text/plain": [
841 | "{'algorithm': 'brute',\n",
842 | " 'distance': FlatDist(transformer=ScipyDist()),\n",
843 | " 'distance_mtype': None,\n",
844 | " 'distance_params': None,\n",
845 | " 'leaf_size': 30,\n",
846 | " 'n_jobs': None,\n",
847 | " 'n_neighbors': 3,\n",
848 | " 'pass_train_distances': False,\n",
849 | " 'weights': 'uniform',\n",
850 | " 'distance__transformer': ScipyDist(),\n",
851 | " 'distance__transformer__colalign': 'intersect',\n",
852 | " 'distance__transformer__metric': 'euclidean',\n",
853 | " 'distance__transformer__metric_kwargs': None,\n",
854 | " 'distance__transformer__p': 2,\n",
855 | " 'distance__transformer__var_weights': None}"
856 | ]
857 | },
858 | "execution_count": 16,
859 | "metadata": {},
860 | "output_type": "execute_result"
861 | }
862 | ],
863 | "source": [
864 | "# all classifiers are scikit-learn / scikit-base compatible!\n",
865 | "# nested parameter interface via get_params, set_params\n",
866 | "clf.get_params()"
867 | ]
868 | },
869 | {
870 | "cell_type": "code",
871 | "execution_count": 17,
872 | "metadata": {},
873 | "outputs": [
874 | {
875 | "data": {
876 | "text/html": [
877 | "KNeighborsTimeSeriesClassifier(distance=FlatDist(transformer=ScipyDist()),\n",
878 | " n_neighbors=3)
Please rerun this cell to show the HTML repr or trust the notebook. "
880 | ],
881 | "text/plain": [
882 | "KNeighborsTimeSeriesClassifier(distance=FlatDist(transformer=ScipyDist()),\n",
883 | " n_neighbors=3)"
884 | ]
885 | },
886 | "execution_count": 17,
887 | "metadata": {},
888 | "output_type": "execute_result"
889 | }
890 | ],
891 | "source": [
892 | "# step 4 - fit/train the classifier\n",
893 | "clf.fit(X_train, y_train)"
894 | ]
895 | },
896 | {
897 | "cell_type": "code",
898 | "execution_count": 18,
899 | "metadata": {},
900 | "outputs": [
901 | {
902 | "data": {
903 | "text/plain": [
904 | "True"
905 | ]
906 | },
907 | "execution_count": 18,
908 | "metadata": {},
909 | "output_type": "execute_result"
910 | }
911 | ],
912 | "source": [
913 | "# the classifier is now fitted\n",
914 | "clf.is_fitted"
915 | ]
916 | },
917 | {
918 | "cell_type": "code",
919 | "execution_count": 19,
920 | "metadata": {},
921 | "outputs": [
922 | {
923 | "data": {
924 | "text/plain": [
925 | "{'classes': array(['1', '2'], dtype='\n",
1289 | "\n",
1302 | "\n",
1303 | " \n",
1304 | " \n",
1305 | " | \n",
1306 | " name | \n",
1307 | " scitype | \n",
1308 | " type | \n",
1309 | " description | \n",
1310 | "
\n",
1311 | " \n",
1312 | " \n",
1313 | " \n",
1314 | " 0 | \n",
1315 | " capability:contractable | \n",
1316 | " classifier | \n",
1317 | " bool | \n",
1318 | " contract time setting, does the estimator supp... | \n",
1319 | "
\n",
1320 | " \n",
1321 | " 1 | \n",
1322 | " capability:missing_values | \n",
1323 | " [classifier, early_classifier, param_est, regr... | \n",
1324 | " bool | \n",
1325 | " can the classifier handle missing data (NA, np... | \n",
1326 | "
\n",
1327 | " \n",
1328 | " 2 | \n",
1329 | " capability:multithreading | \n",
1330 | " [classifier, early_classifier] | \n",
1331 | " bool | \n",
1332 | " can the classifier set n_jobs to use multiple ... | \n",
1333 | "
\n",
1334 | " \n",
1335 | " 3 | \n",
1336 | " capability:multivariate | \n",
1337 | " [classifier, early_classifier, param_est, regr... | \n",
1338 | " bool | \n",
1339 | " can the classifier classify time series with 2... | \n",
1340 | "
\n",
1341 | " \n",
1342 | " 4 | \n",
1343 | " capability:predict_proba | \n",
1344 | " classifier | \n",
1345 | " bool | \n",
1346 | " does the classifier implement a non-default pr... | \n",
1347 | "
\n",
1348 | " \n",
1349 | " 5 | \n",
1350 | " capability:train_estimate | \n",
1351 | " classifier | \n",
1352 | " bool | \n",
1353 | " can the classifier estimate its performance on... | \n",
1354 | "
\n",
1355 | " \n",
1356 | " 6 | \n",
1357 | " capability:unequal_length | \n",
1358 | " [classifier, early_classifier, regressor, tran... | \n",
1359 | " bool | \n",
1360 | " can the estimator handle unequal length time s... | \n",
1361 | "
\n",
1362 | " \n",
1363 | " 7 | \n",
1364 | " classifier_type | \n",
1365 | " classifier | \n",
1366 | " (list, [dictionary, distance, feature, hybrid,... | \n",
1367 | " which type the classifier falls under in the t... | \n",
1368 | "
\n",
1369 | " \n",
1370 | "
\n",
1371 | ""
1372 | ],
1373 | "text/plain": [
1374 | " name scitype type description\n",
1375 | "0 capability:contractable classifier bool contract time setting, does the estimator supp...\n",
1376 | "1 capability:missing_values [classifier, early_classifier, param_est, regr... bool can the classifier handle missing data (NA, np...\n",
1377 | "2 capability:multithreading [classifier, early_classifier] bool can the classifier set n_jobs to use multiple ...\n",
1378 | "3 capability:multivariate [classifier, early_classifier, param_est, regr... bool can the classifier classify time series with 2...\n",
1379 | "4 capability:predict_proba classifier bool does the classifier implement a non-default pr...\n",
1380 | "5 capability:train_estimate classifier bool can the classifier estimate its performance on...\n",
1381 | "6 capability:unequal_length [classifier, early_classifier, regressor, tran... bool can the estimator handle unequal length time s...\n",
1382 | "7 classifier_type classifier (list, [dictionary, distance, feature, hybrid,... which type the classifier falls under in the t..."
1383 | ]
1384 | },
1385 | "execution_count": 28,
1386 | "metadata": {},
1387 | "output_type": "execute_result"
1388 | }
1389 | ],
1390 | "source": [
1391 | "from sktime.registry import all_tags\n",
1392 | "\n",
1393 | "all_tags(\"classifier\", as_dataframe=True)"
1394 | ]
1395 | },
1396 | {
1397 | "attachments": {},
1398 | "cell_type": "markdown",
1399 | "metadata": {},
1400 | "source": [
1401 | "valid estimator types are listed in the `all_tags` docstring, or `sktime.registry.BASE_CLASS_REGISTER`"
1402 | ]
1403 | },
1404 | {
1405 | "cell_type": "code",
1406 | "execution_count": 29,
1407 | "metadata": {},
1408 | "outputs": [
1409 | {
1410 | "data": {
1411 | "text/plain": [
1412 | "('object',\n",
1413 | " 'estimator',\n",
1414 | " 'aligner',\n",
1415 | " 'classifier',\n",
1416 | " 'clusterer',\n",
1417 | " 'early_classifier',\n",
1418 | " 'forecaster',\n",
1419 | " 'metric',\n",
1420 | " 'network',\n",
1421 | " 'param_est',\n",
1422 | " 'regressor',\n",
1423 | " 'series-annotator',\n",
1424 | " 'splitter',\n",
1425 | " 'transformer',\n",
1426 | " 'transformer-pairwise',\n",
1427 | " 'transformer-pairwise-panel',\n",
1428 | " 'distribution')"
1429 | ]
1430 | },
1431 | "execution_count": 29,
1432 | "metadata": {},
1433 | "output_type": "execute_result"
1434 | }
1435 | ],
1436 | "source": [
1437 | "from sktime.registry import BASE_CLASS_REGISTER\n",
1438 | "\n",
1439 | "# get only first table column, the list of types\n",
1440 | "list(zip(*BASE_CLASS_REGISTER))[0]"
1441 | ]
1442 | },
1443 | {
1444 | "attachments": {},
1445 | "cell_type": "markdown",
1446 | "metadata": {},
1447 | "source": [
1448 | "to find all estimators of a certain type, use `sktime.registry.all_estimators`"
1449 | ]
1450 | },
1451 | {
1452 | "cell_type": "code",
1453 | "execution_count": 30,
1454 | "metadata": {},
1455 | "outputs": [
1456 | {
1457 | "data": {
1458 | "text/html": [
1459 | "\n",
1460 | "\n",
1473 | "
\n",
1474 | " \n",
1475 | " \n",
1476 | " | \n",
1477 | " name | \n",
1478 | " object | \n",
1479 | "
\n",
1480 | " \n",
1481 | " \n",
1482 | " \n",
1483 | " 0 | \n",
1484 | " Arsenal | \n",
1485 | " <class 'sktime.classification.kernel_based._ar... | \n",
1486 | "
\n",
1487 | " \n",
1488 | " 1 | \n",
1489 | " BOSSEnsemble | \n",
1490 | " <class 'sktime.classification.dictionary_based... | \n",
1491 | "
\n",
1492 | " \n",
1493 | " 2 | \n",
1494 | " BaggingClassifier | \n",
1495 | " <class 'sktime.classification.ensemble._baggin... | \n",
1496 | "
\n",
1497 | " \n",
1498 | " 3 | \n",
1499 | " CNNClassifier | \n",
1500 | " <class 'sktime.classification.deep_learning.cn... | \n",
1501 | "
\n",
1502 | " \n",
1503 | " 4 | \n",
1504 | " CanonicalIntervalForest | \n",
1505 | " <class 'sktime.classification.interval_based._... | \n",
1506 | "
\n",
1507 | " \n",
1508 | " 5 | \n",
1509 | " Catch22Classifier | \n",
1510 | " <class 'sktime.classification.feature_based._c... | \n",
1511 | "
\n",
1512 | " \n",
1513 | " 6 | \n",
1514 | " ClassifierPipeline | \n",
1515 | " <class 'sktime.classification.compose._pipelin... | \n",
1516 | "
\n",
1517 | " \n",
1518 | " 7 | \n",
1519 | " ColumnEnsembleClassifier | \n",
1520 | " <class 'sktime.classification.compose._column_... | \n",
1521 | "
\n",
1522 | " \n",
1523 | " 8 | \n",
1524 | " ComposableTimeSeriesForestClassifier | \n",
1525 | " <class 'sktime.classification.ensemble._ctsf.C... | \n",
1526 | "
\n",
1527 | " \n",
1528 | " 9 | \n",
1529 | " ContractableBOSS | \n",
1530 | " <class 'sktime.classification.dictionary_based... | \n",
1531 | "
\n",
1532 | " \n",
1533 | " 10 | \n",
1534 | " DrCIF | \n",
1535 | " <class 'sktime.classification.interval_based._... | \n",
1536 | "
\n",
1537 | " \n",
1538 | " 11 | \n",
1539 | " DummyClassifier | \n",
1540 | " <class 'sktime.classification.dummy._dummy.Dum... | \n",
1541 | "
\n",
1542 | " \n",
1543 | " 12 | \n",
1544 | " ElasticEnsemble | \n",
1545 | " <class 'sktime.classification.distance_based._... | \n",
1546 | "
\n",
1547 | " \n",
1548 | " 13 | \n",
1549 | " FCNClassifier | \n",
1550 | " <class 'sktime.classification.deep_learning.fc... | \n",
1551 | "
\n",
1552 | " \n",
1553 | " 14 | \n",
1554 | " FreshPRINCE | \n",
1555 | " <class 'sktime.classification.feature_based._f... | \n",
1556 | "
\n",
1557 | " \n",
1558 | " 15 | \n",
1559 | " HIVECOTEV1 | \n",
1560 | " <class 'sktime.classification.hybrid._hivecote... | \n",
1561 | "
\n",
1562 | " \n",
1563 | " 16 | \n",
1564 | " HIVECOTEV2 | \n",
1565 | " <class 'sktime.classification.hybrid._hivecote... | \n",
1566 | "
\n",
1567 | " \n",
1568 | " 17 | \n",
1569 | " InceptionTimeClassifier | \n",
1570 | " <class 'sktime.classification.deep_learning.in... | \n",
1571 | "
\n",
1572 | " \n",
1573 | " 18 | \n",
1574 | " IndividualBOSS | \n",
1575 | " <class 'sktime.classification.dictionary_based... | \n",
1576 | "
\n",
1577 | " \n",
1578 | " 19 | \n",
1579 | " IndividualTDE | \n",
1580 | " <class 'sktime.classification.dictionary_based... | \n",
1581 | "
\n",
1582 | " \n",
1583 | " 20 | \n",
1584 | " KNeighborsTimeSeriesClassifier | \n",
1585 | " <class 'sktime.classification.distance_based._... | \n",
1586 | "
\n",
1587 | " \n",
1588 | " 21 | \n",
1589 | " LSTMFCNClassifier | \n",
1590 | " <class 'sktime.classification.deep_learning.ls... | \n",
1591 | "
\n",
1592 | " \n",
1593 | " 22 | \n",
1594 | " MACNNClassifier | \n",
1595 | " <class 'sktime.classification.deep_learning.ma... | \n",
1596 | "
\n",
1597 | " \n",
1598 | " 23 | \n",
1599 | " MLPClassifier | \n",
1600 | " <class 'sktime.classification.deep_learning.ml... | \n",
1601 | "
\n",
1602 | " \n",
1603 | " 24 | \n",
1604 | " MUSE | \n",
1605 | " <class 'sktime.classification.dictionary_based... | \n",
1606 | "
\n",
1607 | " \n",
1608 | " 25 | \n",
1609 | " MatrixProfileClassifier | \n",
1610 | " <class 'sktime.classification.feature_based._m... | \n",
1611 | "
\n",
1612 | " \n",
1613 | " 26 | \n",
1614 | " MrSQM | \n",
1615 | " <class 'sktime.classification.shapelet_based._... | \n",
1616 | "
\n",
1617 | " \n",
1618 | " 27 | \n",
1619 | " ProbabilityThresholdEarlyClassifier | \n",
1620 | " <class 'sktime.classification.early_classifica... | \n",
1621 | "
\n",
1622 | " \n",
1623 | " 28 | \n",
1624 | " ProximityForest | \n",
1625 | " <class 'sktime.classification.distance_based._... | \n",
1626 | "
\n",
1627 | " \n",
1628 | " 29 | \n",
1629 | " ProximityStump | \n",
1630 | " <class 'sktime.classification.distance_based._... | \n",
1631 | "
\n",
1632 | " \n",
1633 | " 30 | \n",
1634 | " ProximityTree | \n",
1635 | " <class 'sktime.classification.distance_based._... | \n",
1636 | "
\n",
1637 | " \n",
1638 | " 31 | \n",
1639 | " RandomIntervalClassifier | \n",
1640 | " <class 'sktime.classification.feature_based._r... | \n",
1641 | "
\n",
1642 | " \n",
1643 | " 32 | \n",
1644 | " RandomIntervalSpectralEnsemble | \n",
1645 | " <class 'sktime.classification.interval_based._... | \n",
1646 | "
\n",
1647 | " \n",
1648 | " 33 | \n",
1649 | " ResNetClassifier | \n",
1650 | " <class 'sktime.classification.deep_learning.re... | \n",
1651 | "
\n",
1652 | " \n",
1653 | " 34 | \n",
1654 | " RocketClassifier | \n",
1655 | " <class 'sktime.classification.kernel_based._ro... | \n",
1656 | "
\n",
1657 | " \n",
1658 | " 35 | \n",
1659 | " ShapeDTW | \n",
1660 | " <class 'sktime.classification.distance_based._... | \n",
1661 | "
\n",
1662 | " \n",
1663 | " 36 | \n",
1664 | " ShapeletTransformClassifier | \n",
1665 | " <class 'sktime.classification.shapelet_based._... | \n",
1666 | "
\n",
1667 | " \n",
1668 | " 37 | \n",
1669 | " SignatureClassifier | \n",
1670 | " <class 'sktime.classification.feature_based._s... | \n",
1671 | "
\n",
1672 | " \n",
1673 | " 38 | \n",
1674 | " SimpleRNNClassifier | \n",
1675 | " <class 'sktime.classification.deep_learning.rn... | \n",
1676 | "
\n",
1677 | " \n",
1678 | " 39 | \n",
1679 | " SklearnClassifierPipeline | \n",
1680 | " <class 'sktime.classification.compose._pipelin... | \n",
1681 | "
\n",
1682 | " \n",
1683 | " 40 | \n",
1684 | " SummaryClassifier | \n",
1685 | " <class 'sktime.classification.feature_based._s... | \n",
1686 | "
\n",
1687 | " \n",
1688 | " 41 | \n",
1689 | " SupervisedTimeSeriesForest | \n",
1690 | " <class 'sktime.classification.interval_based._... | \n",
1691 | "
\n",
1692 | " \n",
1693 | " 42 | \n",
1694 | " TSFreshClassifier | \n",
1695 | " <class 'sktime.classification.feature_based._t... | \n",
1696 | "
\n",
1697 | " \n",
1698 | " 43 | \n",
1699 | " TapNetClassifier | \n",
1700 | " <class 'sktime.classification.deep_learning.ta... | \n",
1701 | "
\n",
1702 | " \n",
1703 | " 44 | \n",
1704 | " TemporalDictionaryEnsemble | \n",
1705 | " <class 'sktime.classification.dictionary_based... | \n",
1706 | "
\n",
1707 | " \n",
1708 | " 45 | \n",
1709 | " TimeSeriesForestClassifier | \n",
1710 | " <class 'sktime.classification.interval_based._... | \n",
1711 | "
\n",
1712 | " \n",
1713 | " 46 | \n",
1714 | " TimeSeriesSVC | \n",
1715 | " <class 'sktime.classification.kernel_based._sv... | \n",
1716 | "
\n",
1717 | " \n",
1718 | " 47 | \n",
1719 | " WEASEL | \n",
1720 | " <class 'sktime.classification.dictionary_based... | \n",
1721 | "
\n",
1722 | " \n",
1723 | " 48 | \n",
1724 | " WeightedEnsembleClassifier | \n",
1725 | " <class 'sktime.classification.ensemble._weight... | \n",
1726 | "
\n",
1727 | " \n",
1728 | "
\n",
1729 | "
"
1730 | ],
1731 | "text/plain": [
1732 | " name object\n",
1733 | "0 Arsenal \n",
1814 | "\n",
1827 | "\n",
1828 | " \n",
1829 | " \n",
1830 | " | \n",
1831 | " name | \n",
1832 | " object | \n",
1833 | "
\n",
1834 | " \n",
1835 | " \n",
1836 | " \n",
1837 | " 0 | \n",
1838 | " BaggingClassifier | \n",
1839 | " <class 'sktime.classification.ensemble._baggin... | \n",
1840 | "
\n",
1841 | " \n",
1842 | " 1 | \n",
1843 | " DummyClassifier | \n",
1844 | " <class 'sktime.classification.dummy._dummy.Dum... | \n",
1845 | "
\n",
1846 | " \n",
1847 | " 2 | \n",
1848 | " KNeighborsTimeSeriesClassifier | \n",
1849 | " <class 'sktime.classification.distance_based._... | \n",
1850 | "
\n",
1851 | " \n",
1852 | " 3 | \n",
1853 | " SklearnClassifierPipeline | \n",
1854 | " <class 'sktime.classification.compose._pipelin... | \n",
1855 | "
\n",
1856 | " \n",
1857 | " 4 | \n",
1858 | " TimeSeriesSVC | \n",
1859 | " <class 'sktime.classification.kernel_based._sv... | \n",
1860 | "
\n",
1861 | " \n",
1862 | " 5 | \n",
1863 | " WeightedEnsembleClassifier | \n",
1864 | " <class 'sktime.classification.ensemble._weight... | \n",
1865 | "
\n",
1866 | " \n",
1867 | "
\n",
1868 | ""
1869 | ],
1870 | "text/plain": [
1871 | " name object\n",
1872 | "0 BaggingClassifier \n",
1957 | "\n",
1970 | "\n",
1971 | " \n",
1972 | " \n",
1973 | " | \n",
1974 | " | \n",
1975 | " dim_0 | \n",
1976 | "
\n",
1977 | " \n",
1978 | " | \n",
1979 | " timepoints | \n",
1980 | " | \n",
1981 | "
\n",
1982 | " \n",
1983 | " \n",
1984 | " \n",
1985 | " 0 | \n",
1986 | " 0 | \n",
1987 | " 0.267711 | \n",
1988 | "
\n",
1989 | " \n",
1990 | " 1 | \n",
1991 | " -0.290155 | \n",
1992 | "
\n",
1993 | " \n",
1994 | " 2 | \n",
1995 | " -0.564339 | \n",
1996 | "
\n",
1997 | " \n",
1998 | " 3 | \n",
1999 | " -0.870044 | \n",
2000 | "
\n",
2001 | " \n",
2002 | " 4 | \n",
2003 | " -0.829027 | \n",
2004 | "
\n",
2005 | " \n",
2006 | " ... | \n",
2007 | " ... | \n",
2008 | " ... | \n",
2009 | "
\n",
2010 | " \n",
2011 | " 1095 | \n",
2012 | " 19 | \n",
2013 | " -0.425904 | \n",
2014 | "
\n",
2015 | " \n",
2016 | " 20 | \n",
2017 | " -0.781304 | \n",
2018 | "
\n",
2019 | " \n",
2020 | " 21 | \n",
2021 | " -0.038512 | \n",
2022 | "
\n",
2023 | " \n",
2024 | " 22 | \n",
2025 | " -0.637956 | \n",
2026 | "
\n",
2027 | " \n",
2028 | " 23 | \n",
2029 | " -0.932346 | \n",
2030 | "
\n",
2031 | " \n",
2032 | "
\n",
2033 | "26304 rows × 1 columns
\n",
2034 | ""
2035 | ],
2036 | "text/plain": [
2037 | " dim_0\n",
2038 | " timepoints \n",
2039 | "0 0 0.267711\n",
2040 | " 1 -0.290155\n",
2041 | " 2 -0.564339\n",
2042 | " 3 -0.870044\n",
2043 | " 4 -0.829027\n",
2044 | "... ...\n",
2045 | "1095 19 -0.425904\n",
2046 | " 20 -0.781304\n",
2047 | " 21 -0.038512\n",
2048 | " 22 -0.637956\n",
2049 | " 23 -0.932346\n",
2050 | "\n",
2051 | "[26304 rows x 1 columns]"
2052 | ]
2053 | },
2054 | "execution_count": 32,
2055 | "metadata": {},
2056 | "output_type": "execute_result"
2057 | }
2058 | ],
2059 | "source": [
2060 | "from sktime.datasets import load_italy_power_demand\n",
2061 | "from sktime.transformations.series.detrend import Detrender\n",
2062 | "\n",
2063 | "# load some panel data\n",
2064 | "X, _ = load_italy_power_demand(return_type=\"pd-multiindex\")\n",
2065 | "\n",
2066 | "# specify a linear detrender\n",
2067 | "detrender = Detrender()\n",
2068 | "\n",
2069 | "# detrend X by removing linear trend from each instance\n",
2070 | "X_detrended = detrender.fit_transform(X)\n",
2071 | "X_detrended"
2072 | ]
2073 | },
2074 | {
2075 | "attachments": {},
2076 | "cell_type": "markdown",
2077 | "metadata": {},
2078 | "source": [
2079 | "for panel tasks such as TSC, TSR, clustering, there are two distinctions to be aware of:\n",
2080 | "\n",
2081 | "* series-to-series transformers transform individual series to series, panels to panels. E.g., instance-wise detrender above\n",
2082 | "* series-to-primitive transformers transform individual series to a set of tabular features. E.g., summary feature extractor\n",
2083 | "\n",
2084 | "either type of transform can be instance-wise:\n",
2085 | "\n",
2086 | "* instance-wise transforms use only the i-th series to transform the i-th series. E.g., instance-wise detrender\n",
2087 | "* non-instance-wise transforms train on all series to transform the i-th series. E.g., PCA, overall mean detrender"
2088 | ]
2089 | },
2090 | {
2091 | "cell_type": "code",
2092 | "execution_count": 33,
2093 | "metadata": {},
2094 | "outputs": [
2095 | {
2096 | "data": {
2097 | "text/html": [
2098 | "\n",
2099 | "\n",
2112 | "
\n",
2113 | " \n",
2114 | " \n",
2115 | " | \n",
2116 | " mean | \n",
2117 | " std | \n",
2118 | " min | \n",
2119 | " max | \n",
2120 | " 0.1 | \n",
2121 | " 0.25 | \n",
2122 | " 0.5 | \n",
2123 | " 0.75 | \n",
2124 | " 0.9 | \n",
2125 | "
\n",
2126 | " \n",
2127 | " \n",
2128 | " \n",
2129 | " 0 | \n",
2130 | " -1.041667e-09 | \n",
2131 | " 1.0 | \n",
2132 | " -1.593083 | \n",
2133 | " 1.464375 | \n",
2134 | " -1.372442 | \n",
2135 | " -0.805078 | \n",
2136 | " 0.030207 | \n",
2137 | " 0.936412 | \n",
2138 | " 1.218518 | \n",
2139 | "
\n",
2140 | " \n",
2141 | " 1 | \n",
2142 | " -1.958333e-09 | \n",
2143 | " 1.0 | \n",
2144 | " -1.630917 | \n",
2145 | " 1.201393 | \n",
2146 | " -1.533955 | \n",
2147 | " -0.999388 | \n",
2148 | " 0.384871 | \n",
2149 | " 0.735720 | \n",
2150 | " 1.084018 | \n",
2151 | "
\n",
2152 | " \n",
2153 | " 2 | \n",
2154 | " -1.775000e-09 | \n",
2155 | " 1.0 | \n",
2156 | " -1.397118 | \n",
2157 | " 2.349344 | \n",
2158 | " -1.003740 | \n",
2159 | " -0.741487 | \n",
2160 | " -0.132687 | \n",
2161 | " 0.265374 | \n",
2162 | " 1.515756 | \n",
2163 | "
\n",
2164 | " \n",
2165 | " 3 | \n",
2166 | " -8.541667e-10 | \n",
2167 | " 1.0 | \n",
2168 | " -1.646458 | \n",
2169 | " 1.344487 | \n",
2170 | " -1.476779 | \n",
2171 | " -0.898722 | \n",
2172 | " 0.266022 | \n",
2173 | " 0.776495 | \n",
2174 | " 1.039641 | \n",
2175 | "
\n",
2176 | " \n",
2177 | " 4 | \n",
2178 | " -3.416667e-09 | \n",
2179 | " 1.0 | \n",
2180 | " -1.620240 | \n",
2181 | " 1.303502 | \n",
2182 | " -1.511644 | \n",
2183 | " -0.978061 | \n",
2184 | " 0.405495 | \n",
2185 | " 0.692648 | \n",
2186 | " 1.061249 | \n",
2187 | "
\n",
2188 | " \n",
2189 | " ... | \n",
2190 | " ... | \n",
2191 | " ... | \n",
2192 | " ... | \n",
2193 | " ... | \n",
2194 | " ... | \n",
2195 | " ... | \n",
2196 | " ... | \n",
2197 | " ... | \n",
2198 | " ... | \n",
2199 | "
\n",
2200 | " \n",
2201 | " 1091 | \n",
2202 | " -1.041667e-09 | \n",
2203 | " 1.0 | \n",
2204 | " -1.817799 | \n",
2205 | " 1.630397 | \n",
2206 | " -1.323058 | \n",
2207 | " -0.643414 | \n",
2208 | " 0.081208 | \n",
2209 | " 0.568453 | \n",
2210 | " 1.390523 | \n",
2211 | "
\n",
2212 | " \n",
2213 | " 1092 | \n",
2214 | " -4.166666e-10 | \n",
2215 | " 1.0 | \n",
2216 | " -1.550077 | \n",
2217 | " 1.513605 | \n",
2218 | " -1.343747 | \n",
2219 | " -0.768526 | \n",
2220 | " 0.075550 | \n",
2221 | " 0.857101 | \n",
2222 | " 1.276013 | \n",
2223 | "
\n",
2224 | " \n",
2225 | " 1093 | \n",
2226 | " 4.166667e-09 | \n",
2227 | " 1.0 | \n",
2228 | " -1.706992 | \n",
2229 | " 1.052255 | \n",
2230 | " -1.498879 | \n",
2231 | " -1.139943 | \n",
2232 | " 0.467669 | \n",
2233 | " 0.713195 | \n",
2234 | " 0.993797 | \n",
2235 | "
\n",
2236 | " \n",
2237 | " 1094 | \n",
2238 | " 1.583333e-09 | \n",
2239 | " 1.0 | \n",
2240 | " -1.673857 | \n",
2241 | " 2.420163 | \n",
2242 | " -0.744173 | \n",
2243 | " -0.479768 | \n",
2244 | " -0.266538 | \n",
2245 | " 0.159923 | \n",
2246 | " 1.550184 | \n",
2247 | "
\n",
2248 | " \n",
2249 | " 1095 | \n",
2250 | " 3.495833e-09 | \n",
2251 | " 1.0 | \n",
2252 | " -1.680337 | \n",
2253 | " 1.461716 | \n",
2254 | " -1.488154 | \n",
2255 | " -0.810934 | \n",
2256 | " 0.241501 | \n",
2257 | " 0.645697 | \n",
2258 | " 1.184117 | \n",
2259 | "
\n",
2260 | " \n",
2261 | "
\n",
2262 | "
1096 rows × 9 columns
\n",
2263 | "
"
2264 | ],
2265 | "text/plain": [
2266 | " mean std min max 0.1 0.25 0.5 0.75 0.9\n",
2267 | "0 -1.041667e-09 1.0 -1.593083 1.464375 -1.372442 -0.805078 0.030207 0.936412 1.218518\n",
2268 | "1 -1.958333e-09 1.0 -1.630917 1.201393 -1.533955 -0.999388 0.384871 0.735720 1.084018\n",
2269 | "2 -1.775000e-09 1.0 -1.397118 2.349344 -1.003740 -0.741487 -0.132687 0.265374 1.515756\n",
2270 | "3 -8.541667e-10 1.0 -1.646458 1.344487 -1.476779 -0.898722 0.266022 0.776495 1.039641\n",
2271 | "4 -3.416667e-09 1.0 -1.620240 1.303502 -1.511644 -0.978061 0.405495 0.692648 1.061249\n",
2272 | "... ... ... ... ... ... ... ... ... ...\n",
2273 | "1091 -1.041667e-09 1.0 -1.817799 1.630397 -1.323058 -0.643414 0.081208 0.568453 1.390523\n",
2274 | "1092 -4.166666e-10 1.0 -1.550077 1.513605 -1.343747 -0.768526 0.075550 0.857101 1.276013\n",
2275 | "1093 4.166667e-09 1.0 -1.706992 1.052255 -1.498879 -1.139943 0.467669 0.713195 0.993797\n",
2276 | "1094 1.583333e-09 1.0 -1.673857 2.420163 -0.744173 -0.479768 -0.266538 0.159923 1.550184\n",
2277 | "1095 3.495833e-09 1.0 -1.680337 1.461716 -1.488154 -0.810934 0.241501 0.645697 1.184117\n",
2278 | "\n",
2279 | "[1096 rows x 9 columns]"
2280 | ]
2281 | },
2282 | "execution_count": 33,
2283 | "metadata": {},
2284 | "output_type": "execute_result"
2285 | }
2286 | ],
2287 | "source": [
2288 | "# example of a series-to-primitive transformer\n",
2289 | "from sktime.transformations.series.summarize import SummaryTransformer\n",
2290 | "\n",
2291 | "# specify summary transformer\n",
2292 | "summary_trafo = SummaryTransformer()\n",
2293 | "\n",
2294 | "# extract summary features - one per instance in the panel\n",
2295 | "X_summaries = summary_trafo.fit_transform(X)\n",
2296 | "X_summaries"
2297 | ]
2298 | },
2299 | {
2300 | "attachments": {},
2301 | "cell_type": "markdown",
2302 | "metadata": {},
2303 | "source": [
2304 | "just like classifiers, we can search for transformers of either type via the right tag:\n",
2305 | "\n",
2306 | "* `\"scitype:transform-input\"` and `\"scitype:transform-output\"` define input and output, e.g., \"series-to-series\" (both are scitype strings)\n",
2307 | "* `\"scitype:instancewise\"` is boolean and tells us whether the transform is instance-wise"
2308 | ]
2309 | },
2310 | {
2311 | "cell_type": "code",
2312 | "execution_count": 34,
2313 | "metadata": {},
2314 | "outputs": [
2315 | {
2316 | "data": {
2317 | "text/html": [
2318 | "\n",
2319 | "\n",
2332 | "
\n",
2333 | " \n",
2334 | " \n",
2335 | " | \n",
2336 | " name | \n",
2337 | " object | \n",
2338 | "
\n",
2339 | " \n",
2340 | " \n",
2341 | " \n",
2342 | " 0 | \n",
2343 | " Catch22 | \n",
2344 | " <class 'sktime.transformations.panel.catch22.C... | \n",
2345 | "
\n",
2346 | " \n",
2347 | " 1 | \n",
2348 | " Catch22Wrapper | \n",
2349 | " <class 'sktime.transformations.panel.catch22wr... | \n",
2350 | "
\n",
2351 | " \n",
2352 | " 2 | \n",
2353 | " FittedParamExtractor | \n",
2354 | " <class 'sktime.transformations.panel.summarize... | \n",
2355 | "
\n",
2356 | " \n",
2357 | " 3 | \n",
2358 | " RandomIntervalFeatureExtractor | \n",
2359 | " <class 'sktime.transformations.panel.summarize... | \n",
2360 | "
\n",
2361 | " \n",
2362 | " 4 | \n",
2363 | " RandomIntervals | \n",
2364 | " <class 'sktime.transformations.panel.random_in... | \n",
2365 | "
\n",
2366 | " \n",
2367 | " 5 | \n",
2368 | " RandomShapeletTransform | \n",
2369 | " <class 'sktime.transformations.panel.shapelet_... | \n",
2370 | "
\n",
2371 | " \n",
2372 | " 6 | \n",
2373 | " SignatureTransformer | \n",
2374 | " <class 'sktime.transformations.panel.signature... | \n",
2375 | "
\n",
2376 | " \n",
2377 | " 7 | \n",
2378 | " SummaryTransformer | \n",
2379 | " <class 'sktime.transformations.series.summariz... | \n",
2380 | "
\n",
2381 | " \n",
2382 | " 8 | \n",
2383 | " TSFreshFeatureExtractor | \n",
2384 | " <class 'sktime.transformations.panel.tsfresh.T... | \n",
2385 | "
\n",
2386 | " \n",
2387 | " 9 | \n",
2388 | " Tabularizer | \n",
2389 | " <class 'sktime.transformations.panel.reduce.Ta... | \n",
2390 | "
\n",
2391 | " \n",
2392 | " 10 | \n",
2393 | " TimeBinner | \n",
2394 | " <class 'sktime.transformations.panel.reduce.Ti... | \n",
2395 | "
\n",
2396 | " \n",
2397 | "
\n",
2398 | "
"
2399 | ],
2400 | "text/plain": [
2401 | " name object\n",
2402 | "0 Catch22 #sk-6e583018-f2e4-47c7-a55a-f306e319e22c {color: black;background-color: white;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c pre{padding: 0;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-toggleable {background-color: white;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-estimator {font-family: 
monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-estimator:hover {background-color: #d4ebff;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-item {z-index: 1;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-parallel-item:only-child::after {width: 0;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-label label {font-family: monospace;font-weight: 
bold;background-color: white;display: inline-block;line-height: 1.2em;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-6e583018-f2e4-47c7-a55a-f306e319e22c div.sk-text-repr-fallback {display: none;}ClassifierPipeline(classifier=KNeighborsTimeSeriesClassifier(),\n",
2477 | " transformers=[ExponentTransformer()])
Please rerun this cell to show the HTML repr or trust the notebook. "
2479 | ],
2480 | "text/plain": [
2481 | "ClassifierPipeline(classifier=KNeighborsTimeSeriesClassifier(),\n",
2482 | " transformers=[ExponentTransformer()])"
2483 | ]
2484 | },
2485 | "execution_count": 35,
2486 | "metadata": {},
2487 | "output_type": "execute_result"
2488 | }
2489 | ],
2490 | "source": [
2491 | "from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier\n",
2492 | "from sktime.transformations.series.exponent import ExponentTransformer\n",
2493 | "\n",
2494 | "pipe = ExponentTransformer() * KNeighborsTimeSeriesClassifier()\n",
2495 | "\n",
2496 | "# this constructs a ClassifierPipeline, which is also a classifier\n",
2497 | "pipe"
2498 | ]
2499 | },
2500 | {
2501 | "cell_type": "code",
2502 | "execution_count": 36,
2503 | "metadata": {},
2504 | "outputs": [],
2505 | "source": [
2506 | "# alternative to construct:\n",
2507 | "from sktime.pipeline import make_pipeline\n",
2508 | "\n",
2509 | "pipe = make_pipeline(ExponentTransformer(), KNeighborsTimeSeriesClassifier())"
2510 | ]
2511 | },
2512 | {
2513 | "cell_type": "code",
2514 | "execution_count": 37,
2515 | "metadata": {},
2516 | "outputs": [
2517 | {
2518 | "data": {
2519 | "text/html": [
2520 | "ClassifierPipeline(classifier=KNeighborsTimeSeriesClassifier(),\n",
2521 | " transformers=[ExponentTransformer()])
Please rerun this cell to show the HTML repr or trust the notebook. "
2523 | ],
2524 | "text/plain": [
2525 | "ClassifierPipeline(classifier=KNeighborsTimeSeriesClassifier(),\n",
2526 | " transformers=[ExponentTransformer()])"
2527 | ]
2528 | },
2529 | "execution_count": 37,
2530 | "metadata": {},
2531 | "output_type": "execute_result"
2532 | }
2533 | ],
2534 | "source": [
2535 | "from sktime.datasets import load_unit_test\n",
2536 | "\n",
2537 | "X_train, y_train = load_unit_test(split=\"TRAIN\")\n",
2538 | "X_test, _ = load_unit_test(split=\"TEST\")\n",
2539 | "\n",
2540 | "# this is a classifier with the same interface as knn-classifier\n",
2541 | "# first applies exponent transform, then knn-classifier\n",
2542 | "pipe.fit(X_train, y_train)"
2543 | ]
2544 | },
2545 | {
2546 | "attachments": {},
2547 | "cell_type": "markdown",
2548 | "metadata": {},
2549 | "source": [
2550 | "`sktime` transformers pipeline with `sklearn` classifiers!\n",
2551 | "\n",
2552 | "This allows building \"time series feature extraction, then `sklearn` classify\" pipelines:"
2553 | ]
2554 | },
2555 | {
2556 | "cell_type": "code",
2557 | "execution_count": 38,
2558 | "metadata": {},
2559 | "outputs": [
2560 | {
2561 | "data": {
2562 | "text/html": [
2563 | "SklearnClassifierPipeline(classifier=RandomForestClassifier(),\n",
2564 | " transformers=[SummaryTransformer()])
Please rerun this cell to show the HTML repr or trust the notebook. "
2566 | ],
2567 | "text/plain": [
2568 | "SklearnClassifierPipeline(classifier=RandomForestClassifier(),\n",
2569 | " transformers=[SummaryTransformer()])"
2570 | ]
2571 | },
2572 | "execution_count": 38,
2573 | "metadata": {},
2574 | "output_type": "execute_result"
2575 | }
2576 | ],
2577 | "source": [
2578 | "from sklearn.ensemble import RandomForestClassifier\n",
2579 | "from sktime.transformations.series.summarize import SummaryTransformer\n",
2580 | "\n",
2581 | "# specify pipeline: summary transformer, then sklearn random forest classifier\n",
2582 | "summary_rf = SummaryTransformer() * RandomForestClassifier()\n",
2583 | "\n",
2584 | "summary_rf.fit(X_train, y_train)"
2585 | ]
2586 | },
2587 | {
2588 | "attachments": {},
2589 | "cell_type": "markdown",
2590 | "metadata": {},
2591 | "source": [
2592 | "### 5.4.3 Using transformers to deal with unequal length or missing values"
2593 | ]
2594 | },
2595 | {
2596 | "attachments": {},
2597 | "cell_type": "markdown",
2598 | "metadata": {},
2599 | "source": [
2600 | "pro tip: useful transformers to pipeline are those that \"improve\" capabilities!\n",
2601 | "\n",
2602 | "Search for these transformer tags:\n",
2603 | "\n",
2604 | "* `\"capability:unequal_length:removes\"` - ensures all instances in the panel have equal length afterwards. Examples: padding, cutting, resampling.\n",
2605 | "* `\"capability:missing_values:removes\"` - removes all missing values from the data (e.g., series, panel) passed to it. Example: mean imputation"
2606 | ]
2607 | },
2608 | {
2609 | "cell_type": "code",
2610 | "execution_count": 39,
2611 | "metadata": {},
2612 | "outputs": [
2613 | {
2614 | "data": {
2615 | "text/html": [
2616 | "\n",
2617 | "\n",
2630 | "
\n",
2631 | " \n",
2632 | " \n",
2633 | " | \n",
2634 | " name | \n",
2635 | " object | \n",
2636 | "
\n",
2637 | " \n",
2638 | " \n",
2639 | " \n",
2640 | " 0 | \n",
2641 | " ClearSky | \n",
2642 | " <class 'sktime.transformations.series.clear_sk... | \n",
2643 | "
\n",
2644 | " \n",
2645 | " 1 | \n",
2646 | " IntervalSegmenter | \n",
2647 | " <class 'sktime.transformations.panel.segment.I... | \n",
2648 | "
\n",
2649 | " \n",
2650 | " 2 | \n",
2651 | " PaddingTransformer | \n",
2652 | " <class 'sktime.transformations.panel.padder.Pa... | \n",
2653 | "
\n",
2654 | " \n",
2655 | " 3 | \n",
2656 | " RandomIntervalSegmenter | \n",
2657 | " <class 'sktime.transformations.panel.segment.R... | \n",
2658 | "
\n",
2659 | " \n",
2660 | " 4 | \n",
2661 | " SlopeTransformer | \n",
2662 | " <class 'sktime.transformations.panel.slope.Slo... | \n",
2663 | "
\n",
2664 | " \n",
2665 | " 5 | \n",
2666 | " TimeBinAggregate | \n",
2667 | " <class 'sktime.transformations.series.binning.... | \n",
2668 | "
\n",
2669 | " \n",
2670 | " 6 | \n",
2671 | " TruncationTransformer | \n",
2672 | " <class 'sktime.transformations.panel.truncatio... | \n",
2673 | "
\n",
2674 | " \n",
2675 | "
\n",
2676 | "
"
2677 | ],
2678 | "text/plain": [
2679 | " name object\n",
2680 | "0 ClearSky \n",
2710 | "\n",
2723 | "\n",
2724 | " \n",
2725 | " \n",
2726 | " | \n",
2727 | " name | \n",
2728 | " object | \n",
2729 | "
\n",
2730 | " \n",
2731 | " \n",
2732 | " \n",
2733 | " 0 | \n",
2734 | " ClearSky | \n",
2735 | " <class 'sktime.transformations.series.clear_sk... | \n",
2736 | "
\n",
2737 | " \n",
2738 | " 1 | \n",
2739 | " Imputer | \n",
2740 | " <class 'sktime.transformations.series.impute.I... | \n",
2741 | "
\n",
2742 | " \n",
2743 | "
\n",
2744 | ""
2745 | ],
2746 | "text/plain": [
2747 | " name object\n",
2748 | "0 ClearSky /model --env-manager local --host 127.0.0.1\n",
300 | "```\n",
301 | "\n",
302 | "with `run_id` as obtained in the \"save\" step.\n",
303 | "\n",
304 | "Then, run the below model scoring script to request a prediction from the served model."
305 | ]
306 | },
307 | {
308 | "attachments": {},
309 | "cell_type": "markdown",
310 | "id": "d889b702",
311 | "metadata": {},
312 | "source": [
313 | "for serving the model to an **endpoint in the cloud** (e.g. Azure ML, AWS SageMaker, etc.):\n",
314 | "\n",
315 | "use [MLflow deployment tools](https://mlflow.org/docs/latest/models.html#built-in-deployment-tools):"
316 | ]
317 | },
318 | {
319 | "cell_type": "code",
320 | "execution_count": null,
321 | "id": "58c11c04",
322 | "metadata": {},
323 | "outputs": [],
324 | "source": [
325 | "import pandas as pd\n",
326 | "import requests\n",
327 | "from sktime.datasets import load_longley\n",
328 | "from sktime.forecasting.model_selection import temporal_train_test_split\n",
329 | "\n",
330 | "y, X = load_longley()\n",
331 | "y_train, y_test, X_train, X_test = temporal_train_test_split(y, X)\n",
332 | "\n",
333 | "# Define local host and endpoint url\n",
334 | "host = \"127.0.0.1\"\n",
335 | "url = f\"http://{host}:5000/invocations\"\n",
336 | "\n",
337 | "# Model scoring via REST API requires transforming the configuration DataFrame\n",
338 | "# into JSON format. As numpy ndarray type is not JSON serializable we need to\n",
339 | "# convert the exogenous regressor into a list. The wrapper instance will convert\n",
340 | "# the list back to ndarray type as required by sktime predict methods. For more\n",
341 | "# details read the MLflow deployment API reference.\n",
342 | "# (https://mlflow.org/docs/latest/models.html#deploy-mlflow-models)\n",
343 | "X_test_list = X_test.to_numpy().tolist()\n",
344 | "predict_conf = pd.DataFrame(\n",
345 | " [\n",
346 | " {\n",
347 | " \"fh\": [1, 2, 3],\n",
348 | " \"predict_method\": \"predict_interval\",\n",
349 | " \"coverage\": [0.9, 0.95],\n",
350 | " \"X\": X_test_list,\n",
351 | " }\n",
352 | " ]\n",
353 | ")\n",
354 | "\n",
355 | "# Create dictionary with pandas DataFrame in the split orientation\n",
356 | "json_data = {\"dataframe_split\": predict_conf.to_dict(orient=\"split\")}\n",
357 | "\n",
358 | "# Score model\n",
359 | "response = requests.post(url, json=json_data)\n",
360 | "print(response.json())"
361 | ]
362 | },
363 | {
364 | "attachments": {},
365 | "cell_type": "markdown",
366 | "id": "2d1c59a2",
367 | "metadata": {},
368 | "source": [
369 | "---\n",
370 | "### Credits: notebook 6 - deploy to production with mlflow / mlflavors\n",
371 | "\n",
372 | "notebook creation: benjaminbluhm\n",
373 | "\n",
374 | "minor rearranging by fkiraly\n",
375 | "\n",
376 | "mlflavors, `sktime` mlflow interface: benjaminbluhm"
377 | ]
378 | }
379 | ],
380 | "metadata": {
381 | "kernelspec": {
382 | "display_name": "Python 3 (ipykernel)",
383 | "language": "python",
384 | "name": "python3"
385 | },
386 | "language_info": {
387 | "codemirror_mode": {
388 | "name": "ipython",
389 | "version": 3
390 | },
391 | "file_extension": ".py",
392 | "mimetype": "text/x-python",
393 | "name": "python",
394 | "nbconvert_exporter": "python",
395 | "pygments_lexer": "ipython3",
396 | "version": "3.10.11"
397 | }
398 | },
399 | "nbformat": 4,
400 | "nbformat_minor": 5
401 | }
402 |
--------------------------------------------------------------------------------
/notebooks/09_outro.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "attachments": {},
5 | "cell_type": "markdown",
6 | "id": "37997396",
7 | "metadata": {},
8 | "source": [
9 | "# Summary & get involved!"
10 | ]
11 | },
12 | {
13 | "attachments": {},
14 | "cell_type": "markdown",
15 | "id": "9936be02",
16 | "metadata": {},
17 | "source": [
18 | "- `sklearn` / `sktime` interface:\n",
19 | " - unified interface for objects/estimators\n",
20 | " - modular design, strategy pattern\n",
21 | " - composable, composites are interface homogeneous\n",
22 | " - simple specification language and parameter interface\n",
23 | " - visually informative pretty printing"
24 | ]
25 | },
26 | {
27 | "attachments": {},
28 | "cell_type": "markdown",
29 | "id": "7820cc3c",
30 | "metadata": {},
31 | "source": [
32 | "* Forecasting module:\n",
33 | " - Univariate/multivariate forecasting (stats and ML)\n",
34 | " - Use of exogenous data\n",
35 | " - Probabilistic forecasting\n",
36 | " - Hierarchical forecasting"
37 | ]
38 | },
39 | {
40 | "attachments": {},
41 | "cell_type": "markdown",
42 | "id": "cd081e8c",
43 | "metadata": {},
44 | "source": [
45 | "* Transformers & pipelines\n",
46 | "\n",
47 | " * sequential pipelines\n",
48 | " * feature engineering, postproc\n",
49 | " * dunders `*`, `+` etc\n",
50 | " * tuning: parameter est., backtest-gridsearch, autoML"
51 | ]
52 | },
53 | {
54 | "attachments": {},
55 | "cell_type": "markdown",
56 | "id": "4a55d803",
57 | "metadata": {},
58 | "source": [
59 | "* APIs, engineering & deployment\n",
60 | "\n",
61 | " * modular unified framework interface\n",
62 | " * multiple learning tasks, \"what is my task\" guide\n",
63 | " * dependency management at estimator level\n",
64 | " * 3rd party extensible via templates & test suite\n",
65 | " * deploy via `mlflow` / `mlflavors`"
66 | ]
67 | },
68 | {
69 | "attachments": {},
70 | "cell_type": "markdown",
71 | "id": "fbb8d3c5",
72 | "metadata": {},
73 | "source": [
74 | "Further reading:\n",
75 | "\n",
76 | "* main `sktime` [tutorials on binder](https://mybinder.org/v2/gh/sktime/sktime/main?filepath=examples)\n",
77 | "* recorded [video tutorials](https://www.youtube.com/playlist?list=PLKs3UgGjlWHqNzu0LEOeLKvnjvvest2d0)\n",
78 | "* find a bug or typo? [tutorial feedback thread](https://github.com/sktime/sktime/issues/1447)"
79 | ]
80 | },
81 | {
82 | "attachments": {},
83 | "cell_type": "markdown",
84 | "id": "b66e01a8",
85 | "metadata": {},
86 | "source": [
87 | "---"
88 | ]
89 | },
90 | {
91 | "attachments": {},
92 | "cell_type": "markdown",
93 | "id": "b6da2ae3",
94 | "metadata": {},
95 | "source": [
96 | "## Join sktime!\n",
97 | "\n",
98 | "### Vision statement\n",
99 | "\n",
100 | "* an easy-to-use, easy-to-extend, comprehensive **python framework** for ML and AI with time series\n",
101 | "* **open source, permissive license, free to use**\n",
102 | "* **openly and transparently governed**\n",
103 | "* **friendly, responsive, kind and inclusive** community, with an active commitment to ensure fairness and equal opportunity\n",
104 | "* an academically and commercially **neutral space**, with an **ecosystem integration** ambition and neutral point of view\n",
105 | "* an **educational platform**, providing mentoring and upskilling opportunities for all career stages, especially early career\n",
106 | "\n",
107 | "https://opendatascience.com/sktime-python-toolbox-for-machine-learning-with-time-series/\n",
108 | "\n",
109 | "**EVERYONE CAN JOIN! EVERYONE CAN BECOME A COMMUNITY LEADER!**\n",
110 | "\n",
111 | "* join our community discord ([join link](https://discord.com/invite/54ACzaFsn7))!\n",
112 | " * **help-desk for Q&A** and getting started as a user!\n",
113 | " * **dev-chat** for help getting started with open source!\n",
114 | " * contributor [getting started guide](https://github.com/sktime/sktime/issues/1147)\n",
115 | " * [good first issues](https://github.com/sktime/sktime/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22)\n",
116 | "* join `sktime`'s user representatives or governance working group\n",
117 | " * register as a user ([form](https://forms.gle/eVuzrCjKDRupxawL7)) - roadmap, bugfix prio, elections\n",
118 | " * [roadmap 2023-2024 planning](https://github.com/sktime/sktime/issues/4691)\n",
119 | " * join [council sessions](https://github.com/sktime/community-org/tree/main/community_council/previous_meetings) and give input\n",
120 | "\n",
121 | "Opportunities:\n",
122 | "\n",
123 | "* regular **job opportunities**, watch the jobs channel on discord\n",
124 | "* sktime **mentoring programme**: [link](https://github.com/sktime/mentoring)\n",
125 | " * applications on a rolling basis\n",
126 | " * summer programme is starting over the next weeks!\n",
127 | "\n",
128 | "Events & meetups:\n",
129 | "\n",
130 | "* regular **community collaboration sessions**\n",
131 | " * meet-ups Fri 3pm UTC on [discord](https://discord.com/invite/54ACzaFsn7)\n",
132 | "* multiple **Sprints and Dev Days** per year\n",
133 | " * [EuroPython 2023, Prague](https://ep2023.europython.eu/), 22-23 July week-end:\n",
134 | " * new contributor **onboarding sprint**!\n",
135 | " * **user feedback session**!\n",
136 | " * **developer meet-up**!\n",
137 | "\n",
138 | "Support us if `sktime` has generated value for you!\n",
139 | "\n",
140 | "* star us on [GitHub](https://github.com/sktime/sktime)\n",
141 | "* follow us on [LinkedIn](https://www.linkedin.com/company/scikit-time/)\n",
142 | "* donate! Every cent helps the time series ecosystem ([GitHub sponsors](https://github.com/sponsors/sktime))"
143 | ]
144 | },
145 | {
146 | "attachments": {},
147 | "cell_type": "markdown",
148 | "id": "adfa5420",
149 | "metadata": {},
150 | "source": [
151 | "---"
152 | ]
153 | },
154 | {
155 | "attachments": {},
156 | "cell_type": "markdown",
157 | "id": "286d4c8d",
158 | "metadata": {},
159 | "source": [
160 | "\n",
161 | "## Thank you for your attention\n",
162 | "\n",
163 | "
"
164 | ]
165 | },
166 | {
167 | "attachments": {},
168 | "cell_type": "markdown",
169 | "id": "2d1c59a2",
170 | "metadata": {},
171 | "source": [
172 | "---\n",
173 | "### Credits: sktime\n",
174 | "\n",
175 | "#### many thanks to [all `sktime` contributors!](https://www.sktime.net/en/latest/about/contributors.html)\n",
176 | "\n",
177 | "Citations & credits in academic research papers:\n",
178 | "\n",
179 | "`sktime` toolbox:\n",
180 | " [sktime: A unified interface for machine learning with time series](https://arxiv.org/abs/1909.07872)\n",
181 | "\n",
182 | "`sktime` design principles: [Designing machine learning toolboxes: Concepts, principles and patterns](https://arxiv.org/abs/2101.04938)"
183 | ]
184 | }
185 | ],
186 | "metadata": {
187 | "kernelspec": {
188 | "display_name": "Python 3 (ipykernel)",
189 | "language": "python",
190 | "name": "python3"
191 | },
192 | "language_info": {
193 | "codemirror_mode": {
194 | "name": "ipython",
195 | "version": 3
196 | },
197 | "file_extension": ".py",
198 | "mimetype": "text/x-python",
199 | "name": "python",
200 | "nbconvert_exporter": "python",
201 | "pygments_lexer": "ipython3",
202 | "version": "3.10.11"
203 | }
204 | },
205 | "nbformat": 4,
206 | "nbformat_minor": 5
207 | }
208 |
--------------------------------------------------------------------------------
/notebooks/hierarchical_demo_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sktime.utils._testing.hierarchical import _make_hierarchical
3 |
4 |
def load_product_hierarchy(n_years=5, random_state=0):
    """Build a synthetic monthly sales table for a two-level product hierarchy.

    Daily data is generated with sktime's ``_make_hierarchical`` testing
    utility, relabelled to product names, aggregated to calendar months and
    finally perturbed with multiplicative Gaussian noise.

    The resulting hierarchy is:

    * "Food preparation": "Hobs", "Ovens"
    * "Food preservation": "Fridges", "Freezers"

    Parameters
    ----------
    n_years : int, default=5
        Length of the generated daily history, expressed in blocks of 365
        days (``365 * n_years`` time points per series). Must be at least 1.
    random_state : int, default=0
        Seed passed both to ``_make_hierarchical`` and to the NumPy random
        generator that draws the noise, so the output is reproducible.

    Returns
    -------
    y : pandas.DataFrame
        Frame indexed by ("Product line", "Product group", "Date"), where
        "Date" holds monthly periods. Values are rounded to zero decimals
        (they are rounded, not cast to an integer dtype).

    Raises
    ------
    ValueError
        If ``n_years`` is smaller than 1.
    """
    if n_years < 1:
        raise ValueError(f"n_years must be at least 1, but found {n_years}")

    # Same number of daily observations for every series in the hierarchy.
    n_timepoints = 365 * n_years

    # Generate daily history, then rename index labels and columns to the
    # product hierarchy described in the docstring.
    y = (
        _make_hierarchical(
            hierarchy_levels=(2, 4),
            min_timepoints=n_timepoints,
            max_timepoints=n_timepoints,
            random_state=random_state,
        )
        # The generator is asked for 2 x 4 nodes; keep only two product
        # groups under each product line.
        .drop(
            index=[
                ("h0_0", "h1_2"),
                ("h0_0", "h1_3"),
                ("h0_1", "h1_0"),
                ("h0_1", "h1_1"),
            ]
        )
        .rename(
            index={
                "h0_0": "Food preparation",
                "h0_1": "Food preservation",
                "h1_0": "Hobs",
                "h1_1": "Ovens",
                "h1_2": "Fridges",
                "h1_3": "Freezers",
            }
        )
        .reset_index()
        .rename(
            columns={
                "h0": "Product line",
                "h1": "Product group",
                "time": "Date",
                "c0": "Sales",
            }
        )
    )

    # Convert the daily timestamps to monthly periods and sum within each month.
    y["Date"] = y["Date"].dt.to_period("M")
    y = y.groupby(by=["Product line", "Product group", "Date"]).sum()

    # Multiply by noise centred on 1 so that the individual series differ,
    # then round to whole numbers.
    noise = np.random.RandomState(seed=random_state).normal(1, 0.3, np.shape(y))
    y = (y * noise).round(0)

    return y
53 |
--------------------------------------------------------------------------------
/notebooks/img/ask_chatgpt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/ask_chatgpt.png
--------------------------------------------------------------------------------
/notebooks/img/estimator-conceptual-model.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/estimator-conceptual-model.jpg
--------------------------------------------------------------------------------
/notebooks/img/implementing_estimators.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/implementing_estimators.jpg
--------------------------------------------------------------------------------
/notebooks/img/implementing_estimators.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/implementing_estimators.png
--------------------------------------------------------------------------------
/notebooks/img/sklearn-unified-interface.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/sklearn-unified-interface.jpg
--------------------------------------------------------------------------------
/notebooks/img/sktime-logo-text-horizontal.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/sktime-logo-text-horizontal.jpg
--------------------------------------------------------------------------------
/notebooks/img/tasks-forecasting.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/tasks-forecasting.jpg
--------------------------------------------------------------------------------
/notebooks/img/tasks-forecasting.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/tasks-forecasting.png
--------------------------------------------------------------------------------
/notebooks/img/tasks-tsc-large.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/tasks-tsc-large.png
--------------------------------------------------------------------------------
/notebooks/img/tasks-tsc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/tasks-tsc.png
--------------------------------------------------------------------------------
/notebooks/img/ts-tasks.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/ts-tasks.jpg
--------------------------------------------------------------------------------
/notebooks/img/ts-tasks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/ts-tasks.png
--------------------------------------------------------------------------------
/notebooks/img/unified_framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/unified_framework.png
--------------------------------------------------------------------------------
/notebooks/img/verdena_shapelet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/sktime-tutorial-europython-2023/111677ab898cce36085fd4bc3a000cae95e2112b/notebooks/img/verdena_shapelet.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | sktime[all_extras]==0.20.1
2 | mlflavors==0.1.0
3 |
--------------------------------------------------------------------------------