├── .gitignore
├── README.md
├── imgs
├── diagram1.png
├── diagram2.png
├── icon.png
└── livestream.png
├── scikit_churn
└── __init__.py
├── setup.py
└── why-scikit-churn.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | *.parquet
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | > This repository is currently mainly here to get the ball rolling on some ideas. If there is sufficient interest we'll gladly invest more time in it, but first we'd like to get some feedback.
4 | >
5 | > We're mainly eager to confirm if this line of work is relevant to industry.
6 |
7 | ## Livestream
8 |
9 |
10 | [![Livestream](imgs/livestream.png)](https://www.youtube.com/watch?v=uevp7zJTM_c)
11 |
12 | This work was part of a [YouTube livestream](https://www.youtube.com/watch?v=uevp7zJTM_c) and it may help to watch for some extra context.
13 |
14 | # scikit-churn
15 |
16 | This repository explores some moments where data leakage can occur in churn-related use-cases. You may need more than a scikit-learn pipeline to protect you against temporal data leaks.
17 |
18 | The approach for now is to construct a generator that can generate `X`,`y` pairs from temporal data. You'd still need to provide a function to construct features, but the generator can at least guarantee that you're only generating features that don't peek into the future.
19 |
20 | Imagine that you're interested in doing some churn research. That is to say, you're interested in predicting if somebody is going to be around in the future. Then you might want to construct your data using the following conceptual model.
21 |
22 | 
23 |
24 | There's a "checking" period where you're going to check who is still around. And this is information that you can use to construct a label. There's also an "input" period in which you're going to construct features for a machine learning model.
25 |
26 | But here is where you have to be very careful! You should only use the checking period to understand which customers are still around. If you use any data from this period to generate ML features, or from any part after the input period, then you are at risk of leaking data that your machine learning model does not have access to in real life.
27 |
28 | So one of the first features this library offers is to give you a safe way to generate these `X`, `y` pairs over time. Conceptually, it currently allows you to construct features from timeslots over time.
29 |
30 | 
31 |
32 | To do such things in this library, you might do something like:
33 |
34 | ```python
35 | from scikit_churn import churn_dataset_generator
36 |
37 | gen = churn_dataset_generator(
38 | df,
39 | user_id="player_id",
40 | time_col="datetime",
41 | info_period=90,
42 | checking_period=30,
43 | start_date=datetime(2007, 1, 1),
44 | end_date=datetime(2007, 12, 31),
45 | feature_pipeline=feature_pipeline
46 | )
47 |
48 | for X, y in gen:
49 | scorers = {
50 | "accuracy": make_scorer(accuracy_score),
51 | "precision": make_scorer(precision_score),
52 | "recall": make_scorer(recall_score)
53 | }
54 | # Cross validate your pipeline as you might normally. Maybe even gridsearch?
55 | print(pl.DataFrame(cross_validate(pipe, X, y, cv=5, scoring=scorers)))
56 | ```
57 |
58 | The `churn_dataset_generator` function has the following arguments.
59 |
60 | - `df`: a Polars dataframe that contains logs over time for users
61 | - `user_id`: the column name that depicts the user id
62 | - `feature_pipeline`: a Polars compatible function that generates ML features to go in `X` from the inserted dataframe
63 | - `info_period`: the number of days that the input period lasts
64 | - `checking_period`: the number of days that the checking period lasts
65 | - `start_date`: the start date for X,y-pair generation
66 | - `end_date`: the end date for X,y-pair generation
67 | - `step`: stepsize over time for new X,y-pairs. defaults to a month.
68 | - `time_col`: column name that depicts the datetime stamp
69 |
70 | You can explore this function by copying it from the `scikit_churn/__init__.py` file.
71 |
72 | This repository may grow, but for now we're eager to understand industry problems around these kinds of churn analyses better. We're particularly interested to see what one might do to remedy these temporal leakage issues for scikit-learn applications.
73 |
--------------------------------------------------------------------------------
/imgs/diagram1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koaning/scikit-churn/f5452c594d24e2a816b98971a912632de04b92ba/imgs/diagram1.png
--------------------------------------------------------------------------------
/imgs/diagram2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koaning/scikit-churn/f5452c594d24e2a816b98971a912632de04b92ba/imgs/diagram2.png
--------------------------------------------------------------------------------
/imgs/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koaning/scikit-churn/f5452c594d24e2a816b98971a912632de04b92ba/imgs/icon.png
--------------------------------------------------------------------------------
/imgs/livestream.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koaning/scikit-churn/f5452c594d24e2a816b98971a912632de04b92ba/imgs/livestream.png
--------------------------------------------------------------------------------
/scikit_churn/__init__.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import polars as pl
3 | from datetime import datetime, timedelta
4 |
5 |
def churn_dataset_generator(df, user_id, feature_pipeline,
                            info_period=180,
                            checking_period=180,
                            start_date=datetime(2007, 1, 1),
                            end_date=datetime(2007, 12, 31),
                            step="1mo",
                            time_col="datetime"):
    """
    Generates X,y pairs for churn related machine learning, with way less temporal data leaks to worry about.

    For every cutoff between `start_date` and `end_date` the rows of `df` are
    split into an input window `[cutoff - info_period, cutoff)` that is handed
    to `feature_pipeline`, and a checking window
    `[cutoff, cutoff + checking_period)` that is only used to see which users
    still show up. Cutoffs whose windows are not fully covered by the data in
    `df` are skipped.

    Arguments:

    - df: a Polars dataframe that contains logs over time for users
    - user_id: the column name that depicts the user id
    - feature_pipeline: a Polars compatible function that generates ML features to go in `X`;
      its output must keep the `user_id` column because the labels are joined on it
    - info_period: the number of days that the input period lasts
    - checking_period: the number of days that the checking period lasts
    - start_date: the start date for X,y-pair generation
    - end_date: the end date for X,y-pair generation
    - step: stepsize over time for new X,y-pairs. defaults to a month.
    - time_col: column name that depicts the datetime stamp

    Yields:

    - X: the output of `feature_pipeline` without the `user_id` and label columns
    - y: an integer numpy array, 1 if the user has at least one row in the
      checking window and 0 otherwise
    """
    cutoffs = pl.datetime_range(start_date, end_date, step, eager=True)
    min_date = df[time_col].min()
    max_date = df[time_col].max()

    for start in cutoffs.to_list():
        info_period_start = start - timedelta(days=info_period)
        checking_period_end = start + timedelta(days=checking_period)

        # Skip cutoffs whose input or checking window would reach outside the
        # observed data; a truncated window would distort features or labels.
        if info_period_start < min_date or checking_period_end > max_date:
            continue

        # Features may only see rows strictly before the cutoff.
        train_info = df.filter(
            pl.col(time_col) < start,
            pl.col(time_col) >= info_period_start,
        )
        # Rows at or after the cutoff are used only to build the label.
        valid_info = df.filter(
            pl.col(time_col) >= start,
            pl.col(time_col) < checking_period_end,
        )

        # One row per user that is seen during the checking window.
        target = valid_info.select(user_id).unique().with_columns(target=True)

        # The left join leaves a null label for users that are absent from the
        # checking window; the when/otherwise turns those nulls into False.
        ml_df = (train_info
                 .pipe(feature_pipeline)
                 .join(target, on=user_id, how="left")
                 .with_columns(target=pl.when(pl.col("target")).then(True).otherwise(False)))

        X = ml_df.drop("target", user_id)
        y = np.array(ml_df["target"]).astype(int)

        yield X, y
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | from setuptools import setup, find_packages
3 |
4 |
# Runtime dependencies. numpy is listed explicitly because
# scikit_churn/__init__.py imports it directly, instead of relying on it being
# pulled in transitively by scikit-learn.
base_packages = [
    "scikit-learn",
    "polars",
    "numpy",
]
8 |
def read(fname):
    """Return the text of ``fname``, resolved relative to this file's directory.

    The file is opened inside a ``with`` block so the handle is closed as soon
    as the content has been read, and it is decoded as UTF-8 so the result
    does not depend on the platform's default locale encoding.
    """
    path = os.path.join(os.path.dirname(__file__), fname)
    with open(path, encoding="utf-8") as handle:
        return handle.read()
11 |
12 |
# Trove classifiers: intended audience, supported interpreters, license, topic.
_classifiers = [
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "License :: OSI Approved :: MIT License",
    "Topic :: Scientific/Engineering",
]

# Package metadata; the README is reused verbatim as the long description.
setup(
    name="scikit-churn",
    version="0.0.1",
    author="Vincent D. Warmerdam",
    description="Remedy temporal data-leaks",
    long_description=read("README.md"),
    long_description_content_type="text/markdown",
    license_files=["LICENSE"],
    classifiers=_classifiers,
    packages=find_packages(exclude=["notebooks"]),
    package_data={},
    install_requires=base_packages,
)
37 |
38 |
--------------------------------------------------------------------------------
/why-scikit-churn.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "f013ee40-4c76-4347-94d2-b14b8b725eea",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import polars as pl\n",
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "id": "d801dad1",
17 | "metadata": {},
18 | "source": [
19 | "Dataset link: [GitHub - koaning/wow-avatar-datasets](https://github.com/koaning/wow-avatar-datasets/blob/main/wow-full.parquet)"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 2,
25 | "id": "3541e18e-da7b-4452-a465-1e15653dd898",
26 | "metadata": {},
27 | "outputs": [],
28 | "source": [
29 | "df = pl.read_parquet(\"wow-full.parquet\")"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 3,
35 | "id": "619df8f5-40fc-4443-8c92-a78ead790398",
36 | "metadata": {},
37 | "outputs": [
38 | {
39 | "data": {
40 | "text/html": [
41 | "
\n",
48 | "
shape: (5, 7)player_id | guild | level | race | class | where | datetime |
---|
i32 | f64 | i8 | cat | cat | cat | datetime[ms] |
46340 | 53.0 | 26 | "Orc" | "Hunter" | "Razorfen Kraul… | 2007-06-12 03:17:48 |
31887 | null | 10 | "Orc" | "Hunter" | "Durotar" | 2007-06-12 03:17:48 |
47258 | null | 15 | "Orc" | "Warrior" | "The Barrens" | 2007-06-12 03:17:48 |
17448 | null | 43 | "Orc" | "Hunter" | "Silverpine For… | 2007-06-12 03:17:48 |
45159 | 104.0 | 57 | "Orc" | "Warlock" | "Winterspring" | 2007-06-12 03:17:53 |
"
49 | ],
50 | "text/plain": [
51 | "shape: (5, 7)\n",
52 | "┌───────────┬───────┬───────┬──────┬─────────┬───────────────────┬─────────────────────┐\n",
53 | "│ player_id ┆ guild ┆ level ┆ race ┆ class ┆ where ┆ datetime │\n",
54 | "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
55 | "│ i32 ┆ f64 ┆ i8 ┆ cat ┆ cat ┆ cat ┆ datetime[ms] │\n",
56 | "╞═══════════╪═══════╪═══════╪══════╪═════════╪═══════════════════╪═════════════════════╡\n",
57 | "│ 46340 ┆ 53.0 ┆ 26 ┆ Orc ┆ Hunter ┆ Razorfen Kraul ┆ 2007-06-12 03:17:48 │\n",
58 | "│ 31887 ┆ null ┆ 10 ┆ Orc ┆ Hunter ┆ Durotar ┆ 2007-06-12 03:17:48 │\n",
59 | "│ 47258 ┆ null ┆ 15 ┆ Orc ┆ Warrior ┆ The Barrens ┆ 2007-06-12 03:17:48 │\n",
60 | "│ 17448 ┆ null ┆ 43 ┆ Orc ┆ Hunter ┆ Silverpine Forest ┆ 2007-06-12 03:17:48 │\n",
61 | "│ 45159 ┆ 104.0 ┆ 57 ┆ Orc ┆ Warlock ┆ Winterspring ┆ 2007-06-12 03:17:53 │\n",
62 | "└───────────┴───────┴───────┴──────┴─────────┴───────────────────┴─────────────────────┘"
63 | ]
64 | },
65 | "execution_count": 3,
66 | "metadata": {},
67 | "output_type": "execute_result"
68 | }
69 | ],
70 | "source": [
71 | "df.head()"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": 4,
77 | "id": "e617ba91-f77a-4149-b777-04f68cb0edf8",
78 | "metadata": {
79 | "scrolled": true
80 | },
81 | "outputs": [
82 | {
83 | "data": {
84 | "application/javascript": [
85 | "(function(root) {\n",
86 | " function now() {\n",
87 | " return new Date();\n",
88 | " }\n",
89 | "\n",
90 | " var force = true;\n",
91 | " var py_version = '3.3.4'.replace('rc', '-rc.').replace('.dev', '-dev.');\n",
92 | " var reloading = false;\n",
93 | " var Bokeh = root.Bokeh;\n",
94 | "\n",
95 | " if (typeof (root._bokeh_timeout) === \"undefined\" || force) {\n",
96 | " root._bokeh_timeout = Date.now() + 5000;\n",
97 | " root._bokeh_failed_load = false;\n",
98 | " }\n",
99 | "\n",
100 | " function run_callbacks() {\n",
101 | " try {\n",
102 | " root._bokeh_onload_callbacks.forEach(function(callback) {\n",
103 | " if (callback != null)\n",
104 | " callback();\n",
105 | " });\n",
106 | " } finally {\n",
107 | " delete root._bokeh_onload_callbacks;\n",
108 | " }\n",
109 | " console.debug(\"Bokeh: all callbacks have finished\");\n",
110 | " }\n",
111 | "\n",
112 | " function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n",
113 | " if (css_urls == null) css_urls = [];\n",
114 | " if (js_urls == null) js_urls = [];\n",
115 | " if (js_modules == null) js_modules = [];\n",
116 | " if (js_exports == null) js_exports = {};\n",
117 | "\n",
118 | " root._bokeh_onload_callbacks.push(callback);\n",
119 | "\n",
120 | " if (root._bokeh_is_loading > 0) {\n",
121 | " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n",
122 | " return null;\n",
123 | " }\n",
124 | " if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n",
125 | " run_callbacks();\n",
126 | " return null;\n",
127 | " }\n",
128 | " if (!reloading) {\n",
129 | " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n",
130 | " }\n",
131 | "\n",
132 | " function on_load() {\n",
133 | " root._bokeh_is_loading--;\n",
134 | " if (root._bokeh_is_loading === 0) {\n",
135 | " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n",
136 | " run_callbacks()\n",
137 | " }\n",
138 | " }\n",
139 | " window._bokeh_on_load = on_load\n",
140 | "\n",
141 | " function on_error() {\n",
142 | " console.error(\"failed to load \" + url);\n",
143 | " }\n",
144 | "\n",
145 | " var skip = [];\n",
146 | " if (window.requirejs) {\n",
147 | " window.requirejs.config({'packages': {}, 'paths': {'jspanel': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/jspanel', 'jspanel-modal': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal', 'jspanel-tooltip': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip', 'jspanel-hint': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint', 'jspanel-layout': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout', 'jspanel-contextmenu': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu', 'jspanel-dock': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock', 'gridstack': 'https://cdn.jsdelivr.net/npm/gridstack@7.2.3/dist/gridstack-all', 'notyf': 'https://cdn.jsdelivr.net/npm/notyf@3/notyf.min'}, 'shim': {'jspanel': {'exports': 'jsPanel'}, 'gridstack': {'exports': 'GridStack'}}});\n",
148 | " require([\"jspanel\"], function(jsPanel) {\n",
149 | "\twindow.jsPanel = jsPanel\n",
150 | "\ton_load()\n",
151 | " })\n",
152 | " require([\"jspanel-modal\"], function() {\n",
153 | "\ton_load()\n",
154 | " })\n",
155 | " require([\"jspanel-tooltip\"], function() {\n",
156 | "\ton_load()\n",
157 | " })\n",
158 | " require([\"jspanel-hint\"], function() {\n",
159 | "\ton_load()\n",
160 | " })\n",
161 | " require([\"jspanel-layout\"], function() {\n",
162 | "\ton_load()\n",
163 | " })\n",
164 | " require([\"jspanel-contextmenu\"], function() {\n",
165 | "\ton_load()\n",
166 | " })\n",
167 | " require([\"jspanel-dock\"], function() {\n",
168 | "\ton_load()\n",
169 | " })\n",
170 | " require([\"gridstack\"], function(GridStack) {\n",
171 | "\twindow.GridStack = GridStack\n",
172 | "\ton_load()\n",
173 | " })\n",
174 | " require([\"notyf\"], function() {\n",
175 | "\ton_load()\n",
176 | " })\n",
177 | " root._bokeh_is_loading = css_urls.length + 9;\n",
178 | " } else {\n",
179 | " root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n",
180 | " }\n",
181 | "\n",
182 | " var existing_stylesheets = []\n",
183 | " var links = document.getElementsByTagName('link')\n",
184 | " for (var i = 0; i < links.length; i++) {\n",
185 | " var link = links[i]\n",
186 | " if (link.href != null) {\n",
187 | "\texisting_stylesheets.push(link.href)\n",
188 | " }\n",
189 | " }\n",
190 | " for (var i = 0; i < css_urls.length; i++) {\n",
191 | " var url = css_urls[i];\n",
192 | " if (existing_stylesheets.indexOf(url) !== -1) {\n",
193 | "\ton_load()\n",
194 | "\tcontinue;\n",
195 | " }\n",
196 | " const element = document.createElement(\"link\");\n",
197 | " element.onload = on_load;\n",
198 | " element.onerror = on_error;\n",
199 | " element.rel = \"stylesheet\";\n",
200 | " element.type = \"text/css\";\n",
201 | " element.href = url;\n",
202 | " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n",
203 | " document.body.appendChild(element);\n",
204 | " } if (((window['jsPanel'] !== undefined) && (!(window['jsPanel'] instanceof HTMLElement))) || window.requirejs) {\n",
205 | " var urls = ['https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/jspanel.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock.js'];\n",
206 | " for (var i = 0; i < urls.length; i++) {\n",
207 | " skip.push(urls[i])\n",
208 | " }\n",
209 | " } if (((window['GridStack'] !== undefined) && (!(window['GridStack'] instanceof HTMLElement))) || window.requirejs) {\n",
210 | " var urls = ['https://cdn.holoviz.org/panel/1.3.8/dist/bundled/gridstack/gridstack@7.2.3/dist/gridstack-all.js'];\n",
211 | " for (var i = 0; i < urls.length; i++) {\n",
212 | " skip.push(urls[i])\n",
213 | " }\n",
214 | " } if (((window['Notyf'] !== undefined) && (!(window['Notyf'] instanceof HTMLElement))) || window.requirejs) {\n",
215 | " var urls = ['https://cdn.holoviz.org/panel/1.3.8/dist/bundled/notificationarea/notyf@3/notyf.min.js'];\n",
216 | " for (var i = 0; i < urls.length; i++) {\n",
217 | " skip.push(urls[i])\n",
218 | " }\n",
219 | " } var existing_scripts = []\n",
220 | " var scripts = document.getElementsByTagName('script')\n",
221 | " for (var i = 0; i < scripts.length; i++) {\n",
222 | " var script = scripts[i]\n",
223 | " if (script.src != null) {\n",
224 | "\texisting_scripts.push(script.src)\n",
225 | " }\n",
226 | " }\n",
227 | " for (var i = 0; i < js_urls.length; i++) {\n",
228 | " var url = js_urls[i];\n",
229 | " if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n",
230 | "\tif (!window.requirejs) {\n",
231 | "\t on_load();\n",
232 | "\t}\n",
233 | "\tcontinue;\n",
234 | " }\n",
235 | " var element = document.createElement('script');\n",
236 | " element.onload = on_load;\n",
237 | " element.onerror = on_error;\n",
238 | " element.async = false;\n",
239 | " element.src = url;\n",
240 | " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n",
241 | " document.head.appendChild(element);\n",
242 | " }\n",
243 | " for (var i = 0; i < js_modules.length; i++) {\n",
244 | " var url = js_modules[i];\n",
245 | " if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n",
246 | "\tif (!window.requirejs) {\n",
247 | "\t on_load();\n",
248 | "\t}\n",
249 | "\tcontinue;\n",
250 | " }\n",
251 | " var element = document.createElement('script');\n",
252 | " element.onload = on_load;\n",
253 | " element.onerror = on_error;\n",
254 | " element.async = false;\n",
255 | " element.src = url;\n",
256 | " element.type = \"module\";\n",
257 | " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n",
258 | " document.head.appendChild(element);\n",
259 | " }\n",
260 | " for (const name in js_exports) {\n",
261 | " var url = js_exports[name];\n",
262 | " if (skip.indexOf(url) >= 0 || root[name] != null) {\n",
263 | "\tif (!window.requirejs) {\n",
264 | "\t on_load();\n",
265 | "\t}\n",
266 | "\tcontinue;\n",
267 | " }\n",
268 | " var element = document.createElement('script');\n",
269 | " element.onerror = on_error;\n",
270 | " element.async = false;\n",
271 | " element.type = \"module\";\n",
272 | " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n",
273 | " element.textContent = `\n",
274 | " import ${name} from \"${url}\"\n",
275 | " window.${name} = ${name}\n",
276 | " window._bokeh_on_load()\n",
277 | " `\n",
278 | " document.head.appendChild(element);\n",
279 | " }\n",
280 | " if (!js_urls.length && !js_modules.length) {\n",
281 | " on_load()\n",
282 | " }\n",
283 | " };\n",
284 | "\n",
285 | " function inject_raw_css(css) {\n",
286 | " const element = document.createElement(\"style\");\n",
287 | " element.appendChild(document.createTextNode(css));\n",
288 | " document.body.appendChild(element);\n",
289 | " }\n",
290 | "\n",
291 | " var js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.3.4.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.3.4.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.3.4.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.3.4.min.js\", \"https://cdn.holoviz.org/panel/1.3.8/dist/panel.min.js\"];\n",
292 | " var js_modules = [];\n",
293 | " var js_exports = {};\n",
294 | " var css_urls = [];\n",
295 | " var inline_js = [ function(Bokeh) {\n",
296 | " Bokeh.set_log_level(\"info\");\n",
297 | " },\n",
298 | "function(Bokeh) {} // ensure no trailing comma for IE\n",
299 | " ];\n",
300 | "\n",
301 | " function run_inline_js() {\n",
302 | " if ((root.Bokeh !== undefined) || (force === true)) {\n",
303 | " for (var i = 0; i < inline_js.length; i++) {\n",
304 | "\ttry {\n",
305 | " inline_js[i].call(root, root.Bokeh);\n",
306 | "\t} catch(e) {\n",
307 | "\t if (!reloading) {\n",
308 | "\t throw e;\n",
309 | "\t }\n",
310 | "\t}\n",
311 | " }\n",
312 | " // Cache old bokeh versions\n",
313 | " if (Bokeh != undefined && !reloading) {\n",
314 | "\tvar NewBokeh = root.Bokeh;\n",
315 | "\tif (Bokeh.versions === undefined) {\n",
316 | "\t Bokeh.versions = new Map();\n",
317 | "\t}\n",
318 | "\tif (NewBokeh.version !== Bokeh.version) {\n",
319 | "\t Bokeh.versions.set(NewBokeh.version, NewBokeh)\n",
320 | "\t}\n",
321 | "\troot.Bokeh = Bokeh;\n",
322 | " }} else if (Date.now() < root._bokeh_timeout) {\n",
323 | " setTimeout(run_inline_js, 100);\n",
324 | " } else if (!root._bokeh_failed_load) {\n",
325 | " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n",
326 | " root._bokeh_failed_load = true;\n",
327 | " }\n",
328 | " root._bokeh_is_initializing = false\n",
329 | " }\n",
330 | "\n",
331 | " function load_or_wait() {\n",
332 | " // Implement a backoff loop that tries to ensure we do not load multiple\n",
333 | " // versions of Bokeh and its dependencies at the same time.\n",
334 | " // In recent versions we use the root._bokeh_is_initializing flag\n",
335 | " // to determine whether there is an ongoing attempt to initialize\n",
336 | " // bokeh, however for backward compatibility we also try to ensure\n",
337 | " // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n",
338 | " // before older versions are fully initialized.\n",
339 | " if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n",
340 | " root._bokeh_is_initializing = false;\n",
341 | " root._bokeh_onload_callbacks = undefined;\n",
342 | " console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n",
343 | " load_or_wait();\n",
344 | " } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n",
345 | " setTimeout(load_or_wait, 100);\n",
346 | " } else {\n",
347 | " root._bokeh_is_initializing = true\n",
348 | " root._bokeh_onload_callbacks = []\n",
349 | " var bokeh_loaded = Bokeh != null && (Bokeh.version === py_version || (Bokeh.versions !== undefined && Bokeh.versions.has(py_version)));\n",
350 | " if (!reloading && !bokeh_loaded) {\n",
351 | "\troot.Bokeh = undefined;\n",
352 | " }\n",
353 | " load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n",
354 | "\tconsole.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n",
355 | "\trun_inline_js();\n",
356 | " });\n",
357 | " }\n",
358 | " }\n",
359 | " // Give older versions of the autoload script a head-start to ensure\n",
360 | " // they initialize before we start loading newer version.\n",
361 | " setTimeout(load_or_wait, 100)\n",
362 | "}(window));"
363 | ],
364 | "application/vnd.holoviews_load.v0+json": "(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n var py_version = '3.3.4'.replace('rc', '-rc.').replace('.dev', '-dev.');\n var reloading = false;\n var Bokeh = root.Bokeh;\n\n if (typeof (root._bokeh_timeout) === \"undefined\" || force) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks;\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n if (js_exports == null) js_exports = {};\n\n root._bokeh_onload_callbacks.push(callback);\n\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n run_callbacks();\n return null;\n }\n if (!reloading) {\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n }\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n window._bokeh_on_load = on_load\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n var skip = [];\n if (window.requirejs) {\n window.requirejs.config({'packages': {}, 'paths': {'jspanel': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/jspanel', 'jspanel-modal': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal', 'jspanel-tooltip': 
'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip', 'jspanel-hint': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint', 'jspanel-layout': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout', 'jspanel-contextmenu': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu', 'jspanel-dock': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock', 'gridstack': 'https://cdn.jsdelivr.net/npm/gridstack@7.2.3/dist/gridstack-all', 'notyf': 'https://cdn.jsdelivr.net/npm/notyf@3/notyf.min'}, 'shim': {'jspanel': {'exports': 'jsPanel'}, 'gridstack': {'exports': 'GridStack'}}});\n require([\"jspanel\"], function(jsPanel) {\n\twindow.jsPanel = jsPanel\n\ton_load()\n })\n require([\"jspanel-modal\"], function() {\n\ton_load()\n })\n require([\"jspanel-tooltip\"], function() {\n\ton_load()\n })\n require([\"jspanel-hint\"], function() {\n\ton_load()\n })\n require([\"jspanel-layout\"], function() {\n\ton_load()\n })\n require([\"jspanel-contextmenu\"], function() {\n\ton_load()\n })\n require([\"jspanel-dock\"], function() {\n\ton_load()\n })\n require([\"gridstack\"], function(GridStack) {\n\twindow.GridStack = GridStack\n\ton_load()\n })\n require([\"notyf\"], function() {\n\ton_load()\n })\n root._bokeh_is_loading = css_urls.length + 9;\n } else {\n root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n }\n\n var existing_stylesheets = []\n var links = document.getElementsByTagName('link')\n for (var i = 0; i < links.length; i++) {\n var link = links[i]\n if (link.href != null) {\n\texisting_stylesheets.push(link.href)\n }\n }\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n if (existing_stylesheets.indexOf(url) !== -1) {\n\ton_load()\n\tcontinue;\n }\n const element = document.createElement(\"link\");\n element.onload = on_load;\n 
element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n } if (((window['jsPanel'] !== undefined) && (!(window['jsPanel'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/jspanel.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['GridStack'] !== undefined) && (!(window['GridStack'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.3.8/dist/bundled/gridstack/gridstack@7.2.3/dist/gridstack-all.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['Notyf'] !== undefined) && (!(window['Notyf'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.3.8/dist/bundled/notificationarea/notyf@3/notyf.min.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } var existing_scripts = []\n var scripts = document.getElementsByTagName('script')\n for (var i = 0; i < scripts.length; i++) {\n var script = scripts[i]\n if 
(script.src != null) {\n\texisting_scripts.push(script.src)\n }\n }\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (var i = 0; i < js_modules.length; i++) {\n var url = js_modules[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (const name in js_exports) {\n var url = js_exports[name];\n if (skip.indexOf(url) >= 0 || root[name] != null) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onerror = on_error;\n element.async = false;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n element.textContent = `\n import ${name} from \"${url}\"\n window.${name} = ${name}\n window._bokeh_on_load()\n `\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n var js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.3.4.min.js\", 
\"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.3.4.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.3.4.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.3.4.min.js\", \"https://cdn.holoviz.org/panel/1.3.8/dist/panel.min.js\"];\n var js_modules = [];\n var js_exports = {};\n var css_urls = [];\n var inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (var i = 0; i < inline_js.length; i++) {\n\ttry {\n inline_js[i].call(root, root.Bokeh);\n\t} catch(e) {\n\t if (!reloading) {\n\t throw e;\n\t }\n\t}\n }\n // Cache old bokeh versions\n if (Bokeh != undefined && !reloading) {\n\tvar NewBokeh = root.Bokeh;\n\tif (Bokeh.versions === undefined) {\n\t Bokeh.versions = new Map();\n\t}\n\tif (NewBokeh.version !== Bokeh.version) {\n\t Bokeh.versions.set(NewBokeh.version, NewBokeh)\n\t}\n\troot.Bokeh = Bokeh;\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n root._bokeh_is_initializing = false\n }\n\n function load_or_wait() {\n // Implement a backoff loop that tries to ensure we do not load multiple\n // versions of Bokeh and its dependencies at the same time.\n // In recent versions we use the root._bokeh_is_initializing flag\n // to determine whether there is an ongoing attempt to initialize\n // bokeh, however for backward compatibility we also try to ensure\n // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n // before older versions are fully initialized.\n if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n root._bokeh_is_initializing = false;\n root._bokeh_onload_callbacks = undefined;\n console.log(\"Bokeh: BokehJS was loaded multiple 
times but one version failed to initialize.\");\n load_or_wait();\n } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n setTimeout(load_or_wait, 100);\n } else {\n root._bokeh_is_initializing = true\n root._bokeh_onload_callbacks = []\n var bokeh_loaded = Bokeh != null && (Bokeh.version === py_version || (Bokeh.versions !== undefined && Bokeh.versions.has(py_version)));\n if (!reloading && !bokeh_loaded) {\n\troot.Bokeh = undefined;\n }\n load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n\tconsole.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n\trun_inline_js();\n });\n }\n }\n // Give older versions of the autoload script a head-start to ensure\n // they initialize before we start loading newer version.\n setTimeout(load_or_wait, 100)\n}(window));"
365 | },
366 | "metadata": {},
367 | "output_type": "display_data"
368 | },
369 | {
370 | "data": {
371 | "application/javascript": [
372 | "\n",
373 | "if ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n",
374 | " window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n",
375 | "}\n",
376 | "\n",
377 | "\n",
378 | " function JupyterCommManager() {\n",
379 | " }\n",
380 | "\n",
381 | " JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n",
382 | " if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n",
383 | " var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n",
384 | " comm_manager.register_target(comm_id, function(comm) {\n",
385 | " comm.on_msg(msg_handler);\n",
386 | " });\n",
387 | " } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n",
388 | " window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n",
389 | " comm.onMsg = msg_handler;\n",
390 | " });\n",
391 | " } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n",
392 | " google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n",
393 | " var messages = comm.messages[Symbol.asyncIterator]();\n",
394 | " function processIteratorResult(result) {\n",
395 | " var message = result.value;\n",
396 | " console.log(message)\n",
397 | " var content = {data: message.data, comm_id};\n",
398 | " var buffers = []\n",
399 | " for (var buffer of message.buffers || []) {\n",
400 | " buffers.push(new DataView(buffer))\n",
401 | " }\n",
402 | " var metadata = message.metadata || {};\n",
403 | " var msg = {content, buffers, metadata}\n",
404 | " msg_handler(msg);\n",
405 | " return messages.next().then(processIteratorResult);\n",
406 | " }\n",
407 | " return messages.next().then(processIteratorResult);\n",
408 | " })\n",
409 | " }\n",
410 | " }\n",
411 | "\n",
412 | " JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n",
413 | " if (comm_id in window.PyViz.comms) {\n",
414 | " return window.PyViz.comms[comm_id];\n",
415 | " } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n",
416 | " var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n",
417 | " var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n",
418 | " if (msg_handler) {\n",
419 | " comm.on_msg(msg_handler);\n",
420 | " }\n",
421 | " } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n",
422 | " var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n",
423 | " comm.open();\n",
424 | " if (msg_handler) {\n",
425 | " comm.onMsg = msg_handler;\n",
426 | " }\n",
427 | " } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n",
428 | " var comm_promise = google.colab.kernel.comms.open(comm_id)\n",
429 | " comm_promise.then((comm) => {\n",
430 | " window.PyViz.comms[comm_id] = comm;\n",
431 | " if (msg_handler) {\n",
432 | " var messages = comm.messages[Symbol.asyncIterator]();\n",
433 | " function processIteratorResult(result) {\n",
434 | " var message = result.value;\n",
435 | " var content = {data: message.data};\n",
436 | " var metadata = message.metadata || {comm_id};\n",
437 | " var msg = {content, metadata}\n",
438 | " msg_handler(msg);\n",
439 | " return messages.next().then(processIteratorResult);\n",
440 | " }\n",
441 | " return messages.next().then(processIteratorResult);\n",
442 | " }\n",
443 | " }) \n",
444 | " var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n",
445 | " return comm_promise.then((comm) => {\n",
446 | " comm.send(data, metadata, buffers, disposeOnDone);\n",
447 | " });\n",
448 | " };\n",
449 | " var comm = {\n",
450 | " send: sendClosure\n",
451 | " };\n",
452 | " }\n",
453 | " window.PyViz.comms[comm_id] = comm;\n",
454 | " return comm;\n",
455 | " }\n",
456 | " window.PyViz.comm_manager = new JupyterCommManager();\n",
457 | " \n",
458 | "\n",
459 | "\n",
460 | "var JS_MIME_TYPE = 'application/javascript';\n",
461 | "var HTML_MIME_TYPE = 'text/html';\n",
462 | "var EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\n",
463 | "var CLASS_NAME = 'output';\n",
464 | "\n",
465 | "/**\n",
466 | " * Render data to the DOM node\n",
467 | " */\n",
468 | "function render(props, node) {\n",
469 | " var div = document.createElement(\"div\");\n",
470 | " var script = document.createElement(\"script\");\n",
471 | " node.appendChild(div);\n",
472 | " node.appendChild(script);\n",
473 | "}\n",
474 | "\n",
475 | "/**\n",
476 | " * Handle when a new output is added\n",
477 | " */\n",
478 | "function handle_add_output(event, handle) {\n",
479 | " var output_area = handle.output_area;\n",
480 | " var output = handle.output;\n",
481 | " if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n",
482 | " return\n",
483 | " }\n",
484 | " var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n",
485 | " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n",
486 | " if (id !== undefined) {\n",
487 | " var nchildren = toinsert.length;\n",
488 | " var html_node = toinsert[nchildren-1].children[0];\n",
489 | " html_node.innerHTML = output.data[HTML_MIME_TYPE];\n",
490 | " var scripts = [];\n",
491 | " var nodelist = html_node.querySelectorAll(\"script\");\n",
492 | " for (var i in nodelist) {\n",
493 | " if (nodelist.hasOwnProperty(i)) {\n",
494 | " scripts.push(nodelist[i])\n",
495 | " }\n",
496 | " }\n",
497 | "\n",
498 | " scripts.forEach( function (oldScript) {\n",
499 | " var newScript = document.createElement(\"script\");\n",
500 | " var attrs = [];\n",
501 | " var nodemap = oldScript.attributes;\n",
502 | " for (var j in nodemap) {\n",
503 | " if (nodemap.hasOwnProperty(j)) {\n",
504 | " attrs.push(nodemap[j])\n",
505 | " }\n",
506 | " }\n",
507 | " attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n",
508 | " newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n",
509 | " oldScript.parentNode.replaceChild(newScript, oldScript);\n",
510 | " });\n",
511 | " if (JS_MIME_TYPE in output.data) {\n",
512 | " toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n",
513 | " }\n",
514 | " output_area._hv_plot_id = id;\n",
515 | " if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n",
516 | " window.PyViz.plot_index[id] = Bokeh.index[id];\n",
517 | " } else {\n",
518 | " window.PyViz.plot_index[id] = null;\n",
519 | " }\n",
520 | " } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n",
521 | " var bk_div = document.createElement(\"div\");\n",
522 | " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n",
523 | " var script_attrs = bk_div.children[0].attributes;\n",
524 | " for (var i = 0; i < script_attrs.length; i++) {\n",
525 | " toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n",
526 | " }\n",
527 | " // store reference to server id on output_area\n",
528 | " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n",
529 | " }\n",
530 | "}\n",
531 | "\n",
532 | "/**\n",
533 | " * Handle when an output is cleared or removed\n",
534 | " */\n",
535 | "function handle_clear_output(event, handle) {\n",
536 | " var id = handle.cell.output_area._hv_plot_id;\n",
537 | " var server_id = handle.cell.output_area._bokeh_server_id;\n",
538 | " if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n",
539 | " var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n",
540 | " if (server_id !== null) {\n",
541 | " comm.send({event_type: 'server_delete', 'id': server_id});\n",
542 | " return;\n",
543 | " } else if (comm !== null) {\n",
544 | " comm.send({event_type: 'delete', 'id': id});\n",
545 | " }\n",
546 | " delete PyViz.plot_index[id];\n",
547 | " if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n",
548 | " var doc = window.Bokeh.index[id].model.document\n",
549 | " doc.clear();\n",
550 | " const i = window.Bokeh.documents.indexOf(doc);\n",
551 | " if (i > -1) {\n",
552 | " window.Bokeh.documents.splice(i, 1);\n",
553 | " }\n",
554 | " }\n",
555 | "}\n",
556 | "\n",
557 | "/**\n",
558 | " * Handle kernel restart event\n",
559 | " */\n",
560 | "function handle_kernel_cleanup(event, handle) {\n",
561 | " delete PyViz.comms[\"hv-extension-comm\"];\n",
562 | " window.PyViz.plot_index = {}\n",
563 | "}\n",
564 | "\n",
565 | "/**\n",
566 | " * Handle update_display_data messages\n",
567 | " */\n",
568 | "function handle_update_output(event, handle) {\n",
569 | " handle_clear_output(event, {cell: {output_area: handle.output_area}})\n",
570 | " handle_add_output(event, handle)\n",
571 | "}\n",
572 | "\n",
573 | "function register_renderer(events, OutputArea) {\n",
574 | " function append_mime(data, metadata, element) {\n",
575 | " // create a DOM node to render to\n",
576 | " var toinsert = this.create_output_subarea(\n",
577 | " metadata,\n",
578 | " CLASS_NAME,\n",
579 | " EXEC_MIME_TYPE\n",
580 | " );\n",
581 | " this.keyboard_manager.register_events(toinsert);\n",
582 | " // Render to node\n",
583 | " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n",
584 | " render(props, toinsert[0]);\n",
585 | " element.append(toinsert);\n",
586 | " return toinsert\n",
587 | " }\n",
588 | "\n",
589 | " events.on('output_added.OutputArea', handle_add_output);\n",
590 | " events.on('output_updated.OutputArea', handle_update_output);\n",
591 | " events.on('clear_output.CodeCell', handle_clear_output);\n",
592 | " events.on('delete.Cell', handle_clear_output);\n",
593 | " events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n",
594 | "\n",
595 | " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n",
596 | " safe: true,\n",
597 | " index: 0\n",
598 | " });\n",
599 | "}\n",
600 | "\n",
601 | "if (window.Jupyter !== undefined) {\n",
602 | " try {\n",
603 | " var events = require('base/js/events');\n",
604 | " var OutputArea = require('notebook/js/outputarea').OutputArea;\n",
605 | " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n",
606 | " register_renderer(events, OutputArea);\n",
607 | " }\n",
608 | " } catch(err) {\n",
609 | " }\n",
610 | "}\n"
611 | ],
612 | "application/vnd.holoviews_load.v0+json": "\nif ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n}\n\n\n function JupyterCommManager() {\n }\n\n JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n comm_manager.register_target(comm_id, function(comm) {\n comm.on_msg(msg_handler);\n });\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n comm.onMsg = msg_handler;\n });\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n console.log(message)\n var content = {data: message.data, comm_id};\n var buffers = []\n for (var buffer of message.buffers || []) {\n buffers.push(new DataView(buffer))\n }\n var metadata = message.metadata || {};\n var msg = {content, buffers, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n })\n }\n }\n\n JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n if (comm_id in window.PyViz.comms) {\n return window.PyViz.comms[comm_id];\n } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n if (msg_handler) {\n comm.on_msg(msg_handler);\n }\n } else if ((plot_id 
in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n comm.open();\n if (msg_handler) {\n comm.onMsg = msg_handler;\n }\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n var comm_promise = google.colab.kernel.comms.open(comm_id)\n comm_promise.then((comm) => {\n window.PyViz.comms[comm_id] = comm;\n if (msg_handler) {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n var content = {data: message.data};\n var metadata = message.metadata || {comm_id};\n var msg = {content, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n }\n }) \n var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n return comm_promise.then((comm) => {\n comm.send(data, metadata, buffers, disposeOnDone);\n });\n };\n var comm = {\n send: sendClosure\n };\n }\n window.PyViz.comms[comm_id] = comm;\n return comm;\n }\n window.PyViz.comm_manager = new JupyterCommManager();\n \n\n\nvar JS_MIME_TYPE = 'application/javascript';\nvar HTML_MIME_TYPE = 'text/html';\nvar EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\nvar CLASS_NAME = 'output';\n\n/**\n * Render data to the DOM node\n */\nfunction render(props, node) {\n var div = document.createElement(\"div\");\n var script = document.createElement(\"script\");\n node.appendChild(div);\n node.appendChild(script);\n}\n\n/**\n * Handle when a new output is added\n */\nfunction handle_add_output(event, handle) {\n var output_area = handle.output_area;\n var output = handle.output;\n if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n return\n }\n var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n if (id !== undefined) {\n var nchildren = 
toinsert.length;\n var html_node = toinsert[nchildren-1].children[0];\n html_node.innerHTML = output.data[HTML_MIME_TYPE];\n var scripts = [];\n var nodelist = html_node.querySelectorAll(\"script\");\n for (var i in nodelist) {\n if (nodelist.hasOwnProperty(i)) {\n scripts.push(nodelist[i])\n }\n }\n\n scripts.forEach( function (oldScript) {\n var newScript = document.createElement(\"script\");\n var attrs = [];\n var nodemap = oldScript.attributes;\n for (var j in nodemap) {\n if (nodemap.hasOwnProperty(j)) {\n attrs.push(nodemap[j])\n }\n }\n attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n oldScript.parentNode.replaceChild(newScript, oldScript);\n });\n if (JS_MIME_TYPE in output.data) {\n toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n }\n output_area._hv_plot_id = id;\n if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n window.PyViz.plot_index[id] = Bokeh.index[id];\n } else {\n window.PyViz.plot_index[id] = null;\n }\n } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n var bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n var script_attrs = bk_div.children[0].attributes;\n for (var i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n}\n\n/**\n * Handle when an output is cleared or removed\n */\nfunction handle_clear_output(event, handle) {\n var id = handle.cell.output_area._hv_plot_id;\n var server_id = handle.cell.output_area._bokeh_server_id;\n if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", 
\"hv-extension-comm\", function () {});\n if (server_id !== null) {\n comm.send({event_type: 'server_delete', 'id': server_id});\n return;\n } else if (comm !== null) {\n comm.send({event_type: 'delete', 'id': id});\n }\n delete PyViz.plot_index[id];\n if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n var doc = window.Bokeh.index[id].model.document\n doc.clear();\n const i = window.Bokeh.documents.indexOf(doc);\n if (i > -1) {\n window.Bokeh.documents.splice(i, 1);\n }\n }\n}\n\n/**\n * Handle kernel restart event\n */\nfunction handle_kernel_cleanup(event, handle) {\n delete PyViz.comms[\"hv-extension-comm\"];\n window.PyViz.plot_index = {}\n}\n\n/**\n * Handle update_display_data messages\n */\nfunction handle_update_output(event, handle) {\n handle_clear_output(event, {cell: {output_area: handle.output_area}})\n handle_add_output(event, handle)\n}\n\nfunction register_renderer(events, OutputArea) {\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n var toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[0]);\n element.append(toinsert);\n return toinsert\n }\n\n events.on('output_added.OutputArea', handle_add_output);\n events.on('output_updated.OutputArea', handle_update_output);\n events.on('clear_output.CodeCell', handle_clear_output);\n events.on('delete.Cell', handle_clear_output);\n events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n safe: true,\n index: 0\n });\n}\n\nif (window.Jupyter !== undefined) {\n try {\n var events = require('base/js/events');\n var OutputArea = require('notebook/js/outputarea').OutputArea;\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n 
}\n } catch(err) {\n }\n}\n"
613 | },
614 | "metadata": {},
615 | "output_type": "display_data"
616 | },
617 | {
618 | "data": {
619 | "text/html": [
620 | ""
636 | ]
637 | },
638 | "metadata": {},
639 | "output_type": "display_data"
640 | },
641 | {
642 | "data": {
643 | "application/vnd.holoviews_exec.v0+json": "",
644 | "text/html": [
645 | "\n",
646 | "
\n",
647 | "
\n",
648 | ""
710 | ]
711 | },
712 | "metadata": {
713 | "application/vnd.holoviews_exec.v0+json": {
714 | "id": "p1002"
715 | }
716 | },
717 | "output_type": "display_data"
718 | },
719 | {
720 | "data": {},
721 | "metadata": {},
722 | "output_type": "display_data"
723 | },
724 | {
725 | "data": {
726 | "application/vnd.holoviews_exec.v0+json": "",
727 | "text/html": [
728 | "\n",
729 | "
\n",
730 | "
\n",
731 | ""
793 | ],
794 | "text/plain": [
795 | ":Curve [date] (len)"
796 | ]
797 | },
798 | "execution_count": 4,
799 | "metadata": {
800 | "application/vnd.holoviews_exec.v0+json": {
801 | "id": "p1004"
802 | }
803 | },
804 | "output_type": "execute_result"
805 | }
806 | ],
807 | "source": [
808 | "(df\n",
809 | " .with_columns(date=pl.col(\"datetime\").dt.date())\n",
810 | " .group_by(\"date\")\n",
811 | " .len()\n",
812 | " .plot(\"date\", \"len\"))"
813 | ]
814 | },
815 | {
816 | "cell_type": "markdown",
817 | "id": "d8a3b40f-6b27-4742-938e-378f69e9d648",
818 | "metadata": {},
819 | "source": [
820 | "Let us now move towards Machine Learning."
821 | ]
822 | },
823 | {
824 | "cell_type": "code",
825 | "execution_count": 5,
826 | "id": "5d19a584-3c94-4c04-b99e-3bc50a3cf4ea",
827 | "metadata": {},
828 | "outputs": [],
829 | "source": [
830 | "df_target = (df\n",
831 | " .group_by(\"player_id\")\n",
832 | " .agg(pl.col(\"datetime\").max())\n",
833 | " .with_columns(target=pl.col(\"datetime\").dt.year() >= 2007 )\n",
834 | " .drop(\"datetime\"))"
835 | ]
836 | },
837 | {
838 | "cell_type": "code",
839 | "execution_count": 6,
840 | "id": "b855bfea-2e88-4ae7-920c-4f84d8191d43",
841 | "metadata": {},
842 | "outputs": [],
843 | "source": [
844 | "ml_df = (df\n",
845 | " .group_by(\"player_id\")\n",
846 | " .agg(\n",
847 | " pl.col(\"level\").max(),\n",
848 | " pl.col(\"class\").first(),\n",
849 | " pl.col(\"race\").first(),\n",
850 | " pl.len().alias(\"n_row\"),\n",
851 | " )\n",
852 | " .join(df_target, on=\"player_id\")\n",
853 | " .drop(\"player_id\")\n",
854 | " .filter(pl.col(\"n_row\") > 10)\n",
855 | ")"
856 | ]
857 | },
858 | {
859 | "cell_type": "code",
860 | "execution_count": 7,
861 | "id": "fb897ee0-91f3-4292-a911-643c8e051ac5",
862 | "metadata": {},
863 | "outputs": [
864 | {
865 | "data": {
866 | "text/html": [
867 | "\n",
874 | "
shape: (5, 4)level | class | race | n_row |
---|
i8 | cat | cat | u32 |
18 | "Hunter" | "Troll" | 106 |
57 | "Death Knight" | "Blood Elf" | 16 |
8 | "Hunter" | "Troll" | 30 |
70 | "Priest" | "Undead" | 4289 |
12 | "Paladin" | "Blood Elf" | 31 |
"
875 | ],
876 | "text/plain": [
877 | "shape: (5, 4)\n",
878 | "┌───────┬──────────────┬───────────┬───────┐\n",
879 | "│ level ┆ class ┆ race ┆ n_row │\n",
880 | "│ --- ┆ --- ┆ --- ┆ --- │\n",
881 | "│ i8 ┆ cat ┆ cat ┆ u32 │\n",
882 | "╞═══════╪══════════════╪═══════════╪═══════╡\n",
883 | "│ 18 ┆ Hunter ┆ Troll ┆ 106 │\n",
884 | "│ 57 ┆ Death Knight ┆ Blood Elf ┆ 16 │\n",
885 | "│ 8 ┆ Hunter ┆ Troll ┆ 30 │\n",
886 | "│ 70 ┆ Priest ┆ Undead ┆ 4289 │\n",
887 | "│ 12 ┆ Paladin ┆ Blood Elf ┆ 31 │\n",
888 | "└───────┴──────────────┴───────────┴───────┘"
889 | ]
890 | },
891 | "execution_count": 7,
892 | "metadata": {},
893 | "output_type": "execute_result"
894 | }
895 | ],
896 | "source": [
897 | "y = np.array(ml_df['target'])\n",
898 | "X = ml_df.drop(\"target\")\n",
899 | "X.head()"
900 | ]
901 | },
902 | {
903 | "cell_type": "code",
904 | "execution_count": 8,
905 | "id": "94e670be-d177-45ab-8a6f-99fdf8d31ec7",
906 | "metadata": {},
907 | "outputs": [],
908 | "source": [
909 | "from skrub import SelectCols\n",
910 | "from sklearn.pipeline import make_pipeline, make_union\n",
911 | "from sklearn.preprocessing import OneHotEncoder\n",
912 | "from sklearn.linear_model import LogisticRegression\n",
913 | "from sklearn.model_selection import cross_validate\n",
914 | "from sklearn.metrics import make_scorer, recall_score, precision_score, accuracy_score"
915 | ]
916 | },
917 | {
918 | "cell_type": "code",
919 | "execution_count": 9,
920 | "id": "1de987f5-6de9-47f8-93c9-8200bc167d5b",
921 | "metadata": {},
922 | "outputs": [],
923 | "source": [
924 | "pipe = make_pipeline(\n",
925 | " make_union(\n",
926 | " make_pipeline(\n",
927 | " SelectCols([\"class\", \"race\"]),\n",
928 | " OneHotEncoder(handle_unknown=\"infrequent_if_exist\")\n",
929 | " ),\n",
930 | " make_pipeline(\n",
931 | " SelectCols([\"level\"]),\n",
932 | " )\n",
933 | " ),\n",
934 | " LogisticRegression(max_iter=2_000)\n",
935 | ")"
936 | ]
937 | },
938 | {
939 | "cell_type": "code",
940 | "execution_count": 10,
941 | "id": "e4e329ad-6b91-4644-b532-49e38056f926",
942 | "metadata": {},
943 | "outputs": [
944 | {
945 | "data": {
946 | "text/plain": [
947 | "0.8308071286730513"
948 | ]
949 | },
950 | "execution_count": 10,
951 | "metadata": {},
952 | "output_type": "execute_result"
953 | }
954 | ],
955 | "source": [
956 | "import numpy as np\n",
957 | "\n",
958 | "np.mean(pipe.fit(X, y).predict(X) == y)"
959 | ]
960 | },
961 | {
962 | "cell_type": "code",
963 | "execution_count": 11,
964 | "id": "63b69b6e-88e0-41ba-9588-c04d3beb36a7",
965 | "metadata": {},
966 | "outputs": [
967 | {
968 | "data": {
969 | "text/plain": [
970 | "{'fit_time': array([0.1542871 , 0.08812308, 0.21422696, 0.1478188 , 0.13812685]),\n",
971 | " 'score_time': array([0.00765991, 0.00752473, 0.00674915, 0.00745916, 0.00724912]),\n",
972 | " 'test_accuracy': array([0.83084077, 0.83084077, 0.83071509, 0.83081951, 0.83081951]),\n",
973 | " 'test_precision': array([0.83084077, 0.83084077, 0.83071509, 0.83081951, 0.83081951]),\n",
974 | " 'test_recall': array([1., 1., 1., 1., 1.])}"
975 | ]
976 | },
977 | "execution_count": 11,
978 | "metadata": {},
979 | "output_type": "execute_result"
980 | }
981 | ],
982 | "source": [
983 | "from sklearn.metrics import precision_score, accuracy_score, recall_score, make_scorer\n",
984 | "from sklearn.model_selection import cross_validate\n",
985 | "\n",
986 | "y = np.array(ml_df['target'])\n",
987 | "X = ml_df.drop(\"target\")\n",
988 | "\n",
989 | "scorers = {\n",
990 | " \"accuracy\": make_scorer(accuracy_score), \n",
991 | " \"precision\": make_scorer(precision_score), \n",
992 | " \"recall\": make_scorer(recall_score)\n",
993 | "}\n",
994 | "cross_validate(pipe, X, y, cv=5, scoring=scorers)"
995 | ]
996 | },
997 | {
998 | "cell_type": "markdown",
999 | "id": "07f6c225-f27e-46b9-ba67-067e4b50a342",
1000 | "metadata": {},
1001 | "source": [
1002 | "Ah yes. These numbers all look reassuring ... but alas ... this is a data leak that can become a black hole. "
1003 | ]
1004 | },
1005 | {
1006 | "cell_type": "code",
1007 | "execution_count": 12,
1008 | "id": "48632e3a-605f-4b0e-acd6-ca6fec9704eb",
1009 | "metadata": {},
1010 | "outputs": [
1011 | {
1012 | "data": {
1013 | "text/plain": [
1014 | "(0.8308071286730513, (39783,))"
1015 | ]
1016 | },
1017 | "execution_count": 12,
1018 | "metadata": {},
1019 | "output_type": "execute_result"
1020 | }
1021 | ],
1022 | "source": [
1023 | "np.mean(y), y.shape"
1024 | ]
1025 | },
1026 | {
1027 | "cell_type": "markdown",
1028 | "id": "3d373658-a72d-4601-b9a3-2b8c8d5ad6bc",
1029 | "metadata": {},
1030 | "source": [
1031 | "Let's write a safety mechanism now."
1032 | ]
1033 | },
1034 | {
1035 | "cell_type": "code",
1036 | "execution_count": 13,
1037 | "id": "51ac8774-7638-400c-83ce-a4a11a41c01b",
1038 | "metadata": {},
1039 | "outputs": [],
1040 | "source": [
1041 | "from datetime import datetime, timedelta\n",
1042 | "\n",
1043 | "def churn_dataset_generator(df, user_id, feature_pipeline, \n",
1044 | " info_period=180, \n",
1045 | " checking_period=180, \n",
1046 | " start_date=datetime(2007, 1, 1), \n",
1047 | " end_date=datetime(2007, 12, 31), \n",
1048 | " step=\"1mo\", \n",
1049 | " time_col=\"datetime\"):\n",
1050 | " \"\"\"\n",
1051 | " Generates X,y pairs for churn-related machine learning, with far fewer temporal data leaks to worry about. \n",
1052 | "\n",
1053 | " Arguments:\n",
1054 | "\n",
1055 | " - df: a Polars dataframe that contains logs over time for users\n",
1056 | " - user_id: the column name that depicts the user id\n",
1057 | " - feature_pipeline: a Polars-compatible function that generates ML features to go in `X`\n",
1058 | " - info_period: the number of days that the info (input) period lasts\n",
1059 | " - checking_period: the number of days that the checking period lasts\n",
1060 | " - start_date: the start date for X,y-pair generation\n",
1061 | " - end_date: the end date for X,y-pair generation\n",
1062 | " - step: stepsize over time for new X,y-pairs. defaults to a month. \n",
1063 | " - time_col: column name that depicts the datetime stamp\n",
1064 | " \"\"\"\n",
1065 | " cutoff_start = pl.datetime_range(start_date, end_date, step, eager=True).alias(time_col)\n",
1066 | " min_date = df[time_col].min()\n",
1067 | " max_date = df[time_col].max()\n",
1068 | " \n",
1069 | " for start in cutoff_start.to_list():\n",
1070 | " info_period_start = start - timedelta(days=info_period)\n",
1071 | " checking_period_end = start + timedelta(days=checking_period)\n",
1072 | " if info_period_start < min_date:\n",
1073 | " continue\n",
1074 | " if checking_period_end > max_date:\n",
1075 | " continue\n",
1076 | " print(info_period_start, start, checking_period_end, min_date, max_date)\n",
1077 | " train_info = df.filter(pl.col(time_col) < start, pl.col(time_col) >= (start - timedelta(days=info_period)))\n",
1078 | " valid_info = df.filter(pl.col(time_col) >= start, pl.col(time_col) < (start + timedelta(days=checking_period)))\n",
1079 | " \n",
1080 | " \n",
1081 | " target = valid_info.select(\"player_id\").unique().with_columns(target=True)\n",
1082 | "\n",
1083 | " ml_df = (train_info\n",
1084 | " .pipe(feature_pipeline)\n",
1085 | " .join(target, on=user_id, how=\"left\")\n",
1086 | " .with_columns(target=pl.when(pl.col(\"target\")).then(True).otherwise(False)))\n",
1087 | " \n",
1088 | " X = ml_df.drop(\"target\", \"player_id\")\n",
1089 | " y = np.array(ml_df[\"target\"]).astype(int)\n",
1090 | " \n",
1091 | " yield X, y"
1092 | ]
1093 | },
1094 | {
1095 | "cell_type": "markdown",
1096 | "id": "e56c34d9-175b-4ac7-9cf5-daedcc8714e9",
1097 | "metadata": {},
1098 | "source": [
1099 | "Now, when we run the aggregation, we no longer steal data from the future that would be unavailable at prediction time."
1100 | ]
1101 | },
1102 | {
1103 | "cell_type": "code",
1104 | "execution_count": 14,
1105 | "id": "9e14641c-3ee3-4f8d-b3c7-581363db4207",
1106 | "metadata": {},
1107 | "outputs": [
1108 | {
1109 | "name": "stdout",
1110 | "output_type": "stream",
1111 | "text": [
1112 | "2006-09-03 00:00:00 2007-01-01 00:00:00 2007-05-01 00:00:00 2005-12-31 23:59:46 2009-01-10 05:08:59\n",
1113 | "shape: (5, 5)\n",
1114 | "┌──────────┬────────────┬───────────────┬────────────────┬─────────────┐\n",
1115 | "│ fit_time ┆ score_time ┆ test_accuracy ┆ test_precision ┆ test_recall │\n",
1116 | "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
1117 | "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
1118 | "╞══════════╪════════════╪═══════════════╪════════════════╪═════════════╡\n",
1119 | "│ 0.027518 ┆ 0.003589 ┆ 0.700935 ┆ 0.762815 ┆ 0.639476 │\n",
1120 | "│ 0.026381 ┆ 0.003645 ┆ 0.708333 ┆ 0.767123 ┆ 0.652586 │\n",
1121 | "│ 0.023318 ┆ 0.003554 ┆ 0.702882 ┆ 0.767075 ┆ 0.638019 │\n",
1122 | "│ 0.021533 ┆ 0.004087 ┆ 0.716511 ┆ 0.768719 ┆ 0.672489 │\n",
1123 | "│ 0.025888 ┆ 0.003689 ┆ 0.718458 ┆ 0.77892 ┆ 0.661572 │\n",
1124 | "└──────────┴────────────┴───────────────┴────────────────┴─────────────┘\n",
1125 | "2006-10-04 00:00:00 2007-02-01 00:00:00 2007-06-01 00:00:00 2005-12-31 23:59:46 2009-01-10 05:08:59\n",
1126 | "shape: (5, 5)\n",
1127 | "┌──────────┬────────────┬───────────────┬────────────────┬─────────────┐\n",
1128 | "│ fit_time ┆ score_time ┆ test_accuracy ┆ test_precision ┆ test_recall │\n",
1129 | "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
1130 | "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
1131 | "╞══════════╪════════════╪═══════════════╪════════════════╪═════════════╡\n",
1132 | "│ 0.022433 ┆ 0.003778 ┆ 0.716697 ┆ 0.776 ┆ 0.626705 │\n",
1133 | "│ 0.029459 ┆ 0.004338 ┆ 0.712922 ┆ 0.772809 ┆ 0.62069 │\n",
1134 | "│ 0.022286 ┆ 0.003783 ┆ 0.700073 ┆ 0.757386 ┆ 0.607759 │\n",
1135 | "│ 0.020075 ┆ 0.004238 ┆ 0.713656 ┆ 0.760204 ┆ 0.642241 │\n",
1136 | "│ 0.026213 ┆ 0.003821 ┆ 0.720631 ┆ 0.774109 ┆ 0.640086 │\n",
1137 | "└──────────┴────────────┴───────────────┴────────────────┴─────────────┘\n",
1138 | "2006-11-01 00:00:00 2007-03-01 00:00:00 2007-06-29 00:00:00 2005-12-31 23:59:46 2009-01-10 05:08:59\n",
1139 | "shape: (5, 5)\n",
1140 | "┌──────────┬────────────┬───────────────┬────────────────┬─────────────┐\n",
1141 | "│ fit_time ┆ score_time ┆ test_accuracy ┆ test_precision ┆ test_recall │\n",
1142 | "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
1143 | "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
1144 | "╞══════════╪════════════╪═══════════════╪════════════════╪═════════════╡\n",
1145 | "│ 0.02656 ┆ 0.003851 ┆ 0.728537 ┆ 0.7802 ┆ 0.614449 │\n",
1146 | "│ 0.031687 ┆ 0.003921 ┆ 0.71975 ┆ 0.768116 ┆ 0.606581 │\n",
1147 | "│ 0.034845 ┆ 0.003556 ┆ 0.720097 ┆ 0.772769 ┆ 0.600858 │\n",
1148 | "│ 0.027189 ┆ 0.004105 ┆ 0.727399 ┆ 0.766031 ┆ 0.632332 │\n",
1149 | "│ 0.033105 ┆ 0.003992 ┆ 0.718011 ┆ 0.762288 ┆ 0.610157 │\n",
1150 | "└──────────┴────────────┴───────────────┴────────────────┴─────────────┘\n",
1151 | "2006-12-02 00:00:00 2007-04-01 00:00:00 2007-07-30 00:00:00 2005-12-31 23:59:46 2009-01-10 05:08:59\n",
1152 | "shape: (5, 5)\n",
1153 | "┌──────────┬────────────┬───────────────┬────────────────┬─────────────┐\n",
1154 | "│ fit_time ┆ score_time ┆ test_accuracy ┆ test_precision ┆ test_recall │\n",
1155 | "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
1156 | "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
1157 | "╞══════════╪════════════╪═══════════════╪════════════════╪═════════════╡\n",
1158 | "│ 0.04597 ┆ 0.005289 ┆ 0.740741 ┆ 0.781631 ┆ 0.614591 │\n",
1159 | "│ 0.032287 ┆ 0.009541 ┆ 0.731046 ┆ 0.776062 ┆ 0.59292 │\n",
1160 | "│ 0.027807 ┆ 0.003864 ┆ 0.73036 ┆ 0.77113 ┆ 0.598379 │\n",
1161 | "│ 0.051411 ┆ 0.004183 ┆ 0.736535 ┆ 0.769936 ┆ 0.619013 │\n",
1162 | "│ 0.029864 ┆ 0.00384 ┆ 0.719039 ┆ 0.750933 ┆ 0.59322 │\n",
1163 | "└──────────┴────────────┴───────────────┴────────────────┴─────────────┘\n",
1164 | "2007-01-01 00:00:00 2007-05-01 00:00:00 2007-08-29 00:00:00 2005-12-31 23:59:46 2009-01-10 05:08:59\n",
1165 | "shape: (5, 5)\n",
1166 | "┌──────────┬────────────┬───────────────┬────────────────┬─────────────┐\n",
1167 | "│ fit_time ┆ score_time ┆ test_accuracy ┆ test_precision ┆ test_recall │\n",
1168 | "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
1169 | "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
1170 | "╞══════════╪════════════╪═══════════════╪════════════════╪═════════════╡\n",
1171 | "│ 0.080514 ┆ 0.005606 ┆ 0.737787 ┆ 0.752617 ┆ 0.469436 │\n",
1172 | "│ 0.036413 ┆ 0.005756 ┆ 0.735969 ┆ 0.743948 ┆ 0.473903 │\n",
1173 | "│ 0.035378 ┆ 0.004893 ┆ 0.737332 ┆ 0.747664 ┆ 0.474496 │\n",
1174 | "│ 0.036143 ┆ 0.005316 ┆ 0.731425 ┆ 0.731193 ┆ 0.472716 │\n",
1175 | "│ 0.038411 ┆ 0.004761 ┆ 0.74 ┆ 0.743913 ┆ 0.489614 │\n",
1176 | "└──────────┴────────────┴───────────────┴────────────────┴─────────────┘\n",
1177 | "2007-02-01 00:00:00 2007-06-01 00:00:00 2007-09-29 00:00:00 2005-12-31 23:59:46 2009-01-10 05:08:59\n",
1178 | "shape: (5, 5)\n",
1179 | "┌──────────┬────────────┬───────────────┬────────────────┬─────────────┐\n",
1180 | "│ fit_time ┆ score_time ┆ test_accuracy ┆ test_precision ┆ test_recall │\n",
1181 | "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
1182 | "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
1183 | "╞══════════╪════════════╪═══════════════╪════════════════╪═════════════╡\n",
1184 | "│ 0.043345 ┆ 0.0059 ┆ 0.748093 ┆ 0.73487 ┆ 0.4559 │\n",
1185 | "│ 0.090296 ┆ 0.005925 ┆ 0.749788 ┆ 0.735992 ┆ 0.461859 │\n",
1186 | "│ 0.050923 ┆ 0.005866 ┆ 0.750159 ┆ 0.740347 ┆ 0.457364 │\n",
1187 | "│ 0.059539 ┆ 0.005827 ┆ 0.753761 ┆ 0.729297 ┆ 0.488372 │\n",
1188 | "│ 0.094732 ┆ 0.008577 ┆ 0.751642 ┆ 0.737983 ┆ 0.466905 │\n",
1189 | "└──────────┴────────────┴───────────────┴────────────────┴─────────────┘\n",
1190 | "2007-03-03 00:00:00 2007-07-01 00:00:00 2007-10-29 00:00:00 2005-12-31 23:59:46 2009-01-10 05:08:59\n",
1191 | "shape: (5, 5)\n",
1192 | "┌──────────┬────────────┬───────────────┬────────────────┬─────────────┐\n",
1193 | "│ fit_time ┆ score_time ┆ test_accuracy ┆ test_precision ┆ test_recall │\n",
1194 | "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
1195 | "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
1196 | "╞══════════╪════════════╪═══════════════╪════════════════╪═════════════╡\n",
1197 | "│ 0.036745 ┆ 0.006585 ┆ 0.75852 ┆ 0.720574 ┆ 0.442681 │\n",
1198 | "│ 0.05794 ┆ 0.006206 ┆ 0.756767 ┆ 0.714015 ┆ 0.443269 │\n",
1199 | "│ 0.045632 ┆ 0.007245 ┆ 0.761636 ┆ 0.726066 ┆ 0.450323 │\n",
1200 | "│ 0.059822 ┆ 0.005823 ┆ 0.765336 ┆ 0.727523 ┆ 0.466196 │\n",
1201 | "│ 0.045603 ┆ 0.005517 ┆ 0.758862 ┆ 0.723671 ┆ 0.440329 │\n",
1202 | "└──────────┴────────────┴───────────────┴────────────────┴─────────────┘\n",
1203 | "2007-04-03 00:00:00 2007-08-01 00:00:00 2007-11-29 00:00:00 2005-12-31 23:59:46 2009-01-10 05:08:59\n",
1204 | "shape: (5, 5)\n",
1205 | "┌──────────┬────────────┬───────────────┬────────────────┬─────────────┐\n",
1206 | "│ fit_time ┆ score_time ┆ test_accuracy ┆ test_precision ┆ test_recall │\n",
1207 | "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
1208 | "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
1209 | "╞══════════╪════════════╪═══════════════╪════════════════╪═════════════╡\n",
1210 | "│ 0.051568 ┆ 0.006031 ┆ 0.770666 ┆ 0.724568 ┆ 0.444903 │\n",
1211 | "│ 0.051029 ┆ 0.005851 ┆ 0.780556 ┆ 0.741876 ┆ 0.470831 │\n",
1212 | "│ 0.036348 ┆ 0.006044 ┆ 0.76913 ┆ 0.72158 ┆ 0.441367 │\n",
1213 | "│ 0.06095 ┆ 0.006783 ┆ 0.763345 ┆ 0.699907 ┆ 0.442546 │\n",
1214 | "│ 0.04729 ┆ 0.005771 ┆ 0.774729 ┆ 0.728545 ┆ 0.460224 │\n",
1215 | "└──────────┴────────────┴───────────────┴────────────────┴─────────────┘\n",
1216 | "2007-05-04 00:00:00 2007-09-01 00:00:00 2007-12-30 00:00:00 2005-12-31 23:59:46 2009-01-10 05:08:59\n",
1217 | "shape: (5, 5)\n",
1218 | "┌──────────┬────────────┬───────────────┬────────────────┬─────────────┐\n",
1219 | "│ fit_time ┆ score_time ┆ test_accuracy ┆ test_precision ┆ test_recall │\n",
1220 | "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
1221 | "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
1222 | "╞══════════╪════════════╪═══════════════╪════════════════╪═════════════╡\n",
1223 | "│ 0.045014 ┆ 0.007876 ┆ 0.763756 ┆ 0.766059 ┆ 0.479304 │\n",
1224 | "│ 0.046918 ┆ 0.008443 ┆ 0.76843 ┆ 0.763952 ┆ 0.5009 │\n",
1225 | "│ 0.041252 ┆ 0.005527 ┆ 0.759295 ┆ 0.751887 ┆ 0.478104 │\n",
1226 | "│ 0.044612 ┆ 0.007882 ┆ 0.762856 ┆ 0.748871 ┆ 0.497301 │\n",
1227 | "│ 0.049095 ┆ 0.005729 ┆ 0.756481 ┆ 0.742777 ┆ 0.478104 │\n",
1228 | "└──────────┴────────────┴───────────────┴────────────────┴─────────────┘\n",
1229 | "2007-06-03 00:00:00 2007-10-01 00:00:00 2008-01-29 00:00:00 2005-12-31 23:59:46 2009-01-10 05:08:59\n",
1230 | "shape: (5, 5)\n",
1231 | "┌──────────┬────────────┬───────────────┬────────────────┬─────────────┐\n",
1232 | "│ fit_time ┆ score_time ┆ test_accuracy ┆ test_precision ┆ test_recall │\n",
1233 | "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
1234 | "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
1235 | "╞══════════╪════════════╪═══════════════╪════════════════╪═════════════╡\n",
1236 | "│ 0.046044 ┆ 0.005438 ┆ 0.761076 ┆ 0.771845 ┆ 0.483577 │\n",
1237 | "│ 0.053092 ┆ 0.005279 ┆ 0.751543 ┆ 0.742481 ┆ 0.480828 │\n",
1238 | "│ 0.092787 ┆ 0.011915 ┆ 0.763448 ┆ 0.760989 ┆ 0.505782 │\n",
1239 | "│ 0.071995 ┆ 0.005879 ┆ 0.759259 ┆ 0.749773 ┆ 0.503348 │\n",
1240 | "│ 0.045665 ┆ 0.005584 ┆ 0.752646 ┆ 0.743231 ┆ 0.48448 │\n",
1241 | "└──────────┴────────────┴───────────────┴────────────────┴─────────────┘\n",
1242 | "2007-07-04 00:00:00 2007-11-01 00:00:00 2008-02-29 00:00:00 2005-12-31 23:59:46 2009-01-10 05:08:59\n",
1243 | "shape: (5, 5)\n",
1244 | "┌──────────┬────────────┬───────────────┬────────────────┬─────────────┐\n",
1245 | "│ fit_time ┆ score_time ┆ test_accuracy ┆ test_precision ┆ test_recall │\n",
1246 | "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
1247 | "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
1248 | "╞══════════╪════════════╪═══════════════╪════════════════╪═════════════╡\n",
1249 | "│ 0.040068 ┆ 0.004758 ┆ 0.751118 ┆ 0.782359 ┆ 0.495747 │\n",
1250 | "│ 0.044423 ┆ 0.005457 ┆ 0.744996 ┆ 0.761374 ┆ 0.498177 │\n",
1251 | "│ 0.047837 ┆ 0.005099 ┆ 0.751118 ┆ 0.761566 ┆ 0.520365 │\n",
1252 | "│ 0.045359 ┆ 0.005647 ┆ 0.741229 ┆ 0.742021 ┆ 0.508815 │\n",
1253 | "│ 0.03557 ┆ 0.004908 ┆ 0.747587 ┆ 0.761167 ┆ 0.507599 │\n",
1254 | "└──────────┴────────────┴───────────────┴────────────────┴─────────────┘\n",
1255 | "2007-08-03 00:00:00 2007-12-01 00:00:00 2008-03-30 00:00:00 2005-12-31 23:59:46 2009-01-10 05:08:59\n",
1256 | "shape: (5, 5)\n",
1257 | "┌──────────┬────────────┬───────────────┬────────────────┬─────────────┐\n",
1258 | "│ fit_time ┆ score_time ┆ test_accuracy ┆ test_precision ┆ test_recall │\n",
1259 | "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
1260 | "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
1261 | "╞══════════╪════════════╪═══════════════╪════════════════╪═════════════╡\n",
1262 | "│ 0.039326 ┆ 0.004498 ┆ 0.727571 ┆ 0.760512 ┆ 0.503937 │\n",
1263 | "│ 0.029854 ┆ 0.004492 ┆ 0.731603 ┆ 0.757923 ┆ 0.521502 │\n",
1264 | "│ 0.030006 ┆ 0.004545 ┆ 0.742692 ┆ 0.765599 ┆ 0.54997 │\n",
1265 | "│ 0.038146 ┆ 0.004963 ┆ 0.737837 ┆ 0.764298 ┆ 0.534545 │\n",
1266 | "│ 0.04888 ┆ 0.004644 ┆ 0.733552 ┆ 0.766607 ┆ 0.517262 │\n",
1267 | "└──────────┴────────────┴───────────────┴────────────────┴─────────────┘\n"
1268 | ]
1269 | }
1270 | ],
1271 | "source": [
1272 | "def feature_pipeline(dataf):\n",
1273 | " return (dataf\n",
1274 | " .group_by(\"player_id\")\n",
1275 | " .agg(\n",
1276 | " pl.col(\"race\").first(), \n",
1277 | " pl.col(\"class\").first(), \n",
1278 | " pl.col(\"level\").max(), \n",
1279 | " pl.len().alias(\"n_row\")))\n",
1280 | "\n",
1281 | "gen = churn_dataset_generator(df, user_id=\"player_id\", info_period=120, checking_period=120, feature_pipeline=feature_pipeline)\n",
1282 | "\n",
1283 | "for X, y in gen:\n",
1284 | " scorers = {\n",
1285 | " \"accuracy\": make_scorer(accuracy_score), \n",
1286 | " \"precision\": make_scorer(precision_score), \n",
1287 | " \"recall\": make_scorer(recall_score)\n",
1288 | " }\n",
1289 | " print(pl.DataFrame(cross_validate(pipe, X, y, cv=5, scoring=scorers)))"
1290 | ]
1291 | },
1292 | {
1293 | "cell_type": "code",
1294 | "execution_count": null,
1295 | "id": "2814d967-b0bc-4c42-8de4-530f52acb496",
1296 | "metadata": {},
1297 | "outputs": [],
1298 | "source": []
1299 | }
1300 | ],
1301 | "metadata": {
1302 | "kernelspec": {
1303 | "display_name": "Python 3 (ipykernel)",
1304 | "language": "python",
1305 | "name": "python3"
1306 | },
1307 | "language_info": {
1308 | "codemirror_mode": {
1309 | "name": "ipython",
1310 | "version": 3
1311 | },
1312 | "file_extension": ".py",
1313 | "mimetype": "text/x-python",
1314 | "name": "python",
1315 | "nbconvert_exporter": "python",
1316 | "pygments_lexer": "ipython3",
1317 | "version": "3.11.5"
1318 | }
1319 | },
1320 | "nbformat": 4,
1321 | "nbformat_minor": 5
1322 | }
1323 |
--------------------------------------------------------------------------------