├── .gitignore
├── BUILDING.md
├── LICENSE
├── Makefile
├── README.md
├── assets
    ├── auto-render.min.js
    ├── fonts
    │   ├── KaTeX_AMS-Regular.eot
    │   ├── KaTeX_AMS-Regular.ttf
    │   ├── KaTeX_AMS-Regular.woff
    │   ├── KaTeX_AMS-Regular.woff2
    │   ├── KaTeX_Caligraphic-Bold.eot
    │   ├── KaTeX_Caligraphic-Bold.ttf
    │   ├── KaTeX_Caligraphic-Bold.woff
    │   ├── KaTeX_Caligraphic-Bold.woff2
    │   ├── KaTeX_Caligraphic-Regular.eot
    │   ├── KaTeX_Caligraphic-Regular.ttf
    │   ├── KaTeX_Caligraphic-Regular.woff
    │   ├── KaTeX_Caligraphic-Regular.woff2
    │   ├── KaTeX_Fraktur-Bold.eot
    │   ├── KaTeX_Fraktur-Bold.ttf
    │   ├── KaTeX_Fraktur-Bold.woff
    │   ├── KaTeX_Fraktur-Bold.woff2
    │   ├── KaTeX_Fraktur-Regular.eot
    │   ├── KaTeX_Fraktur-Regular.ttf
    │   ├── KaTeX_Fraktur-Regular.woff
    │   ├── KaTeX_Fraktur-Regular.woff2
    │   ├── KaTeX_Main-Bold.eot
    │   ├── KaTeX_Main-Bold.ttf
    │   ├── KaTeX_Main-Bold.woff
    │   ├── KaTeX_Main-Bold.woff2
    │   ├── KaTeX_Main-Italic.eot
    │   ├── KaTeX_Main-Italic.ttf
    │   ├── KaTeX_Main-Italic.woff
    │   ├── KaTeX_Main-Italic.woff2
    │   ├── KaTeX_Main-Regular.eot
    │   ├── KaTeX_Main-Regular.ttf
    │   ├── KaTeX_Main-Regular.woff
    │   ├── KaTeX_Main-Regular.woff2
    │   ├── KaTeX_Math-BoldItalic.eot
    │   ├── KaTeX_Math-BoldItalic.ttf
    │   ├── KaTeX_Math-BoldItalic.woff
    │   ├── KaTeX_Math-BoldItalic.woff2
    │   ├── KaTeX_Math-Italic.eot
    │   ├── KaTeX_Math-Italic.ttf
    │   ├── KaTeX_Math-Italic.woff
    │   ├── KaTeX_Math-Italic.woff2
    │   ├── KaTeX_Math-Regular.eot
    │   ├── KaTeX_Math-Regular.ttf
    │   ├── KaTeX_Math-Regular.woff
    │   ├── KaTeX_Math-Regular.woff2
    │   ├── KaTeX_SansSerif-Bold.eot
    │   ├── KaTeX_SansSerif-Bold.ttf
    │   ├── KaTeX_SansSerif-Bold.woff
    │   ├── KaTeX_SansSerif-Bold.woff2
    │   ├── KaTeX_SansSerif-Italic.eot
    │   ├── KaTeX_SansSerif-Italic.ttf
    │   ├── KaTeX_SansSerif-Italic.woff
    │   ├── KaTeX_SansSerif-Italic.woff2
    │   ├── KaTeX_SansSerif-Regular.eot
    │   ├── KaTeX_SansSerif-Regular.ttf
    │   ├── KaTeX_SansSerif-Regular.woff
    │   ├── KaTeX_SansSerif-Regular.woff2
    │   ├── KaTeX_Script-Regular.eot
    │   ├── KaTeX_Script-Regular.ttf
    │   ├── KaTeX_Script-Regular.woff
    │   ├── KaTeX_Script-Regular.woff2
    │   ├── KaTeX_Size1-Regular.eot
    │   ├── KaTeX_Size1-Regular.ttf
    │   ├── KaTeX_Size1-Regular.woff
    │   ├── KaTeX_Size1-Regular.woff2
    │   ├── KaTeX_Size2-Regular.eot
    │   ├── KaTeX_Size2-Regular.ttf
    │   ├── KaTeX_Size2-Regular.woff
    │   ├── KaTeX_Size2-Regular.woff2
    │   ├── KaTeX_Size3-Regular.eot
    │   ├── KaTeX_Size3-Regular.ttf
    │   ├── KaTeX_Size3-Regular.woff
    │   ├── KaTeX_Size3-Regular.woff2
    │   ├── KaTeX_Size4-Regular.eot
    │   ├── KaTeX_Size4-Regular.ttf
    │   ├── KaTeX_Size4-Regular.woff
    │   ├── KaTeX_Size4-Regular.woff2
    │   ├── KaTeX_Typewriter-Regular.eot
    │   ├── KaTeX_Typewriter-Regular.ttf
    │   ├── KaTeX_Typewriter-Regular.woff
    │   └── KaTeX_Typewriter-Regular.woff2
    ├── github.svg
    ├── index.html.jinja
    ├── katex.min.css
    ├── katex.min.js
    ├── remark.min.js
    ├── requirements-slides.txt
    ├── style.css
    └── twitter.svg
├── environment.yml
├── images
    ├── .gitkeep
    ├── bag_of_words.png
    ├── calibration-regression.svg
    ├── calibration_regression.png
    ├── countvectorizer-ngrams.png
    ├── countvectorizer-ngrams.svg
    ├── countvectorizer.png
    ├── countvectorizer.svg
    ├── favicon_org.png
    ├── glm_unit_deviance.png
    ├── pipeline.svg
    ├── poisson_gamma_tweedie_distributions.png
    ├── resampling_approches.png
    ├── scikit-learn-logo-notext.png
    └── single_words.png
├── index.html
├── maint_tools
    └── check_notebooks.sh
├── make.py
├── notebooks
    ├── .gitkeep
    ├── 01-text-data.ipynb
    ├── 02-imbalanced-data.ipynb
    ├── 03-poisson-regression.ipynb
    ├── data
    │   ├── .gitkeep
    │   ├── claims.csv
    │   ├── london_bikes.csv
    │   └── review_polarity.tar.gz
    ├── images
    │   ├── kfold_cv.svg
    │   └── smote_generated.png
    └── solutions
    │   ├── 01-ex01-solutions.py
    │   ├── 01-ex02-solutions.py
    │   ├── 01-ex03-solutions.py
    │   ├── 02-ex01-solutions.py
    │   ├── 02-ex02-solutions.py
    │   ├── 02-ex03-solutions.py
    │   ├── 03-ex01-solutions.py
    │   ├── 03-ex02-solutions.py
    │   └── 03-ex03-solutions.py
├── requirements.txt
└── slides.md


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | notebooks/data/train
  7 | 
  8 | # C extensions
  9 | *.so
 10 | 
 11 | # Distribution / packaging
 12 | .Python
 13 | build/
 14 | develop-eggs/
 15 | dist/
 16 | downloads/
 17 | eggs/
 18 | .eggs/
 19 | lib/
 20 | lib64/
 21 | parts/
 22 | sdist/
 23 | var/
 24 | wheels/
 25 | share/python-wheels/
 26 | *.egg-info/
 27 | .installed.cfg
 28 | *.egg
 29 | MANIFEST
 30 | 
 31 | # PyInstaller
 32 | #  Usually these files are written by a python script from a template
 33 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 34 | *.manifest
 35 | *.spec
 36 | 
 37 | # Installer logs
 38 | pip-log.txt
 39 | pip-delete-this-directory.txt
 40 | 
 41 | # Unit test / coverage reports
 42 | htmlcov/
 43 | .tox/
 44 | .nox/
 45 | .coverage
 46 | .coverage.*
 47 | .cache
 48 | nosetests.xml
 49 | coverage.xml
 50 | *.cover
 51 | *.py,cover
 52 | .hypothesis/
 53 | .pytest_cache/
 54 | cover/
 55 | 
 56 | # Translations
 57 | *.mo
 58 | *.pot
 59 | 
 60 | # Django stuff:
 61 | *.log
 62 | local_settings.py
 63 | db.sqlite3
 64 | db.sqlite3-journal
 65 | 
 66 | # Flask stuff:
 67 | instance/
 68 | .webassets-cache
 69 | 
 70 | # Scrapy stuff:
 71 | .scrapy
 72 | 
 73 | # Sphinx documentation
 74 | docs/_build/
 75 | 
 76 | # PyBuilder
 77 | .pybuilder/
 78 | target/
 79 | 
 80 | # Jupyter Notebook
 81 | .ipynb_checkpoints
 82 | 
 83 | # IPython
 84 | profile_default/
 85 | ipython_config.py
 86 | 
 87 | # pyenv
 88 | #   For a library or package, you might want to ignore these files since the code is
 89 | #   intended to run in multiple environments; otherwise, check them in:
 90 | # .python-version
 91 | 
 92 | # pipenv
 93 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 94 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 95 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 96 | #   install all needed dependencies.
 97 | #Pipfile.lock
 98 | 
 99 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
100 | __pypackages__/
101 | 
102 | # Celery stuff
103 | celerybeat-schedule
104 | celerybeat.pid
105 | 
106 | # SageMath parsed files
107 | *.sage.py
108 | 
109 | # Environments
110 | .env
111 | .venv
112 | env/
113 | venv/
114 | ENV/
115 | env.bak/
116 | venv.bak/
117 | 
118 | # Spyder project settings
119 | .spyderproject
120 | .spyproject
121 | 
122 | # Rope project settings
123 | .ropeproject
124 | 
125 | # mkdocs documentation
126 | /site
127 | 
128 | # mypy
129 | .mypy_cache/
130 | .dmypy.json
131 | dmypy.json
132 | 
133 | # Pyre type checker
134 | .pyre/
135 | 
136 | # pytype static type analyzer
137 | .pytype/
138 | 
139 | # Cython debug symbols
140 | cython_debug/
141 | 
142 | notebooks/data/txt_sentoken
143 | 


--------------------------------------------------------------------------------
/BUILDING.md:
--------------------------------------------------------------------------------
 1 | # FAQ
 2 | 
 3 | ## How do I build slides?
 4 | 
 5 | Install the dependencies: `pip install -r assets/requirements-slides.txt`.
 6 | 
 7 | ```bash
 8 | python make.py build
 9 | ```
10 | 
11 | Remember to rebuild whenever `slides.md` is updated.
12 | 
13 | ## How do I develop and live reload?
14 | 
15 | ```bash
16 | python make.py live
17 | ```
18 | 
19 | ## How to host on GitHub Pages?
20 | 
21 | 1. Go to settings.
22 | 2. Enable GitHub Pages.
23 | 
24 | ## How to change my favicon?
25 | 
26 | Replace the favicon image with one of your own.
27 | 
28 | ## How to save as pdf?
29 | 
30 | 1. Install decktape
31 | 
32 | ```bash
33 | npm install -g decktape
34 | ```
35 | 
36 | 2. Run decktape
37 | 
38 | ```bash
39 | decktape "http://localhost:5500" slides.pdf
40 | ```
41 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Thomas Fan
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: clean check  # both targets are commands, not files to be built
2 | 
3 | clean:  # clear the stored outputs of notebooks/0*.ipynb, rewriting each notebook in place
4 | 	jupyter nbconvert --clear-output --inplace notebooks/0*.ipynb
5 | 
6 | check:  # run maint_tools/check_notebooks.sh with bash (script contents are not shown here)
7 | 	bash maint_tools/check_notebooks.sh
8 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Advanced Machine Learning with scikit-learn
 2 | ### Text Data, Imbalanced Data, and Poisson Regression
 3 | 
 4 | *By Thomas J. Fan*
 5 | 
 6 | [Link to slides](https://thomasjpfan.github.io/ml-workshop-advanced/)
 7 | 
 8 | Scikit-learn is a Python machine learning library used by data science practitioners from many disciplines. During this training, we will learn about processing text data, working with imbalanced data, and Poisson regression. We will start by processing text data with scikit-learn's vectorizers. Since the output of these vectorizers is sparse, we will also review scikit-learn estimators that can handle sparse data. Next, we will explore how to work with imbalanced data, where one of the classes appears more frequently than the others. We will look at estimators with class weights, resampling techniques provided by imbalanced-learn, and a bagging classifier with balancing. Finally, we will learn about generalized linear models focusing on Poisson regression. Poisson regression models target distributions that are counts or relative frequencies. We will use tree-based models such as Histogram-based Gradient Boosted Trees with a Poisson loss to model relative frequencies.
 9 | 
10 | ## Obtaining the Material
11 | 
12 | ### With git
13 | 
14 | The most convenient way to download the material is with git:
15 | 
16 | ```bash
17 | git clone https://github.com/thomasjpfan/ml-workshop-advanced
18 | ```
19 | 
20 | Please note that I may add and improve the material until shortly before the session. You can update your copy by running:
21 | 
22 | ```bash
23 | git pull origin master
24 | ```
25 | 
26 | ### Download zip
27 | 
28 | If you are not familiar with git, you can download this repository as a zip file at: [github.com/thomasjpfan/ml-workshop-advanced/archive/master.zip](https://github.com/thomasjpfan/ml-workshop-advanced/archive/master.zip). Please note that I may add and improve the material until shortly before the session. To update your copy please re-download the material a day before the session.
29 | 
30 | ## Running the notebooks
31 | 
32 | ### Local Installation
33 | 
34 | Local installation requires `conda` to be installed on your machine. The simplest way to install `conda` is to install `miniconda` by using an installer for your operating system provided at [docs.conda.io/en/latest/miniconda.html](https://docs.conda.io/en/latest/miniconda.html). After `conda` is installed, navigate to this repository on your local machine:
35 | 
36 | ```bash
37 | cd ml-workshop-advanced
38 | ```
39 | 
40 | Then download and install the dependencies:
41 | 
42 | ```bash
43 | conda env create -f environment.yml
44 | ```
45 | 
46 | This will create a virtual environment named `ml-workshop-advanced`. To activate this environment:
47 | 
48 | ```bash
49 | conda activate ml-workshop-advanced
50 | ```
51 | 
52 | Finally, to start `jupyterlab` run:
53 | 
54 | ```bash
55 | jupyter lab
56 | ```
57 | 
58 | This should open a browser window with the `jupyterlab` interface.
59 | 
60 | ### Run with Google's Colab
61 | 
62 | If you have any issues with installing `conda` or running `jupyter` on your local computer, then you can run the notebooks on Google's Colab:
63 | 
64 | 1. [Text Data](https://colab.research.google.com/github/thomasjpfan/ml-workshop-advanced/blob/master/notebooks/01-text-data.ipynb)
65 | 2. [Imbalanced data](https://colab.research.google.com/github/thomasjpfan/ml-workshop-advanced/blob/master/notebooks/02-imbalanced-data.ipynb)
66 | 3. [Poisson regression](https://colab.research.google.com/github/thomasjpfan/ml-workshop-advanced/blob/master/notebooks/03-poisson-regression.ipynb)
67 | 
68 | ## License
69 | 
70 | This repo is under the [MIT License](LICENSE).
71 | 


--------------------------------------------------------------------------------
/assets/auto-render.min.js:
--------------------------------------------------------------------------------
1 | (function(e){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=e()}else if(typeof define==="function"&&define.amd){define([],e)}else{var t;if(typeof window!=="undefined"){t=window}else if(typeof global!=="undefined"){t=global}else if(typeof self!=="undefined"){t=self}else{t=this}t.renderMathInElement=e()}})(function(){var e,t,n;return function e(t,n,r){function o(c,a){if(!n[c]){if(!t[c]){var u=typeof require=="function"&&require;if(!a&&u)return u(c,!0);if(i)return i(c,!0);var f=new Error("Cannot find module '"+c+"'");throw f.code="MODULE_NOT_FOUND",f}var s=n[c]={exports:{}};t[c][0].call(s.exports,function(e){var n=t[c][1][e];return o(n?n:e)},s,s.exports,e,t,n,r)}return n[c].exports}var i=typeof require=="function"&&require;for(var c=0;c<r.length;c++)o(r[c]);return o}({1:[function(e,t,n){"use strict";var r=e("babel-runtime/core-js/object/assign");var o=i(r);function i(e){return e&&e.__esModule?e:{default:e}}var c=e("./splitAtDelimiters");var a=function e(t,n){var r=[{type:"text",data:t}];for(var o=0;o<n.length;o++){var i=n[o];r=c(r,i.left,i.right,i.display||false)}return r};var u=function e(t,n){var r=a(t,n.delimiters);var o=document.createDocumentFragment();for(var i=0;i<r.length;i++){if(r[i].type==="text"){o.appendChild(document.createTextNode(r[i].data))}else{var c=document.createElement("span");var u=r[i].data;n.displayMode=r[i].display;try{katex.render(u,c,n)}catch(e){if(!(e instanceof katex.ParseError)){throw e}n.errorCallback("KaTeX auto-render: Failed to parse `"+r[i].data+"` with ",e);o.appendChild(document.createTextNode(r[i].rawData));continue}o.appendChild(c)}}return o};var f=function e(t,n){for(var r=0;r<t.childNodes.length;r++){var o=t.childNodes[r];if(o.nodeType===3){var i=u(o.textContent,n);r+=i.childNodes.length-1;t.replaceChild(i,o)}else if(o.nodeType===1){var c=n.ignoredTags.indexOf(o.nodeName.toLowerCase())===-1;if(c){e(o,n)}}}};var 
s={delimiters:[{left:"$$",right:"$$",display:true},{left:"\\[",right:"\\]",display:true},{left:"\\(",right:"\\)",display:false}],ignoredTags:["script","noscript","style","textarea","pre","code"],errorCallback:function e(t,n){console.error(t,n)}};var l=function e(t,n){if(!t){throw new Error("No element provided to render")}var r=(0,o.default)({},s,n);f(t,r)};t.exports=l},{"./splitAtDelimiters":2,"babel-runtime/core-js/object/assign":3}],2:[function(e,t,n){"use strict";var r=function e(t,n,r){var o=r;var i=0;var c=t.length;while(o<n.length){var a=n[o];if(i<=0&&n.slice(o,o+c)===t){return o}else if(a==="\\"){o++}else if(a==="{"){i++}else if(a==="}"){i--}o++}return-1};var o=function e(t,n,o,i){var c=[];for(var a=0;a<t.length;a++){if(t[a].type==="text"){var u=t[a].data;var f=true;var s=0;var l=void 0;l=u.indexOf(n);if(l!==-1){s=l;c.push({type:"text",data:u.slice(0,s)});f=false}while(true){if(f){l=u.indexOf(n,s);if(l===-1){break}c.push({type:"text",data:u.slice(s,l)});s=l}else{l=r(o,u,s+n.length);if(l===-1){break}c.push({type:"math",data:u.slice(s+n.length,l),rawData:u.slice(s,l+o.length),display:i});s=l+o.length}f=!f}c.push({type:"text",data:u.slice(s)})}else{c.push(t[a])}}return c};t.exports=o},{}],3:[function(e,t,n){t.exports={default:e("core-js/library/fn/object/assign"),__esModule:true}},{"core-js/library/fn/object/assign":4}],4:[function(e,t,n){e("../../modules/es6.object.assign");t.exports=e("../../modules/_core").Object.assign},{"../../modules/_core":9,"../../modules/es6.object.assign":39}],5:[function(e,t,n){t.exports=function(e){if(typeof e!="function")throw TypeError(e+" is not a function!");return e}},{}],6:[function(e,t,n){var r=e("./_is-object");t.exports=function(e){if(!r(e))throw TypeError(e+" is not an object!");return e}},{"./_is-object":22}],7:[function(e,t,n){var r=e("./_to-iobject"),o=e("./_to-length"),i=e("./_to-index");t.exports=function(e){return function(t,n,c){var a=r(t),u=o(a.length),f=i(c,u),s;if(e&&n!=n)while(u>f){s=a[f++];if(s!=s)return 
true}else for(;u>f;f++)if(e||f in a){if(a[f]===n)return e||f||0}return!e&&-1}}},{"./_to-index":32,"./_to-iobject":34,"./_to-length":35}],8:[function(e,t,n){var r={}.toString;t.exports=function(e){return r.call(e).slice(8,-1)}},{}],9:[function(e,t,n){var r=t.exports={version:"2.4.0"};if(typeof __e=="number")__e=r},{}],10:[function(e,t,n){var r=e("./_a-function");t.exports=function(e,t,n){r(e);if(t===undefined)return e;switch(n){case 1:return function(n){return e.call(t,n)};case 2:return function(n,r){return e.call(t,n,r)};case 3:return function(n,r,o){return e.call(t,n,r,o)}}return function(){return e.apply(t,arguments)}}},{"./_a-function":5}],11:[function(e,t,n){t.exports=function(e){if(e==undefined)throw TypeError("Can't call method on  "+e);return e}},{}],12:[function(e,t,n){t.exports=!e("./_fails")(function(){return Object.defineProperty({},"a",{get:function(){return 7}}).a!=7})},{"./_fails":16}],13:[function(e,t,n){var r=e("./_is-object"),o=e("./_global").document,i=r(o)&&r(o.createElement);t.exports=function(e){return i?o.createElement(e):{}}},{"./_global":17,"./_is-object":22}],14:[function(e,t,n){t.exports="constructor,hasOwnProperty,isPrototypeOf,propertyIsEnumerable,toLocaleString,toString,valueOf".split(",")},{}],15:[function(e,t,n){var r=e("./_global"),o=e("./_core"),i=e("./_ctx"),c=e("./_hide"),a="prototype";var u=function(e,t,n){var f=e&u.F,s=e&u.G,l=e&u.S,p=e&u.P,d=e&u.B,_=e&u.W,v=s?o:o[t]||(o[t]={}),h=v[a],b=s?r:l?r[t]:(r[t]||{})[a],y,g,x;if(s)n=t;for(y in n){g=!f&&b&&b[y]!==undefined;if(g&&y in v)continue;x=g?b[y]:n[y];v[y]=s&&typeof b[y]!="function"?n[y]:d&&g?i(x,r):_&&b[y]==x?function(e){var t=function(t,n,r){if(this instanceof e){switch(arguments.length){case 0:return new e;case 1:return new e(t);case 2:return new e(t,n)}return new e(t,n,r)}return e.apply(this,arguments)};t[a]=e[a];return t}(x):p&&typeof 
x=="function"?i(Function.call,x):x;if(p){(v.virtual||(v.virtual={}))[y]=x;if(e&u.R&&h&&!h[y])c(h,y,x)}}};u.F=1;u.G=2;u.S=4;u.P=8;u.B=16;u.W=32;u.U=64;u.R=128;t.exports=u},{"./_core":9,"./_ctx":10,"./_global":17,"./_hide":19}],16:[function(e,t,n){t.exports=function(e){try{return!!e()}catch(e){return true}}},{}],17:[function(e,t,n){var r=t.exports=typeof window!="undefined"&&window.Math==Math?window:typeof self!="undefined"&&self.Math==Math?self:Function("return this")();if(typeof __g=="number")__g=r},{}],18:[function(e,t,n){var r={}.hasOwnProperty;t.exports=function(e,t){return r.call(e,t)}},{}],19:[function(e,t,n){var r=e("./_object-dp"),o=e("./_property-desc");t.exports=e("./_descriptors")?function(e,t,n){return r.f(e,t,o(1,n))}:function(e,t,n){e[t]=n;return e}},{"./_descriptors":12,"./_object-dp":24,"./_property-desc":29}],20:[function(e,t,n){t.exports=!e("./_descriptors")&&!e("./_fails")(function(){return Object.defineProperty(e("./_dom-create")("div"),"a",{get:function(){return 7}}).a!=7})},{"./_descriptors":12,"./_dom-create":13,"./_fails":16}],21:[function(e,t,n){var r=e("./_cof");t.exports=Object("z").propertyIsEnumerable(0)?Object:function(e){return r(e)=="String"?e.split(""):Object(e)}},{"./_cof":8}],22:[function(e,t,n){t.exports=function(e){return typeof e==="object"?e!==null:typeof e==="function"}},{}],23:[function(e,t,n){"use strict";var r=e("./_object-keys"),o=e("./_object-gops"),i=e("./_object-pie"),c=e("./_to-object"),a=e("./_iobject"),u=Object.assign;t.exports=!u||e("./_fails")(function(){var e={},t={},n=Symbol(),r="abcdefghijklmnopqrst";e[n]=7;r.split("").forEach(function(e){t[e]=e});return u({},e)[n]!=7||Object.keys(u({},t)).join("")!=r})?function e(t,n){var u=c(t),f=arguments.length,s=1,l=o.f,p=i.f;while(f>s){var d=a(arguments[s++]),_=l?r(d).concat(l(d)):r(d),v=_.length,h=0,b;while(v>h)if(p.call(d,b=_[h++]))u[b]=d[b]}return 
u}:u},{"./_fails":16,"./_iobject":21,"./_object-gops":25,"./_object-keys":27,"./_object-pie":28,"./_to-object":36}],24:[function(e,t,n){var r=e("./_an-object"),o=e("./_ie8-dom-define"),i=e("./_to-primitive"),c=Object.defineProperty;n.f=e("./_descriptors")?Object.defineProperty:function e(t,n,a){r(t);n=i(n,true);r(a);if(o)try{return c(t,n,a)}catch(e){}if("get"in a||"set"in a)throw TypeError("Accessors not supported!");if("value"in a)t[n]=a.value;return t}},{"./_an-object":6,"./_descriptors":12,"./_ie8-dom-define":20,"./_to-primitive":37}],25:[function(e,t,n){n.f=Object.getOwnPropertySymbols},{}],26:[function(e,t,n){var r=e("./_has"),o=e("./_to-iobject"),i=e("./_array-includes")(false),c=e("./_shared-key")("IE_PROTO");t.exports=function(e,t){var n=o(e),a=0,u=[],f;for(f in n)if(f!=c)r(n,f)&&u.push(f);while(t.length>a)if(r(n,f=t[a++])){~i(u,f)||u.push(f)}return u}},{"./_array-includes":7,"./_has":18,"./_shared-key":30,"./_to-iobject":34}],27:[function(e,t,n){var r=e("./_object-keys-internal"),o=e("./_enum-bug-keys");t.exports=Object.keys||function e(t){return r(t,o)}},{"./_enum-bug-keys":14,"./_object-keys-internal":26}],28:[function(e,t,n){n.f={}.propertyIsEnumerable},{}],29:[function(e,t,n){t.exports=function(e,t){return{enumerable:!(e&1),configurable:!(e&2),writable:!(e&4),value:t}}},{}],30:[function(e,t,n){var r=e("./_shared")("keys"),o=e("./_uid");t.exports=function(e){return r[e]||(r[e]=o(e))}},{"./_shared":31,"./_uid":38}],31:[function(e,t,n){var r=e("./_global"),o="__core-js_shared__",i=r[o]||(r[o]={});t.exports=function(e){return i[e]||(i[e]={})}},{"./_global":17}],32:[function(e,t,n){var r=e("./_to-integer"),o=Math.max,i=Math.min;t.exports=function(e,t){e=r(e);return e<0?o(e+t,0):i(e,t)}},{"./_to-integer":33}],33:[function(e,t,n){var r=Math.ceil,o=Math.floor;t.exports=function(e){return isNaN(e=+e)?0:(e>0?o:r)(e)}},{}],34:[function(e,t,n){var r=e("./_iobject"),o=e("./_defined");t.exports=function(e){return 
r(o(e))}},{"./_defined":11,"./_iobject":21}],35:[function(e,t,n){var r=e("./_to-integer"),o=Math.min;t.exports=function(e){return e>0?o(r(e),9007199254740991):0}},{"./_to-integer":33}],36:[function(e,t,n){var r=e("./_defined");t.exports=function(e){return Object(r(e))}},{"./_defined":11}],37:[function(e,t,n){var r=e("./_is-object");t.exports=function(e,t){if(!r(e))return e;var n,o;if(t&&typeof(n=e.toString)=="function"&&!r(o=n.call(e)))return o;if(typeof(n=e.valueOf)=="function"&&!r(o=n.call(e)))return o;if(!t&&typeof(n=e.toString)=="function"&&!r(o=n.call(e)))return o;throw TypeError("Can't convert object to primitive value")}},{"./_is-object":22}],38:[function(e,t,n){var r=0,o=Math.random();t.exports=function(e){return"Symbol(".concat(e===undefined?"":e,")_",(++r+o).toString(36))}},{}],39:[function(e,t,n){var r=e("./_export");r(r.S+r.F,"Object",{assign:e("./_object-assign")})},{"./_export":15,"./_object-assign":23}]},{},[1])(1)});
2 | 


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_AMS-Regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_AMS-Regular.eot


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_AMS-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_AMS-Regular.ttf


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_AMS-Regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_AMS-Regular.woff


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_AMS-Regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_AMS-Regular.woff2


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Caligraphic-Bold.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Caligraphic-Bold.eot


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Caligraphic-Bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Caligraphic-Bold.ttf


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Caligraphic-Bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Caligraphic-Bold.woff


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Caligraphic-Bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Caligraphic-Bold.woff2


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Caligraphic-Regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Caligraphic-Regular.eot


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Caligraphic-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Caligraphic-Regular.ttf


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Caligraphic-Regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Caligraphic-Regular.woff


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Caligraphic-Regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Caligraphic-Regular.woff2


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Fraktur-Bold.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Fraktur-Bold.eot


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Fraktur-Bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Fraktur-Bold.ttf


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Fraktur-Bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Fraktur-Bold.woff


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Fraktur-Bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Fraktur-Bold.woff2


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Fraktur-Regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Fraktur-Regular.eot


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Fraktur-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Fraktur-Regular.ttf


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Fraktur-Regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Fraktur-Regular.woff


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Fraktur-Regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Fraktur-Regular.woff2


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Main-Bold.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Bold.eot


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Main-Bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Bold.ttf


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Main-Bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Bold.woff


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Main-Bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Bold.woff2


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Main-Italic.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Italic.eot


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Main-Italic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Italic.ttf


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Main-Italic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Italic.woff


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Main-Italic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Italic.woff2


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Main-Regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Regular.eot


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Main-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Regular.ttf


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Main-Regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Regular.woff


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Main-Regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Regular.woff2


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Math-BoldItalic.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-BoldItalic.eot


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Math-BoldItalic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-BoldItalic.ttf


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Math-BoldItalic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-BoldItalic.woff


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Math-BoldItalic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-BoldItalic.woff2


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Math-Italic.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-Italic.eot


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Math-Italic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-Italic.ttf


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Math-Italic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-Italic.woff


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Math-Italic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-Italic.woff2


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Math-Regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-Regular.eot


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Math-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-Regular.ttf


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Math-Regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-Regular.woff


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Math-Regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-Regular.woff2


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_SansSerif-Bold.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Bold.eot


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_SansSerif-Bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Bold.ttf


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_SansSerif-Bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Bold.woff


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_SansSerif-Bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Bold.woff2


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_SansSerif-Italic.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Italic.eot


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_SansSerif-Italic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Italic.ttf


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_SansSerif-Italic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Italic.woff


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_SansSerif-Italic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Italic.woff2


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_SansSerif-Regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Regular.eot


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_SansSerif-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Regular.ttf


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_SansSerif-Regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Regular.woff


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_SansSerif-Regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Regular.woff2


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Script-Regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Script-Regular.eot


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Script-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Script-Regular.ttf


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Script-Regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Script-Regular.woff


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Script-Regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Script-Regular.woff2


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Size1-Regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size1-Regular.eot


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Size1-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size1-Regular.ttf


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Size1-Regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size1-Regular.woff


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Size1-Regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size1-Regular.woff2


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Size2-Regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size2-Regular.eot


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Size2-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size2-Regular.ttf


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Size2-Regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size2-Regular.woff


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Size2-Regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size2-Regular.woff2


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Size3-Regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size3-Regular.eot


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Size3-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size3-Regular.ttf


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Size3-Regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size3-Regular.woff


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Size3-Regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size3-Regular.woff2


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Size4-Regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size4-Regular.eot


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Size4-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size4-Regular.ttf


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Size4-Regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size4-Regular.woff


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Size4-Regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size4-Regular.woff2


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Typewriter-Regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Typewriter-Regular.eot


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Typewriter-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Typewriter-Regular.ttf


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Typewriter-Regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Typewriter-Regular.woff


--------------------------------------------------------------------------------
/assets/fonts/KaTeX_Typewriter-Regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Typewriter-Regular.woff2


--------------------------------------------------------------------------------
/assets/github.svg:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
3 | <svg width="100%" height="100%" viewBox="0 0 25 24" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" xmlns:serif="http://www.serif.com/" style="fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;">
4 |     <g transform="matrix(1,0,0,1,0.0191268,0.228032)">
5 |         <path d="M12,0C5.374,0 0,5.373 0,12C0,17.302 3.438,21.8 8.207,23.387C8.806,23.498 9,23.126 9,22.81L9,20.576C5.662,21.302 4.967,19.16 4.967,19.16C4.421,17.773 3.634,17.404 3.634,17.404C2.545,16.659 3.717,16.675 3.717,16.675C4.922,16.759 5.556,17.912 5.556,17.912C6.626,19.746 8.363,19.216 9.048,18.909C9.155,18.134 9.466,17.604 9.81,17.305C7.145,17 4.343,15.971 4.343,11.374C4.343,10.063 4.812,8.993 5.579,8.153C5.455,7.85 5.044,6.629 5.696,4.977C5.696,4.977 6.704,4.655 8.997,6.207C9.954,5.941 10.98,5.808 12,5.803C13.02,5.808 14.047,5.941 15.006,6.207C17.297,4.655 18.303,4.977 18.303,4.977C18.956,6.63 18.545,7.851 18.421,8.153C19.191,8.993 19.656,10.064 19.656,11.374C19.656,15.983 16.849,16.998 14.177,17.295C14.607,17.667 15,18.397 15,19.517L15,22.81C15,23.129 15.192,23.504 15.801,23.386C20.566,21.797 24,17.3 24,12C24,5.373 18.627,0 12,0Z" style="fill-rule:nonzero;"/>
6 |     </g>
7 | </svg>
8 | 


--------------------------------------------------------------------------------
/assets/index.html.jinja:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | {#
 3 |   Page template for a remark slide deck.
 4 |   Context variables:
 5 |     title     - text for the <title> element
 6 |     slides    - slide source, emitted into the #source textarea
 7 |     use_katex - when truthy, load the KaTeX assets and render math
 8 |     lang      - optional document language for <html lang>; defaults to "en"
 9 | #}
10 | <html lang="{{ lang | default('en') }}">
11 | 
12 | <head>
13 |   {# Declare the encoding before any text content such as the title. #}
14 |   <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
15 |   <title>{{ title }}</title>
16 |   <link rel="stylesheet" href="./assets/style.css">
17 |   <link rel="shortcut icon" type="image/x-icon" href="favicon.ico">
18 |   {% if use_katex %}
19 |   <link rel="stylesheet" href="./assets/katex.min.css">
20 |   <script src="./assets/katex.min.js"></script>
21 |   <script src="./assets/auto-render.min.js"></script>
22 |   {% endif %}
23 | </head>
24 | 
25 | <body>
26 |   {# Slide source for remark; see the remark docs for how #source is consumed. #}
27 |   <textarea id="source">{{ slides }}</textarea>
28 |   <script src="./assets/remark.min.js"></script>
29 |   <script>
30 |     // Options handed to remark.create() below.
31 |     var options = {
32 |       ratio: '16:9',
33 |       highlightLanguage: "python",
34 |       highlightStyle: "github",
35 |       highlightLines: true,
36 |       highlightSpans: true,
37 |       slideNumberFormat: '',
38 |     };
39 |     // "scale" macro: builds an <img> whose src is the macro's `this` value
40 |     // and whose CSS width is the given percentage argument.
41 |     remark.macros['scale'] = function (percentage) {
42 |         var url = this;
43 |         return '<img src="' + url + '" style="width: ' + percentage + '" />';
44 |     };
45 |     {% if use_katex %}
46 |     // Runs KaTeX auto-render over the whole document body. "$$" is listed
47 |     // before "$"; NOTE(review): auto-render is order-sensitive for delimiters
48 |     // that share a prefix, so keep this ordering (confirm against KaTeX docs).
49 |     var renderMath = function () {
50 |       renderMathInElement(document.body, {
51 |         delimiters: [
52 |           { left: "$$", right: "$$", display: true },
53 |           { left: "$", right: "$", display: false },
54 |           { left: "\\[", right: "\\]", display: true },
55 |           { left: "\\(", right: "\\)", display: false },
56 |         ]
57 |       });
58 |     };
59 |     // renderMath is passed as the second argument to remark.create().
60 |     var slideshow = remark.create(options, renderMath);
61 |     {% else %}
62 |     var slideshow = remark.create(options);
63 |     {% endif %}
64 |   </script>
65 | </body>
66 | 
67 | </html>
68 | 


--------------------------------------------------------------------------------
/assets/katex.min.css:
--------------------------------------------------------------------------------
1 | @font-face{font-family:KaTeX_AMS;src:url(fonts/KaTeX_AMS-Regular.woff2) format("woff2"),url(fonts/KaTeX_AMS-Regular.woff) format("woff"),url(fonts/KaTeX_AMS-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Caligraphic;src:url(fonts/KaTeX_Caligraphic-Bold.woff2) format("woff2"),url(fonts/KaTeX_Caligraphic-Bold.woff) format("woff"),url(fonts/KaTeX_Caligraphic-Bold.ttf) format("truetype");font-weight:700;font-style:normal}@font-face{font-family:KaTeX_Caligraphic;src:url(fonts/KaTeX_Caligraphic-Regular.woff2) format("woff2"),url(fonts/KaTeX_Caligraphic-Regular.woff) format("woff"),url(fonts/KaTeX_Caligraphic-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Fraktur;src:url(fonts/KaTeX_Fraktur-Bold.woff2) format("woff2"),url(fonts/KaTeX_Fraktur-Bold.woff) format("woff"),url(fonts/KaTeX_Fraktur-Bold.ttf) format("truetype");font-weight:700;font-style:normal}@font-face{font-family:KaTeX_Fraktur;src:url(fonts/KaTeX_Fraktur-Regular.woff2) format("woff2"),url(fonts/KaTeX_Fraktur-Regular.woff) format("woff"),url(fonts/KaTeX_Fraktur-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Main;src:url(fonts/KaTeX_Main-Bold.woff2) format("woff2"),url(fonts/KaTeX_Main-Bold.woff) format("woff"),url(fonts/KaTeX_Main-Bold.ttf) format("truetype");font-weight:700;font-style:normal}@font-face{font-family:KaTeX_Main;src:url(fonts/KaTeX_Main-BoldItalic.woff2) format("woff2"),url(fonts/KaTeX_Main-BoldItalic.woff) format("woff"),url(fonts/KaTeX_Main-BoldItalic.ttf) format("truetype");font-weight:700;font-style:italic}@font-face{font-family:KaTeX_Main;src:url(fonts/KaTeX_Main-Italic.woff2) format("woff2"),url(fonts/KaTeX_Main-Italic.woff) format("woff"),url(fonts/KaTeX_Main-Italic.ttf) format("truetype");font-weight:400;font-style:italic}@font-face{font-family:KaTeX_Main;src:url(fonts/KaTeX_Main-Regular.woff2) format("woff2"),url(fonts/KaTeX_Main-Regular.woff) 
format("woff"),url(fonts/KaTeX_Main-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Math;src:url(fonts/KaTeX_Math-BoldItalic.woff2) format("woff2"),url(fonts/KaTeX_Math-BoldItalic.woff) format("woff"),url(fonts/KaTeX_Math-BoldItalic.ttf) format("truetype");font-weight:700;font-style:italic}@font-face{font-family:KaTeX_Math;src:url(fonts/KaTeX_Math-Italic.woff2) format("woff2"),url(fonts/KaTeX_Math-Italic.woff) format("woff"),url(fonts/KaTeX_Math-Italic.ttf) format("truetype");font-weight:400;font-style:italic}@font-face{font-family:"KaTeX_SansSerif";src:url(fonts/KaTeX_SansSerif-Bold.woff2) format("woff2"),url(fonts/KaTeX_SansSerif-Bold.woff) format("woff"),url(fonts/KaTeX_SansSerif-Bold.ttf) format("truetype");font-weight:700;font-style:normal}@font-face{font-family:"KaTeX_SansSerif";src:url(fonts/KaTeX_SansSerif-Italic.woff2) format("woff2"),url(fonts/KaTeX_SansSerif-Italic.woff) format("woff"),url(fonts/KaTeX_SansSerif-Italic.ttf) format("truetype");font-weight:400;font-style:italic}@font-face{font-family:"KaTeX_SansSerif";src:url(fonts/KaTeX_SansSerif-Regular.woff2) format("woff2"),url(fonts/KaTeX_SansSerif-Regular.woff) format("woff"),url(fonts/KaTeX_SansSerif-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Script;src:url(fonts/KaTeX_Script-Regular.woff2) format("woff2"),url(fonts/KaTeX_Script-Regular.woff) format("woff"),url(fonts/KaTeX_Script-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Size1;src:url(fonts/KaTeX_Size1-Regular.woff2) format("woff2"),url(fonts/KaTeX_Size1-Regular.woff) format("woff"),url(fonts/KaTeX_Size1-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Size2;src:url(fonts/KaTeX_Size2-Regular.woff2) format("woff2"),url(fonts/KaTeX_Size2-Regular.woff) format("woff"),url(fonts/KaTeX_Size2-Regular.ttf) 
format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Size3;src:url(fonts/KaTeX_Size3-Regular.woff2) format("woff2"),url(fonts/KaTeX_Size3-Regular.woff) format("woff"),url(fonts/KaTeX_Size3-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Size4;src:url(fonts/KaTeX_Size4-Regular.woff2) format("woff2"),url(fonts/KaTeX_Size4-Regular.woff) format("woff"),url(fonts/KaTeX_Size4-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Typewriter;src:url(fonts/KaTeX_Typewriter-Regular.woff2) format("woff2"),url(fonts/KaTeX_Typewriter-Regular.woff) format("woff"),url(fonts/KaTeX_Typewriter-Regular.ttf) format("truetype");font-weight:400;font-style:normal}.katex{font:normal 1.21em KaTeX_Main,Times New Roman,serif;line-height:1.2;text-indent:0;text-rendering:auto}.katex *{-ms-high-contrast-adjust:none!important}.katex .katex-version:after{content:"0.11.1"}.katex .katex-mathml{position:absolute;clip:rect(1px,1px,1px,1px);padding:0;border:0;height:1px;width:1px;overflow:hidden}.katex .katex-html>.newline{display:block}.katex .base{position:relative;white-space:nowrap;width:min-content}.katex .base,.katex .strut{display:inline-block}.katex .textbf{font-weight:700}.katex .textit{font-style:italic}.katex .textrm{font-family:KaTeX_Main}.katex .textsf{font-family:KaTeX_SansSerif}.katex .texttt{font-family:KaTeX_Typewriter}.katex .mathdefault{font-family:KaTeX_Math;font-style:italic}.katex .mathit{font-family:KaTeX_Main;font-style:italic}.katex .mathrm{font-style:normal}.katex .mathbf{font-family:KaTeX_Main;font-weight:700}.katex .boldsymbol{font-family:KaTeX_Math;font-weight:700;font-style:italic}.katex .amsrm,.katex .mathbb,.katex .textbb{font-family:KaTeX_AMS}.katex .mathcal{font-family:KaTeX_Caligraphic}.katex .mathfrak,.katex .textfrak{font-family:KaTeX_Fraktur}.katex .mathtt{font-family:KaTeX_Typewriter}.katex .mathscr,.katex .textscr{font-family:KaTeX_Script}.katex 
.mathsf,.katex .textsf{font-family:KaTeX_SansSerif}.katex .mathboldsf,.katex .textboldsf{font-family:KaTeX_SansSerif;font-weight:700}.katex .mathitsf,.katex .textitsf{font-family:KaTeX_SansSerif;font-style:italic}.katex .mainrm{font-family:KaTeX_Main;font-style:normal}.katex .vlist-t{display:inline-table;table-layout:fixed}.katex .vlist-r{display:table-row}.katex .vlist{display:table-cell;vertical-align:bottom;position:relative}.katex .vlist>span{display:block;height:0;position:relative}.katex .vlist>span>span{display:inline-block}.katex .vlist>span>.pstrut{overflow:hidden;width:0}.katex .vlist-t2{margin-right:-2px}.katex .vlist-s{display:table-cell;vertical-align:bottom;font-size:1px;width:2px;min-width:2px}.katex .msupsub{text-align:left}.katex .mfrac>span>span{text-align:center}.katex .mfrac .frac-line{display:inline-block;width:100%;border-bottom-style:solid}.katex .hdashline,.katex .hline,.katex .mfrac .frac-line,.katex .overline .overline-line,.katex .rule,.katex .underline .underline-line{min-height:1px}.katex .mspace{display:inline-block}.katex .clap,.katex .llap,.katex .rlap{width:0;position:relative}.katex .clap>.inner,.katex .llap>.inner,.katex .rlap>.inner{position:absolute}.katex .clap>.fix,.katex .llap>.fix,.katex .rlap>.fix{display:inline-block}.katex .llap>.inner{right:0}.katex .clap>.inner,.katex .rlap>.inner{left:0}.katex .clap>.inner>span{margin-left:-50%;margin-right:50%}.katex .rule{display:inline-block;border:0 solid;position:relative}.katex .hline,.katex .overline .overline-line,.katex .underline .underline-line{display:inline-block;width:100%;border-bottom-style:solid}.katex .hdashline{display:inline-block;width:100%;border-bottom-style:dashed}.katex .sqrt>.root{margin-left:.27777778em;margin-right:-.55555556em}.katex .fontsize-ensurer.reset-size1.size1,.katex .sizing.reset-size1.size1{font-size:1em}.katex .fontsize-ensurer.reset-size1.size2,.katex .sizing.reset-size1.size2{font-size:1.2em}.katex .fontsize-ensurer.reset-size1.size3,.katex 
.sizing.reset-size1.size3{font-size:1.4em}.katex .fontsize-ensurer.reset-size1.size4,.katex .sizing.reset-size1.size4{font-size:1.6em}.katex .fontsize-ensurer.reset-size1.size5,.katex .sizing.reset-size1.size5{font-size:1.8em}.katex .fontsize-ensurer.reset-size1.size6,.katex .sizing.reset-size1.size6{font-size:2em}.katex .fontsize-ensurer.reset-size1.size7,.katex .sizing.reset-size1.size7{font-size:2.4em}.katex .fontsize-ensurer.reset-size1.size8,.katex .sizing.reset-size1.size8{font-size:2.88em}.katex .fontsize-ensurer.reset-size1.size9,.katex .sizing.reset-size1.size9{font-size:3.456em}.katex .fontsize-ensurer.reset-size1.size10,.katex .sizing.reset-size1.size10{font-size:4.148em}.katex .fontsize-ensurer.reset-size1.size11,.katex .sizing.reset-size1.size11{font-size:4.976em}.katex .fontsize-ensurer.reset-size2.size1,.katex .sizing.reset-size2.size1{font-size:.83333333em}.katex .fontsize-ensurer.reset-size2.size2,.katex .sizing.reset-size2.size2{font-size:1em}.katex .fontsize-ensurer.reset-size2.size3,.katex .sizing.reset-size2.size3{font-size:1.16666667em}.katex .fontsize-ensurer.reset-size2.size4,.katex .sizing.reset-size2.size4{font-size:1.33333333em}.katex .fontsize-ensurer.reset-size2.size5,.katex .sizing.reset-size2.size5{font-size:1.5em}.katex .fontsize-ensurer.reset-size2.size6,.katex .sizing.reset-size2.size6{font-size:1.66666667em}.katex .fontsize-ensurer.reset-size2.size7,.katex .sizing.reset-size2.size7{font-size:2em}.katex .fontsize-ensurer.reset-size2.size8,.katex .sizing.reset-size2.size8{font-size:2.4em}.katex .fontsize-ensurer.reset-size2.size9,.katex .sizing.reset-size2.size9{font-size:2.88em}.katex .fontsize-ensurer.reset-size2.size10,.katex .sizing.reset-size2.size10{font-size:3.45666667em}.katex .fontsize-ensurer.reset-size2.size11,.katex .sizing.reset-size2.size11{font-size:4.14666667em}.katex .fontsize-ensurer.reset-size3.size1,.katex .sizing.reset-size3.size1{font-size:.71428571em}.katex .fontsize-ensurer.reset-size3.size2,.katex 
.sizing.reset-size3.size2{font-size:.85714286em}.katex .fontsize-ensurer.reset-size3.size3,.katex .sizing.reset-size3.size3{font-size:1em}.katex .fontsize-ensurer.reset-size3.size4,.katex .sizing.reset-size3.size4{font-size:1.14285714em}.katex .fontsize-ensurer.reset-size3.size5,.katex .sizing.reset-size3.size5{font-size:1.28571429em}.katex .fontsize-ensurer.reset-size3.size6,.katex .sizing.reset-size3.size6{font-size:1.42857143em}.katex .fontsize-ensurer.reset-size3.size7,.katex .sizing.reset-size3.size7{font-size:1.71428571em}.katex .fontsize-ensurer.reset-size3.size8,.katex .sizing.reset-size3.size8{font-size:2.05714286em}.katex .fontsize-ensurer.reset-size3.size9,.katex .sizing.reset-size3.size9{font-size:2.46857143em}.katex .fontsize-ensurer.reset-size3.size10,.katex .sizing.reset-size3.size10{font-size:2.96285714em}.katex .fontsize-ensurer.reset-size3.size11,.katex .sizing.reset-size3.size11{font-size:3.55428571em}.katex .fontsize-ensurer.reset-size4.size1,.katex .sizing.reset-size4.size1{font-size:.625em}.katex .fontsize-ensurer.reset-size4.size2,.katex .sizing.reset-size4.size2{font-size:.75em}.katex .fontsize-ensurer.reset-size4.size3,.katex .sizing.reset-size4.size3{font-size:.875em}.katex .fontsize-ensurer.reset-size4.size4,.katex .sizing.reset-size4.size4{font-size:1em}.katex .fontsize-ensurer.reset-size4.size5,.katex .sizing.reset-size4.size5{font-size:1.125em}.katex .fontsize-ensurer.reset-size4.size6,.katex .sizing.reset-size4.size6{font-size:1.25em}.katex .fontsize-ensurer.reset-size4.size7,.katex .sizing.reset-size4.size7{font-size:1.5em}.katex .fontsize-ensurer.reset-size4.size8,.katex .sizing.reset-size4.size8{font-size:1.8em}.katex .fontsize-ensurer.reset-size4.size9,.katex .sizing.reset-size4.size9{font-size:2.16em}.katex .fontsize-ensurer.reset-size4.size10,.katex .sizing.reset-size4.size10{font-size:2.5925em}.katex .fontsize-ensurer.reset-size4.size11,.katex .sizing.reset-size4.size11{font-size:3.11em}.katex 
.fontsize-ensurer.reset-size5.size1,.katex .sizing.reset-size5.size1{font-size:.55555556em}.katex .fontsize-ensurer.reset-size5.size2,.katex .sizing.reset-size5.size2{font-size:.66666667em}.katex .fontsize-ensurer.reset-size5.size3,.katex .sizing.reset-size5.size3{font-size:.77777778em}.katex .fontsize-ensurer.reset-size5.size4,.katex .sizing.reset-size5.size4{font-size:.88888889em}.katex .fontsize-ensurer.reset-size5.size5,.katex .sizing.reset-size5.size5{font-size:1em}.katex .fontsize-ensurer.reset-size5.size6,.katex .sizing.reset-size5.size6{font-size:1.11111111em}.katex .fontsize-ensurer.reset-size5.size7,.katex .sizing.reset-size5.size7{font-size:1.33333333em}.katex .fontsize-ensurer.reset-size5.size8,.katex .sizing.reset-size5.size8{font-size:1.6em}.katex .fontsize-ensurer.reset-size5.size9,.katex .sizing.reset-size5.size9{font-size:1.92em}.katex .fontsize-ensurer.reset-size5.size10,.katex .sizing.reset-size5.size10{font-size:2.30444444em}.katex .fontsize-ensurer.reset-size5.size11,.katex .sizing.reset-size5.size11{font-size:2.76444444em}.katex .fontsize-ensurer.reset-size6.size1,.katex .sizing.reset-size6.size1{font-size:.5em}.katex .fontsize-ensurer.reset-size6.size2,.katex .sizing.reset-size6.size2{font-size:.6em}.katex .fontsize-ensurer.reset-size6.size3,.katex .sizing.reset-size6.size3{font-size:.7em}.katex .fontsize-ensurer.reset-size6.size4,.katex .sizing.reset-size6.size4{font-size:.8em}.katex .fontsize-ensurer.reset-size6.size5,.katex .sizing.reset-size6.size5{font-size:.9em}.katex .fontsize-ensurer.reset-size6.size6,.katex .sizing.reset-size6.size6{font-size:1em}.katex .fontsize-ensurer.reset-size6.size7,.katex .sizing.reset-size6.size7{font-size:1.2em}.katex .fontsize-ensurer.reset-size6.size8,.katex .sizing.reset-size6.size8{font-size:1.44em}.katex .fontsize-ensurer.reset-size6.size9,.katex .sizing.reset-size6.size9{font-size:1.728em}.katex .fontsize-ensurer.reset-size6.size10,.katex .sizing.reset-size6.size10{font-size:2.074em}.katex 
.fontsize-ensurer.reset-size6.size11,.katex .sizing.reset-size6.size11{font-size:2.488em}.katex .fontsize-ensurer.reset-size7.size1,.katex .sizing.reset-size7.size1{font-size:.41666667em}.katex .fontsize-ensurer.reset-size7.size2,.katex .sizing.reset-size7.size2{font-size:.5em}.katex .fontsize-ensurer.reset-size7.size3,.katex .sizing.reset-size7.size3{font-size:.58333333em}.katex .fontsize-ensurer.reset-size7.size4,.katex .sizing.reset-size7.size4{font-size:.66666667em}.katex .fontsize-ensurer.reset-size7.size5,.katex .sizing.reset-size7.size5{font-size:.75em}.katex .fontsize-ensurer.reset-size7.size6,.katex .sizing.reset-size7.size6{font-size:.83333333em}.katex .fontsize-ensurer.reset-size7.size7,.katex .sizing.reset-size7.size7{font-size:1em}.katex .fontsize-ensurer.reset-size7.size8,.katex .sizing.reset-size7.size8{font-size:1.2em}.katex .fontsize-ensurer.reset-size7.size9,.katex .sizing.reset-size7.size9{font-size:1.44em}.katex .fontsize-ensurer.reset-size7.size10,.katex .sizing.reset-size7.size10{font-size:1.72833333em}.katex .fontsize-ensurer.reset-size7.size11,.katex .sizing.reset-size7.size11{font-size:2.07333333em}.katex .fontsize-ensurer.reset-size8.size1,.katex .sizing.reset-size8.size1{font-size:.34722222em}.katex .fontsize-ensurer.reset-size8.size2,.katex .sizing.reset-size8.size2{font-size:.41666667em}.katex .fontsize-ensurer.reset-size8.size3,.katex .sizing.reset-size8.size3{font-size:.48611111em}.katex .fontsize-ensurer.reset-size8.size4,.katex .sizing.reset-size8.size4{font-size:.55555556em}.katex .fontsize-ensurer.reset-size8.size5,.katex .sizing.reset-size8.size5{font-size:.625em}.katex .fontsize-ensurer.reset-size8.size6,.katex .sizing.reset-size8.size6{font-size:.69444444em}.katex .fontsize-ensurer.reset-size8.size7,.katex .sizing.reset-size8.size7{font-size:.83333333em}.katex .fontsize-ensurer.reset-size8.size8,.katex .sizing.reset-size8.size8{font-size:1em}.katex .fontsize-ensurer.reset-size8.size9,.katex 
.sizing.reset-size8.size9{font-size:1.2em}.katex .fontsize-ensurer.reset-size8.size10,.katex .sizing.reset-size8.size10{font-size:1.44027778em}.katex .fontsize-ensurer.reset-size8.size11,.katex .sizing.reset-size8.size11{font-size:1.72777778em}.katex .fontsize-ensurer.reset-size9.size1,.katex .sizing.reset-size9.size1{font-size:.28935185em}.katex .fontsize-ensurer.reset-size9.size2,.katex .sizing.reset-size9.size2{font-size:.34722222em}.katex .fontsize-ensurer.reset-size9.size3,.katex .sizing.reset-size9.size3{font-size:.40509259em}.katex .fontsize-ensurer.reset-size9.size4,.katex .sizing.reset-size9.size4{font-size:.46296296em}.katex .fontsize-ensurer.reset-size9.size5,.katex .sizing.reset-size9.size5{font-size:.52083333em}.katex .fontsize-ensurer.reset-size9.size6,.katex .sizing.reset-size9.size6{font-size:.5787037em}.katex .fontsize-ensurer.reset-size9.size7,.katex .sizing.reset-size9.size7{font-size:.69444444em}.katex .fontsize-ensurer.reset-size9.size8,.katex .sizing.reset-size9.size8{font-size:.83333333em}.katex .fontsize-ensurer.reset-size9.size9,.katex .sizing.reset-size9.size9{font-size:1em}.katex .fontsize-ensurer.reset-size9.size10,.katex .sizing.reset-size9.size10{font-size:1.20023148em}.katex .fontsize-ensurer.reset-size9.size11,.katex .sizing.reset-size9.size11{font-size:1.43981481em}.katex .fontsize-ensurer.reset-size10.size1,.katex .sizing.reset-size10.size1{font-size:.24108004em}.katex .fontsize-ensurer.reset-size10.size2,.katex .sizing.reset-size10.size2{font-size:.28929605em}.katex .fontsize-ensurer.reset-size10.size3,.katex .sizing.reset-size10.size3{font-size:.33751205em}.katex .fontsize-ensurer.reset-size10.size4,.katex .sizing.reset-size10.size4{font-size:.38572806em}.katex .fontsize-ensurer.reset-size10.size5,.katex .sizing.reset-size10.size5{font-size:.43394407em}.katex .fontsize-ensurer.reset-size10.size6,.katex .sizing.reset-size10.size6{font-size:.48216008em}.katex .fontsize-ensurer.reset-size10.size7,.katex 
.sizing.reset-size10.size7{font-size:.57859209em}.katex .fontsize-ensurer.reset-size10.size8,.katex .sizing.reset-size10.size8{font-size:.69431051em}.katex .fontsize-ensurer.reset-size10.size9,.katex .sizing.reset-size10.size9{font-size:.83317261em}.katex .fontsize-ensurer.reset-size10.size10,.katex .sizing.reset-size10.size10{font-size:1em}.katex .fontsize-ensurer.reset-size10.size11,.katex .sizing.reset-size10.size11{font-size:1.19961427em}.katex .fontsize-ensurer.reset-size11.size1,.katex .sizing.reset-size11.size1{font-size:.20096463em}.katex .fontsize-ensurer.reset-size11.size2,.katex .sizing.reset-size11.size2{font-size:.24115756em}.katex .fontsize-ensurer.reset-size11.size3,.katex .sizing.reset-size11.size3{font-size:.28135048em}.katex .fontsize-ensurer.reset-size11.size4,.katex .sizing.reset-size11.size4{font-size:.32154341em}.katex .fontsize-ensurer.reset-size11.size5,.katex .sizing.reset-size11.size5{font-size:.36173633em}.katex .fontsize-ensurer.reset-size11.size6,.katex .sizing.reset-size11.size6{font-size:.40192926em}.katex .fontsize-ensurer.reset-size11.size7,.katex .sizing.reset-size11.size7{font-size:.48231511em}.katex .fontsize-ensurer.reset-size11.size8,.katex .sizing.reset-size11.size8{font-size:.57877814em}.katex .fontsize-ensurer.reset-size11.size9,.katex .sizing.reset-size11.size9{font-size:.69453376em}.katex .fontsize-ensurer.reset-size11.size10,.katex .sizing.reset-size11.size10{font-size:.83360129em}.katex .fontsize-ensurer.reset-size11.size11,.katex .sizing.reset-size11.size11{font-size:1em}.katex .delimsizing.size1{font-family:KaTeX_Size1}.katex .delimsizing.size2{font-family:KaTeX_Size2}.katex .delimsizing.size3{font-family:KaTeX_Size3}.katex .delimsizing.size4{font-family:KaTeX_Size4}.katex .delimsizing.mult .delim-size1>span{font-family:KaTeX_Size1}.katex .delimsizing.mult .delim-size4>span{font-family:KaTeX_Size4}.katex .nulldelimiter{display:inline-block;width:.12em}.katex .delimcenter,.katex .op-symbol{position:relative}.katex 
.op-symbol.small-op{font-family:KaTeX_Size1}.katex .op-symbol.large-op{font-family:KaTeX_Size2}.katex .op-limits>.vlist-t{text-align:center}.katex .accent>.vlist-t{text-align:center}.katex .accent .accent-body{position:relative}.katex .accent .accent-body:not(.accent-full){width:0}.katex .overlay{display:block}.katex .mtable .vertical-separator{display:inline-block;min-width:1px}.katex .mtable .arraycolsep{display:inline-block}.katex .mtable .col-align-c>.vlist-t{text-align:center}.katex .mtable .col-align-l>.vlist-t{text-align:left}.katex .mtable .col-align-r>.vlist-t{text-align:right}.katex .svg-align{text-align:left}.katex svg{display:block;position:absolute;width:100%;height:inherit;fill:currentColor;stroke:currentColor;fill-rule:nonzero;fill-opacity:1;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1}.katex svg path{stroke:none}.katex img{border-style:none;min-width:0;min-height:0;max-width:none;max-height:none}.katex .stretchy{width:100%;display:block;position:relative;overflow:hidden}.katex .stretchy:after,.katex .stretchy:before{content:""}.katex .hide-tail{width:100%;position:relative;overflow:hidden}.katex .halfarrow-left{position:absolute;left:0;width:50.2%;overflow:hidden}.katex .halfarrow-right{position:absolute;right:0;width:50.2%;overflow:hidden}.katex .brace-left{position:absolute;left:0;width:25.1%;overflow:hidden}.katex .brace-center{position:absolute;left:25%;width:50%;overflow:hidden}.katex .brace-right{position:absolute;right:0;width:25.1%;overflow:hidden}.katex .x-arrow-pad{padding:0 .5em}.katex .mover,.katex .munder,.katex .x-arrow{text-align:center}.katex .boxpad{padding:0 .3em}.katex .fbox,.katex .fcolorbox{box-sizing:border-box;border:.04em solid}.katex .cancel-pad{padding:0 .2em}.katex .cancel-lap{margin-left:-.2em;margin-right:-.2em}.katex .sout{border-bottom-style:solid;border-bottom-width:.08em}.katex-display{display:block;margin:1em 
0;text-align:center}.katex-display>.katex{display:block;text-align:center;white-space:nowrap}.katex-display>.katex>.katex-html{display:block;position:relative}.katex-display>.katex>.katex-html>.tag{position:absolute;right:0}.katex-display.leqno>.katex>.katex-html>.tag{left:0;right:auto}.katex-display.fleqn>.katex{text-align:left}
2 | 


--------------------------------------------------------------------------------
/assets/requirements-slides.txt:
--------------------------------------------------------------------------------
1 | certifi
2 | Jinja2
3 | livereload
4 | MarkupSafe
5 | six
6 | tornado
7 | 


--------------------------------------------------------------------------------
/assets/style.css:
--------------------------------------------------------------------------------
  1 | body {
  2 |   font-family: Helvetica, sans-serif;
  3 |   color: #354046;
  4 |   line-height: 1.4;
  5 |   font-size: 16pt;
  6 |   word-wrap: break-word;
  7 | }
  8 | 
  9 | 
 10 | /* Remark ------------------------------------------------------------------- */
 11 | 
 12 | .remark-container {
 13 |   background: #ddd;
 14 | }
 15 | 
 16 | .remark-slide-content {
 17 |   font-size: 1em;
 18 |   border-top: solid 5px #520b92;
 19 |   padding-top: 0.5em;
 20 |   vertical-align: middle;
 21 | }
 22 | 
 23 | .remark-slide-content h1 {
 24 |   font-weight: bold;
 25 |   font-size: 1.9em;
 26 |   margin: 0.25em 0;
 27 |   color: #520b92;
 28 | }
 29 | 
 30 | .remark-slide-content h2 {
 31 |   font-size: 1.5em;
 32 |   font-weight: bold;
 33 |   margin: 0.25em 0;
 34 | }
 35 | 
 36 | .remark-slide-content h3 {
 37 |   font-size: 1.2em;
 38 |   font-weight: bold;
 39 |   margin: 0.25em 0;
 40 | }
 41 | 
 42 | .remark-slide-content p,
 43 | .remark-slide-content ul,
 44 | .remark-slide-content ol {
 45 |   font-size: 1.2em;
 46 |   margin: 0.7em 0;
 47 | }
 48 | 
 49 | .remark-slide-number {
 50 |   font-size: 0.5em;
 51 |   bottom: 2em;
 52 | }
 53 | 
 54 | /* Styles ------------------------------------------------------------------- */
 55 | 
 56 | a {
 57 |   color: #008000;
 58 |   text-decoration: none;
 59 | }
 60 | 
 61 | a:hover {
 62 |   text-decoration: underline;
 63 | }
 64 | 
 65 | .italic {
 66 |   font-style: italic;
 67 | }
 68 | 
 69 | .bold {
 70 |   font-weight: bold;
 71 | }
 72 | 
 73 | em {
 74 |   font-style: italic;
 75 | }
 76 | 
 77 | strong {
 78 |   font-style: normal;
 79 |   font-weight: bold;
 80 | }
 81 | 
 82 | pre {
 83 |   margin: 0.3em;
 84 | }
 85 | 
 86 | .larger {
 87 |   font-size: 1.3em;
 88 | }
 89 | 
 90 | .smaller {
 91 |   font-size: 0.8em;
 92 | }
 93 | 
 94 | .smaller-x {
 95 |   font-size: 0.5em;
 96 | }
 97 | 
 98 | .katex {
 99 |   color: black;
100 | }
101 | 
102 | .black-slide .katex,
103 | .black-slide h1,
104 | .black-slide a {
105 |   color: white;
106 | }
107 | 
108 | ul>li,
109 | ol>li {
110 |   margin: 0.3em 0;
111 | }
112 | 
113 | li>p {
114 |   margin: 0.2em 0;
115 | }
116 | 
117 | ul>li>ul>li {
118 |   font-size: 0.8em;
119 | }
120 | 
121 | .caption {
122 |   display: block;
123 |   text-align: center;
124 |   font-size: 0.8em;
125 |   font-style: italic;
126 | }
127 | 
128 | .circle img {
129 |   border-radius: 50%;
130 | }
131 | 
132 | .stretch img {
133 |   width: 100%;
134 |   height: auto;
135 | }
136 | 
137 | .footnote {
138 |   position: absolute;
139 |   bottom: 0.5em;
140 |   font-size: 0.8em;
141 |   opacity: 0.9;
142 |   left: 1.5em;
143 | }
144 | 
145 | .footnote a {
146 |   color: #0288d1;
147 |   /*#20707e;*/
148 | }
149 | 
150 | .footnote-back {
151 |   position: absolute;
152 |   bottom: 0.5em;
153 |   font-size: 0.7em;
154 |   opacity: 0.9;
155 |   right: 1.5em;
156 | }
157 | 
158 | .inactive {
159 |   color: lightgray;
160 | }
161 | 
162 | .success {
163 |   background-color: rgba(40, 167, 69, 0.25);
164 |   border: 2px solid rgb(40, 167, 69);
165 |   border-radius: 5px;
166 |   width: 95%;
167 |   display: block;
168 |   padding: 1em;
169 | }
170 | 
171 | .alert {
172 |   color: #721c24;
173 |   background-color: #f8d7da;
174 |   border: 2px solid #f5c6cb;
175 |   border-radius: 5px;
176 |   width: 95%;
177 |   display: block;
178 |   padding: 1em;
179 | }
180 | 
181 | .hljs-github .hljs-comment {
182 |   color: #520b92;
183 | }
184 | 
185 | 
186 | /* Specials ----------------------------------------------------------------- */
187 | 
188 | .title-slide {
189 |   font-size: 1.1em;
190 |   text-align: center;
191 |   vertical-align: middle;
192 | }
193 | 
194 | .title-slide h1 {
195 |   margin-top: 1em;
196 | }
197 | 
198 | .chapter-slide {
199 |   background-color: #520b92;
200 |   color: white;
201 |   vertical-align: middle;
202 |   font-size: 1.2em;
203 |   text-align: center;
204 | }
205 | 
206 | .chapter-slide h1,
207 | .chapter-slide a {
208 |   color: white;
209 | }
210 | 
211 | .black-slide { /* white-on-black slide variant */
212 |   background-color: black;
213 |   color: white;
214 |   border-top: solid 0px; /* zero-width top border; .remark-slide-content declares a 5px one earlier in this file */
215 | }
216 | 
217 | .red-slide { /* white-on-red slide variant */
218 |   background-color: red;
219 |   color: white;
220 |   border-top: solid 0px; /* zero-width top border; .remark-slide-content declares a 5px one earlier in this file */
221 | }
222 | 
223 | .this-talk-link {
224 |   font-size: 0.8em;
225 |   display: block;
226 |   font-style: italic;
227 |   margin-top: 1em;
228 | }
229 | 
230 | 
231 | blockquote {
232 |   font-size: 1.4em;
233 |   width: 75%;
234 |   margin: 0 auto;
235 |   font-style: italic;
236 |   color: #555555;
237 |   padding: 1.2em 30px 1.2em 75px;
238 |   border-left: 8px solid #008000;
239 |   line-height: 1.6;
240 |   position: relative;
241 |   background: #EDEDED;
242 | }
243 | 
244 | 
245 | blockquote::before { /* decorative opening quote mark, absolutely positioned inside the relatively positioned blockquote */
246 |   font-family: Arial, sans-serif; /* generic fallback keeps the glyph sans-serif when Arial is unavailable */
247 |   content: "\201C"; /* U+201C LEFT DOUBLE QUOTATION MARK */
248 |   color: #008000;
249 |   font-size: 4em;
250 |   position: absolute;
251 |   left: 0;
252 |   top: -10px;
253 | }
254 | 
255 | blockquote::after {
256 |   content: '';
257 | }
258 | 
259 | blockquote cite {
260 |   display: block;
261 |   color: #333333;
262 |   font-style: normal;
263 |   font-weight: bold;
264 |   margin-top: 1em;
265 | }
266 | 
267 | .icon {
268 |   background-size: cover;
269 |   display: inline-block;
270 |   transform: scale(1.5);
271 |   margin-top: 12px;
272 |   height: 24px;
273 |   width: 25px;
274 | }
275 | 
276 | .icon-left {
277 |   margin-right: 16px;
278 | }
279 | 
280 | .icon-twitter {
281 |   background: url("data:image/svg+xml,%3Csvg width='100%25' height='100%25' viewBox='0 0 24 24' version='1.1' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' xml:space='preserve' xmlns:serif='http://www.serif.com/' style='fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;'%3E %3Cg transform='matrix(0.995894,0,0,0.995894,0.0514823,0.156485)'%3E %3Cpath d='M24,4.557C23.117,4.949 22.168,5.213 21.172,5.332C22.189,4.723 22.97,3.758 23.337,2.608C22.386,3.172 21.332,3.582 20.21,3.803C19.313,2.846 18.032,2.248 16.616,2.248C13.437,2.248 11.101,5.214 11.819,8.293C7.728,8.088 4.1,6.128 1.671,3.149C0.381,5.362 1.002,8.257 3.194,9.723C2.388,9.697 1.628,9.476 0.965,9.107C0.911,11.388 2.546,13.522 4.914,13.997C4.221,14.185 3.462,14.229 2.69,14.081C3.316,16.037 5.134,17.46 7.29,17.5C5.22,19.123 2.612,19.848 0,19.54C2.179,20.937 4.768,21.752 7.548,21.752C16.69,21.752 21.855,14.031 21.543,7.106C22.505,6.411 23.34,5.544 24,4.557Z' style='fill-rule:nonzero;'/%3E %3C/g%3E %3C/svg%3E ") no-repeat;
282 | }
283 | 
284 | .icon-twitter-white {
285 |   background: url("data:image/svg+xml,%3Csvg width='100%25' height='100%25' viewBox='0 0 24 24' version='1.1' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' xml:space='preserve' xmlns:serif='http://www.serif.com/' style='fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;'%3E %3Cg transform='matrix(0.995894,0,0,0.995894,0.0514823,0.156485)'%3E %3Cpath d='M24,4.557C23.117,4.949 22.168,5.213 21.172,5.332C22.189,4.723 22.97,3.758 23.337,2.608C22.386,3.172 21.332,3.582 20.21,3.803C19.313,2.846 18.032,2.248 16.616,2.248C13.437,2.248 11.101,5.214 11.819,8.293C7.728,8.088 4.1,6.128 1.671,3.149C0.381,5.362 1.002,8.257 3.194,9.723C2.388,9.697 1.628,9.476 0.965,9.107C0.911,11.388 2.546,13.522 4.914,13.997C4.221,14.185 3.462,14.229 2.69,14.081C3.316,16.037 5.134,17.46 7.29,17.5C5.22,19.123 2.612,19.848 0,19.54C2.179,20.937 4.768,21.752 7.548,21.752C16.69,21.752 21.855,14.031 21.543,7.106C22.505,6.411 23.34,5.544 24,4.557Z' style='fill:white;fill-rule:nonzero;'/%3E %3C/g%3E %3C/svg%3E ") no-repeat;
286 | 
287 | }
288 | 
289 | .icon-github-white {
290 |   background: url("data:image/svg+xml,%3Csvg width='100%25' height='100%25' viewBox='0 0 25 24' version='1.1' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' xml:space='preserve' xmlns:serif='http://www.serif.com/' style='fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;'%3E %3Cg transform='matrix(1,0,0,1,0.0503063,0.209536)'%3E %3Cpath d='M12,0C5.374,0 0,5.373 0,12C0,17.302 3.438,21.8 8.207,23.387C8.806,23.498 9,23.126 9,22.81L9,20.576C5.662,21.302 4.967,19.16 4.967,19.16C4.421,17.773 3.634,17.404 3.634,17.404C2.545,16.659 3.717,16.675 3.717,16.675C4.922,16.759 5.556,17.912 5.556,17.912C6.626,19.746 8.363,19.216 9.048,18.909C9.155,18.134 9.466,17.604 9.81,17.305C7.145,17 4.343,15.971 4.343,11.374C4.343,10.063 4.812,8.993 5.579,8.153C5.455,7.85 5.044,6.629 5.696,4.977C5.696,4.977 6.704,4.655 8.997,6.207C9.954,5.941 10.98,5.808 12,5.803C13.02,5.808 14.047,5.941 15.006,6.207C17.297,4.655 18.303,4.977 18.303,4.977C18.956,6.63 18.545,7.851 18.421,8.153C19.191,8.993 19.656,10.064 19.656,11.374C19.656,15.983 16.849,16.998 14.177,17.295C14.607,17.667 15,18.397 15,19.517L15,22.81C15,23.129 15.192,23.504 15.801,23.386C20.566,21.797 24,17.3 24,12C24,5.373 18.627,0 12,0Z' style='fill:white;fill-rule:nonzero;'/%3E %3C/g%3E %3C/svg%3E ") no-repeat;
291 | }
292 | 
293 | .icon-github {
294 |   background: url("data:image/svg+xml,%3Csvg width='100%25' height='100%25' viewBox='0 0 25 24' version='1.1' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' xml:space='preserve' xmlns:serif='http://www.serif.com/' style='fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;'%3E %3Cg transform='matrix(1,0,0,1,0.0191268,0.228032)'%3E %3Cpath d='M12,0C5.374,0 0,5.373 0,12C0,17.302 3.438,21.8 8.207,23.387C8.806,23.498 9,23.126 9,22.81L9,20.576C5.662,21.302 4.967,19.16 4.967,19.16C4.421,17.773 3.634,17.404 3.634,17.404C2.545,16.659 3.717,16.675 3.717,16.675C4.922,16.759 5.556,17.912 5.556,17.912C6.626,19.746 8.363,19.216 9.048,18.909C9.155,18.134 9.466,17.604 9.81,17.305C7.145,17 4.343,15.971 4.343,11.374C4.343,10.063 4.812,8.993 5.579,8.153C5.455,7.85 5.044,6.629 5.696,4.977C5.696,4.977 6.704,4.655 8.997,6.207C9.954,5.941 10.98,5.808 12,5.803C13.02,5.808 14.047,5.941 15.006,6.207C17.297,4.655 18.303,4.977 18.303,4.977C18.956,6.63 18.545,7.851 18.421,8.153C19.191,8.993 19.656,10.064 19.656,11.374C19.656,15.983 16.849,16.998 14.177,17.295C14.607,17.667 15,18.397 15,19.517L15,22.81C15,23.129 15.192,23.504 15.801,23.386C20.566,21.797 24,17.3 24,12C24,5.373 18.627,0 12,0Z' style='fill-rule:nonzero;'/%3E %3C/g%3E %3C/svg%3E ") no-repeat;
295 | }
296 | 
297 | .icon-github:hover,
298 | .icon-github-white:hover {
299 |   background: url("data:image/svg+xml,%3Csvg width='100%25' height='100%25' viewBox='0 0 25 24' version='1.1' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' xml:space='preserve' xmlns:serif='http://www.serif.com/' style='fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;'%3E %3Cg transform='matrix(1,0,0,1,0.0503063,0.209536)'%3E %3Cpath d='M12,0C5.374,0 0,5.373 0,12C0,17.302 3.438,21.8 8.207,23.387C8.806,23.498 9,23.126 9,22.81L9,20.576C5.662,21.302 4.967,19.16 4.967,19.16C4.421,17.773 3.634,17.404 3.634,17.404C2.545,16.659 3.717,16.675 3.717,16.675C4.922,16.759 5.556,17.912 5.556,17.912C6.626,19.746 8.363,19.216 9.048,18.909C9.155,18.134 9.466,17.604 9.81,17.305C7.145,17 4.343,15.971 4.343,11.374C4.343,10.063 4.812,8.993 5.579,8.153C5.455,7.85 5.044,6.629 5.696,4.977C5.696,4.977 6.704,4.655 8.997,6.207C9.954,5.941 10.98,5.808 12,5.803C13.02,5.808 14.047,5.941 15.006,6.207C17.297,4.655 18.303,4.977 18.303,4.977C18.956,6.63 18.545,7.851 18.421,8.153C19.191,8.993 19.656,10.064 19.656,11.374C19.656,15.983 16.849,16.998 14.177,17.295C14.607,17.667 15,18.397 15,19.517L15,22.81C15,23.129 15.192,23.504 15.801,23.386C20.566,21.797 24,17.3 24,12C24,5.373 18.627,0 12,0Z' style='fill:rgb(0,128,0);fill-rule:nonzero;'/%3E %3C/g%3E %3C/svg%3E ") no-repeat;
300 | }
301 | 
302 | .icon-twitter:hover,
303 | .icon-twitter-white:hover {
304 |   background: url("data:image/svg+xml,%3Csvg width='100%25' height='100%25' viewBox='0 0 24 24' version='1.1' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' xml:space='preserve' xmlns:serif='http://www.serif.com/' style='fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;'%3E %3Cg transform='matrix(0.995894,0,0,0.995894,0.0514823,0.156485)'%3E %3Cpath d='M24,4.557C23.117,4.949 22.168,5.213 21.172,5.332C22.189,4.723 22.97,3.758 23.337,2.608C22.386,3.172 21.332,3.582 20.21,3.803C19.313,2.846 18.032,2.248 16.616,2.248C13.437,2.248 11.101,5.214 11.819,8.293C7.728,8.088 4.1,6.128 1.671,3.149C0.381,5.362 1.002,8.257 3.194,9.723C2.388,9.697 1.628,9.476 0.965,9.107C0.911,11.388 2.546,13.522 4.914,13.997C4.221,14.185 3.462,14.229 2.69,14.081C3.316,16.037 5.134,17.46 7.29,17.5C5.22,19.123 2.612,19.848 0,19.54C2.179,20.937 4.768,21.752 7.548,21.752C16.69,21.752 21.855,14.031 21.543,7.106C22.505,6.411 23.34,5.544 24,4.557Z' style='fill:rgb(0,128,0);fill-rule:nonzero;'/%3E %3C/g%3E %3C/svg%3E ") no-repeat;
305 | 
306 | }
307 | 
308 | .remark-slide table {
309 |   border: none;
310 |   border-collapse: collapse;
311 |   border-spacing: 0;
312 |   color: black;
313 |   table-layout: fixed;
314 | }
315 | 
316 | .remark-slide table thead {
317 |   border-bottom: 1px solid black;
318 |   vertical-align: bottom;
319 | }
320 | 
321 | .remark-slide table tr,
322 | .remark-slide table th,
323 | .remark-slide table td {
324 |   text-align: right;
325 |   vertical-align: middle;
326 |   padding: 0.5em 0.5em;
327 |   line-height: normal;
328 |   white-space: normal;
329 |   max-width: none;
330 |   border: none;
331 | }
332 | 
333 | .remark-slide table th {
334 |   font-weight: bold;
335 | }
336 | 
337 | .remark-slide table tbody tr:nth-child(odd) {
338 |   background: #f5f5f5;
339 | }
340 | 
341 | .remark-slide table tbody tr:hover {
342 |   background: rgba(66, 165, 245, 0.2);
343 | }
344 | 
345 | .remark-slide table.alternating { /* selector spelling now matches the other table.alternating rules that follow */
346 |   border: none;
347 |   border-collapse: collapse;
348 |   border-spacing: 0;
349 | }
350 | 
351 | .remark-slide table.alternating td,
352 | .remark-slide table.alternating tr {
353 |   border: none;
354 | }
355 | 
356 | .remark-slide table.alternating tbody tr:nth-child(odd) {
357 |   background: #f0f0f0;
358 | }
359 | 
360 | /* grid */
361 | 
362 | /* Flexbox grid: .g is the wrapping row container; .g-1 to .g-12 set column widths in twelfths. No customization variables are defined in this file. */
363 | 
364 | .g { /* grid row: a wrapping, row-direction flex container */
365 |   box-sizing: border-box;
366 |   display: -ms-flexbox; /* prefixed fallbacks are listed first so the unprefixed declaration wins where it is supported */
367 |   display: -webkit-box;
368 |   display: flex;
369 |   -ms-flex: 0 1 auto;
370 |   -webkit-box-flex: 0;
371 |   flex: 0 1 auto;
372 |   -ms-flex-direction: row;
373 |   -webkit-box-orient: horizontal;
374 |   -webkit-box-direction: normal;
375 |   flex-direction: row;
376 |   -ms-flex-wrap: wrap;
377 |   flex-wrap: wrap;
378 |   margin: 0.5rem -0.5rem; /* negative left/right margins; NOTE(review): the column classes use 1rem side padding, confirm the 0.5rem offset is intended */
379 | }
380 | 
381 | .g-1, .g-2, /* was ".g-1 .g-2" (descendant selector), which excluded standalone .g-1 and .g-2 columns */
382 | .g-3,
383 | .g-4,
384 | .g-5,
385 | .g-6,
386 | .g-7,
387 | .g-8,
388 | .g-9,
389 | .g-10,
390 | .g-11,
391 | .g-12 { /* shared base for every column class; widths are set by the per-class rules that follow */
392 |   box-sizing: border-box;
393 |   -ms-flex: 0 0 auto;
394 |   -webkit-box-flex: 0;
395 |   flex: 0 0 auto; /* columns neither grow nor shrink */
396 |   padding-right: 1rem;
397 |   padding-left: 1rem;
398 | }
399 | 
400 | .g-1 {
401 |   -ms-flex-preferred-size: 8.333%;
402 |   flex-basis: 8.333%;
403 |   max-width: 8.333%;
404 | }
405 | 
406 | .g-2 {
407 |   -ms-flex-preferred-size: 16.667%;
408 |   flex-basis: 16.667%;
409 |   max-width: 16.667%;
410 | }
411 | 
412 | .g-3 {
413 |   -ms-flex-preferred-size: 25%;
414 |   flex-basis: 25%;
415 |   max-width: 25%;
416 | }
417 | 
418 | .g-4 {
419 |   -ms-flex-preferred-size: 33.333%;
420 |   flex-basis: 33.333%;
421 |   max-width: 33.333%;
422 | }
423 | 
424 | .g-5 {
425 |   -ms-flex-preferred-size: 41.667%;
426 |   flex-basis: 41.667%;
427 |   max-width: 41.667%;
428 | }
429 | 
430 | .g-6 {
431 |   -ms-flex-preferred-size: 50%;
432 |   flex-basis: 50%;
433 |   max-width: 50%;
434 | }
435 | 
436 | .g-7 {
437 |   -ms-flex-preferred-size: 58.333%;
438 |   flex-basis: 58.333%;
439 |   max-width: 58.333%;
440 | }
441 | 
442 | .g-8 {
443 |   -ms-flex-preferred-size: 66.667%;
444 |   flex-basis: 66.667%;
445 |   max-width: 66.667%;
446 | }
447 | 
448 | .g-9 {
449 |   -ms-flex-preferred-size: 75%;
450 |   flex-basis: 75%;
451 |   max-width: 75%;
452 | }
453 | 
454 | .g-10 {
455 |   -ms-flex-preferred-size: 83.333%;
456 |   flex-basis: 83.333%;
457 |   max-width: 83.333%;
458 | }
459 | 
460 | .g-11 {
461 |   -ms-flex-preferred-size: 91.667%;
462 |   flex-basis: 91.667%;
463 |   max-width: 91.667%;
464 | }
465 | 
466 | .g-12 {
467 |   -ms-flex-preferred-size: 100%;
468 |   flex-basis: 100%;
469 |   max-width: 100%;
470 | }
471 | 
472 | .g-start {
473 |   -ms-flex-pack: start;
474 |   -webkit-box-pack: start;
475 |   justify-content: flex-start;
476 |   text-align: start;
477 | }
478 | 
479 | .g-center {
480 |   -ms-flex-pack: center;
481 |   -webkit-box-pack: center;
482 |   justify-content: center;
483 |   text-align: center;
484 | }
485 | 
486 | .g-end {
487 |   -ms-flex-pack: end;
488 |   -webkit-box-pack: end;
489 |   justify-content: flex-end;
490 |   text-align: end;
491 | }
492 | 
493 | .g-top {
494 |   -ms-flex-align: start;
495 |   -webkit-box-align: start;
496 |   align-items: flex-start;
497 | }
498 | 
499 | .g-middle {
500 |   -ms-flex-align: center;
501 |   -webkit-box-align: center;
502 |   align-items: center;
503 | }
504 | 
505 | .g-bottom {
506 |   -ms-flex-align: end;
507 |   -webkit-box-align: end;
508 |   align-items: flex-end;
509 | }
510 | 
511 | .g-around { /* distribute free space around each flex item */
512 |   -ms-flex-pack: distribute; /* IE10 keyword corresponding to space-around; no -webkit-box-pack is declared for this rule */
513 |   justify-content: space-around;
514 | }
515 | 
516 | .g-between { /* distribute free space between flex items */
517 |   -ms-flex-pack: justify; /* IE10 keyword corresponding to space-between */
518 |   -webkit-box-pack: justify; /* legacy WebKit box-model equivalent */
519 |   justify-content: space-between;
520 | }
521 | 


--------------------------------------------------------------------------------
/assets/twitter.svg:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
3 | <svg width="100%" height="100%" viewBox="0 0 24 24" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" xmlns:serif="http://www.serif.com/" style="fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;">
4 |     <g transform="matrix(0.995894,0,0,0.995894,0.0514823,0.156485)">
5 |         <path d="M24,4.557C23.117,4.949 22.168,5.213 21.172,5.332C22.189,4.723 22.97,3.758 23.337,2.608C22.386,3.172 21.332,3.582 20.21,3.803C19.313,2.846 18.032,2.248 16.616,2.248C13.437,2.248 11.101,5.214 11.819,8.293C7.728,8.088 4.1,6.128 1.671,3.149C0.381,5.362 1.002,8.257 3.194,9.723C2.388,9.697 1.628,9.476 0.965,9.107C0.911,11.388 2.546,13.522 4.914,13.997C4.221,14.185 3.462,14.229 2.69,14.081C3.316,16.037 5.134,17.46 7.29,17.5C5.22,19.123 2.612,19.848 0,19.54C2.179,20.937 4.768,21.752 7.548,21.752C16.69,21.752 21.855,14.031 21.543,7.106C22.505,6.411 23.34,5.544 24,4.557Z" style="fill-rule:nonzero;"/>
6 |     </g>
7 | </svg>
8 | 


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
 1 | name: ml-workshop-advanced  # conda environment name
 2 | channels:
 3 |   - conda-forge
 4 |   - nodefaults  # excludes conda's default channels
 5 | dependencies:
 6 |   - python=3.9.7
 7 |   - scikit-learn=1.0.*  # any release in the 1.0 series
 8 |   - seaborn=0.11.2
 9 |   - pandas  # NOTE(review): the only dependency without a version constraint; confirm this is intentional
10 |   - jupyterlab=3.1.*
11 |   - imbalanced-learn=0.9.*
12 | 


--------------------------------------------------------------------------------
/images/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/.gitkeep


--------------------------------------------------------------------------------
/images/bag_of_words.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/bag_of_words.png


--------------------------------------------------------------------------------
/images/calibration_regression.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/calibration_regression.png


--------------------------------------------------------------------------------
/images/countvectorizer-ngrams.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/countvectorizer-ngrams.png


--------------------------------------------------------------------------------
/images/countvectorizer-ngrams.svg:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
3 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" width="851px" height="351px" viewBox="-0.5 -0.5 851 351" style="background-color: rgb(255, 255, 255);"><defs/><g><path d="M 425 40 L 425 103.63" fill="none" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 425 108.88 L 421.5 101.88 L 425 103.63 L 428.5 101.88 Z" fill="rgba(0, 0, 0, 1)" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="all"/><rect x="45" y="0" width="760" height="40" fill="none" stroke="none" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 758px; height: 1px; padding-top: 20px; margin-left: 46px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;"><font style="font-size: 24px" face="Courier New"><b style="font-size: 24px">"Can we go to the mountain tomorrow?"</b></font></div></div></div></foreignObject><text x="425" y="27" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">"Can we go to the mountain tomorrow?"</text></switch></g><rect x="0" y="110" width="850" height="40" fill="none" stroke="none" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; 
align-items: unsafe center; justify-content: unsafe center; width: 848px; height: 1px; padding-top: 130px; margin-left: 1px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;"><font style="font-size: 24px" face="Courier New"><b style="font-size: 24px">["can", "we", "go", "to", "the", "mountain", "tomorrow"]</b></font></div></div></div></foreignObject><text x="425" y="137" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">["can", "we", "go", "to", "the", "mountain", "tomorrow"]</text></switch></g><rect x="305" y="60" width="240" height="30" fill="#dae8fc" stroke="#6c8ebf" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 75px; margin-left: 425px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 22px; font-family: &quot;Courier New&quot;; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: nowrap;"><b style="font-size: 22px">Unigram tokenizer</b></div></div></div></foreignObject><text x="425" y="82" fill="rgba(0, 0, 0, 1)" font-family="Courier New" font-size="22px" text-anchor="middle">Unigram tokenizer</text></switch></g><path d="M 425 220 L 425 293.63" fill="none" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 425 298.88 L 421.5 291.88 L 425 293.63 L 428.5 
291.88 Z" fill="rgba(0, 0, 0, 1)" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="all"/><rect x="45" y="180" width="760" height="40" fill="none" stroke="none" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 758px; height: 1px; padding-top: 200px; margin-left: 46px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;"><font style="font-size: 24px" face="Courier New"><b style="font-size: 24px">"Can we go to the mountain tomorrow?"</b></font></div></div></div></foreignObject><text x="425" y="207" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">"Can we go to the mountain tomorrow?"</text></switch></g><rect x="0" y="300" width="850" height="40" fill="none" stroke="none" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 848px; height: 1px; padding-top: 320px; margin-left: 1px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; 
white-space: normal; overflow-wrap: normal;"><font style="font-size: 24px" face="Courier New"><b style="font-size: 24px">["can we", "we go", "go to", "to the", "the mountain", "mountain tomorrow"]</b></font></div></div></div></foreignObject><text x="425" y="327" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">["can we", "we go", "go to", "to the", "the mountain", "mountain tomorr...</text></switch></g><rect x="310" y="240" width="230" height="30" fill="#dae8fc" stroke="#6c8ebf" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 255px; margin-left: 425px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 22px; font-family: &quot;Courier New&quot;; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: nowrap;"><b style="font-size: 22px">Bigram tokenizer</b></div></div></div></foreignObject><text x="425" y="262" fill="rgba(0, 0, 0, 1)" font-family="Courier New" font-size="22px" text-anchor="middle">Bigram tokenizer</text></switch></g></g><switch><g requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility"/><a transform="translate(0,-5)" xlink:href="https://www.diagrams.net/doc/faq/svg-export-text-problems" target="_blank"><text text-anchor="middle" font-size="10px" x="50%" y="100%">Viewer does not support full SVG 1.1</text></a></switch></svg>


--------------------------------------------------------------------------------
/images/countvectorizer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/countvectorizer.png


--------------------------------------------------------------------------------
/images/countvectorizer.svg:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
3 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" width="851px" height="401px" viewBox="-0.5 -0.5 851 401" style="background-color: rgb(255, 255, 255);"><defs/><g><path d="M 425 40 L 425 103.63" fill="none" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 425 108.88 L 421.5 101.88 L 425 103.63 L 428.5 101.88 Z" fill="rgba(0, 0, 0, 1)" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="all"/><rect x="45" y="0" width="760" height="40" fill="none" stroke="none" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 758px; height: 1px; padding-top: 20px; margin-left: 46px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;"><font style="font-size: 24px" face="Courier New"><b style="font-size: 24px">"Can we go to the mountain tomorrow?"</b></font></div></div></div></foreignObject><text x="425" y="27" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">"Can we go to the mountain tomorrow?"</text></switch></g><path d="M 425 150 L 425 223.63" fill="none" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 425 228.88 L 421.5 221.88 L 425 223.63 L 428.5 221.88 Z" fill="rgba(0, 0, 0, 1)" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="all"/><rect x="0" y="110" width="850" height="40" fill="none" stroke="none" pointer-events="all"/><g 
transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 848px; height: 1px; padding-top: 130px; margin-left: 1px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;"><font style="font-size: 24px" face="Courier New"><b style="font-size: 24px">["can", "we", "go", "to", "the", "mountain", "tomorrow"]</b></font></div></div></div></foreignObject><text x="425" y="137" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">["can", "we", "go", "to", "the", "mountain", "tomorrow"]</text></switch></g><path d="M 425 320 L 425 353.63" fill="none" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 425 358.88 L 421.5 351.88 L 425 353.63 L 428.5 351.88 Z" fill="rgba(0, 0, 0, 1)" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="all"/><rect x="0" y="230" width="850" height="40" fill="none" stroke="none" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 848px; height: 1px; padding-top: 250px; margin-left: 1px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: 
inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;"><font style="font-size: 24px" face="Courier New"><b style="font-size: 24px">["ants", ..., "tomorrow", ..., "we", ..., "zoo"]</b></font></div></div></div></foreignObject><text x="425" y="257" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">["ants", ..., "tomorrow", ..., "we", ..., "zoo"]</text></switch></g><rect x="0" y="360" width="850" height="40" fill="none" stroke="none" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 848px; height: 1px; padding-top: 380px; margin-left: 1px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;"><font style="font-size: 24px" face="Courier New"><b style="font-size: 24px">[0,    ...,   0, 1, 0, ..., 0, 1, 0,  ... 0]</b></font></div></div></div></foreignObject><text x="425" y="387" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">[0,    ...,   0, 1, 0, ..., 0, 1, 0,  ... 
0]</text></switch></g><rect x="360" y="60" width="130" height="30" fill="#dae8fc" stroke="#6c8ebf" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 75px; margin-left: 425px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 22px; font-family: &quot;Courier New&quot;; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: nowrap;"><b style="font-size: 22px">tokenizer</b></div></div></div></foreignObject><text x="425" y="82" fill="rgba(0, 0, 0, 1)" font-family="Courier New" font-size="22px" text-anchor="middle">tokenizer</text></switch></g><rect x="230" y="170" width="410" height="30" fill="#dae8fc" stroke="#6c8ebf" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 185px; margin-left: 435px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 22px; font-family: &quot;Courier New&quot;; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: nowrap;"><b style="font-size: 22px">Build vocab over all documents</b></div></div></div></foreignObject><text x="435" y="192" fill="rgba(0, 0, 0, 1)" font-family="Courier 
New" font-size="22px" text-anchor="middle">Build vocab over all documents</text></switch></g><rect x="270" y="290" width="310" height="30" fill="#dae8fc" stroke="#6c8ebf" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 305px; margin-left: 425px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 22px; font-family: &quot;Courier New&quot;; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: nowrap;"><b>Sparse Matrix Encoding</b></div></div></div></foreignObject><text x="425" y="312" fill="rgba(0, 0, 0, 1)" font-family="Courier New" font-size="22px" text-anchor="middle">Sparse Matrix Encoding</text></switch></g><path d="M 425 270 L 425 290" fill="none" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="stroke"/></g><switch><g requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility"/><a transform="translate(0,-5)" xlink:href="https://www.diagrams.net/doc/faq/svg-export-text-problems" target="_blank"><text text-anchor="middle" font-size="10px" x="50%" y="100%">Viewer does not support full SVG 1.1</text></a></switch></svg>


--------------------------------------------------------------------------------
/images/favicon_org.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/favicon_org.png


--------------------------------------------------------------------------------
/images/glm_unit_deviance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/glm_unit_deviance.png


--------------------------------------------------------------------------------
/images/pipeline.svg:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
3 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" width="901px" height="441px" viewBox="-0.5 -0.5 901 441" style="background-color: rgb(255, 255, 255);"><defs/><g><rect x="0" y="0" width="700" height="60" fill="none" stroke="none" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe flex-start; width: 698px; height: 1px; padding-top: 30px; margin-left: 2px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: left;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: normal; overflow-wrap: normal;"><font face="Courier New">pipe = make_pipeline(T1(), T2(), Estimator())<br />pipe.fit(X, y)<br /></font></div></div></div></foreignObject><text x="2" y="37" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" font-weight="bold">pipe = make_pipeline(T1(), T2(), Estimator())...</text></switch></g><rect x="245" y="155" width="40" height="40" fill="#d5e8d4" stroke="#82b366" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 38px; height: 1px; padding-top: 175px; margin-left: 246px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: 
inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">T1</div></div></div></foreignObject><text x="265" y="182" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">T1</text></switch></g><rect x="490" y="215" width="40" height="40" fill="#d5e8d4" stroke="#82b366" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 38px; height: 1px; padding-top: 235px; margin-left: 491px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">T2</div></div></div></foreignObject><text x="510" y="242" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">T2</text></switch></g><rect x="770" y="275" width="130" height="40" fill="#d5e8d4" stroke="#82b366" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 128px; height: 1px; padding-top: 295px; margin-left: 771px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; 
font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Estimator</div></div></div></foreignObject><text x="835" y="302" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">Estimator</text></switch></g><path d="M 50 170 L 147.5 170 L 147.5 175 L 238.63 175" fill="none" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 243.88 175 L 236.88 178.5 L 238.63 175 L 236.88 171.5 Z" fill="rgba(0, 0, 0, 1)" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 171px; margin-left: 136px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); background-color: rgba(255, 255, 255, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; background-color: rgb(255, 255, 255); white-space: nowrap;">T1.fit(X, y)</div></div></div></foreignObject><text x="136" y="178" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">T1.fit(X, y)</text></switch></g><path d="M 30 194.49 L 30 230 L 233.63 230" fill="none" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 238.88 230 L 231.88 233.5 L 233.63 230 L 231.88 226.5 Z" fill="rgba(0, 0, 0, 1)" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" 
requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 231px; margin-left: 131px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); background-color: rgba(255, 255, 255, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; background-color: rgb(255, 255, 255); white-space: nowrap;">T1.transform(X)</div></div></div></foreignObject><text x="131" y="238" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">T1.transform(X)</text></switch></g><ellipse cx="25" cy="170" rx="25" ry="25" fill="#fff2cc" stroke="#d6b656" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 48px; height: 1px; padding-top: 170px; margin-left: 1px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">X</div></div></div></foreignObject><text x="25" y="177" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">X</text></switch></g><path d="M 50 110 L 265 110 L 265 148.63" fill="none" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 265 153.88 L 261.5 146.88 L 265 
148.63 L 268.5 146.88 Z" fill="rgba(0, 0, 0, 1)" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="all"/><path d="M 50 110 L 510 110 L 510 208.63" fill="none" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 510 213.88 L 506.5 206.88 L 510 208.63 L 513.5 206.88 Z" fill="rgba(0, 0, 0, 1)" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="all"/><path d="M 50 110 L 835 110 L 835 268.63" fill="none" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 835 273.88 L 831.5 266.88 L 835 268.63 L 838.5 266.88 Z" fill="rgba(0, 0, 0, 1)" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="all"/><ellipse cx="25" cy="110" rx="25" ry="25" fill="#fff2cc" stroke="#d6b656" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 48px; height: 1px; padding-top: 110px; margin-left: 1px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">y</div></div></div></foreignObject><text x="25" y="117" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">y</text></switch></g><path d="M 290 230 L 390 230 L 390 235 L 483.63 235" fill="none" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 488.88 235 L 481.88 238.5 L 483.63 235 L 481.88 231.5 Z" fill="rgba(0, 0, 0, 1)" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="all"/><g 
transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 231px; margin-left: 383px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); background-color: rgba(255, 255, 255, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; background-color: rgb(255, 255, 255); white-space: nowrap;">T2.fit(X1, y)</div></div></div></foreignObject><text x="383" y="238" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">T2.fit(X1, y)</text></switch></g><path d="M 270 254.49 L 270 295 L 485 295 L 483.63 295" fill="none" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 488.88 295 L 481.88 298.5 L 483.63 295 L 481.88 291.5 Z" fill="rgba(0, 0, 0, 1)" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 291px; margin-left: 373px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); background-color: rgba(255, 255, 255, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; background-color: rgb(255, 255, 
255); white-space: nowrap;">T2.transform(X1)</div></div></div></foreignObject><text x="373" y="298" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">T2.transform(X1)</text></switch></g><ellipse cx="265" cy="230" rx="25" ry="25" fill="#fff2cc" stroke="#d6b656" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 48px; height: 1px; padding-top: 230px; margin-left: 241px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">X1</div></div></div></foreignObject><text x="265" y="237" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">X1</text></switch></g><path d="M 540 295 L 763.63 295" fill="none" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 768.88 295 L 761.88 298.5 L 763.63 295 L 761.88 291.5 Z" fill="rgba(0, 0, 0, 1)" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 295px; margin-left: 651px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); background-color: rgba(255, 255, 255, 1); " 
style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; background-color: rgb(255, 255, 255); white-space: nowrap;">Estimator.fit(X2, y)</div></div></div></foreignObject><text x="651" y="302" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">Estimator.fit(X2, y)</text></switch></g><ellipse cx="515" cy="295" rx="25" ry="25" fill="#fff2cc" stroke="#d6b656" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 48px; height: 1px; padding-top: 295px; margin-left: 491px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">X2</div></div></div></foreignObject><text x="515" y="302" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">X2</text></switch></g><rect x="0" y="340" width="270" height="40" fill="none" stroke="none" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe flex-start; width: 268px; height: 1px; padding-top: 360px; margin-left: 2px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); 
" style="box-sizing: border-box; font-size: 0px; text-align: left;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: normal; overflow-wrap: normal;"><font face="Courier New">pipe.predict(X')<br /></font></div></div></div></foreignObject><text x="2" y="367" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" font-weight="bold">pipe.predict(X')&#xa;</text></switch></g><path d="M 50 415 L 233.63 415" fill="none" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 238.88 415 L 231.88 418.5 L 233.63 415 L 231.88 411.5 Z" fill="rgba(0, 0, 0, 1)" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 415px; margin-left: 141px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); background-color: rgba(255, 255, 255, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; background-color: rgb(255, 255, 255); white-space: nowrap;">T1.transform(X')</div></div></div></foreignObject><text x="141" y="422" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">T1.transform(X')</text></switch></g><ellipse cx="25" cy="415" rx="25" ry="25" fill="#fff2cc" stroke="#d6b656" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" 
requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 48px; height: 1px; padding-top: 415px; margin-left: 1px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">X'</div></div></div></foreignObject><text x="25" y="422" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">X'</text></switch></g><path d="M 290 415 L 503.63 415" fill="none" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 508.88 415 L 501.88 418.5 L 503.63 415 L 501.88 411.5 Z" fill="rgba(0, 0, 0, 1)" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 415px; margin-left: 396px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); background-color: rgba(255, 255, 255, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; background-color: rgb(255, 255, 255); white-space: nowrap;">T2.transform(X'1)</div></div></div></foreignObject><text x="396" y="422" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" 
text-anchor="middle">T2.transform(X'1)</text></switch></g><ellipse cx="265" cy="415" rx="25" ry="25" fill="#fff2cc" stroke="#d6b656" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 48px; height: 1px; padding-top: 415px; margin-left: 241px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">X'1</div></div></div></foreignObject><text x="265" y="422" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">X'1</text></switch></g><path d="M 560 415 L 823.63 415" fill="none" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 828.88 415 L 821.88 418.5 L 823.63 415 L 821.88 411.5 Z" fill="rgba(0, 0, 0, 1)" stroke="rgba(0, 0, 0, 1)" stroke-miterlimit="10" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 415px; margin-left: 695px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); background-color: rgba(255, 255, 255, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); 
line-height: 1.2; pointer-events: all; background-color: rgb(255, 255, 255); white-space: nowrap;">Estimator.predict(X'2)</div></div></div></foreignObject><text x="695" y="422" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">Estimator.predict(X'2)</text></switch></g><ellipse cx="535" cy="415" rx="25" ry="25" fill="#fff2cc" stroke="#d6b656" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 48px; height: 1px; padding-top: 415px; margin-left: 511px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">X'2</div></div></div></foreignObject><text x="535" y="422" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">X'2</text></switch></g><ellipse cx="855" cy="415" rx="25" ry="25" fill="#e1d5e7" stroke="#9673a6" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 48px; height: 1px; padding-top: 415px; margin-left: 831px;"><div data-drawio-colors="color: rgba(0, 0, 0, 1); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 24px; font-family: Helvetica; color: 
rgb(0, 0, 0); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">y'</div></div></div></foreignObject><text x="855" y="422" fill="rgba(0, 0, 0, 1)" font-family="Helvetica" font-size="24px" text-anchor="middle">y'</text></switch></g></g><switch><g requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility"/><a transform="translate(0,-5)" xlink:href="https://www.diagrams.net/doc/faq/svg-export-text-problems" target="_blank"><text text-anchor="middle" font-size="10px" x="50%" y="100%">Viewer does not support full SVG 1.1</text></a></switch></svg>


--------------------------------------------------------------------------------
/images/poisson_gamma_tweedie_distributions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/poisson_gamma_tweedie_distributions.png


--------------------------------------------------------------------------------
/images/resampling_approches.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/resampling_approches.png


--------------------------------------------------------------------------------
/images/scikit-learn-logo-notext.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/scikit-learn-logo-notext.png


--------------------------------------------------------------------------------
/images/single_words.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/single_words.png


--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 | 
  4 | <head>
  5 |   <title>Advanced Machine Learning with scikit-learn: Text Data, Imbalanced Data, and Poisson Regression</title>
  6 |   <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
  7 |   <link rel="stylesheet" href="./assets/style.css">
  8 |   <link rel="shortcut icon" type="image/x-icon" href="favicon.ico">
  9 |   
 10 |   <link rel="stylesheet" href="./assets/katex.min.css">
 11 |   <script src="./assets/katex.min.js"></script>
 12 |   <script src="./assets/auto-render.min.js"></script>
 13 |   
 14 | </head>
 15 | 
 16 | <body>
 17 |   <textarea id="source">title: Advanced Machine Learning with scikit-learn: Text Data, Imbalanced Data, and Poisson Regression
 18 | use_katex: True
 19 | class: title-slide
 20 | 
 21 | # Advanced Machine Learning with scikit-learn
 22 | ## Text Data, Imbalanced Data, and Poisson Regression
 23 | 
 24 | ![](images/scikit-learn-logo-notext.png)
 25 | 
 26 | .larger[Thomas J. Fan]<br>
 27 | @thomasjpfan<br>
 28 | <a href="https://www.github.com/thomasjpfan" target="_blank"><span class="icon icon-github icon-left"></span></a>
 29 | <a href="https://www.twitter.com/thomasjpfan" target="_blank"><span class="icon icon-twitter"></span></a>
 30 | <a class="this-talk-link" href="https://github.com/thomasjpfan/ml-workshop-advanced" target="_blank">
 31 | This workshop on GitHub: github.com/thomasjpfan/ml-workshop-advanced</a>
 32 | 
 33 | ---
 34 | 
 35 | name: table-of-contents
 36 | class: title-slide, left
 37 | 
 38 | # Table of Contents
 39 | 
 40 | .g[
 41 | .g-6[
 42 | 1. [Text Data](#text)
 43 | 1. [Imbalanced Data](#imbalanced)
 44 | 1. [Poisson Regression](#poisson)
 45 | ]
 46 | .g-6.g-center[
 47 | ![](images/scikit-learn-logo-notext.png)
 48 | ]
 49 | ]
 50 | 
 51 | ---
 52 | 
 53 | name: text
 54 | class: chapter-slide
 55 | 
 56 | # 1. Text Data
 57 | 
 58 | .footnote-back[
 59 | [Back to Table of Contents](#table-of-contents)
 60 | ]
 61 | 
 62 | ---
 63 | 
 64 | # Types of text data
 65 | 
 66 | <table border="1" class="dataframe">
 67 |   <thead>
 68 |     <tr style="text-align: right;">
 69 |       <th></th>
 70 |       <th>fullName</th>
 71 |       <th>country</th>
 72 |       <th>politicalGroup</th>
 73 |       <th>nationalPoliticalGroup</th>
 74 |     </tr>
 75 |   </thead>
 76 |   <tbody>
 77 |     <tr>
 78 |       <th>0</th>
 79 |       <td>Magdalena ADAMOWICZ</td>
 80 |       <td>Poland</td>
 81 |       <td>Group of the European People's Party (Christian Democrats)</td>
 82 |       <td>Independent</td>
 83 |     </tr>
 84 |     <tr>
 85 |       <th>1</th>
 86 |       <td>Asim ADEMOV</td>
 87 |       <td>Bulgaria</td>
 88 |       <td>Group of the European People's Party (Christian Democrats)</td>
 89 |       <td>Citizens for European Development of Bulgaria</td>
 90 |     </tr>
 91 |     <tr>
 92 |       <th>2</th>
 93 |       <td>Isabella ADINOLFI</td>
 94 |       <td>Italy</td>
 95 |       <td>Non-attached Members</td>
 96 |       <td>Movimento 5 Stelle</td>
 97 |     </tr>
 98 |     <tr>
 99 |       <th>3</th>
100 |       <td>Matteo ADINOLFI</td>
101 |       <td>Italy</td>
102 |       <td>Identity and Democracy Group</td>
103 |       <td>Lega</td>
104 |     </tr>
105 |     <tr>
106 |       <th>4</th>
107 |       <td>Alex AGIUS SALIBA</td>
108 |       <td>Malta</td>
109 |       <td>Group of the Progressive Alliance of Socialists and Democrats in the European Parliament</td>
110 |       <td>Partit Laburista</td>
111 |     </tr>
112 |   </tbody>
113 | </table>
114 | 
115 | 
116 | ---
117 | 
118 | # Text data we are considering
119 | 
120 | I've just had the evidence that confirmed my suspicions. A bunch of kids, 14 to 22 put on the DVD of "Titanic" on a fantastic state of the art mega screen home entertainment type deal. Only two of them had actually seen it before. But they all had seen the moment of Kate, Leo and Celine Dion so many times that most of them felt they had seen the whole movie. Shortly after the epic started, they started to get restless, some of them left asking the others
121 | 
122 | This independent, B&W, DV feature consistently shocks, amazes and amuses with it's ability to create the most insane situations and then find humor and interest in them. It's all hilarious and ridiculous stuff, yet as absurd as much of the film should be, there is a heart and a reality here that keeps the film grounded, keeps the entire piece from drifting into complete craziness and therein lies the real message here.
123 | 
124 | ---
125 | 
126 | # Bag of words
127 | 
128 | .center[
129 | ![:scale 90%](images/countvectorizer.svg)
130 | ]
131 | 
132 | ---
133 | 
134 | # Text processing in scikit-learn
135 | 
136 | ```py
137 | from sklearn.feature_extraction.text import CountVectorizer
138 | 
139 | sample_text = ["Can we go to the mountain tomorrow?",
140 |                "The mountain is really tall"]
141 | 
142 | vect = CountVectorizer()
143 | vect.fit(sample_text)
144 | 
145 | vect.get_feature_names()
146 | # ['can', 'go', 'is', 'mountain', 'really', 'tall', 'the', 'to', 'tomorrow',
147 | # 'we']
148 | 
149 | X = vect.transform(sample_text)
150 | X.toarray()
151 | # array([[1, 1, 0, 1, 0, 0, 1, 1, 1, 1],
152 | #        [0, 0, 1, 1, 1, 1, 1, 0, 0, 0]])
153 | ```
154 | 
155 | ---
156 | 
157 | class: chapter-slide
158 | 
159 | # Notebook 📓!
160 | ## notebooks/01-text-data.ipynb
161 | 
162 | ---
163 | 
164 | # N-grams
165 | 
166 | - Tries to keep words together
167 | - "really tall" and "not tall" have different meanings
168 | 
169 | ![:scale 90%](images/countvectorizer-ngrams.svg)
170 | 
171 | ---
172 | 
173 | class: chapter-slide
174 | 
175 | # Notebook 📓!
176 | ## notebooks/01-text-data.ipynb
177 | 
178 | ---
179 | 
180 | # Tf-idf rescaling
181 | 
182 | $$
183 | \text{tf-idf}(t, d) = \text{tf}(t, d) \cdot \text{idf}(t)
184 | $$
185 | $$
186 | \text{idf}(t) = \text{log}\frac{1 + n_d}{1 + \text{df}(d, t)} + 1
187 | $$
188 | 
189 | - $\text{tf}(t, d)$ = The count of term $t$ in document $d$.
190 | - $n_d$ = total number of documents
191 | - $\text{df}(d, t)$ = number of documents containing term $t$
192 | 
193 | - scikit-learn divides each row by its length (L2 normalization)
194 | 
195 | ```py
196 | from sklearn.feature_extraction.text import TfidfVectorizer
197 | ```
198 | 
199 | ---
200 | 
201 | class: chapter-slide
202 | 
203 | # Notebook 📓!
204 | ## notebooks/01-text-data.ipynb
205 | 
206 | ---
207 | 
208 | name: imbalanced
209 | class: chapter-slide
210 | 
211 | # 2. Imbalanced Data
212 | 
213 | .footnote-back[
214 | [Back to Table of Contents](#table-of-contents)
215 | ]
216 | 
217 | ---
218 | 
219 | class: middle
220 | 
221 | # What is imbalanced data?
222 | 
223 | - Costs are different between classes
224 | - Data is imbalanced
225 | - Some datasets have very few positive samples
226 | 
227 | ---
228 | 
229 | # Different costs between classes
230 | 
231 | .g[
232 | .g-8[
233 | .smaller-x[
234 | ```py
235 | y_pred = log_reg.predict(X_test)
236 | print(classification_report(y_test, y_pred))
237 | ```
238 | 
239 | ```
240 |               precision    recall  f1-score   support
241 | 
242 |        False       0.99      1.00      0.99      2731
243 |         True       0.75      0.37      0.49        65
244 | 
245 |     accuracy                           0.98      2796
246 |    macro avg       0.87      0.68      0.74      2796
247 | weighted avg       0.98      0.98      0.98      2796
248 | ```
249 | 
250 | ```py
251 | y_pred_20 = log_reg.predict_proba(X_test)[:, 1] > 0.25
252 | print(classification_report(y_test, y_pred_20))
253 | ```
254 | 
255 | ```
256 |               precision    recall  f1-score   support
257 | 
258 |        False       0.99      0.99      0.99      2731
259 |         True       0.63      0.55      0.59        65
260 | 
261 |     accuracy                           0.98      2796
262 |    macro avg       0.81      0.77      0.79      2796
263 | weighted avg       0.98      0.98      0.98      2796
264 | ```
265 | ]
266 | ]
267 | .g-4[
268 | $$
269 | \text{precision} = \frac{TP}{TP + FP}
270 | $$
271 | 
272 | $$
273 | \text{recall} = \frac{TP}{TP + FN}
274 | $$
275 | ]
276 | ]
277 | 
278 | ---
279 | 
280 | class: chapter-slide
281 | 
282 | # Notebook 📕!
283 | ## notebooks/02-imbalanced-data.ipynb
284 | 
285 | ---
286 | 
287 | # Resampling
288 | 
289 | ![:scale 60%](images/resampling_approches.png)
290 | 
291 | ---
292 | 
293 | # scikit-learn Pipelines
294 | ### Unable to handle sampling
295 | 
296 | ![](images/pipeline.svg)
297 | 
298 | ---
299 | 
300 | class: middle
301 | 
302 | # Imbalanced-learn
303 | 
304 | [https://imbalanced-learn.org/stable/](https://imbalanced-learn.org/stable/)
305 | 
306 | - Extends scikit-learn API
307 | - Defines samplers
308 | 
309 | ---
310 | 
311 | # Defines pipeline to handle sampling
312 | 
313 | ```py
314 | from imblearn.pipeline import make_pipeline as make_imb_pipeline
315 | 
316 | from imblearn.under_sampling import RandomUnderSampler
317 | from imblearn.over_sampling import RandomOverSampler
318 | ```
319 | 
320 | Fitting and sampling done in one line:
321 | 
322 | ```py
323 | data_resampled, targets_resampled = obj.fit_resample(data, targets)
324 | ```
325 | 
326 | ---
327 | 
328 | class: chapter-slide
329 | 
330 | # Notebook 📕!
331 | ## notebooks/02-imbalanced-data.ipynb
332 | 
333 | ---
334 | 
335 | class: middle
336 | 
337 | # Class-weights
338 | 
339 | - Re-weight the loss functions
340 | - Native to scikit-learn for most models
341 | - Same effect as over-sampling, but keeps the dataset size the same
342 | 
343 | ---
344 | 
345 | # Class-weights
346 | ## Linear models
347 | 
348 | - Loss of a given sample is weighted inversely proportional to class frequencies
349 | 
350 | ```py
351 | LogisticRegression(class_weight='balanced')
352 | ```
353 | 
354 | ## Tree models
355 | 
356 | - When deciding where to split, the `criterion` is weighted inversely proportional to class frequencies
357 | 
358 | ```py
359 | DecisionTreeClassifier(class_weight='balanced')
360 | ```
361 | 
362 | ---
363 | 
364 | class: chapter-slide
365 | 
366 | # Notebook 📕!
367 | ## notebooks/02-imbalanced-data.ipynb
368 | 
369 | ---
370 | 
371 | class: middle
372 | 
373 | # Balanced Bagging
374 | 
375 | - Random under sampling for each instance in the ensemble
376 | 
377 | ```py
378 | from imblearn.ensemble import BalancedRandomForestClassifier
379 | 
380 | balanced_rf = BalancedRandomForestClassifier(random_state=0)
381 | balanced_rf.fit(X_train, y_train)
382 | ```
383 | 
384 | ---
385 | 
386 | class: chapter-slide
387 | 
388 | # Notebook 📕!
389 | ## notebooks/02-imbalanced-data.ipynb
390 | 
391 | ---
392 | 
393 | # Synthetic Minority Oversampling Technique
394 | ## SMOTE
395 | 
396 | - Adds synthetic interpolated data to the minority class
397 | - For each sample in minority class:
398 |     - Pick a random neighbor from its k nearest neighbors.
399 |     - Pick a point uniformly at random on the line segment connecting the two.
400 | 
401 | ---
402 | 
403 | # Generated Data with SMOTE
404 | 
405 | ![](notebooks/images/smote_generated.png)
406 | 
407 | ---
408 | 
409 | class: chapter-slide
410 | 
411 | # Notebook 📕!
412 | ## notebooks/02-imbalanced-data.ipynb
413 | 
414 | ---
415 | 
416 | name: poisson
417 | class: chapter-slide
418 | 
419 | # 3. Poisson Regression
420 | 
421 | .footnote-back[
422 | [Back to Table of Contents](#table-of-contents)
423 | ]
424 | 
425 | ---
426 | 
427 | # Generalized Linear Models
428 | 
429 | Predicted values $\hat{y}$ are linked to a linear combination of $X$ via an
430 | inverse link function:
431 | 
432 | $$
433 | \hat{y}(w, X) = h(Xw)
434 | $$
435 | 
436 | The minimization problem becomes:
437 | 
438 | $$
439 | \min_{w}\frac{1}{2n}\sum_i d(y_i, \hat{y}_i) + \frac{\alpha}{2} ||w||_2^2
440 | $$
441 | 
442 | - $n$ is the number of samples
443 | - $\alpha$ is the L2 regularization penalty
444 | - $d(y_i, \hat{y}_i)$ is the unit deviance
445 | 
446 | ---
447 | 
448 | # Unit Deviances
449 | 
450 | ![](images/glm_unit_deviance.png)
451 | 
452 | ![:scale 100%](images/poisson_gamma_tweedie_distributions.png)
453 | 
454 | ---
455 | 
456 | class: middle
457 | 
458 | # Scikit-learn supports GLMs!
459 | 
460 | `TweedieRegressor` implements GLM for the Tweedie distribution with a `power` parameter
461 | ```py
462 | from sklearn.linear_model import TweedieRegressor
463 | ```
464 | 
465 | `power=0`: Normal distribution: Use `Ridge`, `ElasticNet`, etc.
466 | 
467 | ---
468 | 
469 | class: middle
470 | 
471 | # Convenience GLM estimators
472 | 
473 | Same as `TweedieRegressor(power=1, link='log')`
474 | ```py
475 | from sklearn.linear_model import PoissonRegressor
476 | ```
477 | 
478 | Same as `TweedieRegressor(power=2, link='log')`
479 | ```py
480 | from sklearn.linear_model import GammaRegressor
481 | ```
482 | 
483 | ---
484 | 
485 | # Examples of use cases for GLMs
486 | 
487 | - Agriculture / weather modeling:
488 |     - number of rain events per year (Poisson)
489 |     - amount of rainfall per event (Gamma)
490 | 
491 | - Risk modeling / insurance policy pricing:
492 |     - number of claim events / policyholder per year (Poisson)
493 |     - cost per event (Gamma)
494 | 
495 | - Predictive maintenance:
496 |     - number of production interruption events per year (Poisson)
497 |     - duration of interruption (Gamma)
498 | 
499 | ---
500 | 
501 | # Calibration for Regressors
502 | 
503 | .g.g-middle[
504 | .g-6[
505 | ![](images/calibration-regression.svg)
506 | ]
507 | .g-6[
508 | ![:scale 120%](images/calibration_regression.png)
509 | ]
510 | ]
511 | 
512 | ---
513 | 
514 | class: chapter-slide
515 | 
516 | # Notebook 📘!
517 | ## notebooks/03-poisson-regression.ipynb
518 | 
519 | ---
520 | 
521 | class: title-slide, left
522 | 
523 | # Closing
524 | 
525 | .g.g-middle[
526 | .g-7[
527 | ![:scale 30%](images/scikit-learn-logo-notext.png)
528 | 1. [Text Data](#text)
529 | 1. [Imbalanced Data](#imbalanced)
530 | 1. [Poisson Regression](#poisson)
531 | ]
532 | .g-5.center[
533 | <br>
534 | .larger[Thomas J. Fan]<br>
535 | @thomasjpfan<br>
536 | <a href="https://www.github.com/thomasjpfan" target="_blank"><span class="icon icon-github icon-left"></span></a>
537 | <a href="https://www.twitter.com/thomasjpfan" target="_blank"><span class="icon icon-twitter"></span></a>
538 | <a class="this-talk-link" href="https://github.com/thomasjpfan/ml-workshop-advanced" target="_blank">
539 | This workshop on GitHub: github.com/thomasjpfan/ml-workshop-advanced</a>
540 | ]
541 | ]
542 | </textarea>
543 |   <script src="./assets/remark.min.js"></script>
544 |   <script>
545 |     var options = {
546 |       ratio: '16:9',
547 |       highlightLanguage: "python",
548 |       highlightStyle: "github",
549 |       highlightLines: true,
550 |       highlightSpans: true,
551 |       slideNumberFormat: '',
552 |     };
553 |     remark.macros['scale'] = function (percentage) {
554 |         var url = this;
555 |         return '<img src="' + url + '" style="width: ' + percentage + '" />';
556 |     };
557 |     
558 |     var renderMath = function () {
559 |       renderMathInElement(document.body, {
560 |         delimiters: [
561 |           { left: "$$", right: "$$", display: true },
562 |           { left: "$", right: "$", display: false },
563 |           { left: "\\[", right: "\\]", display: true },
564 |           { left: "\\(", right: "\\)", display: false },
565 |         ]
566 |       });
567 |     }
568 |     var slideshow = remark.create(options, renderMath);
569 |     
570 |   </script>
571 | </body>
572 | 
573 | </html>


--------------------------------------------------------------------------------
/maint_tools/check_notebooks.sh:
--------------------------------------------------------------------------------
1 | set -e
2 | # Execute every numbered notebook; with set -e the first failing notebook aborts the run.
3 | for f in notebooks/0*.ipynb; do
4 |  jupyter nbconvert --execute "$f" --to notebook --stdout >/dev/null
5 | done
6 | 


--------------------------------------------------------------------------------
/make.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | from jinja2 import Template
 3 | from livereload import Server
 4 | from pathlib import Path
 5 | 
 6 | 
 7 | def build():
 8 |     print("building index.html")
 9 | 
10 |     assets = Path("assets")
11 | 
12 |     with (assets / "index.html.jinja").open('r') as f:
13 |         template = Template(f.read())
14 | 
15 |     with open("slides.md", 'r') as f:
16 |         slides = f.readlines()
17 | 
18 |     # get metadata up to the first title
19 |     valid_metadata_keys = set(['title', 'use_katex'])
20 |     metadata = {}
21 |     for line in slides:
22 | 
23 |         # first title
24 |         if line.startswith("#") or len(metadata) == len(valid_metadata_keys):
25 |             break
26 |         line_split = line.split(":", maxsplit=1)
27 |         if len(line_split) != 2:
28 |             continue
29 | 
30 |         key = line_split[0]
31 |         if key not in valid_metadata_keys:
32 |             continue
33 | 
34 |         value = line_split[1].strip()
35 |         metadata[key] = value
36 | 
37 |     if len(metadata) != 2:
38 |         raise ValueError("Be sure to include title: and use_katex as metadata "
39 |                          "in slides.md file")
40 | 
41 |     output = template.render(title=metadata['title'],
42 |                              use_katex=metadata['use_katex'] == 'True',
43 |                              slides="".join(slides))
44 | 
45 |     with open("index.html", 'w') as f:
46 |         f.write(output)
47 | 
48 | 
def live():
    """Serve the slides through a livereload server.

    Registers ``slides.md`` with ``build`` as its watch callback and
    ``assets/style.css`` with no callback, then starts serving with
    ``open_url_delay=2``.
    """
    print("Serving index.html")
    stylesheet = Path('.') / 'assets' / "style.css"

    reload_server = Server()
    reload_server.watch("slides.md", build)
    reload_server.watch(str(stylesheet))
    reload_server.serve(open_url_delay=2)
57 | 
58 | 
if __name__ == '__main__':
    # Command-line entry point: one positional ``action`` selects the task.
    actions = {'build': build, 'live': live}

    cli = argparse.ArgumentParser(description="Building slides")
    cli.add_argument("action", choices=['build', 'live'])
    parsed = cli.parse_args()

    # argparse has already rejected anything other than 'build' or 'live'.
    actions[parsed.action]()
68 |         live()
69 | 


--------------------------------------------------------------------------------
/notebooks/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/notebooks/.gitkeep


--------------------------------------------------------------------------------
/notebooks/02-imbalanced-data.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "metadata": {},
   6 |    "source": [
   7 |     "# Imbalanced data\n",
   8 |     "\n",
   9 |     "<a href=\"https://colab.research.google.com/github/thomasjpfan/ml-workshop-advanced/blob/master/notebooks/02-imbalanced-data.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open in Colab\" title=\"Open and Execute in Google Colaboratory\"></a>"
  10 |    ]
  11 |   },
  12 |   {
  13 |    "cell_type": "code",
  14 |    "execution_count": null,
  15 |    "metadata": {},
  16 |    "outputs": [],
  17 |    "source": [
  18 |     "# Install dependencies for google colab\n",
  19 |     "import sys\n",
  20 |     "IN_COLAB = 'google.colab' in sys.modules\n",
  21 |     "if IN_COLAB:\n",
  22 |     "    %pip install -r https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/master/requirements.txt"
  23 |    ]
  24 |   },
  25 |   {
  26 |    "cell_type": "code",
  27 |    "execution_count": null,
  28 |    "metadata": {},
  29 |    "outputs": [],
  30 |    "source": [
  31 |     "import sklearn\n",
  32 |     "assert sklearn.__version__.startswith(\"1.0\"), \"Plese install scikit-learn 1.0\""
  33 |    ]
  34 |   },
  35 |   {
  36 |    "cell_type": "code",
  37 |    "execution_count": null,
  38 |    "metadata": {},
  39 |    "outputs": [],
  40 |    "source": [
  41 |     "import numpy as np\n",
  42 |     "import pandas as pd\n",
  43 |     "import matplotlib.pyplot as plt\n",
  44 |     "import warnings\n",
  45 |     "warnings.filterwarnings(\"ignore\", category=UserWarning)\n",
  46 |     "\n",
  47 |     "plt.rcParams['font.size'] = 16\n",
  48 |     "plt.rcParams['figure.figsize'] = [12, 8]\n",
  49 |     "\n",
  50 |     "sklearn.set_config(display='diagram')"
  51 |    ]
  52 |   },
  53 |   {
  54 |    "cell_type": "markdown",
  55 |    "metadata": {},
  56 |    "source": [
  57 |     "## Load Mammography Data"
  58 |    ]
  59 |   },
  60 |   {
  61 |    "cell_type": "code",
  62 |    "execution_count": null,
  63 |    "metadata": {},
  64 |    "outputs": [],
  65 |    "source": [
  66 |     "from sklearn.datasets import fetch_openml\n",
  67 |     "\n",
  68 |     "mammography = fetch_openml(data_id=310)\n",
  69 |     "X, y = mammography.data, mammography.target"
  70 |    ]
  71 |   },
  72 |   {
  73 |    "cell_type": "code",
  74 |    "execution_count": null,
  75 |    "metadata": {},
  76 |    "outputs": [],
  77 |    "source": [
  78 |     "y = (y == '1').astype(int)"
  79 |    ]
  80 |   },
  81 |   {
  82 |    "cell_type": "code",
  83 |    "execution_count": null,
  84 |    "metadata": {},
  85 |    "outputs": [],
  86 |    "source": [
  87 |     "np.bincount(y)"
  88 |    ]
  89 |   },
  90 |   {
  91 |    "cell_type": "markdown",
  92 |    "metadata": {},
  93 |    "source": [
  94 |     "## Split data into train and test sets"
  95 |    ]
  96 |   },
  97 |   {
  98 |    "cell_type": "code",
  99 |    "execution_count": null,
 100 |    "metadata": {},
 101 |    "outputs": [],
 102 |    "source": [
 103 |     "from sklearn.model_selection import train_test_split\n",
 104 |     "\n",
 105 |     "X_train, X_test, y_train, y_test = train_test_split(\n",
 106 |     "    X, y, stratify=y, random_state=0)"
 107 |    ]
 108 |   },
 109 |   {
 110 |    "cell_type": "markdown",
 111 |    "metadata": {
 112 |     "tags": []
 113 |    },
 114 |    "source": [
 115 |     "### Base models"
 116 |    ]
 117 |   },
 118 |   {
 119 |    "cell_type": "markdown",
 120 |    "metadata": {},
 121 |    "source": [
 122 |     "#### DummyClassifier"
 123 |    ]
 124 |   },
 125 |   {
 126 |    "cell_type": "code",
 127 |    "execution_count": null,
 128 |    "metadata": {},
 129 |    "outputs": [],
 130 |    "source": [
 131 |     "from sklearn.model_selection import cross_validate\n",
 132 |     "from sklearn.dummy import DummyClassifier"
 133 |    ]
 134 |   },
 135 |   {
 136 |    "cell_type": "code",
 137 |    "execution_count": null,
 138 |    "metadata": {},
 139 |    "outputs": [],
 140 |    "source": [
 141 |     "dc = DummyClassifier()"
 142 |    ]
 143 |   },
 144 |   {
 145 |    "cell_type": "code",
 146 |    "execution_count": null,
 147 |    "metadata": {},
 148 |    "outputs": [],
 149 |    "source": [
 150 |     "dc_reg_scores = cross_validate(dc, X_train, y_train, scoring=['roc_auc', 'average_precision', \"accuracy\"], cv=5)"
 151 |    ]
 152 |   },
 153 |   {
 154 |    "cell_type": "code",
 155 |    "execution_count": null,
 156 |    "metadata": {},
 157 |    "outputs": [],
 158 |    "source": [
 159 |     "dc_reg_scores"
 160 |    ]
 161 |   },
 162 |   {
 163 |    "cell_type": "code",
 164 |    "execution_count": null,
 165 |    "metadata": {},
 166 |    "outputs": [],
 167 |    "source": [
 168 |     "dc_reg_scores[\"test_accuracy\"].mean()"
 169 |    ]
 170 |   },
 171 |   {
 172 |    "cell_type": "code",
 173 |    "execution_count": null,
 174 |    "metadata": {},
 175 |    "outputs": [],
 176 |    "source": [
 177 |     "dc_reg_scores[\"test_average_precision\"].mean()"
 178 |    ]
 179 |   },
 180 |   {
 181 |    "cell_type": "code",
 182 |    "execution_count": null,
 183 |    "metadata": {},
 184 |    "outputs": [],
 185 |    "source": [
 186 |     "dc_reg_scores[\"test_roc_auc\"].mean()"
 187 |    ]
 188 |   },
 189 |   {
 190 |    "cell_type": "code",
 191 |    "execution_count": null,
 192 |    "metadata": {},
 193 |    "outputs": [],
 194 |    "source": [
 195 |     "from sklearn.metrics import get_scorer\n",
 196 |     "\n",
 197 |     "def compute_metrics(estimator):\n",
 198 |     "    cv_results = cross_validate(estimator,\n",
 199 |     "                                X_train, y_train, scoring=['roc_auc', 'average_precision'],\n",
 200 |     "                                cv=5)\n",
 201 |     "    return {\n",
 202 |     "        \"roc_auc\": cv_results[\"test_roc_auc\"].mean(),\n",
 203 |     "        \"avg_precision\": cv_results[\"test_average_precision\"].mean(),\n",
 204 |     "    }"
 205 |    ]
 206 |   },
 207 |   {
 208 |    "cell_type": "markdown",
 209 |    "metadata": {
 210 |     "tags": []
 211 |    },
 212 |    "source": [
 213 |     "#### Linear model"
 214 |    ]
 215 |   },
 216 |   {
 217 |    "cell_type": "code",
 218 |    "execution_count": null,
 219 |    "metadata": {},
 220 |    "outputs": [],
 221 |    "source": [
 222 |     "from sklearn.model_selection import cross_validate\n",
 223 |     "from sklearn.linear_model import LogisticRegression"
 224 |    ]
 225 |   },
 226 |   {
 227 |    "cell_type": "code",
 228 |    "execution_count": null,
 229 |    "metadata": {},
 230 |    "outputs": [],
 231 |    "source": [
 232 |     "base_log_reg = LogisticRegression()"
 233 |    ]
 234 |   },
 235 |   {
 236 |    "cell_type": "code",
 237 |    "execution_count": null,
 238 |    "metadata": {},
 239 |    "outputs": [],
 240 |    "source": [
 241 |     "base_log_reg_metrics = compute_metrics(base_log_reg)\n",
 242 |     "base_log_reg_metrics"
 243 |    ]
 244 |   },
 245 |   {
 246 |    "cell_type": "markdown",
 247 |    "metadata": {},
 248 |    "source": [
 249 |     "#### Random Forest"
 250 |    ]
 251 |   },
 252 |   {
 253 |    "cell_type": "code",
 254 |    "execution_count": null,
 255 |    "metadata": {},
 256 |    "outputs": [],
 257 |    "source": [
 258 |     "from sklearn.ensemble import RandomForestClassifier"
 259 |    ]
 260 |   },
 261 |   {
 262 |    "cell_type": "code",
 263 |    "execution_count": null,
 264 |    "metadata": {},
 265 |    "outputs": [],
 266 |    "source": [
 267 |     "base_rf = RandomForestClassifier(random_state=42, n_jobs=-1)"
 268 |    ]
 269 |   },
 270 |   {
 271 |    "cell_type": "code",
 272 |    "execution_count": null,
 273 |    "metadata": {},
 274 |    "outputs": [],
 275 |    "source": [
 276 |     "base_rf_metrics = compute_metrics(base_rf)\n",
 277 |     "base_rf_metrics"
 278 |    ]
 279 |   },
 280 |   {
 281 |    "cell_type": "markdown",
 282 |    "metadata": {},
 283 |    "source": [
 284 |     "### Imbalance-learn sampler"
 285 |    ]
 286 |   },
 287 |   {
 288 |    "cell_type": "markdown",
 289 |    "metadata": {},
 290 |    "source": [
 291 |     "#### Under sampler"
 292 |    ]
 293 |   },
 294 |   {
 295 |    "cell_type": "code",
 296 |    "execution_count": null,
 297 |    "metadata": {},
 298 |    "outputs": [],
 299 |    "source": [
 300 |     "np.bincount(y_train)"
 301 |    ]
 302 |   },
 303 |   {
 304 |    "cell_type": "code",
 305 |    "execution_count": null,
 306 |    "metadata": {},
 307 |    "outputs": [],
 308 |    "source": [
 309 |     "from imblearn.under_sampling import RandomUnderSampler"
 310 |    ]
 311 |   },
 312 |   {
 313 |    "cell_type": "code",
 314 |    "execution_count": null,
 315 |    "metadata": {},
 316 |    "outputs": [],
 317 |    "source": [
 318 |     "under_sampler = RandomUnderSampler(random_state=42)"
 319 |    ]
 320 |   },
 321 |   {
 322 |    "cell_type": "code",
 323 |    "execution_count": null,
 324 |    "metadata": {},
 325 |    "outputs": [],
 326 |    "source": [
 327 |     "X_train_subsample, y_train_subsample = under_sampler.fit_resample(X_train, y_train)"
 328 |    ]
 329 |   },
 330 |   {
 331 |    "cell_type": "code",
 332 |    "execution_count": null,
 333 |    "metadata": {},
 334 |    "outputs": [],
 335 |    "source": [
 336 |     "X_train.shape"
 337 |    ]
 338 |   },
 339 |   {
 340 |    "cell_type": "code",
 341 |    "execution_count": null,
 342 |    "metadata": {},
 343 |    "outputs": [],
 344 |    "source": [
 345 |     "X_train_subsample.shape"
 346 |    ]
 347 |   },
 348 |   {
 349 |    "cell_type": "code",
 350 |    "execution_count": null,
 351 |    "metadata": {},
 352 |    "outputs": [],
 353 |    "source": [
 354 |     "np.bincount(y_train_subsample)"
 355 |    ]
 356 |   },
 357 |   {
 358 |    "cell_type": "markdown",
 359 |    "metadata": {},
 360 |    "source": [
 361 |     "#### Oversampling"
 362 |    ]
 363 |   },
 364 |   {
 365 |    "cell_type": "code",
 366 |    "execution_count": null,
 367 |    "metadata": {},
 368 |    "outputs": [],
 369 |    "source": [
 370 |     "from imblearn.over_sampling import RandomOverSampler"
 371 |    ]
 372 |   },
 373 |   {
 374 |    "cell_type": "code",
 375 |    "execution_count": null,
 376 |    "metadata": {},
 377 |    "outputs": [],
 378 |    "source": [
 379 |     "over_sampler = RandomOverSampler(random_state=42)"
 380 |    ]
 381 |   },
 382 |   {
 383 |    "cell_type": "code",
 384 |    "execution_count": null,
 385 |    "metadata": {},
 386 |    "outputs": [],
 387 |    "source": [
 388 |     "X_train_subsample, y_train_subsample = over_sampler.fit_resample(X_train, y_train)"
 389 |    ]
 390 |   },
 391 |   {
 392 |    "cell_type": "code",
 393 |    "execution_count": null,
 394 |    "metadata": {},
 395 |    "outputs": [],
 396 |    "source": [
 397 |     "X_train_subsample.shape"
 398 |    ]
 399 |   },
 400 |   {
 401 |    "cell_type": "code",
 402 |    "execution_count": null,
 403 |    "metadata": {},
 404 |    "outputs": [],
 405 |    "source": [
 406 |     "np.bincount(y_train_subsample)"
 407 |    ]
 408 |   },
 409 |   {
 410 |    "cell_type": "markdown",
 411 |    "metadata": {},
 412 |    "source": [
 413 |     "## Pipelines with imblearn\n",
 414 |     "\n",
 415 |     "### Linear model with under sampling"
 416 |    ]
 417 |   },
 418 |   {
 419 |    "cell_type": "code",
 420 |    "execution_count": null,
 421 |    "metadata": {},
 422 |    "outputs": [],
 423 |    "source": [
 424 |     "from imblearn.pipeline import make_pipeline as make_imb_pipeline"
 425 |    ]
 426 |   },
 427 |   {
 428 |    "cell_type": "code",
 429 |    "execution_count": null,
 430 |    "metadata": {},
 431 |    "outputs": [],
 432 |    "source": [
 433 |     "under_log_reg = make_imb_pipeline(\n",
 434 |     "    RandomUnderSampler(random_state=42), LogisticRegression(random_state=42))"
 435 |    ]
 436 |   },
 437 |   {
 438 |    "cell_type": "code",
 439 |    "execution_count": null,
 440 |    "metadata": {},
 441 |    "outputs": [],
 442 |    "source": [
 443 |     "base_log_reg_metrics"
 444 |    ]
 445 |   },
 446 |   {
 447 |    "cell_type": "code",
 448 |    "execution_count": null,
 449 |    "metadata": {},
 450 |    "outputs": [],
 451 |    "source": [
 452 |     "compute_metrics(under_log_reg)"
 453 |    ]
 454 |   },
 455 |   {
 456 |    "cell_type": "markdown",
 457 |    "metadata": {},
 458 |    "source": [
 459 |     "### Random Forest with under sampling"
 460 |    ]
 461 |   },
 462 |   {
 463 |    "cell_type": "code",
 464 |    "execution_count": null,
 465 |    "metadata": {},
 466 |    "outputs": [],
 467 |    "source": [
 468 |     "under_rf = make_imb_pipeline(\n",
 469 |     "    RandomUnderSampler(random_state=42), RandomForestClassifier(random_state=42))"
 470 |    ]
 471 |   },
 472 |   {
 473 |    "cell_type": "code",
 474 |    "execution_count": null,
 475 |    "metadata": {},
 476 |    "outputs": [],
 477 |    "source": [
 478 |     "base_rf_metrics"
 479 |    ]
 480 |   },
 481 |   {
 482 |    "cell_type": "code",
 483 |    "execution_count": null,
 484 |    "metadata": {},
 485 |    "outputs": [],
 486 |    "source": [
 487 |     "compute_metrics(under_rf)"
 488 |    ]
 489 |   },
 490 |   {
 491 |    "cell_type": "markdown",
 492 |    "metadata": {},
 493 |    "source": [
 494 |     "### Linear model with over sampling"
 495 |    ]
 496 |   },
 497 |   {
 498 |    "cell_type": "code",
 499 |    "execution_count": null,
 500 |    "metadata": {},
 501 |    "outputs": [],
 502 |    "source": [
 503 |     "over_log_reg = make_imb_pipeline(\n",
 504 |     "    RandomOverSampler(random_state=42), LogisticRegression(random_state=42))"
 505 |    ]
 506 |   },
 507 |   {
 508 |    "cell_type": "code",
 509 |    "execution_count": null,
 510 |    "metadata": {},
 511 |    "outputs": [],
 512 |    "source": [
 513 |     "base_log_reg_metrics"
 514 |    ]
 515 |   },
 516 |   {
 517 |    "cell_type": "code",
 518 |    "execution_count": null,
 519 |    "metadata": {},
 520 |    "outputs": [],
 521 |    "source": [
 522 |     "compute_metrics(over_log_reg)"
 523 |    ]
 524 |   },
 525 |   {
 526 |    "cell_type": "markdown",
 527 |    "metadata": {},
 528 |    "source": [
 529 |     "## Exercise 1\n",
 530 |     "\n",
 531 |     "1. Use `make_imb_pipeline` with `RandomOverSampler(random_state=42)` to create a pipeline with a random forest called `over_rf`.\n",
 532 |     "1. Compute our metrics using `compute_metrics`."
 533 |    ]
 534 |   },
 535 |   {
 536 |    "cell_type": "code",
 537 |    "execution_count": null,
 538 |    "metadata": {},
 539 |    "outputs": [],
 540 |    "source": []
 541 |   },
 542 |   {
 543 |    "cell_type": "markdown",
 544 |    "metadata": {},
 545 |    "source": [
 546 |     "**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan/ml-workshop-advanced/blob/master/notebooks/solutions/02-ex01-solutions.py)."
 547 |    ]
 548 |   },
 549 |   {
 550 |    "cell_type": "code",
 551 |    "execution_count": null,
 552 |    "metadata": {},
 553 |    "outputs": [],
 554 |    "source": [
 555 |     "# %load solutions/02-ex01-solutions.py"
 556 |    ]
 557 |   },
 558 |   {
 559 |    "cell_type": "markdown",
 560 |    "metadata": {},
 561 |    "source": [
 562 |     "## Plotting curves for logistic regression"
 563 |    ]
 564 |   },
 565 |   {
 566 |    "cell_type": "code",
 567 |    "execution_count": null,
 568 |    "metadata": {},
 569 |    "outputs": [],
 570 |    "source": [
 571 |     "base_log_reg.fit(X_train, y_train)\n",
 572 |     "under_log_reg.fit(X_train, y_train)\n",
 573 |     "over_log_reg.fit(X_train, y_train);"
 574 |    ]
 575 |   },
 576 |   {
 577 |    "cell_type": "markdown",
 578 |    "metadata": {},
 579 |    "source": [
 580 |     "### Plotting"
 581 |    ]
 582 |   },
 583 |   {
 584 |    "cell_type": "code",
 585 |    "execution_count": null,
 586 |    "metadata": {},
 587 |    "outputs": [],
 588 |    "source": [
 589 |     "from sklearn.metrics import PrecisionRecallDisplay\n",
 590 |     "from sklearn.metrics import RocCurveDisplay"
 591 |    ]
 592 |   },
 593 |   {
 594 |    "cell_type": "code",
 595 |    "execution_count": null,
 596 |    "metadata": {},
 597 |    "outputs": [],
 598 |    "source": [
 599 |     "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(24, 8))\n",
 600 |     "RocCurveDisplay.from_estimator(base_log_reg, X_test, y_test, ax=ax1, name=\"original\")\n",
 601 |     "RocCurveDisplay.from_estimator(under_log_reg, X_test, y_test, ax=ax1, name=\"undersampling\")\n",
 602 |     "RocCurveDisplay.from_estimator(over_log_reg, X_test, y_test, ax=ax1, name=\"oversampling\")\n",
 603 |     "\n",
 604 |     "PrecisionRecallDisplay.from_estimator(base_log_reg, X_test, y_test, ax=ax2, name=\"original\")\n",
 605 |     "PrecisionRecallDisplay.from_estimator(under_log_reg, X_test, y_test, ax=ax2, name=\"undersampling\")\n",
 606 |     "PrecisionRecallDisplay.from_estimator(over_log_reg, X_test, y_test, ax=ax2, name=\"oversampling\");"
 607 |    ]
 608 |   },
 609 |   {
 610 |    "cell_type": "markdown",
 611 |    "metadata": {},
 612 |    "source": [
 613 |     "We create a custom plotting function for future use:"
 614 |    ]
 615 |   },
 616 |   {
 617 |    "cell_type": "code",
 618 |    "execution_count": null,
 619 |    "metadata": {},
 620 |    "outputs": [],
 621 |    "source": [
 622 |     "def plot_roc_and_precision_recall_curves(estimators):\n",
 623 |     "    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(24, 8))\n",
 624 |     "    for name, est in estimators:\n",
 625 |     "        RocCurveDisplay.from_estimator(est, X_test, y_test, ax=ax1, name=name)\n",
 626 |     "        PrecisionRecallDisplay.from_estimator(est, X_test, y_test, ax=ax2, name=name)"
 627 |    ]
 628 |   },
 629 |   {
 630 |    "cell_type": "code",
 631 |    "execution_count": null,
 632 |    "metadata": {},
 633 |    "outputs": [],
 634 |    "source": [
 635 |     "plot_roc_and_precision_recall_curves([(\"original\", base_log_reg),\n",
 636 |     "                                      (\"undersampling\", under_log_reg),\n",
 637 |     "                                      (\"oversampling\", over_log_reg)])"
 638 |    ]
 639 |   },
 640 |   {
 641 |    "cell_type": "markdown",
 642 |    "metadata": {},
 643 |    "source": [
 644 |     "## Exercise 2\n",
 645 |     "\n",
 646 |     "1. Train the three random forest models, `base_rf`, `under_rf`, `over_rf`.\n",
 647 |     "1. Plot the roc and precision recall for the three random forest models."
 648 |    ]
 649 |   },
 650 |   {
 651 |    "cell_type": "code",
 652 |    "execution_count": null,
 653 |    "metadata": {},
 654 |    "outputs": [],
 655 |    "source": []
 656 |   },
 657 |   {
 658 |    "cell_type": "markdown",
 659 |    "metadata": {},
 660 |    "source": [
 661 |     "**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan/ml-workshop-advanced/blob/master/notebooks/solutions/02-ex02-solutions.py)."
 662 |    ]
 663 |   },
 664 |   {
 665 |    "cell_type": "code",
 666 |    "execution_count": null,
 667 |    "metadata": {},
 668 |    "outputs": [],
 669 |    "source": [
 670 |     "# %load solutions/02-ex02-solutions.py"
 671 |    ]
 672 |   },
 673 |   {
 674 |    "cell_type": "markdown",
 675 |    "metadata": {},
 676 |    "source": [
 677 |     "#### Back to slides!"
 678 |    ]
 679 |   },
 680 |   {
 681 |    "cell_type": "markdown",
 682 |    "metadata": {},
 683 |    "source": [
 684 |     "## Class-Weights"
 685 |    ]
 686 |   },
 687 |   {
 688 |    "cell_type": "markdown",
 689 |    "metadata": {},
 690 |    "source": [
 691 |     "#### Linear model with class weights"
 692 |    ]
 693 |   },
 694 |   {
 695 |    "cell_type": "code",
 696 |    "execution_count": null,
 697 |    "metadata": {},
 698 |    "outputs": [],
 699 |    "source": [
 700 |     "class_weight_log_reg = LogisticRegression(class_weight='balanced')\n",
 701 |     "\n",
 702 |     "class_weight_log_reg.fit(X_train, y_train)"
 703 |    ]
 704 |   },
 705 |   {
 706 |    "cell_type": "code",
 707 |    "execution_count": null,
 708 |    "metadata": {},
 709 |    "outputs": [],
 710 |    "source": [
 711 |     "plot_roc_and_precision_recall_curves([(\"original\", base_log_reg),\n",
 712 |     "                                      (\"class weighted\", class_weight_log_reg)])"
 713 |    ]
 714 |   },
 715 |   {
 716 |    "cell_type": "markdown",
 717 |    "metadata": {},
 718 |    "source": [
 719 |     "#### Random forest with class weights "
 720 |    ]
 721 |   },
 722 |   {
 723 |    "cell_type": "code",
 724 |    "execution_count": null,
 725 |    "metadata": {},
 726 |    "outputs": [],
 727 |    "source": [
 728 |     "class_weight_rf = RandomForestClassifier(class_weight='balanced', random_state=42)\n",
 729 |     "class_weight_rf.fit(X_train, y_train)"
 730 |    ]
 731 |   },
 732 |   {
 733 |    "cell_type": "code",
 734 |    "execution_count": null,
 735 |    "metadata": {},
 736 |    "outputs": [],
 737 |    "source": [
 738 |     "base_rf.fit(X_train, y_train)"
 739 |    ]
 740 |   },
 741 |   {
 742 |    "cell_type": "code",
 743 |    "execution_count": null,
 744 |    "metadata": {},
 745 |    "outputs": [],
 746 |    "source": [
 747 |     "plot_roc_and_precision_recall_curves([(\"original\", base_rf),\n",
 748 |     "                                      (\"class weighted\", class_weight_rf)])"
 749 |    ]
 750 |   },
 751 |   {
 752 |    "cell_type": "markdown",
 753 |    "metadata": {},
 754 |    "source": [
 755 |     "#### Back to slides!"
 756 |    ]
 757 |   },
 758 |   {
 759 |    "cell_type": "markdown",
 760 |    "metadata": {},
 761 |    "source": [
 762 |     "## Ensemble Resampling"
 763 |    ]
 764 |   },
 765 |   {
 766 |    "cell_type": "code",
 767 |    "execution_count": null,
 768 |    "metadata": {},
 769 |    "outputs": [],
 770 |    "source": [
 771 |     "from imblearn.ensemble import BalancedRandomForestClassifier\n",
 772 |     "from sklearn.tree import DecisionTreeClassifier"
 773 |    ]
 774 |   },
 775 |   {
 776 |    "cell_type": "code",
 777 |    "execution_count": null,
 778 |    "metadata": {},
 779 |    "outputs": [],
 780 |    "source": [
 781 |     "balanced_rf = BalancedRandomForestClassifier(random_state=0)\n",
 782 |     "\n",
 783 |     "balanced_rf.fit(X_train, y_train)"
 784 |    ]
 785 |   },
 786 |   {
 787 |    "cell_type": "code",
 788 |    "execution_count": null,
 789 |    "metadata": {},
 790 |    "outputs": [],
 791 |    "source": [
 792 |     "under_rf.fit(X_train, y_train)\n",
 793 |     "\n",
 794 |     "over_rf = make_imb_pipeline(RandomOverSampler(random_state=42), RandomForestClassifier(random_state=42))\n",
 795 |     "over_rf.fit(X_train, y_train)"
 796 |    ]
 797 |   },
 798 |   {
 799 |    "cell_type": "code",
 800 |    "execution_count": null,
 801 |    "metadata": {},
 802 |    "outputs": [],
 803 |    "source": [
 804 |     "plot_roc_and_precision_recall_curves(\n",
 805 |     "    [\n",
 806 |     "        (\"original\", base_rf),\n",
 807 |     "        (\"undersampling\", under_rf),\n",
 808 |     "        (\"oversampling\", over_rf),\n",
 809 |     "        (\"balanced bagging\", balanced_rf)\n",
 810 |     "    ]\n",
 811 |     ")"
 812 |    ]
 813 |   },
 814 |   {
 815 |    "cell_type": "markdown",
 816 |    "metadata": {},
 817 |    "source": [
 818 |     "#### Back to slides!"
 819 |    ]
 820 |   },
 821 |   {
 822 |    "cell_type": "markdown",
 823 |    "metadata": {},
 824 |    "source": [
 825 |     "## SMOTE"
 826 |    ]
 827 |   },
 828 |   {
 829 |    "cell_type": "code",
 830 |    "execution_count": null,
 831 |    "metadata": {},
 832 |    "outputs": [],
 833 |    "source": [
 834 |     "from imblearn.over_sampling import SMOTE\n",
 835 |     "\n",
 836 |     "smote = SMOTE(random_state=42)\n",
 837 |     "X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)\n",
 838 |     "\n",
 839 |     "X_train_smote.shape"
 840 |    ]
 841 |   },
 842 |   {
 843 |    "cell_type": "code",
 844 |    "execution_count": null,
 845 |    "metadata": {},
 846 |    "outputs": [],
 847 |    "source": [
 848 |     "np.bincount(y_train_smote)"
 849 |    ]
 850 |   },
 851 |   {
 852 |    "cell_type": "code",
 853 |    "execution_count": null,
 854 |    "metadata": {},
 855 |    "outputs": [],
 856 |    "source": [
 857 |     "fig, axes = plt.subplots(1, 2, figsize=(16, 8))\n",
 858 |     "sorting = np.argsort(y_train)\n",
 859 |     "\n",
 860 |     "axes[0].set_title(\"Original\")\n",
 861 |     "axes[0].scatter(X_train.iloc[sorting, 3], X_train.iloc[sorting, 4], c=plt.cm.tab10(y_train.iloc[sorting]), alpha=.3, s=2)\n",
 862 |     "\n",
 863 |     "axes[1].set_title(\"SMOTE\")\n",
 864 |     "axes[1].scatter(X_train_smote.iloc[:, 3], X_train_smote.iloc[:, 4], c=plt.cm.tab10(y_train_smote), alpha=1, s=2)"
 865 |    ]
 866 |   },
 867 |   {
 868 |    "cell_type": "code",
 869 |    "execution_count": null,
 870 |    "metadata": {},
 871 |    "outputs": [],
 872 |    "source": [
 873 |     "base_log_reg_metrics"
 874 |    ]
 875 |   },
 876 |   {
 877 |    "cell_type": "code",
 878 |    "execution_count": null,
 879 |    "metadata": {},
 880 |    "outputs": [],
 881 |    "source": [
 882 |     "smote_log_reg = make_imb_pipeline(\n",
 883 |     "    SMOTE(random_state=42), LogisticRegression())\n",
 884 |     "compute_metrics(smote_log_reg)"
 885 |    ]
 886 |   },
 887 |   {
 888 |    "cell_type": "code",
 889 |    "execution_count": null,
 890 |    "metadata": {},
 891 |    "outputs": [],
 892 |    "source": [
 893 |     "base_rf_metrics"
 894 |    ]
 895 |   },
 896 |   {
 897 |    "cell_type": "code",
 898 |    "execution_count": null,
 899 |    "metadata": {},
 900 |    "outputs": [],
 901 |    "source": [
 902 |     "smote_rf = make_imb_pipeline(SMOTE(random_state=42), RandomForestClassifier(random_state=42, n_jobs=-1))\n",
 903 |     "compute_metrics(smote_rf)"
 904 |    ]
 905 |   },
 906 |   {
 907 |    "cell_type": "markdown",
 908 |    "metadata": {},
 909 |    "source": [
 910 |     "## Plotting all the versions of random forest"
 911 |    ]
 912 |   },
 913 |   {
 914 |    "cell_type": "code",
 915 |    "execution_count": null,
 916 |    "metadata": {},
 917 |    "outputs": [],
 918 |    "source": [
 919 |     "smote_rf.fit(X_train, y_train)"
 920 |    ]
 921 |   },
 922 |   {
 923 |    "cell_type": "code",
 924 |    "execution_count": null,
 925 |    "metadata": {},
 926 |    "outputs": [],
 927 |    "source": [
 928 |     "plot_roc_and_precision_recall_curves(\n",
 929 |     "    [\n",
 930 |     "        (\"original\", base_rf),\n",
 931 |     "        (\"smote\", smote_rf),\n",
 932 |     "    ]\n",
 933 |     ")"
 934 |    ]
 935 |   },
 936 |   {
 937 |    "cell_type": "markdown",
 938 |    "metadata": {},
 939 |    "source": [
 940 |     "## Exercise 3\n",
 941 |     "\n",
 942 |     "1. Train a `HistGradientBoostingClassifier` on the training set.\n",
 943 |     "2. Construct a pipeline with `SMOTE` and `HistGradientBoostingClassifier` and fit it on the training set.\n",
 944 |     "3. Plot the ROC and PR curves between the two models."
 945 |    ]
 946 |   },
 947 |   {
 948 |    "cell_type": "code",
 949 |    "execution_count": null,
 950 |    "metadata": {},
 951 |    "outputs": [],
 952 |    "source": [
 953 |     "from sklearn.ensemble import HistGradientBoostingClassifier"
 954 |    ]
 955 |   },
 956 |   {
 957 |    "cell_type": "code",
 958 |    "execution_count": null,
 959 |    "metadata": {},
 960 |    "outputs": [],
 961 |    "source": []
 962 |   },
 963 |   {
 964 |    "cell_type": "markdown",
 965 |    "metadata": {},
 966 |    "source": [
 967 |     "**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan/ml-workshop-advanced/blob/master/notebooks/solutions/02-ex03-solutions.py)."
 968 |    ]
 969 |   },
 970 |   {
 971 |    "cell_type": "code",
 972 |    "execution_count": null,
 973 |    "metadata": {},
 974 |    "outputs": [],
 975 |    "source": [
 976 |     "# %load solutions/02-ex03-solutions.py"
 977 |    ]
 978 |   }
 979 |  ],
 980 |  "metadata": {
 981 |   "kernelspec": {
 982 |    "display_name": "Python 3 (ipykernel)",
 983 |    "language": "python",
 984 |    "name": "python3"
 985 |   },
 986 |   "language_info": {
 987 |    "codemirror_mode": {
 988 |     "name": "ipython",
 989 |     "version": 3
 990 |    },
 991 |    "file_extension": ".py",
 992 |    "mimetype": "text/x-python",
 993 |    "name": "python",
 994 |    "nbconvert_exporter": "python",
 995 |    "pygments_lexer": "ipython3",
 996 |    "version": "3.9.7"
 997 |   }
 998 |  },
 999 |  "nbformat": 4,
1000 |  "nbformat_minor": 4
1001 | }
1002 | 


--------------------------------------------------------------------------------
/notebooks/03-poisson-regression.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "tags": []
  7 |    },
  8 |    "source": [
  9 |     "# Poisson regression\n",
 10 |     "\n",
 11 |     "<a href=\"https://colab.research.google.com/github/thomasjpfan/ml-workshop-advanced/blob/master/notebooks/03-poisson-regression.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open in Colab\" title=\"Open and Execute in Google Colaboratory\"></a>"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": null,
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "# Install dependencies for google colab\n",
 21 |     "import sys\n",
 22 |     "IN_COLAB = 'google.colab' in sys.modules\n",
 23 |     "if IN_COLAB:\n",
 24 |     "    %pip install -r https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/master/requirements.txt"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": null,
 30 |    "metadata": {},
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "import sklearn\n",
 34 |     "assert sklearn.__version__.startswith(\"1.0\"), \"Please install scikit-learn 1.0\""
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "execution_count": null,
 40 |    "metadata": {},
 41 |    "outputs": [],
 42 |    "source": [
 43 |     "import numpy as np\n",
 44 |     "import pandas as pd\n",
 45 |     "import matplotlib.pyplot as plt\n",
 46 |     "import seaborn as sns\n",
 47 |     "\n",
 48 |     "sns.set_theme(font_scale=1.5, rc={'figure.figsize': [12, 8]})\n",
 49 |     "sklearn.set_config(display='diagram')"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "markdown",
 54 |    "metadata": {},
 55 |    "source": [
 56 |     "## Load London Bike Data"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": null,
 62 |    "metadata": {},
 63 |    "outputs": [],
 64 |    "source": [
 65 |     "from pathlib import Path\n",
 66 |     "\n",
 67 |     "data_path = Path(\"data\")\n",
 68 |     "bikes_path = data_path / \"london_bikes.csv\""
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "markdown",
 73 |    "metadata": {},
 74 |    "source": [
 75 |     "- \"timestamp\" - timestamp field for grouping the data \n",
 76 |     "- \"cnt\" - the count of new bike shares \n",
 77 |     "- \"t1\" - real temperature in C \n",
 78 |     "- \"t2\" - temperature in C \"feels like\" \n",
 79 |     "- \"hum\" - humidity in percentage \n",
 80 |     "- \"wind_speed\" - wind speed in km/h \n",
 81 |     "- \"weather_code\" - category of the weather \n",
 82 |     "- \"is_holiday\" - boolean field - 1 holiday / 0 non holiday \n",
 83 |     "- \"is_weekend\" - boolean field - 1 if the day is weekend \n",
 84 |     "- \"season\" - category field meteorological seasons: 0-spring ; 1-summer; 2-fall; 3-winter."
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "code",
 89 |    "execution_count": null,
 90 |    "metadata": {},
 91 |    "outputs": [],
 92 |    "source": [
 93 |     "if IN_COLAB:\n",
 94 |     "    BIKES_URL = \"https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/master/notebooks/data/london_bikes.csv\"\n",
 95 |     "    bikes = pd.read_csv(BIKES_URL, parse_dates=['timestamp'])\n",
 96 |     "else:\n",
 97 |     "    bikes = pd.read_csv(bikes_path, parse_dates=['timestamp'])"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": null,
103 |    "metadata": {},
104 |    "outputs": [],
105 |    "source": [
106 |     "bikes.head()"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": null,
112 |    "metadata": {},
113 |    "outputs": [],
114 |    "source": [
115 |     "bikes['timestamp'].head()"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": null,
121 |    "metadata": {},
122 |    "outputs": [],
123 |    "source": [
124 |     "bikes['hr'] = bikes['timestamp'].dt.hour"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": null,
130 |    "metadata": {},
131 |    "outputs": [],
132 |    "source": [
133 |     "bikes['weather_code'].unique()"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "code",
138 |    "execution_count": null,
139 |    "metadata": {},
140 |    "outputs": [],
141 |    "source": [
142 |     "bikes['season'].unique()"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "code",
147 |    "execution_count": null,
148 |    "metadata": {},
149 |    "outputs": [],
150 |    "source": [
151 |     "X = bikes[['t1', 't2', 'hum', 'wind_speed', 'weather_code', 'is_holiday', 'is_weekend', 'season', 'hr']]\n",
152 |     "y = bikes['cnt']"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "markdown",
157 |    "metadata": {},
158 |    "source": [
159 |     "### Split data"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "code",
164 |    "execution_count": null,
165 |    "metadata": {},
166 |    "outputs": [],
167 |    "source": [
168 |     "from sklearn.model_selection import train_test_split\n",
169 |     "\n",
170 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "markdown",
175 |    "metadata": {},
176 |    "source": [
177 |     "### Column Transformer"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "code",
182 |    "execution_count": null,
183 |    "metadata": {},
184 |    "outputs": [],
185 |    "source": [
186 |     "from sklearn.compose import ColumnTransformer\n",
187 |     "from sklearn.preprocessing import OneHotEncoder\n",
188 |     "from sklearn.preprocessing import StandardScaler\n",
189 |     "\n",
190 |     "numerical_features = ['t1', 't2', 'hum', 'wind_speed']\n",
191 |     "cat_features = ['weather_code', 'season', 'hr', 'is_holiday', 'is_weekend']\n",
192 |     "\n",
193 |     "ct = ColumnTransformer([\n",
194 |     "    ('numerical', StandardScaler(), numerical_features),\n",
195 |     "    ('categorical', OneHotEncoder(sparse=False, handle_unknown='ignore', drop='if_binary'), cat_features)\n",
196 |     "])"
197 |    ]
198 |   },
199 |   {
200 |    "cell_type": "markdown",
201 |    "metadata": {},
202 |    "source": [
203 |     "### Pipeline"
204 |    ]
205 |   },
206 |   {
207 |    "cell_type": "code",
208 |    "execution_count": null,
209 |    "metadata": {},
210 |    "outputs": [],
211 |    "source": [
212 |     "from sklearn.linear_model import PoissonRegressor\n",
213 |     "from sklearn.pipeline import Pipeline\n",
214 |     "from sklearn.preprocessing import StandardScaler\n",
215 |     "\n",
216 |     "pois_reg = Pipeline([\n",
217 |     "    ('prep', ct),\n",
218 |     "    ('reg', PoissonRegressor(alpha=1.0, max_iter=300))\n",
219 |     "])\n",
220 |     "\n",
221 |     "pois_reg"
222 |    ]
223 |   },
224 |   {
225 |    "cell_type": "code",
226 |    "execution_count": null,
227 |    "metadata": {},
228 |    "outputs": [],
229 |    "source": [
230 |     "pois_reg.fit(X_train, y_train);"
231 |    ]
232 |   },
233 |   {
234 |    "cell_type": "code",
235 |    "execution_count": null,
236 |    "metadata": {},
237 |    "outputs": [],
238 |    "source": [
239 |     "pois_reg.score(X_test, y_test)"
240 |    ]
241 |   },
242 |   {
243 |    "cell_type": "markdown",
244 |    "metadata": {},
245 |    "source": [
246 |     "### What about ridge?"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "code",
251 |    "execution_count": null,
252 |    "metadata": {},
253 |    "outputs": [],
254 |    "source": [
255 |     "from sklearn.linear_model import Ridge\n",
256 |     "\n",
257 |     "ct = ColumnTransformer([\n",
258 |     "    ('numerical', StandardScaler(), numerical_features),\n",
259 |     "    ('categorical', OneHotEncoder(sparse=False, handle_unknown='ignore'), cat_features)\n",
260 |     "    \n",
261 |     "])\n",
262 |     "\n",
263 |     "ridge = Pipeline([\n",
264 |     "    ('prep', ct),\n",
265 |     "    ('reg', Ridge(random_state=42))\n",
266 |     "])\n",
267 |     "\n",
268 |     "ridge.fit(X_train, y_train)"
269 |    ]
270 |   },
271 |   {
272 |    "cell_type": "code",
273 |    "execution_count": null,
274 |    "metadata": {},
275 |    "outputs": [],
276 |    "source": [
277 |     "ridge.score(X_test, y_test)"
278 |    ]
279 |   },
280 |   {
281 |    "cell_type": "code",
282 |    "execution_count": null,
283 |    "metadata": {},
284 |    "outputs": [],
285 |    "source": [
286 |     "from sklearn.metrics import mean_squared_error\n",
287 |     "from sklearn.metrics import mean_poisson_deviance\n",
288 |     "\n",
289 |     "def compute_metrics(y_true, y_pred, sample_weight=None):\n",
290 |     "    \n",
291 |     "    mask = y_pred > 0\n",
292 |     "    if (~mask).any():\n",
293 |     "        n_masked, n_samples = (~mask).sum(), mask.shape[0]\n",
294 |     "        print(f\"WARNING: Estimator yields invalid, non-positive predictions \"\n",
295 |     "              f\" for {n_masked} samples out of {n_samples}. These predictions \"\n",
296 |     "              f\"are ignored when computing the Poisson deviance.\")\n",
297 |     "        \n",
298 |     "        y_true = y_true[mask]\n",
299 |     "        y_pred = y_pred[mask]\n",
300 |     "        if sample_weight is not None:\n",
301 |     "            sample_weight = sample_weight[mask]\n",
302 |     "   \n",
303 |     "    return {\n",
304 |     "        'mse': mean_squared_error(y_true, y_pred, sample_weight=sample_weight),\n",
305 |     "        'mean poisson deviance': mean_poisson_deviance(y_true, y_pred, sample_weight=sample_weight)\n",
306 |     "    }"
307 |    ]
308 |   },
309 |   {
310 |    "cell_type": "markdown",
311 |    "metadata": {},
312 |    "source": [
313 |     "### Compute metrics"
314 |    ]
315 |   },
316 |   {
317 |    "cell_type": "code",
318 |    "execution_count": null,
319 |    "metadata": {},
320 |    "outputs": [],
321 |    "source": [
322 |     "ridge_pred = ridge.predict(X_test)\n",
323 |     "compute_metrics(y_test, ridge_pred)"
324 |    ]
325 |   },
326 |   {
327 |    "cell_type": "code",
328 |    "execution_count": null,
329 |    "metadata": {},
330 |    "outputs": [],
331 |    "source": [
332 |     "poisson_pred = pois_reg.predict(X_test)\n",
333 |     "compute_metrics(y_test, poisson_pred)"
334 |    ]
335 |   },
336 |   {
337 |    "cell_type": "markdown",
338 |    "metadata": {},
339 |    "source": [
340 |     "## Plotting the prediction distributions"
341 |    ]
342 |   },
343 |   {
344 |    "cell_type": "code",
345 |    "execution_count": null,
346 |    "metadata": {},
347 |    "outputs": [],
348 |    "source": [
349 |     "fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 6), sharey=True)\n",
350 |     "ax1.hist(y_test, bins=30, alpha=0.5)\n",
351 |     "ax1.set_title(\"Test data\")\n",
352 |     "ax2.hist(poisson_pred, bins=30, alpha=0.5)\n",
353 |     "ax2.set_title(\"Poisson predictions\")\n",
354 |     "ax3.hist(ridge_pred, bins=30, alpha=0.5)\n",
355 |     "ax3.set_title(\"Ridge predictions\")"
356 |    ]
357 |   },
358 |   {
359 |    "cell_type": "markdown",
360 |    "metadata": {},
361 |    "source": [
362 |     "## Exercise 1\n",
363 |     "\n",
364 |     "1. Train a `HistGradientBoostingRegressor` with `random_state=42` on the training set.\n",
365 |     "1. Compute the predictions on the test set and save it as `hist_pred`.\n",
366 |     "1. Compute the metrics for the predictions on the model using `compute_metrics`.\n",
367 |     "1. Train a `HistGradientBoostingRegressor` with `loss='poisson'` and `random_state=42` on the training set.\n",
368 |     "1. Compute the predictions from this estimator and save it as `hist_poisson_pred`.\n",
369 |     "1. Compute the metrics for the predictions on the model using `compute_metrics`.\n",
370 |     "1. **Extra:** Plot the prediction distributions for the two models and the original data.\n",
371 |     "    - **Hint** You may copy the code right above this cell."
372 |    ]
373 |   },
374 |   {
375 |    "cell_type": "code",
376 |    "execution_count": null,
377 |    "metadata": {},
378 |    "outputs": [],
379 |    "source": [
380 |     "from sklearn.ensemble import HistGradientBoostingRegressor"
381 |    ]
382 |   },
383 |   {
384 |    "cell_type": "code",
385 |    "execution_count": null,
386 |    "metadata": {},
387 |    "outputs": [],
388 |    "source": []
389 |   },
390 |   {
391 |    "cell_type": "markdown",
392 |    "metadata": {},
393 |    "source": [
394 |     "**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan/ml-workshop-advanced/blob/master/notebooks/solutions/03-ex01-solutions.py)."
395 |    ]
396 |   },
397 |   {
398 |    "cell_type": "code",
399 |    "execution_count": null,
400 |    "metadata": {},
401 |    "outputs": [],
402 |    "source": [
403 |     "# %load solutions/03-ex01-solutions.py"
404 |    ]
405 |   },
406 |   {
407 |    "cell_type": "markdown",
408 |    "metadata": {},
409 |    "source": [
410 |     "### Back to slides!"
411 |    ]
412 |   },
413 |   {
414 |    "cell_type": "markdown",
415 |    "metadata": {},
416 |    "source": [
417 |     "## Calibration Curve for Regressors"
418 |    ]
419 |   },
420 |   {
421 |    "cell_type": "code",
422 |    "execution_count": null,
423 |    "metadata": {},
424 |    "outputs": [],
425 |    "source": [
426 |     "from sklearn.utils import gen_even_slices\n",
427 |     "\n",
428 |     "def _calibration_curve_weighted(y_true, y_pred, n_bins=10, sample_weight=None):\n",
429 |     "    y_true = np.asarray(y_true)\n",
430 |     "    y_pred = np.asarray(y_pred)\n",
431 |     "    \n",
432 |     "    idx_sort = np.argsort(y_pred)\n",
433 |     "    y_pred_bin = np.zeros(n_bins)\n",
434 |     "    y_true_bin = np.zeros(n_bins)\n",
435 |     "    \n",
436 |     "    if sample_weight is not None:\n",
437 |     "        sample_weight = np.asarray(sample_weight)\n",
438 |     "    \n",
439 |     "    for i, sl in enumerate(gen_even_slices(len(y_true), n_bins)):\n",
440 |     "        if sample_weight is None:\n",
441 |     "            y_pred_bin[i] = np.average(y_pred[idx_sort][sl])\n",
442 |     "            y_true_bin[i] = np.average(y_true[idx_sort][sl])\n",
443 |     "        else:\n",
444 |     "            weights = sample_weight[idx_sort][sl]\n",
445 |     "            y_pred_bin[i] = np.average(y_pred[idx_sort][sl], weights=weights)\n",
446 |     "            y_true_bin[i] = np.average(y_true[idx_sort][sl], weights=weights)\n",
447 |     "    return y_pred_bin, y_true_bin\n",
448 |     "\n",
449 |     "def plot_calibration_curve_regression(y_true, y_pred, n_bins=10, ax=None, title=\"\", sample_weight=None):\n",
450 |     "    if ax is None:\n",
451 |     "        fig, ax = plt.subplots(figsize=(8, 8))\n",
452 |     "    y_pred_bin, y_true_bin = _calibration_curve_weighted(y_true, y_pred, n_bins=n_bins, sample_weight=sample_weight)\n",
453 |     "    # Bins are ordered by increasing prediction, so the x axis is simply the bin index (1..n_bins).\n",
454 |     "    bin_centers = np.arange(1, len(y_pred_bin) + 1)\n",
455 |     "    ax.plot(bin_centers, y_pred_bin, marker='x', linestyle=\"--\", label=\"predictions\")\n",
456 |     "    ax.plot(bin_centers, y_true_bin, marker='o', linestyle=\"--\", label=\"observations\")\n",
457 |     "    ax.set(xlabel=\"Bin number\", xticks=bin_centers, title=title)\n",
458 |     "    ax.legend()\n",
459 |     "    return ax"
460 |    ]
461 |   },
462 |   {
463 |    "cell_type": "markdown",
464 |    "metadata": {},
465 |    "source": [
466 |     "Train a `hist_poisson` to compare calibration curves"
467 |    ]
468 |   },
469 |   {
470 |    "cell_type": "code",
471 |    "execution_count": null,
472 |    "metadata": {},
473 |    "outputs": [],
474 |    "source": [
475 |     "from sklearn.ensemble import HistGradientBoostingRegressor\n",
476 |     "\n",
477 |     "hist_poisson = HistGradientBoostingRegressor(loss='poisson', random_state=42)\n",
478 |     "hist_poisson.fit(X_train, y_train)\n",
479 |     "\n",
480 |     "hist_poisson_pred = hist_poisson.predict(X_test)"
481 |    ]
482 |   },
483 |   {
484 |    "cell_type": "code",
485 |    "execution_count": null,
486 |    "metadata": {},
487 |    "outputs": [],
488 |    "source": [
489 |     "fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(20, 8))\n",
490 |     "plot_calibration_curve_regression(y_test, ridge_pred, ax=ax1, title=\"Ridge\")\n",
491 |     "plot_calibration_curve_regression(y_test, poisson_pred, ax=ax2, title=\"Poisson Regression\")\n",
492 |     "plot_calibration_curve_regression(y_test, hist_poisson_pred, ax=ax3, title=\"Hist Poisson\");"
493 |    ]
494 |   },
495 |   {
496 |    "cell_type": "markdown",
497 |    "metadata": {},
498 |    "source": [
499 |     "## Claims dataset"
500 |    ]
501 |   },
502 |   {
503 |    "cell_type": "code",
504 |    "execution_count": null,
505 |    "metadata": {},
506 |    "outputs": [],
507 |    "source": [
508 |     "claims_path = data_path / \"claims.csv\"\n",
509 |     "if IN_COLAB:\n",
510 |     "    CLAIMS_URL = \"https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/master/notebooks/data/claims.csv\"\n",
511 |     "    claims = pd.read_csv(CLAIMS_URL)\n",
512 |     "else:\n",
513 |     "    claims = pd.read_csv(claims_path)"
514 |    ]
515 |   },
516 |   {
517 |    "cell_type": "markdown",
518 |    "metadata": {},
519 |    "source": [
520 |     "- ClaimsPerYear: number of claims on the given policy;\n",
521 |     "- Exposure: total exposure in yearly units;\n",
522 |     "- Area: area code (categorical, ordinal);\n",
523 |     "- VehPower: power of the car (categorical, ordinal);\n",
524 |     "- VehAge: age of the car in years;\n",
525 |     "- DrivAge: age of the (most common) driver in years;\n",
526 |     "- BonusMalus: bonus-malus level between 50 and 230 (with reference level 100);\n",
527 |     "- VehBrand: car brand (categorical, nominal);\n",
528 |     "- VehGas: diesel or regular fuel car (binary);\n",
529 |     "- Density: density of inhabitants per km2 in the city of the living place of the driver;\n",
530 |     "- Region: regions in France (prior to 2016)"
531 |    ]
532 |   },
533 |   {
534 |    "cell_type": "code",
535 |    "execution_count": null,
536 |    "metadata": {},
537 |    "outputs": [],
538 |    "source": [
539 |     "claims.head()"
540 |    ]
541 |   },
542 |   {
543 |    "cell_type": "code",
544 |    "execution_count": null,
545 |    "metadata": {},
546 |    "outputs": [],
547 |    "source": [
548 |     "exposure = claims['Exposure']"
549 |    ]
550 |   },
551 |   {
552 |    "cell_type": "code",
553 |    "execution_count": null,
554 |    "metadata": {},
555 |    "outputs": [],
556 |    "source": [
557 |     "y = claims[\"ClaimsPerYear\"]"
558 |    ]
559 |   },
560 |   {
561 |    "cell_type": "code",
562 |    "execution_count": null,
563 |    "metadata": {},
564 |    "outputs": [],
565 |    "source": [
566 |     "X = claims.drop([\"Exposure\", \"ClaimsPerYear\"], axis=\"columns\")"
567 |    ]
568 |   },
569 |   {
570 |    "cell_type": "code",
571 |    "execution_count": null,
572 |    "metadata": {},
573 |    "outputs": [],
574 |    "source": [
575 |     "X.head()"
576 |    ]
577 |   },
578 |   {
579 |    "cell_type": "markdown",
580 |    "metadata": {},
581 |    "source": [
582 |     "### Split data"
583 |    ]
584 |   },
585 |   {
586 |    "cell_type": "code",
587 |    "execution_count": null,
588 |    "metadata": {},
589 |    "outputs": [],
590 |    "source": [
591 |     "X_train, X_test, y_train, y_test, exposure_train, exposure_test = train_test_split(\n",
592 |     "    X, y, exposure, random_state=42)"
593 |    ]
594 |   },
595 |   {
596 |    "cell_type": "markdown",
597 |    "metadata": {},
598 |    "source": [
599 |     "### Train a simple dummy regressor"
600 |    ]
601 |   },
602 |   {
603 |    "cell_type": "code",
604 |    "execution_count": null,
605 |    "metadata": {},
606 |    "outputs": [],
607 |    "source": [
608 |     "from sklearn.dummy import DummyRegressor\n",
609 |     "\n",
610 |     "dummy = DummyRegressor()\n",
611 |     "dummy.fit(X_train, y_train, sample_weight=exposure_train)"
612 |    ]
613 |   },
614 |   {
615 |    "cell_type": "code",
616 |    "execution_count": null,
617 |    "metadata": {},
618 |    "outputs": [],
619 |    "source": [
620 |     "dummy_pred = dummy.predict(X_test)\n",
621 |     "compute_metrics(y_test, dummy_pred, sample_weight=exposure_test)"
622 |    ]
623 |   },
624 |   {
625 |    "cell_type": "code",
626 |    "execution_count": null,
627 |    "metadata": {},
628 |    "outputs": [],
629 |    "source": [
630 |     "dummy_pred[:10]"
631 |    ]
632 |   },
633 |   {
634 |    "cell_type": "markdown",
635 |    "metadata": {},
636 |    "source": [
637 |     "## Ridge"
638 |    ]
639 |   },
640 |   {
641 |    "cell_type": "code",
642 |    "execution_count": null,
643 |    "metadata": {},
644 |    "outputs": [],
645 |    "source": [
646 |     "X['Density'].hist(bins=25);"
647 |    ]
648 |   },
649 |   {
650 |    "cell_type": "code",
651 |    "execution_count": null,
652 |    "metadata": {},
653 |    "outputs": [],
654 |    "source": [
655 |     "from sklearn.pipeline import make_pipeline\n",
656 |     "from sklearn.preprocessing import FunctionTransformer, OneHotEncoder\n",
657 |     "from sklearn.preprocessing import KBinsDiscretizer\n",
658 |     "from sklearn.compose import ColumnTransformer\n",
659 |     "\n",
660 |     "linear_model_preprocessor = ColumnTransformer(\n",
661 |     "    [\n",
662 |     "        (\"passthrough_numeric\", StandardScaler(),\n",
663 |     "            [\"BonusMalus\"]),\n",
664 |     "        (\"binned_numeric\", KBinsDiscretizer(n_bins=10),\n",
665 |     "            [\"VehAge\", \"DrivAge\"]),\n",
666 |     "        (\"log_scaled_numeric\", FunctionTransformer(np.log),\n",
667 |     "            [\"Density\"]),\n",
668 |     "        (\"onehot_categorical\", OneHotEncoder(handle_unknown='ignore'),\n",
669 |     "            [\"VehBrand\", \"VehPower\", \"VehGas\", \"Region\", \"Area\"]),\n",
670 |     "    ],\n",
671 |     ")"
672 |    ]
673 |   },
674 |   {
675 |    "cell_type": "code",
676 |    "execution_count": null,
677 |    "metadata": {},
678 |    "outputs": [],
679 |    "source": [
680 |     "linear_model_preprocessor.fit_transform(X_train)"
681 |    ]
682 |   },
683 |   {
684 |    "cell_type": "code",
685 |    "execution_count": null,
686 |    "metadata": {},
687 |    "outputs": [],
688 |    "source": [
689 |     "from sklearn.linear_model import Ridge\n",
690 |     "from sklearn.preprocessing import MaxAbsScaler\n",
691 |     "\n",
692 |     "ridge = Pipeline([\n",
693 |     "    (\"preprocessor\", linear_model_preprocessor),\n",
694 |     "    (\"reg\", Ridge(alpha=1e-6))])\n",
695 |     "ridge.fit(X_train, y_train, reg__sample_weight=exposure_train)"
696 |    ]
697 |   },
698 |   {
699 |    "cell_type": "code",
700 |    "execution_count": null,
701 |    "metadata": {},
702 |    "outputs": [],
703 |    "source": [
704 |     "ridge_pred = ridge.predict(X_test)\n",
705 |     "ridge_metrics = compute_metrics(y_test, ridge_pred, sample_weight=exposure_test)\n",
706 |     "ridge_metrics"
707 |    ]
708 |   },
709 |   {
710 |    "cell_type": "code",
711 |    "execution_count": null,
712 |    "metadata": {},
713 |    "outputs": [],
714 |    "source": [
715 |     "plot_calibration_curve_regression(y_test, ridge_pred, title=\"Ridge\", sample_weight=exposure_test);"
716 |    ]
717 |   },
718 |   {
719 |    "cell_type": "markdown",
720 |    "metadata": {},
721 |    "source": [
722 |     "## Exercise 2\n",
723 |     "\n",
724 |     "1. Construct a pipeline with `PoissonRegressor(alpha=1e-4)` with the same preprocessor we have above.\n",
725 |     "    - **Hint**: You may reuse `linear_model_preprocessor` and `MaxAbsScaler`\n",
726 |     "2. Train the pipeline on the training set. **Hint**: Remember to set the sample weight!\n",
727 |     "3. Plot the calibration curve using `plot_calibration_curve_regression`. **Hint**: Remember to include the sample weights!"
728 |    ]
729 |   },
730 |   {
731 |    "cell_type": "code",
732 |    "execution_count": null,
733 |    "metadata": {},
734 |    "outputs": [],
735 |    "source": []
736 |   },
737 |   {
738 |    "cell_type": "markdown",
739 |    "metadata": {},
740 |    "source": [
741 |     "**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan/ml-workshop-advanced/blob/master/notebooks/solutions/03-ex02-solutions.py)."
742 |    ]
743 |   },
744 |   {
745 |    "cell_type": "code",
746 |    "execution_count": null,
747 |    "metadata": {},
748 |    "outputs": [],
749 |    "source": [
750 |     "# %load solutions/03-ex02-solutions.py"
751 |    ]
752 |   },
753 |   {
754 |    "cell_type": "markdown",
755 |    "metadata": {
756 |     "tags": []
757 |    },
758 |    "source": [
759 |     "## Exercise 3\n",
760 |     "\n",
761 |     "1. Build a preprocessor for a tree based model.\n",
762 |     "    - **Hint**: Use `ColumnTransformer`, encode categories with `OrdinalEncoder` and passthrough the numerical features.\n",
763 |     "    - **Extra**: You can use `make_column_selector` to select the correct dtypes.\n",
764 |     "2. Use the preprocessor from step 1 to build a pipeline with `HistGradientBoostingRegressor` with `loss=\"poisson\"`.\n",
765 |     "3. Fit the model from step 2 while also setting `sample_weight` to `exposure_train`.\n",
766 |     "4. Use `compute_metrics` to compute the mse and the mean poisson deviance.\n",
767 |     "    - **Hint** Remember to include the sample weight!\n",
768 |     "5. Plot the calibration curve using `plot_calibration_curve_regression`.\n",
769 |     "    - **Hint** remember to include the sample weights!"
770 |    ]
771 |   },
772 |   {
773 |    "cell_type": "code",
774 |    "execution_count": null,
775 |    "metadata": {},
776 |    "outputs": [],
777 |    "source": [
778 |     "from sklearn.preprocessing import OrdinalEncoder\n",
779 |     "from sklearn.compose import make_column_selector"
780 |    ]
781 |   },
782 |   {
783 |    "cell_type": "code",
784 |    "execution_count": null,
785 |    "metadata": {},
786 |    "outputs": [],
787 |    "source": []
788 |   },
789 |   {
790 |    "cell_type": "markdown",
791 |    "metadata": {},
792 |    "source": [
793 |     "**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan/ml-workshop-advanced/blob/master/notebooks/solutions/03-ex03-solutions.py)."
794 |    ]
795 |   },
796 |   {
797 |    "cell_type": "code",
798 |    "execution_count": null,
799 |    "metadata": {},
800 |    "outputs": [],
801 |    "source": [
802 |     "# %load solutions/03-ex03-solutions.py"
803 |    ]
804 |   }
805 |  ],
806 |  "metadata": {
807 |   "kernelspec": {
808 |    "display_name": "Python 3 (ipykernel)",
809 |    "language": "python",
810 |    "name": "python3"
811 |   },
812 |   "language_info": {
813 |    "codemirror_mode": {
814 |     "name": "ipython",
815 |     "version": 3
816 |    },
817 |    "file_extension": ".py",
818 |    "mimetype": "text/x-python",
819 |    "name": "python",
820 |    "nbconvert_exporter": "python",
821 |    "pygments_lexer": "ipython3",
822 |    "version": "3.9.7"
823 |   }
824 |  },
825 |  "nbformat": 4,
826 |  "nbformat_minor": 4
827 | }
828 | 


--------------------------------------------------------------------------------
/notebooks/data/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/notebooks/data/.gitkeep


--------------------------------------------------------------------------------
/notebooks/data/review_polarity.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/notebooks/data/review_polarity.tar.gz


--------------------------------------------------------------------------------
/notebooks/images/smote_generated.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/notebooks/images/smote_generated.png


--------------------------------------------------------------------------------
/notebooks/solutions/01-ex01-solutions.py:
--------------------------------------------------------------------------------
 1 | from sklearn.ensemble import RandomForestClassifier
 2 | # Shallow random forest (max_depth=3) with a fixed random_state.
 3 | rfc = RandomForestClassifier(random_state=42, max_depth=3)
 4 | # X_train / y_train / X_test / y_test are not defined here; they are expected from the loading notebook.
 5 | rfc.fit(X_train, y_train)
 6 | # Evaluate on the held-out split.
 7 | rfc.score(X_test, y_test)
 8 | 
 9 | rfc_feature_importances = rfc.feature_importances_
10 | # argsort is ascending, so the last 20 indices point at the 20 largest importances.
11 | rf_top_20 = rfc_feature_importances.argsort()[-20:]
12 | # feature_names is not defined in this file; presumably an array indexable by position - confirm in the notebook.
13 | feature_names[rf_top_20]
14 | 


--------------------------------------------------------------------------------
/notebooks/solutions/01-ex02-solutions.py:
--------------------------------------------------------------------------------
1 | rfc_pipe = Pipeline([  # count vectorizer feeding a shallow random forest
2 |     ("vectorizer", CountVectorizer(min_df=2, stop_words='english')),
3 |     ("rf", RandomForestClassifier(random_state=42, max_depth=3))
4 | ])
5 | # Fit on text_train directly: the "vectorizer" step runs before the "rf" step.
6 | rfc_pipe.fit(text_train, y_train)
7 | # Evaluate on the held-out text (text_test / y_test come from the loading notebook).
8 | rfc_pipe.score(text_test, y_test)
9 | 


--------------------------------------------------------------------------------
/notebooks/solutions/01-ex03-solutions.py:
--------------------------------------------------------------------------------
 1 | log_tfid = Pipeline([  # TF-IDF vectorizer feeding a logistic regression
 2 |     ("vectorizer", TfidfVectorizer(stop_words='english')),
 3 |     ("log_reg", LogisticRegression(solver='liblinear'))
 4 | ])
 5 | # Check the sizes of the train and test text collections.
 6 | len(text_train)
 7 | 
 8 | len(text_test)
 9 | 
10 | log_tfid.fit(text_train, y_train)
11 | 
12 | log_tfid.score(text_test, y_test)
13 | # Pull the fitted feature names and coefficients out of the pipeline steps by name.
14 | feature_names = log_tfid["vectorizer"].get_feature_names_out()
15 | log_reg_coefs = log_tfid["log_reg"].coef_
16 | # plot_important_features is not defined in this file; it is expected from the loading notebook.
17 | plot_important_features(log_reg_coefs, feature_names)
18 | 


--------------------------------------------------------------------------------
/notebooks/solutions/02-ex01-solutions.py:
--------------------------------------------------------------------------------
1 | over_rf = make_imb_pipeline(RandomOverSampler(random_state=0), RandomForestClassifier(random_state=42))  # oversample, then random forest
2 | # base_rf_metrics is computed outside this file; it is referenced here for comparison.
3 | base_rf_metrics
4 | # NOTE(review): compute_metrics is called with an estimator here; presumably the helper from the 02 notebook - confirm.
5 | compute_metrics(over_rf)
6 | 


--------------------------------------------------------------------------------
/notebooks/solutions/02-ex02-solutions.py:
--------------------------------------------------------------------------------
1 | base_rf.fit(X_train, y_train)  # fit all three models on the same training split
2 | under_rf.fit(X_train, y_train)
3 | over_rf.fit(X_train, y_train)
4 | # Each entry pairs a name with a fitted estimator; presumably the name is used as the plot label - confirm against the helper.
5 | plot_roc_and_precision_recall_curves([
6 |     ("original", base_rf),
7 |     ("undersampling", under_rf),
8 |     ("oversampling", over_rf),
9 | ])


--------------------------------------------------------------------------------
/notebooks/solutions/02-ex03-solutions.py:
--------------------------------------------------------------------------------
 1 | from sklearn.ensemble import HistGradientBoostingClassifier
 2 | # Baseline: gradient boosting fit on the training data as-is.
 3 | base_hist = HistGradientBoostingClassifier(random_state=42)
 4 | base_hist.fit(X_train, y_train)
 5 | # Same classifier, with SMOTE as the first pipeline step.
 6 | smote_hist = make_imb_pipeline(
 7 |     SMOTE(random_state=42), HistGradientBoostingClassifier(random_state=42))
 8 | smote_hist.fit(X_train, y_train)
 9 | 
10 | # Compare both fitted models; the plotting helper is expected from the loading notebook.
11 | plot_roc_and_precision_recall_curves(
12 |     [
13 |         ("original", base_hist),
14 |         ("smote", smote_hist),
15 |     ]
16 | )


--------------------------------------------------------------------------------
/notebooks/solutions/03-ex01-solutions.py:
--------------------------------------------------------------------------------
 1 | from sklearn.ensemble import HistGradientBoostingRegressor
 2 | 
 3 | hist = HistGradientBoostingRegressor(random_state=42)
 4 | 
 5 | hist.fit(X_train, y_train)
 6 | 
 7 | hist_pred = hist.predict(X_test)
 8 | 
 9 | compute_metrics(y_test, hist_pred)
10 | 
11 | hist_poisson = HistGradientBoostingRegressor(loss='poisson', random_state=42)
12 | hist_poisson.fit(X_train, y_train)
13 | 
14 | hist_poisson_pred = hist_poisson.predict(X_test)
15 | 
16 | compute_metrics(y_test, hist_poisson_pred)
17 | 
18 | fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 6), sharey=True)
19 | ax1.hist(y_test, bins=30, alpha=0.5)
20 | ax1.set_title("Test data")
21 | ax2.hist(hist_pred, bins=30, alpha=0.5)
22 | ax2.set_title("Hist predictions")
23 | ax3.hist(hist_poisson_pred, bins=30, alpha=0.5)
24 | ax3.set_title("Hist Poisson predictions")
25 | 


--------------------------------------------------------------------------------
/notebooks/solutions/03-ex02-solutions.py:
--------------------------------------------------------------------------------
 1 | poisson_reg = Pipeline([
 2 |     ("preprocessor", linear_model_preprocessor),
 3 |     ("reg", PoissonRegressor(alpha=1e-4)),
 4 | ])
 5 | 
 6 | poisson_reg.fit(X_train, y_train, reg__sample_weight=exposure_train)
 7 | 
 8 | poisson_pred = poisson_reg.predict(X_test)
 9 | compute_metrics(y_test, poisson_pred, sample_weight=exposure_test)
10 | 
11 | fig, ax = plt.subplots(figsize=(8, 8))
12 | plot_calibration_curve_regression(y_test, poisson_pred, ax=ax, title="Poisson", sample_weight=exposure_test);
13 | 


--------------------------------------------------------------------------------
/notebooks/solutions/03-ex03-solutions.py:
--------------------------------------------------------------------------------
 1 | from sklearn.preprocessing import OrdinalEncoder
 2 | from sklearn.compose import make_column_selector
 3 | 
 4 | tree_preprocessor = ColumnTransformer([
 5 |     ("categorical", 
 6 |      OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1),
 7 |          make_column_selector(dtype_include='object')
 8 |     ),
 9 |     ("numerical", "passthrough", make_column_selector(dtype_include='number'))
10 | ])
11 | 
12 | hist_poisson = Pipeline([
13 |     ("preprocessor", tree_preprocessor),
14 |     ("hist", HistGradientBoostingRegressor(loss="poisson", random_state=42))
15 | ])
16 | 
17 | hist_poisson.fit(X_train, y_train, hist__sample_weight=exposure_train)
18 | 
19 | hist_poisson_pred = hist_poisson.predict(X_test)
20 | 
21 | compute_metrics(y_test, hist_poisson_pred, sample_weight=exposure_test)
22 | 
23 | plot_calibration_curve_regression(y_test, hist_poisson_pred, sample_weight=exposure_test, title="Hist Poisson")
24 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | scikit-learn==1.0.*
2 | seaborn==0.11.2
3 | pandas
4 | imbalanced-learn==0.8.*
5 | 


--------------------------------------------------------------------------------
/slides.md:
--------------------------------------------------------------------------------
  1 | title: Advanced Machine Learning with scikit-learn: Text Data, Imbalanced Data, and Poisson Regression
  2 | use_katex: True
  3 | class: title-slide
  4 | 
  5 | # Advanced Machine Learning with scikit-learn
  6 | ## Text Data, Imbalanced Data, and Poisson Regression
  7 | 
  8 | ![](images/scikit-learn-logo-notext.png)
  9 | 
 10 | .larger[Thomas J. Fan]<br>
 11 | @thomasjpfan<br>
 12 | <a href="https://www.github.com/thomasjpfan" target="_blank"><span class="icon icon-github icon-left"></span></a>
 13 | <a href="https://www.twitter.com/thomasjpfan" target="_blank"><span class="icon icon-twitter"></span></a>
 14 | <a class="this-talk-link" href="https://github.com/thomasjpfan/ml-workshop-advanced" target="_blank">
 15 | This workshop on GitHub: github.com/thomasjpfan/ml-workshop-advanced</a>
 16 | 
 17 | ---
 18 | 
 19 | name: table-of-contents
 20 | class: title-slide, left
 21 | 
 22 | # Table of Contents
 23 | 
 24 | .g[
 25 | .g-6[
 26 | 1. [Text Data](#text)
 27 | 1. [Imbalanced Data](#imbalanced)
 28 | 1. [Poisson Regression](#poisson)
 29 | ]
 30 | .g-6.g-center[
 31 | ![](images/scikit-learn-logo-notext.png)
 32 | ]
 33 | ]
 34 | 
 35 | ---
 36 | 
 37 | name: text
 38 | class: chapter-slide
 39 | 
 40 | # 1. Text Data
 41 | 
 42 | .footnote-back[
 43 | [Back to Table of Contents](#table-of-contents)
 44 | ]
 45 | 
 46 | ---
 47 | 
 48 | # Types of text data
 49 | 
 50 | <table border="1" class="dataframe">
 51 |   <thead>
 52 |     <tr style="text-align: right;">
 53 |       <th></th>
 54 |       <th>fullName</th>
 55 |       <th>country</th>
 56 |       <th>politicalGroup</th>
 57 |       <th>nationalPoliticalGroup</th>
 58 |     </tr>
 59 |   </thead>
 60 |   <tbody>
 61 |     <tr>
 62 |       <th>0</th>
 63 |       <td>Magdalena ADAMOWICZ</td>
 64 |       <td>Poland</td>
 65 |       <td>Group of the European People's Party (Christian Democrats)</td>
 66 |       <td>Independent</td>
 67 |     </tr>
 68 |     <tr>
 69 |       <th>1</th>
 70 |       <td>Asim ADEMOV</td>
 71 |       <td>Bulgaria</td>
 72 |       <td>Group of the European People's Party (Christian Democrats)</td>
 73 |       <td>Citizens for European Development of Bulgaria</td>
 74 |     </tr>
 75 |     <tr>
 76 |       <th>2</th>
 77 |       <td>Isabella ADINOLFI</td>
 78 |       <td>Italy</td>
 79 |       <td>Non-attached Members</td>
 80 |       <td>Movimento 5 Stelle</td>
 81 |     </tr>
 82 |     <tr>
 83 |       <th>3</th>
 84 |       <td>Matteo ADINOLFI</td>
 85 |       <td>Italy</td>
 86 |       <td>Identity and Democracy Group</td>
 87 |       <td>Lega</td>
 88 |     </tr>
 89 |     <tr>
 90 |       <th>4</th>
 91 |       <td>Alex AGIUS SALIBA</td>
 92 |       <td>Malta</td>
 93 |       <td>Group of the Progressive Alliance of Socialists and Democrats in the European Parliament</td>
 94 |       <td>Partit Laburista</td>
 95 |     </tr>
 96 |   </tbody>
 97 | </table>
 98 | 
 99 | 
100 | ---
101 | 
102 | # Text data we are considering
103 | 
104 | I've just had the evidence that confirmed my suspicions. A bunch of kids, 14 to 22 put on the DVD of "Titanic" on a fantastic state of the art mega screen home entertainment type deal. Only two of them had actually seen it before. But they all had seen the moment of Kate, Leo and Celine Dion so many times that most of them felt they had seen the whole movie. Shortly after the epic started, they started to get restless, some of them left asking the others
105 | 
106 | This independent, B&W, DV feature consistently shocks, amazes and amuses with it's ability to create the most insane situations and then find humor and interest in them. It's all hilarious and ridiculous stuff, yet as absurd as much of the film should be, there is a heart and a reality here that keeps the film grounded, keeps the entire piece from drifting into complete craziness and therein lies the real message here.
107 | 
108 | ---
109 | 
110 | # Bag of words
111 | 
112 | .center[
113 | ![:scale 90%](images/countvectorizer.svg)
114 | ]
115 | 
116 | ---
117 | 
118 | # Text processing in scikit-learn
119 | 
120 | ```py
121 | from sklearn.feature_extraction.text import CountVectorizer
122 | 
123 | sample_text = ["Can we go to the mountain tomorrow?",
124 |                "The mountain is really tall"]
125 | 
126 | vect = CountVectorizer()
127 | vect.fit(sample_text)
128 | 
129 | vect.get_feature_names()
130 | # ['be', 'can', 'careful', 'finished', 'go', 'hill', 'homework', 'is', 'my',
131 | # 'please', 'tall', 'the', 'to', 'very', 'we']
132 | 
133 | X = vect.transform(sample_text)
134 | X.toarray()
135 | # array([[0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1],
136 | #        [1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0]])
137 | ```
138 | 
139 | ---
140 | 
141 | class: chapter-slide
142 | 
143 | # Notebook 📓!
144 | ## notebooks/01-text-data.ipynb
145 | 
146 | ---
147 | 
148 | # N-grams
149 | 
150 | - Tries to keep words together
151 | - "really tall" and "not tall" have different contexts
152 | 
153 | ![:scale 90%](images/countvectorizer-ngrams.svg)
154 | 
155 | ---
156 | 
157 | class: chapter-slide
158 | 
159 | # Notebook 📓!
160 | ## notebooks/01-text-data.ipynb
161 | 
162 | ---
163 | 
164 | # Tf-idf rescaling
165 | 
166 | $$
167 | \text{tf-idf}(t, d) = \text{tf}(t, d) \cdot \text{idf}(t)
168 | $$
169 | $$
170 | \text{idf}(t) = \text{log}\frac{1 + n_d}{1 + \text{df}(d, t)} + 1
171 | $$
172 | 
173 | - $\text{tf}(t, d)$ = The count of term $t$ in document $d$.
174 | - $n_d$ = total number of documents
175 | - $\text{df}(d, t)$ = number of documents containing term $t$
176 | 
177 | - scikit-learn divides each row by its length (L2 normalization)
178 | 
179 | ```py
180 | from sklearn.feature_extraction.text import TfidfVectorizer
181 | ```
182 | 
183 | ---
184 | 
185 | class: chapter-slide
186 | 
187 | # Notebook 📓!
188 | ## notebooks/01-text-data.ipynb
189 | 
190 | ---
191 | 
192 | name: imbalanced
193 | class: chapter-slide
194 | 
195 | # 2. Imbalanced Data
196 | 
197 | .footnote-back[
198 | [Back to Table of Contents](#table-of-contents)
199 | ]
200 | 
201 | ---
202 | 
203 | class: middle
204 | 
205 | # What is imbalanced data?
206 | 
207 | - Costs are different between classes
208 | - Data is imbalanced
209 | - Some datasets have very few positive samples
210 | 
211 | ---
212 | 
213 | # Different costs between classes
214 | 
215 | .g[
216 | .g-8[
217 | .smaller-x[
218 | ```py
219 | y_pred = log_reg.predict(X_test)
220 | print(classification_report(y_test, y_pred))
221 | ```
222 | 
223 | ```
224 |               precision    recall  f1-score   support
225 | 
226 |        False       0.99      1.00      0.99      2731
227 |         True       0.75      0.37      0.49        65
228 | 
229 |     accuracy                           0.98      2796
230 |    macro avg       0.87      0.68      0.74      2796
231 | weighted avg       0.98      0.98      0.98      2796
232 | ```
233 | 
234 | ```py
235 | y_pred_20 = log_reg.predict_proba(X_test)[:, 1] > 0.25
236 | print(classification_report(y_test, y_pred_20))
237 | ```
238 | 
239 | ```
240 |               precision    recall  f1-score   support
241 | 
242 |        False       0.99      0.99      0.99      2731
243 |         True       0.63      0.55      0.59        65
244 | 
245 |     accuracy                           0.98      2796
246 |    macro avg       0.81      0.77      0.79      2796
247 | weighted avg       0.98      0.98      0.98      2796
248 | ```
249 | ]
250 | ]
251 | .g-4[
252 | $$
253 | \text{precision} = \frac{TP}{TP + FP}
254 | $$
255 | 
256 | $$
257 | \text{recall} = \frac{TP}{TP + FN}
258 | $$
259 | ]
260 | ]
261 | 
262 | ---
263 | 
264 | class: chapter-slide
265 | 
266 | # Notebook 📕!
267 | ## notebooks/02-imbalanced-data.ipynb
268 | 
269 | ---
270 | 
271 | # Resampling
272 | 
273 | ![:scale 60%](images/resampling_approches.png)
274 | 
275 | ---
276 | 
277 | # scikit-learn Pipelines
278 | ### Unable to handle sampling
279 | 
280 | ![](images/pipeline.svg)
281 | 
282 | ---
283 | 
284 | class: middle
285 | 
286 | # Imbalanced-learn
287 | 
288 | [https://imbalanced-learn.org/stable/](https://imbalanced-learn.org/stable/)
289 | 
290 | - Extends scikit-learn API
291 | - Defines samplers
292 | 
293 | ---
294 | 
295 | # Defines pipeline to handle sampling
296 | 
297 | ```py
298 | from imblearn.pipeline import make_pipeline as make_imb_pipeline
299 | 
300 | from imblearn.under_sampling import RandomUnderSampler
301 | from imblearn.over_sampling import RandomOverSampler
302 | ```
303 | 
304 | Fitting and sampling done in one line:
305 | 
306 | ```py
307 | data_resampled, targets_resampled = obj.fit_resample(data, targets)
308 | ```
309 | 
310 | ---
311 | 
312 | class: chapter-slide
313 | 
314 | # Notebook 📕!
315 | ## notebooks/02-imbalanced-data.ipynb
316 | 
317 | ---
318 | 
319 | class: middle
320 | 
321 | # Class-weights
322 | 
323 | - Re-weight the loss functions
324 | - Native to scikit-learn for most models
325 | - Similar effect to over-sampling, but keeps the dataset size the same
326 | 
327 | ---
328 | 
329 | # Class-weights
330 | ## Linear models
331 | 
332 | - Loss of a given sample is weighted inversely proportional to class frequencies
333 | 
334 | ```py
335 | LogisticRegression(class_weight='balanced')
336 | ```
337 | 
338 | ## Tree models
339 | 
340 | - When deciding where to split, the `criterion` is weighted inversely proportional to class frequencies
341 | 
342 | ```py
343 | DecisionTreeClassifier(class_weight='balanced')
344 | ```
345 | 
346 | ---
347 | 
348 | class: chapter-slide
349 | 
350 | # Notebook 📕!
351 | ## notebooks/02-imbalanced-data.ipynb
352 | 
353 | ---
354 | 
355 | class: middle
356 | 
357 | # Balanced Bagging
358 | 
359 | - Random under-sampling of the training data for each estimator in the ensemble
360 | 
361 | ```py
362 | from imblearn.ensemble import BalancedRandomForestClassifier
363 | 
364 | balanced_rf = BalancedRandomForestClassifier(random_state=0)
365 | balanced_rf.fit(X_train, y_train)
366 | ```
367 | 
368 | ---
369 | 
370 | class: chapter-slide
371 | 
372 | # Notebook 📕!
373 | ## notebooks/02-imbalanced-data.ipynb
374 | 
375 | ---
376 | 
377 | # Synthetic Minority Oversampling Technique
378 | ## SMOTE
379 | 
380 | - Adds synthetic interpolated data to the minority class
381 | - For each sample in minority class:
382 |     - Pick a random neighbor among its k nearest minority-class neighbors.
383 |     - Pick a point uniformly at random on the line segment connecting the two.
384 | 
385 | ---
386 | 
387 | # Generated Data with SMOTE
388 | 
389 | ![](notebooks/images/smote_generated.png)
390 | 
391 | ---
392 | 
393 | class: chapter-slide
394 | 
395 | # Notebook 📕!
396 | ## notebooks/02-imbalanced-data.ipynb
397 | 
398 | ---
399 | 
400 | name: poisson
401 | class: chapter-slide
402 | 
403 | # 3. Poisson Regression
404 | 
405 | .footnote-back[
406 | [Back to Table of Contents](#table-of-contents)
407 | ]
408 | 
409 | ---
410 | 
411 | # Generalized Linear Models
412 | 
413 | Predicted values $\hat{y}$ are linked to a linear combination of $X$ via an
414 | inverse link function:
415 | 
416 | $$
417 | \hat{y}(w, X) = h(Xw)
418 | $$
419 | 
420 | The minimization problem becomes:
421 | 
422 | $$
423 | \min_{w}\frac{1}{2n}\sum_i d(y_i, \hat{y}_i) + \frac{\alpha}{2} ||w||_2^2
424 | $$
425 | 
426 | - $n$ is the number of samples
427 | - $\alpha$ is the L2 regularization penalty
428 | - $d(y_i, \hat{y}_i)$ is the unit deviance
429 | 
430 | ---
431 | 
432 | # Unit Deviances
433 | 
434 | ![](images/glm_unit_deviance.png)
435 | 
436 | ![:scale 100%](images/poisson_gamma_tweedie_distributions.png)
437 | 
438 | ---
439 | 
440 | class: middle
441 | 
442 | # Scikit-learn supports GLMs!
443 | 
444 | `TweedieRegressor` implements GLM for the Tweedie distribution with a `power` parameter
445 | ```py
446 | from sklearn.linear_model import TweedieRegressor
447 | ```
448 | 
449 | `power=0`: Normal distribution: Use `Ridge`, `ElasticNet`, etc.
450 | 
451 | ---
452 | 
453 | class: middle
454 | 
455 | # Convenience GLM estimators
456 | 
457 | Same as `TweedieRegressor(power=1, link='log')`
458 | ```py
459 | from sklearn.linear_model import PoissonRegressor
460 | ```
461 | 
462 | Same as `TweedieRegressor(power=2, link='log')`
463 | ```py
464 | from sklearn.linear_model import GammaRegressor
465 | ```
466 | 
467 | ---
468 | 
469 | # Examples of use cases for GLMs
470 | 
471 | - Agriculture / weather modeling:
472 |     - number of rain events per year (Poisson)
473 |     - amount of rainfall per event (Gamma)
474 | 
475 | - Risk modeling / insurance policy pricing:
476 |     - number of claim events / policyholder per year (Poisson)
477 |     - cost per event (Gamma)
478 | 
479 | - Predictive maintenance:
480 |     - number of production interruption events per year (Poisson)
481 |     - duration of interruption (Gamma)
482 | 
483 | ---
484 | 
485 | # Calibration for Regressors
486 | 
487 | .g.g-middle[
488 | .g-6[
489 | ![](images/calibration-regression.svg)
490 | ]
491 | .g-6[
492 | ![:scale 120%](images/calibration_regression.png)
493 | ]
494 | ]
495 | 
496 | ---
497 | 
498 | class: chapter-slide
499 | 
500 | # Notebook 📘!
501 | ## notebooks/03-poisson-regression.ipynb
502 | 
503 | ---
504 | 
505 | class: title-slide, left
506 | 
507 | # Closing
508 | 
509 | .g.g-middle[
510 | .g-7[
511 | ![:scale 30%](images/scikit-learn-logo-notext.png)
512 | 1. [Text data](#text)
513 | 1. [Imbalanced Data](#imbalanced)
514 | 1. [Poisson Regression](#poisson)
515 | ]
516 | .g-5.center[
517 | <br>
518 | .larger[Thomas J. Fan]<br>
519 | @thomasjpfan<br>
520 | <a href="https://www.github.com/thomasjpfan" target="_blank"><span class="icon icon-github icon-left"></span></a>
521 | <a href="https://www.twitter.com/thomasjpfan" target="_blank"><span class="icon icon-twitter"></span></a>
522 | <a class="this-talk-link" href="https://github.com/thomasjpfan/ml-workshop-advanced" target="_blank">
523 | This workshop on GitHub: github.com/thomasjpfan/ml-workshop-advanced</a>
524 | ]
525 | ]
526 | 


--------------------------------------------------------------------------------