├── .gitignore ├── BUILDING.md ├── LICENSE ├── Makefile ├── README.md ├── assets ├── auto-render.min.js ├── fonts │ ├── KaTeX_AMS-Regular.eot │ ├── KaTeX_AMS-Regular.ttf │ ├── KaTeX_AMS-Regular.woff │ ├── KaTeX_AMS-Regular.woff2 │ ├── KaTeX_Caligraphic-Bold.eot │ ├── KaTeX_Caligraphic-Bold.ttf │ ├── KaTeX_Caligraphic-Bold.woff │ ├── KaTeX_Caligraphic-Bold.woff2 │ ├── KaTeX_Caligraphic-Regular.eot │ ├── KaTeX_Caligraphic-Regular.ttf │ ├── KaTeX_Caligraphic-Regular.woff │ ├── KaTeX_Caligraphic-Regular.woff2 │ ├── KaTeX_Fraktur-Bold.eot │ ├── KaTeX_Fraktur-Bold.ttf │ ├── KaTeX_Fraktur-Bold.woff │ ├── KaTeX_Fraktur-Bold.woff2 │ ├── KaTeX_Fraktur-Regular.eot │ ├── KaTeX_Fraktur-Regular.ttf │ ├── KaTeX_Fraktur-Regular.woff │ ├── KaTeX_Fraktur-Regular.woff2 │ ├── KaTeX_Main-Bold.eot │ ├── KaTeX_Main-Bold.ttf │ ├── KaTeX_Main-Bold.woff │ ├── KaTeX_Main-Bold.woff2 │ ├── KaTeX_Main-Italic.eot │ ├── KaTeX_Main-Italic.ttf │ ├── KaTeX_Main-Italic.woff │ ├── KaTeX_Main-Italic.woff2 │ ├── KaTeX_Main-Regular.eot │ ├── KaTeX_Main-Regular.ttf │ ├── KaTeX_Main-Regular.woff │ ├── KaTeX_Main-Regular.woff2 │ ├── KaTeX_Math-BoldItalic.eot │ ├── KaTeX_Math-BoldItalic.ttf │ ├── KaTeX_Math-BoldItalic.woff │ ├── KaTeX_Math-BoldItalic.woff2 │ ├── KaTeX_Math-Italic.eot │ ├── KaTeX_Math-Italic.ttf │ ├── KaTeX_Math-Italic.woff │ ├── KaTeX_Math-Italic.woff2 │ ├── KaTeX_Math-Regular.eot │ ├── KaTeX_Math-Regular.ttf │ ├── KaTeX_Math-Regular.woff │ ├── KaTeX_Math-Regular.woff2 │ ├── KaTeX_SansSerif-Bold.eot │ ├── KaTeX_SansSerif-Bold.ttf │ ├── KaTeX_SansSerif-Bold.woff │ ├── KaTeX_SansSerif-Bold.woff2 │ ├── KaTeX_SansSerif-Italic.eot │ ├── KaTeX_SansSerif-Italic.ttf │ ├── KaTeX_SansSerif-Italic.woff │ ├── KaTeX_SansSerif-Italic.woff2 │ ├── KaTeX_SansSerif-Regular.eot │ ├── KaTeX_SansSerif-Regular.ttf │ ├── KaTeX_SansSerif-Regular.woff │ ├── KaTeX_SansSerif-Regular.woff2 │ ├── KaTeX_Script-Regular.eot │ ├── KaTeX_Script-Regular.ttf │ ├── KaTeX_Script-Regular.woff │ ├── KaTeX_Script-Regular.woff2 │ 
├── KaTeX_Size1-Regular.eot │ ├── KaTeX_Size1-Regular.ttf │ ├── KaTeX_Size1-Regular.woff │ ├── KaTeX_Size1-Regular.woff2 │ ├── KaTeX_Size2-Regular.eot │ ├── KaTeX_Size2-Regular.ttf │ ├── KaTeX_Size2-Regular.woff │ ├── KaTeX_Size2-Regular.woff2 │ ├── KaTeX_Size3-Regular.eot │ ├── KaTeX_Size3-Regular.ttf │ ├── KaTeX_Size3-Regular.woff │ ├── KaTeX_Size3-Regular.woff2 │ ├── KaTeX_Size4-Regular.eot │ ├── KaTeX_Size4-Regular.ttf │ ├── KaTeX_Size4-Regular.woff │ ├── KaTeX_Size4-Regular.woff2 │ ├── KaTeX_Typewriter-Regular.eot │ ├── KaTeX_Typewriter-Regular.ttf │ ├── KaTeX_Typewriter-Regular.woff │ └── KaTeX_Typewriter-Regular.woff2 ├── github.svg ├── index.html.jinja ├── katex.min.css ├── katex.min.js ├── remark.min.js ├── requirements-slides.txt ├── style.css └── twitter.svg ├── environment.yml ├── images ├── .gitkeep ├── bag_of_words.png ├── calibration-regression.svg ├── calibration_regression.png ├── countvectorizer-ngrams.png ├── countvectorizer-ngrams.svg ├── countvectorizer.png ├── countvectorizer.svg ├── favicon_org.png ├── glm_unit_deviance.png ├── pipeline.svg ├── poisson_gamma_tweedie_distributions.png ├── resampling_approches.png ├── scikit-learn-logo-notext.png └── single_words.png ├── index.html ├── maint_tools └── check_notebooks.sh ├── make.py ├── notebooks ├── .gitkeep ├── 01-text-data.ipynb ├── 02-imbalanced-data.ipynb ├── 03-poisson-regression.ipynb ├── data │ ├── .gitkeep │ ├── claims.csv │ ├── london_bikes.csv │ └── review_polarity.tar.gz ├── images │ ├── kfold_cv.svg │ └── smote_generated.png └── solutions │ ├── 01-ex01-solutions.py │ ├── 01-ex02-solutions.py │ ├── 01-ex03-solutions.py │ ├── 02-ex01-solutions.py │ ├── 02-ex02-solutions.py │ ├── 02-ex03-solutions.py │ ├── 03-ex01-solutions.py │ ├── 03-ex02-solutions.py │ └── 03-ex03-solutions.py ├── requirements.txt └── slides.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 
*$py.class 5 | 6 | notebooks/data/train 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | cover/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | .pybuilder/ 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | # For a library or package, you might want to ignore these files since the code is 89 | # intended to run in multiple environments; otherwise, check them in: 90 | # .python-version 91 | 92 | # pipenv 93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 96 | # install all needed dependencies. 97 | #Pipfile.lock 98 | 99 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 100 | __pypackages__/ 101 | 102 | # Celery stuff 103 | celerybeat-schedule 104 | celerybeat.pid 105 | 106 | # SageMath parsed files 107 | *.sage.py 108 | 109 | # Environments 110 | .env 111 | .venv 112 | env/ 113 | venv/ 114 | ENV/ 115 | env.bak/ 116 | venv.bak/ 117 | 118 | # Spyder project settings 119 | .spyderproject 120 | .spyproject 121 | 122 | # Rope project settings 123 | .ropeproject 124 | 125 | # mkdocs documentation 126 | /site 127 | 128 | # mypy 129 | .mypy_cache/ 130 | .dmypy.json 131 | dmypy.json 132 | 133 | # Pyre type checker 134 | .pyre/ 135 | 136 | # pytype static type analyzer 137 | .pytype/ 138 | 139 | # Cython debug symbols 140 | cython_debug/ 141 | 142 | notebooks/data/txt_sentoken 143 | -------------------------------------------------------------------------------- /BUILDING.md: -------------------------------------------------------------------------------- 1 | # FAQ 2 | 3 | ## How do I build slides? 4 | 5 | Install the dependencies: `pip install -r assets/requirements-slides.txt`. 6 | 7 | ```bash 8 | python make.py build 9 | ``` 10 | 11 | Remember to rebuild whenever `slides.md` is updated. 12 | 13 | ## How do I develop and live reload? 14 | 15 | ```bash 16 | python make.py live 17 | ``` 18 | 19 | ## How to host on GitHub Pages? 20 | 21 | 1. Go to settings. 22 | 2. Enable GitHub Pages. 23 | 24 | ## How to change my favicon? 25 | 26 | Replace favicon with something else 27 | 28 | ## How to save as pdf? 29 | 30 | 1. Install decktape 31 | 32 | ```bash 33 | npm install -g decktape 34 | ``` 35 | 36 | 2. 
Run decktape 37 | 38 | ```bash 39 | decktape "http://localhost:5500" slides.pdf 40 | ``` 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Thomas Fan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean check 2 | 3 | clean: 4 | jupyter nbconvert --clear-output --inplace notebooks/0*.ipynb 5 | 6 | check: 7 | bash maint_tools/check_notebooks.sh 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Advanced Machine Learning with scikit-learn 2 | ### Text Data, Imbalanced Data, and Poisson Regression 3 | 4 | *By Thomas J. Fan* 5 | 6 | [Link to slides](https://thomasjpfan.github.io/ml-workshop-advanced/) 7 | 8 | Scikit-learn is a Python machine learning library used by data science practitioners from many disciplines. During this training, we will learn about processing text data, working with imbalanced data, and Poisson regression. We will start by processing text data with scikit-learn's vectorizers. Since the output of these vectorizers is sparse, we will also review scikit-learn estimators that can handle sparse data. Next, we will explore how to work with imbalanced data, where one of the classes appears more frequently than the others. We will look at estimators with class weights, resampling techniques provided by imbalanced-learn, and using a bagging classifier with balancing. Finally, we will learn about generalized linear models focusing on Poisson regression. Poisson regression models target distributions that are counts or relative frequencies. We will use tree-based models such as Histogram-based Gradient Boosted Trees with a Poisson loss to model relative frequencies. 
9 | 10 | ## Obtaining the Material 11 | 12 | ### With git 13 | 14 | The most convenient way to download the material is with git: 15 | 16 | ```bash 17 | git clone https://github.com/thomasjpfan/ml-workshop-advanced 18 | ``` 19 | 20 | Please note that I may add and improve the material until shortly before the session. You can update your copy by running: 21 | 22 | ```bash 23 | git pull origin master 24 | ``` 25 | 26 | ### Download zip 27 | 28 | If you are not familiar with git, you can download this repository as a zip file at: [github.com/thomasjpfan/ml-workshop-advanced/archive/master.zip](https://github.com/thomasjpfan/ml-workshop-advanced/archive/master.zip). Please note that I may add and improve the material until shortly before the session. To update your copy, please re-download the material a day before the session. 29 | 30 | ## Running the notebooks 31 | 32 | ### Local Installation 33 | 34 | Local installation requires `conda` to be installed on your machine. The simplest way to install `conda` is to install `miniconda` by using an installer for your operating system provided at [docs.conda.io/en/latest/miniconda.html](https://docs.conda.io/en/latest/miniconda.html). After `conda` is installed, navigate to this repository on your local machine: 35 | 36 | ```bash 37 | cd ml-workshop-advanced 38 | ``` 39 | 40 | Then download and install the dependencies: 41 | 42 | ```bash 43 | conda env create -f environment.yml 44 | ``` 45 | 46 | This will create a virtual environment named `ml-workshop-advanced`. To activate this environment: 47 | 48 | ```bash 49 | conda activate ml-workshop-advanced 50 | ``` 51 | 52 | Finally, to start `jupyterlab`, run: 53 | 54 | ```bash 55 | jupyter lab 56 | ``` 57 | 58 | This should open a browser window with the `jupyterlab` interface. 59 | 60 | ### Run with Google's Colab 61 | 62 | If you have any issues with installing `conda` or running `jupyter` on your local computer, then you can run the notebooks on Google's Colab: 63 | 64 | 1. 
[Text Data](https://colab.research.google.com/github/thomasjpfan/ml-workshop-advanced/blob/master/notebooks/01-text-data.ipynb) 65 | 2. [Imbalanced data](https://colab.research.google.com/github/thomasjpfan/ml-workshop-advanced/blob/master/notebooks/02-imbalanced-data.ipynb) 66 | 3. [Poisson regression](https://colab.research.google.com/github/thomasjpfan/ml-workshop-advanced/blob/master/notebooks/03-poisson-regression.ipynb) 67 | 68 | ## License 69 | 70 | This repo is under the [MIT License](LICENSE). 71 | -------------------------------------------------------------------------------- /assets/auto-render.min.js: -------------------------------------------------------------------------------- 1 | (function(e){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=e()}else if(typeof define==="function"&&define.amd){define([],e)}else{var t;if(typeof window!=="undefined"){t=window}else if(typeof global!=="undefined"){t=global}else if(typeof self!=="undefined"){t=self}else{t=this}t.renderMathInElement=e()}})(function(){var e,t,n;return function e(t,n,r){function o(c,a){if(!n[c]){if(!t[c]){var u=typeof require=="function"&&require;if(!a&&u)return u(c,!0);if(i)return i(c,!0);var f=new Error("Cannot find module '"+c+"'");throw f.code="MODULE_NOT_FOUND",f}var s=n[c]={exports:{}};t[c][0].call(s.exports,function(e){var n=t[c][1][e];return o(n?n:e)},s,s.exports,e,t,n,r)}return n[c].exports}var i=typeof require=="function"&&require;for(var c=0;cf){s=a[f++];if(s!=s)return true}else for(;u>f;f++)if(e||f in a){if(a[f]===n)return e||f||0}return!e&&-1}}},{"./_to-index":32,"./_to-iobject":34,"./_to-length":35}],8:[function(e,t,n){var r={}.toString;t.exports=function(e){return r.call(e).slice(8,-1)}},{}],9:[function(e,t,n){var r=t.exports={version:"2.4.0"};if(typeof __e=="number")__e=r},{}],10:[function(e,t,n){var r=e("./_a-function");t.exports=function(e,t,n){r(e);if(t===undefined)return e;switch(n){case 1:return function(n){return e.call(t,n)};case 2:return 
function(n,r){return e.call(t,n,r)};case 3:return function(n,r,o){return e.call(t,n,r,o)}}return function(){return e.apply(t,arguments)}}},{"./_a-function":5}],11:[function(e,t,n){t.exports=function(e){if(e==undefined)throw TypeError("Can't call method on "+e);return e}},{}],12:[function(e,t,n){t.exports=!e("./_fails")(function(){return Object.defineProperty({},"a",{get:function(){return 7}}).a!=7})},{"./_fails":16}],13:[function(e,t,n){var r=e("./_is-object"),o=e("./_global").document,i=r(o)&&r(o.createElement);t.exports=function(e){return i?o.createElement(e):{}}},{"./_global":17,"./_is-object":22}],14:[function(e,t,n){t.exports="constructor,hasOwnProperty,isPrototypeOf,propertyIsEnumerable,toLocaleString,toString,valueOf".split(",")},{}],15:[function(e,t,n){var r=e("./_global"),o=e("./_core"),i=e("./_ctx"),c=e("./_hide"),a="prototype";var u=function(e,t,n){var f=e&u.F,s=e&u.G,l=e&u.S,p=e&u.P,d=e&u.B,_=e&u.W,v=s?o:o[t]||(o[t]={}),h=v[a],b=s?r:l?r[t]:(r[t]||{})[a],y,g,x;if(s)n=t;for(y in n){g=!f&&b&&b[y]!==undefined;if(g&&y in v)continue;x=g?b[y]:n[y];v[y]=s&&typeof b[y]!="function"?n[y]:d&&g?i(x,r):_&&b[y]==x?function(e){var t=function(t,n,r){if(this instanceof e){switch(arguments.length){case 0:return new e;case 1:return new e(t);case 2:return new e(t,n)}return new e(t,n,r)}return e.apply(this,arguments)};t[a]=e[a];return t}(x):p&&typeof x=="function"?i(Function.call,x):x;if(p){(v.virtual||(v.virtual={}))[y]=x;if(e&u.R&&h&&!h[y])c(h,y,x)}}};u.F=1;u.G=2;u.S=4;u.P=8;u.B=16;u.W=32;u.U=64;u.R=128;t.exports=u},{"./_core":9,"./_ctx":10,"./_global":17,"./_hide":19}],16:[function(e,t,n){t.exports=function(e){try{return!!e()}catch(e){return true}}},{}],17:[function(e,t,n){var r=t.exports=typeof window!="undefined"&&window.Math==Math?window:typeof self!="undefined"&&self.Math==Math?self:Function("return this")();if(typeof __g=="number")__g=r},{}],18:[function(e,t,n){var r={}.hasOwnProperty;t.exports=function(e,t){return r.call(e,t)}},{}],19:[function(e,t,n){var 
r=e("./_object-dp"),o=e("./_property-desc");t.exports=e("./_descriptors")?function(e,t,n){return r.f(e,t,o(1,n))}:function(e,t,n){e[t]=n;return e}},{"./_descriptors":12,"./_object-dp":24,"./_property-desc":29}],20:[function(e,t,n){t.exports=!e("./_descriptors")&&!e("./_fails")(function(){return Object.defineProperty(e("./_dom-create")("div"),"a",{get:function(){return 7}}).a!=7})},{"./_descriptors":12,"./_dom-create":13,"./_fails":16}],21:[function(e,t,n){var r=e("./_cof");t.exports=Object("z").propertyIsEnumerable(0)?Object:function(e){return r(e)=="String"?e.split(""):Object(e)}},{"./_cof":8}],22:[function(e,t,n){t.exports=function(e){return typeof e==="object"?e!==null:typeof e==="function"}},{}],23:[function(e,t,n){"use strict";var r=e("./_object-keys"),o=e("./_object-gops"),i=e("./_object-pie"),c=e("./_to-object"),a=e("./_iobject"),u=Object.assign;t.exports=!u||e("./_fails")(function(){var e={},t={},n=Symbol(),r="abcdefghijklmnopqrst";e[n]=7;r.split("").forEach(function(e){t[e]=e});return u({},e)[n]!=7||Object.keys(u({},t)).join("")!=r})?function e(t,n){var u=c(t),f=arguments.length,s=1,l=o.f,p=i.f;while(f>s){var d=a(arguments[s++]),_=l?r(d).concat(l(d)):r(d),v=_.length,h=0,b;while(v>h)if(p.call(d,b=_[h++]))u[b]=d[b]}return u}:u},{"./_fails":16,"./_iobject":21,"./_object-gops":25,"./_object-keys":27,"./_object-pie":28,"./_to-object":36}],24:[function(e,t,n){var r=e("./_an-object"),o=e("./_ie8-dom-define"),i=e("./_to-primitive"),c=Object.defineProperty;n.f=e("./_descriptors")?Object.defineProperty:function e(t,n,a){r(t);n=i(n,true);r(a);if(o)try{return c(t,n,a)}catch(e){}if("get"in a||"set"in a)throw TypeError("Accessors not supported!");if("value"in a)t[n]=a.value;return t}},{"./_an-object":6,"./_descriptors":12,"./_ie8-dom-define":20,"./_to-primitive":37}],25:[function(e,t,n){n.f=Object.getOwnPropertySymbols},{}],26:[function(e,t,n){var 
r=e("./_has"),o=e("./_to-iobject"),i=e("./_array-includes")(false),c=e("./_shared-key")("IE_PROTO");t.exports=function(e,t){var n=o(e),a=0,u=[],f;for(f in n)if(f!=c)r(n,f)&&u.push(f);while(t.length>a)if(r(n,f=t[a++])){~i(u,f)||u.push(f)}return u}},{"./_array-includes":7,"./_has":18,"./_shared-key":30,"./_to-iobject":34}],27:[function(e,t,n){var r=e("./_object-keys-internal"),o=e("./_enum-bug-keys");t.exports=Object.keys||function e(t){return r(t,o)}},{"./_enum-bug-keys":14,"./_object-keys-internal":26}],28:[function(e,t,n){n.f={}.propertyIsEnumerable},{}],29:[function(e,t,n){t.exports=function(e,t){return{enumerable:!(e&1),configurable:!(e&2),writable:!(e&4),value:t}}},{}],30:[function(e,t,n){var r=e("./_shared")("keys"),o=e("./_uid");t.exports=function(e){return r[e]||(r[e]=o(e))}},{"./_shared":31,"./_uid":38}],31:[function(e,t,n){var r=e("./_global"),o="__core-js_shared__",i=r[o]||(r[o]={});t.exports=function(e){return i[e]||(i[e]={})}},{"./_global":17}],32:[function(e,t,n){var r=e("./_to-integer"),o=Math.max,i=Math.min;t.exports=function(e,t){e=r(e);return e<0?o(e+t,0):i(e,t)}},{"./_to-integer":33}],33:[function(e,t,n){var r=Math.ceil,o=Math.floor;t.exports=function(e){return isNaN(e=+e)?0:(e>0?o:r)(e)}},{}],34:[function(e,t,n){var r=e("./_iobject"),o=e("./_defined");t.exports=function(e){return r(o(e))}},{"./_defined":11,"./_iobject":21}],35:[function(e,t,n){var r=e("./_to-integer"),o=Math.min;t.exports=function(e){return e>0?o(r(e),9007199254740991):0}},{"./_to-integer":33}],36:[function(e,t,n){var r=e("./_defined");t.exports=function(e){return Object(r(e))}},{"./_defined":11}],37:[function(e,t,n){var r=e("./_is-object");t.exports=function(e,t){if(!r(e))return e;var n,o;if(t&&typeof(n=e.toString)=="function"&&!r(o=n.call(e)))return o;if(typeof(n=e.valueOf)=="function"&&!r(o=n.call(e)))return o;if(!t&&typeof(n=e.toString)=="function"&&!r(o=n.call(e)))return o;throw TypeError("Can't convert object to primitive 
value")}},{"./_is-object":22}],38:[function(e,t,n){var r=0,o=Math.random();t.exports=function(e){return"Symbol(".concat(e===undefined?"":e,")_",(++r+o).toString(36))}},{}],39:[function(e,t,n){var r=e("./_export");r(r.S+r.F,"Object",{assign:e("./_object-assign")})},{"./_export":15,"./_object-assign":23}]},{},[1])(1)}); 2 | -------------------------------------------------------------------------------- /assets/fonts/KaTeX_AMS-Regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_AMS-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_AMS-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_AMS-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_AMS-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_AMS-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_AMS-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_AMS-Regular.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Caligraphic-Bold.eot: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Caligraphic-Bold.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Caligraphic-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Caligraphic-Bold.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Caligraphic-Bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Caligraphic-Bold.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Caligraphic-Bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Caligraphic-Bold.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Caligraphic-Regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Caligraphic-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Caligraphic-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Caligraphic-Regular.ttf 
-------------------------------------------------------------------------------- /assets/fonts/KaTeX_Caligraphic-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Caligraphic-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Caligraphic-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Caligraphic-Regular.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Fraktur-Bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Fraktur-Bold.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Fraktur-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Fraktur-Bold.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Fraktur-Bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Fraktur-Bold.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Fraktur-Bold.woff2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Fraktur-Bold.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Fraktur-Regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Fraktur-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Fraktur-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Fraktur-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Fraktur-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Fraktur-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Fraktur-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Fraktur-Regular.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Bold.eot -------------------------------------------------------------------------------- 
/assets/fonts/KaTeX_Main-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Bold.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Bold.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Bold.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Italic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Italic.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Italic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Italic.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Italic.woff 
-------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Italic.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Main-Regular.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-BoldItalic.eot: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-BoldItalic.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-BoldItalic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-BoldItalic.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-BoldItalic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-BoldItalic.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-BoldItalic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-BoldItalic.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-Italic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-Italic.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-Italic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-Italic.ttf -------------------------------------------------------------------------------- 
/assets/fonts/KaTeX_Math-Italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-Italic.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-Italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-Italic.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-Regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Math-Regular.woff2 
-------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Bold.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Bold.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Bold.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Bold.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Italic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Italic.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Italic.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Italic.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Italic.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Italic.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Regular.woff 
-------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_SansSerif-Regular.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Script-Regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Script-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Script-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Script-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Script-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Script-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Script-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Script-Regular.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size1-Regular.eot: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size1-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size1-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size1-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size1-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size1-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size1-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size1-Regular.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size2-Regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size2-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size2-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size2-Regular.ttf -------------------------------------------------------------------------------- 
/assets/fonts/KaTeX_Size2-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size2-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size2-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size2-Regular.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size3-Regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size3-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size3-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size3-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size3-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size3-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size3-Regular.woff2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size3-Regular.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size4-Regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size4-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size4-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size4-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size4-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size4-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size4-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Size4-Regular.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Typewriter-Regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Typewriter-Regular.eot -------------------------------------------------------------------------------- 
/assets/fonts/KaTeX_Typewriter-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Typewriter-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Typewriter-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Typewriter-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Typewriter-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/assets/fonts/KaTeX_Typewriter-Regular.woff2 -------------------------------------------------------------------------------- /assets/github.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /assets/index.html.jinja: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {{ title }} 6 | 7 | 8 | 9 | {% if use_katex %} 10 | 11 | 12 | 13 | {% endif %} 14 | 15 | 16 | 17 | 18 | 19 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /assets/katex.min.css: -------------------------------------------------------------------------------- 1 | @font-face{font-family:KaTeX_AMS;src:url(fonts/KaTeX_AMS-Regular.woff2) format("woff2"),url(fonts/KaTeX_AMS-Regular.woff) format("woff"),url(fonts/KaTeX_AMS-Regular.ttf) 
format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Caligraphic;src:url(fonts/KaTeX_Caligraphic-Bold.woff2) format("woff2"),url(fonts/KaTeX_Caligraphic-Bold.woff) format("woff"),url(fonts/KaTeX_Caligraphic-Bold.ttf) format("truetype");font-weight:700;font-style:normal}@font-face{font-family:KaTeX_Caligraphic;src:url(fonts/KaTeX_Caligraphic-Regular.woff2) format("woff2"),url(fonts/KaTeX_Caligraphic-Regular.woff) format("woff"),url(fonts/KaTeX_Caligraphic-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Fraktur;src:url(fonts/KaTeX_Fraktur-Bold.woff2) format("woff2"),url(fonts/KaTeX_Fraktur-Bold.woff) format("woff"),url(fonts/KaTeX_Fraktur-Bold.ttf) format("truetype");font-weight:700;font-style:normal}@font-face{font-family:KaTeX_Fraktur;src:url(fonts/KaTeX_Fraktur-Regular.woff2) format("woff2"),url(fonts/KaTeX_Fraktur-Regular.woff) format("woff"),url(fonts/KaTeX_Fraktur-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Main;src:url(fonts/KaTeX_Main-Bold.woff2) format("woff2"),url(fonts/KaTeX_Main-Bold.woff) format("woff"),url(fonts/KaTeX_Main-Bold.ttf) format("truetype");font-weight:700;font-style:normal}@font-face{font-family:KaTeX_Main;src:url(fonts/KaTeX_Main-BoldItalic.woff2) format("woff2"),url(fonts/KaTeX_Main-BoldItalic.woff) format("woff"),url(fonts/KaTeX_Main-BoldItalic.ttf) format("truetype");font-weight:700;font-style:italic}@font-face{font-family:KaTeX_Main;src:url(fonts/KaTeX_Main-Italic.woff2) format("woff2"),url(fonts/KaTeX_Main-Italic.woff) format("woff"),url(fonts/KaTeX_Main-Italic.ttf) format("truetype");font-weight:400;font-style:italic}@font-face{font-family:KaTeX_Main;src:url(fonts/KaTeX_Main-Regular.woff2) format("woff2"),url(fonts/KaTeX_Main-Regular.woff) format("woff"),url(fonts/KaTeX_Main-Regular.ttf) 
format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Math;src:url(fonts/KaTeX_Math-BoldItalic.woff2) format("woff2"),url(fonts/KaTeX_Math-BoldItalic.woff) format("woff"),url(fonts/KaTeX_Math-BoldItalic.ttf) format("truetype");font-weight:700;font-style:italic}@font-face{font-family:KaTeX_Math;src:url(fonts/KaTeX_Math-Italic.woff2) format("woff2"),url(fonts/KaTeX_Math-Italic.woff) format("woff"),url(fonts/KaTeX_Math-Italic.ttf) format("truetype");font-weight:400;font-style:italic}@font-face{font-family:"KaTeX_SansSerif";src:url(fonts/KaTeX_SansSerif-Bold.woff2) format("woff2"),url(fonts/KaTeX_SansSerif-Bold.woff) format("woff"),url(fonts/KaTeX_SansSerif-Bold.ttf) format("truetype");font-weight:700;font-style:normal}@font-face{font-family:"KaTeX_SansSerif";src:url(fonts/KaTeX_SansSerif-Italic.woff2) format("woff2"),url(fonts/KaTeX_SansSerif-Italic.woff) format("woff"),url(fonts/KaTeX_SansSerif-Italic.ttf) format("truetype");font-weight:400;font-style:italic}@font-face{font-family:"KaTeX_SansSerif";src:url(fonts/KaTeX_SansSerif-Regular.woff2) format("woff2"),url(fonts/KaTeX_SansSerif-Regular.woff) format("woff"),url(fonts/KaTeX_SansSerif-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Script;src:url(fonts/KaTeX_Script-Regular.woff2) format("woff2"),url(fonts/KaTeX_Script-Regular.woff) format("woff"),url(fonts/KaTeX_Script-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Size1;src:url(fonts/KaTeX_Size1-Regular.woff2) format("woff2"),url(fonts/KaTeX_Size1-Regular.woff) format("woff"),url(fonts/KaTeX_Size1-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Size2;src:url(fonts/KaTeX_Size2-Regular.woff2) format("woff2"),url(fonts/KaTeX_Size2-Regular.woff) format("woff"),url(fonts/KaTeX_Size2-Regular.ttf) 
format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Size3;src:url(fonts/KaTeX_Size3-Regular.woff2) format("woff2"),url(fonts/KaTeX_Size3-Regular.woff) format("woff"),url(fonts/KaTeX_Size3-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Size4;src:url(fonts/KaTeX_Size4-Regular.woff2) format("woff2"),url(fonts/KaTeX_Size4-Regular.woff) format("woff"),url(fonts/KaTeX_Size4-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Typewriter;src:url(fonts/KaTeX_Typewriter-Regular.woff2) format("woff2"),url(fonts/KaTeX_Typewriter-Regular.woff) format("woff"),url(fonts/KaTeX_Typewriter-Regular.ttf) format("truetype");font-weight:400;font-style:normal}.katex{font:normal 1.21em KaTeX_Main,Times New Roman,serif;line-height:1.2;text-indent:0;text-rendering:auto}.katex *{-ms-high-contrast-adjust:none!important}.katex .katex-version:after{content:"0.11.1"}.katex .katex-mathml{position:absolute;clip:rect(1px,1px,1px,1px);padding:0;border:0;height:1px;width:1px;overflow:hidden}.katex .katex-html>.newline{display:block}.katex .base{position:relative;white-space:nowrap;width:min-content}.katex .base,.katex .strut{display:inline-block}.katex .textbf{font-weight:700}.katex .textit{font-style:italic}.katex .textrm{font-family:KaTeX_Main}.katex .textsf{font-family:KaTeX_SansSerif}.katex .texttt{font-family:KaTeX_Typewriter}.katex .mathdefault{font-family:KaTeX_Math;font-style:italic}.katex .mathit{font-family:KaTeX_Main;font-style:italic}.katex .mathrm{font-style:normal}.katex .mathbf{font-family:KaTeX_Main;font-weight:700}.katex .boldsymbol{font-family:KaTeX_Math;font-weight:700;font-style:italic}.katex .amsrm,.katex .mathbb,.katex .textbb{font-family:KaTeX_AMS}.katex .mathcal{font-family:KaTeX_Caligraphic}.katex .mathfrak,.katex .textfrak{font-family:KaTeX_Fraktur}.katex .mathtt{font-family:KaTeX_Typewriter}.katex .mathscr,.katex .textscr{font-family:KaTeX_Script}.katex 
.mathsf,.katex .textsf{font-family:KaTeX_SansSerif}.katex .mathboldsf,.katex .textboldsf{font-family:KaTeX_SansSerif;font-weight:700}.katex .mathitsf,.katex .textitsf{font-family:KaTeX_SansSerif;font-style:italic}.katex .mainrm{font-family:KaTeX_Main;font-style:normal}.katex .vlist-t{display:inline-table;table-layout:fixed}.katex .vlist-r{display:table-row}.katex .vlist{display:table-cell;vertical-align:bottom;position:relative}.katex .vlist>span{display:block;height:0;position:relative}.katex .vlist>span>span{display:inline-block}.katex .vlist>span>.pstrut{overflow:hidden;width:0}.katex .vlist-t2{margin-right:-2px}.katex .vlist-s{display:table-cell;vertical-align:bottom;font-size:1px;width:2px;min-width:2px}.katex .msupsub{text-align:left}.katex .mfrac>span>span{text-align:center}.katex .mfrac .frac-line{display:inline-block;width:100%;border-bottom-style:solid}.katex .hdashline,.katex .hline,.katex .mfrac .frac-line,.katex .overline .overline-line,.katex .rule,.katex .underline .underline-line{min-height:1px}.katex .mspace{display:inline-block}.katex .clap,.katex .llap,.katex .rlap{width:0;position:relative}.katex .clap>.inner,.katex .llap>.inner,.katex .rlap>.inner{position:absolute}.katex .clap>.fix,.katex .llap>.fix,.katex .rlap>.fix{display:inline-block}.katex .llap>.inner{right:0}.katex .clap>.inner,.katex .rlap>.inner{left:0}.katex .clap>.inner>span{margin-left:-50%;margin-right:50%}.katex .rule{display:inline-block;border:0 solid;position:relative}.katex .hline,.katex .overline .overline-line,.katex .underline .underline-line{display:inline-block;width:100%;border-bottom-style:solid}.katex .hdashline{display:inline-block;width:100%;border-bottom-style:dashed}.katex .sqrt>.root{margin-left:.27777778em;margin-right:-.55555556em}.katex .fontsize-ensurer.reset-size1.size1,.katex .sizing.reset-size1.size1{font-size:1em}.katex .fontsize-ensurer.reset-size1.size2,.katex .sizing.reset-size1.size2{font-size:1.2em}.katex .fontsize-ensurer.reset-size1.size3,.katex 
.sizing.reset-size1.size3{font-size:1.4em}.katex .fontsize-ensurer.reset-size1.size4,.katex .sizing.reset-size1.size4{font-size:1.6em}.katex .fontsize-ensurer.reset-size1.size5,.katex .sizing.reset-size1.size5{font-size:1.8em}.katex .fontsize-ensurer.reset-size1.size6,.katex .sizing.reset-size1.size6{font-size:2em}.katex .fontsize-ensurer.reset-size1.size7,.katex .sizing.reset-size1.size7{font-size:2.4em}.katex .fontsize-ensurer.reset-size1.size8,.katex .sizing.reset-size1.size8{font-size:2.88em}.katex .fontsize-ensurer.reset-size1.size9,.katex .sizing.reset-size1.size9{font-size:3.456em}.katex .fontsize-ensurer.reset-size1.size10,.katex .sizing.reset-size1.size10{font-size:4.148em}.katex .fontsize-ensurer.reset-size1.size11,.katex .sizing.reset-size1.size11{font-size:4.976em}.katex .fontsize-ensurer.reset-size2.size1,.katex .sizing.reset-size2.size1{font-size:.83333333em}.katex .fontsize-ensurer.reset-size2.size2,.katex .sizing.reset-size2.size2{font-size:1em}.katex .fontsize-ensurer.reset-size2.size3,.katex .sizing.reset-size2.size3{font-size:1.16666667em}.katex .fontsize-ensurer.reset-size2.size4,.katex .sizing.reset-size2.size4{font-size:1.33333333em}.katex .fontsize-ensurer.reset-size2.size5,.katex .sizing.reset-size2.size5{font-size:1.5em}.katex .fontsize-ensurer.reset-size2.size6,.katex .sizing.reset-size2.size6{font-size:1.66666667em}.katex .fontsize-ensurer.reset-size2.size7,.katex .sizing.reset-size2.size7{font-size:2em}.katex .fontsize-ensurer.reset-size2.size8,.katex .sizing.reset-size2.size8{font-size:2.4em}.katex .fontsize-ensurer.reset-size2.size9,.katex .sizing.reset-size2.size9{font-size:2.88em}.katex .fontsize-ensurer.reset-size2.size10,.katex .sizing.reset-size2.size10{font-size:3.45666667em}.katex .fontsize-ensurer.reset-size2.size11,.katex .sizing.reset-size2.size11{font-size:4.14666667em}.katex .fontsize-ensurer.reset-size3.size1,.katex .sizing.reset-size3.size1{font-size:.71428571em}.katex .fontsize-ensurer.reset-size3.size2,.katex 
.sizing.reset-size3.size2{font-size:.85714286em}.katex .fontsize-ensurer.reset-size3.size3,.katex .sizing.reset-size3.size3{font-size:1em}.katex .fontsize-ensurer.reset-size3.size4,.katex .sizing.reset-size3.size4{font-size:1.14285714em}.katex .fontsize-ensurer.reset-size3.size5,.katex .sizing.reset-size3.size5{font-size:1.28571429em}.katex .fontsize-ensurer.reset-size3.size6,.katex .sizing.reset-size3.size6{font-size:1.42857143em}.katex .fontsize-ensurer.reset-size3.size7,.katex .sizing.reset-size3.size7{font-size:1.71428571em}.katex .fontsize-ensurer.reset-size3.size8,.katex .sizing.reset-size3.size8{font-size:2.05714286em}.katex .fontsize-ensurer.reset-size3.size9,.katex .sizing.reset-size3.size9{font-size:2.46857143em}.katex .fontsize-ensurer.reset-size3.size10,.katex .sizing.reset-size3.size10{font-size:2.96285714em}.katex .fontsize-ensurer.reset-size3.size11,.katex .sizing.reset-size3.size11{font-size:3.55428571em}.katex .fontsize-ensurer.reset-size4.size1,.katex .sizing.reset-size4.size1{font-size:.625em}.katex .fontsize-ensurer.reset-size4.size2,.katex .sizing.reset-size4.size2{font-size:.75em}.katex .fontsize-ensurer.reset-size4.size3,.katex .sizing.reset-size4.size3{font-size:.875em}.katex .fontsize-ensurer.reset-size4.size4,.katex .sizing.reset-size4.size4{font-size:1em}.katex .fontsize-ensurer.reset-size4.size5,.katex .sizing.reset-size4.size5{font-size:1.125em}.katex .fontsize-ensurer.reset-size4.size6,.katex .sizing.reset-size4.size6{font-size:1.25em}.katex .fontsize-ensurer.reset-size4.size7,.katex .sizing.reset-size4.size7{font-size:1.5em}.katex .fontsize-ensurer.reset-size4.size8,.katex .sizing.reset-size4.size8{font-size:1.8em}.katex .fontsize-ensurer.reset-size4.size9,.katex .sizing.reset-size4.size9{font-size:2.16em}.katex .fontsize-ensurer.reset-size4.size10,.katex .sizing.reset-size4.size10{font-size:2.5925em}.katex .fontsize-ensurer.reset-size4.size11,.katex .sizing.reset-size4.size11{font-size:3.11em}.katex 
.fontsize-ensurer.reset-size5.size1,.katex .sizing.reset-size5.size1{font-size:.55555556em}.katex .fontsize-ensurer.reset-size5.size2,.katex .sizing.reset-size5.size2{font-size:.66666667em}.katex .fontsize-ensurer.reset-size5.size3,.katex .sizing.reset-size5.size3{font-size:.77777778em}.katex .fontsize-ensurer.reset-size5.size4,.katex .sizing.reset-size5.size4{font-size:.88888889em}.katex .fontsize-ensurer.reset-size5.size5,.katex .sizing.reset-size5.size5{font-size:1em}.katex .fontsize-ensurer.reset-size5.size6,.katex .sizing.reset-size5.size6{font-size:1.11111111em}.katex .fontsize-ensurer.reset-size5.size7,.katex .sizing.reset-size5.size7{font-size:1.33333333em}.katex .fontsize-ensurer.reset-size5.size8,.katex .sizing.reset-size5.size8{font-size:1.6em}.katex .fontsize-ensurer.reset-size5.size9,.katex .sizing.reset-size5.size9{font-size:1.92em}.katex .fontsize-ensurer.reset-size5.size10,.katex .sizing.reset-size5.size10{font-size:2.30444444em}.katex .fontsize-ensurer.reset-size5.size11,.katex .sizing.reset-size5.size11{font-size:2.76444444em}.katex .fontsize-ensurer.reset-size6.size1,.katex .sizing.reset-size6.size1{font-size:.5em}.katex .fontsize-ensurer.reset-size6.size2,.katex .sizing.reset-size6.size2{font-size:.6em}.katex .fontsize-ensurer.reset-size6.size3,.katex .sizing.reset-size6.size3{font-size:.7em}.katex .fontsize-ensurer.reset-size6.size4,.katex .sizing.reset-size6.size4{font-size:.8em}.katex .fontsize-ensurer.reset-size6.size5,.katex .sizing.reset-size6.size5{font-size:.9em}.katex .fontsize-ensurer.reset-size6.size6,.katex .sizing.reset-size6.size6{font-size:1em}.katex .fontsize-ensurer.reset-size6.size7,.katex .sizing.reset-size6.size7{font-size:1.2em}.katex .fontsize-ensurer.reset-size6.size8,.katex .sizing.reset-size6.size8{font-size:1.44em}.katex .fontsize-ensurer.reset-size6.size9,.katex .sizing.reset-size6.size9{font-size:1.728em}.katex .fontsize-ensurer.reset-size6.size10,.katex .sizing.reset-size6.size10{font-size:2.074em}.katex 
.fontsize-ensurer.reset-size6.size11,.katex .sizing.reset-size6.size11{font-size:2.488em}.katex .fontsize-ensurer.reset-size7.size1,.katex .sizing.reset-size7.size1{font-size:.41666667em}.katex .fontsize-ensurer.reset-size7.size2,.katex .sizing.reset-size7.size2{font-size:.5em}.katex .fontsize-ensurer.reset-size7.size3,.katex .sizing.reset-size7.size3{font-size:.58333333em}.katex .fontsize-ensurer.reset-size7.size4,.katex .sizing.reset-size7.size4{font-size:.66666667em}.katex .fontsize-ensurer.reset-size7.size5,.katex .sizing.reset-size7.size5{font-size:.75em}.katex .fontsize-ensurer.reset-size7.size6,.katex .sizing.reset-size7.size6{font-size:.83333333em}.katex .fontsize-ensurer.reset-size7.size7,.katex .sizing.reset-size7.size7{font-size:1em}.katex .fontsize-ensurer.reset-size7.size8,.katex .sizing.reset-size7.size8{font-size:1.2em}.katex .fontsize-ensurer.reset-size7.size9,.katex .sizing.reset-size7.size9{font-size:1.44em}.katex .fontsize-ensurer.reset-size7.size10,.katex .sizing.reset-size7.size10{font-size:1.72833333em}.katex .fontsize-ensurer.reset-size7.size11,.katex .sizing.reset-size7.size11{font-size:2.07333333em}.katex .fontsize-ensurer.reset-size8.size1,.katex .sizing.reset-size8.size1{font-size:.34722222em}.katex .fontsize-ensurer.reset-size8.size2,.katex .sizing.reset-size8.size2{font-size:.41666667em}.katex .fontsize-ensurer.reset-size8.size3,.katex .sizing.reset-size8.size3{font-size:.48611111em}.katex .fontsize-ensurer.reset-size8.size4,.katex .sizing.reset-size8.size4{font-size:.55555556em}.katex .fontsize-ensurer.reset-size8.size5,.katex .sizing.reset-size8.size5{font-size:.625em}.katex .fontsize-ensurer.reset-size8.size6,.katex .sizing.reset-size8.size6{font-size:.69444444em}.katex .fontsize-ensurer.reset-size8.size7,.katex .sizing.reset-size8.size7{font-size:.83333333em}.katex .fontsize-ensurer.reset-size8.size8,.katex .sizing.reset-size8.size8{font-size:1em}.katex .fontsize-ensurer.reset-size8.size9,.katex 
.sizing.reset-size8.size9{font-size:1.2em}.katex .fontsize-ensurer.reset-size8.size10,.katex .sizing.reset-size8.size10{font-size:1.44027778em}.katex .fontsize-ensurer.reset-size8.size11,.katex .sizing.reset-size8.size11{font-size:1.72777778em}.katex .fontsize-ensurer.reset-size9.size1,.katex .sizing.reset-size9.size1{font-size:.28935185em}.katex .fontsize-ensurer.reset-size9.size2,.katex .sizing.reset-size9.size2{font-size:.34722222em}.katex .fontsize-ensurer.reset-size9.size3,.katex .sizing.reset-size9.size3{font-size:.40509259em}.katex .fontsize-ensurer.reset-size9.size4,.katex .sizing.reset-size9.size4{font-size:.46296296em}.katex .fontsize-ensurer.reset-size9.size5,.katex .sizing.reset-size9.size5{font-size:.52083333em}.katex .fontsize-ensurer.reset-size9.size6,.katex .sizing.reset-size9.size6{font-size:.5787037em}.katex .fontsize-ensurer.reset-size9.size7,.katex .sizing.reset-size9.size7{font-size:.69444444em}.katex .fontsize-ensurer.reset-size9.size8,.katex .sizing.reset-size9.size8{font-size:.83333333em}.katex .fontsize-ensurer.reset-size9.size9,.katex .sizing.reset-size9.size9{font-size:1em}.katex .fontsize-ensurer.reset-size9.size10,.katex .sizing.reset-size9.size10{font-size:1.20023148em}.katex .fontsize-ensurer.reset-size9.size11,.katex .sizing.reset-size9.size11{font-size:1.43981481em}.katex .fontsize-ensurer.reset-size10.size1,.katex .sizing.reset-size10.size1{font-size:.24108004em}.katex .fontsize-ensurer.reset-size10.size2,.katex .sizing.reset-size10.size2{font-size:.28929605em}.katex .fontsize-ensurer.reset-size10.size3,.katex .sizing.reset-size10.size3{font-size:.33751205em}.katex .fontsize-ensurer.reset-size10.size4,.katex .sizing.reset-size10.size4{font-size:.38572806em}.katex .fontsize-ensurer.reset-size10.size5,.katex .sizing.reset-size10.size5{font-size:.43394407em}.katex .fontsize-ensurer.reset-size10.size6,.katex .sizing.reset-size10.size6{font-size:.48216008em}.katex .fontsize-ensurer.reset-size10.size7,.katex 
.sizing.reset-size10.size7{font-size:.57859209em}.katex .fontsize-ensurer.reset-size10.size8,.katex .sizing.reset-size10.size8{font-size:.69431051em}.katex .fontsize-ensurer.reset-size10.size9,.katex .sizing.reset-size10.size9{font-size:.83317261em}.katex .fontsize-ensurer.reset-size10.size10,.katex .sizing.reset-size10.size10{font-size:1em}.katex .fontsize-ensurer.reset-size10.size11,.katex .sizing.reset-size10.size11{font-size:1.19961427em}.katex .fontsize-ensurer.reset-size11.size1,.katex .sizing.reset-size11.size1{font-size:.20096463em}.katex .fontsize-ensurer.reset-size11.size2,.katex .sizing.reset-size11.size2{font-size:.24115756em}.katex .fontsize-ensurer.reset-size11.size3,.katex .sizing.reset-size11.size3{font-size:.28135048em}.katex .fontsize-ensurer.reset-size11.size4,.katex .sizing.reset-size11.size4{font-size:.32154341em}.katex .fontsize-ensurer.reset-size11.size5,.katex .sizing.reset-size11.size5{font-size:.36173633em}.katex .fontsize-ensurer.reset-size11.size6,.katex .sizing.reset-size11.size6{font-size:.40192926em}.katex .fontsize-ensurer.reset-size11.size7,.katex .sizing.reset-size11.size7{font-size:.48231511em}.katex .fontsize-ensurer.reset-size11.size8,.katex .sizing.reset-size11.size8{font-size:.57877814em}.katex .fontsize-ensurer.reset-size11.size9,.katex .sizing.reset-size11.size9{font-size:.69453376em}.katex .fontsize-ensurer.reset-size11.size10,.katex .sizing.reset-size11.size10{font-size:.83360129em}.katex .fontsize-ensurer.reset-size11.size11,.katex .sizing.reset-size11.size11{font-size:1em}.katex .delimsizing.size1{font-family:KaTeX_Size1}.katex .delimsizing.size2{font-family:KaTeX_Size2}.katex .delimsizing.size3{font-family:KaTeX_Size3}.katex .delimsizing.size4{font-family:KaTeX_Size4}.katex .delimsizing.mult .delim-size1>span{font-family:KaTeX_Size1}.katex .delimsizing.mult .delim-size4>span{font-family:KaTeX_Size4}.katex .nulldelimiter{display:inline-block;width:.12em}.katex .delimcenter,.katex .op-symbol{position:relative}.katex 
.op-symbol.small-op{font-family:KaTeX_Size1}.katex .op-symbol.large-op{font-family:KaTeX_Size2}.katex .op-limits>.vlist-t{text-align:center}.katex .accent>.vlist-t{text-align:center}.katex .accent .accent-body{position:relative}.katex .accent .accent-body:not(.accent-full){width:0}.katex .overlay{display:block}.katex .mtable .vertical-separator{display:inline-block;min-width:1px}.katex .mtable .arraycolsep{display:inline-block}.katex .mtable .col-align-c>.vlist-t{text-align:center}.katex .mtable .col-align-l>.vlist-t{text-align:left}.katex .mtable .col-align-r>.vlist-t{text-align:right}.katex .svg-align{text-align:left}.katex svg{display:block;position:absolute;width:100%;height:inherit;fill:currentColor;stroke:currentColor;fill-rule:nonzero;fill-opacity:1;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1}.katex svg path{stroke:none}.katex img{border-style:none;min-width:0;min-height:0;max-width:none;max-height:none}.katex .stretchy{width:100%;display:block;position:relative;overflow:hidden}.katex .stretchy:after,.katex .stretchy:before{content:""}.katex .hide-tail{width:100%;position:relative;overflow:hidden}.katex .halfarrow-left{position:absolute;left:0;width:50.2%;overflow:hidden}.katex .halfarrow-right{position:absolute;right:0;width:50.2%;overflow:hidden}.katex .brace-left{position:absolute;left:0;width:25.1%;overflow:hidden}.katex .brace-center{position:absolute;left:25%;width:50%;overflow:hidden}.katex .brace-right{position:absolute;right:0;width:25.1%;overflow:hidden}.katex .x-arrow-pad{padding:0 .5em}.katex .mover,.katex .munder,.katex .x-arrow{text-align:center}.katex .boxpad{padding:0 .3em}.katex .fbox,.katex .fcolorbox{box-sizing:border-box;border:.04em solid}.katex .cancel-pad{padding:0 .2em}.katex .cancel-lap{margin-left:-.2em;margin-right:-.2em}.katex .sout{border-bottom-style:solid;border-bottom-width:.08em}.katex-display{display:block;margin:1em 
0;text-align:center}.katex-display>.katex{display:block;text-align:center;white-space:nowrap}.katex-display>.katex>.katex-html{display:block;position:relative}.katex-display>.katex>.katex-html>.tag{position:absolute;right:0}.katex-display.leqno>.katex>.katex-html>.tag{left:0;right:auto}.katex-display.fleqn>.katex{text-align:left} 2 | -------------------------------------------------------------------------------- /assets/requirements-slides.txt: -------------------------------------------------------------------------------- 1 | certifi 2 | Jinja2 3 | livereload 4 | MarkupSafe 5 | six 6 | tornado 7 | -------------------------------------------------------------------------------- /assets/style.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: Helvetica, sans-serif; 3 | color: #354046; 4 | line-height: 1.4; 5 | font-size: 16pt; 6 | word-wrap: break-word; 7 | } 8 | 9 | 10 | /* Remark ------------------------------------------------------------------- */ 11 | 12 | .remark-container { 13 | background: #ddd; 14 | } 15 | 16 | .remark-slide-content { 17 | font-size: 1em; 18 | border-top: solid 5px #520b92; 19 | padding-top: 0.5em; 20 | vertical-align: middle; 21 | } 22 | 23 | .remark-slide-content h1 { 24 | font-weight: bold; 25 | font-size: 1.9em; 26 | margin: 0.25em 0; 27 | color: #520b92; 28 | } 29 | 30 | .remark-slide-content h2 { 31 | font-size: 1.5em; 32 | font-weight: bold; 33 | margin: 0.25em 0; 34 | } 35 | 36 | .remark-slide-content h3 { 37 | font-size: 1.2em; 38 | font-weight: bold; 39 | margin: 0.25em 0; 40 | } 41 | 42 | .remark-slide-content p, 43 | .remark-slide-content ul, 44 | .remark-slide-content ol { 45 | font-size: 1.2em; 46 | margin: 0.7em 0; 47 | } 48 | 49 | .remark-slide-number { 50 | font-size: 0.5em; 51 | bottom: 2em; 52 | } 53 | 54 | /* Styles ------------------------------------------------------------------- */ 55 | 56 | a { 57 | color: #008000; 58 | text-decoration: none; 59 | } 60 
| 61 | a:hover { 62 | text-decoration: underline; 63 | } 64 | 65 | .italic { 66 | font-style: italic; 67 | } 68 | 69 | .bold { 70 | font-weight: bold; 71 | } 72 | 73 | em { 74 | font-style: italic; 75 | } 76 | 77 | strong { 78 | font-style: normal; 79 | font-weight: bold; 80 | } 81 | 82 | pre { 83 | margin: 0.3em; 84 | } 85 | 86 | .larger { 87 | font-size: 1.3em; 88 | } 89 | 90 | .smaller { 91 | font-size: 0.8em; 92 | } 93 | 94 | .smaller-x { 95 | font-size: 0.5em; 96 | } 97 | 98 | .katex { 99 | color: black; 100 | } 101 | 102 | .black-slide .katex, 103 | .black-slide h1, 104 | .black-slide a { 105 | color: white; 106 | } 107 | 108 | ul>li, 109 | ol>li { 110 | margin: 0.3em 0; 111 | } 112 | 113 | li>p { 114 | margin: 0.2em 0; 115 | } 116 | 117 | ul>li>ul>li { 118 | font-size: 0.8em; 119 | } 120 | 121 | .caption { 122 | display: block; 123 | text-align: center; 124 | font-size: 0.8em; 125 | font-style: italic; 126 | } 127 | 128 | .circle img { 129 | border-radius: 50%; 130 | } 131 | 132 | .stretch img { 133 | width: 100%; 134 | height: auto; 135 | } 136 | 137 | .footnote { 138 | position: absolute; 139 | bottom: 0.5em; 140 | font-size: 0.8em; 141 | opacity: 0.9; 142 | left: 1.5em; 143 | } 144 | 145 | .footnote a { 146 | color: #0288d1; 147 | /*#20707e;*/ 148 | } 149 | 150 | .footnote-back { 151 | position: absolute; 152 | bottom: 0.5em; 153 | font-size: 0.7em; 154 | opacity: 0.9; 155 | right: 1.5em; 156 | } 157 | 158 | .inactive { 159 | color: lightgray; 160 | } 161 | 162 | .success { 163 | background-color: rgba(40, 167, 69, 0.25); 164 | border: 2px solid rgb(40, 167, 69); 165 | border-radius: 5px; 166 | width: 95%; 167 | display: block; 168 | padding: 1em; 169 | } 170 | 171 | .alert { 172 | color: #721c24; 173 | background-color: #f8d7da; 174 | border: 2px solid #f5c6cb; 175 | border-radius: 5px; 176 | width: 95%; 177 | display: block; 178 | padding: 1em; 179 | } 180 | 181 | .hljs-github .hljs-comment { 182 | color: #520b92; 183 | } 184 | 185 | 186 | /* Specials 
----------------------------------------------------------------- */ 187 | 188 | .title-slide { 189 | font-size: 1.1em; 190 | text-align: center; 191 | vertical-align: middle; 192 | } 193 | 194 | .title-slide h1 { 195 | margin-top: 1em; 196 | } 197 | 198 | .chapter-slide { 199 | background-color: #520b92; 200 | color: white; 201 | vertical-align: middle; 202 | font-size: 1.2em; 203 | text-align: center; 204 | } 205 | 206 | .chapter-slide h1, 207 | .chapter-slide a { 208 | color: white; 209 | } 210 | 211 | .black-slide { 212 | background-color: black; 213 | color: white; 214 | border-top: solid 0px; 215 | } 216 | 217 | .red-slide { 218 | background-color: red; 219 | color: white; 220 | border-top: solid 0px; 221 | } 222 | 223 | .this-talk-link { 224 | font-size: 0.8em; 225 | display: block; 226 | font-style: italic; 227 | margin-top: 1em; 228 | } 229 | 230 | 231 | blockquote { 232 | font-size: 1.4em; 233 | width: 75%; 234 | margin: 0 auto; 235 | font-style: italic; 236 | color: #555555; 237 | padding: 1.2em 30px 1.2em 75px; 238 | border-left: 8px solid #008000; 239 | line-height: 1.6; 240 | position: relative; 241 | background: #EDEDED; 242 | } 243 | 244 | 245 | blockquote::before { 246 | font-family: Arial; 247 | content: "\201C"; 248 | color: #008000; 249 | font-size: 4em; 250 | position: absolute; 251 | left: 0; 252 | top: -10px; 253 | } 254 | 255 | blockquote::after { 256 | content: ''; 257 | } 258 | 259 | blockquote cite { 260 | display: block; 261 | color: #333333; 262 | font-style: normal; 263 | font-weight: bold; 264 | margin-top: 1em; 265 | } 266 | 267 | .icon { 268 | background-size: cover; 269 | display: inline-block; 270 | transform: scale(1.5); 271 | margin-top: 12px; 272 | height: 24px; 273 | width: 25px; 274 | } 275 | 276 | .icon-left { 277 | margin-right: 16px; 278 | } 279 | 280 | .icon-twitter { 281 | background: url("data:image/svg+xml,%3Csvg width='100%25' height='100%25' viewBox='0 0 24 24' version='1.1' xmlns='http://www.w3.org/2000/svg' 
xmlns:xlink='http://www.w3.org/1999/xlink' xml:space='preserve' xmlns:serif='http://www.serif.com/' style='fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;'%3E %3Cg transform='matrix(0.995894,0,0,0.995894,0.0514823,0.156485)'%3E %3Cpath d='M24,4.557C23.117,4.949 22.168,5.213 21.172,5.332C22.189,4.723 22.97,3.758 23.337,2.608C22.386,3.172 21.332,3.582 20.21,3.803C19.313,2.846 18.032,2.248 16.616,2.248C13.437,2.248 11.101,5.214 11.819,8.293C7.728,8.088 4.1,6.128 1.671,3.149C0.381,5.362 1.002,8.257 3.194,9.723C2.388,9.697 1.628,9.476 0.965,9.107C0.911,11.388 2.546,13.522 4.914,13.997C4.221,14.185 3.462,14.229 2.69,14.081C3.316,16.037 5.134,17.46 7.29,17.5C5.22,19.123 2.612,19.848 0,19.54C2.179,20.937 4.768,21.752 7.548,21.752C16.69,21.752 21.855,14.031 21.543,7.106C22.505,6.411 23.34,5.544 24,4.557Z' style='fill-rule:nonzero;'/%3E %3C/g%3E %3C/svg%3E ") no-repeat; 282 | } 283 | 284 | .icon-twitter-white { 285 | background: url("data:image/svg+xml,%3Csvg width='100%25' height='100%25' viewBox='0 0 24 24' version='1.1' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' xml:space='preserve' xmlns:serif='http://www.serif.com/' style='fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;'%3E %3Cg transform='matrix(0.995894,0,0,0.995894,0.0514823,0.156485)'%3E %3Cpath d='M24,4.557C23.117,4.949 22.168,5.213 21.172,5.332C22.189,4.723 22.97,3.758 23.337,2.608C22.386,3.172 21.332,3.582 20.21,3.803C19.313,2.846 18.032,2.248 16.616,2.248C13.437,2.248 11.101,5.214 11.819,8.293C7.728,8.088 4.1,6.128 1.671,3.149C0.381,5.362 1.002,8.257 3.194,9.723C2.388,9.697 1.628,9.476 0.965,9.107C0.911,11.388 2.546,13.522 4.914,13.997C4.221,14.185 3.462,14.229 2.69,14.081C3.316,16.037 5.134,17.46 7.29,17.5C5.22,19.123 2.612,19.848 0,19.54C2.179,20.937 4.768,21.752 7.548,21.752C16.69,21.752 21.855,14.031 21.543,7.106C22.505,6.411 23.34,5.544 24,4.557Z' style='fill:white;fill-rule:nonzero;'/%3E %3C/g%3E %3C/svg%3E ") 
no-repeat; 286 | 287 | } 288 | 289 | .icon-github-white { 290 | background: url("data:image/svg+xml,%3Csvg width='100%25' height='100%25' viewBox='0 0 25 24' version='1.1' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' xml:space='preserve' xmlns:serif='http://www.serif.com/' style='fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;'%3E %3Cg transform='matrix(1,0,0,1,0.0503063,0.209536)'%3E %3Cpath d='M12,0C5.374,0 0,5.373 0,12C0,17.302 3.438,21.8 8.207,23.387C8.806,23.498 9,23.126 9,22.81L9,20.576C5.662,21.302 4.967,19.16 4.967,19.16C4.421,17.773 3.634,17.404 3.634,17.404C2.545,16.659 3.717,16.675 3.717,16.675C4.922,16.759 5.556,17.912 5.556,17.912C6.626,19.746 8.363,19.216 9.048,18.909C9.155,18.134 9.466,17.604 9.81,17.305C7.145,17 4.343,15.971 4.343,11.374C4.343,10.063 4.812,8.993 5.579,8.153C5.455,7.85 5.044,6.629 5.696,4.977C5.696,4.977 6.704,4.655 8.997,6.207C9.954,5.941 10.98,5.808 12,5.803C13.02,5.808 14.047,5.941 15.006,6.207C17.297,4.655 18.303,4.977 18.303,4.977C18.956,6.63 18.545,7.851 18.421,8.153C19.191,8.993 19.656,10.064 19.656,11.374C19.656,15.983 16.849,16.998 14.177,17.295C14.607,17.667 15,18.397 15,19.517L15,22.81C15,23.129 15.192,23.504 15.801,23.386C20.566,21.797 24,17.3 24,12C24,5.373 18.627,0 12,0Z' style='fill:white;fill-rule:nonzero;'/%3E %3C/g%3E %3C/svg%3E ") no-repeat; 291 | } 292 | 293 | .icon-github { 294 | background: url("data:image/svg+xml,%3Csvg width='100%25' height='100%25' viewBox='0 0 25 24' version='1.1' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' xml:space='preserve' xmlns:serif='http://www.serif.com/' style='fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;'%3E %3Cg transform='matrix(1,0,0,1,0.0191268,0.228032)'%3E %3Cpath d='M12,0C5.374,0 0,5.373 0,12C0,17.302 3.438,21.8 8.207,23.387C8.806,23.498 9,23.126 9,22.81L9,20.576C5.662,21.302 4.967,19.16 4.967,19.16C4.421,17.773 3.634,17.404 
3.634,17.404C2.545,16.659 3.717,16.675 3.717,16.675C4.922,16.759 5.556,17.912 5.556,17.912C6.626,19.746 8.363,19.216 9.048,18.909C9.155,18.134 9.466,17.604 9.81,17.305C7.145,17 4.343,15.971 4.343,11.374C4.343,10.063 4.812,8.993 5.579,8.153C5.455,7.85 5.044,6.629 5.696,4.977C5.696,4.977 6.704,4.655 8.997,6.207C9.954,5.941 10.98,5.808 12,5.803C13.02,5.808 14.047,5.941 15.006,6.207C17.297,4.655 18.303,4.977 18.303,4.977C18.956,6.63 18.545,7.851 18.421,8.153C19.191,8.993 19.656,10.064 19.656,11.374C19.656,15.983 16.849,16.998 14.177,17.295C14.607,17.667 15,18.397 15,19.517L15,22.81C15,23.129 15.192,23.504 15.801,23.386C20.566,21.797 24,17.3 24,12C24,5.373 18.627,0 12,0Z' style='fill-rule:nonzero;'/%3E %3C/g%3E %3C/svg%3E ") no-repeat; 295 | } 296 | 297 | .icon-github:hover, 298 | .icon-github-white:hover { 299 | background: url("data:image/svg+xml,%3Csvg width='100%25' height='100%25' viewBox='0 0 25 24' version='1.1' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' xml:space='preserve' xmlns:serif='http://www.serif.com/' style='fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;'%3E %3Cg transform='matrix(1,0,0,1,0.0503063,0.209536)'%3E %3Cpath d='M12,0C5.374,0 0,5.373 0,12C0,17.302 3.438,21.8 8.207,23.387C8.806,23.498 9,23.126 9,22.81L9,20.576C5.662,21.302 4.967,19.16 4.967,19.16C4.421,17.773 3.634,17.404 3.634,17.404C2.545,16.659 3.717,16.675 3.717,16.675C4.922,16.759 5.556,17.912 5.556,17.912C6.626,19.746 8.363,19.216 9.048,18.909C9.155,18.134 9.466,17.604 9.81,17.305C7.145,17 4.343,15.971 4.343,11.374C4.343,10.063 4.812,8.993 5.579,8.153C5.455,7.85 5.044,6.629 5.696,4.977C5.696,4.977 6.704,4.655 8.997,6.207C9.954,5.941 10.98,5.808 12,5.803C13.02,5.808 14.047,5.941 15.006,6.207C17.297,4.655 18.303,4.977 18.303,4.977C18.956,6.63 18.545,7.851 18.421,8.153C19.191,8.993 19.656,10.064 19.656,11.374C19.656,15.983 16.849,16.998 14.177,17.295C14.607,17.667 15,18.397 15,19.517L15,22.81C15,23.129 15.192,23.504 
15.801,23.386C20.566,21.797 24,17.3 24,12C24,5.373 18.627,0 12,0Z' style='fill:rgb(0,128,0);fill-rule:nonzero;'/%3E %3C/g%3E %3C/svg%3E ") no-repeat; 300 | } 301 | 302 | .icon-twitter:hover, 303 | .icon-twitter-white:hover { 304 | background: url("data:image/svg+xml,%3Csvg width='100%25' height='100%25' viewBox='0 0 24 24' version='1.1' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' xml:space='preserve' xmlns:serif='http://www.serif.com/' style='fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;'%3E %3Cg transform='matrix(0.995894,0,0,0.995894,0.0514823,0.156485)'%3E %3Cpath d='M24,4.557C23.117,4.949 22.168,5.213 21.172,5.332C22.189,4.723 22.97,3.758 23.337,2.608C22.386,3.172 21.332,3.582 20.21,3.803C19.313,2.846 18.032,2.248 16.616,2.248C13.437,2.248 11.101,5.214 11.819,8.293C7.728,8.088 4.1,6.128 1.671,3.149C0.381,5.362 1.002,8.257 3.194,9.723C2.388,9.697 1.628,9.476 0.965,9.107C0.911,11.388 2.546,13.522 4.914,13.997C4.221,14.185 3.462,14.229 2.69,14.081C3.316,16.037 5.134,17.46 7.29,17.5C5.22,19.123 2.612,19.848 0,19.54C2.179,20.937 4.768,21.752 7.548,21.752C16.69,21.752 21.855,14.031 21.543,7.106C22.505,6.411 23.34,5.544 24,4.557Z' style='fill:rgb(0,128,0);fill-rule:nonzero;'/%3E %3C/g%3E %3C/svg%3E ") no-repeat; 305 | 306 | } 307 | 308 | .remark-slide table { 309 | border: none; 310 | border-collapse: collapse; 311 | border-spacing: 0; 312 | color: black; 313 | table-layout: fixed; 314 | } 315 | 316 | .remark-slide table thead { 317 | border-bottom: 1px solid black; 318 | vertical-align: bottom; 319 | } 320 | 321 | .remark-slide table tr, 322 | .remark-slide table th, 323 | .remark-slide table td { 324 | text-align: right; 325 | vertical-align: middle; 326 | padding: 0.5em 0.5em; 327 | line-height: normal; 328 | white-space: normal; 329 | max-width: none; 330 | border: none; 331 | } 332 | 333 | .remark-slide table th { 334 | font-weight: bold; 335 | } 336 | 337 | .remark-slide table tbody 
tr:nth-child(odd) { 338 | background: #f5f5f5; 339 | } 340 | 341 | .remark-slide table tbody tr:hover { 342 | background: rgba(66, 165, 245, 0.2); 343 | } 344 | 345 | .remark-slide table.alternating { 346 | border: none; 347 | border-collapse: collapse; 348 | border-spacing: 0; 349 | } 350 | 351 | .remark-slide table.alternating td, 352 | .remark-slide table.alternating tr { 353 | border: none; 354 | } 355 | 356 | .remark-slide table.alternating tbody tr:nth-child(odd) { 357 | background: #f0f0f0; 358 | } 359 | 360 | /* grid */ 361 | 362 | /* Uncomment and set these variables to customize the grid. */ 363 | 364 | .g { 365 | box-sizing: border-box; 366 | display: -ms-flexbox; 367 | display: -webkit-box; 368 | display: flex; 369 | -ms-flex: 0 1 auto; 370 | -webkit-box-flex: 0; 371 | flex: 0 1 auto; 372 | -ms-flex-direction: row; 373 | -webkit-box-orient: horizontal; 374 | -webkit-box-direction: normal; 375 | flex-direction: row; 376 | -ms-flex-wrap: wrap; 377 | flex-wrap: wrap; 378 | margin: 0.5rem -0.5rem; 379 | } 380 | 381 | .g-1, .g-2, 382 | .g-3, 383 | .g-4, 384 | .g-5, 385 | .g-6, 386 | .g-7, 387 | .g-8, 388 | .g-9, 389 | .g-10, 390 | .g-11, 391 | .g-12 { 392 | box-sizing: border-box; 393 | -ms-flex: 0 0 auto; 394 | -webkit-box-flex: 0; 395 | flex: 0 0 auto; 396 | padding-right: 1rem; 397 | padding-left: 1rem; 398 | } 399 | 400 | .g-1 { 401 | -ms-flex-preferred-size: 8.333%; 402 | flex-basis: 8.333%; 403 | max-width: 8.333%; 404 | } 405 | 406 | .g-2 { 407 | -ms-flex-preferred-size: 16.667%; 408 | flex-basis: 16.667%; 409 | max-width: 16.667%; 410 | } 411 | 412 | .g-3 { 413 | -ms-flex-preferred-size: 25%; 414 | flex-basis: 25%; 415 | max-width: 25%; 416 | } 417 | 418 | .g-4 { 419 | -ms-flex-preferred-size: 33.333%; 420 | flex-basis: 33.333%; 421 | max-width: 33.333%; 422 | } 423 | 424 | .g-5 { 425 | -ms-flex-preferred-size: 41.667%; 426 | flex-basis: 41.667%; 427 | max-width: 41.667%; 428 | } 429 | 430 | .g-6 { 431 | -ms-flex-preferred-size: 50%; 432 |
flex-basis: 50%; 433 | max-width: 50%; 434 | } 435 | 436 | .g-7 { 437 | -ms-flex-preferred-size: 58.333%; 438 | flex-basis: 58.333%; 439 | max-width: 58.333%; 440 | } 441 | 442 | .g-8 { 443 | -ms-flex-preferred-size: 66.667%; 444 | flex-basis: 66.667%; 445 | max-width: 66.667%; 446 | } 447 | 448 | .g-9 { 449 | -ms-flex-preferred-size: 75%; 450 | flex-basis: 75%; 451 | max-width: 75%; 452 | } 453 | 454 | .g-10 { 455 | -ms-flex-preferred-size: 83.333%; 456 | flex-basis: 83.333%; 457 | max-width: 83.333%; 458 | } 459 | 460 | .g-11 { 461 | -ms-flex-preferred-size: 91.667%; 462 | flex-basis: 91.667%; 463 | max-width: 91.667%; 464 | } 465 | 466 | .g-12 { 467 | -ms-flex-preferred-size: 100%; 468 | flex-basis: 100%; 469 | max-width: 100%; 470 | } 471 | 472 | .g-start { 473 | -ms-flex-pack: start; 474 | -webkit-box-pack: start; 475 | justify-content: flex-start; 476 | text-align: start; 477 | } 478 | 479 | .g-center { 480 | -ms-flex-pack: center; 481 | -webkit-box-pack: center; 482 | justify-content: center; 483 | text-align: center; 484 | } 485 | 486 | .g-end { 487 | -ms-flex-pack: end; 488 | -webkit-box-pack: end; 489 | justify-content: flex-end; 490 | text-align: end; 491 | } 492 | 493 | .g-top { 494 | -ms-flex-align: start; 495 | -webkit-box-align: start; 496 | align-items: flex-start; 497 | } 498 | 499 | .g-middle { 500 | -ms-flex-align: center; 501 | -webkit-box-align: center; 502 | align-items: center; 503 | } 504 | 505 | .g-bottom { 506 | -ms-flex-align: end; 507 | -webkit-box-align: end; 508 | align-items: flex-end; 509 | } 510 | 511 | .g-around { 512 | -ms-flex-pack: distribute; 513 | justify-content: space-around; 514 | } 515 | 516 | .g-between { 517 | -ms-flex-pack: justify; 518 | -webkit-box-pack: justify; 519 | justify-content: space-between; 520 | } 521 | -------------------------------------------------------------------------------- /assets/twitter.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 
7 | 8 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: ml-workshop-advanced 2 | channels: 3 | - conda-forge 4 | - nodefaults 5 | dependencies: 6 | - python=3.9.7 7 | - scikit-learn=1.0.* 8 | - seaborn=0.11.2 9 | - pandas 10 | - jupyterlab=3.1.* 11 | - imbalanced-learn=0.9.* 12 | -------------------------------------------------------------------------------- /images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/.gitkeep -------------------------------------------------------------------------------- /images/bag_of_words.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/bag_of_words.png -------------------------------------------------------------------------------- /images/calibration_regression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/calibration_regression.png -------------------------------------------------------------------------------- /images/countvectorizer-ngrams.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/countvectorizer-ngrams.png -------------------------------------------------------------------------------- /images/countvectorizer-ngrams.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 |
"Can we go to the mountain tomorrow?"
"Can we go to the mountain tomorrow?"
["can", "we", "go", "to", "the", "mountain", "tomorrow"]
["can", "we", "go", "to", "the", "mountain", "tomorrow"]
Unigram tokenizer
Unigram tokenizer
"Can we go to the mountain tomorrow?"
"Can we go to the mountain tomorrow?"
["can we", "we go", "go to", "to the", "the mountain", "mountain tomorrow"]
["can we", "we go", "go to", "to the", "the mountain", "mountain tomorr...
Bigram tokenizer
Bigram tokenizer
Viewer does not support full SVG 1.1
-------------------------------------------------------------------------------- /images/countvectorizer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/countvectorizer.png -------------------------------------------------------------------------------- /images/countvectorizer.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 |
"Can we go to the mountain tomorrow?"
"Can we go to the mountain tomorrow?"
["can", "we", "go", "to", "the", "mountain", "tomorrow"]
["can", "we", "go", "to", "the", "mountain", "tomorrow"]
["ants", ..., "tomorrow", ..., "we", ..., "zoo"]
["ants", ..., "tomorrow", ..., "we", ..., "zoo"]
[0,    ...,   0, 1, 0, ..., 0, 1, 0,  ... 0]
[0,    ...,   0, 1, 0, ..., 0, 1, 0,  ... 0]
tokenizer
tokenizer
Build vocab over all documents
Build vocab over all documents
Sparse Matrix Encoding
Sparse Matrix Encoding
Viewer does not support full SVG 1.1
-------------------------------------------------------------------------------- /images/favicon_org.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/favicon_org.png -------------------------------------------------------------------------------- /images/glm_unit_deviance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/glm_unit_deviance.png -------------------------------------------------------------------------------- /images/pipeline.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 |
pipe = make_pipeline(T1(), T2(), Estimator())
pipe.fit(X, y)
pipe = make_pipeline(T1(), T2(), Estimator())...
T1
T1
T2
T2
Estimator
Estimator
T1.fit(X, y)
T1.fit(X, y)
T1.transform(X)
T1.transform(X)
X
X
y
y
T2.fit(X1, y)
T2.fit(X1, y)
T2.transform(X1)
T2.transform(X1)
X1
X1
Estimator.fit(X2, y)
Estimator.fit(X2, y)
X2
X2
pipe.predict(X')
pipe.predict(X')
T1.transform(X')
T1.transform(X')
X'
X'
T2.transform(X'1)
T2.transform(X'1)
X'1
X'1
Estimator.predict(X'2)
Estimator.predict(X'2)
X'2
X'2
y'
y'
Viewer does not support full SVG 1.1
-------------------------------------------------------------------------------- /images/poisson_gamma_tweedie_distributions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/poisson_gamma_tweedie_distributions.png -------------------------------------------------------------------------------- /images/resampling_approches.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/resampling_approches.png -------------------------------------------------------------------------------- /images/scikit-learn-logo-notext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/scikit-learn-logo-notext.png -------------------------------------------------------------------------------- /images/single_words.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/images/single_words.png -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Advanced Machine Learning with scikit-learn: Text Data, Imbalanced Data, and Poisson Regression 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 543 | 544 | 571 | 572 | 573 | -------------------------------------------------------------------------------- /maint_tools/check_notebooks.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | for f in notebooks/0*.ipynb; do 4 | 
jupyter nbconvert --execute $f --to notebook --stdout >/dev/null 5 | done 6 | -------------------------------------------------------------------------------- /make.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from jinja2 import Template 3 | from livereload import Server 4 | from pathlib import Path 5 | 6 | 7 | def build(): 8 | print("building index.html") 9 | 10 | assets = Path("assets") 11 | 12 | with (assets / "index.html.jinja").open('r') as f: 13 | template = Template(f.read()) 14 | 15 | with open("slides.md", 'r') as f: 16 | slides = f.readlines() 17 | 18 | # get metadata up to the first title 19 | valid_metadata_keys = set(['title', 'use_katex']) 20 | metadata = {} 21 | for line in slides: 22 | 23 | # first title 24 | if line.startswith("#") or len(metadata) == len(valid_metadata_keys): 25 | break 26 | line_split = line.split(":", maxsplit=1) 27 | if len(line_split) != 2: 28 | continue 29 | 30 | key = line_split[0] 31 | if key not in valid_metadata_keys: 32 | continue 33 | 34 | value = line_split[1].strip() 35 | metadata[key] = value 36 | 37 | if len(metadata) != 2: 38 | raise ValueError("Be sure to include title: and use_katex as metadata " 39 | "in slides.md file") 40 | 41 | output = template.render(title=metadata['title'], 42 | use_katex=metadata['use_katex'] == 'True', 43 | slides="".join(slides)) 44 | 45 | with open("index.html", 'w') as f: 46 | f.write(output) 47 | 48 | 49 | def live(): 50 | print("Serving index.html") 51 | cur_dir = Path('.') 52 | 53 | server = Server() 54 | server.watch("slides.md", build) 55 | server.watch(str(cur_dir / 'assets' / "style.css")) 56 | server.serve(open_url_delay=2) 57 | 58 | 59 | if __name__ == '__main__': 60 | parser = argparse.ArgumentParser(description="Building slides") 61 | parser.add_argument("action", choices=['build', 'live']) 62 | 63 | args = parser.parse_args() 64 | 65 | if args.action == 'build': 66 | build() 67 | else: 68 | live() 69 | 
-------------------------------------------------------------------------------- /notebooks/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/notebooks/.gitkeep -------------------------------------------------------------------------------- /notebooks/02-imbalanced-data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Imbalanced data\n", 8 | "\n", 9 | "\"Open" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "# Install dependencies for google colab\n", 19 | "import sys\n", 20 | "IN_COLAB = 'google.colab' in sys.modules\n", 21 | "if IN_COLAB:\n", 22 | " %pip install -r https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/master/requirements.txt" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "import sklearn\n", 32 | "assert sklearn.__version__.startswith(\"1.0\"), \"Plese install scikit-learn 1.0\"" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "import numpy as np\n", 42 | "import pandas as pd\n", 43 | "import matplotlib.pyplot as plt\n", 44 | "import warnings\n", 45 | "warnings.filterwarnings(\"ignore\", category=UserWarning)\n", 46 | "\n", 47 | "plt.rcParams['font.size'] = 16\n", 48 | "plt.rcParams['figure.figsize'] = [12, 8]\n", 49 | "\n", 50 | "sklearn.set_config(display='diagram')" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "## Load Mammography Data" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | 
"metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "from sklearn.datasets import fetch_openml\n", 67 | "\n", 68 | "mammography = fetch_openml(data_id=310)\n", 69 | "X, y = mammography.data, mammography.target" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "y = (y == '1').astype(int)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "np.bincount(y)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "## Split data into train test split" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "from sklearn.model_selection import train_test_split\n", 104 | "\n", 105 | "X_train, X_test, y_train, y_test = train_test_split(\n", 106 | " X, y, stratify=y, random_state=0)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": { 112 | "tags": [] 113 | }, 114 | "source": [ 115 | "### Base models" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "#### DummyClassifier" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "from sklearn.model_selection import cross_validate\n", 132 | "from sklearn.dummy import DummyClassifier" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "dc = DummyClassifier()" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "dc_reg_scores = cross_validate(dc, X_train, y_train, scoring=['roc_auc', 'average_precision', \"accuracy\"], cv=5)" 151 | ] 152 | }, 153 | { 154 | 
"cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "dc_reg_scores" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "dc_reg_scores[\"test_accuracy\"].mean()" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "dc_reg_scores[\"test_average_precision\"].mean()" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "dc_reg_scores[\"test_roc_auc\"].mean()" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "from sklearn.metrics import get_scorer\n", 196 | "\n", 197 | "def compute_metrics(estimator):\n", 198 | " cv_results = cross_validate(estimator,\n", 199 | " X_train, y_train, scoring=['roc_auc', 'average_precision'],\n", 200 | " cv=5)\n", 201 | " return {\n", 202 | " \"roc_auc\": cv_results[\"test_roc_auc\"].mean(),\n", 203 | " \"avg_precision\": cv_results[\"test_average_precision\"].mean(),\n", 204 | " }" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": { 210 | "tags": [] 211 | }, 212 | "source": [ 213 | "#### Linear model" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": {}, 220 | "outputs": [], 221 | "source": [ 222 | "from sklearn.model_selection import cross_validate\n", 223 | "from sklearn.linear_model import LogisticRegression" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": {}, 230 | "outputs": [], 231 | "source": [ 232 | "base_log_reg = LogisticRegression()" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | 
"outputs": [], 240 | "source": [ 241 | "base_log_reg_metrics = compute_metrics(base_log_reg)\n", 242 | "base_log_reg_metrics" 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": {}, 248 | "source": [ 249 | "#### Random Forest" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": {}, 256 | "outputs": [], 257 | "source": [ 258 | "from sklearn.ensemble import RandomForestClassifier" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "base_rf = RandomForestClassifier(random_state=42, n_jobs=-1)" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "base_rf_metrics = compute_metrics(base_rf)\n", 277 | "base_rf_metrics" 278 | ] 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": [ 284 | "### Imbalance-learn sampler" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "#### Under sampler" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "np.bincount(y_train)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": {}, 307 | "outputs": [], 308 | "source": [ 309 | "from imblearn.under_sampling import RandomUnderSampler" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "under_sampler = RandomUnderSampler(random_state=42)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [ 327 | "X_train_subsample, y_train_subsample = under_sampler.fit_resample(X_train, y_train)" 328 | ] 329 | }, 330 | { 331 | 
"cell_type": "code", 332 | "execution_count": null, 333 | "metadata": {}, 334 | "outputs": [], 335 | "source": [ 336 | "X_train.shape" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": null, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [ 345 | "X_train_subsample.shape" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": null, 351 | "metadata": {}, 352 | "outputs": [], 353 | "source": [ 354 | "np.bincount(y_train_subsample)" 355 | ] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": [ 361 | "#### Oversampling" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": null, 367 | "metadata": {}, 368 | "outputs": [], 369 | "source": [ 370 | "from imblearn.over_sampling import RandomOverSampler" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": null, 376 | "metadata": {}, 377 | "outputs": [], 378 | "source": [ 379 | "over_sampler = RandomOverSampler(random_state=42)" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": null, 385 | "metadata": {}, 386 | "outputs": [], 387 | "source": [ 388 | "X_train_subsample, y_train_subsample = over_sampler.fit_resample(X_train, y_train)" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": null, 394 | "metadata": {}, 395 | "outputs": [], 396 | "source": [ 397 | "X_train_subsample.shape" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": null, 403 | "metadata": {}, 404 | "outputs": [], 405 | "source": [ 406 | "np.bincount(y_train_subsample)" 407 | ] 408 | }, 409 | { 410 | "cell_type": "markdown", 411 | "metadata": {}, 412 | "source": [ 413 | "## Pipelines with imblean\n", 414 | "\n", 415 | "### Linear model with under sampling" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": null, 421 | "metadata": {}, 422 | "outputs": [], 423 | "source": [ 424 | "from imblearn.pipeline import 
make_pipeline as make_imb_pipeline" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": null, 430 | "metadata": {}, 431 | "outputs": [], 432 | "source": [ 433 | "under_log_reg = make_imb_pipeline(\n", 434 | " RandomUnderSampler(random_state=42), LogisticRegression(random_state=42))" 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": null, 440 | "metadata": {}, 441 | "outputs": [], 442 | "source": [ 443 | "base_log_reg_metrics" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": null, 449 | "metadata": {}, 450 | "outputs": [], 451 | "source": [ 452 | "compute_metrics(under_log_reg)" 453 | ] 454 | }, 455 | { 456 | "cell_type": "markdown", 457 | "metadata": {}, 458 | "source": [ 459 | "### Random Forest with under sampling" 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": null, 465 | "metadata": {}, 466 | "outputs": [], 467 | "source": [ 468 | "under_rf = make_imb_pipeline(\n", 469 | " RandomUnderSampler(random_state=42), RandomForestClassifier(random_state=42))" 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": null, 475 | "metadata": {}, 476 | "outputs": [], 477 | "source": [ 478 | "base_rf_metrics" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": null, 484 | "metadata": {}, 485 | "outputs": [], 486 | "source": [ 487 | "compute_metrics(under_rf)" 488 | ] 489 | }, 490 | { 491 | "cell_type": "markdown", 492 | "metadata": {}, 493 | "source": [ 494 | "### Linear model with over sampling" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": null, 500 | "metadata": {}, 501 | "outputs": [], 502 | "source": [ 503 | "over_log_reg = make_imb_pipeline(\n", 504 | " RandomOverSampler(random_state=42), LogisticRegression(random_state=42))" 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": null, 510 | "metadata": {}, 511 | "outputs": [], 512 | "source": [ 513 | 
"base_log_reg_metrics" 514 | ] 515 | }, 516 | { 517 | "cell_type": "code", 518 | "execution_count": null, 519 | "metadata": {}, 520 | "outputs": [], 521 | "source": [ 522 | "compute_metrics(over_log_reg)" 523 | ] 524 | }, 525 | { 526 | "cell_type": "markdown", 527 | "metadata": {}, 528 | "source": [ 529 | "## Exercise 1\n", 530 | "\n", 531 | "1. Use `make_imb_pipeline` with `RandomOverSampler(random_state=42)` to create a pipline with random forset called `over_rf`.\n", 532 | "1. Compute our metrics using `compute_metrics`." 533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": null, 538 | "metadata": {}, 539 | "outputs": [], 540 | "source": [] 541 | }, 542 | { 543 | "cell_type": "markdown", 544 | "metadata": {}, 545 | "source": [ 546 | "**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan//ml-workshop-advanced/blob/master/notebooks/solutions/02-ex01-solutions.py)." 
547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": null, 552 | "metadata": {}, 553 | "outputs": [], 554 | "source": [ 555 | "# %load solutions/02-ex01-solutions.py" 556 | ] 557 | }, 558 | { 559 | "cell_type": "markdown", 560 | "metadata": {}, 561 | "source": [ 562 | "## Plotting curves for logistic regression" 563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "execution_count": null, 568 | "metadata": {}, 569 | "outputs": [], 570 | "source": [ 571 | "base_log_reg.fit(X_train, y_train)\n", 572 | "under_log_reg.fit(X_train, y_train)\n", 573 | "over_log_reg.fit(X_train, y_train);" 574 | ] 575 | }, 576 | { 577 | "cell_type": "markdown", 578 | "metadata": {}, 579 | "source": [ 580 | "### Plotting" 581 | ] 582 | }, 583 | { 584 | "cell_type": "code", 585 | "execution_count": null, 586 | "metadata": {}, 587 | "outputs": [], 588 | "source": [ 589 | "from sklearn.metrics import PrecisionRecallDisplay\n", 590 | "from sklearn.metrics import RocCurveDisplay" 591 | ] 592 | }, 593 | { 594 | "cell_type": "code", 595 | "execution_count": null, 596 | "metadata": {}, 597 | "outputs": [], 598 | "source": [ 599 | "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(24, 8))\n", 600 | "RocCurveDisplay.from_estimator(base_log_reg, X_test, y_test, ax=ax1, name=\"original\")\n", 601 | "RocCurveDisplay.from_estimator(under_log_reg, X_test, y_test, ax=ax1, name=\"undersampling\")\n", 602 | "RocCurveDisplay.from_estimator(over_log_reg, X_test, y_test, ax=ax1, name=\"oversampling\")\n", 603 | "\n", 604 | "PrecisionRecallDisplay.from_estimator(base_log_reg, X_test, y_test, ax=ax2, name=\"original\")\n", 605 | "PrecisionRecallDisplay.from_estimator(under_log_reg, X_test, y_test, ax=ax2, name=\"undersampling\")\n", 606 | "PrecisionRecallDisplay.from_estimator(over_log_reg, X_test, y_test, ax=ax2, name=\"oversampling\");" 607 | ] 608 | }, 609 | { 610 | "cell_type": "markdown", 611 | "metadata": {}, 612 | "source": [ 613 | "We create a custom plotting function for future 
use:" 614 | ] 615 | }, 616 | { 617 | "cell_type": "code", 618 | "execution_count": null, 619 | "metadata": {}, 620 | "outputs": [], 621 | "source": [ 622 | "def plot_roc_and_precision_recall_curves(estimators):\n", 623 | " fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(24, 8))\n", 624 | " for name, est in estimators:\n", 625 | " RocCurveDisplay.from_estimator(est, X_test, y_test, ax=ax1, name=name)\n", 626 | " PrecisionRecallDisplay.from_estimator(est, X_test, y_test, ax=ax2, name=name)" 627 | ] 628 | }, 629 | { 630 | "cell_type": "code", 631 | "execution_count": null, 632 | "metadata": {}, 633 | "outputs": [], 634 | "source": [ 635 | "plot_roc_and_precision_recall_curves([(\"original\", base_log_reg),\n", 636 | " (\"undersampling\", under_log_reg),\n", 637 | " (\"oversampling\", over_log_reg)])" 638 | ] 639 | }, 640 | { 641 | "cell_type": "markdown", 642 | "metadata": {}, 643 | "source": [ 644 | "## Exercise 2\n", 645 | "\n", 646 | "1. Train the three random forest models, `base_rf`, `under_rf`, `over_rf`.\n", 647 | "1. Plot the ROC and precision-recall curves for the three random forest models." 648 | ] 649 | }, 650 | { 651 | "cell_type": "code", 652 | "execution_count": null, 653 | "metadata": {}, 654 | "outputs": [], 655 | "source": [] 656 | }, 657 | { 658 | "cell_type": "markdown", 659 | "metadata": {}, 660 | "source": [ 661 | "**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan/ml-workshop-advanced/blob/master/notebooks/solutions/02-ex02-solutions.py)." 662 | ] 663 | }, 664 | { 665 | "cell_type": "code", 666 | "execution_count": null, 667 | "metadata": {}, 668 | "outputs": [], 669 | "source": [ 670 | "# %load solutions/02-ex02-solutions.py" 671 | ] 672 | }, 673 | { 674 | "cell_type": "markdown", 675 | "metadata": {}, 676 | "source": [ 677 | "#### Back to slides!" 
678 | ] 679 | }, 680 | { 681 | "cell_type": "markdown", 682 | "metadata": {}, 683 | "source": [ 684 | "## Class-Weights" 685 | ] 686 | }, 687 | { 688 | "cell_type": "markdown", 689 | "metadata": {}, 690 | "source": [ 691 | "#### Linear model with class weights" 692 | ] 693 | }, 694 | { 695 | "cell_type": "code", 696 | "execution_count": null, 697 | "metadata": {}, 698 | "outputs": [], 699 | "source": [ 700 | "class_weight_log_reg = LogisticRegression(class_weight='balanced')\n", 701 | "\n", 702 | "class_weight_log_reg.fit(X_train, y_train)" 703 | ] 704 | }, 705 | { 706 | "cell_type": "code", 707 | "execution_count": null, 708 | "metadata": {}, 709 | "outputs": [], 710 | "source": [ 711 | "plot_roc_and_precision_recall_curves([(\"original\", base_log_reg),\n", 712 | " (\"class weighted\", class_weight_log_reg)])" 713 | ] 714 | }, 715 | { 716 | "cell_type": "markdown", 717 | "metadata": {}, 718 | "source": [ 719 | "#### Random forest with class weights " 720 | ] 721 | }, 722 | { 723 | "cell_type": "code", 724 | "execution_count": null, 725 | "metadata": {}, 726 | "outputs": [], 727 | "source": [ 728 | "class_weight_rf = RandomForestClassifier(class_weight='balanced', random_state=42)\n", 729 | "class_weight_rf.fit(X_train, y_train)" 730 | ] 731 | }, 732 | { 733 | "cell_type": "code", 734 | "execution_count": null, 735 | "metadata": {}, 736 | "outputs": [], 737 | "source": [ 738 | "base_rf.fit(X_train, y_train)" 739 | ] 740 | }, 741 | { 742 | "cell_type": "code", 743 | "execution_count": null, 744 | "metadata": {}, 745 | "outputs": [], 746 | "source": [ 747 | "plot_roc_and_precision_recall_curves([(\"original\", base_rf),\n", 748 | " (\"class weighted\", class_weight_rf)])" 749 | ] 750 | }, 751 | { 752 | "cell_type": "markdown", 753 | "metadata": {}, 754 | "source": [ 755 | "#### Back to slides!" 
756 | ] 757 | }, 758 | { 759 | "cell_type": "markdown", 760 | "metadata": {}, 761 | "source": [ 762 | "## Ensemble Resampling" 763 | ] 764 | }, 765 | { 766 | "cell_type": "code", 767 | "execution_count": null, 768 | "metadata": {}, 769 | "outputs": [], 770 | "source": [ 771 | "from imblearn.ensemble import BalancedRandomForestClassifier\n", 772 | "from sklearn.tree import DecisionTreeClassifier" 773 | ] 774 | }, 775 | { 776 | "cell_type": "code", 777 | "execution_count": null, 778 | "metadata": {}, 779 | "outputs": [], 780 | "source": [ 781 | "balanced_rf = BalancedRandomForestClassifier(random_state=0)\n", 782 | "\n", 783 | "balanced_rf.fit(X_train, y_train)" 784 | ] 785 | }, 786 | { 787 | "cell_type": "code", 788 | "execution_count": null, 789 | "metadata": {}, 790 | "outputs": [], 791 | "source": [ 792 | "under_rf.fit(X_train, y_train)\n", 793 | "\n", 794 | "over_rf = make_imb_pipeline(RandomOverSampler(random_state=42), RandomForestClassifier(random_state=42))\n", 795 | "over_rf.fit(X_train, y_train)" 796 | ] 797 | }, 798 | { 799 | "cell_type": "code", 800 | "execution_count": null, 801 | "metadata": {}, 802 | "outputs": [], 803 | "source": [ 804 | "plot_roc_and_precision_recall_curves(\n", 805 | " [\n", 806 | " (\"original\", base_rf),\n", 807 | " (\"undersampling\", under_rf),\n", 808 | " (\"oversampling\", over_rf),\n", 809 | " (\"balanced bagging\", balanced_rf)\n", 810 | " ]\n", 811 | ")" 812 | ] 813 | }, 814 | { 815 | "cell_type": "markdown", 816 | "metadata": {}, 817 | "source": [ 818 | "#### Back to slides!" 
819 | ] 820 | }, 821 | { 822 | "cell_type": "markdown", 823 | "metadata": {}, 824 | "source": [ 825 | "## SMOTE" 826 | ] 827 | }, 828 | { 829 | "cell_type": "code", 830 | "execution_count": null, 831 | "metadata": {}, 832 | "outputs": [], 833 | "source": [ 834 | "from imblearn.over_sampling import SMOTE\n", 835 | "\n", 836 | "smote = SMOTE(random_state=42)\n", 837 | "X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)\n", 838 | "\n", 839 | "X_train_smote.shape" 840 | ] 841 | }, 842 | { 843 | "cell_type": "code", 844 | "execution_count": null, 845 | "metadata": {}, 846 | "outputs": [], 847 | "source": [ 848 | "np.bincount(y_train_smote)" 849 | ] 850 | }, 851 | { 852 | "cell_type": "code", 853 | "execution_count": null, 854 | "metadata": {}, 855 | "outputs": [], 856 | "source": [ 857 | "fig, axes = plt.subplots(1, 2, figsize=(16, 8))\n", 858 | "sorting = np.argsort(y_train)\n", 859 | "\n", 860 | "axes[0].set_title(\"Original\")\n", 861 | "axes[0].scatter(X_train.iloc[sorting, 3], X_train.iloc[sorting, 4], c=plt.cm.tab10(y_train.iloc[sorting]), alpha=.3, s=2)\n", 862 | "\n", 863 | "axes[1].set_title(\"SMOTE\")\n", 864 | "axes[1].scatter(X_train_smote.iloc[:, 3], X_train_smote.iloc[:, 4], c=plt.cm.tab10(y_train_smote), alpha=1, s=2)" 865 | ] 866 | }, 867 | { 868 | "cell_type": "code", 869 | "execution_count": null, 870 | "metadata": {}, 871 | "outputs": [], 872 | "source": [ 873 | "base_log_reg_metrics" 874 | ] 875 | }, 876 | { 877 | "cell_type": "code", 878 | "execution_count": null, 879 | "metadata": {}, 880 | "outputs": [], 881 | "source": [ 882 | "smote_log_reg = make_imb_pipeline(\n", 883 | " SMOTE(random_state=42), LogisticRegression())\n", 884 | "compute_metrics(smote_log_reg)" 885 | ] 886 | }, 887 | { 888 | "cell_type": "code", 889 | "execution_count": null, 890 | "metadata": {}, 891 | "outputs": [], 892 | "source": [ 893 | "base_rf_metrics" 894 | ] 895 | }, 896 | { 897 | "cell_type": "code", 898 | "execution_count": null, 899 | "metadata": {}, 
900 | "outputs": [], 901 | "source": [ 902 | "smote_rf = make_imb_pipeline(SMOTE(random_state=42), RandomForestClassifier(random_state=42, n_jobs=-1))\n", 903 | "compute_metrics(smote_rf)" 904 | ] 905 | }, 906 | { 907 | "cell_type": "markdown", 908 | "metadata": {}, 909 | "source": [ 910 | "## Plotting all the versions of random forest" 911 | ] 912 | }, 913 | { 914 | "cell_type": "code", 915 | "execution_count": null, 916 | "metadata": {}, 917 | "outputs": [], 918 | "source": [ 919 | "smote_rf.fit(X_train, y_train)" 920 | ] 921 | }, 922 | { 923 | "cell_type": "code", 924 | "execution_count": null, 925 | "metadata": {}, 926 | "outputs": [], 927 | "source": [ 928 | "plot_roc_and_precision_recall_curves(\n", 929 | " [\n", 930 | " (\"original\", base_rf),\n", 931 | " (\"smote\", smote_rf),\n", 932 | " ]\n", 933 | ")" 934 | ] 935 | }, 936 | { 937 | "cell_type": "markdown", 938 | "metadata": {}, 939 | "source": [ 940 | "## Exercise 3\n", 941 | "\n", 942 | "1. Train a `HistGradientBoostingClassifier` on the training set.\n", 943 | "2. Construct a pipeline with `SMOTE` and `HistGradientBoostingClassifier` and fit it on the training set.\n", 944 | "3. Plot the ROC and PR curves for the two models." 945 | ] 946 | }, 947 | { 948 | "cell_type": "code", 949 | "execution_count": null, 950 | "metadata": {}, 951 | "outputs": [], 952 | "source": [ 953 | "from sklearn.ensemble import HistGradientBoostingClassifier" 954 | ] 955 | }, 956 | { 957 | "cell_type": "code", 958 | "execution_count": null, 959 | "metadata": {}, 960 | "outputs": [], 961 | "source": [] 962 | }, 963 | { 964 | "cell_type": "markdown", 965 | "metadata": {}, 966 | "source": [ 967 | "**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan/ml-workshop-advanced/blob/master/notebooks/solutions/02-ex03-solutions.py)." 
968 | ] 969 | }, 970 | { 971 | "cell_type": "code", 972 | "execution_count": null, 973 | "metadata": {}, 974 | "outputs": [], 975 | "source": [ 976 | "# %load solutions/02-ex03-solutions.py" 977 | ] 978 | } 979 | ], 980 | "metadata": { 981 | "kernelspec": { 982 | "display_name": "Python 3 (ipykernel)", 983 | "language": "python", 984 | "name": "python3" 985 | }, 986 | "language_info": { 987 | "codemirror_mode": { 988 | "name": "ipython", 989 | "version": 3 990 | }, 991 | "file_extension": ".py", 992 | "mimetype": "text/x-python", 993 | "name": "python", 994 | "nbconvert_exporter": "python", 995 | "pygments_lexer": "ipython3", 996 | "version": "3.9.7" 997 | } 998 | }, 999 | "nbformat": 4, 1000 | "nbformat_minor": 4 1001 | } 1002 | -------------------------------------------------------------------------------- /notebooks/03-poisson-regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "tags": [] 7 | }, 8 | "source": [ 9 | "# Poisson regression\n", 10 | "\n", 11 | "\"Open" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "# Install dependencies for google colab\n", 21 | "import sys\n", 22 | "IN_COLAB = 'google.colab' in sys.modules\n", 23 | "if IN_COLAB:\n", 24 | " %pip install -r https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/master/requirements.txt" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "import sklearn\n", 34 | "assert sklearn.__version__.startswith(\"1.0\"), \"Plese install scikit-learn 1.0\"" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "import numpy as np\n", 44 | "import pandas as pd\n", 45 | "import matplotlib.pyplot as plt\n", 46 | 
"import seaborn as sns\n", 47 | "\n", 48 | "sns.set_theme(font_scale=1.5, rc={'figure.figsize': [12, 8]})\n", 49 | "sklearn.set_config(display='diagram')" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "## Load London Bike Data" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "from pathlib import Path\n", 66 | "\n", 67 | "data_path = Path(\"data\")\n", 68 | "bikes_path = data_path / \"london_bikes.csv\"" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "- \"timestamp\" - timestamp field for grouping the data \n", 76 | "- \"cnt\" - the count of a new bike shares \n", 77 | "- \"t1\" - real temperature in C \n", 78 | "- \"t2\" - temperature in C \"feels like\" \n", 79 | "- \"hum\" - humidity in percentage \n", 80 | "- \"windspeed\" - wind speed in km/h \n", 81 | "- \"weathercode\" - category of the weather \n", 82 | "- \"isholiday\" - boolean field - 1 holiday / 0 non holiday \n", 83 | "- \"isweekend\" - boolean field - 1 if the day is weekend \n", 84 | "- \"season\" - category field meteorological seasons: 0-spring ; 1-summer; 2-fall; 3-winter." 
85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "if IN_COLAB:\n", 94 | " BIKES_URL = \"https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/master/notebooks/data/london_bikes.csv\"\n", 95 | " bikes = pd.read_csv(BIKES_URL, parse_dates=['timestamp'])\n", 96 | "else:\n", 97 | " bikes = pd.read_csv(bikes_path, parse_dates=['timestamp'])" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "bikes.head()" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "bikes['timestamp'].head()" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "bikes['hr'] = bikes['timestamp'].dt.hour" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "bikes['weather_code'].unique()" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "bikes['season'].unique()" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "X = bikes[['t1', 't2', 'hum', 'wind_speed', 'weather_code', 'is_holiday', 'is_weekend', 'season', 'hr']]\n", 152 | "y = bikes['cnt']" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "### Split data" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "from sklearn.model_selection import train_test_split\n", 169 | "\n", 170 | "X_train, X_test, y_train, 
y_test = train_test_split(X, y, random_state=42)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "### Column Transformer" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "from sklearn.compose import ColumnTransformer\n", 187 | "from sklearn.preprocessing import OneHotEncoder\n", 188 | "from sklearn.preprocessing import StandardScaler\n", 189 | "\n", 190 | "numerical_features = ['t1', 't2', 'hum', 'wind_speed']\n", 191 | "cat_features = ['weather_code', 'season', 'hr', 'is_holiday', 'is_weekend']\n", 192 | "\n", 193 | "ct = ColumnTransformer([\n", 194 | " ('numerical', StandardScaler(), numerical_features),\n", 195 | " ('categorical', OneHotEncoder(sparse=False, handle_unknown='ignore', drop='if_binary'), cat_features)\n", 196 | "])" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "### Pipeline" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "from sklearn.linear_model import PoissonRegressor\n", 213 | "from sklearn.pipeline import Pipeline\n", 214 | "from sklearn.preprocessing import StandardScaler\n", 215 | "\n", 216 | "pois_reg = Pipeline([\n", 217 | " ('prep', ct),\n", 218 | " ('reg', PoissonRegressor(alpha=1.0, max_iter=300))\n", 219 | "])\n", 220 | "\n", 221 | "pois_reg" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "pois_reg.fit(X_train, y_train);" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "pois_reg.score(X_test, y_test)" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "### What about 
ridge?" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [ 255 | "from sklearn.linear_model import Ridge\n", 256 | "\n", 257 | "ct = ColumnTransformer([\n", 258 | " ('numerical', StandardScaler(), numerical_features),\n", 259 | " ('categorical', OneHotEncoder(sparse=False, handle_unknown='ignore'), cat_features)\n", 260 | " \n", 261 | "])\n", 262 | "\n", 263 | "ridge = Pipeline([\n", 264 | " ('prep', ct),\n", 265 | " ('reg', Ridge(random_state=42))\n", 266 | "])\n", 267 | "\n", 268 | "ridge.fit(X_train, y_train)" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": {}, 275 | "outputs": [], 276 | "source": [ 277 | "ridge.score(X_test, y_test)" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": {}, 284 | "outputs": [], 285 | "source": [ 286 | "from sklearn.metrics import mean_squared_error\n", 287 | "from sklearn.metrics import mean_poisson_deviance\n", 288 | "\n", 289 | "def compute_metrics(y_true, y_pred, sample_weight=None):\n", 290 | " \n", 291 | " mask = y_pred > 0\n", 292 | " if (~mask).any():\n", 293 | " n_masked, n_samples = (~mask).sum(), mask.shape[0]\n", 294 | " print(f\"WARNING: Estimator yields invalid, non-positive predictions \"\n", 295 | " f\" for {n_masked} samples out of {n_samples}. 
These predictions \"\n", 296 | " f\"are ignored when computing the Poisson deviance.\")\n", 297 | " \n", 298 | " y_true = y_true[mask]\n", 299 | " y_pred = y_pred[mask]\n", 300 | " if sample_weight is not None:\n", 301 | " sample_weight = sample_weight[mask]\n", 302 | " \n", 303 | " return {\n", 304 | " 'mse': mean_squared_error(y_true, y_pred, sample_weight=sample_weight),\n", 305 | " 'mean poisson deviance': mean_poisson_deviance(y_true, y_pred, sample_weight=sample_weight)\n", 306 | " }" 307 | ] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "metadata": {}, 312 | "source": [ 313 | "### Compute metrics" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "ridge_pred = ridge.predict(X_test)\n", 323 | "compute_metrics(y_test, ridge_pred)" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": {}, 330 | "outputs": [], 331 | "source": [ 332 | "poisson_pred = pois_reg.predict(X_test)\n", 333 | "compute_metrics(y_test, poisson_pred)" 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "## Plotting the prediction distrubutions" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": {}, 347 | "outputs": [], 348 | "source": [ 349 | "fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 6), sharey=True)\n", 350 | "ax1.hist(y_test, bins=30, alpha=0.5)\n", 351 | "ax1.set_title(\"Test data\")\n", 352 | "ax2.hist(poisson_pred, bins=30, alpha=0.5)\n", 353 | "ax2.set_title(\"Poisson predictions\")\n", 354 | "ax3.hist(ridge_pred, bins=30, alpha=0.5)\n", 355 | "ax3.set_title(\"Ridge predictions\")" 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "## Exercise 1\n", 363 | "\n", 364 | "1. Train a `HistGradientBoostingRegressor` with `random_state=42` on the training set.\n", 365 | "1. 
Compute the predictions on the test set and save it as `hist_pred`.\n", 366 | "1. Compute the metrics for the predictions on the model using `compute_metrics`.\n", 367 | "1. Train a `HistGradientBoostingRegressor` with `loss='poisson'` and `random_state=42` on the training set.\n", 368 | "1. Compute the predictions from this estimator and save it as `hist_poisson_pred`.\n", 369 | "1. Compute the metrics for the predictions on the model using `compute_metrics`.\n", 370 | "1. **Extra:** Plot the prediction distributions for the two models and the original data.\n", 371 | " - **Hint** You may copy the code right above this cell." 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [ 380 | "from sklearn.ensemble import HistGradientBoostingRegressor" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": null, 386 | "metadata": {}, 387 | "outputs": [], 388 | "source": [] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan/ml-workshop-advanced/blob/master/notebooks/solutions/03-ex01-solutions.py)." 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": null, 400 | "metadata": {}, 401 | "outputs": [], 402 | "source": [ 403 | "# %load solutions/03-ex01-solutions.py" 404 | ] 405 | }, 406 | { 407 | "cell_type": "markdown", 408 | "metadata": {}, 409 | "source": [ 410 | "### Back to slides!" 
411 | ] 412 | }, 413 | { 414 | "cell_type": "markdown", 415 | "metadata": {}, 416 | "source": [ 417 | "## Calibration Curve for Regressors" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": null, 423 | "metadata": {}, 424 | "outputs": [], 425 | "source": [ 426 | "from sklearn.utils import gen_even_slices\n", 427 | "\n", 428 | "def _calibration_curve_weighted(y_true, y_pred, n_bins=10, sample_weight=None):\n", 429 | " y_true = np.asarray(y_true)\n", 430 | " y_pred = np.asarray(y_pred)\n", 431 | " \n", 432 | " idx_sort = np.argsort(y_pred)\n", 433 | " y_pred_bin = np.zeros(n_bins)\n", 434 | " y_true_bin = np.zeros(n_bins)\n", 435 | " \n", 436 | " if sample_weight is not None:\n", 437 | " sample_weight = np.asarray(sample_weight)\n", 438 | " \n", 439 | " for i, sl in enumerate(gen_even_slices(len(y_true), n_bins)):\n", 440 | " if sample_weight is None:\n", 441 | " y_pred_bin[i] = np.average(y_pred[idx_sort][sl])\n", 442 | " y_true_bin[i] = np.average(y_true[idx_sort][sl])\n", 443 | " else:\n", 444 | " weights = sample_weight[idx_sort][sl]\n", 445 | " y_pred_bin[i] = np.average(y_pred[idx_sort][sl], weights=weights)\n", 446 | " y_true_bin[i] = np.average(y_true[idx_sort][sl], weights=weights)\n", 447 | " return y_pred_bin, y_true_bin\n", 448 | "\n", 449 | "def plot_calibration_curve_regression(y_true, y_pred, n_bins=10, ax=None, title=\"\", sample_weight=None):\n", 450 | " if ax is None:\n", 451 | " fig, ax = plt.subplots(figsize=(8, 8))\n", 452 | " y_pred_bin, y_true_bin = _calibration_curve_weighted(y_true, y_pred, n_bins=n_bins, sample_weight=sample_weight)\n", 453 | " \n", 454 | " bin_centers = np.arange(1, len(y_pred_bin) + 1)\n", 455 | " ax.plot(bin_centers, y_pred_bin, marker='x', linestyle=\"--\", label=\"predictions\")\n", 456 | " ax.plot(bin_centers, y_true_bin, marker='o', linestyle=\"--\", label=\"observations\")\n", 457 | " ax.set(xlabel=\"Bin number\", xticks=bin_centers, title=title)\n", 458 | " ax.legend()\n", 459 | " return ax" 460 | ] 461 
| }, 462 | { 463 | "cell_type": "markdown", 464 | "metadata": {}, 465 | "source": [ 466 | "Train a `hist_poisson` to compare calibration curves" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": null, 472 | "metadata": {}, 473 | "outputs": [], 474 | "source": [ 475 | "from sklearn.ensemble import HistGradientBoostingRegressor\n", 476 | "\n", 477 | "hist_poisson = HistGradientBoostingRegressor(loss='poisson', random_state=42)\n", 478 | "hist_poisson.fit(X_train, y_train)\n", 479 | "\n", 480 | "hist_poisson_pred = hist_poisson.predict(X_test)" 481 | ] 482 | }, 483 | { 484 | "cell_type": "code", 485 | "execution_count": null, 486 | "metadata": {}, 487 | "outputs": [], 488 | "source": [ 489 | "fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(20, 8))\n", 490 | "plot_calibration_curve_regression(y_test, ridge_pred, ax=ax1, title=\"Ridge\")\n", 491 | "plot_calibration_curve_regression(y_test, poisson_pred, ax=ax2, title=\"Poisson Regression\")\n", 492 | "plot_calibration_curve_regression(y_test, hist_poisson_pred, ax=ax3, title=\"Hist Poisson\");" 493 | ] 494 | }, 495 | { 496 | "cell_type": "markdown", 497 | "metadata": {}, 498 | "source": [ 499 | "## Claims dataset" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": null, 505 | "metadata": {}, 506 | "outputs": [], 507 | "source": [ 508 | "claims_path = data_path / \"claims.csv\"\n", 509 | "if IN_COLAB:\n", 510 | " CLAIMS_URL = \"https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/master/notebooks/data/claims.csv\"\n", 511 | " claims = pd.read_csv(CLAIMS_URL)\n", 512 | "else:\n", 513 | " claims = pd.read_csv(claims_path)" 514 | ] 515 | }, 516 | { 517 | "cell_type": "markdown", 518 | "metadata": {}, 519 | "source": [ 520 | "- ClaimsPerYear: number of claims on the given policy;\n", 521 | "- Exposure: total exposure in yearly units;\n", 522 | "- Area: area code (categorical, ordinal);\n", 523 | "- VehPower: power of the car (categorical, ordinal);\n", 524 
| "- VehAge: age of the car in years;\n", 525 | "- DrivAge: age of the (most common) driver in years;\n", 526 | "- BonusMalus: bonus-malus level between 50 and 230 (with reference level 100);\n", 527 | "- VehBrand: car brand (categorical, nominal);\n", 528 | "- VehGas: diesel or regular fuel car (binary);\n", 529 | "- Density: density of inhabitants per km2 in the city of the living place of the driver;\n", 530 | "- Region: regions in France (prior to 2016)" 531 | ] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": null, 536 | "metadata": {}, 537 | "outputs": [], 538 | "source": [ 539 | "claims.head()" 540 | ] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "execution_count": null, 545 | "metadata": {}, 546 | "outputs": [], 547 | "source": [ 548 | "exposure = claims['Exposure']" 549 | ] 550 | }, 551 | { 552 | "cell_type": "code", 553 | "execution_count": null, 554 | "metadata": {}, 555 | "outputs": [], 556 | "source": [ 557 | "y = claims[\"ClaimsPerYear\"]" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": null, 563 | "metadata": {}, 564 | "outputs": [], 565 | "source": [ 566 | "X = claims.drop([\"Exposure\", \"ClaimsPerYear\"], axis=\"columns\")" 567 | ] 568 | }, 569 | { 570 | "cell_type": "code", 571 | "execution_count": null, 572 | "metadata": {}, 573 | "outputs": [], 574 | "source": [ 575 | "X.head()" 576 | ] 577 | }, 578 | { 579 | "cell_type": "markdown", 580 | "metadata": {}, 581 | "source": [ 582 | "### Split data" 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": null, 588 | "metadata": {}, 589 | "outputs": [], 590 | "source": [ 591 | "X_train, X_test, y_train, y_test, exposure_train, exposure_test = train_test_split(\n", 592 | " X, y, exposure, random_state=42)" 593 | ] 594 | }, 595 | { 596 | "cell_type": "markdown", 597 | "metadata": {}, 598 | "source": [ 599 | "### Train simple dummy regresor" 600 | ] 601 | }, 602 | { 603 | "cell_type": "code", 604 | "execution_count": null, 605 | 
"metadata": {}, 606 | "outputs": [], 607 | "source": [ 608 | "from sklearn.dummy import DummyRegressor\n", 609 | "\n", 610 | "dummy = DummyRegressor()\n", 611 | "dummy.fit(X_train, y_train, sample_weight=exposure_train)" 612 | ] 613 | }, 614 | { 615 | "cell_type": "code", 616 | "execution_count": null, 617 | "metadata": {}, 618 | "outputs": [], 619 | "source": [ 620 | "dummy_pred = dummy.predict(X_test)\n", 621 | "compute_metrics(y_test, dummy_pred, sample_weight=exposure_test)" 622 | ] 623 | }, 624 | { 625 | "cell_type": "code", 626 | "execution_count": null, 627 | "metadata": {}, 628 | "outputs": [], 629 | "source": [ 630 | "dummy_pred[:10]" 631 | ] 632 | }, 633 | { 634 | "cell_type": "markdown", 635 | "metadata": {}, 636 | "source": [ 637 | "## Ridge" 638 | ] 639 | }, 640 | { 641 | "cell_type": "code", 642 | "execution_count": null, 643 | "metadata": {}, 644 | "outputs": [], 645 | "source": [ 646 | "X['Density'].hist(bins=25);" 647 | ] 648 | }, 649 | { 650 | "cell_type": "code", 651 | "execution_count": null, 652 | "metadata": {}, 653 | "outputs": [], 654 | "source": [ 655 | "from sklearn.pipeline import make_pipeline\n", 656 | "from sklearn.preprocessing import FunctionTransformer, OneHotEncoder\n", 657 | "from sklearn.preprocessing import KBinsDiscretizer\n", 658 | "from sklearn.compose import ColumnTransformer\n", 659 | "\n", 660 | "linear_model_preprocessor = ColumnTransformer(\n", 661 | " [\n", 662 | " (\"passthrough_numeric\", StandardScaler(),\n", 663 | " [\"BonusMalus\"]),\n", 664 | " (\"binned_numeric\", KBinsDiscretizer(n_bins=10),\n", 665 | " [\"VehAge\", \"DrivAge\"]),\n", 666 | " (\"log_scaled_numeric\", FunctionTransformer(np.log),\n", 667 | " [\"Density\"]),\n", 668 | " (\"onehot_categorical\", OneHotEncoder(handle_unknown='ignore'),\n", 669 | " [\"VehBrand\", \"VehPower\", \"VehGas\", \"Region\", \"Area\"]),\n", 670 | " ],\n", 671 | ")" 672 | ] 673 | }, 674 | { 675 | "cell_type": "code", 676 | "execution_count": null, 677 | "metadata": {}, 678 | 
"outputs": [], 679 | "source": [ 680 | "linear_model_preprocessor.fit_transform(X_train)" 681 | ] 682 | }, 683 | { 684 | "cell_type": "code", 685 | "execution_count": null, 686 | "metadata": {}, 687 | "outputs": [], 688 | "source": [ 689 | "from sklearn.linear_model import Ridge\n", 690 | "from sklearn.preprocessing import MaxAbsScaler\n", 691 | "\n", 692 | "ridge = Pipeline([\n", 693 | " (\"preprocessor\", linear_model_preprocessor),\n", 694 | " (\"reg\", Ridge(alpha=1e-6))])\n", 695 | "ridge.fit(X_train, y_train, reg__sample_weight=exposure_train)" 696 | ] 697 | }, 698 | { 699 | "cell_type": "code", 700 | "execution_count": null, 701 | "metadata": {}, 702 | "outputs": [], 703 | "source": [ 704 | "ridge_pred = ridge.predict(X_test)\n", 705 | "ridge_metrics = compute_metrics(y_test, ridge_pred, sample_weight=exposure_test)\n", 706 | "ridge_metrics" 707 | ] 708 | }, 709 | { 710 | "cell_type": "code", 711 | "execution_count": null, 712 | "metadata": {}, 713 | "outputs": [], 714 | "source": [ 715 | "plot_calibration_curve_regression(y_test, ridge_pred, title=\"Ridge\", sample_weight=exposure_test);" 716 | ] 717 | }, 718 | { 719 | "cell_type": "markdown", 720 | "metadata": {}, 721 | "source": [ 722 | "## Exercise 2\n", 723 | "\n", 724 | "1. Construct a pipeline with `PoissonRegressor(alpha=1e-4)` with the same preprocesser we have above.\n", 725 | " - **Hint**: You may reuse `linear_model_preprocessor` and `MaxAbsScaler`\n", 726 | "2. Training the pipeline on the training set. **Hint**: Remember to set the the sample weight!\n", 727 | "3. Plot the calibration curve using `plot_calibration_curve_regression`. **Hint**: Remember to include the sample weights!" 
728 | ] 729 | }, 730 | { 731 | "cell_type": "code", 732 | "execution_count": null, 733 | "metadata": {}, 734 | "outputs": [], 735 | "source": [] 736 | }, 737 | { 738 | "cell_type": "markdown", 739 | "metadata": {}, 740 | "source": [ 741 | "**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan/ml-workshop-advanced/blob/master/notebooks/solutions/03-ex02-solutions.py)." 742 | ] 743 | }, 744 | { 745 | "cell_type": "code", 746 | "execution_count": null, 747 | "metadata": {}, 748 | "outputs": [], 749 | "source": [ 750 | "# %load solutions/03-ex02-solutions.py" 751 | ] 752 | }, 753 | { 754 | "cell_type": "markdown", 755 | "metadata": { 756 | "tags": [] 757 | }, 758 | "source": [ 759 | "## Exercise 3\n", 760 | "\n", 761 | "1. Build a preprocessor for a tree-based model.\n", 762 | " - **Hint**: Use `ColumnTransformer`, encode categories with `OrdinalEncoder` and passthrough the numerical features.\n", 763 | " - **Extra**: You can use `make_column_selector` to select the correct dtypes.\n", 764 | "2. Use the preprocessor from step 1 to build a pipeline with `HistGradientBoostingRegressor` with `loss=\"poisson\"`.\n", 765 | "3. Fit the model from step 2 while also setting `sample_weight` to `exposure_train`.\n", 766 | "4. Use `compute_metrics` to compute the MSE and the mean Poisson deviance.\n", 767 | " - **Hint**: Remember to include the sample weight!\n", 768 | "5. Plot the calibration curve using `plot_calibration_curve_regression`.\n", 769 | " - **Hint**: Remember to include the sample weights!" 
770 | ] 771 | }, 772 | { 773 | "cell_type": "code", 774 | "execution_count": null, 775 | "metadata": {}, 776 | "outputs": [], 777 | "source": [ 778 | "from sklearn.preprocessing import OrdinalEncoder\n", 779 | "from sklearn.compose import make_column_selector" 780 | ] 781 | }, 782 | { 783 | "cell_type": "code", 784 | "execution_count": null, 785 | "metadata": {}, 786 | "outputs": [], 787 | "source": [] 788 | }, 789 | { 790 | "cell_type": "markdown", 791 | "metadata": {}, 792 | "source": [ 793 | "**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan//ml-workshop-advanced/blob/master/notebooks/solutions/03-ex03-solutions.py)." 794 | ] 795 | }, 796 | { 797 | "cell_type": "code", 798 | "execution_count": null, 799 | "metadata": {}, 800 | "outputs": [], 801 | "source": [ 802 | "# %load solutions/03-ex03-solutions.py" 803 | ] 804 | } 805 | ], 806 | "metadata": { 807 | "kernelspec": { 808 | "display_name": "Python 3 (ipykernel)", 809 | "language": "python", 810 | "name": "python3" 811 | }, 812 | "language_info": { 813 | "codemirror_mode": { 814 | "name": "ipython", 815 | "version": 3 816 | }, 817 | "file_extension": ".py", 818 | "mimetype": "text/x-python", 819 | "name": "python", 820 | "nbconvert_exporter": "python", 821 | "pygments_lexer": "ipython3", 822 | "version": "3.9.7" 823 | } 824 | }, 825 | "nbformat": 4, 826 | "nbformat_minor": 4 827 | } 828 | -------------------------------------------------------------------------------- /notebooks/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/notebooks/data/.gitkeep -------------------------------------------------------------------------------- /notebooks/data/review_polarity.tar.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/notebooks/data/review_polarity.tar.gz -------------------------------------------------------------------------------- /notebooks/images/smote_generated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-advanced/112193babf7a2656db745cc8bbb4fb18cfc17361/notebooks/images/smote_generated.png -------------------------------------------------------------------------------- /notebooks/solutions/01-ex01-solutions.py: -------------------------------------------------------------------------------- 1 | from sklearn.ensemble import RandomForestClassifier 2 | 3 | rfc = RandomForestClassifier(random_state=42, max_depth=3) 4 | 5 | rfc.fit(X_train, y_train) 6 | 7 | rfc.score(X_test, y_test) 8 | 9 | rfc_feature_importances = rfc.feature_importances_ 10 | 11 | rf_top_20 = rfc_feature_importances.argsort()[-20:] 12 | 13 | feature_names[rf_top_20] 14 | -------------------------------------------------------------------------------- /notebooks/solutions/01-ex02-solutions.py: -------------------------------------------------------------------------------- 1 | rfc_pipe = Pipeline([ 2 | ("vectorizer", CountVectorizer(min_df=2, stop_words='english')), 3 | ("rf", RandomForestClassifier(random_state=42, max_depth=3)) 4 | ]) 5 | 6 | rfc_pipe.fit(text_train, y_train) 7 | 8 | rfc_pipe.score(text_test, y_test) 9 | -------------------------------------------------------------------------------- /notebooks/solutions/01-ex03-solutions.py: -------------------------------------------------------------------------------- 1 | log_tfid = Pipeline([ 2 | ("vectorizer", TfidfVectorizer(stop_words='english')), 3 | ("log_reg", LogisticRegression(solver='liblinear')) 4 | ]) 5 | 6 | len(text_train) 7 | 8 | len(text_test) 9 | 10 | 
log_tfid.fit(text_train, y_train) 11 | 12 | log_tfid.score(text_test, y_test) 13 | 14 | feature_names = log_tfid["vectorizer"].get_feature_names_out() 15 | log_reg_coefs = log_tfid["log_reg"].coef_ 16 | 17 | plot_important_features(log_reg_coefs, feature_names) 18 | -------------------------------------------------------------------------------- /notebooks/solutions/02-ex01-solutions.py: -------------------------------------------------------------------------------- 1 | over_rf = make_imb_pipeline(RandomOverSampler(random_state=0), RandomForestClassifier(random_state=42)) 2 | 3 | base_rf_metrics 4 | 5 | compute_metrics(over_rf) 6 | -------------------------------------------------------------------------------- /notebooks/solutions/02-ex02-solutions.py: -------------------------------------------------------------------------------- 1 | base_rf.fit(X_train, y_train) 2 | under_rf.fit(X_train, y_train) 3 | over_rf.fit(X_train, y_train) 4 | 5 | plot_roc_and_precision_recall_curves([ 6 | ("original", base_rf), 7 | ("undersampling", under_rf), 8 | ("oversampling", over_rf), 9 | ]) -------------------------------------------------------------------------------- /notebooks/solutions/02-ex03-solutions.py: -------------------------------------------------------------------------------- 1 | from sklearn.ensemble import HistGradientBoostingClassifier 2 | 3 | base_hist = HistGradientBoostingClassifier(random_state=42) 4 | base_hist.fit(X_train, y_train) 5 | 6 | smote_hist = make_imb_pipeline( 7 | SMOTE(random_state=42), HistGradientBoostingClassifier(random_state=42)) 8 | smote_hist.fit(X_train, y_train) 9 | 10 | 11 | plot_roc_and_precision_recall_curves( 12 | [ 13 | ("original", base_hist), 14 | ("smote", smote_hist), 15 | ] 16 | ) -------------------------------------------------------------------------------- /notebooks/solutions/03-ex01-solutions.py: -------------------------------------------------------------------------------- 1 | from sklearn.ensemble import 
HistGradientBoostingRegressor 2 | 3 | hist = HistGradientBoostingRegressor(random_state=42) 4 | 5 | hist.fit(X_train, y_train) 6 | 7 | hist_pred = hist.predict(X_test) 8 | 9 | compute_metrics(y_test, hist_pred) 10 | 11 | hist_poisson = HistGradientBoostingRegressor(loss='poisson', random_state=42) 12 | hist_poisson.fit(X_train, y_train) 13 | 14 | hist_poisson_pred = hist_poisson.predict(X_test) 15 | 16 | compute_metrics(y_test, hist_poisson_pred) 17 | 18 | fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 6), sharey=True) 19 | ax1.hist(y_test, bins=30, alpha=0.5) 20 | ax1.set_title("Test data") 21 | ax2.hist(hist_pred, bins=30, alpha=0.5) 22 | ax2.set_title("Hist predictions") 23 | ax3.hist(hist_poisson_pred, bins=30, alpha=0.5) 24 | ax3.set_title("Hist Poisson predictions") 25 | -------------------------------------------------------------------------------- /notebooks/solutions/03-ex02-solutions.py: -------------------------------------------------------------------------------- 1 | poisson_reg = Pipeline([ 2 | ("preprocessor", linear_model_preprocessor), 3 | ("reg", PoissonRegressor(alpha=1e-4)), 4 | ]) 5 | 6 | poisson_reg.fit(X_train, y_train, reg__sample_weight=exposure_train) 7 | 8 | poisson_pred = poisson_reg.predict(X_test) 9 | compute_metrics(y_test, poisson_pred, sample_weight=exposure_test) 10 | 11 | fig, ax = plt.subplots(figsize=(8, 8)) 12 | plot_calibration_curve_regression(y_test, poisson_pred, ax=ax, title="Poisson", sample_weight=exposure_test); 13 | -------------------------------------------------------------------------------- /notebooks/solutions/03-ex03-solutions.py: -------------------------------------------------------------------------------- 1 | from sklearn.preprocessing import OrdinalEncoder 2 | from sklearn.compose import make_column_selector 3 | 4 | tree_preprocessor = ColumnTransformer([ 5 | ("categorical", 6 | OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1), 7 | make_column_selector(dtype_include='object') 
8 | ), 9 | ("numerical", "passthrough", make_column_selector(dtype_include='number')) 10 | ]) 11 | 12 | hist_poisson = Pipeline([ 13 | ("preprocessor", tree_preprocessor), 14 | ("hist", HistGradientBoostingRegressor(loss="poisson", random_state=42)) 15 | ]) 16 | 17 | hist_poisson.fit(X_train, y_train, hist__sample_weight=exposure_train) 18 | 19 | hist_poisson_pred = hist_poisson.predict(X_test) 20 | 21 | compute_metrics(y_test, hist_poisson_pred, sample_weight=exposure_test) 22 | 23 | plot_calibration_curve_regression(y_test, hist_poisson_pred, sample_weight=exposure_test, title="Hist Poisson") 24 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-learn==1.0.* 2 | seaborn==0.11.2 3 | pandas 4 | imbalanced-learn==0.8.* 5 | -------------------------------------------------------------------------------- /slides.md: -------------------------------------------------------------------------------- 1 | title: Advanced Machine Learning with scikit-learn: Text Data, Imbalanced Data, and Poisson Regression 2 | use_katex: True 3 | class: title-slide 4 | 5 | # Advanced Machine Learning with scikit-learn 6 | ## Text Data, Imbalanced Data, and Poisson Regression 7 | 8 | ![](images/scikit-learn-logo-notext.png) 9 | 10 | .larger[Thomas J. Fan]
11 | @thomasjpfan
12 | 13 | 14 | 15 | This workshop on Github: github.com/thomasjpfan/ml-workshop-advanced 16 | 17 | --- 18 | 19 | name: table-of-contents 20 | class: title-slide, left 21 | 22 | # Table of Contents 23 | 24 | .g[ 25 | .g-6[ 26 | 1. [Text Data](#text) 27 | 1. [Imbalanced Data](#imbalanced) 28 | 1. [Poisson Regression](#poisson) 29 | ] 30 | .g-6.g-center[ 31 | ![](images/scikit-learn-logo-notext.png) 32 | ] 33 | ] 34 | 35 | --- 36 | 37 | name: text 38 | class: chapter-slide 39 | 40 | # 1. Text Data 41 | 42 | .footnote-back[ 43 | [Back to Table of Contents](#table-of-contents) 44 | ] 45 | 46 | --- 47 | 48 | # Types of text data 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 |
fullNamecountrypoliticalGroupnationalPoliticalGroup
0Magdalena ADAMOWICZPolandGroup of the European People's Party (Christian Democrats)Independent
1Asim ADEMOVBulgariaGroup of the European People's Party (Christian Democrats)Citizens for European Development of Bulgaria
2Isabella ADINOLFIItalyNon-attached MembersMovimento 5 Stelle
3Matteo ADINOLFIItalyIdentity and Democracy GroupLega
4Alex AGIUS SALIBAMaltaGroup of the Progressive Alliance of Socialists and Democrats in the European ParliamentPartit Laburista
98 | 99 | 100 | --- 101 | 102 | # Text data we are considering 103 | 104 | I've just had the evidence that confirmed my suspicions. A bunch of kids, 14 to 22 put on the DVD of "Titanic" on a fantastic state of the art mega screen home entertainment type deal. Only two of them had actually seen it before. But they all had seen the moment of Kate, Leo and Celine Dion so many times that most of them felt they had seen the whole movie. Shortly after the epic started, they started to get restless, some of them left asking the others 105 | 106 | This independent, B&W, DV feature consistently shocks, amazes and amuses with it's ability to create the most insane situations and then find humor and interest in them. It's all hilarious and ridiculous stuff, yet as absurd as much of the film should be, there is a heart and a reality here that keeps the film grounded, keeps the entire piece from drifting into complete craziness and therein lies the real message here. 107 | 108 | --- 109 | 110 | # Bag of words 111 | 112 | .center[ 113 | ![:scale 90%](images/countvectorizer.svg) 114 | ] 115 | 116 | --- 117 | 118 | # Text processing in scikit-learn 119 | 120 | ```py 121 | from sklearn.feature_extraction.text import CountVectorizer 122 | 123 | sample_text = ["Can we go to the mountain tomorrow?", 124 | "The mountain is really tall"] 125 | 126 | vect = CountVectorizer() 127 | vect.fit(sample_text) 128 | 129 | vect.get_feature_names_out() 130 | # array(['can', 'go', 'is', 'mountain', 'really', 'tall', 'the', 'to', 131 | # 'tomorrow', 'we'], dtype=object) 132 | 133 | X = vect.transform(sample_text) 134 | X.toarray() 135 | # array([[1, 1, 0, 1, 0, 0, 1, 1, 1, 1], 136 | # [0, 0, 1, 1, 1, 1, 1, 0, 0, 0]]) 137 | ``` 138 | 139 | --- 140 | 141 | class: chapter-slide 142 | 143 | # Notebook 📓! 
144 | ## notebooks/01-text-data.ipynb 145 | 146 | --- 147 | 148 | # N-grams 149 | 150 | - Tries to keep words together 151 | - "really tall" and "not tall" have different contexts 152 | 153 | ![:scale 90%](images/countvectorizer-ngrams.svg) 154 | 155 | --- 156 | 157 | class: chapter-slide 158 | 159 | # Notebook 📓! 160 | ## notebooks/01-text-data.ipynb 161 | 162 | --- 163 | 164 | # Tf-idf rescaling 165 | 166 | $$ 167 | \text{tf-idf}(t, d) = \text{tf}(t, d) \cdot \text{idf}(t) 168 | $$ 169 | $$ 170 | \text{idf}(t) = \log\frac{1 + n_d}{1 + \text{df}(t)} + 1 171 | $$ 172 | 173 | - $\text{tf}(t, d)$ = The count of term $t$ in document $d$. 174 | - $n_d$ = total number of documents 175 | - $\text{df}(t)$ = number of documents containing term $t$ 176 | 177 | - scikit-learn divides each row by its length (L2 normalization) 178 | 179 | ```py 180 | from sklearn.feature_extraction.text import TfidfVectorizer 181 | ``` 182 | 183 | --- 184 | 185 | class: chapter-slide 186 | 187 | # Notebook 📓! 188 | ## notebooks/01-text-data.ipynb 189 | 190 | --- 191 | 192 | name: imbalanced 193 | class: chapter-slide 194 | 195 | # 2. Imbalanced Data 196 | 197 | .footnote-back[ 198 | [Back to Table of Contents](#table-of-contents) 199 | ] 200 | 201 | --- 202 | 203 | class: middle 204 | 205 | # What is imbalanced data? 
206 | 207 | - Costs are different between classes 208 | - Data is imbalanced 209 | - Some datasets have very few positive samples 210 | 211 | --- 212 | 213 | # Different costs between classes 214 | 215 | .g[ 216 | .g-8[ 217 | .smaller-x[ 218 | ```py 219 | y_pred = log_reg.predict(X_test) 220 | print(classification_report(y_test, y_pred)) 221 | ``` 222 | 223 | ``` 224 | precision recall f1-score support 225 | 226 | False 0.99 1.00 0.99 2731 227 | True 0.75 0.37 0.49 65 228 | 229 | accuracy 0.98 2796 230 | macro avg 0.87 0.68 0.74 2796 231 | weighted avg 0.98 0.98 0.98 2796 232 | ``` 233 | 234 | ```py 235 | y_pred_25 = log_reg.predict_proba(X_test)[:, 1] > 0.25 236 | print(classification_report(y_test, y_pred_25)) 237 | ``` 238 | 239 | ``` 240 | precision recall f1-score support 241 | 242 | False 0.99 0.99 0.99 2731 243 | True 0.63 0.55 0.59 65 244 | 245 | accuracy 0.98 2796 246 | macro avg 0.81 0.77 0.79 2796 247 | weighted avg 0.98 0.98 0.98 2796 248 | ``` 249 | ] 250 | ] 251 | .g-4[ 252 | $$ 253 | \text{precision} = \frac{TP}{TP + FP} 254 | $$ 255 | 256 | $$ 257 | \text{recall} = \frac{TP}{TP + FN} 258 | $$ 259 | ] 260 | ] 261 | 262 | --- 263 | 264 | class: chapter-slide 265 | 266 | # Notebook 📕! 
267 | ## notebooks/02-imbalanced-data.ipynb 268 | 269 | --- 270 | 271 | # Resampling 272 | 273 | ![:scale 60%](images/resampling_approches.png) 274 | 275 | --- 276 | 277 | # scikit-learn Pipelines 278 | ### Unable to handle sampling 279 | 280 | ![](images/pipeline.svg) 281 | 282 | --- 283 | 284 | class: middle 285 | 286 | # Imbalanced-learn 287 | 288 | [https://imbalanced-learn.org/stable/](https://imbalanced-learn.org/stable/) 289 | 290 | - Extends scikit-learn API 291 | - Defines samplers 292 | 293 | --- 294 | 295 | # Defines pipeline to handle sampling 296 | 297 | ```py 298 | from imblearn.pipeline import make_pipeline as make_imb_pipeline 299 | 300 | from imblearn.under_sampling import RandomUnderSampler 301 | from imblearn.over_sampling import RandomOverSampler 302 | ``` 303 | 304 | Fitting and sampling done in one line: 305 | 306 | ```py 307 | data_resampled, targets_resampled = obj.fit_resample(data, targets) 308 | ``` 309 | 310 | --- 311 | 312 | class: chapter-slide 313 | 314 | # Notebook 📕! 315 | ## notebooks/02-imbalanced-data.ipynb 316 | 317 | --- 318 | 319 | class: middle 320 | 321 | # Class-weights 322 | 323 | - Re-weight the loss functions 324 | - Native to scikit-learn for most models 325 | - Same effect as over-sampling, but keeps the dataset size the same 326 | 327 | --- 328 | 329 | # Class-weights 330 | ## Linear models 331 | 332 | - Loss of a given sample is weighted inversely proportional to class frequencies 333 | 334 | ```py 335 | LogisticRegression(class_weight='balanced') 336 | ``` 337 | 338 | ## Tree models 339 | 340 | - When deciding where to split, the `criterion` is weighted inversely proportional to class frequencies 341 | 342 | ```py 343 | DecisionTreeClassifier(class_weight='balanced') 344 | ``` 345 | 346 | --- 347 | 348 | class: chapter-slide 349 | 350 | # Notebook 📕! 
351 | ## notebooks/02-imbalanced-data.ipynb 352 | 353 | --- 354 | 355 | class: middle 356 | 357 | # Balanced Bagging 358 | 359 | - Random under sampling for each instance in the ensemble 360 | 361 | ```py 362 | from imblearn.ensemble import BalancedRandomForestClassifier 363 | 364 | balanced_rf = BalancedRandomForestClassifier(random_state=0) 365 | balanced_rf.fit(X_train, y_train) 366 | ``` 367 | 368 | --- 369 | 370 | class: chapter-slide 371 | 372 | # Notebook 📕! 373 | ## notebooks/02-imbalanced-data.ipynb 374 | 375 | --- 376 | 377 | # Synthetic Minority Oversampling Technique 378 | ## SMOTE 379 | 380 | - Adds synthetic interpolated data to the minority class 381 | - For each sample in minority class: 382 | - Pick random neighbor from k neighbors. 383 | - Pick point on line connecting the two uniformly 384 | 385 | --- 386 | 387 | # Generated Data with SMOTE 388 | 389 | ![](notebooks/images/smote_generated.png) 390 | 391 | --- 392 | 393 | class: chapter-slide 394 | 395 | ## Notebook 📕! 396 | ### notebooks/02-imbalanced-data.ipynb 397 | 398 | --- 399 | 400 | name: poisson 401 | class: chapter-slide 402 | 403 | # 3. 
Poisson Regression 404 | 405 | .footnote-back[ 406 | [Back to Table of Contents](#table-of-contents) 407 | ] 408 | 409 | --- 410 | 411 | # Generalized Linear Models 412 | 413 | Predicted values $\hat{y}$ are linked to a linear combination of the input features $X$ via an 414 | inverse link function $h$: 415 | 416 | $$ 417 | \hat{y}(w, X) = h(Xw) 418 | $$ 419 | 420 | The minimization problem becomes: 421 | 422 | $$ 423 | \min_{w}\frac{1}{2n}\sum_i d(y_i, \hat{y}_i) + \frac{\alpha}{2} ||w||_2^2 424 | $$ 425 | 426 | - $n$ is the number of samples 427 | - $\alpha$ is the strength of the L2 regularization penalty 428 | - $d(y_i, \hat{y}_i)$ is the unit deviance 429 | 430 | --- 431 | 432 | # Unit Deviances 433 | 434 | ![](images/glm_unit_deviance.png) 435 | 436 | ![:scale 100%](images/poisson_gamma_tweedie_distributions.png) 437 | 438 | --- 439 | 440 | class: middle 441 | 442 | # Scikit-learn supports GLMs! 443 | 444 | `TweedieRegressor` implements a GLM for the Tweedie distribution with a `power` parameter 445 | ```py 446 | from sklearn.linear_model import TweedieRegressor 447 | ``` 448 | 449 | `power=0`: Normal distribution: Use `Ridge`, `ElasticNet`, etc. 
450 | 451 | --- 452 | 453 | class: middle 454 | 455 | # Convenience estimators for GLMs 456 | 457 | Same as `TweedieRegressor(power=1, link='log')` 458 | ```py 459 | from sklearn.linear_model import PoissonRegressor 460 | ``` 461 | 462 | Same as `TweedieRegressor(power=2, link='log')` 463 | ```py 464 | from sklearn.linear_model import GammaRegressor 465 | ``` 466 | 467 | --- 468 | 469 | # Examples of use cases for GLMs 470 | 471 | - Agriculture / weather modeling: 472 | - number of rain events per year (Poisson) 473 | - amount of rainfall per event (Gamma) 474 | 475 | - Risk modeling / insurance policy pricing: 476 | - number of claim events per policyholder per year (Poisson) 477 | - cost per event (Gamma) 478 | 479 | - Predictive maintenance: 480 | - number of production interruption events per year (Poisson) 481 | - duration of interruption (Gamma) 482 | 483 | --- 484 | 485 | # Calibration for Regressors 486 | 487 | .g.g-middle[ 488 | .g-6[ 489 | ![](images/calibration-regression.svg) 490 | ] 491 | .g-6[ 492 | ![:scale 120%](images/calibration_regression.png) 493 | ] 494 | ] 495 | 496 | --- 497 | 498 | class: chapter-slide 499 | 500 | # Notebook 📘! 501 | ## notebooks/03-poisson-regression.ipynb 502 | 503 | --- 504 | 505 | class: title-slide, left 506 | 507 | # Closing 508 | 509 | .g.g-middle[ 510 | .g-7[ 511 | ![:scale 30%](images/scikit-learn-logo-notext.png) 512 | 1. [Text data](#text) 513 | 1. [Imbalanced Data](#imbalanced) 514 | 1. [Poisson Regression](#poisson) 515 | ] 516 | .g-5.center[ 517 |
518 | .larger[Thomas J. Fan]
519 | @thomasjpfan
520 | 521 | 522 | 523 | This workshop on GitHub: github.com/thomasjpfan/ml-workshop-advanced 524 | ] 525 | ] 526 | --------------------------------------------------------------------------------