├── .gitignore ├── BUILDING.md ├── LICENSE ├── Makefile ├── README.md ├── assets ├── auto-render.min.js ├── fonts │ ├── KaTeX_AMS-Regular.eot │ ├── KaTeX_AMS-Regular.ttf │ ├── KaTeX_AMS-Regular.woff │ ├── KaTeX_AMS-Regular.woff2 │ ├── KaTeX_Caligraphic-Bold.eot │ ├── KaTeX_Caligraphic-Bold.ttf │ ├── KaTeX_Caligraphic-Bold.woff │ ├── KaTeX_Caligraphic-Bold.woff2 │ ├── KaTeX_Caligraphic-Regular.eot │ ├── KaTeX_Caligraphic-Regular.ttf │ ├── KaTeX_Caligraphic-Regular.woff │ ├── KaTeX_Caligraphic-Regular.woff2 │ ├── KaTeX_Fraktur-Bold.eot │ ├── KaTeX_Fraktur-Bold.ttf │ ├── KaTeX_Fraktur-Bold.woff │ ├── KaTeX_Fraktur-Bold.woff2 │ ├── KaTeX_Fraktur-Regular.eot │ ├── KaTeX_Fraktur-Regular.ttf │ ├── KaTeX_Fraktur-Regular.woff │ ├── KaTeX_Fraktur-Regular.woff2 │ ├── KaTeX_Main-Bold.eot │ ├── KaTeX_Main-Bold.ttf │ ├── KaTeX_Main-Bold.woff │ ├── KaTeX_Main-Bold.woff2 │ ├── KaTeX_Main-Italic.eot │ ├── KaTeX_Main-Italic.ttf │ ├── KaTeX_Main-Italic.woff │ ├── KaTeX_Main-Italic.woff2 │ ├── KaTeX_Main-Regular.eot │ ├── KaTeX_Main-Regular.ttf │ ├── KaTeX_Main-Regular.woff │ ├── KaTeX_Main-Regular.woff2 │ ├── KaTeX_Math-BoldItalic.eot │ ├── KaTeX_Math-BoldItalic.ttf │ ├── KaTeX_Math-BoldItalic.woff │ ├── KaTeX_Math-BoldItalic.woff2 │ ├── KaTeX_Math-Italic.eot │ ├── KaTeX_Math-Italic.ttf │ ├── KaTeX_Math-Italic.woff │ ├── KaTeX_Math-Italic.woff2 │ ├── KaTeX_Math-Regular.eot │ ├── KaTeX_Math-Regular.ttf │ ├── KaTeX_Math-Regular.woff │ ├── KaTeX_Math-Regular.woff2 │ ├── KaTeX_SansSerif-Bold.eot │ ├── KaTeX_SansSerif-Bold.ttf │ ├── KaTeX_SansSerif-Bold.woff │ ├── KaTeX_SansSerif-Bold.woff2 │ ├── KaTeX_SansSerif-Italic.eot │ ├── KaTeX_SansSerif-Italic.ttf │ ├── KaTeX_SansSerif-Italic.woff │ ├── KaTeX_SansSerif-Italic.woff2 │ ├── KaTeX_SansSerif-Regular.eot │ ├── KaTeX_SansSerif-Regular.ttf │ ├── KaTeX_SansSerif-Regular.woff │ ├── KaTeX_SansSerif-Regular.woff2 │ ├── KaTeX_Script-Regular.eot │ ├── KaTeX_Script-Regular.ttf │ ├── KaTeX_Script-Regular.woff │ ├── KaTeX_Script-Regular.woff2 │ 
├── KaTeX_Size1-Regular.eot │ ├── KaTeX_Size1-Regular.ttf │ ├── KaTeX_Size1-Regular.woff │ ├── KaTeX_Size1-Regular.woff2 │ ├── KaTeX_Size2-Regular.eot │ ├── KaTeX_Size2-Regular.ttf │ ├── KaTeX_Size2-Regular.woff │ ├── KaTeX_Size2-Regular.woff2 │ ├── KaTeX_Size3-Regular.eot │ ├── KaTeX_Size3-Regular.ttf │ ├── KaTeX_Size3-Regular.woff │ ├── KaTeX_Size3-Regular.woff2 │ ├── KaTeX_Size4-Regular.eot │ ├── KaTeX_Size4-Regular.ttf │ ├── KaTeX_Size4-Regular.woff │ ├── KaTeX_Size4-Regular.woff2 │ ├── KaTeX_Typewriter-Regular.eot │ ├── KaTeX_Typewriter-Regular.ttf │ ├── KaTeX_Typewriter-Regular.woff │ └── KaTeX_Typewriter-Regular.woff2 ├── github.svg ├── index.html.jinja ├── katex.min.css ├── katex.min.js ├── remark.min.js ├── requirements-slides.txt ├── style.css └── twitter.svg ├── environment.yml ├── images ├── .gitkeep ├── LICENSE ├── bergstra_random.jpeg ├── column_transformer_schematic.png ├── data-representation.svg ├── favicon_org.png ├── grid_search_cross_validation.png ├── gridsearch_workflow.png ├── group_kfold.png ├── kfold_cv.png ├── med_knn_rf_comparison.png ├── ml-workflow-sklearn.svg ├── repeated_stratified_kfold.png ├── scikit-learn-logo-notext.png ├── shuffle_split_cv.png ├── split-data-three.svg ├── stratified_cv.png ├── time_series_cv.png ├── time_series_walk_forward_cv.png └── train-test.svg ├── index.html ├── maint_tools └── check_notebooks.sh ├── make.py ├── notebooks ├── .gitkeep ├── 00-review-sklearn.ipynb ├── 01-cross-validation.ipynb ├── 02-parameter-tuning.ipynb ├── 03-missing-values.ipynb ├── 04-pandas-interoperability.ipynb ├── data │ ├── approval_processed.csv │ ├── approval_topline.csv │ └── iris_w_missing.csv ├── images │ ├── approval_ratings.png │ ├── approval_ratings_random.png │ ├── approval_ratings_structured.png │ ├── group_kfold.svg │ ├── halvingcv.svg │ ├── kfold_cv.png │ ├── kfold_cv.svg │ ├── knn_boundary_n_neighbors.png │ ├── knn_model_complexity.png │ ├── med_knn_rf_comparison.png │ ├── overfitting_validation_set_1.svg │ ├── 
overfitting_validation_set_2.svg │ ├── repeated_stratified_kfold.svg │ ├── shuffle_split_cv.svg │ ├── stratified_cv.svg │ ├── time_series_cv.svg │ └── time_series_walk_forward_cv.svg ├── slide-images.ipynb └── solutions │ ├── 00-ex01-solutions.py │ ├── 01-ex01-solutions.py │ ├── 01-ex02-solutions.py │ ├── 02-ex01-solutions.py │ ├── 03-ex01-solutions.py │ └── 04-ex01-solutions.py ├── requirements.txt └── slides.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | -------------------------------------------------------------------------------- /BUILDING.md: -------------------------------------------------------------------------------- 1 | # FAQ 2 | 3 | ## How do I build slides? 4 | 5 | Install the dependencies: `pip install -r assets/requirements-slides.txt`. 6 | 7 | ```bash 8 | python make.py build 9 | ``` 10 | 11 | Remember to rebuild when `slides.md` gets updated. 12 | 13 | ## How do I develop and live reload? 14 | 15 | ```bash 16 | python make.py live 17 | ``` 18 | 19 | ## How to host on GitHub Pages? 20 | 21 | 1. Go to settings. 22 | 2. Enable GitHub Pages. 23 | 24 | ## How to change my favicon? 25 | 26 | Replace favicon with something else 27 | 28 | ## How to save as pdf? 29 | 30 | 1. Install decktape 31 | 32 | ```bash 33 | npm install -g decktape 34 | ``` 35 | 36 | 2. 
Run decktape 37 | 38 | ```bash 39 | decktape "http://localhost:5500" slides.pdf 40 | ``` 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Thomas Fan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean check 2 | 3 | clean: 4 | jupyter nbconvert --clear-output --inplace notebooks/0*.ipynb 5 | 6 | check: 7 | bash maint_tools/check_notebooks.sh 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Intermediate Machine Learning with scikit-learn 2 | ### Cross validation, Parameter Tuning, Pandas Interoperability, and Missing Values 3 | 4 | *By Thomas J. Fan* 5 | 6 | [Link to slides](https://thomasjpfan.github.io/ml-workshop-intermediate-1-of-2/) 7 | 8 | Scikit-learn is a Python machine learning library used by data science practitioners from many disciplines. We will learn about cross-validation, tuning machine learning algorithms, and pandas interoperability during this training. Cross-validation enables us to evaluate our machine learning models by splitting our data into multiple training and testing datasets. We will learn to handle missing values with imputation using univariate and multivariate techniques. Next, we will explore tuning algorithms in scikit-learn with grid search and random search. We will learn about categorical features and how to use scikit-learn's encoders to convert these categorical features into numerical features for a machine-learning algorithm to consume. Finally, we will apply the machine learning techniques on a house pricing dataset with scikit-learn's Histogram-based Gradient Boosted Trees. scikit-learn's boosted tree implementation is based on LightGBM and has similar performance characteristics. 
9 | 10 | ## Obtaining the Material 11 | 12 | ### With git 13 | 14 | The most convenient way to download the material is with git: 15 | 16 | ```bash 17 | git clone https://github.com/thomasjpfan/ml-workshop-intermediate-1-of-2 18 | ``` 19 | 20 | Please note that I may add and improve the material until shortly before the session. You can update your copy by running: 21 | 22 | ```bash 23 | git pull origin master 24 | ``` 25 | 26 | ### Download zip 27 | 28 | If you are not familiar with git, you can download this repository as a zip file at: [github.com/thomasjpfan/ml-workshop-intermediate-1-of-2/archive/master.zip](https://github.com/thomasjpfan/ml-workshop-intermediate-1-of-2/archive/master.zip). Please note that I may add and improve the material until shortly before the session. To update your copy please re-download the material a day before the session. 29 | 30 | ## Running the notebooks 31 | 32 | ### Local Installation 33 | 34 | Local installation requires `conda` to be installed on your machine. The simplest way to install `conda` is to install `miniconda` by using an installer for your operating system provided at [docs.conda.io/en/latest/miniconda.html](https://docs.conda.io/en/latest/miniconda.html). After `conda` is installed, navigate to this repository on your local machine: 35 | 36 | ```bash 37 | cd ml-workshop-intermediate-1-of-2 38 | ``` 39 | 40 | Then download and install the dependencies: 41 | 42 | ```bash 43 | conda env create -f environment.yml 44 | ``` 45 | 46 | This will create a virtual environment named `ml-workshop-intermediate-1-of-2`. To activate this environment: 47 | 48 | ```bash 49 | conda activate ml-workshop-intermediate-1-of-2 50 | ``` 51 | 52 | Finally, to start `jupyterlab` run: 53 | 54 | ```bash 55 | jupyter lab 56 | ``` 57 | 58 | This should open a browser window with the `jupyterlab` interface. 
59 | 60 | ### Run with Google's Colab 61 | 62 | If you have any issues with installing `conda` or running `jupyter` on your local computer, then you can run the notebooks on Google's Colab: 63 | 64 | 0. [Quick Review of scikit-learn](https://colab.research.google.com/github/thomasjpfan/ml-workshop-intermediate-1-of-2/blob/master/notebooks/00-review-sklearn.ipynb) 65 | 1. [Cross-Validation in scikit-learn](https://colab.research.google.com/github/thomasjpfan/ml-workshop-intermediate-1-of-2/blob/master/notebooks/01-cross-validation.ipynb) 66 | 2. [Parameter tuning](https://colab.research.google.com/github/thomasjpfan/ml-workshop-intermediate-1-of-2/blob/master/notebooks/02-parameter-tuning.ipynb) 67 | 3. [Missing values in scikit-learn](https://colab.research.google.com/github/thomasjpfan/ml-workshop-intermediate-1-of-2/blob/master/notebooks/03-missing-values.ipynb) 68 | 4. [Pandas Interoperability](https://colab.research.google.com/github/thomasjpfan/ml-workshop-intermediate-1-of-2/blob/master/notebooks/04-pandas-interoperability.ipynb) 69 | 70 | ## License 71 | 72 | This repo is under the [MIT License](LICENSE). 
73 | -------------------------------------------------------------------------------- /assets/auto-render.min.js: -------------------------------------------------------------------------------- 1 | (function(e){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=e()}else if(typeof define==="function"&&define.amd){define([],e)}else{var t;if(typeof window!=="undefined"){t=window}else if(typeof global!=="undefined"){t=global}else if(typeof self!=="undefined"){t=self}else{t=this}t.renderMathInElement=e()}})(function(){var e,t,n;return function e(t,n,r){function o(c,a){if(!n[c]){if(!t[c]){var u=typeof require=="function"&&require;if(!a&&u)return u(c,!0);if(i)return i(c,!0);var f=new Error("Cannot find module '"+c+"'");throw f.code="MODULE_NOT_FOUND",f}var s=n[c]={exports:{}};t[c][0].call(s.exports,function(e){var n=t[c][1][e];return o(n?n:e)},s,s.exports,e,t,n,r)}return n[c].exports}var i=typeof require=="function"&&require;for(var c=0;cf){s=a[f++];if(s!=s)return true}else for(;u>f;f++)if(e||f in a){if(a[f]===n)return e||f||0}return!e&&-1}}},{"./_to-index":32,"./_to-iobject":34,"./_to-length":35}],8:[function(e,t,n){var r={}.toString;t.exports=function(e){return r.call(e).slice(8,-1)}},{}],9:[function(e,t,n){var r=t.exports={version:"2.4.0"};if(typeof __e=="number")__e=r},{}],10:[function(e,t,n){var r=e("./_a-function");t.exports=function(e,t,n){r(e);if(t===undefined)return e;switch(n){case 1:return function(n){return e.call(t,n)};case 2:return function(n,r){return e.call(t,n,r)};case 3:return function(n,r,o){return e.call(t,n,r,o)}}return function(){return e.apply(t,arguments)}}},{"./_a-function":5}],11:[function(e,t,n){t.exports=function(e){if(e==undefined)throw TypeError("Can't call method on "+e);return e}},{}],12:[function(e,t,n){t.exports=!e("./_fails")(function(){return Object.defineProperty({},"a",{get:function(){return 7}}).a!=7})},{"./_fails":16}],13:[function(e,t,n){var 
r=e("./_is-object"),o=e("./_global").document,i=r(o)&&r(o.createElement);t.exports=function(e){return i?o.createElement(e):{}}},{"./_global":17,"./_is-object":22}],14:[function(e,t,n){t.exports="constructor,hasOwnProperty,isPrototypeOf,propertyIsEnumerable,toLocaleString,toString,valueOf".split(",")},{}],15:[function(e,t,n){var r=e("./_global"),o=e("./_core"),i=e("./_ctx"),c=e("./_hide"),a="prototype";var u=function(e,t,n){var f=e&u.F,s=e&u.G,l=e&u.S,p=e&u.P,d=e&u.B,_=e&u.W,v=s?o:o[t]||(o[t]={}),h=v[a],b=s?r:l?r[t]:(r[t]||{})[a],y,g,x;if(s)n=t;for(y in n){g=!f&&b&&b[y]!==undefined;if(g&&y in v)continue;x=g?b[y]:n[y];v[y]=s&&typeof b[y]!="function"?n[y]:d&&g?i(x,r):_&&b[y]==x?function(e){var t=function(t,n,r){if(this instanceof e){switch(arguments.length){case 0:return new e;case 1:return new e(t);case 2:return new e(t,n)}return new e(t,n,r)}return e.apply(this,arguments)};t[a]=e[a];return t}(x):p&&typeof x=="function"?i(Function.call,x):x;if(p){(v.virtual||(v.virtual={}))[y]=x;if(e&u.R&&h&&!h[y])c(h,y,x)}}};u.F=1;u.G=2;u.S=4;u.P=8;u.B=16;u.W=32;u.U=64;u.R=128;t.exports=u},{"./_core":9,"./_ctx":10,"./_global":17,"./_hide":19}],16:[function(e,t,n){t.exports=function(e){try{return!!e()}catch(e){return true}}},{}],17:[function(e,t,n){var r=t.exports=typeof window!="undefined"&&window.Math==Math?window:typeof self!="undefined"&&self.Math==Math?self:Function("return this")();if(typeof __g=="number")__g=r},{}],18:[function(e,t,n){var r={}.hasOwnProperty;t.exports=function(e,t){return r.call(e,t)}},{}],19:[function(e,t,n){var r=e("./_object-dp"),o=e("./_property-desc");t.exports=e("./_descriptors")?function(e,t,n){return r.f(e,t,o(1,n))}:function(e,t,n){e[t]=n;return e}},{"./_descriptors":12,"./_object-dp":24,"./_property-desc":29}],20:[function(e,t,n){t.exports=!e("./_descriptors")&&!e("./_fails")(function(){return Object.defineProperty(e("./_dom-create")("div"),"a",{get:function(){return 
7}}).a!=7})},{"./_descriptors":12,"./_dom-create":13,"./_fails":16}],21:[function(e,t,n){var r=e("./_cof");t.exports=Object("z").propertyIsEnumerable(0)?Object:function(e){return r(e)=="String"?e.split(""):Object(e)}},{"./_cof":8}],22:[function(e,t,n){t.exports=function(e){return typeof e==="object"?e!==null:typeof e==="function"}},{}],23:[function(e,t,n){"use strict";var r=e("./_object-keys"),o=e("./_object-gops"),i=e("./_object-pie"),c=e("./_to-object"),a=e("./_iobject"),u=Object.assign;t.exports=!u||e("./_fails")(function(){var e={},t={},n=Symbol(),r="abcdefghijklmnopqrst";e[n]=7;r.split("").forEach(function(e){t[e]=e});return u({},e)[n]!=7||Object.keys(u({},t)).join("")!=r})?function e(t,n){var u=c(t),f=arguments.length,s=1,l=o.f,p=i.f;while(f>s){var d=a(arguments[s++]),_=l?r(d).concat(l(d)):r(d),v=_.length,h=0,b;while(v>h)if(p.call(d,b=_[h++]))u[b]=d[b]}return u}:u},{"./_fails":16,"./_iobject":21,"./_object-gops":25,"./_object-keys":27,"./_object-pie":28,"./_to-object":36}],24:[function(e,t,n){var r=e("./_an-object"),o=e("./_ie8-dom-define"),i=e("./_to-primitive"),c=Object.defineProperty;n.f=e("./_descriptors")?Object.defineProperty:function e(t,n,a){r(t);n=i(n,true);r(a);if(o)try{return c(t,n,a)}catch(e){}if("get"in a||"set"in a)throw TypeError("Accessors not supported!");if("value"in a)t[n]=a.value;return t}},{"./_an-object":6,"./_descriptors":12,"./_ie8-dom-define":20,"./_to-primitive":37}],25:[function(e,t,n){n.f=Object.getOwnPropertySymbols},{}],26:[function(e,t,n){var r=e("./_has"),o=e("./_to-iobject"),i=e("./_array-includes")(false),c=e("./_shared-key")("IE_PROTO");t.exports=function(e,t){var n=o(e),a=0,u=[],f;for(f in n)if(f!=c)r(n,f)&&u.push(f);while(t.length>a)if(r(n,f=t[a++])){~i(u,f)||u.push(f)}return u}},{"./_array-includes":7,"./_has":18,"./_shared-key":30,"./_to-iobject":34}],27:[function(e,t,n){var r=e("./_object-keys-internal"),o=e("./_enum-bug-keys");t.exports=Object.keys||function e(t){return 
r(t,o)}},{"./_enum-bug-keys":14,"./_object-keys-internal":26}],28:[function(e,t,n){n.f={}.propertyIsEnumerable},{}],29:[function(e,t,n){t.exports=function(e,t){return{enumerable:!(e&1),configurable:!(e&2),writable:!(e&4),value:t}}},{}],30:[function(e,t,n){var r=e("./_shared")("keys"),o=e("./_uid");t.exports=function(e){return r[e]||(r[e]=o(e))}},{"./_shared":31,"./_uid":38}],31:[function(e,t,n){var r=e("./_global"),o="__core-js_shared__",i=r[o]||(r[o]={});t.exports=function(e){return i[e]||(i[e]={})}},{"./_global":17}],32:[function(e,t,n){var r=e("./_to-integer"),o=Math.max,i=Math.min;t.exports=function(e,t){e=r(e);return e<0?o(e+t,0):i(e,t)}},{"./_to-integer":33}],33:[function(e,t,n){var r=Math.ceil,o=Math.floor;t.exports=function(e){return isNaN(e=+e)?0:(e>0?o:r)(e)}},{}],34:[function(e,t,n){var r=e("./_iobject"),o=e("./_defined");t.exports=function(e){return r(o(e))}},{"./_defined":11,"./_iobject":21}],35:[function(e,t,n){var r=e("./_to-integer"),o=Math.min;t.exports=function(e){return e>0?o(r(e),9007199254740991):0}},{"./_to-integer":33}],36:[function(e,t,n){var r=e("./_defined");t.exports=function(e){return Object(r(e))}},{"./_defined":11}],37:[function(e,t,n){var r=e("./_is-object");t.exports=function(e,t){if(!r(e))return e;var n,o;if(t&&typeof(n=e.toString)=="function"&&!r(o=n.call(e)))return o;if(typeof(n=e.valueOf)=="function"&&!r(o=n.call(e)))return o;if(!t&&typeof(n=e.toString)=="function"&&!r(o=n.call(e)))return o;throw TypeError("Can't convert object to primitive value")}},{"./_is-object":22}],38:[function(e,t,n){var r=0,o=Math.random();t.exports=function(e){return"Symbol(".concat(e===undefined?"":e,")_",(++r+o).toString(36))}},{}],39:[function(e,t,n){var r=e("./_export");r(r.S+r.F,"Object",{assign:e("./_object-assign")})},{"./_export":15,"./_object-assign":23}]},{},[1])(1)}); 2 | -------------------------------------------------------------------------------- /assets/fonts/KaTeX_AMS-Regular.eot: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_AMS-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_AMS-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_AMS-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_AMS-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_AMS-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_AMS-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_AMS-Regular.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Caligraphic-Bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Caligraphic-Bold.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Caligraphic-Bold.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Caligraphic-Bold.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Caligraphic-Bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Caligraphic-Bold.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Caligraphic-Bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Caligraphic-Bold.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Caligraphic-Regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Caligraphic-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Caligraphic-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Caligraphic-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Caligraphic-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Caligraphic-Regular.woff 
-------------------------------------------------------------------------------- /assets/fonts/KaTeX_Caligraphic-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Caligraphic-Regular.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Fraktur-Bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Fraktur-Bold.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Fraktur-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Fraktur-Bold.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Fraktur-Bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Fraktur-Bold.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Fraktur-Bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Fraktur-Bold.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Fraktur-Regular.eot: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Fraktur-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Fraktur-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Fraktur-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Fraktur-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Fraktur-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Fraktur-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Fraktur-Regular.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Main-Bold.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Bold.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Main-Bold.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Main-Bold.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Main-Bold.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Italic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Main-Italic.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Italic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Main-Italic.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Main-Italic.woff -------------------------------------------------------------------------------- 
/assets/fonts/KaTeX_Main-Italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Main-Italic.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Main-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Main-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Main-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Main-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Main-Regular.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-BoldItalic.eot: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Math-BoldItalic.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-BoldItalic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Math-BoldItalic.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-BoldItalic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Math-BoldItalic.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-BoldItalic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Math-BoldItalic.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-Italic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Math-Italic.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-Italic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Math-Italic.ttf 
-------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-Italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Math-Italic.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-Italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Math-Italic.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-Regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Math-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Math-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Math-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Math-Regular.woff2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Math-Regular.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_SansSerif-Bold.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_SansSerif-Bold.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_SansSerif-Bold.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_SansSerif-Bold.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Italic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_SansSerif-Italic.eot 
-------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Italic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_SansSerif-Italic.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_SansSerif-Italic.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_SansSerif-Italic.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_SansSerif-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_SansSerif-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Regular.woff: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_SansSerif-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_SansSerif-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_SansSerif-Regular.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Script-Regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Script-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Script-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Script-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Script-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Script-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Script-Regular.woff2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Script-Regular.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size1-Regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Size1-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size1-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Size1-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size1-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Size1-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size1-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Size1-Regular.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size2-Regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Size2-Regular.eot 
-------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size2-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Size2-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size2-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Size2-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size2-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Size2-Regular.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size3-Regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Size3-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size3-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Size3-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size3-Regular.woff: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Size3-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size3-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Size3-Regular.woff2 -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size4-Regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Size4-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size4-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Size4-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size4-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Size4-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Size4-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Size4-Regular.woff2 
-------------------------------------------------------------------------------- /assets/fonts/KaTeX_Typewriter-Regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Typewriter-Regular.eot -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Typewriter-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Typewriter-Regular.ttf -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Typewriter-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Typewriter-Regular.woff -------------------------------------------------------------------------------- /assets/fonts/KaTeX_Typewriter-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/assets/fonts/KaTeX_Typewriter-Regular.woff2 -------------------------------------------------------------------------------- /assets/github.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /assets/index.html.jinja: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {{ title }} 6 | 7 | 8 | 9 | {% if use_katex %} 10 | 11 | 12 | 13 | {% endif %} 14 | 15 | 16 | 17 | 18 | 
19 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /assets/katex.min.css: -------------------------------------------------------------------------------- 1 | @font-face{font-family:KaTeX_AMS;src:url(fonts/KaTeX_AMS-Regular.woff2) format("woff2"),url(fonts/KaTeX_AMS-Regular.woff) format("woff"),url(fonts/KaTeX_AMS-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Caligraphic;src:url(fonts/KaTeX_Caligraphic-Bold.woff2) format("woff2"),url(fonts/KaTeX_Caligraphic-Bold.woff) format("woff"),url(fonts/KaTeX_Caligraphic-Bold.ttf) format("truetype");font-weight:700;font-style:normal}@font-face{font-family:KaTeX_Caligraphic;src:url(fonts/KaTeX_Caligraphic-Regular.woff2) format("woff2"),url(fonts/KaTeX_Caligraphic-Regular.woff) format("woff"),url(fonts/KaTeX_Caligraphic-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Fraktur;src:url(fonts/KaTeX_Fraktur-Bold.woff2) format("woff2"),url(fonts/KaTeX_Fraktur-Bold.woff) format("woff"),url(fonts/KaTeX_Fraktur-Bold.ttf) format("truetype");font-weight:700;font-style:normal}@font-face{font-family:KaTeX_Fraktur;src:url(fonts/KaTeX_Fraktur-Regular.woff2) format("woff2"),url(fonts/KaTeX_Fraktur-Regular.woff) format("woff"),url(fonts/KaTeX_Fraktur-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Main;src:url(fonts/KaTeX_Main-Bold.woff2) format("woff2"),url(fonts/KaTeX_Main-Bold.woff) format("woff"),url(fonts/KaTeX_Main-Bold.ttf) format("truetype");font-weight:700;font-style:normal}@font-face{font-family:KaTeX_Main;src:url(fonts/KaTeX_Main-BoldItalic.woff2) format("woff2"),url(fonts/KaTeX_Main-BoldItalic.woff) format("woff"),url(fonts/KaTeX_Main-BoldItalic.ttf) format("truetype");font-weight:700;font-style:italic}@font-face{font-family:KaTeX_Main;src:url(fonts/KaTeX_Main-Italic.woff2) format("woff2"),url(fonts/KaTeX_Main-Italic.woff) 
format("woff"),url(fonts/KaTeX_Main-Italic.ttf) format("truetype");font-weight:400;font-style:italic}@font-face{font-family:KaTeX_Main;src:url(fonts/KaTeX_Main-Regular.woff2) format("woff2"),url(fonts/KaTeX_Main-Regular.woff) format("woff"),url(fonts/KaTeX_Main-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Math;src:url(fonts/KaTeX_Math-BoldItalic.woff2) format("woff2"),url(fonts/KaTeX_Math-BoldItalic.woff) format("woff"),url(fonts/KaTeX_Math-BoldItalic.ttf) format("truetype");font-weight:700;font-style:italic}@font-face{font-family:KaTeX_Math;src:url(fonts/KaTeX_Math-Italic.woff2) format("woff2"),url(fonts/KaTeX_Math-Italic.woff) format("woff"),url(fonts/KaTeX_Math-Italic.ttf) format("truetype");font-weight:400;font-style:italic}@font-face{font-family:"KaTeX_SansSerif";src:url(fonts/KaTeX_SansSerif-Bold.woff2) format("woff2"),url(fonts/KaTeX_SansSerif-Bold.woff) format("woff"),url(fonts/KaTeX_SansSerif-Bold.ttf) format("truetype");font-weight:700;font-style:normal}@font-face{font-family:"KaTeX_SansSerif";src:url(fonts/KaTeX_SansSerif-Italic.woff2) format("woff2"),url(fonts/KaTeX_SansSerif-Italic.woff) format("woff"),url(fonts/KaTeX_SansSerif-Italic.ttf) format("truetype");font-weight:400;font-style:italic}@font-face{font-family:"KaTeX_SansSerif";src:url(fonts/KaTeX_SansSerif-Regular.woff2) format("woff2"),url(fonts/KaTeX_SansSerif-Regular.woff) format("woff"),url(fonts/KaTeX_SansSerif-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Script;src:url(fonts/KaTeX_Script-Regular.woff2) format("woff2"),url(fonts/KaTeX_Script-Regular.woff) format("woff"),url(fonts/KaTeX_Script-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Size1;src:url(fonts/KaTeX_Size1-Regular.woff2) format("woff2"),url(fonts/KaTeX_Size1-Regular.woff) format("woff"),url(fonts/KaTeX_Size1-Regular.ttf) 
format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Size2;src:url(fonts/KaTeX_Size2-Regular.woff2) format("woff2"),url(fonts/KaTeX_Size2-Regular.woff) format("woff"),url(fonts/KaTeX_Size2-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Size3;src:url(fonts/KaTeX_Size3-Regular.woff2) format("woff2"),url(fonts/KaTeX_Size3-Regular.woff) format("woff"),url(fonts/KaTeX_Size3-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Size4;src:url(fonts/KaTeX_Size4-Regular.woff2) format("woff2"),url(fonts/KaTeX_Size4-Regular.woff) format("woff"),url(fonts/KaTeX_Size4-Regular.ttf) format("truetype");font-weight:400;font-style:normal}@font-face{font-family:KaTeX_Typewriter;src:url(fonts/KaTeX_Typewriter-Regular.woff2) format("woff2"),url(fonts/KaTeX_Typewriter-Regular.woff) format("woff"),url(fonts/KaTeX_Typewriter-Regular.ttf) format("truetype");font-weight:400;font-style:normal}.katex{font:normal 1.21em KaTeX_Main,Times New Roman,serif;line-height:1.2;text-indent:0;text-rendering:auto}.katex *{-ms-high-contrast-adjust:none!important}.katex .katex-version:after{content:"0.11.1"}.katex .katex-mathml{position:absolute;clip:rect(1px,1px,1px,1px);padding:0;border:0;height:1px;width:1px;overflow:hidden}.katex .katex-html>.newline{display:block}.katex .base{position:relative;white-space:nowrap;width:min-content}.katex .base,.katex .strut{display:inline-block}.katex .textbf{font-weight:700}.katex .textit{font-style:italic}.katex .textrm{font-family:KaTeX_Main}.katex .textsf{font-family:KaTeX_SansSerif}.katex .texttt{font-family:KaTeX_Typewriter}.katex .mathdefault{font-family:KaTeX_Math;font-style:italic}.katex .mathit{font-family:KaTeX_Main;font-style:italic}.katex .mathrm{font-style:normal}.katex .mathbf{font-family:KaTeX_Main;font-weight:700}.katex .boldsymbol{font-family:KaTeX_Math;font-weight:700;font-style:italic}.katex .amsrm,.katex .mathbb,.katex 
.textbb{font-family:KaTeX_AMS}.katex .mathcal{font-family:KaTeX_Caligraphic}.katex .mathfrak,.katex .textfrak{font-family:KaTeX_Fraktur}.katex .mathtt{font-family:KaTeX_Typewriter}.katex .mathscr,.katex .textscr{font-family:KaTeX_Script}.katex .mathsf,.katex .textsf{font-family:KaTeX_SansSerif}.katex .mathboldsf,.katex .textboldsf{font-family:KaTeX_SansSerif;font-weight:700}.katex .mathitsf,.katex .textitsf{font-family:KaTeX_SansSerif;font-style:italic}.katex .mainrm{font-family:KaTeX_Main;font-style:normal}.katex .vlist-t{display:inline-table;table-layout:fixed}.katex .vlist-r{display:table-row}.katex .vlist{display:table-cell;vertical-align:bottom;position:relative}.katex .vlist>span{display:block;height:0;position:relative}.katex .vlist>span>span{display:inline-block}.katex .vlist>span>.pstrut{overflow:hidden;width:0}.katex .vlist-t2{margin-right:-2px}.katex .vlist-s{display:table-cell;vertical-align:bottom;font-size:1px;width:2px;min-width:2px}.katex .msupsub{text-align:left}.katex .mfrac>span>span{text-align:center}.katex .mfrac .frac-line{display:inline-block;width:100%;border-bottom-style:solid}.katex .hdashline,.katex .hline,.katex .mfrac .frac-line,.katex .overline .overline-line,.katex .rule,.katex .underline .underline-line{min-height:1px}.katex .mspace{display:inline-block}.katex .clap,.katex .llap,.katex .rlap{width:0;position:relative}.katex .clap>.inner,.katex .llap>.inner,.katex .rlap>.inner{position:absolute}.katex .clap>.fix,.katex .llap>.fix,.katex .rlap>.fix{display:inline-block}.katex .llap>.inner{right:0}.katex .clap>.inner,.katex .rlap>.inner{left:0}.katex .clap>.inner>span{margin-left:-50%;margin-right:50%}.katex .rule{display:inline-block;border:0 solid;position:relative}.katex .hline,.katex .overline .overline-line,.katex .underline .underline-line{display:inline-block;width:100%;border-bottom-style:solid}.katex .hdashline{display:inline-block;width:100%;border-bottom-style:dashed}.katex 
.sqrt>.root{margin-left:.27777778em;margin-right:-.55555556em}.katex .fontsize-ensurer.reset-size1.size1,.katex .sizing.reset-size1.size1{font-size:1em}.katex .fontsize-ensurer.reset-size1.size2,.katex .sizing.reset-size1.size2{font-size:1.2em}.katex .fontsize-ensurer.reset-size1.size3,.katex .sizing.reset-size1.size3{font-size:1.4em}.katex .fontsize-ensurer.reset-size1.size4,.katex .sizing.reset-size1.size4{font-size:1.6em}.katex .fontsize-ensurer.reset-size1.size5,.katex .sizing.reset-size1.size5{font-size:1.8em}.katex .fontsize-ensurer.reset-size1.size6,.katex .sizing.reset-size1.size6{font-size:2em}.katex .fontsize-ensurer.reset-size1.size7,.katex .sizing.reset-size1.size7{font-size:2.4em}.katex .fontsize-ensurer.reset-size1.size8,.katex .sizing.reset-size1.size8{font-size:2.88em}.katex .fontsize-ensurer.reset-size1.size9,.katex .sizing.reset-size1.size9{font-size:3.456em}.katex .fontsize-ensurer.reset-size1.size10,.katex .sizing.reset-size1.size10{font-size:4.148em}.katex .fontsize-ensurer.reset-size1.size11,.katex .sizing.reset-size1.size11{font-size:4.976em}.katex .fontsize-ensurer.reset-size2.size1,.katex .sizing.reset-size2.size1{font-size:.83333333em}.katex .fontsize-ensurer.reset-size2.size2,.katex .sizing.reset-size2.size2{font-size:1em}.katex .fontsize-ensurer.reset-size2.size3,.katex .sizing.reset-size2.size3{font-size:1.16666667em}.katex .fontsize-ensurer.reset-size2.size4,.katex .sizing.reset-size2.size4{font-size:1.33333333em}.katex .fontsize-ensurer.reset-size2.size5,.katex .sizing.reset-size2.size5{font-size:1.5em}.katex .fontsize-ensurer.reset-size2.size6,.katex .sizing.reset-size2.size6{font-size:1.66666667em}.katex .fontsize-ensurer.reset-size2.size7,.katex .sizing.reset-size2.size7{font-size:2em}.katex .fontsize-ensurer.reset-size2.size8,.katex .sizing.reset-size2.size8{font-size:2.4em}.katex .fontsize-ensurer.reset-size2.size9,.katex .sizing.reset-size2.size9{font-size:2.88em}.katex .fontsize-ensurer.reset-size2.size10,.katex 
.sizing.reset-size2.size10{font-size:3.45666667em}.katex .fontsize-ensurer.reset-size2.size11,.katex .sizing.reset-size2.size11{font-size:4.14666667em}.katex .fontsize-ensurer.reset-size3.size1,.katex .sizing.reset-size3.size1{font-size:.71428571em}.katex .fontsize-ensurer.reset-size3.size2,.katex .sizing.reset-size3.size2{font-size:.85714286em}.katex .fontsize-ensurer.reset-size3.size3,.katex .sizing.reset-size3.size3{font-size:1em}.katex .fontsize-ensurer.reset-size3.size4,.katex .sizing.reset-size3.size4{font-size:1.14285714em}.katex .fontsize-ensurer.reset-size3.size5,.katex .sizing.reset-size3.size5{font-size:1.28571429em}.katex .fontsize-ensurer.reset-size3.size6,.katex .sizing.reset-size3.size6{font-size:1.42857143em}.katex .fontsize-ensurer.reset-size3.size7,.katex .sizing.reset-size3.size7{font-size:1.71428571em}.katex .fontsize-ensurer.reset-size3.size8,.katex .sizing.reset-size3.size8{font-size:2.05714286em}.katex .fontsize-ensurer.reset-size3.size9,.katex .sizing.reset-size3.size9{font-size:2.46857143em}.katex .fontsize-ensurer.reset-size3.size10,.katex .sizing.reset-size3.size10{font-size:2.96285714em}.katex .fontsize-ensurer.reset-size3.size11,.katex .sizing.reset-size3.size11{font-size:3.55428571em}.katex .fontsize-ensurer.reset-size4.size1,.katex .sizing.reset-size4.size1{font-size:.625em}.katex .fontsize-ensurer.reset-size4.size2,.katex .sizing.reset-size4.size2{font-size:.75em}.katex .fontsize-ensurer.reset-size4.size3,.katex .sizing.reset-size4.size3{font-size:.875em}.katex .fontsize-ensurer.reset-size4.size4,.katex .sizing.reset-size4.size4{font-size:1em}.katex .fontsize-ensurer.reset-size4.size5,.katex .sizing.reset-size4.size5{font-size:1.125em}.katex .fontsize-ensurer.reset-size4.size6,.katex .sizing.reset-size4.size6{font-size:1.25em}.katex .fontsize-ensurer.reset-size4.size7,.katex .sizing.reset-size4.size7{font-size:1.5em}.katex .fontsize-ensurer.reset-size4.size8,.katex .sizing.reset-size4.size8{font-size:1.8em}.katex 
.fontsize-ensurer.reset-size4.size9,.katex .sizing.reset-size4.size9{font-size:2.16em}.katex .fontsize-ensurer.reset-size4.size10,.katex .sizing.reset-size4.size10{font-size:2.5925em}.katex .fontsize-ensurer.reset-size4.size11,.katex .sizing.reset-size4.size11{font-size:3.11em}.katex .fontsize-ensurer.reset-size5.size1,.katex .sizing.reset-size5.size1{font-size:.55555556em}.katex .fontsize-ensurer.reset-size5.size2,.katex .sizing.reset-size5.size2{font-size:.66666667em}.katex .fontsize-ensurer.reset-size5.size3,.katex .sizing.reset-size5.size3{font-size:.77777778em}.katex .fontsize-ensurer.reset-size5.size4,.katex .sizing.reset-size5.size4{font-size:.88888889em}.katex .fontsize-ensurer.reset-size5.size5,.katex .sizing.reset-size5.size5{font-size:1em}.katex .fontsize-ensurer.reset-size5.size6,.katex .sizing.reset-size5.size6{font-size:1.11111111em}.katex .fontsize-ensurer.reset-size5.size7,.katex .sizing.reset-size5.size7{font-size:1.33333333em}.katex .fontsize-ensurer.reset-size5.size8,.katex .sizing.reset-size5.size8{font-size:1.6em}.katex .fontsize-ensurer.reset-size5.size9,.katex .sizing.reset-size5.size9{font-size:1.92em}.katex .fontsize-ensurer.reset-size5.size10,.katex .sizing.reset-size5.size10{font-size:2.30444444em}.katex .fontsize-ensurer.reset-size5.size11,.katex .sizing.reset-size5.size11{font-size:2.76444444em}.katex .fontsize-ensurer.reset-size6.size1,.katex .sizing.reset-size6.size1{font-size:.5em}.katex .fontsize-ensurer.reset-size6.size2,.katex .sizing.reset-size6.size2{font-size:.6em}.katex .fontsize-ensurer.reset-size6.size3,.katex .sizing.reset-size6.size3{font-size:.7em}.katex .fontsize-ensurer.reset-size6.size4,.katex .sizing.reset-size6.size4{font-size:.8em}.katex .fontsize-ensurer.reset-size6.size5,.katex .sizing.reset-size6.size5{font-size:.9em}.katex .fontsize-ensurer.reset-size6.size6,.katex .sizing.reset-size6.size6{font-size:1em}.katex .fontsize-ensurer.reset-size6.size7,.katex .sizing.reset-size6.size7{font-size:1.2em}.katex 
.fontsize-ensurer.reset-size6.size8,.katex .sizing.reset-size6.size8{font-size:1.44em}.katex .fontsize-ensurer.reset-size6.size9,.katex .sizing.reset-size6.size9{font-size:1.728em}.katex .fontsize-ensurer.reset-size6.size10,.katex .sizing.reset-size6.size10{font-size:2.074em}.katex .fontsize-ensurer.reset-size6.size11,.katex .sizing.reset-size6.size11{font-size:2.488em}.katex .fontsize-ensurer.reset-size7.size1,.katex .sizing.reset-size7.size1{font-size:.41666667em}.katex .fontsize-ensurer.reset-size7.size2,.katex .sizing.reset-size7.size2{font-size:.5em}.katex .fontsize-ensurer.reset-size7.size3,.katex .sizing.reset-size7.size3{font-size:.58333333em}.katex .fontsize-ensurer.reset-size7.size4,.katex .sizing.reset-size7.size4{font-size:.66666667em}.katex .fontsize-ensurer.reset-size7.size5,.katex .sizing.reset-size7.size5{font-size:.75em}.katex .fontsize-ensurer.reset-size7.size6,.katex .sizing.reset-size7.size6{font-size:.83333333em}.katex .fontsize-ensurer.reset-size7.size7,.katex .sizing.reset-size7.size7{font-size:1em}.katex .fontsize-ensurer.reset-size7.size8,.katex .sizing.reset-size7.size8{font-size:1.2em}.katex .fontsize-ensurer.reset-size7.size9,.katex .sizing.reset-size7.size9{font-size:1.44em}.katex .fontsize-ensurer.reset-size7.size10,.katex .sizing.reset-size7.size10{font-size:1.72833333em}.katex .fontsize-ensurer.reset-size7.size11,.katex .sizing.reset-size7.size11{font-size:2.07333333em}.katex .fontsize-ensurer.reset-size8.size1,.katex .sizing.reset-size8.size1{font-size:.34722222em}.katex .fontsize-ensurer.reset-size8.size2,.katex .sizing.reset-size8.size2{font-size:.41666667em}.katex .fontsize-ensurer.reset-size8.size3,.katex .sizing.reset-size8.size3{font-size:.48611111em}.katex .fontsize-ensurer.reset-size8.size4,.katex .sizing.reset-size8.size4{font-size:.55555556em}.katex .fontsize-ensurer.reset-size8.size5,.katex .sizing.reset-size8.size5{font-size:.625em}.katex .fontsize-ensurer.reset-size8.size6,.katex 
.sizing.reset-size8.size6{font-size:.69444444em}.katex .fontsize-ensurer.reset-size8.size7,.katex .sizing.reset-size8.size7{font-size:.83333333em}.katex .fontsize-ensurer.reset-size8.size8,.katex .sizing.reset-size8.size8{font-size:1em}.katex .fontsize-ensurer.reset-size8.size9,.katex .sizing.reset-size8.size9{font-size:1.2em}.katex .fontsize-ensurer.reset-size8.size10,.katex .sizing.reset-size8.size10{font-size:1.44027778em}.katex .fontsize-ensurer.reset-size8.size11,.katex .sizing.reset-size8.size11{font-size:1.72777778em}.katex .fontsize-ensurer.reset-size9.size1,.katex .sizing.reset-size9.size1{font-size:.28935185em}.katex .fontsize-ensurer.reset-size9.size2,.katex .sizing.reset-size9.size2{font-size:.34722222em}.katex .fontsize-ensurer.reset-size9.size3,.katex .sizing.reset-size9.size3{font-size:.40509259em}.katex .fontsize-ensurer.reset-size9.size4,.katex .sizing.reset-size9.size4{font-size:.46296296em}.katex .fontsize-ensurer.reset-size9.size5,.katex .sizing.reset-size9.size5{font-size:.52083333em}.katex .fontsize-ensurer.reset-size9.size6,.katex .sizing.reset-size9.size6{font-size:.5787037em}.katex .fontsize-ensurer.reset-size9.size7,.katex .sizing.reset-size9.size7{font-size:.69444444em}.katex .fontsize-ensurer.reset-size9.size8,.katex .sizing.reset-size9.size8{font-size:.83333333em}.katex .fontsize-ensurer.reset-size9.size9,.katex .sizing.reset-size9.size9{font-size:1em}.katex .fontsize-ensurer.reset-size9.size10,.katex .sizing.reset-size9.size10{font-size:1.20023148em}.katex .fontsize-ensurer.reset-size9.size11,.katex .sizing.reset-size9.size11{font-size:1.43981481em}.katex .fontsize-ensurer.reset-size10.size1,.katex .sizing.reset-size10.size1{font-size:.24108004em}.katex .fontsize-ensurer.reset-size10.size2,.katex .sizing.reset-size10.size2{font-size:.28929605em}.katex .fontsize-ensurer.reset-size10.size3,.katex .sizing.reset-size10.size3{font-size:.33751205em}.katex .fontsize-ensurer.reset-size10.size4,.katex 
.sizing.reset-size10.size4{font-size:.38572806em}.katex .fontsize-ensurer.reset-size10.size5,.katex .sizing.reset-size10.size5{font-size:.43394407em}.katex .fontsize-ensurer.reset-size10.size6,.katex .sizing.reset-size10.size6{font-size:.48216008em}.katex .fontsize-ensurer.reset-size10.size7,.katex .sizing.reset-size10.size7{font-size:.57859209em}.katex .fontsize-ensurer.reset-size10.size8,.katex .sizing.reset-size10.size8{font-size:.69431051em}.katex .fontsize-ensurer.reset-size10.size9,.katex .sizing.reset-size10.size9{font-size:.83317261em}.katex .fontsize-ensurer.reset-size10.size10,.katex .sizing.reset-size10.size10{font-size:1em}.katex .fontsize-ensurer.reset-size10.size11,.katex .sizing.reset-size10.size11{font-size:1.19961427em}.katex .fontsize-ensurer.reset-size11.size1,.katex .sizing.reset-size11.size1{font-size:.20096463em}.katex .fontsize-ensurer.reset-size11.size2,.katex .sizing.reset-size11.size2{font-size:.24115756em}.katex .fontsize-ensurer.reset-size11.size3,.katex .sizing.reset-size11.size3{font-size:.28135048em}.katex .fontsize-ensurer.reset-size11.size4,.katex .sizing.reset-size11.size4{font-size:.32154341em}.katex .fontsize-ensurer.reset-size11.size5,.katex .sizing.reset-size11.size5{font-size:.36173633em}.katex .fontsize-ensurer.reset-size11.size6,.katex .sizing.reset-size11.size6{font-size:.40192926em}.katex .fontsize-ensurer.reset-size11.size7,.katex .sizing.reset-size11.size7{font-size:.48231511em}.katex .fontsize-ensurer.reset-size11.size8,.katex .sizing.reset-size11.size8{font-size:.57877814em}.katex .fontsize-ensurer.reset-size11.size9,.katex .sizing.reset-size11.size9{font-size:.69453376em}.katex .fontsize-ensurer.reset-size11.size10,.katex .sizing.reset-size11.size10{font-size:.83360129em}.katex .fontsize-ensurer.reset-size11.size11,.katex .sizing.reset-size11.size11{font-size:1em}.katex .delimsizing.size1{font-family:KaTeX_Size1}.katex .delimsizing.size2{font-family:KaTeX_Size2}.katex .delimsizing.size3{font-family:KaTeX_Size3}.katex 
.delimsizing.size4{font-family:KaTeX_Size4}.katex .delimsizing.mult .delim-size1>span{font-family:KaTeX_Size1}.katex .delimsizing.mult .delim-size4>span{font-family:KaTeX_Size4}.katex .nulldelimiter{display:inline-block;width:.12em}.katex .delimcenter,.katex .op-symbol{position:relative}.katex .op-symbol.small-op{font-family:KaTeX_Size1}.katex .op-symbol.large-op{font-family:KaTeX_Size2}.katex .op-limits>.vlist-t{text-align:center}.katex .accent>.vlist-t{text-align:center}.katex .accent .accent-body{position:relative}.katex .accent .accent-body:not(.accent-full){width:0}.katex .overlay{display:block}.katex .mtable .vertical-separator{display:inline-block;min-width:1px}.katex .mtable .arraycolsep{display:inline-block}.katex .mtable .col-align-c>.vlist-t{text-align:center}.katex .mtable .col-align-l>.vlist-t{text-align:left}.katex .mtable .col-align-r>.vlist-t{text-align:right}.katex .svg-align{text-align:left}.katex svg{display:block;position:absolute;width:100%;height:inherit;fill:currentColor;stroke:currentColor;fill-rule:nonzero;fill-opacity:1;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1}.katex svg path{stroke:none}.katex img{border-style:none;min-width:0;min-height:0;max-width:none;max-height:none}.katex .stretchy{width:100%;display:block;position:relative;overflow:hidden}.katex .stretchy:after,.katex .stretchy:before{content:""}.katex .hide-tail{width:100%;position:relative;overflow:hidden}.katex .halfarrow-left{position:absolute;left:0;width:50.2%;overflow:hidden}.katex .halfarrow-right{position:absolute;right:0;width:50.2%;overflow:hidden}.katex .brace-left{position:absolute;left:0;width:25.1%;overflow:hidden}.katex .brace-center{position:absolute;left:25%;width:50%;overflow:hidden}.katex .brace-right{position:absolute;right:0;width:25.1%;overflow:hidden}.katex .x-arrow-pad{padding:0 .5em}.katex .mover,.katex .munder,.katex .x-arrow{text-align:center}.katex 
.boxpad{padding:0 .3em}.katex .fbox,.katex .fcolorbox{box-sizing:border-box;border:.04em solid}.katex .cancel-pad{padding:0 .2em}.katex .cancel-lap{margin-left:-.2em;margin-right:-.2em}.katex .sout{border-bottom-style:solid;border-bottom-width:.08em}.katex-display{display:block;margin:1em 0;text-align:center}.katex-display>.katex{display:block;text-align:center;white-space:nowrap}.katex-display>.katex>.katex-html{display:block;position:relative}.katex-display>.katex>.katex-html>.tag{position:absolute;right:0}.katex-display.leqno>.katex>.katex-html>.tag{left:0;right:auto}.katex-display.fleqn>.katex{text-align:left} 2 | -------------------------------------------------------------------------------- /assets/requirements-slides.txt: -------------------------------------------------------------------------------- 1 | certifi 2 | Jinja2 3 | livereload 4 | MarkupSafe 5 | six 6 | tornado 7 | -------------------------------------------------------------------------------- /assets/style.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: Helvetica, sans-serif; 3 | color: #354046; 4 | line-height: 1.4; 5 | font-size: 16pt; 6 | word-wrap: break-word; 7 | } 8 | 9 | 10 | /* Remark ------------------------------------------------------------------- */ 11 | 12 | .remark-container { 13 | background: #ddd; 14 | } 15 | 16 | .remark-slide-content { 17 | font-size: 1em; 18 | border-top: solid 5px #520b92; 19 | padding-top: 0.5em; 20 | vertical-align: middle; 21 | } 22 | 23 | .remark-slide-content h1 { 24 | font-weight: bold; 25 | font-size: 1.9em; 26 | margin: 0.25em 0; 27 | color: #520b92; 28 | } 29 | 30 | .remark-slide-content h2 { 31 | font-size: 1.5em; 32 | font-weight: bold; 33 | margin: 0.25em 0; 34 | } 35 | 36 | .remark-slide-content h3 { 37 | font-size: 1.2em; 38 | font-weight: bold; 39 | margin: 0.25em 0; 40 | } 41 | 42 | .remark-slide-content p, 43 | .remark-slide-content ul, 44 | .remark-slide-content ol { 45 | 
font-size: 1.2em; 46 | margin: 0.5em 0; 47 | } 48 | 49 | .remark-slide-number { 50 | font-size: 0.5em; 51 | bottom: 2em; 52 | } 53 | 54 | /* Styles ------------------------------------------------------------------- */ 55 | 56 | a { 57 | color: #008000; 58 | text-decoration: none; 59 | } 60 | 61 | a:hover { 62 | text-decoration: underline; 63 | } 64 | 65 | .italic { 66 | font-style: italic; 67 | } 68 | 69 | .bold { 70 | font-weight: bold; 71 | } 72 | 73 | em { 74 | font-style: italic; 75 | } 76 | 77 | strong { 78 | font-style: normal; 79 | font-weight: bold; 80 | } 81 | 82 | pre { 83 | margin: 0.2em; 84 | } 85 | 86 | .larger { 87 | font-size: 1.3em; 88 | } 89 | 90 | .smaller { 91 | font-size: 0.8em; 92 | } 93 | 94 | .smaller-x { 95 | font-size: 0.5em; 96 | } 97 | 98 | .katex { 99 | color: black; 100 | } 101 | 102 | .black-slide .katex, 103 | .black-slide h1, 104 | .black-slide a { 105 | color: white; 106 | } 107 | 108 | ul>li, 109 | ol>li { 110 | margin: 0.3em 0; 111 | } 112 | 113 | li>p { 114 | margin: 0.2em 0; 115 | } 116 | 117 | ul>li>ul>li { 118 | font-size: 0.8em; 119 | } 120 | 121 | .caption { 122 | display: block; 123 | text-align: center; 124 | font-size: 0.8em; 125 | font-style: italic; 126 | } 127 | 128 | .circle img { 129 | border-radius: 50%; 130 | } 131 | 132 | .stretch img { 133 | width: 100%; 134 | height: auto; 135 | } 136 | 137 | .footnote { 138 | position: absolute; 139 | bottom: 0.5em; 140 | font-size: 0.8em; 141 | opacity: 0.9; 142 | left: 1.5em; 143 | } 144 | 145 | .footnote a { 146 | color: #0288d1; 147 | /*#20707e;*/ 148 | } 149 | 150 | .footnote-back { 151 | position: absolute; 152 | bottom: 0.5em; 153 | font-size: 0.7em; 154 | opacity: 0.9; 155 | right: 1.5em; 156 | } 157 | 158 | .inactive { 159 | color: lightgray; 160 | } 161 | 162 | .success { 163 | background-color: rgba(40, 167, 69, 0.25); 164 | border: 2px solid rgb(40, 167, 69); 165 | border-radius: 5px; 166 | width: 95%; 167 | display: block; 168 | padding: 1em; 169 | } 170 | 171 | 
.alert { 172 | color: #721c24; 173 | background-color: #f8d7da; 174 | border: 2px solid #f5c6cb; 175 | border-radius: 5px; 176 | width: 95%; 177 | display: block; 178 | padding: 1em; 179 | } 180 | 181 | .hljs-github .hljs-comment { 182 | color: #520b92; 183 | } 184 | 185 | 186 | /* Specials ----------------------------------------------------------------- */ 187 | 188 | .title-slide { 189 | font-size: 1.1em; 190 | text-align: center; 191 | vertical-align: middle; 192 | } 193 | 194 | .title-slide h1 { 195 | margin-top: 1em; 196 | } 197 | 198 | .chapter-slide { 199 | background-color: #520b92; 200 | color: white; 201 | vertical-align: middle; 202 | font-size: 1.2em; 203 | text-align: center; 204 | } 205 | 206 | .chapter-slide h1, 207 | .chapter-slide a { 208 | color: white; 209 | } 210 | 211 | .black-slide { 212 | background-color: black; 213 | color: white; 214 | border-top: solid 0px; 215 | } 216 | 217 | .red-slide { 218 | background-color: red; 219 | color: white; 220 | border-top: solid 0px; 221 | } 222 | 223 | .this-talk-link { 224 | font-size: 0.8em; 225 | display: block; 226 | font-style: italic; 227 | margin-top: 1em; 228 | } 229 | 230 | 231 | blockquote { 232 | font-size: 1.4em; 233 | width: 75%; 234 | margin: 0 auto; 235 | font-style: italic; 236 | color: #555555; 237 | padding: 1.2em 30px 1.2em 75px; 238 | border-left: 8px solid #008000; 239 | line-height: 1.6; 240 | position: relative; 241 | background: #EDEDED; 242 | } 243 | 244 | 245 | blockquote::before { 246 | font-family: Arial; 247 | content: "\201C"; 248 | color: #008000; 249 | font-size: 4em; 250 | position: absolute; 251 | left: 0; 252 | top: -10px; 253 | } 254 | 255 | blockquote::after { 256 | content: ''; 257 | } 258 | 259 | blockquote cite { 260 | display: block; 261 | color: #333333; 262 | font-style: normal; 263 | font-weight: bold; 264 | margin-top: 1em; 265 | } 266 | 267 | .icon { 268 | background-size: cover; 269 | display: inline-block; 270 | transform: scale(1.5); 271 | margin-top: 
12px; 272 | height: 24px; 273 | width: 25px; 274 | } 275 | 276 | .icon-left { 277 | margin-right: 16px; 278 | } 279 | 280 | .icon-twitter { 281 | background: url("data:image/svg+xml,%3Csvg width='100%25' height='100%25' viewBox='0 0 24 24' version='1.1' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' xml:space='preserve' xmlns:serif='http://www.serif.com/' style='fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;'%3E %3Cg transform='matrix(0.995894,0,0,0.995894,0.0514823,0.156485)'%3E %3Cpath d='M24,4.557C23.117,4.949 22.168,5.213 21.172,5.332C22.189,4.723 22.97,3.758 23.337,2.608C22.386,3.172 21.332,3.582 20.21,3.803C19.313,2.846 18.032,2.248 16.616,2.248C13.437,2.248 11.101,5.214 11.819,8.293C7.728,8.088 4.1,6.128 1.671,3.149C0.381,5.362 1.002,8.257 3.194,9.723C2.388,9.697 1.628,9.476 0.965,9.107C0.911,11.388 2.546,13.522 4.914,13.997C4.221,14.185 3.462,14.229 2.69,14.081C3.316,16.037 5.134,17.46 7.29,17.5C5.22,19.123 2.612,19.848 0,19.54C2.179,20.937 4.768,21.752 7.548,21.752C16.69,21.752 21.855,14.031 21.543,7.106C22.505,6.411 23.34,5.544 24,4.557Z' style='fill-rule:nonzero;'/%3E %3C/g%3E %3C/svg%3E ") no-repeat; 282 | } 283 | 284 | .icon-twitter-white { 285 | background: url("data:image/svg+xml,%3Csvg width='100%25' height='100%25' viewBox='0 0 24 24' version='1.1' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' xml:space='preserve' xmlns:serif='http://www.serif.com/' style='fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;'%3E %3Cg transform='matrix(0.995894,0,0,0.995894,0.0514823,0.156485)'%3E %3Cpath d='M24,4.557C23.117,4.949 22.168,5.213 21.172,5.332C22.189,4.723 22.97,3.758 23.337,2.608C22.386,3.172 21.332,3.582 20.21,3.803C19.313,2.846 18.032,2.248 16.616,2.248C13.437,2.248 11.101,5.214 11.819,8.293C7.728,8.088 4.1,6.128 1.671,3.149C0.381,5.362 1.002,8.257 3.194,9.723C2.388,9.697 1.628,9.476 0.965,9.107C0.911,11.388 2.546,13.522 
4.914,13.997C4.221,14.185 3.462,14.229 2.69,14.081C3.316,16.037 5.134,17.46 7.29,17.5C5.22,19.123 2.612,19.848 0,19.54C2.179,20.937 4.768,21.752 7.548,21.752C16.69,21.752 21.855,14.031 21.543,7.106C22.505,6.411 23.34,5.544 24,4.557Z' style='fill:white;fill-rule:nonzero;'/%3E %3C/g%3E %3C/svg%3E ") no-repeat; 286 | 287 | } 288 | 289 | .icon-github-white { 290 | background: url("data:image/svg+xml,%3Csvg width='100%25' height='100%25' viewBox='0 0 25 24' version='1.1' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' xml:space='preserve' xmlns:serif='http://www.serif.com/' style='fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;'%3E %3Cg transform='matrix(1,0,0,1,0.0503063,0.209536)'%3E %3Cpath d='M12,0C5.374,0 0,5.373 0,12C0,17.302 3.438,21.8 8.207,23.387C8.806,23.498 9,23.126 9,22.81L9,20.576C5.662,21.302 4.967,19.16 4.967,19.16C4.421,17.773 3.634,17.404 3.634,17.404C2.545,16.659 3.717,16.675 3.717,16.675C4.922,16.759 5.556,17.912 5.556,17.912C6.626,19.746 8.363,19.216 9.048,18.909C9.155,18.134 9.466,17.604 9.81,17.305C7.145,17 4.343,15.971 4.343,11.374C4.343,10.063 4.812,8.993 5.579,8.153C5.455,7.85 5.044,6.629 5.696,4.977C5.696,4.977 6.704,4.655 8.997,6.207C9.954,5.941 10.98,5.808 12,5.803C13.02,5.808 14.047,5.941 15.006,6.207C17.297,4.655 18.303,4.977 18.303,4.977C18.956,6.63 18.545,7.851 18.421,8.153C19.191,8.993 19.656,10.064 19.656,11.374C19.656,15.983 16.849,16.998 14.177,17.295C14.607,17.667 15,18.397 15,19.517L15,22.81C15,23.129 15.192,23.504 15.801,23.386C20.566,21.797 24,17.3 24,12C24,5.373 18.627,0 12,0Z' style='fill:white;fill-rule:nonzero;'/%3E %3C/g%3E %3C/svg%3E ") no-repeat; 291 | } 292 | 293 | .icon-github { 294 | background: url("data:image/svg+xml,%3Csvg width='100%25' height='100%25' viewBox='0 0 25 24' version='1.1' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' xml:space='preserve' xmlns:serif='http://www.serif.com/' 
style='fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;'%3E %3Cg transform='matrix(1,0,0,1,0.0191268,0.228032)'%3E %3Cpath d='M12,0C5.374,0 0,5.373 0,12C0,17.302 3.438,21.8 8.207,23.387C8.806,23.498 9,23.126 9,22.81L9,20.576C5.662,21.302 4.967,19.16 4.967,19.16C4.421,17.773 3.634,17.404 3.634,17.404C2.545,16.659 3.717,16.675 3.717,16.675C4.922,16.759 5.556,17.912 5.556,17.912C6.626,19.746 8.363,19.216 9.048,18.909C9.155,18.134 9.466,17.604 9.81,17.305C7.145,17 4.343,15.971 4.343,11.374C4.343,10.063 4.812,8.993 5.579,8.153C5.455,7.85 5.044,6.629 5.696,4.977C5.696,4.977 6.704,4.655 8.997,6.207C9.954,5.941 10.98,5.808 12,5.803C13.02,5.808 14.047,5.941 15.006,6.207C17.297,4.655 18.303,4.977 18.303,4.977C18.956,6.63 18.545,7.851 18.421,8.153C19.191,8.993 19.656,10.064 19.656,11.374C19.656,15.983 16.849,16.998 14.177,17.295C14.607,17.667 15,18.397 15,19.517L15,22.81C15,23.129 15.192,23.504 15.801,23.386C20.566,21.797 24,17.3 24,12C24,5.373 18.627,0 12,0Z' style='fill-rule:nonzero;'/%3E %3C/g%3E %3C/svg%3E ") no-repeat; 295 | } 296 | 297 | .icon-github:hover, 298 | .icon-github-white:hover { 299 | background: url("data:image/svg+xml,%3Csvg width='100%25' height='100%25' viewBox='0 0 25 24' version='1.1' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' xml:space='preserve' xmlns:serif='http://www.serif.com/' style='fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;'%3E %3Cg transform='matrix(1,0,0,1,0.0503063,0.209536)'%3E %3Cpath d='M12,0C5.374,0 0,5.373 0,12C0,17.302 3.438,21.8 8.207,23.387C8.806,23.498 9,23.126 9,22.81L9,20.576C5.662,21.302 4.967,19.16 4.967,19.16C4.421,17.773 3.634,17.404 3.634,17.404C2.545,16.659 3.717,16.675 3.717,16.675C4.922,16.759 5.556,17.912 5.556,17.912C6.626,19.746 8.363,19.216 9.048,18.909C9.155,18.134 9.466,17.604 9.81,17.305C7.145,17 4.343,15.971 4.343,11.374C4.343,10.063 4.812,8.993 5.579,8.153C5.455,7.85 5.044,6.629 5.696,4.977C5.696,4.977 6.704,4.655 
8.997,6.207C9.954,5.941 10.98,5.808 12,5.803C13.02,5.808 14.047,5.941 15.006,6.207C17.297,4.655 18.303,4.977 18.303,4.977C18.956,6.63 18.545,7.851 18.421,8.153C19.191,8.993 19.656,10.064 19.656,11.374C19.656,15.983 16.849,16.998 14.177,17.295C14.607,17.667 15,18.397 15,19.517L15,22.81C15,23.129 15.192,23.504 15.801,23.386C20.566,21.797 24,17.3 24,12C24,5.373 18.627,0 12,0Z' style='fill:rgb(0,128,0);fill-rule:nonzero;'/%3E %3C/g%3E %3C/svg%3E ") no-repeat; 300 | } 301 | 302 | .icon-twitter:hover, 303 | .icon-twitter-white:hover { 304 | background: url("data:image/svg+xml,%3Csvg width='100%25' height='100%25' viewBox='0 0 24 24' version='1.1' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' xml:space='preserve' xmlns:serif='http://www.serif.com/' style='fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;'%3E %3Cg transform='matrix(0.995894,0,0,0.995894,0.0514823,0.156485)'%3E %3Cpath d='M24,4.557C23.117,4.949 22.168,5.213 21.172,5.332C22.189,4.723 22.97,3.758 23.337,2.608C22.386,3.172 21.332,3.582 20.21,3.803C19.313,2.846 18.032,2.248 16.616,2.248C13.437,2.248 11.101,5.214 11.819,8.293C7.728,8.088 4.1,6.128 1.671,3.149C0.381,5.362 1.002,8.257 3.194,9.723C2.388,9.697 1.628,9.476 0.965,9.107C0.911,11.388 2.546,13.522 4.914,13.997C4.221,14.185 3.462,14.229 2.69,14.081C3.316,16.037 5.134,17.46 7.29,17.5C5.22,19.123 2.612,19.848 0,19.54C2.179,20.937 4.768,21.752 7.548,21.752C16.69,21.752 21.855,14.031 21.543,7.106C22.505,6.411 23.34,5.544 24,4.557Z' style='fill:rgb(0,128,0);fill-rule:nonzero;'/%3E %3C/g%3E %3C/svg%3E ") no-repeat; 305 | 306 | } 307 | 308 | .remark-slide table { 309 | border: none; 310 | border-collapse: collapse; 311 | border-spacing: 0; 312 | color: black; 313 | table-layout: fixed; 314 | } 315 | 316 | .remark-slide table thead { 317 | border-bottom: 1px solid black; 318 | vertical-align: bottom; 319 | } 320 | 321 | .remark-slide table tr, 322 | .remark-slide table th, 323 | .remark-slide table td { 
324 | text-align: right; 325 | vertical-align: middle; 326 | padding: 0.5em 0.5em; 327 | line-height: normal; 328 | white-space: normal; 329 | max-width: none; 330 | border: none; 331 | } 332 | 333 | .remark-slide table th { 334 | font-weight: bold; 335 | } 336 | 337 | .remark-slide table tbody tr:nth-child(odd) { 338 | background: #f5f5f5; 339 | } 340 | 341 | .remark-slide table tbody tr:hover { 342 | background: rgba(66, 165, 245, 0.2); 343 | } 344 | /* Alternating-row table variant: no borders, collapsed cells. */ 345 | .remark-slide table.alternating { 346 | border: none; 347 | border-collapse: collapse; 348 | border-spacing: 0; 349 | } 350 | 351 | .remark-slide table.alternating td, 352 | .remark-slide table.alternating tr { 353 | border: none; 354 | } 355 | 356 | .remark-slide table.alternating tbody tr:nth-child(odd) { 357 | background: #f0f0f0; 358 | } 359 | 360 | /* grid */ 361 | 362 | /* Uncomment and set these variables to customize the grid. */ 363 | 364 | .g { 365 | box-sizing: border-box; 366 | display: -ms-flexbox; 367 | display: -webkit-box; 368 | display: flex; 369 | -ms-flex: 0 1 auto; 370 | -webkit-box-flex: 0; 371 | flex: 0 1 auto; 372 | -ms-flex-direction: row; 373 | -webkit-box-orient: horizontal; 374 | -webkit-box-direction: normal; 375 | flex-direction: row; 376 | -ms-flex-wrap: wrap; 377 | flex-wrap: wrap; 378 | margin: 0.5rem -0.5rem; 379 | } 380 | /* Shared box model and horizontal padding for all twelve column classes. */ 381 | .g-1, .g-2, 382 | .g-3, 383 | .g-4, 384 | .g-5, 385 | .g-6, 386 | .g-7, 387 | .g-8, 388 | .g-9, 389 | .g-10, 390 | .g-11, 391 | .g-12 { 392 | box-sizing: border-box; 393 | -ms-flex: 0 0 auto; 394 | -webkit-box-flex: 0; 395 | flex: 0 0 auto; 396 | padding-right: 1rem; 397 | padding-left: 1rem; 398 | } 399 | 400 | .g-1 { 401 | -ms-flex-preferred-size: 8.333%; 402 | flex-basis: 8.333%; 403 | max-width: 8.333%; 404 | } 405 | 406 | .g-2 { 407 | -ms-flex-preferred-size: 16.667%; 408 | flex-basis: 16.667%; 409 | max-width: 16.667%; 410 | } 411 | 412 | .g-3 { 413 | -ms-flex-preferred-size: 25%; 414 | flex-basis: 25%; 415 | max-width: 25%; 416 | } 417 | 418 | 
.g-4 { 419 | -ms-flex-preferred-size: 33.333%; 420 | flex-basis: 33.333%; 421 | max-width: 33.333%; 422 | } 423 | 424 | .g-5 { 425 | -ms-flex-preferred-size: 41.667%; 426 | flex-basis: 41.667%; 427 | max-width: 41.667%; 428 | } 429 | 430 | .g-6 { 431 | -ms-flex-preferred-size: 50%; 432 | flex-basis: 50%; 433 | max-width: 50%; 434 | } 435 | 436 | .g-7 { 437 | -ms-flex-preferred-size: 58.333%; 438 | flex-basis: 58.333%; 439 | max-width: 58.333%; 440 | } 441 | 442 | .g-8 { 443 | -ms-flex-preferred-size: 66.667%; 444 | flex-basis: 66.667%; 445 | max-width: 66.667%; 446 | } 447 | 448 | .g-9 { 449 | -ms-flex-preferred-size: 75%; 450 | flex-basis: 75%; 451 | max-width: 75%; 452 | } 453 | 454 | .g-10 { 455 | -ms-flex-preferred-size: 83.333%; 456 | flex-basis: 83.333%; 457 | max-width: 83.333%; 458 | } 459 | 460 | .g-11 { 461 | -ms-flex-preferred-size: 91.667%; 462 | flex-basis: 91.667%; 463 | max-width: 91.667%; 464 | } 465 | 466 | .g-12 { 467 | -ms-flex-preferred-size: 100%; 468 | flex-basis: 100%; 469 | max-width: 100%; 470 | } 471 | 472 | .g-start { 473 | -ms-flex-pack: start; 474 | -webkit-box-pack: start; 475 | justify-content: flex-start; 476 | text-align: start; 477 | } 478 | 479 | .g-center { 480 | -ms-flex-pack: center; 481 | -webkit-box-pack: center; 482 | justify-content: center; 483 | text-align: center; 484 | } 485 | 486 | .g-end { 487 | -ms-flex-pack: end; 488 | -webkit-box-pack: end; 489 | justify-content: flex-end; 490 | text-align: end; 491 | } 492 | 493 | .g-top { 494 | -ms-flex-align: start; 495 | -webkit-box-align: start; 496 | align-items: flex-start; 497 | } 498 | 499 | .g-middle { 500 | -ms-flex-align: center; 501 | -webkit-box-align: center; 502 | align-items: center; 503 | } 504 | 505 | .g-bottom { 506 | -ms-flex-align: end; 507 | -webkit-box-align: end; 508 | align-items: flex-end; 509 | } 510 | 511 | .g-around { 512 | -ms-flex-pack: distribute; 513 | justify-content: space-around; 514 | } 515 | 516 | .g-between { 517 | -ms-flex-pack: justify; 518 
| -webkit-box-pack: justify; 519 | justify-content: space-between; 520 | } 521 | -------------------------------------------------------------------------------- /assets/twitter.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: ml-workshop-intermediate-1-of-2 2 | channels: 3 | - conda-forge 4 | - nodefaults 5 | dependencies: 6 | - python=3.9.7 7 | - scikit-learn=1.0.* 8 | - seaborn=0.11.2 9 | - pandas 10 | - jupyterlab=3.1.* 11 | -------------------------------------------------------------------------------- /images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/images/.gitkeep -------------------------------------------------------------------------------- /images/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Thomas Fan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /images/bergstra_random.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/images/bergstra_random.jpeg -------------------------------------------------------------------------------- /images/column_transformer_schematic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/images/column_transformer_schematic.png -------------------------------------------------------------------------------- /images/favicon_org.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/images/favicon_org.png -------------------------------------------------------------------------------- /images/grid_search_cross_validation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/images/grid_search_cross_validation.png -------------------------------------------------------------------------------- 
/images/gridsearch_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/images/gridsearch_workflow.png -------------------------------------------------------------------------------- /images/group_kfold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/images/group_kfold.png -------------------------------------------------------------------------------- /images/kfold_cv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/images/kfold_cv.png -------------------------------------------------------------------------------- /images/med_knn_rf_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/images/med_knn_rf_comparison.png -------------------------------------------------------------------------------- /images/repeated_stratified_kfold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/images/repeated_stratified_kfold.png -------------------------------------------------------------------------------- /images/scikit-learn-logo-notext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/images/scikit-learn-logo-notext.png 
-------------------------------------------------------------------------------- /images/shuffle_split_cv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/images/shuffle_split_cv.png -------------------------------------------------------------------------------- /images/stratified_cv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/images/stratified_cv.png -------------------------------------------------------------------------------- /images/time_series_cv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/images/time_series_cv.png -------------------------------------------------------------------------------- /images/time_series_walk_forward_cv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/images/time_series_walk_forward_cv.png -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Intermediate Machine Learning with scikit-learn: Cross validation, Parameter Tuning, Pandas Interoperability, and Missing Values 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 545 | 546 | 573 | 574 | 575 | -------------------------------------------------------------------------------- /maint_tools/check_notebooks.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | for f 
in notebooks/0*.ipynb; do 4 | jupyter nbconvert --execute $f --to notebook --stdout >/dev/null 5 | done 6 | -------------------------------------------------------------------------------- /make.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from jinja2 import Template 3 | from livereload import Server 4 | from pathlib import Path 5 | 6 | 7 | def build(): 8 | print("building index.html") 9 | 10 | assets = Path("assets") 11 | 12 | with (assets / "index.html.jinja").open('r') as f: 13 | template = Template(f.read()) 14 | 15 | with open("slides.md", 'r') as f: 16 | slides = f.readlines() 17 | 18 | # get metadata up to the first title 19 | valid_metadata_keys = set(['title', 'use_katex']) 20 | metadata = {} 21 | for line in slides: 22 | 23 | # first title 24 | if line.startswith("#") or len(metadata) == len(valid_metadata_keys): 25 | break 26 | line_split = line.split(":", maxsplit=1) 27 | if len(line_split) != 2: 28 | continue 29 | 30 | key = line_split[0] 31 | if key not in valid_metadata_keys: 32 | continue 33 | 34 | value = line_split[1].strip() 35 | metadata[key] = value 36 | 37 | if len(metadata) != 2: 38 | raise ValueError("Be sure to include title: and use_katex as metadata " 39 | "in slides.md file") 40 | 41 | output = template.render(title=metadata['title'], 42 | use_katex=metadata['use_katex'] == 'True', 43 | slides="".join(slides)) 44 | 45 | with open("index.html", 'w') as f: 46 | f.write(output) 47 | 48 | 49 | def live(): 50 | print("Serving index.html") 51 | cur_dir = Path('.') 52 | 53 | server = Server() 54 | server.watch("slides.md", build) 55 | server.watch(str(cur_dir / 'assets' / "style.css")) 56 | server.serve(open_url_delay=2) 57 | 58 | 59 | if __name__ == '__main__': 60 | parser = argparse.ArgumentParser(description="Building slides") 61 | parser.add_argument("action", choices=['build', 'live']) 62 | 63 | args = parser.parse_args() 64 | 65 | if args.action == 'build': 66 | build() 67 | 
else: 68 | live() 69 | -------------------------------------------------------------------------------- /notebooks/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/notebooks/.gitkeep -------------------------------------------------------------------------------- /notebooks/00-review-sklearn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "tags": [] 7 | }, 8 | "source": [ 9 | "# Quick Review of scikit-learn\n", 10 | "\n", 11 | "\"Open" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "# Install dependencies for google colab\n", 21 | "import sys\n", 22 | "if 'google.colab' in sys.modules:\n", 23 | " %pip install -r https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/master/requirements.txt" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import sklearn\n", 33 | "assert sklearn.__version__.startswith(\"1.0\"), \"Plese install scikit-learn 1.0\"" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "import seaborn as sns\n", 43 | "sns.set_theme(context=\"notebook\", font_scale=1.2,\n", 44 | " rc={\"figure.figsize\": [10, 6]})\n", 45 | "sklearn.set_config(display=\"diagram\")" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "from sklearn.datasets import fetch_openml\n", 55 | "\n", 56 | "steel = fetch_openml(data_id=1504, as_frame=True)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 
null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "print(steel.DESCR)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "_ = steel.data.hist(figsize=(30, 15), layout=(5, 8))" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "### Split Data" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "from sklearn.model_selection import train_test_split\n", 91 | "X, y = steel.data, steel.target\n", 92 | "\n", 93 | "X_train, X_test, y_train, y_test = train_test_split(\n", 94 | " X, y, random_state=42, stratify=y)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "### Train DummyClassifier" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "from sklearn.dummy import DummyClassifier\n", 111 | "\n", 112 | "dc = DummyClassifier(strategy='prior').fit(X_train, y_train)\n", 113 | "dc.score(X_test, y_test)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "### Train KNN based model" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "from sklearn.preprocessing import StandardScaler\n", 130 | "from sklearn.pipeline import make_pipeline\n", 131 | "from sklearn.neighbors import KNeighborsClassifier\n", 132 | "\n", 133 | "knc = make_pipeline(\n", 134 | " StandardScaler(),\n", 135 | " KNeighborsClassifier()\n", 136 | ")\n", 137 | "knc.fit(X_train, y_train)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "knc.score(X_test, y_test)" 147 | ] 148 | }, 149 
| { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "## Exercise 1\n", 154 | "\n", 155 | "1. Load the Wisconsin breast cancer dataset from `sklearn.datasets.load_breast_cancer` with `as_frame=True`.\n", 156 | "2. Are the labels imbalanced? (**Hint**: `value_counts`)\n", 157 | "3. Split the data into a training and test set. (**Hint**: Be sure to use `stratify`)\n", 158 | "4. Create a pipeline with a `StandardScaler` and `LogisticRegression` and fit on the training set.\n", 159 | "5. Evaluate the pipeline on the test set.\n", 160 | "6. **Extra**: Use `sklearn.metrics.f1_score` to compute the f1 score on the test set." 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "from sklearn.datasets import load_breast_cancer\n", 170 | "from sklearn.linear_model import LogisticRegression\n", 171 | "from sklearn.metrics import f1_score" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan/ml-workshop-intermediate-1-of-2/blob/master/notebooks/solutions/00-ex01-solutions.py). 
" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "# %load solutions/00-ex01-solutions.py" 195 | ] 196 | } 197 | ], 198 | "metadata": { 199 | "kernelspec": { 200 | "display_name": "Python 3 (ipykernel)", 201 | "language": "python", 202 | "name": "python3" 203 | }, 204 | "language_info": { 205 | "codemirror_mode": { 206 | "name": "ipython", 207 | "version": 3 208 | }, 209 | "file_extension": ".py", 210 | "mimetype": "text/x-python", 211 | "name": "python", 212 | "nbconvert_exporter": "python", 213 | "pygments_lexer": "ipython3", 214 | "version": "3.9.7" 215 | } 216 | }, 217 | "nbformat": 4, 218 | "nbformat_minor": 4 219 | } 220 | -------------------------------------------------------------------------------- /notebooks/01-cross-validation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "tags": [] 7 | }, 8 | "source": [ 9 | "# Cross-Validation in scikit-learn\n", 10 | "\n", 11 | "\"Open" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "# Install dependencies for google colab\n", 21 | "import sys\n", 22 | "if 'google.colab' in sys.modules:\n", 23 | " %pip install -r https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/master/requirements.txt" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import sklearn\n", 33 | "assert sklearn.__version__.startswith(\"1.0\"), \"Plese install scikit-learn 1.0\"" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "import seaborn as sns\n", 43 | "sns.set_theme(context=\"notebook\", font_scale=1.2, rc={\"figure.figsize\": 
[10, 6]})\n", 44 | "sklearn.set_config(display=\"diagram\")" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "## Load sample data" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "from sklearn.datasets import fetch_openml\n", 61 | "from sklearn.model_selection import train_test_split\n", 62 | "\n", 63 | "spam = fetch_openml(data_id=44, as_frame=True)\n", 64 | "X, y = spam.data, spam.target\n", 65 | "y = y.cat.codes" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "print(spam.DESCR)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "X_train, X_test, y_train, y_test = train_test_split(\n", 84 | " X, y, random_state=42, stratify=y)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "## Cross validation for model selection" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "### Try DummyClassifier" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "from sklearn.model_selection import cross_val_score\n", 108 | "from sklearn.dummy import DummyClassifier" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "dummy_clf = DummyClassifier(strategy=\"prior\")\n", 118 | "dummy_scores = cross_val_score(dummy_clf, X_train, y_train)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "dummy_scores" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 
| "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "dummy_scores.mean()" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "### Try KNeighborsClassifier" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "from sklearn.neighbors import KNeighborsClassifier\n", 153 | "from sklearn.pipeline import make_pipeline\n", 154 | "from sklearn.preprocessing import StandardScaler\n", 155 | "\n", 156 | "knc = make_pipeline(StandardScaler(), KNeighborsClassifier())\n", 157 | "knc_scores = cross_val_score(knc, X_train, y_train)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "knc_scores" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "knc_scores.mean()" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "### Try LogisticRegression" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "from sklearn.pipeline import make_pipeline\n", 192 | "from sklearn.preprocessing import StandardScaler\n", 193 | "from sklearn.linear_model import LogisticRegression" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "log_reg = make_pipeline(\n", 203 | " StandardScaler(),\n", 204 | " LogisticRegression(random_state=0)\n", 205 | ")" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "log_reg_scores = cross_val_score(log_reg, X_train, y_train)" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 
| "execution_count": null, 220 | "metadata": {}, 221 | "outputs": [], 222 | "source": [ 223 | "log_reg_scores" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": {}, 230 | "outputs": [], 231 | "source": [ 232 | "log_reg_scores.mean()" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "### Which model do we choose?\n", 240 | "\n", 241 | "1. Dummy\n", 242 | "2. KNeighborsClassifier\n", 243 | "3. LogisticRegression" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "## Exercise 1\n", 251 | "\n", 252 | "1. Is the target, `y`, balanced? (**Hint**: `value_counts`)\n", 253 | "2. Train the best model on the training set and evaluate on the test data.\n", 254 | "3. Run `cross_val_score` with `scoring='roc_auc'` to return the roc auc score. Which model performs the best in this case?" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan/ml-workshop-intermediate-1-of-2/blob/master/notebooks/solutions/01-ex01-solutions.py). " 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": {}, 275 | "outputs": [], 276 | "source": [ 277 | "# %load solutions/01-ex01-solutions.py" 278 | ] 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": [ 284 | "#### Back to slides!" 
285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "## Cross validation Strategies" 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": {}, 297 | "source": [ 298 | "### KFold" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "metadata": {}, 305 | "outputs": [], 306 | "source": [ 307 | "from sklearn.model_selection import KFold\n", 308 | "\n", 309 | "cross_val_score(\n", 310 | " log_reg, X_train, y_train,\n", 311 | " cv=KFold(n_splits=4))" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "## Repeated KFold" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [ 327 | "from sklearn.model_selection import RepeatedKFold\n", 328 | "\n", 329 | "scores = cross_val_score(\n", 330 | " log_reg, X_train, y_train,\n", 331 | " cv=RepeatedKFold(n_splits=4, n_repeats=2))" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": {}, 338 | "outputs": [], 339 | "source": [ 340 | "scores" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": {}, 347 | "outputs": [], 348 | "source": [ 349 | "scores.shape" 350 | ] 351 | }, 352 | { 353 | "cell_type": "markdown", 354 | "metadata": {}, 355 | "source": [ 356 | "## StratifiedKFold" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": null, 362 | "metadata": {}, 363 | "outputs": [], 364 | "source": [ 365 | "from sklearn.model_selection import StratifiedKFold\n", 366 | "\n", 367 | "scores = cross_val_score(\n", 368 | " log_reg, X_train, y_train,\n", 369 | " cv=StratifiedKFold(n_splits=4))" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [ 378 | "scores" 379 | ] 380 | }, 381 | { 382 | 
"cell_type": "markdown", 383 | "metadata": {}, 384 | "source": [ 385 | "This is a binary classification problem:" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": null, 391 | "metadata": {}, 392 | "outputs": [], 393 | "source": [ 394 | "y.value_counts()" 395 | ] 396 | }, 397 | { 398 | "cell_type": "markdown", 399 | "metadata": {}, 400 | "source": [ 401 | "Scikit-learn will use `StratifiedKFold` by default for classifiers:" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": null, 407 | "metadata": {}, 408 | "outputs": [], 409 | "source": [ 410 | "cross_val_score(log_reg, X_train, y_train, cv=4)" 411 | ] 412 | }, 413 | { 414 | "cell_type": "markdown", 415 | "metadata": {}, 416 | "source": [ 417 | "## RepeatedStratifiedKFold" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": null, 423 | "metadata": {}, 424 | "outputs": [], 425 | "source": [ 426 | "from sklearn.model_selection import RepeatedStratifiedKFold\n", 427 | "\n", 428 | "scores = cross_val_score(\n", 429 | " log_reg, X_train, y_train,\n", 430 | " cv=RepeatedStratifiedKFold(n_splits=4, n_repeats=3))" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": null, 436 | "metadata": {}, 437 | "outputs": [], 438 | "source": [ 439 | "scores" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": null, 445 | "metadata": {}, 446 | "outputs": [], 447 | "source": [ 448 | "scores.shape" 449 | ] 450 | }, 451 | { 452 | "cell_type": "markdown", 453 | "metadata": {}, 454 | "source": [ 455 | "## Exercise 2\n", 456 | "\n", 457 | "1. Use `sklearn.model_selection.cross_validate` instead of `cross_val_score` with `cv=4`.\n", 458 | "2. What additional information does `cross_validate` provide?\n", 459 | "3. Set `scoring=['f1', 'accuracy', 'roc_auc']` in `cross_validate` to evaluate on multiple metrics." 
460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": null, 465 | "metadata": {}, 466 | "outputs": [], 467 | "source": [ 468 | "from sklearn.model_selection import cross_validate\n", 469 | "import pandas as pd" 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": null, 475 | "metadata": {}, 476 | "outputs": [], 477 | "source": [] 478 | }, 479 | { 480 | "cell_type": "markdown", 481 | "metadata": {}, 482 | "source": [ 483 | "**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan/ml-workshop-intermediate-1-of-2/blob/master/notebooks/solutions/01-ex02-solutions.py). " 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": null, 489 | "metadata": {}, 490 | "outputs": [], 491 | "source": [ 492 | "# %load solutions/01-ex02-solutions.py" 493 | ] 494 | }, 495 | { 496 | "cell_type": "markdown", 497 | "metadata": {}, 498 | "source": [ 499 | "### Appendix: TimeSeriesSplit" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": null, 505 | "metadata": {}, 506 | "outputs": [], 507 | "source": [ 508 | "from sklearn.model_selection import TimeSeriesSplit\n", 509 | "import numpy as np\n", 510 | "\n", 511 | "X = np.arange(10)" 512 | ] 513 | }, 514 | { 515 | "cell_type": "code", 516 | "execution_count": null, 517 | "metadata": {}, 518 | "outputs": [], 519 | "source": [ 520 | "tscv = TimeSeriesSplit(n_splits=3)\n", 521 | "for train_index, test_index in tscv.split(X):\n", 522 | " print(\"TRAIN:\", train_index, \"TEST:\", test_index)" 523 | ] 524 | }, 525 | { 526 | "cell_type": "markdown", 527 | "metadata": {}, 528 | "source": [ 529 | "With `gap=2`:" 530 | ] 531 | }, 532 | { 533 | "cell_type": "code", 534 | "execution_count": null, 535 | "metadata": {}, 536 | "outputs": [], 537 | "source": [ 538 | "tscv_gap = TimeSeriesSplit(n_splits=3, gap=2)\n", 539 | "for train_index, 
test_index in tscv_gap.split(X):\n", 540 | " print(\"TRAIN:\", train_index, \"TEST:\", test_index)" 541 | ] 542 | } 543 | ], 544 | "metadata": { 545 | "kernelspec": { 546 | "display_name": "Python 3 (ipykernel)", 547 | "language": "python", 548 | "name": "python3" 549 | }, 550 | "language_info": { 551 | "codemirror_mode": { 552 | "name": "ipython", 553 | "version": 3 554 | }, 555 | "file_extension": ".py", 556 | "mimetype": "text/x-python", 557 | "name": "python", 558 | "nbconvert_exporter": "python", 559 | "pygments_lexer": "ipython3", 560 | "version": "3.9.7" 561 | } 562 | }, 563 | "nbformat": 4, 564 | "nbformat_minor": 4 565 | } 566 | -------------------------------------------------------------------------------- /notebooks/02-parameter-tuning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Parameter tuning\n", 8 | "\n", 9 | "\"Open" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "# Install dependencies for google colab\n", 19 | "import sys\n", 20 | "if 'google.colab' in sys.modules:\n", 21 | " %pip install -r https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/master/requirements.txt" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import sklearn\n", 31 | "assert sklearn.__version__.startswith(\"1.0\"), \"Plese install scikit-learn 1.0\"" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "import seaborn as sns\n", 41 | "sns.set_theme(context=\"notebook\", font_scale=1.4, rc={\"figure.figsize\": [10, 6]})" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "First let's load the digits 
dataset" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "from sklearn.datasets import load_digits\n", 58 | "from sklearn.model_selection import train_test_split\n", 59 | "\n", 60 | "digits = load_digits()\n", 61 | "X, y = digits.data, digits.target\n", 62 | "\n", 63 | "X_train, X_test, y_train, y_test = train_test_split(\n", 64 | " X, y, random_state=42, stratify=y\n", 65 | ")" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "X[0]" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "import matplotlib.pyplot as plt" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "fig, axes = plt.subplots(4, 4)\n", 93 | "for i, ax in zip(range(16), axes.ravel()):\n", 94 | " ax.imshow(X[i].reshape(8, 8), cmap=\"gray_r\")\n", 95 | " ax.set(xticks=(), yticks=(), title=y[i])\n", 96 | "plt.tight_layout()" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "Create a classifier to parameter search" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "from sklearn.model_selection import GridSearchCV\n", 113 | "from sklearn.ensemble import RandomForestClassifier\n", 114 | "import numpy as np" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "params = {\n", 124 | " 'max_depth': [2, 4, 8, 12, 16],\n", 125 | " 'max_features': [4, 8, 16, 32]\n", 126 | "}" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": {}, 133 | "outputs": [], 134 | 
"source": [ 135 | "grid_search = GridSearchCV(\n", 136 | " RandomForestClassifier(random_state=42), param_grid=params,\n", 137 | " verbose=1,\n", 138 | " n_jobs=8) # Update to the number of physical cpu cores" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "grid_search.fit(X_train, y_train)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "grid_search.best_score_" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "grid_search.best_params_" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "grid_search.score(X_test, y_test)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "import pandas as pd\n", 184 | "cv_df = pd.DataFrame(grid_search.cv_results_)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "cv_df.head()" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "res = (cv_df.pivot(index='param_max_depth', columns='param_max_features', values='mean_test_score')\n", 203 | " .rename_axis(index='max_depth', columns='max_features'))" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "_ = sns.heatmap(res, cmap='viridis')" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": { 218 | "tags": [] 219 | }, 220 | "source": [ 221 | "## Exercise 1\n", 222 | "\n", 223 
| "1. Use a `RandomizedSearchCV` with the following parameter distribution for `RandomForestClassifier`:\n", 224 | "\n", 225 | "```python\n", 226 | "from scipy.stats import randint\n", 227 | "\n", 228 | "param_dist = {\n", 229 | " \"max_features\": randint(1, 11),\n", 230 | " \"min_samples_split\": randint(2, 11)\n", 231 | "}\n", 232 | "```\n", 233 | "\n", 234 | "Set `random_state=0` to have reproducible results, `verbose=1` to show the progress, and `n_iter=20`.\n", 235 | "\n", 236 | "2. What were the best hyper-parameters found by the random search?\n", 237 | "3. Evaluate the model on the test set.\n", 238 | "4. **Extra**: Try to use `HalvingRandomSearchCV` with the same `param_dist`. What are the best hyper-parameters found by this search? Evaluate on the test set. Do these hyperparameters perform better than the ones found in step 1? (**Hint**: `n_iter` is not required)" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [ 247 | "from scipy.stats import randint\n", 248 | "\n", 249 | "from sklearn.ensemble import RandomForestClassifier\n", 250 | "from sklearn.model_selection import RandomizedSearchCV\n", 251 | "from sklearn.experimental import enable_halving_search_cv\n", 252 | "from sklearn.model_selection import HalvingRandomSearchCV\n", 253 | "\n", 254 | "param_dist = {\n", 255 | " \"max_features\": randint(1, 11),\n", 256 | " \"min_samples_split\": randint(2, 11)\n", 257 | "}" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan/ml-workshop-intermediate-1-of-2/blob/master/notebooks/solutions/02-ex01-solutions.py). 
" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "# %load solutions/02-ex01-solutions.py" 281 | ] 282 | } 283 | ], 284 | "metadata": { 285 | "kernelspec": { 286 | "display_name": "Python 3 (ipykernel)", 287 | "language": "python", 288 | "name": "python3" 289 | }, 290 | "language_info": { 291 | "codemirror_mode": { 292 | "name": "ipython", 293 | "version": 3 294 | }, 295 | "file_extension": ".py", 296 | "mimetype": "text/x-python", 297 | "name": "python", 298 | "nbconvert_exporter": "python", 299 | "pygments_lexer": "ipython3", 300 | "version": "3.9.7" 301 | } 302 | }, 303 | "nbformat": 4, 304 | "nbformat_minor": 4 305 | } 306 | -------------------------------------------------------------------------------- /notebooks/03-missing-values.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Missing values in scikit-learn\n", 8 | "\n", 9 | "\"Open" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "# Install dependencies for google colab\n", 19 | "import sys\n", 20 | "IN_COLAB = 'google.colab' in sys.modules\n", 21 | "if IN_COLAB:\n", 22 | " %pip install -r https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/master/requirements.txt" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "import sklearn\n", 32 | "assert sklearn.__version__.startswith(\"1.0\"), \"Plese install scikit-learn 1.0\"" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## SimpleImputer" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": 
[ 48 | "from sklearn.impute import SimpleImputer\n", 49 | "import numpy as np\n", 50 | "import sklearn\n", 51 | "sklearn.set_config(display='diagram')" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "X = np.array([\n", 61 | " [1.0, np.nan, 5.0],\n", 62 | " [2.0, 3.0, np.nan],\n", 63 | " [3.4, 2.0, 4.0],\n", 64 | " [4.0, 3.0, 6.5],\n", 65 | " [4.0, 1.0, 6.5],\n", 66 | "])" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "### Default uses mean" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "imputer = SimpleImputer()\n", 83 | "imputer.fit_transform(X)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "### Add indicator!" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "imputer = SimpleImputer(add_indicator=True)\n", 100 | "imputer.fit_transform(X)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "### Other strategies" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "imputer = SimpleImputer(strategy='median')\n", 117 | "imputer.fit_transform(X)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "imputer = SimpleImputer(strategy='most_frequent')\n", 127 | "imputer.fit_transform(X)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "## Categorical data" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | 
"import pandas as pd" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "df = pd.DataFrame({\n", 153 | " 'a': ['dog', 'cat', 'snake', np.nan]\n", 154 | "})" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "df" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "This will error for categorical data!" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "imputer = SimpleImputer()\n", 180 | "\n", 181 | "try:\n", 182 | " imputer.fit_transform(df)\n", 183 | "except ValueError as exc:\n", 184 | " print(exc)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "imputer = SimpleImputer(strategy='constant', fill_value='sk_missing')\n", 194 | "imputer.fit_transform(df)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "## pandas categorical" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "df['a'] = df['a'].astype('category')" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "df" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "df.dtypes" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "imputer.fit_transform(df)" 238 | ] 239 | }, 240 | { 241 | "cell_type": "markdown", 242 | 
"metadata": {}, 243 | "source": [ 244 | "## Exercise 1\n", 245 | "\n", 246 | "1. Load the Wisconsin cancer dataset using `sklearn.datasets.fetch_openml` by setting `data_id=15` and `as_frame=True`.\n", 247 | "2. Print the description of the dataset.\n", 248 | "3. How many samples and features are there in the dataset?\n", 249 | "4. Which feature(s) of the dataset have missing values? **Hint**: Use pandas' `isna().sum()`\n", 250 | "5. Use a `SimpleImputer` with `add_indicator=True` to `fit_transform` the dataset. What is the shape of the transformed data?\n", 251 | "6. **Extra**: Split the data into training and test datasets. Build a pipeline with the `SimpleImputer`, `StandardScaler`, and `LogisticRegression`. Train the pipeline on the training dataset and evaluate the performance of the model on the test set." 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "from sklearn.datasets import fetch_openml\n", 261 | "from sklearn.model_selection import train_test_split\n", 262 | "from sklearn.pipeline import make_pipeline\n", 263 | "from sklearn.preprocessing import StandardScaler\n", 264 | "from sklearn.linear_model import LogisticRegression" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "metadata": {}, 271 | "outputs": [], 272 | "source": [] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": {}, 277 | "source": [ 278 | "**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan/ml-workshop-intermediate-1-of-2/blob/master/notebooks/solutions/03-ex01-solutions.py)."
279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "metadata": {}, 285 | "outputs": [], 286 | "source": [ 287 | "# %load solutions/03-ex01-solutions.py" 288 | ] 289 | }, 290 | { 291 | "cell_type": "markdown", 292 | "metadata": {}, 293 | "source": [ 294 | "## HistGradientBoosting Native support for missing values\n", 295 | "\n", 296 | "First we load data with missing values" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": null, 302 | "metadata": {}, 303 | "outputs": [], 304 | "source": [ 305 | "from sklearn.datasets import fetch_openml\n", 306 | "from sklearn.model_selection import train_test_split\n", 307 | "\n", 308 | "cancer = fetch_openml(data_id=15, as_frame=True)\n", 309 | "\n", 310 | "X, y = cancer.data, cancer.target\n", 311 | "X_train, X_test, y_train, y_test = train_test_split(\n", 312 | " X, y, random_state=42, stratify=y\n", 313 | ")" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "from sklearn.ensemble import HistGradientBoostingClassifier" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": null, 328 | "metadata": {}, 329 | "outputs": [], 330 | "source": [ 331 | "hist = HistGradientBoostingClassifier(random_state=42)\n", 332 | "hist.fit(X_train, y_train)" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": null, 338 | "metadata": {}, 339 | "outputs": [], 340 | "source": [ 341 | "hist.score(X_test, y_test)" 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "metadata": {}, 347 | "source": [ 348 | "## Grid searching the imputer" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": null, 354 | "metadata": {}, 355 | "outputs": [], 356 | "source": [ 357 | "from sklearn.model_selection import GridSearchCV\n", 358 | "from sklearn.pipeline import Pipeline\n", 359 | "from sklearn.ensemble import 
RandomForestClassifier" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": null, 365 | "metadata": {}, 366 | "outputs": [], 367 | "source": [ 368 | "if IN_COLAB:\n", 369 | " iris_path = (\"https://raw.githubusercontent.com/thomasjpfan/\"\n", 370 | " \"ml-workshop-intermediate-1-of-2/master/notebooks/data/iris_w_missing.csv\")\n", 371 | "else:\n", 372 | " iris_path = \"data/iris_w_missing.csv\"\n", 373 | " \n", 374 | "iris = pd.read_csv(iris_path)" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": null, 380 | "metadata": {}, 381 | "outputs": [], 382 | "source": [ 383 | "iris.head()" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [ 392 | "X = iris.drop('target', axis='columns')\n", 393 | "y = iris['target']" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": null, 399 | "metadata": {}, 400 | "outputs": [], 401 | "source": [ 402 | "X_train, X_test, y_train, y_test = train_test_split(\n", 403 | " X, y, random_state=0, stratify=y\n", 404 | ")" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "metadata": {}, 411 | "outputs": [], 412 | "source": [ 413 | "pipe = Pipeline([\n", 414 | " ('imputer', SimpleImputer(add_indicator=True)),\n", 415 | " ('rf', RandomForestClassifier(random_state=42))\n", 416 | "])" 417 | ] 418 | }, 419 | { 420 | "cell_type": "markdown", 421 | "metadata": {}, 422 | "source": [ 423 | "## scikit-learn uses `get_params` to find names" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": {}, 430 | "outputs": [], 431 | "source": [ 432 | "pipe.get_params()" 433 | ] 434 | }, 435 | { 436 | "cell_type": "markdown", 437 | "metadata": {}, 438 | "source": [ 439 | "## Is it better to add the indicator?" 
440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": null, 445 | "metadata": {}, 446 | "outputs": [], 447 | "source": [ 448 | "from sklearn.model_selection import GridSearchCV\n", 449 | "\n", 450 | "params = {\n", 451 | " 'imputer__add_indicator': [True, False]\n", 452 | "}\n", 453 | "\n", 454 | "grid_search = GridSearchCV(pipe, param_grid=params, verbose=1)" 455 | ] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": null, 460 | "metadata": {}, 461 | "outputs": [], 462 | "source": [ 463 | "grid_search.fit(X_train, y_train)" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": null, 469 | "metadata": {}, 470 | "outputs": [], 471 | "source": [ 472 | "grid_search.best_params_" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": {}, 479 | "outputs": [], 480 | "source": [ 481 | "grid_search.best_score_" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": null, 487 | "metadata": {}, 488 | "outputs": [], 489 | "source": [ 490 | "grid_search.score(X_test, y_test)" 491 | ] 492 | }, 493 | { 494 | "cell_type": "markdown", 495 | "metadata": {}, 496 | "source": [ 497 | "## Compare to `make_pipeline`" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": null, 503 | "metadata": {}, 504 | "outputs": [], 505 | "source": [ 506 | "from sklearn.pipeline import make_pipeline\n", 507 | "\n", 508 | "pipe2 = make_pipeline(SimpleImputer(add_indicator=True),\n", 509 | " RandomForestClassifier(random_state=42))" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": null, 515 | "metadata": {}, 516 | "outputs": [], 517 | "source": [ 518 | "pipe2.get_params()" 519 | ] 520 | }, 521 | { 522 | "cell_type": "markdown", 523 | "metadata": {}, 524 | "source": [ 525 | "## Which imputer to use?" 
526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": null, 531 | "metadata": {}, 532 | "outputs": [], 533 | "source": [ 534 | "from sklearn.impute import KNNImputer\n", 535 | "from sklearn.ensemble import RandomForestClassifier\n", 536 | "from sklearn.ensemble import RandomForestRegressor\n", 537 | "from sklearn.experimental import enable_iterative_imputer\n", 538 | "from sklearn.impute import IterativeImputer\n", 539 | "\n", 540 | "params = {\n", 541 | " 'imputer': [\n", 542 | " SimpleImputer(strategy='median', add_indicator=True),\n", 543 | " SimpleImputer(strategy='mean', add_indicator=True),\n", 544 | " KNNImputer(add_indicator=True),\n", 545 | " IterativeImputer(estimator=RandomForestRegressor(random_state=42),\n", 546 | " random_state=42, add_indicator=True, tol=0.05)]\n", 547 | "}\n", 548 | "\n", 549 | "search_cv = GridSearchCV(pipe, param_grid=params, verbose=1, n_jobs=-1)" 550 | ] 551 | }, 552 | { 553 | "cell_type": "code", 554 | "execution_count": null, 555 | "metadata": {}, 556 | "outputs": [], 557 | "source": [ 558 | "search_cv.fit(X_train, y_train)" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": null, 564 | "metadata": {}, 565 | "outputs": [], 566 | "source": [ 567 | "search_cv.best_params_" 568 | ] 569 | }, 570 | { 571 | "cell_type": "code", 572 | "execution_count": null, 573 | "metadata": {}, 574 | "outputs": [], 575 | "source": [ 576 | "search_cv.best_score_" 577 | ] 578 | }, 579 | { 580 | "cell_type": "code", 581 | "execution_count": null, 582 | "metadata": {}, 583 | "outputs": [], 584 | "source": [ 585 | "search_cv.score(X_test, y_test)" 586 | ] 587 | } 588 | ], 589 | "metadata": { 590 | "kernelspec": { 591 | "display_name": "Python 3 (ipykernel)", 592 | "language": "python", 593 | "name": "python3" 594 | }, 595 | "language_info": { 596 | "codemirror_mode": { 597 | "name": "ipython", 598 | "version": 3 599 | }, 600 | "file_extension": ".py", 601 | "mimetype": "text/x-python", 602 | "name": 
"python", 603 | "nbconvert_exporter": "python", 604 | "pygments_lexer": "ipython3", 605 | "version": "3.9.7" 606 | } 607 | }, 608 | "nbformat": 4, 609 | "nbformat_minor": 4 610 | } 611 | -------------------------------------------------------------------------------- /notebooks/04-pandas-interoperability.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "tags": [] 7 | }, 8 | "source": [ 9 | "# Pandas Interoperability\n", 10 | "\n", 11 | "\"Open" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "# Install dependencies for google colab\n", 21 | "import sys\n", 22 | "if 'google.colab' in sys.modules:\n", 23 | " %pip install -r https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/master/requirements.txt" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import sklearn\n", 33 | "assert sklearn.__version__.startswith(\"1.0\"), \"Plese install scikit-learn 1.0\"" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "import numpy as np\n", 43 | "sklearn.set_config(display='diagram')" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "## Categorical Data" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "import pandas as pd\n", 60 | "\n", 61 | "df_train = pd.DataFrame({\n", 62 | " \"pet\": [\"snake\", \"dog\", \"cat\", \"cow\"],\n", 63 | "})" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### OrdinalEncoder" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 
76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "from sklearn.preprocessing import OrdinalEncoder" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "ord_encoder = OrdinalEncoder()\n", 89 | "ord_encoder.fit_transform(df_train)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "ord_encoder.categories_" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "df_test = pd.DataFrame({\n", 108 | " \"pet\": [\"cow\", \"cat\"]\n", 109 | "})\n", 110 | "df_test" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "ord_encoder.transform(df_test)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "### Categories that are unknown during `fit`" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "df_test_unknown = pd.DataFrame({\n", 136 | " \"pet\": [\"bear\"]\n", 137 | "})" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "try:\n", 147 | " ord_encoder.transform(df_test_unknown)\n", 148 | "except ValueError as err:\n", 149 | " print(err)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "### How to handle unknown categories in OrdinalEncoder?" 
157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "### Provide all the categories in the constructor" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "df_train" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "ord_encoder = OrdinalEncoder(\n", 182 | " categories=[['snake', 'dog', 'cat', 'cow', 'bear']])\n", 183 | "ord_encoder.fit_transform(df_train)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "df_test_unknown" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "ord_encoder.transform(df_test_unknown)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "### Setting a value for unknown values directly" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "ord_encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "ord_encoder.fit_transform(df_train)" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "df_test_unknown" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "ord_encoder.transform(df_test_unknown)" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 
251 | "## OneHotEncoder" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "from sklearn.preprocessing import OneHotEncoder\n", 261 | "\n", 262 | "ohe = OneHotEncoder()\n", 263 | "X_trans = ohe.fit_transform(df_train)\n", 264 | "X_trans" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "By default it is sparse!" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "X_trans.toarray()" 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": {}, 286 | "source": [ 287 | "### Switch to dense" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [ 296 | "ohe = OneHotEncoder(sparse=False)\n", 297 | "ohe.fit_transform(df_train)" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "### Unknown categories during transform?" 
305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "metadata": {}, 311 | "outputs": [], 312 | "source": [ 313 | "df_test_unknown" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "# this will fail\n", 323 | "try:\n", 324 | " ohe.transform(df_test_unknown)\n", 325 | "except ValueError as exc:\n", 326 | " print(exc)" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "### OHE can handle unknowns" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": null, 339 | "metadata": {}, 340 | "outputs": [], 341 | "source": [ 342 | "ohe = OneHotEncoder(sparse=False, handle_unknown='ignore')\n", 343 | "ohe.fit(df_train)" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [ 352 | "ohe.transform(df_test_unknown)" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [ 361 | "ohe.categories_" 362 | ] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": {}, 367 | "source": [ 368 | "## Two categorical features" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": null, 374 | "metadata": {}, 375 | "outputs": [], 376 | "source": [ 377 | "df_train = pd.DataFrame({\n", 378 | " \"pet\": [\"cat\", \"dog\", \"snake\"],\n", 379 | " \"city\": [\"New York\", \"London\", \"London\"]\n", 380 | "})" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": null, 386 | "metadata": {}, 387 | "outputs": [], 388 | "source": [ 389 | "ohe.fit(df_train)" 390 | ] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": null, 395 | "metadata": {}, 396 | "outputs": [], 397 | "source": [ 398 | "ohe.categories_" 399 | ] 400 | }, 401 | { 402 | "cell_type": 
"code", 403 | "execution_count": null, 404 | "metadata": {}, 405 | "outputs": [], 406 | "source": [ 407 | "ohe.transform(df_train)" 408 | ] 409 | }, 410 | { 411 | "cell_type": "markdown", 412 | "metadata": {}, 413 | "source": [ 414 | "# Column Transformer!" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": null, 420 | "metadata": {}, 421 | "outputs": [], 422 | "source": [ 423 | "import pandas as pd" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": {}, 430 | "outputs": [], 431 | "source": [ 432 | "X_df = pd.DataFrame({\n", 433 | " 'age': [10, 20, 15, 5, 20, 14],\n", 434 | " 'height': [5, 7, 6.5, 4.1, 5.4, 5.4],\n", 435 | " 'pet': ['dog', 'snake', 'cat', 'dog', 'cat', 'cat']\n", 436 | "})\n", 437 | "X_df" 438 | ] 439 | }, 440 | { 441 | "cell_type": "markdown", 442 | "metadata": {}, 443 | "source": [ 444 | "## With OrdinalEncoder" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": null, 450 | "metadata": {}, 451 | "outputs": [], 452 | "source": [ 453 | "from sklearn.compose import ColumnTransformer\n", 454 | "from sklearn.preprocessing import StandardScaler" 455 | ] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": null, 460 | "metadata": {}, 461 | "outputs": [], 462 | "source": [ 463 | "ct = ColumnTransformer([\n", 464 | " ('numerical', StandardScaler(), ['age', 'height']),\n", 465 | " ('categorical', OrdinalEncoder(), ['pet'])\n", 466 | "])\n", 467 | "\n", 468 | "ct.fit_transform(X_df)" 469 | ] 470 | }, 471 | { 472 | "cell_type": "markdown", 473 | "metadata": {}, 474 | "source": [ 475 | "## With OneHotEncoder" 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": null, 481 | "metadata": {}, 482 | "outputs": [], 483 | "source": [ 484 | "ct = ColumnTransformer([\n", 485 | " ('numerical', StandardScaler(), ['age', 'height']),\n", 486 | " ('categorical', OneHotEncoder(), ['pet'])\n", 487 | "])" 488 | ] 489 | }, 490 | { 491 | 
"cell_type": "code", 492 | "execution_count": null, 493 | "metadata": {}, 494 | "outputs": [], 495 | "source": [ 496 | "ct.fit_transform(X_df)" 497 | ] 498 | }, 499 | { 500 | "cell_type": "markdown", 501 | "metadata": {}, 502 | "source": [ 503 | "## Titanic dataset" 504 | ] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "execution_count": null, 509 | "metadata": {}, 510 | "outputs": [], 511 | "source": [ 512 | "from sklearn.datasets import fetch_openml\n", 513 | "titanic = fetch_openml(data_id=40945, as_frame=True)\n", 514 | "X, y = titanic.data, titanic.target" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": null, 520 | "metadata": {}, 521 | "outputs": [], 522 | "source": [ 523 | "y" 524 | ] 525 | }, 526 | { 527 | "cell_type": "code", 528 | "execution_count": null, 529 | "metadata": {}, 530 | "outputs": [], 531 | "source": [ 532 | "X.head()" 533 | ] 534 | }, 535 | { 536 | "cell_type": "markdown", 537 | "metadata": {}, 538 | "source": [ 539 | "### Are there categories already encoded in the dataset?" 540 | ] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "execution_count": null, 545 | "metadata": {}, 546 | "outputs": [], 547 | "source": [ 548 | "X.dtypes" 549 | ] 550 | }, 551 | { 552 | "cell_type": "markdown", 553 | "metadata": {}, 554 | "source": [ 555 | "### Are there missing values in the dataset?" 
556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "execution_count": null, 561 | "metadata": {}, 562 | "outputs": [], 563 | "source": [ 564 | "missing_values = pd.concat({\"na_cnt\": X.isna().sum(), \"dtypes\": X.dtypes}, axis='columns')\n", 565 | "missing_values" 566 | ] 567 | }, 568 | { 569 | "cell_type": "markdown", 570 | "metadata": {}, 571 | "source": [ 572 | "### Split data into training and test set" 573 | ] 574 | }, 575 | { 576 | "cell_type": "code", 577 | "execution_count": null, 578 | "metadata": {}, 579 | "outputs": [], 580 | "source": [ 581 | "from sklearn.model_selection import train_test_split\n", 582 | "\n", 583 | "X_train, X_test, y_train, y_test = train_test_split(\n", 584 | " X, y, stratify=y, random_state=42)" 585 | ] 586 | }, 587 | { 588 | "cell_type": "markdown", 589 | "metadata": {}, 590 | "source": [ 591 | "## ColumnTransformer" 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": null, 597 | "metadata": {}, 598 | "outputs": [], 599 | "source": [ 600 | "missing_values" 601 | ] 602 | }, 603 | { 604 | "cell_type": "markdown", 605 | "metadata": {}, 606 | "source": [ 607 | "### Numerical preprocessing" 608 | ] 609 | }, 610 | { 611 | "cell_type": "code", 612 | "execution_count": null, 613 | "metadata": {}, 614 | "outputs": [], 615 | "source": [ 616 | "numerical_features = ['age', 'sibsp', 'parch', 'fare', 'body']" 617 | ] 618 | }, 619 | { 620 | "cell_type": "code", 621 | "execution_count": null, 622 | "metadata": {}, 623 | "outputs": [], 624 | "source": [ 625 | "from sklearn.pipeline import Pipeline\n", 626 | "from sklearn.impute import SimpleImputer\n", 627 | "\n", 628 | "num_prep = Pipeline([\n", 629 | " ('imputer', SimpleImputer()),\n", 630 | " ('scaler', StandardScaler())\n", 631 | "])" 632 | ] 633 | }, 634 | { 635 | "cell_type": "code", 636 | "execution_count": null, 637 | "metadata": {}, 638 | "outputs": [], 639 | "source": [ 640 | "num_prep" 641 | ] 642 | }, 643 | { 644 | "cell_type": "markdown", 645 | 
"metadata": {}, 646 | "source": [ 647 | "#### Running only on numerical features " 648 | ] 649 | }, 650 | { 651 | "cell_type": "code", 652 | "execution_count": null, 653 | "metadata": {}, 654 | "outputs": [], 655 | "source": [ 656 | "num_trans = num_prep.fit_transform(X_train[numerical_features])\n", 657 | "num_trans" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": null, 663 | "metadata": {}, 664 | "outputs": [], 665 | "source": [ 666 | "num_trans.shape" 667 | ] 668 | }, 669 | { 670 | "cell_type": "markdown", 671 | "metadata": {}, 672 | "source": [ 673 | "### Categorical preprocessing" 674 | ] 675 | }, 676 | { 677 | "cell_type": "code", 678 | "execution_count": null, 679 | "metadata": {}, 680 | "outputs": [], 681 | "source": [ 682 | "categorical_features = ['sex', 'embarked']" 683 | ] 684 | }, 685 | { 686 | "cell_type": "code", 687 | "execution_count": null, 688 | "metadata": {}, 689 | "outputs": [], 690 | "source": [ 691 | "cat_prep = OneHotEncoder(handle_unknown='ignore', sparse=False)" 692 | ] 693 | }, 694 | { 695 | "cell_type": "code", 696 | "execution_count": null, 697 | "metadata": {}, 698 | "outputs": [], 699 | "source": [ 700 | "cat_prep" 701 | ] 702 | }, 703 | { 704 | "cell_type": "markdown", 705 | "metadata": {}, 706 | "source": [ 707 | "#### Running only on the categorical features" 708 | ] 709 | }, 710 | { 711 | "cell_type": "code", 712 | "execution_count": null, 713 | "metadata": {}, 714 | "outputs": [], 715 | "source": [ 716 | "cat_trans = cat_prep.fit_transform(X_train[categorical_features])\n", 717 | "cat_trans" 718 | ] 719 | }, 720 | { 721 | "cell_type": "code", 722 | "execution_count": null, 723 | "metadata": {}, 724 | "outputs": [], 725 | "source": [ 726 | "cat_trans.shape" 727 | ] 728 | }, 729 | { 730 | "cell_type": "markdown", 731 | "metadata": {}, 732 | "source": [ 733 | "## ColumnTransformer!" 
734 | ] 735 | }, 736 | { 737 | "cell_type": "code", 738 | "execution_count": null, 739 | "metadata": {}, 740 | "outputs": [], 741 | "source": [ 742 | "ct = ColumnTransformer([\n", 743 | " ('numerical', num_prep, numerical_features),\n", 744 | " ('categorical', cat_prep, categorical_features)\n", 745 | "])" 746 | ] 747 | }, 748 | { 749 | "cell_type": "code", 750 | "execution_count": null, 751 | "metadata": {}, 752 | "outputs": [], 753 | "source": [ 754 | "ct" 755 | ] 756 | }, 757 | { 758 | "cell_type": "code", 759 | "execution_count": null, 760 | "metadata": {}, 761 | "outputs": [], 762 | "source": [ 763 | "X_trans = ct.fit_transform(X_train)" 764 | ] 765 | }, 766 | { 767 | "cell_type": "code", 768 | "execution_count": null, 769 | "metadata": {}, 770 | "outputs": [], 771 | "source": [ 772 | "X_trans[:, :5]" 773 | ] 774 | }, 775 | { 776 | "cell_type": "code", 777 | "execution_count": null, 778 | "metadata": {}, 779 | "outputs": [], 780 | "source": [ 781 | "X_trans[:, 5:]" 782 | ] 783 | }, 784 | { 785 | "cell_type": "code", 786 | "execution_count": null, 787 | "metadata": {}, 788 | "outputs": [], 789 | "source": [ 790 | "X_trans.shape" 791 | ] 792 | }, 793 | { 794 | "cell_type": "markdown", 795 | "metadata": {}, 796 | "source": [ 797 | "### Linear model" 798 | ] 799 | }, 800 | { 801 | "cell_type": "code", 802 | "execution_count": null, 803 | "metadata": {}, 804 | "outputs": [], 805 | "source": [ 806 | "from sklearn.pipeline import Pipeline\n", 807 | "from sklearn.preprocessing import StandardScaler\n", 808 | "from sklearn.linear_model import LogisticRegression" 809 | ] 810 | }, 811 | { 812 | "cell_type": "code", 813 | "execution_count": null, 814 | "metadata": {}, 815 | "outputs": [], 816 | "source": [ 817 | "log_reg = Pipeline([\n", 818 | " ('preprocess', ct),\n", 819 | " ('log_reg', LogisticRegression(random_state=42))\n", 820 | "])\n", 821 | "log_reg" 822 | ] 823 | }, 824 | { 825 | "cell_type": "code", 826 | "execution_count": null, 827 | "metadata": {}, 828 | 
"outputs": [], 829 | "source": [ 830 | "log_reg.fit(X_train, y_train)" 831 | ] 832 | }, 833 | { 834 | "cell_type": "code", 835 | "execution_count": null, 836 | "metadata": {}, 837 | "outputs": [], 838 | "source": [ 839 | "log_reg.score(X_train, y_train)" 840 | ] 841 | }, 842 | { 843 | "cell_type": "markdown", 844 | "metadata": {}, 845 | "source": [ 846 | "## Random Forest" 847 | ] 848 | }, 849 | { 850 | "cell_type": "code", 851 | "execution_count": null, 852 | "metadata": {}, 853 | "outputs": [], 854 | "source": [ 855 | "from sklearn.ensemble import RandomForestClassifier" 856 | ] 857 | }, 858 | { 859 | "cell_type": "code", 860 | "execution_count": null, 861 | "metadata": {}, 862 | "outputs": [], 863 | "source": [ 864 | "rf = Pipeline([\n", 865 | " ('preprocess', ct),\n", 866 | " ('log_reg', RandomForestClassifier(random_state=42))\n", 867 | "])\n", 868 | "rf" 869 | ] 870 | }, 871 | { 872 | "cell_type": "code", 873 | "execution_count": null, 874 | "metadata": {}, 875 | "outputs": [], 876 | "source": [ 877 | "rf.fit(X_train, y_train)" 878 | ] 879 | }, 880 | { 881 | "cell_type": "code", 882 | "execution_count": null, 883 | "metadata": {}, 884 | "outputs": [], 885 | "source": [ 886 | "rf.score(X_train, y_train)" 887 | ] 888 | }, 889 | { 890 | "cell_type": "markdown", 891 | "metadata": { 892 | "tags": [] 893 | }, 894 | "source": [ 895 | "## Exercise 1\n", 896 | "\n", 897 | "1. Load the ames housing dataset using `sklearn.datasets.fetch_openml` with `data_id=41211` and `as_frame=True`.\n", 898 | " - **Hint**: You may ignore the version warning\n", 899 | "1. How many samples and features are there?\n", 900 | "1. Find and save the categorical and numerical feature names.\n", 901 | " - **Hint**: You can use `X.select_dtypes(include='category').columns` and `X.select_dtypes(include='number').columns`\n", 902 | "1. What are the categorical feature names? What are the numerical feature names?\n", 903 | "1. Split the data into training and test dataset.\n", 904 | "1. 
Build pipeline using a `ColumnTransformer`, `OrdinalEncoder`, and `sklearn.ensemble.HistGradientBoostingRegressor` and fit on the train dataset.\n", 905 | " - **Hint**: Use `'passthrough'` option for numerical columns.\n", 906 | " - **Hint**: Use `OrdinalEncoder` with `handle_unknown='use_encoded_value'` and `unknown_value=-1`.\n", 907 | "1. Evaluate the model on the test set.\n", 908 | "1. **Extra**: Use `sklearn.compose.make_column_selector` instead of passing the feature names directly." 909 | ] 910 | }, 911 | { 912 | "cell_type": "code", 913 | "execution_count": null, 914 | "metadata": {}, 915 | "outputs": [], 916 | "source": [ 917 | "from sklearn.datasets import fetch_openml\n", 918 | "from sklearn.ensemble import HistGradientBoostingRegressor\n", 919 | "from sklearn.compose import make_column_selector" 920 | ] 921 | }, 922 | { 923 | "cell_type": "code", 924 | "execution_count": null, 925 | "metadata": {}, 926 | "outputs": [], 927 | "source": [] 928 | }, 929 | { 930 | "cell_type": "markdown", 931 | "metadata": {}, 932 | "source": [ 933 | "**If you are running locally**, you can uncomment the following cell to load the solution into the cell. On **Google Colab**, [see solution here](https://github.com/thomasjpfan/ml-workshop-intermediate-1-of-2/blob/master/notebooks/solutions/04-ex01-solutions.py). 
" 934 | ] 935 | }, 936 | { 937 | "cell_type": "code", 938 | "execution_count": null, 939 | "metadata": {}, 940 | "outputs": [], 941 | "source": [ 942 | "# %load solutions/04-ex01-solutions.py" 943 | ] 944 | } 945 | ], 946 | "metadata": { 947 | "kernelspec": { 948 | "display_name": "Python 3 (ipykernel)", 949 | "language": "python", 950 | "name": "python3" 951 | }, 952 | "language_info": { 953 | "codemirror_mode": { 954 | "name": "ipython", 955 | "version": 3 956 | }, 957 | "file_extension": ".py", 958 | "mimetype": "text/x-python", 959 | "name": "python", 960 | "nbconvert_exporter": "python", 961 | "pygments_lexer": "ipython3", 962 | "version": "3.9.7" 963 | } 964 | }, 965 | "nbformat": 4, 966 | "nbformat_minor": 4 967 | } 968 | -------------------------------------------------------------------------------- /notebooks/data/iris_w_missing.csv: -------------------------------------------------------------------------------- 1 | sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target 2 | 4.6,3.2,1.4,0.2,0 3 | 5.9,3.0,5.1,1.8,2 4 | 5.1,3.8,1.9,0.4,0 5 | 4.8,3.1,1.6,0.2,0 6 | 6.3,2.5,,1.9,2 7 | 5.7,2.5,,,2 8 | 6.3,2.7,4.9,1.8,2 9 | 6.9,3.1,,,1 10 | 5.0,3.5,1.6,0.6,0 11 | 5.5,2.5,4.0,1.3,1 12 | 7.7,2.6,6.9,2.3,2 13 | 5.7,4.4,1.5,0.4,0 14 | 7.0,3.2,4.7,1.4,1 15 | 6.5,3.0,,,2 16 | 4.7,3.2,1.3,0.2,0 17 | 6.3,3.4,,,2 18 | 7.7,2.8,6.7,,2 19 | 5.4,3.0,,1.5,1 20 | 7.4,2.8,,,2 21 | 4.9,2.4,3.3,1.0,1 22 | 6.4,2.9,4.3,,1 23 | 6.3,2.8,5.1,1.5,2 24 | 5.8,4.0,1.2,0.2,0 25 | 4.9,2.5,,,2 26 | 6.0,2.2,,1.5,2 27 | 5.5,3.5,1.3,0.2,0 28 | 6.3,2.5,,1.5,1 29 | 5.8,2.7,3.9,1.2,1 30 | 5.5,2.6,4.4,1.2,1 31 | 6.1,3.0,4.6,1.4,1 32 | 5.1,2.5,3.0,1.1,1 33 | 6.1,3.0,,1.8,2 34 | 6.5,3.0,5.8,2.2,2 35 | 5.5,2.4,,,1 36 | 5.1,3.4,1.5,0.2,0 37 | 5.6,2.8,,2.0,2 38 | 5.2,3.5,1.5,0.2,0 39 | 5.8,2.7,4.1,1.0,1 40 | 7.6,3.0,6.6,2.1,2 41 | 6.5,3.0,5.2,,2 42 | 6.0,2.2,,1.0,1 43 | 5.2,4.1,1.5,0.1,0 44 | 6.9,3.1,,2.3,2 45 | 6.4,3.2,5.3,2.3,2 46 | 4.9,3.1,1.5,0.1,0 47 | 6.2,3.4,,,2 48 | 
7.1,3.0,,2.1,2 49 | 6.4,3.1,,,2 50 | 5.1,3.3,1.7,0.5,0 51 | 4.4,3.0,1.3,0.2,0 52 | 5.3,3.7,1.5,0.2,0 53 | 4.3,3.0,1.1,0.1,0 54 | 5.0,3.3,1.4,0.2,0 55 | 5.8,2.7,5.1,,2 56 | 6.7,3.3,,,2 57 | 5.7,3.8,1.7,0.3,0 58 | 6.4,2.8,,,2 59 | 5.9,3.2,,1.8,1 60 | 6.4,2.8,,,2 61 | 5.1,3.5,1.4,0.2,0 62 | 5.1,3.8,1.6,0.2,0 63 | 6.8,2.8,4.8,,1 64 | 5.1,3.7,1.5,0.4,0 65 | 5.5,4.2,1.4,0.2,0 66 | 5.4,3.4,1.5,0.4,0 67 | 5.1,3.8,1.5,0.3,0 68 | 6.2,2.2,4.5,,1 69 | 4.9,3.0,1.4,0.2,0 70 | 6.7,3.0,5.0,,1 71 | 5.2,3.4,1.4,0.2,0 72 | 5.0,2.0,3.5,1.0,1 73 | 6.2,2.9,4.3,1.3,1 74 | 6.0,2.9,4.5,1.5,1 75 | 4.9,3.1,1.5,0.2,0 76 | 7.3,2.9,6.3,,2 77 | 6.7,3.3,,2.1,2 78 | 4.5,2.3,1.3,0.3,0 79 | 5.0,3.4,1.5,0.2,0 80 | 5.7,3.0,4.2,1.2,1 81 | 6.1,2.8,4.7,1.2,1 82 | 5.0,3.6,1.4,0.2,0 83 | 6.7,3.1,4.4,1.4,1 84 | 6.3,3.3,,1.6,1 85 | 6.0,2.7,5.1,1.6,1 86 | 5.2,2.7,3.9,1.4,1 87 | 5.0,3.0,1.6,0.2,0 88 | 4.4,3.2,1.3,0.2,0 89 | 6.1,2.6,5.6,,2 90 | 5.6,2.5,3.9,1.1,1 91 | 4.6,3.1,1.5,0.2,0 92 | 6.3,2.9,,,2 93 | 6.9,3.2,,2.3,2 94 | 5.4,3.4,1.7,0.2,0 95 | 4.8,3.4,1.9,0.2,0 96 | 4.8,3.4,1.6,0.2,0 97 | 4.7,3.2,1.6,0.2,0 98 | 5.4,3.9,1.7,0.4,0 99 | 6.2,2.8,4.8,1.8,2 100 | 5.0,2.3,3.3,1.0,1 101 | 5.5,2.4,3.7,1.0,1 102 | 6.3,3.3,,2.5,2 103 | 5.7,2.6,3.5,1.0,1 104 | 6.1,2.9,4.7,,1 105 | 7.7,3.0,6.1,,2 106 | 4.8,3.0,1.4,0.3,0 107 | 5.6,2.9,3.6,1.3,1 108 | 4.6,3.4,1.4,0.3,0 109 | 5.8,2.7,,,2 110 | 5.5,2.3,,1.3,1 111 | 5.7,2.8,4.1,1.3,1 112 | 6.7,3.0,,2.3,2 113 | 6.0,3.4,4.5,1.6,1 114 | 6.7,3.1,,,1 115 | 7.7,3.8,6.7,,2 116 | 5.7,2.8,,1.3,1 117 | 5.0,3.4,1.6,0.4,0 118 | 6.6,2.9,4.6,1.3,1 119 | 6.4,3.2,4.5,1.5,1 120 | 4.4,2.9,1.4,0.2,0 121 | 5.4,3.7,1.5,0.2,0 122 | 6.7,2.5,,,2 123 | 5.6,3.0,4.5,,1 124 | 5.8,2.6,4.0,1.2,1 125 | 5.0,3.5,1.3,0.3,0 126 | 6.0,3.0,,,2 127 | 6.5,3.2,,,2 128 | 4.9,3.6,1.4,0.1,0 129 | 5.7,2.9,4.2,1.3,1 130 | 5.6,3.0,,1.3,1 131 | 4.6,3.6,1.0,0.2,0 132 | 6.7,3.1,,2.4,2 133 | 5.9,3.0,4.2,1.5,1 134 | 5.6,2.7,4.2,1.3,1 135 | 7.2,3.0,5.8,,2 136 | 6.8,3.2,,2.3,2 137 | 7.2,3.6,,,2 138 | 5.4,3.9,1.3,0.4,0 139 | 
6.6,3.0,4.4,1.4,1 140 | 5.1,3.5,1.4,0.3,0 141 | 4.8,3.0,1.4,0.1,0 142 | 7.9,3.8,6.4,2.0,2 143 | 6.5,2.8,,1.5,1 144 | 6.9,3.1,5.4,,2 145 | 6.4,2.7,,1.9,2 146 | 5.0,3.2,1.2,0.2,0 147 | 6.3,2.3,4.4,1.3,1 148 | 6.1,2.8,4.0,1.3,1 149 | 7.2,3.2,,,2 150 | 5.8,2.8,,,2 151 | 6.8,3.0,,,2 152 | -------------------------------------------------------------------------------- /notebooks/images/approval_ratings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/notebooks/images/approval_ratings.png -------------------------------------------------------------------------------- /notebooks/images/approval_ratings_random.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/notebooks/images/approval_ratings_random.png -------------------------------------------------------------------------------- /notebooks/images/approval_ratings_structured.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/notebooks/images/approval_ratings_structured.png -------------------------------------------------------------------------------- /notebooks/images/kfold_cv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/notebooks/images/kfold_cv.png -------------------------------------------------------------------------------- /notebooks/images/knn_boundary_n_neighbors.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/notebooks/images/knn_boundary_n_neighbors.png -------------------------------------------------------------------------------- /notebooks/images/knn_model_complexity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/notebooks/images/knn_model_complexity.png -------------------------------------------------------------------------------- /notebooks/images/med_knn_rf_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasjpfan/ml-workshop-intermediate-1-of-2/a0981d4101b8a9caf2005f6e8d6f6e631fe96dec/notebooks/images/med_knn_rf_comparison.png -------------------------------------------------------------------------------- /notebooks/solutions/00-ex01-solutions.py: -------------------------------------------------------------------------------- 1 | cancer = load_breast_cancer(as_frame=True) 2 | 3 | X, y = cancer.data, cancer.target 4 | 5 | y.value_counts() 6 | 7 | X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42) 8 | 9 | log_reg = make_pipeline( 10 | StandardScaler(), 11 | LogisticRegression() 12 | ) 13 | 14 | log_reg.fit(X_train, y_train) 15 | 16 | log_reg.score(X_test, y_test) 17 | 18 | y_pred = log_reg.predict(X_test) 19 | 20 | f1_score(y_test, y_pred) 21 | -------------------------------------------------------------------------------- /notebooks/solutions/01-ex01-solutions.py: -------------------------------------------------------------------------------- 1 | y.value_counts() 2 | 3 | log_reg.fit(X_train, y_train) 4 | 5 | log_reg.score(X_test, y_test) 6 | 7 | d_auc_scores = cross_val_score(dummy_clf, X_train, y_train, scoring='roc_auc') 8 | d_auc_scores.mean() 9 | 10 | 
knc_auc_scores = cross_val_score(knc, X_train, y_train, scoring='roc_auc') 11 | knc_auc_scores.mean() 12 | 13 | log_reg_auc_scores = cross_val_score(log_reg, X_train, y_train, scoring='roc_auc') 14 | 15 | log_reg_auc_scores.mean() 16 | -------------------------------------------------------------------------------- /notebooks/solutions/01-ex02-solutions.py: -------------------------------------------------------------------------------- 1 | results = cross_validate(log_reg, X_train, y_train, cv=4) 2 | 3 | results_df = pd.DataFrame(results) 4 | 5 | results_df 6 | 7 | results = cross_validate(log_reg, X_train, y_train, cv=4, scoring=['f1', 'accuracy', 'roc_auc']) 8 | 9 | results_df = pd.DataFrame(results) 10 | 11 | results_df 12 | -------------------------------------------------------------------------------- /notebooks/solutions/02-ex01-solutions.py: -------------------------------------------------------------------------------- 1 | from scipy.stats import randint 2 | 3 | from sklearn.ensemble import RandomForestClassifier 4 | from sklearn.model_selection import RandomizedSearchCV 5 | from sklearn.experimental import enable_halving_search_cv 6 | from sklearn.model_selection import HalvingRandomSearchCV 7 | 8 | param_dist = { 9 | "max_features": randint(1, 11), 10 | "min_samples_split": randint(2, 11) 11 | } 12 | 13 | search_cv = RandomizedSearchCV(RandomForestClassifier(random_state=0), 14 | param_distributions=param_dist, n_iter=20, verbose=1, n_jobs=8, random_state=0) 15 | 16 | search_cv.fit(X_train, y_train) 17 | 18 | search_cv.best_params_ 19 | 20 | search_cv.best_score_ 21 | 22 | search_cv.score(X_test, y_test) 23 | 24 | half_cv = HalvingRandomSearchCV(RandomForestClassifier(random_state=0), 25 | param_distributions=param_dist, verbose=1, n_jobs=8, random_state=0) 26 | 27 | half_cv.fit(X_train, y_train) 28 | 29 | half_cv.best_params_ 30 | 31 | half_cv.best_score_ 32 | 33 | half_cv.score(X_test, y_test) 34 | 
-------------------------------------------------------------------------------- /notebooks/solutions/03-ex01-solutions.py: -------------------------------------------------------------------------------- 1 | 2 | cancer = fetch_openml(data_id=15, as_frame=True) 3 | 4 | print(cancer.DESCR) 5 | 6 | X, y = cancer.data, cancer.target 7 | 8 | X.shape 9 | 10 | X.isna().sum() 11 | 12 | imputer = SimpleImputer(add_indicator=True) 13 | X_trans = imputer.fit_transform(X) 14 | 15 | X_trans.shape 16 | 17 | X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42) 18 | 19 | log_reg = make_pipeline( 20 | SimpleImputer(add_indicator=True), 21 | StandardScaler(), 22 | LogisticRegression() 23 | ) 24 | 25 | log_reg.fit(X_train, y_train) 26 | 27 | log_reg.score(X_test, y_test) 28 | -------------------------------------------------------------------------------- /notebooks/solutions/04-ex01-solutions.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import fetch_openml 2 | from sklearn.ensemble import HistGradientBoostingRegressor 3 | from sklearn.compose import make_column_selector 4 | 5 | ames = fetch_openml(data_id=41211, as_frame=True) 6 | 7 | X, y = ames.data, ames.target 8 | 9 | categorical_names = X.select_dtypes(include='category').columns 10 | numerical_names = X.select_dtypes(include='number').columns 11 | 12 | categorical_names 13 | 14 | numerical_names 15 | 16 | X_train, X_test, y_train, y_test = train_test_split( 17 | X, y, random_state=42 18 | ) 19 | 20 | preprocessor = ColumnTransformer([ 21 | ("numerical", "passthrough", numerical_names), 22 | ("categorical", OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1), categorical_names) 23 | ]) 24 | 25 | hist = Pipeline([ 26 | ("preprocessor", preprocessor), 27 | ("regressor", HistGradientBoostingRegressor(random_state=42)) 28 | ]) 29 | 30 | hist.fit(X_train, y_train) 31 | 32 | hist.score(X_test, y_test) 33 | 34 | # 
Extra 35 | num_selector = make_column_selector(dtype_include="number") 36 | cat_selector = make_column_selector(dtype_include="category") 37 | 38 | prep_callable = ColumnTransformer([ 39 | ("numerical", "passthrough", num_selector), 40 | ("categorical", OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1), cat_selector) 41 | ]) 42 | 43 | hist = Pipeline([ 44 | ("prep", prep_callable), 45 | ("reg", HistGradientBoostingRegressor(random_state=42)) 46 | ]) 47 | 48 | hist.fit(X_train, y_train) 49 | 50 | hist.score(X_test, y_test) 51 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-learn==1.0.* 2 | seaborn==0.11.2 3 | pandas 4 | -------------------------------------------------------------------------------- /slides.md: -------------------------------------------------------------------------------- 1 | title: Intermediate Machine Learning with scikit-learn: Cross validation, Parameter Tuning, Pandas Interoperability, and Missing Values 2 | use_katex: True 3 | class: title-slide 4 | 5 | # Intermediate Machine Learning with scikit-learn 6 | ## Cross validation, Parameter Tuning, Pandas Interoperability, and Missing Values 7 | 8 | ![](images/scikit-learn-logo-notext.png) 9 | 10 | .larger[Thomas J. Fan]
11 | @thomasjpfan
12 | 13 | 14 | 15 | This workshop on Github: github.com/thomasjpfan/ml-workshop-intermediate-1-of-2 16 | 17 | --- 18 | 19 | name: table-of-contents 20 | class: title-slide, left 21 | 22 | # Table of Contents 23 | .g[ 24 | .g-6[ 25 | 1. [Cross Validation](#validation) 26 | 1. [Parameter Tuning](#parameter-tuning) 27 | 1. [Missing Values](#missing-values) 28 | 1. [Pandas Interoperability](#pandas) 29 | ] 30 | .g-6.g-center[ 31 | ![](images/scikit-learn-logo-notext.png) 32 | ] 33 | ] 34 | 35 | --- 36 | 37 | # Scikit-learn API 38 | 39 | .center[ 40 | ## `estimator.fit(X, [y])` 41 | ] 42 | 43 | .g[ 44 | .g-6[ 45 | ## `estimator.predict` 46 | - Classification 47 | - Regression 48 | - Clustering 49 | ] 50 | .g-6[ 51 | ## `estimator.transform` 52 | - Preprocessing 53 | - Dimensionality reduction 54 | - Feature selection 55 | - Feature extraction 56 | ] 57 | ] 58 | 59 | --- 60 | 61 | # Data Representation 62 | 63 | ![:scale 80%](images/data-representation.svg) 64 | 65 | --- 66 | 67 | # Supervised ML Workflow 68 | 69 | ![](images/ml-workflow-sklearn.svg) 70 | 71 | --- 72 | 73 | class: chapter-slide 74 | 75 | # Notebook 📒! 76 | ## notebooks/00-review-sklearn.ipynb 77 | 78 | --- 79 | 80 | name: validation 81 | class: chapter-slide 82 | 83 | # 1. Cross Validation 84 | 85 | .footnote-back[ 86 | [Back to Table of Contents](#table-of-contents) 87 | ] 88 | 89 | --- 90 | 91 | # Single train test split 92 | 93 | ![:scale 70%](images/train-test.svg) 94 | 95 | --- 96 | 97 | # Three Fold Split 98 | 99 | ![:scale 75%](images/split-data-three.svg) 100 | 101 | --- 102 | 103 | # Why cross validate? 104 | 105 | ![:scale 80%](notebooks/images/overfitting_validation_set_1.svg) 106 | 107 | --- 108 | 109 | # Why cross validate? 110 | 111 | ![:scale 80%](notebooks/images/overfitting_validation_set_2.svg) 112 | 113 | --- 114 | 115 | # Can we do better? 116 | 117 | ![:scale 80%](images/grid_search_cross_validation.png) 118 | 119 | --- 120 | 121 | class: chapter-slide 122 | 123 | # Notebook 📓! 
124 | ## notebooks/01-cross-validation.ipynb 125 | 126 | --- 127 | 128 | class: chapter-slide 129 | 130 | # Cross Validation Strategies 131 | 132 | --- 133 | 134 | ![:scale 90%](notebooks/images/kfold_cv.svg) 135 | 136 | --- 137 | 138 | ![:scale 90%](notebooks/images/stratified_cv.svg) 139 | 140 | --- 141 | 142 | ![:scale 90%](notebooks/images/shuffle_split_cv.svg) 143 | 144 | --- 145 | 146 | ![:scale 90%](notebooks/images/repeated_stratified_kfold.svg) 147 | 148 | --- 149 | 150 | # Strategies for increasing the number of folds 151 | 152 | - High variance, takes a long time 153 | ```py 154 | from sklearn.model_selection import LeaveOneOut 155 | ``` 156 | 157 | - `ShuffleSplit` with stratification 158 | ```py 159 | from sklearn.model_selection import StratifiedShuffleSplit 160 | ``` 161 | 162 | - `RepeatedKFold` or `RepeatedStratifiedKFold` 163 | ```py 164 | from sklearn.model_selection import RepeatedKFold 165 | from sklearn.model_selection import RepeatedStratifiedKFold 166 | ``` 167 | 168 | --- 169 | 170 | # Cross-validation with non-iid data 171 | 172 | ## Grouped data 173 | 174 | - Data is not iid when samples are grouped, e.g. by patient ID or user ID 175 | - We want to generalize to a new patient 176 | 177 | ## Time Series 178 | 179 | - Data is correlated 180 | 181 | --- 182 | 183 | ![:scale 90%](notebooks/images/group_kfold.svg) 184 | 185 | --- 186 | 187 | ![:scale 90%](notebooks/images/approval_ratings.png) 188 | 189 | --- 190 | 191 | ![:scale 90%](notebooks/images/approval_ratings_random.png) 192 | 193 | --- 194 | 195 | ![:scale 90%](notebooks/images/approval_ratings_structured.png) 196 | 197 | --- 198 | 199 | ![:scale 90%](notebooks/images/time_series_cv.svg) 200 | 201 | --- 202 | 203 | ![:scale 90%](notebooks/images/time_series_walk_forward_cv.svg) 204 | 205 | --- 206 | 207 | class: chapter-slide 208 | 209 | # Notebook 📓! 210 | ## notebooks/01-cross-validation.ipynb 211 | 212 | --- 213 | 214 | name: parameter-tuning 215 | class: chapter-slide 216 | 217 | # 2. 
Parameter Tuning 218 | 219 | .footnote-back[ 220 | [Back to Table of Contents](#table-of-contents) 221 | ] 222 | 223 | --- 224 | 225 | class: center 226 | 227 | # Why Tune Parameters? 228 | 229 | ![:scale 50%](notebooks/images/knn_boundary_n_neighbors.png) 230 | 231 | --- 232 | 233 | # Score vs n_neighbors 234 | 235 | ![:scale 80%](notebooks/images/knn_model_complexity.png) 236 | 237 | --- 238 | 239 | # Parameter Tuning Workflow 240 | 241 | ![:scale 80%](images/gridsearch_workflow.png) 242 | 243 | --- 244 | 245 | # GridSearchCV 246 | 247 | ```py 248 | from sklearn.model_selection import GridSearchCV 249 | 250 | param_grid = {'n_neighbors': np.arange(1, 30, 2)} 251 | grid = GridSearchCV(KNeighborsClassifier(), param_grid=param_grid, 252 | return_train_score=True) 253 | 254 | grid.fit(X_train, y_train) 255 | ``` 256 | 257 | Best score 258 | 259 | ```py 260 | grid.best_score_ 261 | ``` 262 | 263 | Best parameters 264 | 265 | ```py 266 | grid.best_params_ 267 | ``` 268 | 269 | --- 270 | 271 | # Random Search 272 | 273 | ![](images/bergstra_random.jpeg) 274 | 275 | --- 276 | 277 | # RandomizedSearchCV with scikit-learn 278 | 279 | ```py 280 | from scipy.stats import randint 281 | param_dist = { 282 | "max_depth": randint(3, 9), 283 | "max_features": randint(1, 11) 284 | } 285 | 286 | random_search = RandomizedSearchCV( 287 | clf, 288 | param_distributions=param_dist, 289 | n_iter=20 290 | ) 291 | ``` 292 | 293 | - Values in `param_distributions` can be a list or an object from the 294 | `scipy.stats` module 295 | 296 | --- 297 | 298 | # Successive Halving 299 | 300 | ```python 301 | from sklearn.experimental import enable_halving_search_cv # noqa 302 | 303 | from sklearn.model_selection import HalvingRandomSearchCV 304 | from sklearn.model_selection import HalvingGridSearchCV 305 | ``` 306 | 307 | ??? 
308 | 309 | The search strategy starts evaluating all the candidates with a small amount of resources and iteratively selects the best candidates, using more and more resources. 310 | 311 | --- 312 | 313 | class: center 314 | 315 | ![:scale 90%](notebooks/images/halvingcv.svg) 316 | 317 | --- 318 | 319 | class: chapter-slide 320 | 321 | # Notebook 📓! 322 | ## notebooks/02-parameter-tuning.ipynb 323 | 324 | --- 325 | 326 | name: missing-values 327 | class: chapter-slide 328 | 329 | # 3. Missing Values 330 | 331 | .footnote-back[ 332 | [Back to Table of Contents](#table-of-contents) 333 | ] 334 | 335 | --- 336 | 337 | # Imputers in scikit-learn 338 | 339 | ## Impute module 340 | 341 | ```py 342 | from sklearn.impute import SimpleImputer 343 | from sklearn.impute import KNNImputer 344 | 345 | # `add_indicator=True` to add missing indicator 346 | imputer = SimpleImputer(add_indicator=True) 347 | 348 | from sklearn.experimental import enable_iterative_imputer 349 | from sklearn.impute import IterativeImputer 350 | ``` 351 | 352 | --- 353 | 354 | # Comparing the Different methods 355 | 356 | ![:scale 100%](images/med_knn_rf_comparison.png) 357 | 358 | --- 359 | 360 | # Estimators with native support 361 | 362 | ## Histogram-based Gradient Boosting Regression Trees 363 | 364 | - Based on LightGBM implementation 365 | - Have native support for missing values 366 | 367 | ```py 368 | # Stable since scikit-learn 1.0: no `sklearn.experimental` import is needed 369 | from sklearn.ensemble import HistGradientBoostingClassifier 370 | from sklearn.ensemble import HistGradientBoostingRegressor 371 | ``` 372 | 373 | --- 374 | 375 | class: chapter-slide 376 | 377 | # Notebook 📔! 378 | ## notebooks/03-missing-values.ipynb 379 | 380 | --- 381 | 382 | name: pandas 383 | class: chapter-slide 384 | 385 | # 4. 
Pandas Interoperability 386 | 387 | .footnote-back[ 388 | [Back to Table of Contents](#table-of-contents) 389 | ] 390 | 391 | --- 392 | 393 | # Categorical Data 394 | 395 | ## Examples of categories: 396 | 397 | - `['Manhattan', 'Queens', 'Brooklyn', 'Bronx']` 398 | - `['dog', 'cat', 'mouse']` 399 | 400 | ## Scikit-learn Encoders 401 | 402 | `OrdinalEncoder`: Encodes categories into an integer 403 | ```py 404 | from sklearn.preprocessing import OrdinalEncoder 405 | ``` 406 | 407 | `OneHotEncoder`: Encodes each category as its own binary (0/1) column 408 | ```py 409 | from sklearn.preprocessing import OneHotEncoder 410 | ``` 411 | 412 | --- 413 | 414 | # Heterogeneous data 415 | 416 | ## Example: Titanic Dataset 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 |
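|   | pclass | sex | age | sibsp | parch | fare | embarked | body |
|---|--------|-----|-----|-------|-------|------|----------|------|
| 0 | 1.0 | female | 29.0000 | 0.0 | 0.0 | 211.3375 | S | NaN |
| 1 | 1.0 | male | 0.9167 | 1.0 | 2.0 | 151.5500 | S | NaN |
| 2 | 1.0 | female | 2.0000 | 1.0 | 2.0 | 151.5500 | S | NaN |
| 3 | 1.0 | male | 30.0000 | 1.0 | 2.0 | 151.5500 | S | 135.0 |
| 4 | 1.0 | female | 25.0000 | 1.0 | 2.0 | 151.5500 | S | NaN |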
490 | 491 | --- 492 | 493 | # scikit-learn's ColumnTransformer 494 | 495 | ![:scale 100%](images/column_transformer_schematic.png) 496 | 497 | --- 498 | 499 | class: chapter-slide 500 | 501 | # Notebook 📔! 502 | ## notebooks/04-pandas-interoperability.ipynb 503 | 504 | --- 505 | 506 | class: title-slide, left 507 | 508 | # Closing 509 | 510 | .g.g-middle[ 511 | .g-7[ 512 | ![:scale 30%](images/scikit-learn-logo-notext.png) 513 | 1. [Cross Validation](#validation) 514 | 1. [Parameter Tuning](#parameter-tuning) 515 | 1. [Missing Values](#missing-values) 516 | 1. [Pandas Interoperability](#pandas) 517 | ] 518 | .g-5.center[ 519 |
520 | .larger[Thomas J. Fan]
521 | @thomasjpfan
522 | 523 | 524 | 525 | This workshop on Github: github.com/thomasjpfan/ml-workshop-intermediate-1-of-2 526 | ] 527 | ] 528 | --------------------------------------------------------------------------------