├── .gitignore ├── LICENSE ├── Model_Stacking.ipynb ├── README.md └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 
92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Casper Bøgeskov Hansen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Model_Stacking.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "kernelspec": { 6 | "display_name": "Python 3", 7 | "language": "python", 8 | "name": "python3" 9 | }, 10 | "language_info": { 11 | "codemirror_mode": { 12 | "name": "ipython", 13 | "version": 3 14 | }, 15 | "file_extension": ".py", 16 | "mimetype": "text/x-python", 17 | "name": "python", 18 | "nbconvert_exporter": "python", 19 | "pygments_lexer": "ipython3", 20 | "version": "3.7.5" 21 | }, 22 | "colab": { 23 | "name": "Model Stacking.ipynb", 24 | "provenance": [], 25 | "collapsed_sections": [], 26 | "include_colab_link": true 27 | } 28 | }, 29 | "cells": [ 30 | { 31 | "cell_type": "markdown", 32 | "metadata": { 33 | "id": "view-in-github", 34 | "colab_type": "text" 35 | }, 36 | "source": [ 37 | "\"Open" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "metadata": { 43 | "id": "73zWN1Sb77gf", 44 | "colab_type": "code", 45 | "outputId": "c8947b99-8196-4c16-e67a-2101974c1152", 46 | "colab": { 47 | "base_uri": "https://localhost:8080/", 48 | "height": 664 49 | } 50 | }, 51 | "source": [ 52 | "!pip install lightgbm xgboost scikit-learn pandas mlxtend --upgrade" 53 | ], 54 | "execution_count": 15, 55 | "outputs": [ 56 | { 57 | "output_type": "stream", 58 | "text": [ 59 | "Collecting lightgbm\n", 60 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/0b/9d/ddcb2f43aca194987f1a99e27edf41cf9bc39ea750c3371c2a62698c509a/lightgbm-2.3.1-py2.py3-none-manylinux1_x86_64.whl (1.2MB)\n", 61 | "\r\u001b[K |▎ | 10kB 17.8MB/s eta 0:00:01\r\u001b[K |▌ | 20kB 6.6MB/s eta 0:00:01\r\u001b[K |▉ | 30kB 8.1MB/s eta 0:00:01\r\u001b[K |█ | 40kB 5.8MB/s eta 0:00:01\r\u001b[K |█▎ | 51kB 6.3MB/s eta 0:00:01\r\u001b[K |█▋ | 61kB 7.3MB/s eta 0:00:01\r\u001b[K |█▉ | 71kB 7.8MB/s eta 0:00:01\r\u001b[K |██ | 81kB 7.3MB/s eta 
0:00:01\r\u001b[K |██▍ | 92kB 8.1MB/s eta 0:00:01\r\u001b[K |██▋ | 102kB 8.0MB/s eta 0:00:01\r\u001b[K |███ | 112kB 8.0MB/s eta 0:00:01\r\u001b[K |███▏ | 122kB 8.0MB/s eta 0:00:01\r\u001b[K |███▍ | 133kB 8.0MB/s eta 0:00:01\r\u001b[K |███▊ | 143kB 8.0MB/s eta 0:00:01\r\u001b[K |████ | 153kB 8.0MB/s eta 0:00:01\r\u001b[K |████▏ | 163kB 8.0MB/s eta 0:00:01\r\u001b[K |████▌ | 174kB 8.0MB/s eta 0:00:01\r\u001b[K |████▊ | 184kB 8.0MB/s eta 0:00:01\r\u001b[K |█████ | 194kB 8.0MB/s eta 0:00:01\r\u001b[K |█████▎ | 204kB 8.0MB/s eta 0:00:01\r\u001b[K |█████▌ | 215kB 8.0MB/s eta 0:00:01\r\u001b[K |█████▉ | 225kB 8.0MB/s eta 0:00:01\r\u001b[K |██████ | 235kB 8.0MB/s eta 0:00:01\r\u001b[K |██████▎ | 245kB 8.0MB/s eta 0:00:01\r\u001b[K |██████▋ | 256kB 8.0MB/s eta 0:00:01\r\u001b[K |██████▉ | 266kB 8.0MB/s eta 0:00:01\r\u001b[K |███████ | 276kB 8.0MB/s eta 0:00:01\r\u001b[K |███████▍ | 286kB 8.0MB/s eta 0:00:01\r\u001b[K |███████▋ | 296kB 8.0MB/s eta 0:00:01\r\u001b[K |████████ | 307kB 8.0MB/s eta 0:00:01\r\u001b[K |████████▏ | 317kB 8.0MB/s eta 0:00:01\r\u001b[K |████████▍ | 327kB 8.0MB/s eta 0:00:01\r\u001b[K |████████▊ | 337kB 8.0MB/s eta 0:00:01\r\u001b[K |█████████ | 348kB 8.0MB/s eta 0:00:01\r\u001b[K |█████████▏ | 358kB 8.0MB/s eta 0:00:01\r\u001b[K |█████████▌ | 368kB 8.0MB/s eta 0:00:01\r\u001b[K |█████████▊ | 378kB 8.0MB/s eta 0:00:01\r\u001b[K |██████████ | 389kB 8.0MB/s eta 0:00:01\r\u001b[K |██████████▎ | 399kB 8.0MB/s eta 0:00:01\r\u001b[K |██████████▌ | 409kB 8.0MB/s eta 0:00:01\r\u001b[K |██████████▉ | 419kB 8.0MB/s eta 0:00:01\r\u001b[K |███████████ | 430kB 8.0MB/s eta 0:00:01\r\u001b[K |███████████▎ | 440kB 8.0MB/s eta 0:00:01\r\u001b[K |███████████▋ | 450kB 8.0MB/s eta 0:00:01\r\u001b[K |███████████▉ | 460kB 8.0MB/s eta 0:00:01\r\u001b[K |████████████ | 471kB 8.0MB/s eta 0:00:01\r\u001b[K |████████████▍ | 481kB 8.0MB/s eta 0:00:01\r\u001b[K |████████████▋ | 491kB 8.0MB/s eta 0:00:01\r\u001b[K |█████████████ | 501kB 8.0MB/s eta 0:00:01\r\u001b[K 
|█████████████▏ | 512kB 8.0MB/s eta 0:00:01\r\u001b[K |█████████████▍ | 522kB 8.0MB/s eta 0:00:01\r\u001b[K |█████████████▊ | 532kB 8.0MB/s eta 0:00:01\r\u001b[K |██████████████ | 542kB 8.0MB/s eta 0:00:01\r\u001b[K |██████████████▏ | 552kB 8.0MB/s eta 0:00:01\r\u001b[K |██████████████▌ | 563kB 8.0MB/s eta 0:00:01\r\u001b[K |██████████████▊ | 573kB 8.0MB/s eta 0:00:01\r\u001b[K |███████████████ | 583kB 8.0MB/s eta 0:00:01\r\u001b[K |███████████████▎ | 593kB 8.0MB/s eta 0:00:01\r\u001b[K |███████████████▌ | 604kB 8.0MB/s eta 0:00:01\r\u001b[K |███████████████▉ | 614kB 8.0MB/s eta 0:00:01\r\u001b[K |████████████████ | 624kB 8.0MB/s eta 0:00:01\r\u001b[K |████████████████▎ | 634kB 8.0MB/s eta 0:00:01\r\u001b[K |████████████████▋ | 645kB 8.0MB/s eta 0:00:01\r\u001b[K |████████████████▉ | 655kB 8.0MB/s eta 0:00:01\r\u001b[K |█████████████████ | 665kB 8.0MB/s eta 0:00:01\r\u001b[K |█████████████████▍ | 675kB 8.0MB/s eta 0:00:01\r\u001b[K |█████████████████▋ | 686kB 8.0MB/s eta 0:00:01\r\u001b[K |██████████████████ | 696kB 8.0MB/s eta 0:00:01\r\u001b[K |██████████████████▏ | 706kB 8.0MB/s eta 0:00:01\r\u001b[K |██████████████████▍ | 716kB 8.0MB/s eta 0:00:01\r\u001b[K |██████████████████▊ | 727kB 8.0MB/s eta 0:00:01\r\u001b[K |███████████████████ | 737kB 8.0MB/s eta 0:00:01\r\u001b[K |███████████████████▏ | 747kB 8.0MB/s eta 0:00:01\r\u001b[K |███████████████████▌ | 757kB 8.0MB/s eta 0:00:01\r\u001b[K |███████████████████▊ | 768kB 8.0MB/s eta 0:00:01\r\u001b[K |████████████████████ | 778kB 8.0MB/s eta 0:00:01\r\u001b[K |████████████████████▎ | 788kB 8.0MB/s eta 0:00:01\r\u001b[K |████████████████████▌ | 798kB 8.0MB/s eta 0:00:01\r\u001b[K |████████████████████▉ | 808kB 8.0MB/s eta 0:00:01\r\u001b[K |█████████████████████ | 819kB 8.0MB/s eta 0:00:01\r\u001b[K |█████████████████████▎ | 829kB 8.0MB/s eta 0:00:01\r\u001b[K |█████████████████████▋ | 839kB 8.0MB/s eta 0:00:01\r\u001b[K |█████████████████████▉ | 849kB 8.0MB/s eta 0:00:01\r\u001b[K |██████████████████████▏ | 
860kB 8.0MB/s eta 0:00:01\r\u001b[K |██████████████████████▍ | 870kB 8.0MB/s eta 0:00:01\r\u001b[K |██████████████████████▋ | 880kB 8.0MB/s eta 0:00:01\r\u001b[K |███████████████████████ | 890kB 8.0MB/s eta 0:00:01\r\u001b[K |███████████████████████▏ | 901kB 8.0MB/s eta 0:00:01\r\u001b[K |███████████████████████▍ | 911kB 8.0MB/s eta 0:00:01\r\u001b[K |███████████████████████▊ | 921kB 8.0MB/s eta 0:00:01\r\u001b[K |████████████████████████ | 931kB 8.0MB/s eta 0:00:01\r\u001b[K |████████████████████████▏ | 942kB 8.0MB/s eta 0:00:01\r\u001b[K |████████████████████████▌ | 952kB 8.0MB/s eta 0:00:01\r\u001b[K |████████████████████████▊ | 962kB 8.0MB/s eta 0:00:01\r\u001b[K |█████████████████████████ | 972kB 8.0MB/s eta 0:00:01\r\u001b[K |█████████████████████████▎ | 983kB 8.0MB/s eta 0:00:01\r\u001b[K |█████████████████████████▌ | 993kB 8.0MB/s eta 0:00:01\r\u001b[K |█████████████████████████▉ | 1.0MB 8.0MB/s eta 0:00:01\r\u001b[K |██████████████████████████ | 1.0MB 8.0MB/s eta 0:00:01\r\u001b[K |██████████████████████████▎ | 1.0MB 8.0MB/s eta 0:00:01\r\u001b[K |██████████████████████████▋ | 1.0MB 8.0MB/s eta 0:00:01\r\u001b[K |██████████████████████████▉ | 1.0MB 8.0MB/s eta 0:00:01\r\u001b[K |███████████████████████████▏ | 1.1MB 8.0MB/s eta 0:00:01\r\u001b[K |███████████████████████████▍ | 1.1MB 8.0MB/s eta 0:00:01\r\u001b[K |███████████████████████████▋ | 1.1MB 8.0MB/s eta 0:00:01\r\u001b[K |████████████████████████████ | 1.1MB 8.0MB/s eta 0:00:01\r\u001b[K |████████████████████████████▏ | 1.1MB 8.0MB/s eta 0:00:01\r\u001b[K |████████████████████████████▍ | 1.1MB 8.0MB/s eta 0:00:01\r\u001b[K |████████████████████████████▊ | 1.1MB 8.0MB/s eta 0:00:01\r\u001b[K |█████████████████████████████ | 1.1MB 8.0MB/s eta 0:00:01\r\u001b[K |█████████████████████████████▏ | 1.1MB 8.0MB/s eta 0:00:01\r\u001b[K |█████████████████████████████▌ | 1.1MB 8.0MB/s eta 0:00:01\r\u001b[K |█████████████████████████████▊ | 1.2MB 8.0MB/s eta 0:00:01\r\u001b[K |██████████████████████████████ | 
1.2MB 8.0MB/s eta 0:00:01\r\u001b[K |██████████████████████████████▎ | 1.2MB 8.0MB/s eta 0:00:01\r\u001b[K |██████████████████████████████▌ | 1.2MB 8.0MB/s eta 0:00:01\r\u001b[K |██████████████████████████████▉ | 1.2MB 8.0MB/s eta 0:00:01\r\u001b[K |███████████████████████████████ | 1.2MB 8.0MB/s eta 0:00:01\r\u001b[K |███████████████████████████████▎| 1.2MB 8.0MB/s eta 0:00:01\r\u001b[K |███████████████████████████████▋| 1.2MB 8.0MB/s eta 0:00:01\r\u001b[K |███████████████████████████████▉| 1.2MB 8.0MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 1.2MB 8.0MB/s \n", 62 | "\u001b[?25hRequirement already up-to-date: xgboost in /usr/local/lib/python3.6/dist-packages (0.90)\n", 63 | "Collecting scikit-learn\n", 64 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/d1/48/e9fa9e252abcd1447eff6f9257636af31758a6e46fd5ce5d3c879f6907cb/scikit_learn-0.22.1-cp36-cp36m-manylinux1_x86_64.whl (7.0MB)\n", 65 | "\u001b[K |████████████████████████████████| 7.1MB 22.0MB/s \n", 66 | "\u001b[?25hRequirement already up-to-date: pandas in /usr/local/lib/python3.6/dist-packages (0.25.3)\n", 67 | "Collecting mlxtend\n", 68 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/52/04/c362f34f666f0ddc7cf593805e64d64fa670ed96fd9302e68549dd48287d/mlxtend-0.17.0-py2.py3-none-any.whl (1.3MB)\n", 69 | "\u001b[K |████████████████████████████████| 1.3MB 47.1MB/s \n", 70 | "\u001b[?25hRequirement already satisfied, skipping upgrade: scipy in /usr/local/lib/python3.6/dist-packages (from lightgbm) (1.3.3)\n", 71 | "Requirement already satisfied, skipping upgrade: numpy in /usr/local/lib/python3.6/dist-packages (from lightgbm) (1.17.4)\n", 72 | "Requirement already satisfied, skipping upgrade: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn) (0.14.1)\n", 73 | "Requirement already satisfied, skipping upgrade: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas) (2.6.1)\n", 74 | "Requirement already satisfied, 
skipping upgrade: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas) (2018.9)\n", 75 | "Requirement already satisfied, skipping upgrade: setuptools in /usr/local/lib/python3.6/dist-packages (from mlxtend) (42.0.2)\n", 76 | "Requirement already satisfied, skipping upgrade: matplotlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from mlxtend) (3.1.2)\n", 77 | "Requirement already satisfied, skipping upgrade: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil>=2.6.1->pandas) (1.12.0)\n", 78 | "Requirement already satisfied, skipping upgrade: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib>=3.0.0->mlxtend) (2.4.5)\n", 79 | "Requirement already satisfied, skipping upgrade: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib>=3.0.0->mlxtend) (1.1.0)\n", 80 | "Requirement already satisfied, skipping upgrade: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib>=3.0.0->mlxtend) (0.10.0)\n", 81 | "Installing collected packages: scikit-learn, lightgbm, mlxtend\n", 82 | " Found existing installation: scikit-learn 0.21.3\n", 83 | " Uninstalling scikit-learn-0.21.3:\n", 84 | " Successfully uninstalled scikit-learn-0.21.3\n", 85 | " Found existing installation: lightgbm 2.2.3\n", 86 | " Uninstalling lightgbm-2.2.3:\n", 87 | " Successfully uninstalled lightgbm-2.2.3\n", 88 | " Found existing installation: mlxtend 0.14.0\n", 89 | " Uninstalling mlxtend-0.14.0:\n", 90 | " Successfully uninstalled mlxtend-0.14.0\n", 91 | "Successfully installed lightgbm-2.3.1 mlxtend-0.17.0 scikit-learn-0.22.1\n" 92 | ], 93 | "name": "stdout" 94 | }, 95 | { 96 | "output_type": "display_data", 97 | "data": { 98 | "application/vnd.colab-display-data+json": { 99 | "pip_warning": { 100 | "packages": [ 101 | "lightgbm", 102 | "mlxtend", 103 | "sklearn" 104 | ] 105 | } 106 | } 107 | }, 108 | "metadata": { 109 | "tags": [] 110 | } 111 | } 112 | ] 113 | }, 114 | { 115 
| "cell_type": "code", 116 | "metadata": { 117 | "id": "dzx0-ABr75WX", 118 | "colab_type": "code", 119 | "colab": {} 120 | }, 121 | "source": [ 122 | "import pandas as pd\n", 123 | "import numpy as np\n", 124 | "from sklearn.datasets import load_boston\n", 125 | "\n", 126 | "# Suppress warnings for now\n", 127 | "import warnings\n", 128 | "warnings.simplefilter(action='ignore', category=UserWarning)\n", 129 | "warnings.simplefilter(action='ignore', category=FutureWarning)" 130 | ], 131 | "execution_count": 0, 132 | "outputs": [] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": { 137 | "id": "R5vCI_Fl75Wa", 138 | "colab_type": "text" 139 | }, 140 | "source": [ 141 | "# Loading the dataset from Scikit-Learn" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "metadata": { 147 | "id": "2yw3p1Z675Wb", 148 | "colab_type": "code", 149 | "colab": {} 150 | }, 151 | "source": [ 152 | "def dataset_to_df(load):\n", 153 | " # Load the input data into the dataframe\n", 154 | " df = pd.DataFrame(load.data, columns=load.feature_names)\n", 155 | " \n", 156 | " # Add the output data into the dataframe\n", 157 | " df['label'] = pd.Series(load.target)\n", 158 | " \n", 159 | " # Return the dataframe\n", 160 | " return df" 161 | ], 162 | "execution_count": 0, 163 | "outputs": [] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "metadata": { 168 | "scrolled": true, 169 | "id": "OKcGkfdA75Wd", 170 | "colab_type": "code", 171 | "outputId": "c8ace8bf-c0c5-404b-b0c0-d9afbba5dee1", 172 | "colab": { 173 | "base_uri": "https://localhost:8080/", 174 | "height": 204 175 | } 176 | }, 177 | "source": [ 178 | "df = dataset_to_df(load_boston())\n", 179 | "df.head()" 180 | ], 181 | "execution_count": 3, 182 | "outputs": [ 183 | { 184 | "output_type": "execute_result", 185 | "data": { 186 | "text/html": [ 187 | "
\n", 188 | "\n", 201 | "\n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | "
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTATlabel
00.0063218.02.310.00.5386.57565.24.09001.0296.015.3396.904.9824.0
10.027310.07.070.00.4696.42178.94.96712.0242.017.8396.909.1421.6
20.027290.07.070.00.4697.18561.14.96712.0242.017.8392.834.0334.7
30.032370.02.180.00.4586.99845.86.06223.0222.018.7394.632.9433.4
40.069050.02.180.00.4587.14754.26.06223.0222.018.7396.905.3336.2
\n", 309 | "
" 310 | ], 311 | "text/plain": [ 312 | " CRIM ZN INDUS CHAS NOX ... TAX PTRATIO B LSTAT label\n", 313 | "0 0.00632 18.0 2.31 0.0 0.538 ... 296.0 15.3 396.90 4.98 24.0\n", 314 | "1 0.02731 0.0 7.07 0.0 0.469 ... 242.0 17.8 396.90 9.14 21.6\n", 315 | "2 0.02729 0.0 7.07 0.0 0.469 ... 242.0 17.8 392.83 4.03 34.7\n", 316 | "3 0.03237 0.0 2.18 0.0 0.458 ... 222.0 18.7 394.63 2.94 33.4\n", 317 | "4 0.06905 0.0 2.18 0.0 0.458 ... 222.0 18.7 396.90 5.33 36.2\n", 318 | "\n", 319 | "[5 rows x 14 columns]" 320 | ] 321 | }, 322 | "metadata": { 323 | "tags": [] 324 | }, 325 | "execution_count": 3 326 | } 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": { 332 | "id": "GCIJZVIG75Wg", 333 | "colab_type": "text" 334 | }, 335 | "source": [ 336 | "# What do the features mean?\n", 337 | "\n", 338 | "The features are described here: https://www.cs.toronto.edu/~delve/data/boston/bostonDetail.html\n", 339 | "\n", 340 | "1. CRIM - per capita crime rate by town\n", 341 | "2. ZN - proportion of residential land zoned for lots over 25,000 sq.ft.\n", 342 | "3. INDUS - proportion of non-retail business acres per town.\n", 343 | "4. CHAS - Charles River dummy variable (1 if tract bounds river; 0 otherwise)\n", 344 | "5. NOX - nitric oxides concentration (parts per 10 million)\n", 345 | "6. RM - average number of rooms per dwelling\n", 346 | "7. AGE - proportion of owner-occupied units built prior to 1940\n", 347 | "8. DIS - weighted distances to five Boston employment centres\n", 348 | "9. RAD - index of accessibility to radial highways\n", 349 | "10. TAX - full-value property-tax rate per \\$10,000\n", 350 | "11. PTRATIO - pupil-teacher ratio by town\n", 351 | "12. B - $1000(Bk - 0.63)^2$ where Bk is the proportion of blacks by town\n", 352 | "13. LSTAT - % lower status of the population\n", 353 | "14. 
(label) MEDV - Median value of owner-occupied homes in $1000's" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": { 359 | "id": "BN7de4PZ75Wg", 360 | "colab_type": "text" 361 | }, 362 | "source": [ 363 | "# Baseline Predictions" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "metadata": { 369 | "id": "v565z6M375Wh", 370 | "colab_type": "code", 371 | "colab": {} 372 | }, 373 | "source": [ 374 | "from sklearn.model_selection import train_test_split\n", 375 | "\n", 376 | "# Getting the output variable\n", 377 | "y = df['label']\n", 378 | "\n", 379 | "# Getting the input variables\n", 380 | "X = df.drop(['label'], axis=1)\n", 381 | "\n", 382 | "# Dividing our input and output into training and testing sets\n", 383 | "X_train, X_test, y_train, y_test = train_test_split(\n", 384 | " X, y, \n", 385 | " test_size=0.33, \n", 386 | " random_state=42\n", 387 | " )" 388 | ], 389 | "execution_count": 0, 390 | "outputs": [] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "metadata": { 395 | "id": "GcmAVXh275Wj", 396 | "colab_type": "code", 397 | "colab": {} 398 | }, 399 | "source": [ 400 | "from sklearn.model_selection import GridSearchCV\n", 401 | "from sklearn.metrics import r2_score\n", 402 | "\n", 403 | "def algorithm_pipeline(X_train_data, X_test_data, y_train_data, y_test_data, \n", 404 | " model, param_grid, cv=10, scoring_fit='neg_mean_squared_error',\n", 405 | " scoring_test=r2_score, do_probabilities = False):\n", 406 | " gs = GridSearchCV(\n", 407 | " estimator=model,\n", 408 | " param_grid=param_grid, \n", 409 | " cv=cv, \n", 410 | " n_jobs=-1, \n", 411 | " scoring=scoring_fit,\n", 412 | " verbose=2\n", 413 | " )\n", 414 | " fitted_model = gs.fit(X_train_data, y_train_data)\n", 415 | " best_model = fitted_model.best_estimator_\n", 416 | " \n", 417 | " if do_probabilities:\n", 418 | " pred = fitted_model.predict_proba(X_test_data)\n", 419 | " else:\n", 420 | " pred = fitted_model.predict(X_test_data)\n", 421 | " \n", 422 | " score = 
scoring_test(y_test_data, pred)\n", 423 | " \n", 424 | " return [best_model, pred, score]" 425 | ], 426 | "execution_count": 0, 427 | "outputs": [] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "metadata": { 432 | "id": "P--RzLUz75Wl", 433 | "colab_type": "code", 434 | "colab": {} 435 | }, 436 | "source": [ 437 | "from sklearn.ensemble import RandomForestRegressor\n", 438 | "from lightgbm import LGBMRegressor\n", 439 | "from xgboost import XGBRegressor\n", 440 | "\n", 441 | "# Defining our estimator, the algorithm to optimize\n", 442 | "models_to_train = [XGBRegressor(), LGBMRegressor(), RandomForestRegressor()]\n", 443 | "\n", 444 | "# Defining the hyperparameters to optimize\n", 445 | "grid_parameters = [\n", 446 | " { # XGBoost\n", 447 | " 'n_estimators': [400, 700, 1000],\n", 448 | " 'colsample_bytree': [0.7, 0.8],\n", 449 | " 'max_depth': [15,20,25],\n", 450 | " 'reg_alpha': [1.1, 1.2, 1.3],\n", 451 | " 'reg_lambda': [1.1, 1.2, 1.3],\n", 452 | " 'subsample': [0.7, 0.8, 0.9]\n", 453 | " },\n", 454 | " { # LightGBM\n", 455 | " 'n_estimators': [400, 700, 1000],\n", 456 | " 'learning_rate': [0.12],\n", 457 | " 'colsample_bytree': [0.7, 0.8],\n", 458 | " 'max_depth': [4],\n", 459 | " 'num_leaves': [10, 20],\n", 460 | " 'reg_alpha': [1.1, 1.2],\n", 461 | " 'reg_lambda': [1.1, 1.2],\n", 462 | " 'min_split_gain': [0.3, 0.4],\n", 463 | " 'subsample': [0.8, 0.9],\n", 464 | " 'subsample_freq': [10, 20]\n", 465 | " }, \n", 466 | " { # Random Forest\n", 467 | " 'max_depth':[3, 5, 10, 13], \n", 468 | " 'n_estimators':[100, 200, 400, 600, 900],\n", 469 | " 'max_features':[2, 4, 6, 8, 10]\n", 470 | " }\n", 471 | "]" 472 | ], 473 | "execution_count": 0, 474 | "outputs": [] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "metadata": { 479 | "id": "QEE4dwk175Wn", 480 | "colab_type": "code", 481 | "outputId": "1835710e-1db8-4576-d256-cc8fc01a7c2e", 482 | "colab": { 483 | "base_uri": "https://localhost:8080/", 484 | "height": 425 485 | } 486 | }, 487 | "source": [ 488 | 
"models_preds_scores = []\n", 489 | "\n", 490 | "for i, model in enumerate(models_to_train):\n", 491 | " params = grid_parameters[i]\n", 492 | " \n", 493 | " result = algorithm_pipeline(X_train, X_test, y_train, y_test, \n", 494 | " model, params, cv=5)\n", 495 | " models_preds_scores.append(result)" 496 | ], 497 | "execution_count": 7, 498 | "outputs": [ 499 | { 500 | "output_type": "stream", 501 | "text": [ 502 | "Fitting 5 folds for each of 486 candidates, totalling 2430 fits\n" 503 | ], 504 | "name": "stdout" 505 | }, 506 | { 507 | "output_type": "stream", 508 | "text": [ 509 | "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n", 510 | "[Parallel(n_jobs=-1)]: Done 37 tasks | elapsed: 9.1s\n", 511 | "[Parallel(n_jobs=-1)]: Done 158 tasks | elapsed: 34.8s\n", 512 | "[Parallel(n_jobs=-1)]: Done 361 tasks | elapsed: 1.6min\n", 513 | "[Parallel(n_jobs=-1)]: Done 644 tasks | elapsed: 2.8min\n", 514 | "[Parallel(n_jobs=-1)]: Done 1009 tasks | elapsed: 4.4min\n", 515 | "[Parallel(n_jobs=-1)]: Done 1454 tasks | elapsed: 6.4min\n", 516 | "[Parallel(n_jobs=-1)]: Done 1981 tasks | elapsed: 9.0min\n", 517 | "[Parallel(n_jobs=-1)]: Done 2430 out of 2430 | elapsed: 11.2min finished\n" 518 | ], 519 | "name": "stderr" 520 | }, 521 | { 522 | "output_type": "stream", 523 | "text": [ 524 | "[22:23:46] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", 525 | "Fitting 5 folds for each of 384 candidates, totalling 1920 fits\n" 526 | ], 527 | "name": "stdout" 528 | }, 529 | { 530 | "output_type": "stream", 531 | "text": [ 532 | "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n", 533 | "[Parallel(n_jobs=-1)]: Done 70 tasks | elapsed: 4.5s\n", 534 | "[Parallel(n_jobs=-1)]: Done 312 tasks | elapsed: 20.9s\n", 535 | "[Parallel(n_jobs=-1)]: Done 718 tasks | elapsed: 49.0s\n", 536 | "[Parallel(n_jobs=-1)]: Done 1284 tasks | elapsed: 1.5min\n", 537 | 
"[Parallel(n_jobs=-1)]: Done 1920 out of 1920 | elapsed: 2.2min finished\n", 538 | "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n" 539 | ], 540 | "name": "stderr" 541 | }, 542 | { 543 | "output_type": "stream", 544 | "text": [ 545 | "Fitting 5 folds for each of 100 candidates, totalling 500 fits\n" 546 | ], 547 | "name": "stdout" 548 | }, 549 | { 550 | "output_type": "stream", 551 | "text": [ 552 | "[Parallel(n_jobs=-1)]: Done 37 tasks | elapsed: 15.6s\n", 553 | "[Parallel(n_jobs=-1)]: Done 158 tasks | elapsed: 1.4min\n", 554 | "[Parallel(n_jobs=-1)]: Done 361 tasks | elapsed: 3.5min\n", 555 | "[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 5.2min finished\n" 556 | ], 557 | "name": "stderr" 558 | } 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "metadata": { 564 | "id": "4LBj5Zy1q_Mq", 565 | "colab_type": "code", 566 | "colab": { 567 | "base_uri": "https://localhost:8080/", 568 | "height": 68 569 | }, 570 | "outputId": "1ac4f255-7100-46ee-b5c6-8a447866b2df" 571 | }, 572 | "source": [ 573 | "for result in models_preds_scores:\n", 574 | " print('Model: {0}, Score: {1}'.format(type(result[0]).__name__, result[2]))" 575 | ], 576 | "execution_count": 8, 577 | "outputs": [ 578 | { 579 | "output_type": "stream", 580 | "text": [ 581 | "Model: XGBRegressor, Score: 0.8954046682953245\n", 582 | "Model: LGBMRegressor, Score: 0.8678534096374354\n", 583 | "Model: RandomForestRegressor, Score: 0.8694947742055505\n" 584 | ], 585 | "name": "stdout" 586 | } 587 | ] 588 | }, 589 | { 590 | "cell_type": "markdown", 591 | "metadata": { 592 | "id": "mJVra5Eg75Wr", 593 | "colab_type": "text" 594 | }, 595 | "source": [ 596 | "# Improving baseline with stacking" 597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "metadata": { 602 | "id": "HaorxncP75Wt", 603 | "colab_type": "code", 604 | "outputId": "f53f5ae9-d621-4118-a965-5a41fb909ecf", 605 | "colab": { 606 | "base_uri": "https://localhost:8080/", 607 | "height": 272 608 | } 609 | }, 
610 | "source": [ 611 | "from mlxtend.regressor import StackingCVRegressor\n", 612 | "from sklearn.linear_model import Ridge, Lasso\n", 613 | "from sklearn.svm import SVR\n", 614 | "\n", 615 | "xgb = XGBRegressor()\n", 616 | "lgbm = LGBMRegressor()\n", 617 | "rf = RandomForestRegressor()\n", 618 | "ridge = Ridge()\n", 619 | "lasso = Lasso()\n", 620 | "svr = SVR(kernel='linear')\n", 621 | "\n", 622 | "stack = StackingCVRegressor(regressors=(ridge, lasso, svr, rf, lgbm, xgb),\n", 623 | " meta_regressor=xgb, cv=12,\n", 624 | " use_features_in_secondary=True,\n", 625 | " store_train_meta_features=True,\n", 626 | " shuffle=False,\n", 627 | " random_state=42)\n", 628 | "\n", 629 | "stack.fit(X_train, y_train)\n", 630 | "\n", 631 | "X_test.columns = ['f0', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12']\n", 632 | "pred = stack.predict(X_test)\n", 633 | "score = r2_score(y_test, pred)\n", 634 | "print(score)" 635 | ], 636 | "execution_count": 29, 637 | "outputs": [ 638 | { 639 | "output_type": "stream", 640 | "text": [ 641 | "[22:55:36] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", 642 | "[22:55:36] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", 643 | "[22:55:36] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", 644 | "[22:55:36] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", 645 | "[22:55:36] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", 646 | "[22:55:36] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", 647 | "[22:55:36] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of 
reg:squarederror.\n", 648 | "[22:55:36] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", 649 | "[22:55:36] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", 650 | "[22:55:36] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", 651 | "[22:55:36] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", 652 | "[22:55:36] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", 653 | "[22:55:36] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", 654 | "[22:55:40] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", 655 | "0.9071185724216408\n" 656 | ], 657 | "name": "stdout" 658 | } 659 | ] 660 | } 661 | ] 662 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/casperbh96/model-stacking/blob/master/Model_Stacking.ipynb) 2 | 3 | # Model Stacking for Machine Learning 4 | 5 | This repository provides an example notebook of model stacking on the Boston housing prices dataset. 
6 | 7 | # Install 8 | 9 | To install inline, like in a Colab notebook: 10 | 11 | `!pip install lightgbm xgboost scikit-learn pandas mlxtend --upgrade` 12 | 13 | Otherwise, you can install using the requirements file: 14 | 15 | `pip install -r requirements.txt` 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2019.11.28 2 | cycler==0.10.0 3 | joblib==1.2.0 4 | kiwisolver==1.1.0 5 | lightgbm==2.3.1 6 | matplotlib==3.1.2 7 | mlxtend==0.17.0 8 | numpy==1.18.1 9 | pandas==0.25.3 10 | pyparsing==2.4.6 11 | python-dateutil==2.8.1 12 | pytz==2019.3 13 | scikit-learn==0.22.1 14 | scipy==1.4.1 15 | six==1.13.0 16 | xgboost==0.90 17 | --------------------------------------------------------------------------------