├── .gitattributes ├── 9781484277614.jpg ├── CHAPTER 4 SURVIVAL ANALYSIS WITH PYSPARK AND LIFELINES ├── Figure 8-1.png ├── Figure 8-2.png └── survival_data.xlsx ├── CHAPTER 3 LINEAR MODELLING WITH SCIKIT-LEARN, PYSPARK AND H2O ├── banking.zip ├── Figure 3-1.png ├── Figure 3-2.png └── Figure 3-3.png ├── CHAPTER 7 NEURAL NETWORKS WITH SCIKIT-LEARN, KERAS AND H2O ├── Figure 7-1.png ├── Figure 7-2.png ├── Figure 7-3.png ├── Figure 7-4.png └── Figure 7-5.png ├── CHAPTER 6 TREE MODELLING WITH SCIKI-LEARN, XGBOOST, AND PYSPARK ├── Figure 6-1.png ├── Figure 6-2.png ├── Figure 6-3.png ├── Figure 6-4.png ├── Figure 6-5.png └── Figure 6-6.png ├── CHAPTER 8 CLUSTER ANALYSIS WITH SCIKIT-LEARN, PYSPARK, AND H2O ├── Figure 8-1.png ├── Figure 8-2.png ├── Figure 8-3.png └── Mall_Customers.csv ├── CHAPTER 5 BINARY CLASSIFICATIO WITH SCIKIT-LEARN, PYSPARK AN H2O ├── Figure 5-1.png ├── Figure 5-2.png ├── Figure 5-3.png ├── Figure 5-4_.png ├── Figure 5-5_.png ├── Figure 5-6.png ├── Figure 5-7.png ├── Figure 5-8_.png └── Figure 5-9.png ├── CHAPTER 9 PRINCIPAL COMPONENT ANALYSIS WITH SCIKIT-LEARN, PYSPARK, AND H2O ├── Figure 9-1.png ├── Figure 9-2.png ├── Figure 9-3.png ├── Figure 9-4.png └── Mall_Customers.csv ├── errata.md ├── README.md ├── Contributing.md ├── LICENSE.txt └── CHAPTER 10 AUTOMATING THE MACHINE LEARNING PROCESS WITH H2O └── CHAPTER 10 AUTOMATING THE MACHINE LEARNING PROCESS WITH H2O.ipynb /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /9781484277614.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/9781484277614.jpg -------------------------------------------------------------------------------- /CHAPTER 4 SURVIVAL ANALYSIS WITH PYSPARK AND 
LIFELINES/Figure 8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 4 SURVIVAL ANALYSIS WITH PYSPARK AND LIFELINES/Figure 8-1.png -------------------------------------------------------------------------------- /CHAPTER 4 SURVIVAL ANALYSIS WITH PYSPARK AND LIFELINES/Figure 8-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 4 SURVIVAL ANALYSIS WITH PYSPARK AND LIFELINES/Figure 8-2.png -------------------------------------------------------------------------------- /CHAPTER 3 LINEAR MODELLING WITH SCIKIT-LEARN, PYSPARK AND H2O/banking.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 3 LINEAR MODELLING WITH SCIKIT-LEARN, PYSPARK AND H2O/banking.zip -------------------------------------------------------------------------------- /CHAPTER 4 SURVIVAL ANALYSIS WITH PYSPARK AND LIFELINES/survival_data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 4 SURVIVAL ANALYSIS WITH PYSPARK AND LIFELINES/survival_data.xlsx -------------------------------------------------------------------------------- /CHAPTER 7 NEURAL NETWORKS WITH SCIKIT-LEARN, KERAS AND H2O/Figure 7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 7 NEURAL NETWORKS WITH SCIKIT-LEARN, KERAS AND H2O/Figure 7-1.png -------------------------------------------------------------------------------- /CHAPTER 7 NEURAL NETWORKS WITH SCIKIT-LEARN, KERAS AND H2O/Figure 7-2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 7 NEURAL NETWORKS WITH SCIKIT-LEARN, KERAS AND H2O/Figure 7-2.png -------------------------------------------------------------------------------- /CHAPTER 7 NEURAL NETWORKS WITH SCIKIT-LEARN, KERAS AND H2O/Figure 7-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 7 NEURAL NETWORKS WITH SCIKIT-LEARN, KERAS AND H2O/Figure 7-3.png -------------------------------------------------------------------------------- /CHAPTER 7 NEURAL NETWORKS WITH SCIKIT-LEARN, KERAS AND H2O/Figure 7-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 7 NEURAL NETWORKS WITH SCIKIT-LEARN, KERAS AND H2O/Figure 7-4.png -------------------------------------------------------------------------------- /CHAPTER 7 NEURAL NETWORKS WITH SCIKIT-LEARN, KERAS AND H2O/Figure 7-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 7 NEURAL NETWORKS WITH SCIKIT-LEARN, KERAS AND H2O/Figure 7-5.png -------------------------------------------------------------------------------- /CHAPTER 3 LINEAR MODELLING WITH SCIKIT-LEARN, PYSPARK AND H2O/Figure 3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 3 LINEAR MODELLING WITH SCIKIT-LEARN, PYSPARK AND H2O/Figure 3-1.png -------------------------------------------------------------------------------- /CHAPTER 3 LINEAR MODELLING WITH SCIKIT-LEARN, PYSPARK AND H2O/Figure 3-2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 3 LINEAR MODELLING WITH SCIKIT-LEARN, PYSPARK AND H2O/Figure 3-2.png -------------------------------------------------------------------------------- /CHAPTER 3 LINEAR MODELLING WITH SCIKIT-LEARN, PYSPARK AND H2O/Figure 3-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 3 LINEAR MODELLING WITH SCIKIT-LEARN, PYSPARK AND H2O/Figure 3-3.png -------------------------------------------------------------------------------- /CHAPTER 6 TREE MODELLING WITH SCIKI-LEARN, XGBOOST, AND PYSPARK/Figure 6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 6 TREE MODELLING WITH SCIKI-LEARN, XGBOOST, AND PYSPARK/Figure 6-1.png -------------------------------------------------------------------------------- /CHAPTER 6 TREE MODELLING WITH SCIKI-LEARN, XGBOOST, AND PYSPARK/Figure 6-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 6 TREE MODELLING WITH SCIKI-LEARN, XGBOOST, AND PYSPARK/Figure 6-2.png -------------------------------------------------------------------------------- /CHAPTER 6 TREE MODELLING WITH SCIKI-LEARN, XGBOOST, AND PYSPARK/Figure 6-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 6 TREE MODELLING WITH SCIKI-LEARN, XGBOOST, AND PYSPARK/Figure 6-3.png -------------------------------------------------------------------------------- /CHAPTER 6 TREE MODELLING WITH SCIKI-LEARN, XGBOOST, AND PYSPARK/Figure 6-4.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 6 TREE MODELLING WITH SCIKI-LEARN, XGBOOST, AND PYSPARK/Figure 6-4.png -------------------------------------------------------------------------------- /CHAPTER 6 TREE MODELLING WITH SCIKI-LEARN, XGBOOST, AND PYSPARK/Figure 6-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 6 TREE MODELLING WITH SCIKI-LEARN, XGBOOST, AND PYSPARK/Figure 6-5.png -------------------------------------------------------------------------------- /CHAPTER 6 TREE MODELLING WITH SCIKI-LEARN, XGBOOST, AND PYSPARK/Figure 6-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 6 TREE MODELLING WITH SCIKI-LEARN, XGBOOST, AND PYSPARK/Figure 6-6.png -------------------------------------------------------------------------------- /CHAPTER 8 CLUSTER ANALYSIS WITH SCIKIT-LEARN, PYSPARK, AND H2O/Figure 8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 8 CLUSTER ANALYSIS WITH SCIKIT-LEARN, PYSPARK, AND H2O/Figure 8-1.png -------------------------------------------------------------------------------- /CHAPTER 8 CLUSTER ANALYSIS WITH SCIKIT-LEARN, PYSPARK, AND H2O/Figure 8-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 8 CLUSTER ANALYSIS WITH SCIKIT-LEARN, PYSPARK, AND H2O/Figure 8-2.png -------------------------------------------------------------------------------- /CHAPTER 8 CLUSTER ANALYSIS WITH SCIKIT-LEARN, PYSPARK, AND H2O/Figure 8-3.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 8 CLUSTER ANALYSIS WITH SCIKIT-LEARN, PYSPARK, AND H2O/Figure 8-3.png -------------------------------------------------------------------------------- /CHAPTER 5 BINARY CLASSIFICATIO WITH SCIKIT-LEARN, PYSPARK AN H2O/Figure 5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 5 BINARY CLASSIFICATIO WITH SCIKIT-LEARN, PYSPARK AN H2O/Figure 5-1.png -------------------------------------------------------------------------------- /CHAPTER 5 BINARY CLASSIFICATIO WITH SCIKIT-LEARN, PYSPARK AN H2O/Figure 5-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 5 BINARY CLASSIFICATIO WITH SCIKIT-LEARN, PYSPARK AN H2O/Figure 5-2.png -------------------------------------------------------------------------------- /CHAPTER 5 BINARY CLASSIFICATIO WITH SCIKIT-LEARN, PYSPARK AN H2O/Figure 5-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 5 BINARY CLASSIFICATIO WITH SCIKIT-LEARN, PYSPARK AN H2O/Figure 5-3.png -------------------------------------------------------------------------------- /CHAPTER 5 BINARY CLASSIFICATIO WITH SCIKIT-LEARN, PYSPARK AN H2O/Figure 5-4_.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 5 BINARY CLASSIFICATIO WITH SCIKIT-LEARN, PYSPARK AN H2O/Figure 5-4_.png -------------------------------------------------------------------------------- /CHAPTER 5 BINARY CLASSIFICATIO WITH SCIKIT-LEARN, PYSPARK AN H2O/Figure 
5-5_.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 5 BINARY CLASSIFICATIO WITH SCIKIT-LEARN, PYSPARK AN H2O/Figure 5-5_.png -------------------------------------------------------------------------------- /CHAPTER 5 BINARY CLASSIFICATIO WITH SCIKIT-LEARN, PYSPARK AN H2O/Figure 5-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 5 BINARY CLASSIFICATIO WITH SCIKIT-LEARN, PYSPARK AN H2O/Figure 5-6.png -------------------------------------------------------------------------------- /CHAPTER 5 BINARY CLASSIFICATIO WITH SCIKIT-LEARN, PYSPARK AN H2O/Figure 5-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 5 BINARY CLASSIFICATIO WITH SCIKIT-LEARN, PYSPARK AN H2O/Figure 5-7.png -------------------------------------------------------------------------------- /CHAPTER 5 BINARY CLASSIFICATIO WITH SCIKIT-LEARN, PYSPARK AN H2O/Figure 5-8_.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 5 BINARY CLASSIFICATIO WITH SCIKIT-LEARN, PYSPARK AN H2O/Figure 5-8_.png -------------------------------------------------------------------------------- /CHAPTER 5 BINARY CLASSIFICATIO WITH SCIKIT-LEARN, PYSPARK AN H2O/Figure 5-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 5 BINARY CLASSIFICATIO WITH SCIKIT-LEARN, PYSPARK AN H2O/Figure 5-9.png -------------------------------------------------------------------------------- /CHAPTER 9 PRINCIPAL COMPONENT ANALYSIS WITH SCIKIT-LEARN, PYSPARK, 
AND H2O/Figure 9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 9 PRINCIPAL COMPONENT ANALYSIS WITH SCIKIT-LEARN, PYSPARK, AND H2O/Figure 9-1.png -------------------------------------------------------------------------------- /CHAPTER 9 PRINCIPAL COMPONENT ANALYSIS WITH SCIKIT-LEARN, PYSPARK, AND H2O/Figure 9-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 9 PRINCIPAL COMPONENT ANALYSIS WITH SCIKIT-LEARN, PYSPARK, AND H2O/Figure 9-2.png -------------------------------------------------------------------------------- /CHAPTER 9 PRINCIPAL COMPONENT ANALYSIS WITH SCIKIT-LEARN, PYSPARK, AND H2O/Figure 9-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 9 PRINCIPAL COMPONENT ANALYSIS WITH SCIKIT-LEARN, PYSPARK, AND H2O/Figure 9-3.png -------------------------------------------------------------------------------- /CHAPTER 9 PRINCIPAL COMPONENT ANALYSIS WITH SCIKIT-LEARN, PYSPARK, AND H2O/Figure 9-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/data-science-solutions-python/HEAD/CHAPTER 9 PRINCIPAL COMPONENT ANALYSIS WITH SCIKIT-LEARN, PYSPARK, AND H2O/Figure 9-4.png -------------------------------------------------------------------------------- /errata.md: -------------------------------------------------------------------------------- 1 | # Errata for *Data Science Solutions with Python* 2 | 3 | On **page xx** [Summary of error]: 4 | 5 | Details of error here. Highlight key pieces in **bold**. 6 | 7 | *** 8 | 9 | On **page xx** [Summary of error]: 10 | 11 | Details of error here. Highlight key pieces in **bold**. 
12 | 13 | *** -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Apress Source Code 2 | 3 | This repository accompanies [*Data Science Solutions with Python*](https://www.apress.com/9781484277614) by Tshepo Chris Nokeri (Apress, 2022). 4 | 5 | [comment]: #cover 6 | ![Cover image](9781484277614.jpg) 7 | 8 | Download the files as a zip using the green button, or clone the repository to your machine using Git. 9 | 10 | ## Releases 11 | 12 | Release v1.0 corresponds to the code in the published book, without corrections or updates. 13 | 14 | ## Contributions 15 | 16 | See the file Contributing.md for more information on how you can contribute to this repository. -------------------------------------------------------------------------------- /Contributing.md: -------------------------------------------------------------------------------- 1 | # Contributing to Apress Source Code 2 | 3 | Copyright for Apress source code belongs to the author(s). However, under fair use you are encouraged to fork and contribute minor corrections and updates for the benefit of the author(s) and other readers. 4 | 5 | ## How to Contribute 6 | 7 | 1. Make sure you have a GitHub account. 8 | 2. Fork the repository for the relevant book. 9 | 3. Create a new branch on which to make your change, e.g. 10 | `git checkout -b my_code_contribution` 11 | 4. Commit your change. Include a commit message describing the correction. Please note that if your commit message is not clear, the correction will not be accepted. 12 | 5. Submit a pull request. 13 | 14 | Thank you for your contribution! 
-------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Freeware License, some rights reserved 2 | 3 | Copyright (c) 2022 Tshepo Chris Nokeri 4 | 5 | Permission is hereby granted, free of charge, to anyone obtaining a copy 6 | of this software and associated documentation files (the "Software"), 7 | to work with the Software within the limits of freeware distribution and fair use. 8 | This includes the rights to use, copy, and modify the Software for personal use. 9 | Users are also allowed and encouraged to submit corrections and modifications 10 | to the Software for the benefit of other users. 11 | 12 | It is not allowed to reuse, modify, or redistribute the Software for 13 | commercial use in any way, or for a user’s educational materials such as books 14 | or blog articles without prior permission from the copyright holder. 15 | 16 | The above copyright notice and this permission notice need to be included 17 | in all copies or substantial portions of the software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS OR APRESS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 
26 | 27 | 28 | -------------------------------------------------------------------------------- /CHAPTER 8 CLUSTER ANALYSIS WITH SCIKIT-LEARN, PYSPARK, AND H2O/Mall_Customers.csv: -------------------------------------------------------------------------------- 1 | CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100) 2 | 1,Male,19,15,39 3 | 2,Male,21,15,81 4 | 3,Female,20,16,6 5 | 4,Female,23,16,77 6 | 5,Female,31,17,40 7 | 6,Female,22,17,76 8 | 7,Female,35,18,6 9 | 8,Female,23,18,94 10 | 9,Male,64,19,3 11 | 10,Female,30,19,72 12 | 11,Male,67,19,14 13 | 12,Female,35,19,99 14 | 13,Female,58,20,15 15 | 14,Female,24,20,77 16 | 15,Male,37,20,13 17 | 16,Male,22,20,79 18 | 17,Female,35,21,35 19 | 18,Male,20,21,66 20 | 19,Male,52,23,29 21 | 20,Female,35,23,98 22 | 21,Male,35,24,35 23 | 22,Male,25,24,73 24 | 23,Female,46,25,5 25 | 24,Male,31,25,73 26 | 25,Female,54,28,14 27 | 26,Male,29,28,82 28 | 27,Female,45,28,32 29 | 28,Male,35,28,61 30 | 29,Female,40,29,31 31 | 30,Female,23,29,87 32 | 31,Male,60,30,4 33 | 32,Female,21,30,73 34 | 33,Male,53,33,4 35 | 34,Male,18,33,92 36 | 35,Female,49,33,14 37 | 36,Female,21,33,81 38 | 37,Female,42,34,17 39 | 38,Female,30,34,73 40 | 39,Female,36,37,26 41 | 40,Female,20,37,75 42 | 41,Female,65,38,35 43 | 42,Male,24,38,92 44 | 43,Male,48,39,36 45 | 44,Female,31,39,61 46 | 45,Female,49,39,28 47 | 46,Female,24,39,65 48 | 47,Female,50,40,55 49 | 48,Female,27,40,47 50 | 49,Female,29,40,42 51 | 50,Female,31,40,42 52 | 51,Female,49,42,52 53 | 52,Male,33,42,60 54 | 53,Female,31,43,54 55 | 54,Male,59,43,60 56 | 55,Female,50,43,45 57 | 56,Male,47,43,41 58 | 57,Female,51,44,50 59 | 58,Male,69,44,46 60 | 59,Female,27,46,51 61 | 60,Male,53,46,46 62 | 61,Male,70,46,56 63 | 62,Male,19,46,55 64 | 63,Female,67,47,52 65 | 64,Female,54,47,59 66 | 65,Male,63,48,51 67 | 66,Male,18,48,59 68 | 67,Female,43,48,50 69 | 68,Female,68,48,48 70 | 69,Male,19,48,59 71 | 70,Female,32,48,47 72 | 71,Male,70,49,55 73 | 72,Female,47,49,42 74 | 73,Female,60,50,49 
75 | 74,Female,60,50,56 76 | 75,Male,59,54,47 77 | 76,Male,26,54,54 78 | 77,Female,45,54,53 79 | 78,Male,40,54,48 80 | 79,Female,23,54,52 81 | 80,Female,49,54,42 82 | 81,Male,57,54,51 83 | 82,Male,38,54,55 84 | 83,Male,67,54,41 85 | 84,Female,46,54,44 86 | 85,Female,21,54,57 87 | 86,Male,48,54,46 88 | 87,Female,55,57,58 89 | 88,Female,22,57,55 90 | 89,Female,34,58,60 91 | 90,Female,50,58,46 92 | 91,Female,68,59,55 93 | 92,Male,18,59,41 94 | 93,Male,48,60,49 95 | 94,Female,40,60,40 96 | 95,Female,32,60,42 97 | 96,Male,24,60,52 98 | 97,Female,47,60,47 99 | 98,Female,27,60,50 100 | 99,Male,48,61,42 101 | 100,Male,20,61,49 102 | 101,Female,23,62,41 103 | 102,Female,49,62,48 104 | 103,Male,67,62,59 105 | 104,Male,26,62,55 106 | 105,Male,49,62,56 107 | 106,Female,21,62,42 108 | 107,Female,66,63,50 109 | 108,Male,54,63,46 110 | 109,Male,68,63,43 111 | 110,Male,66,63,48 112 | 111,Male,65,63,52 113 | 112,Female,19,63,54 114 | 113,Female,38,64,42 115 | 114,Male,19,64,46 116 | 115,Female,18,65,48 117 | 116,Female,19,65,50 118 | 117,Female,63,65,43 119 | 118,Female,49,65,59 120 | 119,Female,51,67,43 121 | 120,Female,50,67,57 122 | 121,Male,27,67,56 123 | 122,Female,38,67,40 124 | 123,Female,40,69,58 125 | 124,Male,39,69,91 126 | 125,Female,23,70,29 127 | 126,Female,31,70,77 128 | 127,Male,43,71,35 129 | 128,Male,40,71,95 130 | 129,Male,59,71,11 131 | 130,Male,38,71,75 132 | 131,Male,47,71,9 133 | 132,Male,39,71,75 134 | 133,Female,25,72,34 135 | 134,Female,31,72,71 136 | 135,Male,20,73,5 137 | 136,Female,29,73,88 138 | 137,Female,44,73,7 139 | 138,Male,32,73,73 140 | 139,Male,19,74,10 141 | 140,Female,35,74,72 142 | 141,Female,57,75,5 143 | 142,Male,32,75,93 144 | 143,Female,28,76,40 145 | 144,Female,32,76,87 146 | 145,Male,25,77,12 147 | 146,Male,28,77,97 148 | 147,Male,48,77,36 149 | 148,Female,32,77,74 150 | 149,Female,34,78,22 151 | 150,Male,34,78,90 152 | 151,Male,43,78,17 153 | 152,Male,39,78,88 154 | 153,Female,44,78,20 155 | 154,Female,38,78,76 156 | 
155,Female,47,78,16 157 | 156,Female,27,78,89 158 | 157,Male,37,78,1 159 | 158,Female,30,78,78 160 | 159,Male,34,78,1 161 | 160,Female,30,78,73 162 | 161,Female,56,79,35 163 | 162,Female,29,79,83 164 | 163,Male,19,81,5 165 | 164,Female,31,81,93 166 | 165,Male,50,85,26 167 | 166,Female,36,85,75 168 | 167,Male,42,86,20 169 | 168,Female,33,86,95 170 | 169,Female,36,87,27 171 | 170,Male,32,87,63 172 | 171,Male,40,87,13 173 | 172,Male,28,87,75 174 | 173,Male,36,87,10 175 | 174,Male,36,87,92 176 | 175,Female,52,88,13 177 | 176,Female,30,88,86 178 | 177,Male,58,88,15 179 | 178,Male,27,88,69 180 | 179,Male,59,93,14 181 | 180,Male,35,93,90 182 | 181,Female,37,97,32 183 | 182,Female,32,97,86 184 | 183,Male,46,98,15 185 | 184,Female,29,98,88 186 | 185,Female,41,99,39 187 | 186,Male,30,99,97 188 | 187,Female,54,101,24 189 | 188,Male,28,101,68 190 | 189,Female,41,103,17 191 | 190,Female,36,103,85 192 | 191,Female,34,103,23 193 | 192,Female,32,103,69 194 | 193,Male,33,113,8 195 | 194,Female,38,113,91 196 | 195,Female,47,120,16 197 | 196,Female,35,120,79 198 | 197,Female,45,126,28 199 | 198,Male,32,126,74 200 | 199,Male,32,137,18 201 | 200,Male,30,137,83 202 | -------------------------------------------------------------------------------- /CHAPTER 9 PRINCIPAL COMPONENT ANALYSIS WITH SCIKIT-LEARN, PYSPARK, AND H2O/Mall_Customers.csv: -------------------------------------------------------------------------------- 1 | CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100) 2 | 1,Male,19,15,39 3 | 2,Male,21,15,81 4 | 3,Female,20,16,6 5 | 4,Female,23,16,77 6 | 5,Female,31,17,40 7 | 6,Female,22,17,76 8 | 7,Female,35,18,6 9 | 8,Female,23,18,94 10 | 9,Male,64,19,3 11 | 10,Female,30,19,72 12 | 11,Male,67,19,14 13 | 12,Female,35,19,99 14 | 13,Female,58,20,15 15 | 14,Female,24,20,77 16 | 15,Male,37,20,13 17 | 16,Male,22,20,79 18 | 17,Female,35,21,35 19 | 18,Male,20,21,66 20 | 19,Male,52,23,29 21 | 20,Female,35,23,98 22 | 21,Male,35,24,35 23 | 22,Male,25,24,73 24 | 
23,Female,46,25,5 25 | 24,Male,31,25,73 26 | 25,Female,54,28,14 27 | 26,Male,29,28,82 28 | 27,Female,45,28,32 29 | 28,Male,35,28,61 30 | 29,Female,40,29,31 31 | 30,Female,23,29,87 32 | 31,Male,60,30,4 33 | 32,Female,21,30,73 34 | 33,Male,53,33,4 35 | 34,Male,18,33,92 36 | 35,Female,49,33,14 37 | 36,Female,21,33,81 38 | 37,Female,42,34,17 39 | 38,Female,30,34,73 40 | 39,Female,36,37,26 41 | 40,Female,20,37,75 42 | 41,Female,65,38,35 43 | 42,Male,24,38,92 44 | 43,Male,48,39,36 45 | 44,Female,31,39,61 46 | 45,Female,49,39,28 47 | 46,Female,24,39,65 48 | 47,Female,50,40,55 49 | 48,Female,27,40,47 50 | 49,Female,29,40,42 51 | 50,Female,31,40,42 52 | 51,Female,49,42,52 53 | 52,Male,33,42,60 54 | 53,Female,31,43,54 55 | 54,Male,59,43,60 56 | 55,Female,50,43,45 57 | 56,Male,47,43,41 58 | 57,Female,51,44,50 59 | 58,Male,69,44,46 60 | 59,Female,27,46,51 61 | 60,Male,53,46,46 62 | 61,Male,70,46,56 63 | 62,Male,19,46,55 64 | 63,Female,67,47,52 65 | 64,Female,54,47,59 66 | 65,Male,63,48,51 67 | 66,Male,18,48,59 68 | 67,Female,43,48,50 69 | 68,Female,68,48,48 70 | 69,Male,19,48,59 71 | 70,Female,32,48,47 72 | 71,Male,70,49,55 73 | 72,Female,47,49,42 74 | 73,Female,60,50,49 75 | 74,Female,60,50,56 76 | 75,Male,59,54,47 77 | 76,Male,26,54,54 78 | 77,Female,45,54,53 79 | 78,Male,40,54,48 80 | 79,Female,23,54,52 81 | 80,Female,49,54,42 82 | 81,Male,57,54,51 83 | 82,Male,38,54,55 84 | 83,Male,67,54,41 85 | 84,Female,46,54,44 86 | 85,Female,21,54,57 87 | 86,Male,48,54,46 88 | 87,Female,55,57,58 89 | 88,Female,22,57,55 90 | 89,Female,34,58,60 91 | 90,Female,50,58,46 92 | 91,Female,68,59,55 93 | 92,Male,18,59,41 94 | 93,Male,48,60,49 95 | 94,Female,40,60,40 96 | 95,Female,32,60,42 97 | 96,Male,24,60,52 98 | 97,Female,47,60,47 99 | 98,Female,27,60,50 100 | 99,Male,48,61,42 101 | 100,Male,20,61,49 102 | 101,Female,23,62,41 103 | 102,Female,49,62,48 104 | 103,Male,67,62,59 105 | 104,Male,26,62,55 106 | 105,Male,49,62,56 107 | 106,Female,21,62,42 108 | 107,Female,66,63,50 109 | 
108,Male,54,63,46 110 | 109,Male,68,63,43 111 | 110,Male,66,63,48 112 | 111,Male,65,63,52 113 | 112,Female,19,63,54 114 | 113,Female,38,64,42 115 | 114,Male,19,64,46 116 | 115,Female,18,65,48 117 | 116,Female,19,65,50 118 | 117,Female,63,65,43 119 | 118,Female,49,65,59 120 | 119,Female,51,67,43 121 | 120,Female,50,67,57 122 | 121,Male,27,67,56 123 | 122,Female,38,67,40 124 | 123,Female,40,69,58 125 | 124,Male,39,69,91 126 | 125,Female,23,70,29 127 | 126,Female,31,70,77 128 | 127,Male,43,71,35 129 | 128,Male,40,71,95 130 | 129,Male,59,71,11 131 | 130,Male,38,71,75 132 | 131,Male,47,71,9 133 | 132,Male,39,71,75 134 | 133,Female,25,72,34 135 | 134,Female,31,72,71 136 | 135,Male,20,73,5 137 | 136,Female,29,73,88 138 | 137,Female,44,73,7 139 | 138,Male,32,73,73 140 | 139,Male,19,74,10 141 | 140,Female,35,74,72 142 | 141,Female,57,75,5 143 | 142,Male,32,75,93 144 | 143,Female,28,76,40 145 | 144,Female,32,76,87 146 | 145,Male,25,77,12 147 | 146,Male,28,77,97 148 | 147,Male,48,77,36 149 | 148,Female,32,77,74 150 | 149,Female,34,78,22 151 | 150,Male,34,78,90 152 | 151,Male,43,78,17 153 | 152,Male,39,78,88 154 | 153,Female,44,78,20 155 | 154,Female,38,78,76 156 | 155,Female,47,78,16 157 | 156,Female,27,78,89 158 | 157,Male,37,78,1 159 | 158,Female,30,78,78 160 | 159,Male,34,78,1 161 | 160,Female,30,78,73 162 | 161,Female,56,79,35 163 | 162,Female,29,79,83 164 | 163,Male,19,81,5 165 | 164,Female,31,81,93 166 | 165,Male,50,85,26 167 | 166,Female,36,85,75 168 | 167,Male,42,86,20 169 | 168,Female,33,86,95 170 | 169,Female,36,87,27 171 | 170,Male,32,87,63 172 | 171,Male,40,87,13 173 | 172,Male,28,87,75 174 | 173,Male,36,87,10 175 | 174,Male,36,87,92 176 | 175,Female,52,88,13 177 | 176,Female,30,88,86 178 | 177,Male,58,88,15 179 | 178,Male,27,88,69 180 | 179,Male,59,93,14 181 | 180,Male,35,93,90 182 | 181,Female,37,97,32 183 | 182,Female,32,97,86 184 | 183,Male,46,98,15 185 | 184,Female,29,98,88 186 | 185,Female,41,99,39 187 | 186,Male,30,99,97 188 | 187,Female,54,101,24 189 | 
188,Male,28,101,68 190 | 189,Female,41,103,17 191 | 190,Female,36,103,85 192 | 191,Female,34,103,23 193 | 192,Female,32,103,69 194 | 193,Male,33,113,8 195 | 194,Female,38,113,91 196 | 195,Female,47,120,16 197 | 196,Female,35,120,79 198 | 197,Female,45,126,28 199 | 198,Male,32,126,74 200 | 199,Male,32,137,18 201 | 200,Male,30,137,83 202 | -------------------------------------------------------------------------------- /CHAPTER 10 AUTOMATING THE MACHINE LEARNING PROCESS WITH H2O/CHAPTER 10 AUTOMATING THE MACHINE LEARNING PROCESS WITH H2O.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Data Science Solutions with Python by Tshepo Chris Nokeri, Apress. 2021" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# CHAPTER 10: AUTOMATING THE MACHINE LEARNING PROCESS WITH H2O" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "This is a short yet insightful chapter that reasonably concludes the book by introducing a straightforward approach towards automating machine learning processes with the help of a widespread machine learning framework known as H2O." 
22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import warnings\n", 31 | "warnings.filterwarnings(\"ignore\")" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "# Data Preprocessing" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 2, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "import numpy as np\n", 48 | "import pandas as pd\n", 49 | "from sklearn.preprocessing import StandardScaler\n", 50 | "from sklearn.model_selection import train_test_split\n", 51 | "df = pd.read_csv(r\"C:\\Users\\i5 lenov\\Desktop\\Source Code-20210822T014112Z-001\\Source Code\\Chapter_3_Parametric_Methods_Linear_Regression_Analysis\\WA_Fn-UseC_-Marketing_Customer_Value_Analysis.csv\")\n", 52 | "drop_column_names = df.columns[[0, 6]]\n", 53 | "initial_data = df.drop(drop_column_names, axis=\"columns\")\n", 54 | "initial_data.iloc[::, 0] = pd.get_dummies(initial_data.iloc[::, 0])\n", 55 | "initial_data.iloc[::, 2] = pd.get_dummies(initial_data.iloc[::, 2])\n", 56 | "initial_data.iloc[::, 3] = pd.get_dummies(initial_data.iloc[::, 3])\n", 57 | "initial_data.iloc[::, 4] = pd.get_dummies(initial_data.iloc[::, 4])\n", 58 | "initial_data.iloc[::, 5] = pd.get_dummies(initial_data.iloc[::, 5])\n", 59 | "initial_data.iloc[::, 6] = pd.get_dummies(initial_data.iloc[::, 6])\n", 60 | "initial_data.iloc[::, 7] = pd.get_dummies(initial_data.iloc[::, 7])\n", 61 | "initial_data.iloc[::, 8] = pd.get_dummies(initial_data.iloc[::, 8])\n", 62 | "initial_data.iloc[::, 9] = pd.get_dummies(initial_data.iloc[::, 9])\n", 63 | "initial_data.iloc[::, 15] = pd.get_dummies(initial_data.iloc[::, 15])\n", 64 | "initial_data.iloc[::, 16] = pd.get_dummies(initial_data.iloc[::, 16])\n", 65 | "initial_data.iloc[::, 17] = pd.get_dummies(initial_data.iloc[::, 17])\n", 66 | "initial_data.iloc[::, 18] = pd.get_dummies(initial_data.iloc[::, 18])\n", 67 | 
"initial_data.iloc[::, 20] = pd.get_dummies(initial_data.iloc[::, 20])\n", 68 | "initial_data.iloc[::, 21] = pd.get_dummies(initial_data.iloc[::, 21])" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 3, 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "name": "stdout", 78 | "output_type": "stream", 79 | "text": [ 80 | "Checking whether there is an H2O instance running at http://localhost:54321 ..... not found.\n", 81 | "Attempting to start a local H2O server...\n", 82 | "; OpenJDK 64-Bit Server VM (build 11.0.6+8-b765.1, mixed mode)\n", 83 | " Starting server from C:\\Users\\i5 lenov\\AppData\\Roaming\\Python\\Python37\\site-packages\\h2o\\backend\\bin\\h2o.jar\n", 84 | " Ice root: C:\\Users\\I5LENO~1\\AppData\\Local\\Temp\\tmp_fwqbteg\n", 85 | " JVM stdout: C:\\Users\\I5LENO~1\\AppData\\Local\\Temp\\tmp_fwqbteg\\h2o_i5_lenov_started_from_python.out\n", 86 | " JVM stderr: C:\\Users\\I5LENO~1\\AppData\\Local\\Temp\\tmp_fwqbteg\\h2o_i5_lenov_started_from_python.err\n", 87 | " Server is running at http://127.0.0.1:54321\n", 88 | "Connecting to H2O server at http://127.0.0.1:54321 ... successful.\n", 89 | "Warning: Your H2O cluster version is too old (1 year, 1 month and 2 days)! Please download and install the latest version from http://h2o.ai/download/\n" 90 | ] 91 | }, 92 | { 93 | "data": { 94 | "text/html": [ 95 | "
\n", 96 | "\n", 97 | "\n", 98 | "\n", 99 | "\n", 100 | "\n", 101 | "\n", 102 | "\n", 103 | "\n", 104 | "\n", 105 | "\n", 106 | "\n", 107 | "\n", 108 | "\n", 109 | "\n", 110 | "\n", 111 | "\n", 112 | "\n", 113 | "\n", 114 | "\n", 115 | "\n", 116 | "\n", 117 | "\n", 118 | "\n", 119 | "\n", 120 | "\n", 121 | "\n", 122 | "\n", 123 | "\n", 124 | "\n", 125 | "\n", 126 | "
H2O_cluster_uptime:04 secs
H2O_cluster_timezone:Africa/Harare
H2O_data_parsing_timezone:UTC
H2O_cluster_version:3.30.0.7
H2O_cluster_version_age:1 year, 1 month and 2 days !!!
H2O_cluster_name:H2O_from_python_i5_lenov_v8qjih
H2O_cluster_total_nodes:1
H2O_cluster_free_memory:2.975 Gb
H2O_cluster_total_cores:4
H2O_cluster_allowed_cores:4
H2O_cluster_status:accepting new members, healthy
H2O_connection_url:http://127.0.0.1:54321
H2O_connection_proxy:{\"http\": null, \"https\": null}
H2O_internal_security:False
H2O_API_Extensions:Amazon S3, Algos, AutoML, Core V3, TargetEncoder, Core V4
Python_version:3.7.6 final
" 127 | ], 128 | "text/plain": [ 129 | "-------------------------- ---------------------------------------------------------\n", 130 | "H2O_cluster_uptime: 04 secs\n", 131 | "H2O_cluster_timezone: Africa/Harare\n", 132 | "H2O_data_parsing_timezone: UTC\n", 133 | "H2O_cluster_version: 3.30.0.7\n", 134 | "H2O_cluster_version_age: 1 year, 1 month and 2 days !!!\n", 135 | "H2O_cluster_name: H2O_from_python_i5_lenov_v8qjih\n", 136 | "H2O_cluster_total_nodes: 1\n", 137 | "H2O_cluster_free_memory: 2.975 Gb\n", 138 | "H2O_cluster_total_cores: 4\n", 139 | "H2O_cluster_allowed_cores: 4\n", 140 | "H2O_cluster_status: accepting new members, healthy\n", 141 | "H2O_connection_url: http://127.0.0.1:54321\n", 142 | "H2O_connection_proxy: {\"http\": null, \"https\": null}\n", 143 | "H2O_internal_security: False\n", 144 | "H2O_API_Extensions: Amazon S3, Algos, AutoML, Core V3, TargetEncoder, Core V4\n", 145 | "Python_version: 3.7.6 final\n", 146 | "-------------------------- ---------------------------------------------------------" 147 | ] 148 | }, 149 | "metadata": {}, 150 | "output_type": "display_data" 151 | } 152 | ], 153 | "source": [ 154 | "import h2o as initialize_h2o\n", 155 | "initialize_h2o.init()" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 4, 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "name": "stdout", 165 | "output_type": "stream", 166 | "text": [ 167 | "Parse progress: |█████████████████████████████████████████████████████████| 100%\n" 168 | ] 169 | } 170 | ], 171 | "source": [ 172 | "h2o_data = initialize_h2o.H2OFrame(initial_data)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 5, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "int_x = initial_data.iloc[::,0:19]\n", 182 | "fin_x = initial_data.iloc[::,19:21]\n", 183 | "x_combined = pd.concat([int_x, fin_x], axis=1)\n", 184 | "x_list = list(x_combined.columns)\n", 185 | "y_list = initial_data.columns[19]\n", 186 | "y = 
y_list\n", 187 | "x = h2o_data.col_names\n", 188 | "x.remove(y_list)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 6, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "h2o_training_data, h2o_validation_data, h2o_test_data = h2o_data.split_frame(ratios=[.8,.1])" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "## Develop the AutoML Model" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": 7, 210 | "metadata": {}, 211 | "outputs": [ 212 | { 213 | "name": "stdout", 214 | "output_type": "stream", 215 | "text": [ 216 | "AutoML progress: |\n", 217 | "11:57:21.755: User specified a validation frame with cross-validation still enabled. Please note that the models will still be validated using cross-validation only, the validation frame will be used to provide purely informative validation metrics on the trained models.\n", 218 | "11:57:21.787: AutoML: XGBoost is not available; skipping it.\n", 219 | "\n", 220 | "████████████████████████████████████████████████████████| 100%\n" 221 | ] 222 | } 223 | ], 224 | "source": [ 225 | "from h2o.automl import H2OAutoML\n", 226 | "h2o_automatic_ml = H2OAutoML(max_runtime_secs = 240)\n", 227 | "h2o_automatic_ml.train(x= x,y= y,training_frame = h2o_training_data, validation_frame = h2o_validation_data)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "## Leader" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 8, 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "data": { 244 | "text/html": [ 245 | "\n", 246 | "\n", 247 | "\n", 248 | "\n", 249 | "\n", 250 | "\n", 251 | "\n", 252 | "\n", 253 | "\n", 254 | "\n", 255 | "\n", 256 | "\n", 257 | "\n", 258 | "\n", 259 | "\n", 260 | "\n", 261 | "
model_id mean_residual_deviance rmse mse mae rmsle
StackedEnsemble_AllModels_AutoML_20210824_115721 18371.3135.54118371.386.62570.482043
StackedEnsemble_BestOfFamily_AutoML_20210824_115721 18566.8136.26 18566.887.79590.477828
GBM_1_AutoML_20210824_115721 19392.8139.25819392.890.06520.490441
GBM_3_AutoML_20210824_115721 19397.8139.27619397.890.34670.490855
GBM_2_AutoML_20210824_115721 19467.3139.52519467.389.99080.491887
GBM_grid__1_AutoML_20210824_115721_model_4 19607.9140.02819607.991.33110.495784
GBM_4_AutoML_20210824_115721 19633.4140.11919633.489.95530.499693
XRT_1_AutoML_20210824_115721 19717.6140.42 19717.689.74740.485144
GBM_grid__1_AutoML_20210824_115721_model_2 19805.8140.73319805.891.70920.494005
GBM_grid__1_AutoML_20210824_115721_model_9 19858.6140.92119858.690.08390.521174
" 262 | ] 263 | }, 264 | "metadata": {}, 265 | "output_type": "display_data" 266 | }, 267 | { 268 | "data": { 269 | "text/plain": [] 270 | }, 271 | "execution_count": 8, 272 | "metadata": {}, 273 | "output_type": "execute_result" 274 | } 275 | ], 276 | "source": [ 277 | "h2o_method_ranking = h2o_automatic_ml.leaderboard\n", 278 | "h2o_method_ranking" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": 9, 284 | "metadata": {}, 285 | "outputs": [ 286 | { 287 | "name": "stdout", 288 | "output_type": "stream", 289 | "text": [ 290 | "Model Details\n", 291 | "=============\n", 292 | "H2OStackedEnsembleEstimator : Stacked Ensemble\n", 293 | "Model Key: StackedEnsemble_AllModels_AutoML_20210824_115721\n", 294 | "\n", 295 | "No model summary for this model\n", 296 | "\n", 297 | "ModelMetricsRegressionGLM: stackedensemble\n", 298 | "** Reported on train data. **\n", 299 | "\n", 300 | "MSE: 4900.131929292816\n", 301 | "RMSE: 70.00094234574857\n", 302 | "MAE: 45.791336342731775\n", 303 | "RMSLE: 0.3107457400431741\n", 304 | "R^2: 0.9421217261180531\n", 305 | "Mean Residual Deviance: 4900.131929292816\n", 306 | "Null degrees of freedom: 7288\n", 307 | "Residual degrees of freedom: 7276\n", 308 | "Null deviance: 617106545.1167994\n", 309 | "Residual deviance: 35717061.632615335\n", 310 | "AIC: 82648.0458245655\n", 311 | "\n", 312 | "ModelMetricsRegressionGLM: stackedensemble\n", 313 | "** Reported on validation data. 
**\n", 314 | "\n", 315 | "MSE: 17967.946605287365\n", 316 | "RMSE: 134.04456947331872\n", 317 | "MAE: 83.66742154264892\n", 318 | "RMSLE: 0.43883929226519075\n", 319 | "R^2: 0.7718029372846945\n", 320 | "Mean Residual Deviance: 17967.946605287365\n", 321 | "Null degrees of freedom: 930\n", 322 | "Residual degrees of freedom: 918\n", 323 | "Null deviance: 73564698.35450743\n", 324 | "Residual deviance: 16728158.289522538\n", 325 | "AIC: 11790.460469478105\n", 326 | "\n", 327 | "ModelMetricsRegressionGLM: stackedensemble\n", 328 | "** Reported on cross-validation data. **\n", 329 | "\n", 330 | "MSE: 18371.339394332357\n", 331 | "RMSE: 135.5409140973026\n", 332 | "MAE: 86.62574587341507\n", 333 | "RMSLE: 0.4820432203973515\n", 334 | "R^2: 0.7830055540572105\n", 335 | "Mean Residual Deviance: 18371.339394332357\n", 336 | "Null degrees of freedom: 7288\n", 337 | "Residual degrees of freedom: 7275\n", 338 | "Null deviance: 617269595.4214188\n", 339 | "Residual deviance: 133908692.84528854\n", 340 | "AIC: 92282.67565857261\n" 341 | ] 342 | }, 343 | { 344 | "data": { 345 | "text/plain": [] 346 | }, 347 | "execution_count": 9, 348 | "metadata": {}, 349 | "output_type": "execute_result" 350 | } 351 | ], 352 | "source": [ 353 | "highest_ranking_method = h2o_automatic_ml.leader\n", 354 | "highest_ranking_method" 355 | ] 356 | } 357 | ], 358 | "metadata": { 359 | "kernelspec": { 360 | "display_name": "Python 3", 361 | "language": "python", 362 | "name": "python3" 363 | }, 364 | "language_info": { 365 | "codemirror_mode": { 366 | "name": "ipython", 367 | "version": 3 368 | }, 369 | "file_extension": ".py", 370 | "mimetype": "text/x-python", 371 | "name": "python", 372 | "nbconvert_exporter": "python", 373 | "pygments_lexer": "ipython3", 374 | "version": "3.7.6" 375 | } 376 | }, 377 | "nbformat": 4, 378 | "nbformat_minor": 4 379 | } 380 | --------------------------------------------------------------------------------