├── README.md ├── Gradient Boosting ├── init └── GBM-Classification.ipynb └── Introduction to Boosting ├── init └── Adaboost-Classifier-Updated.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Boosting -------------------------------------------------------------------------------- /Gradient Boosting/init: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Introduction to Boosting/init: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Introduction to Boosting/Adaboost-Classifier-Updated.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#Importing the libraries\n", 10 | "import pandas as pd\n", 11 | "import numpy as np\n", 12 | "import seaborn as sns\n", 13 | "import matplotlib.pyplot as plt\n", 14 | "%matplotlib inline\n", 15 | "import plotly.offline as py\n", 16 | "import plotly.graph_objs as go\n", 17 | "from sklearn.model_selection import train_test_split\n", 18 | "from sklearn.tree import DecisionTreeClassifier\n", 19 | "from sklearn.ensemble import AdaBoostClassifier\n", 20 | "from sklearn.preprocessing import StandardScaler\n", 21 | "from sklearn.metrics import (accuracy_score, log_loss, confusion_matrix)\n", 22 | "#Suppressing warnings\n", 23 | "import warnings\n", 24 | "warnings.filterwarnings('ignore')" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "# Step 1 - Loading Dataset" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "scrolled": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 
| "#Importing the Dataset\n", 43 | "df = pd.read_csv('WA_Fn-UseC_-HR-Employee-Attrition.csv')" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "df.head(3)\n", 53 | "\n", 54 | "# Dependent variable - Attrition (Yes/No) - binary classification problem\n", 55 | "\n", 56 | "# 34 independent variables" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "#Checking the number of 'Yes' and 'No' in 'Attrition'\n", 66 | "ax = sns.catplot(x=\"Attrition\", kind=\"count\", palette=\"ch:.25\", data=df);\n", 67 | "ax.set(xlabel = 'Attrition', ylabel = 'Number of Employees')\n", 68 | "plt.show()" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "Checking whether the dataframe has any missing values." 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "#Identifying columns with missing information\n", 85 | "missing_col = df.columns[df.isnull().any()].values\n", 86 | "print('The missing columns in the dataset are: ',missing_col)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "df.isnull().sum()" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "df.info()" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "## Step 2 - Feature Engineering\n", 112 | "\n", 113 | "The numeric and categorical fields need to be treated separately. The following few steps separate the numeric and categorical fields and drop the target field 'Attrition' from the feature set." 
114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "df.dtypes" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "df['JobRole'].head()" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "df.shape" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "#Extracting the Numeric and Categorical features ('number' covers every integer/float width, not only int64)\n", 150 | "df_num = df.select_dtypes(include = 'number')\n", 151 | "df_cat = df.select_dtypes(include = ['object'])\n", 152 | "print(\"Shape of Numeric: \",df_num.shape)\n", 153 | "print(\"Shape of Categorical: \",df_cat.shape)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "### 2.1 Encoding Categorical Fields\n", 161 | "\n", 162 | "The categorical fields have been encoded using the get_dummies() function of Pandas." 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "#Dropping the target 'Attrition' before encoding; df_cat itself is left untouched so this cell can be re-run safely\n", 172 | "df_cat_features = df_cat.drop(columns = ['Attrition'])\n", 173 | "\n", 174 | "#Encoding using Pandas' get_dummies\n", 175 | "df_cat_encoded = pd.get_dummies(df_cat_features)\n", 176 | "df_cat_encoded.head(5)" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "### 2.2 Scaling Numeric Fields\n", 184 | "\n", 185 | "The numeric fields have been scaled next for best results. `StandardScaler()` has been used for the same. After scaling the numeric features, they will be merged with the categorical features." 
186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "#Scaling the Numeric features with StandardScaler; the original index and column names are kept so the concat below aligns row by row\n", 195 | "df_num_scaled = pd.DataFrame(StandardScaler().fit_transform(df_num), columns = df_num.columns, index = df_num.index)\n", 196 | "print(\"Shape of scaled Numeric: \",df_num_scaled.shape)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "#Combining the Categorical and the scaled Numeric features\n", 206 | "df_transformed_final = pd.concat([df_num_scaled,df_cat_encoded], axis = 1)\n", 207 | "print(\"Shape of final dataframe: \",df_transformed_final.shape)" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "#Extracting the target variable - 'Attrition'\n", 217 | "target = df['Attrition']\n", 218 | "\n", 219 | "#Mapping 'Yes' to 1 and 'No' to 0 (the dict is not called 'map' so the builtin map() stays usable; an unexpected label raises KeyError)\n", 220 | "attrition_map = {'Yes':1, 'No':0}\n", 221 | "target = target.apply(lambda x: attrition_map[x])\n", 222 | "\n", 223 | "print(\"Shape of target: \",target.shape)\n", 224 | "\n", 225 | "X = df_transformed_final #Features\n", 226 | "y = target #Target" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "### 2.3 Train and Test Split\n", 234 | "\n", 235 | "The data is next split into training and test dataset using the train_test_split functionality of sklearn." 
236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "#Splitting into Train and Test dataset in 80-20 ratio\n", 245 | "X_train, X_test, y_train, y_test = train_test_split(X,y,train_size = 0.8, random_state = 0, stratify = y)\n", 246 | "print(\"Shape of X Train: \",X_train.shape)\n", 247 | "print(\"Shape of X Test: \",X_test.shape)\n", 248 | "print(\"Shape of y Train: \",y_train.shape)\n", 249 | "print(\"Shape of y Test: \",y_test.shape)" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "## Step 3 - Model Fitting\n", 257 | "\n" 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "metadata": {}, 263 | "source": [ 264 | "# Adaboost Classifier\n", 265 | "##### The most important parameters are base_estimator (named `estimator` since scikit-learn 1.2), n_estimators and learning_rate.\n", 266 | "\n", 267 | "##### 1. base_estimator - It is the learning algorithm used to train the weak models. The default learning algorithm is DecisionTreeClassifier with a max depth of 1.\n", 268 | "\n", 269 | "##### 2. n_estimators - It is the number of models to iteratively train.\n", 270 | "\n", 271 | "##### 3. learning_rate - It is the contribution of each model to the weights, and its default value is 1. There is a trade-off between learning_rate and n_estimators. Reducing the learning rate forces the model to train more slowly (but sometimes results in better performance scores). Decreasing the learning rate L makes the coefficients α_m smaller, which reduces the amplitude of the sample_weights at each step (as per the weight formula used at each step for updating the weights). 
" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "#Using AdaBoost to predict 'Attrition'\n", 281 | "adaboost = AdaBoostClassifier(n_estimators=200, random_state=1)\n", 282 | "\n", 283 | "\n", 284 | "# n_estimators is the number of weak models\n", 285 | "\n", 286 | "# AdaBoostClassifier is imported from sklearn.ensemble\n", 287 | "# Accuracy or AUC changes with the number of weak models" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [ 296 | "#Fitting Model\n", 297 | "adaboost.fit(X_train, y_train)" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "#Predicting 'Attrition' for the test set\n", 307 | "y_pred = adaboost.predict(X_test)\n", 308 | "\n", 309 | "\n", 310 | "# The evaluation metrics used below are imported from sklearn.metrics" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [ 319 | "print('Accuracy of the model is: ',accuracy_score(y_test, y_pred))" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": null, 325 | "metadata": {}, 326 | "outputs": [], 327 | "source": [ 328 | "#Confusion Matrix\n", 329 | "cm = confusion_matrix(y_test, y_pred)\n", 330 | "print('The confusion Matrix : \\n',cm)" 331 | ] 332 | } 333 | ], 334 | "metadata": { 335 | "kernelspec": { 336 | "display_name": "Python 3", 337 | "language": "python", 338 | "name": "python3" 339 | }, 340 | "language_info": { 341 | "codemirror_mode": { 342 | "name": "ipython", 343 | "version": 3 344 | }, 345 | "file_extension": ".py", 346 | "mimetype": "text/x-python", 347 | "name": "python", 348 | "nbconvert_exporter": "python", 349 | "pygments_lexer": "ipython3", 350 | "version": "3.7.7" 351 | } 352 | }, 353 | "nbformat": 4, 354 | "nbformat_minor": 1 355 | } 356 | 
-------------------------------------------------------------------------------- /Gradient Boosting/GBM-Classification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "_cell_guid": "adfbe30e-7ebb-0f88-d917-d9a8f97c638e" 8 | }, 9 | "outputs": [ 10 | { 11 | "data": { 12 | "text/html": [ 13 | " \n", 28 | " " 29 | ] 30 | }, 31 | "metadata": {}, 32 | "output_type": "display_data" 33 | } 34 | ], 35 | "source": [ 36 | "import numpy as np \n", 37 | "import pandas as pd \n", 38 | "\n", 39 | "\n", 40 | "import plotly.offline as py\n", 41 | "py.init_notebook_mode(connected=True)\n", 42 | "import plotly.graph_objs as go\n", 43 | "import plotly.tools as tls\n", 44 | "import seaborn as sns\n", 45 | "import matplotlib.pyplot as plt\n", 46 | "%matplotlib inline\n", 47 | "\n", 48 | "\n", 49 | "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n", 50 | "from sklearn.linear_model import LogisticRegression\n", 51 | "from sklearn.metrics import accuracy_score, log_loss\n", 52 | "from imblearn.over_sampling import SMOTE\n", 53 | "import xgboost as xgb\n", 54 | "from sklearn.model_selection import train_test_split\n", 55 | "\n", 56 | "\n", 57 | "# Import and suppress warnings\n", 58 | "import warnings\n", 59 | "warnings.filterwarnings('ignore')" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": { 65 | "_cell_guid": "5af03c82-cb84-d943-f82c-fc0a15d46b48" 66 | }, 67 | "source": [ 68 | "# 1. 
Exploratory Data Analysis\n", 69 | "\n", 70 | "Let us load in the dataset via the trusty Pandas package into a dataframe object which we call **attrition** and have a quick look at the first few rows" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 3, 76 | "metadata": { 77 | "_cell_guid": "e035b071-50f8-43ca-9611-fc47272bb05e" 78 | }, 79 | "outputs": [ 80 | { 81 | "data": { 82 | "text/html": [ 83 | "
\n", 84 | "\n", 97 | "\n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | "
AgeAttritionBusinessTravelDailyRateDepartmentDistanceFromHomeEducationEducationFieldEmployeeCountEmployeeNumber...RelationshipSatisfactionStandardHoursStockOptionLevelTotalWorkingYearsTrainingTimesLastYearWorkLifeBalanceYearsAtCompanyYearsInCurrentRoleYearsSinceLastPromotionYearsWithCurrManager
041YesTravel_Rarely1102Sales12Life Sciences11...18008016405
149NoTravel_Frequently279Research & Development81Life Sciences12...4801103310717
237YesTravel_Rarely1373Research & Development22Other14...28007330000
333NoTravel_Frequently1392Research & Development34Life Sciences15...38008338730
427NoTravel_Rarely591Research & Development21Medical17...48016332222
\n", 247 | "

5 rows × 35 columns

\n", 248 | "
" 249 | ], 250 | "text/plain": [ 251 | " Age Attrition BusinessTravel DailyRate Department \\\n", 252 | "0 41 Yes Travel_Rarely 1102 Sales \n", 253 | "1 49 No Travel_Frequently 279 Research & Development \n", 254 | "2 37 Yes Travel_Rarely 1373 Research & Development \n", 255 | "3 33 No Travel_Frequently 1392 Research & Development \n", 256 | "4 27 No Travel_Rarely 591 Research & Development \n", 257 | "\n", 258 | " DistanceFromHome Education EducationField EmployeeCount EmployeeNumber \\\n", 259 | "0 1 2 Life Sciences 1 1 \n", 260 | "1 8 1 Life Sciences 1 2 \n", 261 | "2 2 2 Other 1 4 \n", 262 | "3 3 4 Life Sciences 1 5 \n", 263 | "4 2 1 Medical 1 7 \n", 264 | "\n", 265 | " ... RelationshipSatisfaction StandardHours StockOptionLevel \\\n", 266 | "0 ... 1 80 0 \n", 267 | "1 ... 4 80 1 \n", 268 | "2 ... 2 80 0 \n", 269 | "3 ... 3 80 0 \n", 270 | "4 ... 4 80 1 \n", 271 | "\n", 272 | " TotalWorkingYears TrainingTimesLastYear WorkLifeBalance YearsAtCompany \\\n", 273 | "0 8 0 1 6 \n", 274 | "1 10 3 3 10 \n", 275 | "2 7 3 3 0 \n", 276 | "3 8 3 3 8 \n", 277 | "4 6 3 3 2 \n", 278 | "\n", 279 | " YearsInCurrentRole YearsSinceLastPromotion YearsWithCurrManager \n", 280 | "0 4 0 5 \n", 281 | "1 7 1 7 \n", 282 | "2 0 0 0 \n", 283 | "3 7 3 0 \n", 284 | "4 2 2 2 \n", 285 | "\n", 286 | "[5 rows x 35 columns]" 287 | ] 288 | }, 289 | "execution_count": 3, 290 | "metadata": {}, 291 | "output_type": "execute_result" 292 | } 293 | ], 294 | "source": [ 295 | "attrition = pd.read_csv('WA_Fn-UseC_-HR-Employee-Attrition.csv')\n", 296 | "attrition.head()" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 4, 302 | "metadata": { 303 | "_cell_guid": "57e2bf45-5920-af03-50c1-b5bba334eb11" 304 | }, 305 | "outputs": [ 306 | { 307 | "data": { 308 | "text/plain": [ 309 | "Age False\n", 310 | "Attrition False\n", 311 | "BusinessTravel False\n", 312 | "DailyRate False\n", 313 | "Department False\n", 314 | "DistanceFromHome False\n", 315 | "Education False\n", 316 | 
"EducationField False\n", 317 | "EmployeeCount False\n", 318 | "EmployeeNumber False\n", 319 | "EnvironmentSatisfaction False\n", 320 | "Gender False\n", 321 | "HourlyRate False\n", 322 | "JobInvolvement False\n", 323 | "JobLevel False\n", 324 | "JobRole False\n", 325 | "JobSatisfaction False\n", 326 | "MaritalStatus False\n", 327 | "MonthlyIncome False\n", 328 | "MonthlyRate False\n", 329 | "NumCompaniesWorked False\n", 330 | "Over18 False\n", 331 | "OverTime False\n", 332 | "PercentSalaryHike False\n", 333 | "PerformanceRating False\n", 334 | "RelationshipSatisfaction False\n", 335 | "StandardHours False\n", 336 | "StockOptionLevel False\n", 337 | "TotalWorkingYears False\n", 338 | "TrainingTimesLastYear False\n", 339 | "WorkLifeBalance False\n", 340 | "YearsAtCompany False\n", 341 | "YearsInCurrentRole False\n", 342 | "YearsSinceLastPromotion False\n", 343 | "YearsWithCurrManager False\n", 344 | "dtype: bool" 345 | ] 346 | }, 347 | "execution_count": 4, 348 | "metadata": {}, 349 | "output_type": "execute_result" 350 | } 351 | ], 352 | "source": [ 353 | "# Looking for NaN\n", 354 | "attrition.isnull().any()" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": 5, 360 | "metadata": {}, 361 | "outputs": [], 362 | "source": [ 363 | "# attrition.Age.fillna('')" 364 | ] 365 | }, 366 | { 367 | "cell_type": "markdown", 368 | "metadata": { 369 | "_cell_guid": "5c5dc2ed-7608-4d84-c4f6-c591a3be7570" 370 | }, 371 | "source": [ 372 | "### Correlation of Features\n" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": 6, 378 | "metadata": {}, 379 | "outputs": [ 380 | { 381 | "data": { 382 | "text/html": [ 383 | "
\n", 384 | "\n", 397 | "\n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " 
\n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | 
" \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 
| " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 
| " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | "
AgeDailyRateDistanceFromHomeEducationEmployeeCountEmployeeNumberEnvironmentSatisfactionHourlyRateJobInvolvementJobLevel...RelationshipSatisfactionStandardHoursStockOptionLevelTotalWorkingYearsTrainingTimesLastYearWorkLifeBalanceYearsAtCompanyYearsInCurrentRoleYearsSinceLastPromotionYearsWithCurrManager
Age1.0000000.010661-0.0016860.208034NaN-0.0101450.0101460.0242870.0298200.509604...0.053535NaN0.0375100.680381-0.019621-0.0214900.3113090.2129010.2165130.202089
DailyRate0.0106611.000000-0.004985-0.016806NaN-0.0509900.0183550.0233810.0461350.002966...0.007846NaN0.0421430.0145150.002453-0.037848-0.0340550.009932-0.033229-0.026363
DistanceFromHome-0.001686-0.0049851.0000000.021042NaN0.032916-0.0160750.0311310.0087830.005303...0.006557NaN0.0448720.004628-0.036942-0.0265560.0095080.0188450.0100290.014406
Education0.208034-0.0168060.0210421.000000NaN0.042070-0.0271280.0167750.0424380.101589...-0.009118NaN0.0184220.148280-0.0251000.0098190.0691140.0602360.0542540.069065
EmployeeCountNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
EmployeeNumber-0.010145-0.0509900.0329160.042070NaN1.0000000.0176210.035179-0.006888-0.018519...-0.069861NaN0.062227-0.0143650.0236030.010309-0.011240-0.008416-0.009019-0.009197
EnvironmentSatisfaction0.0101460.018355-0.016075-0.027128NaN0.0176211.000000-0.049857-0.0082780.001212...0.007665NaN0.003432-0.002693-0.0193590.0276270.0014580.0180070.016194-0.004999
HourlyRate0.0242870.0233810.0311310.016775NaN0.035179-0.0498571.0000000.042861-0.027853...0.001330NaN0.050263-0.002334-0.008548-0.004607-0.019582-0.024106-0.026716-0.020123
JobInvolvement0.0298200.0461350.0087830.042438NaN-0.006888-0.0082780.0428611.000000-0.012630...0.034297NaN0.021523-0.005533-0.015338-0.014617-0.0213550.008717-0.0241840.025976
JobLevel0.5096040.0029660.0053030.101589NaN-0.0185190.001212-0.027853-0.0126301.000000...0.021642NaN0.0139840.782208-0.0181910.0378180.5347390.3894470.3538850.375281
JobSatisfaction-0.0048920.030571-0.003669-0.011296NaN-0.046247-0.006784-0.071335-0.021476-0.001944...-0.012454NaN0.010690-0.020185-0.005779-0.019459-0.003803-0.002305-0.018214-0.027656
MonthlyIncome0.4978550.007707-0.0170140.094961NaN-0.014829-0.006259-0.015794-0.0152710.950300...0.025873NaN0.0054080.772893-0.0217360.0306830.5142850.3638180.3449780.344079
MonthlyRate0.028051-0.0321820.027473-0.026084NaN0.0126480.037600-0.015297-0.0163220.039563...-0.004085NaN-0.0343230.0264420.0014670.007963-0.023655-0.0128150.001567-0.036746
NumCompaniesWorked0.2996350.038153-0.0292510.126317NaN-0.0012510.0125940.0221570.0150120.142501...0.052733NaN0.0300750.237639-0.066054-0.008366-0.118421-0.090754-0.036814-0.110319
PercentSalaryHike0.0036340.0227040.040235-0.011111NaN-0.012944-0.031701-0.009062-0.017205-0.034730...-0.040490NaN0.007528-0.020608-0.005221-0.003280-0.035991-0.001520-0.022154-0.011985
PerformanceRating0.0019040.0004730.027110-0.024539NaN-0.020359-0.029548-0.002172-0.029071-0.021222...-0.031351NaN0.0035060.006744-0.0155790.0025720.0034350.0349860.0178960.022827
RelationshipSatisfaction0.0535350.0078460.006557-0.009118NaN-0.0698610.0076650.0013300.0342970.021642...1.000000NaN-0.0459520.0240540.0024970.0196040.019367-0.0151230.033493-0.000867
StandardHoursNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
StockOptionLevel0.0375100.0421430.0448720.018422NaN0.0622270.0034320.0502630.0215230.013984...-0.045952NaN1.0000000.0101360.0112740.0041290.0150580.0508180.0143520.024698
TotalWorkingYears0.6803810.0145150.0046280.148280NaN-0.014365-0.002693-0.002334-0.0055330.782208...0.024054NaN0.0101361.000000-0.0356620.0010080.6281330.4603650.4048580.459188
TrainingTimesLastYear-0.0196210.002453-0.036942-0.025100NaN0.023603-0.019359-0.008548-0.015338-0.018191...0.002497NaN0.011274-0.0356621.0000000.0280720.003569-0.005738-0.002067-0.004096
WorkLifeBalance-0.021490-0.037848-0.0265560.009819NaN0.0103090.027627-0.004607-0.0146170.037818...0.019604NaN0.0041290.0010080.0280721.0000000.0120890.0498560.0089410.002759
YearsAtCompany0.311309-0.0340550.0095080.069114NaN-0.0112400.001458-0.019582-0.0213550.534739...0.019367NaN0.0150580.6281330.0035690.0120891.0000000.7587540.6184090.769212
YearsInCurrentRole0.2129010.0099320.0188450.060236NaN-0.0084160.018007-0.0241060.0087170.389447...-0.015123NaN0.0508180.460365-0.0057380.0498560.7587541.0000000.5480560.714365
YearsSinceLastPromotion0.216513-0.0332290.0100290.054254NaN-0.0090190.016194-0.026716-0.0241840.353885...0.033493NaN0.0143520.404858-0.0020670.0089410.6184090.5480561.0000000.510224
YearsWithCurrManager0.202089-0.0263630.0144060.069065NaN-0.009197-0.004999-0.0201230.0259760.375281...-0.000867NaN0.0246980.459188-0.0040960.0027590.7692120.7143650.5102241.000000
\n", 1051 | "

26 rows × 26 columns

\n", 1052 | "
" 1053 | ], 1054 | "text/plain": [ 1055 | " Age DailyRate DistanceFromHome Education \\\n", 1056 | "Age 1.000000 0.010661 -0.001686 0.208034 \n", 1057 | "DailyRate 0.010661 1.000000 -0.004985 -0.016806 \n", 1058 | "DistanceFromHome -0.001686 -0.004985 1.000000 0.021042 \n", 1059 | "Education 0.208034 -0.016806 0.021042 1.000000 \n", 1060 | "EmployeeCount NaN NaN NaN NaN \n", 1061 | "EmployeeNumber -0.010145 -0.050990 0.032916 0.042070 \n", 1062 | "EnvironmentSatisfaction 0.010146 0.018355 -0.016075 -0.027128 \n", 1063 | "HourlyRate 0.024287 0.023381 0.031131 0.016775 \n", 1064 | "JobInvolvement 0.029820 0.046135 0.008783 0.042438 \n", 1065 | "JobLevel 0.509604 0.002966 0.005303 0.101589 \n", 1066 | "JobSatisfaction -0.004892 0.030571 -0.003669 -0.011296 \n", 1067 | "MonthlyIncome 0.497855 0.007707 -0.017014 0.094961 \n", 1068 | "MonthlyRate 0.028051 -0.032182 0.027473 -0.026084 \n", 1069 | "NumCompaniesWorked 0.299635 0.038153 -0.029251 0.126317 \n", 1070 | "PercentSalaryHike 0.003634 0.022704 0.040235 -0.011111 \n", 1071 | "PerformanceRating 0.001904 0.000473 0.027110 -0.024539 \n", 1072 | "RelationshipSatisfaction 0.053535 0.007846 0.006557 -0.009118 \n", 1073 | "StandardHours NaN NaN NaN NaN \n", 1074 | "StockOptionLevel 0.037510 0.042143 0.044872 0.018422 \n", 1075 | "TotalWorkingYears 0.680381 0.014515 0.004628 0.148280 \n", 1076 | "TrainingTimesLastYear -0.019621 0.002453 -0.036942 -0.025100 \n", 1077 | "WorkLifeBalance -0.021490 -0.037848 -0.026556 0.009819 \n", 1078 | "YearsAtCompany 0.311309 -0.034055 0.009508 0.069114 \n", 1079 | "YearsInCurrentRole 0.212901 0.009932 0.018845 0.060236 \n", 1080 | "YearsSinceLastPromotion 0.216513 -0.033229 0.010029 0.054254 \n", 1081 | "YearsWithCurrManager 0.202089 -0.026363 0.014406 0.069065 \n", 1082 | "\n", 1083 | " EmployeeCount EmployeeNumber \\\n", 1084 | "Age NaN -0.010145 \n", 1085 | "DailyRate NaN -0.050990 \n", 1086 | "DistanceFromHome NaN 0.032916 \n", 1087 | "Education NaN 0.042070 \n", 1088 | "EmployeeCount 
NaN NaN \n", 1089 | "EmployeeNumber NaN 1.000000 \n", 1090 | "EnvironmentSatisfaction NaN 0.017621 \n", 1091 | "HourlyRate NaN 0.035179 \n", 1092 | "JobInvolvement NaN -0.006888 \n", 1093 | "JobLevel NaN -0.018519 \n", 1094 | "JobSatisfaction NaN -0.046247 \n", 1095 | "MonthlyIncome NaN -0.014829 \n", 1096 | "MonthlyRate NaN 0.012648 \n", 1097 | "NumCompaniesWorked NaN -0.001251 \n", 1098 | "PercentSalaryHike NaN -0.012944 \n", 1099 | "PerformanceRating NaN -0.020359 \n", 1100 | "RelationshipSatisfaction NaN -0.069861 \n", 1101 | "StandardHours NaN NaN \n", 1102 | "StockOptionLevel NaN 0.062227 \n", 1103 | "TotalWorkingYears NaN -0.014365 \n", 1104 | "TrainingTimesLastYear NaN 0.023603 \n", 1105 | "WorkLifeBalance NaN 0.010309 \n", 1106 | "YearsAtCompany NaN -0.011240 \n", 1107 | "YearsInCurrentRole NaN -0.008416 \n", 1108 | "YearsSinceLastPromotion NaN -0.009019 \n", 1109 | "YearsWithCurrManager NaN -0.009197 \n", 1110 | "\n", 1111 | " EnvironmentSatisfaction HourlyRate JobInvolvement \\\n", 1112 | "Age 0.010146 0.024287 0.029820 \n", 1113 | "DailyRate 0.018355 0.023381 0.046135 \n", 1114 | "DistanceFromHome -0.016075 0.031131 0.008783 \n", 1115 | "Education -0.027128 0.016775 0.042438 \n", 1116 | "EmployeeCount NaN NaN NaN \n", 1117 | "EmployeeNumber 0.017621 0.035179 -0.006888 \n", 1118 | "EnvironmentSatisfaction 1.000000 -0.049857 -0.008278 \n", 1119 | "HourlyRate -0.049857 1.000000 0.042861 \n", 1120 | "JobInvolvement -0.008278 0.042861 1.000000 \n", 1121 | "JobLevel 0.001212 -0.027853 -0.012630 \n", 1122 | "JobSatisfaction -0.006784 -0.071335 -0.021476 \n", 1123 | "MonthlyIncome -0.006259 -0.015794 -0.015271 \n", 1124 | "MonthlyRate 0.037600 -0.015297 -0.016322 \n", 1125 | "NumCompaniesWorked 0.012594 0.022157 0.015012 \n", 1126 | "PercentSalaryHike -0.031701 -0.009062 -0.017205 \n", 1127 | "PerformanceRating -0.029548 -0.002172 -0.029071 \n", 1128 | "RelationshipSatisfaction 0.007665 0.001330 0.034297 \n", 1129 | "StandardHours NaN NaN NaN \n", 1130 | 
"StockOptionLevel 0.003432 0.050263 0.021523 \n", 1131 | "TotalWorkingYears -0.002693 -0.002334 -0.005533 \n", 1132 | "TrainingTimesLastYear -0.019359 -0.008548 -0.015338 \n", 1133 | "WorkLifeBalance 0.027627 -0.004607 -0.014617 \n", 1134 | "YearsAtCompany 0.001458 -0.019582 -0.021355 \n", 1135 | "YearsInCurrentRole 0.018007 -0.024106 0.008717 \n", 1136 | "YearsSinceLastPromotion 0.016194 -0.026716 -0.024184 \n", 1137 | "YearsWithCurrManager -0.004999 -0.020123 0.025976 \n", 1138 | "\n", 1139 | " JobLevel ... RelationshipSatisfaction \\\n", 1140 | "Age 0.509604 ... 0.053535 \n", 1141 | "DailyRate 0.002966 ... 0.007846 \n", 1142 | "DistanceFromHome 0.005303 ... 0.006557 \n", 1143 | "Education 0.101589 ... -0.009118 \n", 1144 | "EmployeeCount NaN ... NaN \n", 1145 | "EmployeeNumber -0.018519 ... -0.069861 \n", 1146 | "EnvironmentSatisfaction 0.001212 ... 0.007665 \n", 1147 | "HourlyRate -0.027853 ... 0.001330 \n", 1148 | "JobInvolvement -0.012630 ... 0.034297 \n", 1149 | "JobLevel 1.000000 ... 0.021642 \n", 1150 | "JobSatisfaction -0.001944 ... -0.012454 \n", 1151 | "MonthlyIncome 0.950300 ... 0.025873 \n", 1152 | "MonthlyRate 0.039563 ... -0.004085 \n", 1153 | "NumCompaniesWorked 0.142501 ... 0.052733 \n", 1154 | "PercentSalaryHike -0.034730 ... -0.040490 \n", 1155 | "PerformanceRating -0.021222 ... -0.031351 \n", 1156 | "RelationshipSatisfaction 0.021642 ... 1.000000 \n", 1157 | "StandardHours NaN ... NaN \n", 1158 | "StockOptionLevel 0.013984 ... -0.045952 \n", 1159 | "TotalWorkingYears 0.782208 ... 0.024054 \n", 1160 | "TrainingTimesLastYear -0.018191 ... 0.002497 \n", 1161 | "WorkLifeBalance 0.037818 ... 0.019604 \n", 1162 | "YearsAtCompany 0.534739 ... 0.019367 \n", 1163 | "YearsInCurrentRole 0.389447 ... -0.015123 \n", 1164 | "YearsSinceLastPromotion 0.353885 ... 0.033493 \n", 1165 | "YearsWithCurrManager 0.375281 ... 
-0.000867 \n", 1166 | "\n", 1167 | " StandardHours StockOptionLevel TotalWorkingYears \\\n", 1168 | "Age NaN 0.037510 0.680381 \n", 1169 | "DailyRate NaN 0.042143 0.014515 \n", 1170 | "DistanceFromHome NaN 0.044872 0.004628 \n", 1171 | "Education NaN 0.018422 0.148280 \n", 1172 | "EmployeeCount NaN NaN NaN \n", 1173 | "EmployeeNumber NaN 0.062227 -0.014365 \n", 1174 | "EnvironmentSatisfaction NaN 0.003432 -0.002693 \n", 1175 | "HourlyRate NaN 0.050263 -0.002334 \n", 1176 | "JobInvolvement NaN 0.021523 -0.005533 \n", 1177 | "JobLevel NaN 0.013984 0.782208 \n", 1178 | "JobSatisfaction NaN 0.010690 -0.020185 \n", 1179 | "MonthlyIncome NaN 0.005408 0.772893 \n", 1180 | "MonthlyRate NaN -0.034323 0.026442 \n", 1181 | "NumCompaniesWorked NaN 0.030075 0.237639 \n", 1182 | "PercentSalaryHike NaN 0.007528 -0.020608 \n", 1183 | "PerformanceRating NaN 0.003506 0.006744 \n", 1184 | "RelationshipSatisfaction NaN -0.045952 0.024054 \n", 1185 | "StandardHours NaN NaN NaN \n", 1186 | "StockOptionLevel NaN 1.000000 0.010136 \n", 1187 | "TotalWorkingYears NaN 0.010136 1.000000 \n", 1188 | "TrainingTimesLastYear NaN 0.011274 -0.035662 \n", 1189 | "WorkLifeBalance NaN 0.004129 0.001008 \n", 1190 | "YearsAtCompany NaN 0.015058 0.628133 \n", 1191 | "YearsInCurrentRole NaN 0.050818 0.460365 \n", 1192 | "YearsSinceLastPromotion NaN 0.014352 0.404858 \n", 1193 | "YearsWithCurrManager NaN 0.024698 0.459188 \n", 1194 | "\n", 1195 | " TrainingTimesLastYear WorkLifeBalance \\\n", 1196 | "Age -0.019621 -0.021490 \n", 1197 | "DailyRate 0.002453 -0.037848 \n", 1198 | "DistanceFromHome -0.036942 -0.026556 \n", 1199 | "Education -0.025100 0.009819 \n", 1200 | "EmployeeCount NaN NaN \n", 1201 | "EmployeeNumber 0.023603 0.010309 \n", 1202 | "EnvironmentSatisfaction -0.019359 0.027627 \n", 1203 | "HourlyRate -0.008548 -0.004607 \n", 1204 | "JobInvolvement -0.015338 -0.014617 \n", 1205 | "JobLevel -0.018191 0.037818 \n", 1206 | "JobSatisfaction -0.005779 -0.019459 \n", 1207 | "MonthlyIncome -0.021736 
0.030683 \n", 1208 | "MonthlyRate 0.001467 0.007963 \n", 1209 | "NumCompaniesWorked -0.066054 -0.008366 \n", 1210 | "PercentSalaryHike -0.005221 -0.003280 \n", 1211 | "PerformanceRating -0.015579 0.002572 \n", 1212 | "RelationshipSatisfaction 0.002497 0.019604 \n", 1213 | "StandardHours NaN NaN \n", 1214 | "StockOptionLevel 0.011274 0.004129 \n", 1215 | "TotalWorkingYears -0.035662 0.001008 \n", 1216 | "TrainingTimesLastYear 1.000000 0.028072 \n", 1217 | "WorkLifeBalance 0.028072 1.000000 \n", 1218 | "YearsAtCompany 0.003569 0.012089 \n", 1219 | "YearsInCurrentRole -0.005738 0.049856 \n", 1220 | "YearsSinceLastPromotion -0.002067 0.008941 \n", 1221 | "YearsWithCurrManager -0.004096 0.002759 \n", 1222 | "\n", 1223 | " YearsAtCompany YearsInCurrentRole \\\n", 1224 | "Age 0.311309 0.212901 \n", 1225 | "DailyRate -0.034055 0.009932 \n", 1226 | "DistanceFromHome 0.009508 0.018845 \n", 1227 | "Education 0.069114 0.060236 \n", 1228 | "EmployeeCount NaN NaN \n", 1229 | "EmployeeNumber -0.011240 -0.008416 \n", 1230 | "EnvironmentSatisfaction 0.001458 0.018007 \n", 1231 | "HourlyRate -0.019582 -0.024106 \n", 1232 | "JobInvolvement -0.021355 0.008717 \n", 1233 | "JobLevel 0.534739 0.389447 \n", 1234 | "JobSatisfaction -0.003803 -0.002305 \n", 1235 | "MonthlyIncome 0.514285 0.363818 \n", 1236 | "MonthlyRate -0.023655 -0.012815 \n", 1237 | "NumCompaniesWorked -0.118421 -0.090754 \n", 1238 | "PercentSalaryHike -0.035991 -0.001520 \n", 1239 | "PerformanceRating 0.003435 0.034986 \n", 1240 | "RelationshipSatisfaction 0.019367 -0.015123 \n", 1241 | "StandardHours NaN NaN \n", 1242 | "StockOptionLevel 0.015058 0.050818 \n", 1243 | "TotalWorkingYears 0.628133 0.460365 \n", 1244 | "TrainingTimesLastYear 0.003569 -0.005738 \n", 1245 | "WorkLifeBalance 0.012089 0.049856 \n", 1246 | "YearsAtCompany 1.000000 0.758754 \n", 1247 | "YearsInCurrentRole 0.758754 1.000000 \n", 1248 | "YearsSinceLastPromotion 0.618409 0.548056 \n", 1249 | "YearsWithCurrManager 0.769212 0.714365 \n", 1250 | "\n", 
1251 | " YearsSinceLastPromotion YearsWithCurrManager \n", 1252 | "Age 0.216513 0.202089 \n", 1253 | "DailyRate -0.033229 -0.026363 \n", 1254 | "DistanceFromHome 0.010029 0.014406 \n", 1255 | "Education 0.054254 0.069065 \n", 1256 | "EmployeeCount NaN NaN \n", 1257 | "EmployeeNumber -0.009019 -0.009197 \n", 1258 | "EnvironmentSatisfaction 0.016194 -0.004999 \n", 1259 | "HourlyRate -0.026716 -0.020123 \n", 1260 | "JobInvolvement -0.024184 0.025976 \n", 1261 | "JobLevel 0.353885 0.375281 \n", 1262 | "JobSatisfaction -0.018214 -0.027656 \n", 1263 | "MonthlyIncome 0.344978 0.344079 \n", 1264 | "MonthlyRate 0.001567 -0.036746 \n", 1265 | "NumCompaniesWorked -0.036814 -0.110319 \n", 1266 | "PercentSalaryHike -0.022154 -0.011985 \n", 1267 | "PerformanceRating 0.017896 0.022827 \n", 1268 | "RelationshipSatisfaction 0.033493 -0.000867 \n", 1269 | "StandardHours NaN NaN \n", 1270 | "StockOptionLevel 0.014352 0.024698 \n", 1271 | "TotalWorkingYears 0.404858 0.459188 \n", 1272 | "TrainingTimesLastYear -0.002067 -0.004096 \n", 1273 | "WorkLifeBalance 0.008941 0.002759 \n", 1274 | "YearsAtCompany 0.618409 0.769212 \n", 1275 | "YearsInCurrentRole 0.548056 0.714365 \n", 1276 | "YearsSinceLastPromotion 1.000000 0.510224 \n", 1277 | "YearsWithCurrManager 0.510224 1.000000 \n", 1278 | "\n", 1279 | "[26 rows x 26 columns]" 1280 | ] 1281 | }, 1282 | "execution_count": 6, 1283 | "metadata": {}, 1284 | "output_type": "execute_result" 1285 | } 1286 | ], 1287 | "source": [ 1288 | "attrition.corr()" 1289 | ] 1290 | }, 1291 | { 1292 | "cell_type": "markdown", 1293 | "metadata": { 1294 | "_cell_guid": "112cef65-78b8-7790-e705-b173beea6986" 1295 | }, 1296 | "source": [ 1297 | "# Feature Engineering & Categorical Encoding\n", 1298 | "\n", 1299 | "Task of Feature engineering and numerically encoding the categorical values in our dataset." 
1300 | ] 1301 | }, 1302 | { 1303 | "cell_type": "code", 1304 | "execution_count": 7, 1305 | "metadata": {}, 1306 | "outputs": [], 1307 | "source": [ 1308 | "# attrition.shape" 1309 | ] 1310 | }, 1311 | { 1312 | "cell_type": "code", 1313 | "execution_count": 8, 1314 | "metadata": {}, 1315 | "outputs": [ 1316 | { 1317 | "data": { 1318 | "text/plain": [ 1319 | "Age int64\n", 1320 | "Attrition object\n", 1321 | "BusinessTravel object\n", 1322 | "DailyRate int64\n", 1323 | "Department object\n", 1324 | "DistanceFromHome int64\n", 1325 | "Education int64\n", 1326 | "EducationField object\n", 1327 | "EmployeeCount int64\n", 1328 | "EmployeeNumber int64\n", 1329 | "EnvironmentSatisfaction int64\n", 1330 | "Gender object\n", 1331 | "HourlyRate int64\n", 1332 | "JobInvolvement int64\n", 1333 | "JobLevel int64\n", 1334 | "JobRole object\n", 1335 | "JobSatisfaction int64\n", 1336 | "MaritalStatus object\n", 1337 | "MonthlyIncome int64\n", 1338 | "MonthlyRate int64\n", 1339 | "NumCompaniesWorked int64\n", 1340 | "Over18 object\n", 1341 | "OverTime object\n", 1342 | "PercentSalaryHike int64\n", 1343 | "PerformanceRating int64\n", 1344 | "RelationshipSatisfaction int64\n", 1345 | "StandardHours int64\n", 1346 | "StockOptionLevel int64\n", 1347 | "TotalWorkingYears int64\n", 1348 | "TrainingTimesLastYear int64\n", 1349 | "WorkLifeBalance int64\n", 1350 | "YearsAtCompany int64\n", 1351 | "YearsInCurrentRole int64\n", 1352 | "YearsSinceLastPromotion int64\n", 1353 | "YearsWithCurrManager int64\n", 1354 | "dtype: object" 1355 | ] 1356 | }, 1357 | "execution_count": 8, 1358 | "metadata": {}, 1359 | "output_type": "execute_result" 1360 | } 1361 | ], 1362 | "source": [ 1363 | "attrition.dtypes" 1364 | ] 1365 | }, 1366 | { 1367 | "cell_type": "code", 1368 | "execution_count": 9, 1369 | "metadata": { 1370 | "_cell_guid": "937385c7-7b7f-f6d0-d974-0527a7118e98" 1371 | }, 1372 | "outputs": [], 1373 | "source": [ 1374 | "# Empty list to store columns with categorical data\n", 1375 | 
"categorical = []\n", 1376 | "for col, value in attrition.iteritems():\n", 1377 | " if value.dtype == 'object':\n", 1378 | " categorical.append(col)\n", 1379 | "\n", 1380 | "# Store the numerical columns in a list numerical\n", 1381 | "numerical = attrition.columns.difference(categorical)" 1382 | ] 1383 | }, 1384 | { 1385 | "cell_type": "code", 1386 | "execution_count": 10, 1387 | "metadata": {}, 1388 | "outputs": [ 1389 | { 1390 | "data": { 1391 | "text/plain": [ 1392 | "Index(['Age', 'DailyRate', 'DistanceFromHome', 'Education', 'EmployeeCount',\n", 1393 | " 'EmployeeNumber', 'EnvironmentSatisfaction', 'HourlyRate',\n", 1394 | " 'JobInvolvement', 'JobLevel', 'JobSatisfaction', 'MonthlyIncome',\n", 1395 | " 'MonthlyRate', 'NumCompaniesWorked', 'PercentSalaryHike',\n", 1396 | " 'PerformanceRating', 'RelationshipSatisfaction', 'StandardHours',\n", 1397 | " 'StockOptionLevel', 'TotalWorkingYears', 'TrainingTimesLastYear',\n", 1398 | " 'WorkLifeBalance', 'YearsAtCompany', 'YearsInCurrentRole',\n", 1399 | " 'YearsSinceLastPromotion', 'YearsWithCurrManager'],\n", 1400 | " dtype='object')" 1401 | ] 1402 | }, 1403 | "execution_count": 10, 1404 | "metadata": {}, 1405 | "output_type": "execute_result" 1406 | } 1407 | ], 1408 | "source": [ 1409 | "numerical" 1410 | ] 1411 | }, 1412 | { 1413 | "cell_type": "code", 1414 | "execution_count": 11, 1415 | "metadata": {}, 1416 | "outputs": [ 1417 | { 1418 | "data": { 1419 | "text/plain": [ 1420 | "['Attrition',\n", 1421 | " 'BusinessTravel',\n", 1422 | " 'Department',\n", 1423 | " 'EducationField',\n", 1424 | " 'Gender',\n", 1425 | " 'JobRole',\n", 1426 | " 'MaritalStatus',\n", 1427 | " 'Over18',\n", 1428 | " 'OverTime']" 1429 | ] 1430 | }, 1431 | "execution_count": 11, 1432 | "metadata": {}, 1433 | "output_type": "execute_result" 1434 | } 1435 | ], 1436 | "source": [ 1437 | "categorical" 1438 | ] 1439 | }, 1440 | { 1441 | "cell_type": "code", 1442 | "execution_count": 12, 1443 | "metadata": { 1444 | "_cell_guid": 
"5ec5cd49-f8b3-e36b-75dd-ac95fe0373ac" 1445 | }, 1446 | "outputs": [], 1447 | "source": [ 1448 | "# Store the categorical data in a dataframe called attrition_cat\n", 1449 | "attrition_cat = attrition[categorical]\n", 1450 | "attrition_cat = attrition_cat.drop(['Attrition'], axis=1) # Dropping the target column" 1451 | ] 1452 | }, 1453 | { 1454 | "cell_type": "code", 1455 | "execution_count": 13, 1456 | "metadata": {}, 1457 | "outputs": [ 1458 | { 1459 | "data": { 1460 | "text/html": [ 1461 | "
\n", 1462 | "\n", 1475 | "\n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | " \n", 1515 | " \n", 1516 | " \n", 1517 | " \n", 1518 | " \n", 1519 | " \n", 1520 | " \n", 1521 | " \n", 1522 | " \n", 1523 | " \n", 1524 | " \n", 1525 | " \n", 1526 | " \n", 1527 | " \n", 1528 | " \n", 1529 | " \n", 1530 | " \n", 1531 | " \n", 1532 | " \n", 1533 | " \n", 1534 | " \n", 1535 | " \n", 1536 | " \n", 1537 | " \n", 1538 | " \n", 1539 | " \n", 1540 | " \n", 1541 | " \n", 1542 | " \n", 1543 | " \n", 1544 | " \n", 1545 | " \n", 1546 | " \n", 1547 | " \n", 1548 | " \n", 1549 | " \n", 1550 | " \n", 1551 | " \n", 1552 | " \n", 1553 | " \n", 1554 | " \n", 1555 | " \n", 1556 | " \n", 1557 | " \n", 1558 | " \n", 1559 | " \n", 1560 | " \n", 1561 | " \n", 1562 | " \n", 1563 | " \n", 1564 | " \n", 1565 | " \n", 1566 | " \n", 1567 | " \n", 1568 | " \n", 1569 | " \n", 1570 | " \n", 1571 | " \n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | " \n", 1578 | " \n", 1579 | " \n", 1580 | " \n", 1581 | " \n", 1582 | " \n", 1583 | " \n", 1584 | " \n", 1585 | " \n", 1586 | " \n", 1587 | " \n", 1588 | " \n", 1589 | " \n", 1590 | " \n", 1591 | " \n", 1592 | " \n", 1593 | " \n", 1594 | " \n", 1595 | " \n", 1596 | " \n", 1597 | " \n", 1598 | " \n", 1599 | " \n", 1600 | " \n", 1601 | " \n", 1602 | " \n", 1603 | " \n", 1604 | " \n", 1605 | " \n", 1606 | " \n", 1607 | " \n", 1608 | " \n", 1609 | " \n", 1610 | " \n", 1611 | " \n", 1612 | "
BusinessTravelDepartmentEducationFieldGenderJobRoleMaritalStatusOver18OverTime
0Travel_RarelySalesLife SciencesFemaleSales ExecutiveSingleYYes
1Travel_FrequentlyResearch & DevelopmentLife SciencesMaleResearch ScientistMarriedYNo
2Travel_RarelyResearch & DevelopmentOtherMaleLaboratory TechnicianSingleYYes
3Travel_FrequentlyResearch & DevelopmentLife SciencesFemaleResearch ScientistMarriedYYes
4Travel_RarelyResearch & DevelopmentMedicalMaleLaboratory TechnicianMarriedYNo
...........................
1465Travel_FrequentlyResearch & DevelopmentMedicalMaleLaboratory TechnicianMarriedYNo
1466Travel_RarelyResearch & DevelopmentMedicalMaleHealthcare RepresentativeMarriedYNo
1467Travel_RarelyResearch & DevelopmentLife SciencesMaleManufacturing DirectorMarriedYYes
1468Travel_FrequentlySalesMedicalMaleSales ExecutiveMarriedYNo
1469Travel_RarelyResearch & DevelopmentMedicalMaleLaboratory TechnicianMarriedYNo
\n", 1613 | "

1470 rows × 8 columns

\n", 1614 | "
" 1615 | ], 1616 | "text/plain": [ 1617 | " BusinessTravel Department EducationField Gender \\\n", 1618 | "0 Travel_Rarely Sales Life Sciences Female \n", 1619 | "1 Travel_Frequently Research & Development Life Sciences Male \n", 1620 | "2 Travel_Rarely Research & Development Other Male \n", 1621 | "3 Travel_Frequently Research & Development Life Sciences Female \n", 1622 | "4 Travel_Rarely Research & Development Medical Male \n", 1623 | "... ... ... ... ... \n", 1624 | "1465 Travel_Frequently Research & Development Medical Male \n", 1625 | "1466 Travel_Rarely Research & Development Medical Male \n", 1626 | "1467 Travel_Rarely Research & Development Life Sciences Male \n", 1627 | "1468 Travel_Frequently Sales Medical Male \n", 1628 | "1469 Travel_Rarely Research & Development Medical Male \n", 1629 | "\n", 1630 | " JobRole MaritalStatus Over18 OverTime \n", 1631 | "0 Sales Executive Single Y Yes \n", 1632 | "1 Research Scientist Married Y No \n", 1633 | "2 Laboratory Technician Single Y Yes \n", 1634 | "3 Research Scientist Married Y Yes \n", 1635 | "4 Laboratory Technician Married Y No \n", 1636 | "... ... ... ... ... 
\n", 1637 | "1465 Laboratory Technician Married Y No \n", 1638 | "1466 Healthcare Representative Married Y No \n", 1639 | "1467 Manufacturing Director Married Y Yes \n", 1640 | "1468 Sales Executive Married Y No \n", 1641 | "1469 Laboratory Technician Married Y No \n", 1642 | "\n", 1643 | "[1470 rows x 8 columns]" 1644 | ] 1645 | }, 1646 | "execution_count": 13, 1647 | "metadata": {}, 1648 | "output_type": "execute_result" 1649 | } 1650 | ], 1651 | "source": [ 1652 | "attrition_cat" 1653 | ] 1654 | }, 1655 | { 1656 | "cell_type": "markdown", 1657 | "metadata": { 1658 | "_cell_guid": "7c3c0c95-3725-80dd-0a73-5c840451a438" 1659 | }, 1660 | "source": [ 1661 | "Applying the **get_dummies** method" 1662 | ] 1663 | }, 1664 | { 1665 | "cell_type": "code", 1666 | "execution_count": 14, 1667 | "metadata": {}, 1668 | "outputs": [], 1669 | "source": [ 1670 | "# How can you convert categorial or string or object data into Numerical Format ?\n", 1671 | "\n", 1672 | "# Process of converting your cat data into numerical format - Encoding process \n", 1673 | "\n", 1674 | "# Encoding (15 More )\n", 1675 | "\n", 1676 | "# Label Encoding \n", 1677 | "\n", 1678 | "# One Hot Encoding ( OHE)\n", 1679 | "\n", 1680 | "# Cat_A \n", 1681 | "\n", 1682 | "# Male\n", 1683 | "#Female \n", 1684 | "#Male\n", 1685 | "#Female\n", 1686 | "# Prefer_not_to_say\n", 1687 | "# Male \n", 1688 | "\n", 1689 | "# OHE \n", 1690 | "\n", 1691 | " # Cat_A_Male #Cat_A_Female #Cat_A_Prefer_not_to_say\n", 1692 | "#1# Male 1 0 0 \n", 1693 | "#2#Female 0 1 0\n", 1694 | "#3#Male 1 0 0\n", 1695 | "#4#Female 0 1 0\n", 1696 | "#5# Prefer_not_to_say 0 0 1\n", 1697 | "#6# Male \n", 1698 | "\n", 1699 | "\n", 1700 | "\n", 1701 | "# Label Encoding \n", 1702 | "\n", 1703 | "# Cat_A \n", 1704 | "\n", 1705 | "# Male 2 \n", 1706 | "#Female 1\n", 1707 | "#Male 2\n", 1708 | "#Female 1\n", 1709 | "# Prefer_not_to_say 3\n", 1710 | "# Male 2\n", 1711 | "\n", 1712 | "# Target Encoding \n", 1713 | "# Mean Encoding \n", 1714 | "\n", 1715 
| "\n", 1716 | "\n", 1717 | "\n", 1718 | "\n", 1719 | "\n", 1720 | "\n", 1721 | "\n", 1722 | "\n", 1723 | "\n", 1724 | "\n", 1725 | "\n", 1726 | "\n", 1727 | "\n", 1728 | "\n", 1729 | "\n", 1730 | "\n", 1731 | "\n", 1732 | "\n", 1733 | "\n", 1734 | "\n", 1735 | "\n", 1736 | "\n", 1737 | "\n", 1738 | "\n", 1739 | "\n", 1740 | "\n", 1741 | "\n", 1742 | "\n" 1743 | ] 1744 | }, 1745 | { 1746 | "cell_type": "code", 1747 | "execution_count": 15, 1748 | "metadata": {}, 1749 | "outputs": [ 1750 | { 1751 | "data": { 1752 | "text/plain": [ 1753 | "array([2, 2, 1, 0], dtype=int64)" 1754 | ] 1755 | }, 1756 | "execution_count": 15, 1757 | "metadata": {}, 1758 | "output_type": "execute_result" 1759 | } 1760 | ], 1761 | "source": [ 1762 | "# Filter your object datatypes \n", 1763 | "\n", 1764 | "from sklearn import preprocessing\n", 1765 | "\n", 1766 | "le = preprocessing.LabelEncoder()\n", 1767 | "le.fit([\"paris\", \"paris\", \"tokyo\", \"amsterdam\"])\n", 1768 | "le.transform([\"tokyo\", \"tokyo\", \"paris\",\"amsterdam\"])\n", 1769 | "\n", 1770 | "# list(le.classes_)\n", 1771 | "\n", 1772 | "\n", 1773 | "#0 ,1,2" 1774 | ] 1775 | }, 1776 | { 1777 | "cell_type": "code", 1778 | "execution_count": null, 1779 | "metadata": {}, 1780 | "outputs": [], 1781 | "source": [] 1782 | }, 1783 | { 1784 | "cell_type": "code", 1785 | "execution_count": 16, 1786 | "metadata": { 1787 | "_cell_guid": "7ea5b0d8-1f13-e56b-72cf-bcbe7dd6fad2" 1788 | }, 1789 | "outputs": [ 1790 | { 1791 | "data": { 1792 | "text/html": [ 1793 | "
\n", 1794 | "\n", 1807 | "\n", 1808 | " \n", 1809 | " \n", 1810 | " \n", 1811 | " \n", 1812 | " \n", 1813 | " \n", 1814 | " \n", 1815 | " \n", 1816 | " \n", 1817 | " \n", 1818 | " \n", 1819 | " \n", 1820 | " \n", 1821 | " \n", 1822 | " \n", 1823 | " \n", 1824 | " \n", 1825 | " \n", 1826 | " \n", 1827 | " \n", 1828 | " \n", 1829 | " \n", 1830 | " \n", 1831 | " \n", 1832 | " \n", 1833 | " \n", 1834 | " \n", 1835 | " \n", 1836 | " \n", 1837 | " \n", 1838 | " \n", 1839 | " \n", 1840 | " \n", 1841 | " \n", 1842 | " \n", 1843 | " \n", 1844 | " \n", 1845 | " \n", 1846 | " \n", 1847 | " \n", 1848 | " \n", 1849 | " \n", 1850 | " \n", 1851 | " \n", 1852 | " \n", 1853 | " \n", 1854 | " \n", 1855 | " \n", 1856 | " \n", 1857 | " \n", 1858 | " \n", 1859 | " \n", 1860 | " \n", 1861 | " \n", 1862 | " \n", 1863 | " \n", 1864 | " \n", 1865 | " \n", 1866 | " \n", 1867 | " \n", 1868 | " \n", 1869 | " \n", 1870 | " \n", 1871 | " \n", 1872 | " \n", 1873 | " \n", 1874 | " \n", 1875 | " \n", 1876 | " \n", 1877 | " \n", 1878 | " \n", 1879 | " \n", 1880 | " \n", 1881 | " \n", 1882 | " \n", 1883 | " \n", 1884 | " \n", 1885 | " \n", 1886 | " \n", 1887 | " \n", 1888 | " \n", 1889 | " \n", 1890 | " \n", 1891 | " \n", 1892 | " \n", 1893 | " \n", 1894 | " \n", 1895 | " \n", 1896 | " \n", 1897 | " \n", 1898 | " \n", 1899 | " \n", 1900 | " \n", 1901 | " \n", 1902 | " \n", 1903 | " \n", 1904 | " \n", 1905 | " \n", 1906 | " \n", 1907 | " \n", 1908 | "
BusinessTravel_Non-TravelBusinessTravel_Travel_FrequentlyBusinessTravel_Travel_RarelyDepartment_Human ResourcesDepartment_Research & DevelopmentDepartment_SalesEducationField_Human ResourcesEducationField_Life SciencesEducationField_MarketingEducationField_Medical...JobRole_Research DirectorJobRole_Research ScientistJobRole_Sales ExecutiveJobRole_Sales RepresentativeMaritalStatus_DivorcedMaritalStatus_MarriedMaritalStatus_SingleOver18_YOverTime_NoOverTime_Yes
00010010100...0010001101
10100100100...0100010110
20010100000...0000001101
\n", 1909 | "

3 rows × 29 columns

\n", 1910 | "
" 1911 | ], 1912 | "text/plain": [ 1913 | " BusinessTravel_Non-Travel BusinessTravel_Travel_Frequently \\\n", 1914 | "0 0 0 \n", 1915 | "1 0 1 \n", 1916 | "2 0 0 \n", 1917 | "\n", 1918 | " BusinessTravel_Travel_Rarely Department_Human Resources \\\n", 1919 | "0 1 0 \n", 1920 | "1 0 0 \n", 1921 | "2 1 0 \n", 1922 | "\n", 1923 | " Department_Research & Development Department_Sales \\\n", 1924 | "0 0 1 \n", 1925 | "1 1 0 \n", 1926 | "2 1 0 \n", 1927 | "\n", 1928 | " EducationField_Human Resources EducationField_Life Sciences \\\n", 1929 | "0 0 1 \n", 1930 | "1 0 1 \n", 1931 | "2 0 0 \n", 1932 | "\n", 1933 | " EducationField_Marketing EducationField_Medical ... \\\n", 1934 | "0 0 0 ... \n", 1935 | "1 0 0 ... \n", 1936 | "2 0 0 ... \n", 1937 | "\n", 1938 | " JobRole_Research Director JobRole_Research Scientist \\\n", 1939 | "0 0 0 \n", 1940 | "1 0 1 \n", 1941 | "2 0 0 \n", 1942 | "\n", 1943 | " JobRole_Sales Executive JobRole_Sales Representative \\\n", 1944 | "0 1 0 \n", 1945 | "1 0 0 \n", 1946 | "2 0 0 \n", 1947 | "\n", 1948 | " MaritalStatus_Divorced MaritalStatus_Married MaritalStatus_Single \\\n", 1949 | "0 0 0 1 \n", 1950 | "1 0 1 0 \n", 1951 | "2 0 0 1 \n", 1952 | "\n", 1953 | " Over18_Y OverTime_No OverTime_Yes \n", 1954 | "0 1 0 1 \n", 1955 | "1 1 1 0 \n", 1956 | "2 1 0 1 \n", 1957 | "\n", 1958 | "[3 rows x 29 columns]" 1959 | ] 1960 | }, 1961 | "execution_count": 16, 1962 | "metadata": {}, 1963 | "output_type": "execute_result" 1964 | } 1965 | ], 1966 | "source": [ 1967 | "attrition_cat = pd.get_dummies(attrition_cat)\n", 1968 | "attrition_cat.head(3)" 1969 | ] 1970 | }, 1971 | { 1972 | "cell_type": "code", 1973 | "execution_count": 17, 1974 | "metadata": { 1975 | "_cell_guid": "de8b3a57-6aba-eae7-2be3-dbe0ae761d6a" 1976 | }, 1977 | "outputs": [], 1978 | "source": [ 1979 | "# Store the numerical features to a dataframe attrition_num\n", 1980 | "attrition_num = attrition[numerical]" 1981 | ] 1982 | }, 1983 | { 1984 | "cell_type": "markdown", 1985 | "metadata": 
{ 1986 | "_cell_guid": "9de23a93-10b6-33b8-eea8-0cf44c6e5e08" 1987 | }, 1988 | "source": [ 1989 | "Let's concatenate the numerical and categorical DataFrames." 1990 | ] 1991 | }, 1992 | { 1993 | "cell_type": "code", 1994 | "execution_count": 18, 1995 | "metadata": { 1996 | "_cell_guid": "b90b69ba-f19d-0707-7c2c-183b8d01130f" 1997 | }, 1998 | "outputs": [], 1999 | "source": [ 2000 | "# Concat the two dataframes together columnwise\n", 2001 | "attrition_final = pd.concat([attrition_num, attrition_cat], axis=1)" 2002 | ] 2003 | }, 2004 | { 2005 | "cell_type": "code", 2006 | "execution_count": 19, 2007 | "metadata": {}, 2008 | "outputs": [ 2009 | { 2010 | "data": { 2011 | "text/plain": [ 2012 | "(1470, 55)" 2013 | ] 2014 | }, 2015 | "execution_count": 19, 2016 | "metadata": {}, 2017 | "output_type": "execute_result" 2018 | } 2019 | ], 2020 | "source": [ 2021 | "attrition_final.shape" 2022 | ] 2023 | }, 2024 | { 2025 | "cell_type": "markdown", 2026 | "metadata": { 2027 | "_cell_guid": "1a295568-fab4-b79a-bc0d-be32ad032b3e" 2028 | }, 2029 | "source": [ 2030 | "**Target variable**\n", 2031 | "\n", 2032 | "The target in this case is given by the column **Attrition**, which contains categorical values and therefore requires numerical encoding. 
We numerically encode it by creating a dictionary with the mapping given as Yes : 1 and No : 0" 2033 | ] 2034 | }, 2035 | { 2036 | "cell_type": "code", 2037 | "execution_count": 20, 2038 | "metadata": { 2039 | "_cell_guid": "bfa5e82f-2dd3-1bee-5b2b-367468be7040" 2040 | }, 2041 | "outputs": [ 2042 | { 2043 | "data": { 2044 | "text/plain": [ 2045 | "0 1\n", 2046 | "1 0\n", 2047 | "2 1\n", 2048 | "Name: Attrition, dtype: int64" 2049 | ] 2050 | }, 2051 | "execution_count": 20, 2052 | "metadata": {}, 2053 | "output_type": "execute_result" 2054 | } 2055 | ], 2056 | "source": [ 2057 | "# Define a dictionary for the target mapping\n", 2058 | "target_map = {'Yes':1, 'No':0}\n", 2059 | "# Use the pandas apply method to numerically encode our attrition target variable\n", 2060 | "target = attrition[\"Attrition\"].apply(lambda x: target_map[x])\n", 2061 | "target.head(3)" 2062 | ] 2063 | }, 2064 | { 2065 | "cell_type": "markdown", 2066 | "metadata": { 2067 | "_cell_guid": "5564e6e1-83ed-75de-2540-0d037e31291b" 2068 | }, 2069 | "source": [ 2070 | "\n", 2071 | "**Splitting Data into Train and Test sets**\n" 2072 | ] 2073 | }, 2074 | { 2075 | "cell_type": "code", 2076 | "execution_count": 21, 2077 | "metadata": { 2078 | "_cell_guid": "c197f8ee-76b0-7137-f001-83f969637521" 2079 | }, 2080 | "outputs": [], 2081 | "source": [ 2082 | "# Split data into train and test sets as well as for validation and testing\n", 2083 | "train, test, target_train, target_test = train_test_split(attrition_final, target, train_size= 0.75,random_state=0);" 2084 | ] 2085 | }, 2086 | { 2087 | "cell_type": "markdown", 2088 | "metadata": {}, 2089 | "source": [ 2090 | "# Implementing Machine Learning Models\n" 2091 | ] 2092 | }, 2093 | { 2094 | "cell_type": "markdown", 2095 | "metadata": { 2096 | "_cell_guid": "610cfa87-0b9d-4671-cd51-c99ef9c9151d" 2097 | }, 2098 | "source": [ 2099 | "## GBM Classifier\n", 2100 | "\n" 2101 | ] 2102 | }, 2103 | { 2104 | "cell_type": "markdown", 2105 | "metadata": {}, 2106 | 
"source": [ 2107 | "### 1. n_estimators - Number of trees in the model\n", 2108 | "\n", 2109 | "### 2. max_features - The number of features to consider when searching for the best split. A common rule of thumb is the square root of the number of columns\n", 2110 | "\n", 2111 | "### 3. max_depth - Maximum depth of each tree; can be used to control overfitting\n", 2112 | "\n", 2113 | "### 4. min_samples_leaf - Minimum number of samples (observations) required in a terminal node (leaf). In general, lower values are needed for imbalanced problems\n", 2114 | "\n", 2115 | "### 5. subsample - The fraction of samples used for fitting the individual base learners\n", 2116 | "\n", 2117 | "### 6. learning_rate - Shrinks the contribution of each tree by learning_rate. There is a trade-off between learning_rate and n_estimators" 2118 | ] 2119 | }, 2120 | { 2121 | "cell_type": "code", 2122 | "execution_count": 44, 2123 | "metadata": {}, 2124 | "outputs": [ 2125 | { 2126 | "data": { 2127 | "text/plain": [ 2128 | "{'ccp_alpha': 0.0,\n", 2129 | " 'criterion': 'friedman_mse',\n", 2130 | " 'init': None,\n", 2131 | " 'learning_rate': 0.1,\n", 2132 | " 'loss': 'deviance',\n", 2133 | " 'max_depth': 3,\n", 2134 | " 'max_features': None,\n", 2135 | " 'max_leaf_nodes': None,\n", 2136 | " 'min_impurity_decrease': 0.0,\n", 2137 | " 'min_impurity_split': None,\n", 2138 | " 'min_samples_leaf': 1,\n", 2139 | " 'min_samples_split': 2,\n", 2140 | " 'min_weight_fraction_leaf': 0.0,\n", 2141 | " 'n_estimators': 100,\n", 2142 | " 'n_iter_no_change': None,\n", 2143 | " 'presort': 'deprecated',\n", 2144 | " 'random_state': 100,\n", 2145 | " 'subsample': 1.0,\n", 2146 | " 'tol': 0.0001,\n", 2147 | " 'validation_fraction': 0.1,\n", 2148 | " 'verbose': 0,\n", 2149 | " 'warm_start': False}" 2150 | ] 2151 | }, 2152 | "execution_count": 44, 2153 | "metadata": {}, 2154 | "output_type": "execute_result" 2155 | } 2156 | ], 2157 | "source": [ 2158 | "gb = GradientBoostingClassifier(random_state=100) # default \n", 2159 
| "gb.get_params()" 2160 | ] 2161 | }, 2162 | { 2163 | "cell_type": "code", 2164 | "execution_count": 45, 2165 | "metadata": { 2166 | "_cell_guid": "ed6a837e-2864-291c-be8d-3c8e9ed900b7" 2167 | }, 2168 | "outputs": [], 2169 | "source": [ 2170 | "# Fit the model to our train and target\n", 2171 | "gb.fit(train, target_train)\n", 2172 | "# Get our predictions\n", 2173 | "gb_predictions = gb.predict(test)" 2174 | ] 2175 | }, 2176 | { 2177 | "cell_type": "code", 2178 | "execution_count": 46, 2179 | "metadata": {}, 2180 | "outputs": [ 2181 | { 2182 | "data": { 2183 | "text/plain": [ 2184 | "array([[0.95425715, 0.04574285],\n", 2185 | " [0.96266164, 0.03733836],\n", 2186 | " [0.86753512, 0.13246488],\n", 2187 | " [0.93446343, 0.06553657],\n", 2188 | " [0.11595517, 0.88404483],\n", 2189 | " [0.69684147, 0.30315853],\n", 2190 | " [0.64887562, 0.35112438],\n", 2191 | " [0.95196738, 0.04803262],\n", 2192 | " [0.97038412, 0.02961588],\n", 2193 | " [0.86642399, 0.13357601],\n", 2194 | " [0.94437031, 0.05562969],\n", 2195 | " [0.91453905, 0.08546095],\n", 2196 | " [0.97008856, 0.02991144],\n", 2197 | " [0.30172989, 0.69827011],\n", 2198 | " [0.94679291, 0.05320709],\n", 2199 | " [0.98937899, 0.01062101],\n", 2200 | " [0.94587557, 0.05412443],\n", 2201 | " [0.93925719, 0.06074281],\n", 2202 | " [0.94572155, 0.05427845],\n", 2203 | " [0.92824768, 0.07175232],\n", 2204 | " [0.63031267, 0.36968733],\n", 2205 | " [0.95095032, 0.04904968],\n", 2206 | " [0.96466225, 0.03533775],\n", 2207 | " [0.97249008, 0.02750992],\n", 2208 | " [0.58256377, 0.41743623],\n", 2209 | " [0.75837581, 0.24162419],\n", 2210 | " [0.95692266, 0.04307734],\n", 2211 | " [0.97394765, 0.02605235],\n", 2212 | " [0.25660257, 0.74339743],\n", 2213 | " [0.96627476, 0.03372524],\n", 2214 | " [0.9370103 , 0.0629897 ],\n", 2215 | " [0.97054113, 0.02945887],\n", 2216 | " [0.72602826, 0.27397174],\n", 2217 | " [0.94088637, 0.05911363],\n", 2218 | " [0.8540458 , 0.1459542 ],\n", 2219 | " [0.95643054, 0.04356946],\n", 2220 
| " [0.92795664, 0.07204336],\n", 2221 | " [0.95077212, 0.04922788],\n", 2222 | " [0.96403289, 0.03596711],\n", 2223 | " [0.89832621, 0.10167379],\n", 2224 | " [0.93184448, 0.06815552],\n", 2225 | " [0.98246084, 0.01753916],\n", 2226 | " [0.98249482, 0.01750518],\n", 2227 | " [0.9445623 , 0.0554377 ],\n", 2228 | " [0.9739705 , 0.0260295 ],\n", 2229 | " [0.39098348, 0.60901652],\n", 2230 | " [0.84584692, 0.15415308],\n", 2231 | " [0.98543184, 0.01456816],\n", 2232 | " [0.12275346, 0.87724654],\n", 2233 | " [0.41831239, 0.58168761],\n", 2234 | " [0.88842615, 0.11157385],\n", 2235 | " [0.45888689, 0.54111311],\n", 2236 | " [0.94905763, 0.05094237],\n", 2237 | " [0.95377671, 0.04622329],\n", 2238 | " [0.38737978, 0.61262022],\n", 2239 | " [0.81782947, 0.18217053],\n", 2240 | " [0.98358002, 0.01641998],\n", 2241 | " [0.90739507, 0.09260493],\n", 2242 | " [0.98341059, 0.01658941],\n", 2243 | " [0.52594093, 0.47405907],\n", 2244 | " [0.98112677, 0.01887323],\n", 2245 | " [0.90242903, 0.09757097],\n", 2246 | " [0.76861349, 0.23138651],\n", 2247 | " [0.94486355, 0.05513645],\n", 2248 | " [0.53386057, 0.46613943],\n", 2249 | " [0.96150163, 0.03849837],\n", 2250 | " [0.8207475 , 0.1792525 ],\n", 2251 | " [0.92160034, 0.07839966],\n", 2252 | " [0.94286369, 0.05713631],\n", 2253 | " [0.95425839, 0.04574161],\n", 2254 | " [0.94541621, 0.05458379],\n", 2255 | " [0.70456971, 0.29543029],\n", 2256 | " [0.89822164, 0.10177836],\n", 2257 | " [0.97057804, 0.02942196],\n", 2258 | " [0.9116692 , 0.0883308 ],\n", 2259 | " [0.96863902, 0.03136098],\n", 2260 | " [0.97495523, 0.02504477],\n", 2261 | " [0.92495345, 0.07504655],\n", 2262 | " [0.41259222, 0.58740778],\n", 2263 | " [0.98987829, 0.01012171],\n", 2264 | " [0.98695265, 0.01304735],\n", 2265 | " [0.97879634, 0.02120366],\n", 2266 | " [0.9380963 , 0.0619037 ],\n", 2267 | " [0.95352286, 0.04647714],\n", 2268 | " [0.93758617, 0.06241383],\n", 2269 | " [0.955901 , 0.044099 ],\n", 2270 | " [0.97398947, 0.02601053],\n", 2271 | " 
[0.82717248, 0.17282752],\n", 2272 | " [0.93887772, 0.06112228],\n", 2273 | " [0.96171254, 0.03828746],\n", 2274 | " [0.32999572, 0.67000428],\n", 2275 | " [0.96346516, 0.03653484],\n", 2276 | " [0.88623647, 0.11376353],\n", 2277 | " [0.73941826, 0.26058174],\n", 2278 | " [0.98771043, 0.01228957],\n", 2279 | " [0.9712543 , 0.0287457 ],\n", 2280 | " [0.89449503, 0.10550497],\n", 2281 | " [0.58046794, 0.41953206],\n", 2282 | " [0.97774197, 0.02225803],\n", 2283 | " [0.95302046, 0.04697954],\n", 2284 | " [0.71342454, 0.28657546],\n", 2285 | " [0.8061877 , 0.1938123 ],\n", 2286 | " [0.49475417, 0.50524583],\n", 2287 | " [0.97057666, 0.02942334],\n", 2288 | " [0.96443989, 0.03556011],\n", 2289 | " [0.96042126, 0.03957874],\n", 2290 | " [0.92572113, 0.07427887],\n", 2291 | " [0.62023522, 0.37976478],\n", 2292 | " [0.48119761, 0.51880239],\n", 2293 | " [0.84650471, 0.15349529],\n", 2294 | " [0.62243893, 0.37756107],\n", 2295 | " [0.85932631, 0.14067369],\n", 2296 | " [0.95462151, 0.04537849],\n", 2297 | " [0.96619621, 0.03380379],\n", 2298 | " [0.94581538, 0.05418462],\n", 2299 | " [0.91689517, 0.08310483],\n", 2300 | " [0.95839369, 0.04160631],\n", 2301 | " [0.8794354 , 0.1205646 ],\n", 2302 | " [0.98362608, 0.01637392],\n", 2303 | " [0.98281878, 0.01718122],\n", 2304 | " [0.96947689, 0.03052311],\n", 2305 | " [0.97334027, 0.02665973],\n", 2306 | " [0.95974568, 0.04025432],\n", 2307 | " [0.12036692, 0.87963308],\n", 2308 | " [0.82297479, 0.17702521],\n", 2309 | " [0.98153225, 0.01846775],\n", 2310 | " [0.98694682, 0.01305318],\n", 2311 | " [0.91438861, 0.08561139],\n", 2312 | " [0.89250181, 0.10749819],\n", 2313 | " [0.9602713 , 0.0397287 ],\n", 2314 | " [0.98262507, 0.01737493],\n", 2315 | " [0.48009381, 0.51990619],\n", 2316 | " [0.33955706, 0.66044294],\n", 2317 | " [0.64491347, 0.35508653],\n", 2318 | " [0.8168872 , 0.1831128 ],\n", 2319 | " [0.50657794, 0.49342206],\n", 2320 | " [0.56711104, 0.43288896],\n", 2321 | " [0.9007447 , 0.0992553 ],\n", 2322 | " 
[0.90199207, 0.09800793],\n", 2323 | " [0.95356113, 0.04643887],\n", 2324 | " [0.96705584, 0.03294416],\n", 2325 | " [0.90884378, 0.09115622],\n", 2326 | " [0.9349426 , 0.0650574 ],\n", 2327 | " [0.91939924, 0.08060076],\n", 2328 | " [0.84578704, 0.15421296],\n", 2329 | " [0.97456423, 0.02543577],\n", 2330 | " [0.84751971, 0.15248029],\n", 2331 | " [0.96033593, 0.03966407],\n", 2332 | " [0.92545517, 0.07454483],\n", 2333 | " [0.97048199, 0.02951801],\n", 2334 | " [0.97032391, 0.02967609],\n", 2335 | " [0.91630897, 0.08369103],\n", 2336 | " [0.93079044, 0.06920956],\n", 2337 | " [0.40456246, 0.59543754],\n", 2338 | " [0.947625 , 0.052375 ],\n", 2339 | " [0.91449881, 0.08550119],\n", 2340 | " [0.98636154, 0.01363846],\n", 2341 | " [0.71405454, 0.28594546],\n", 2342 | " [0.96510197, 0.03489803],\n", 2343 | " [0.97168808, 0.02831192],\n", 2344 | " [0.77305162, 0.22694838],\n", 2345 | " [0.98524719, 0.01475281],\n", 2346 | " [0.56610324, 0.43389676],\n", 2347 | " [0.06973742, 0.93026258],\n", 2348 | " [0.96669022, 0.03330978],\n", 2349 | " [0.86957648, 0.13042352],\n", 2350 | " [0.85638175, 0.14361825],\n", 2351 | " [0.96746578, 0.03253422],\n", 2352 | " [0.96326231, 0.03673769],\n", 2353 | " [0.98008622, 0.01991378],\n", 2354 | " [0.9631766 , 0.0368234 ],\n", 2355 | " [0.94571755, 0.05428245],\n", 2356 | " [0.95353727, 0.04646273],\n", 2357 | " [0.76749272, 0.23250728],\n", 2358 | " [0.97156023, 0.02843977],\n", 2359 | " [0.84776155, 0.15223845],\n", 2360 | " [0.93983408, 0.06016592],\n", 2361 | " [0.7128298 , 0.2871702 ],\n", 2362 | " [0.87254419, 0.12745581],\n", 2363 | " [0.91867668, 0.08132332],\n", 2364 | " [0.96659066, 0.03340934],\n", 2365 | " [0.96842656, 0.03157344],\n", 2366 | " [0.98440216, 0.01559784],\n", 2367 | " [0.52822561, 0.47177439],\n", 2368 | " [0.97013695, 0.02986305],\n", 2369 | " [0.975773 , 0.024227 ],\n", 2370 | " [0.72460519, 0.27539481],\n", 2371 | " [0.92948195, 0.07051805],\n", 2372 | " [0.92364724, 0.07635276],\n", 2373 | " [0.8141039 , 
0.1858961 ],\n", 2374 | " [0.81531336, 0.18468664],\n", 2375 | " [0.34193098, 0.65806902],\n", 2376 | " [0.94062026, 0.05937974],\n", 2377 | " [0.98605656, 0.01394344],\n", 2378 | " [0.96238386, 0.03761614],\n", 2379 | " [0.89240038, 0.10759962],\n", 2380 | " [0.96629132, 0.03370868],\n", 2381 | " [0.96743579, 0.03256421],\n", 2382 | " [0.47861277, 0.52138723],\n", 2383 | " [0.95445889, 0.04554111],\n", 2384 | " [0.77024837, 0.22975163],\n", 2385 | " [0.93913958, 0.06086042],\n", 2386 | " [0.81173832, 0.18826168],\n", 2387 | " [0.97146037, 0.02853963],\n", 2388 | " [0.94145861, 0.05854139],\n", 2389 | " [0.93356602, 0.06643398],\n", 2390 | " [0.75972578, 0.24027422],\n", 2391 | " [0.98540422, 0.01459578],\n", 2392 | " [0.95597808, 0.04402192],\n", 2393 | " [0.97471758, 0.02528242],\n", 2394 | " [0.94873919, 0.05126081],\n", 2395 | " [0.97093324, 0.02906676],\n", 2396 | " [0.86302403, 0.13697597],\n", 2397 | " [0.83032931, 0.16967069],\n", 2398 | " [0.96277411, 0.03722589],\n", 2399 | " [0.97446945, 0.02553055],\n", 2400 | " [0.98062402, 0.01937598],\n", 2401 | " [0.87761169, 0.12238831],\n", 2402 | " [0.97251425, 0.02748575],\n", 2403 | " [0.63691449, 0.36308551],\n", 2404 | " [0.72636885, 0.27363115],\n", 2405 | " [0.77240465, 0.22759535],\n", 2406 | " [0.57649195, 0.42350805],\n", 2407 | " [0.67980935, 0.32019065],\n", 2408 | " [0.82615901, 0.17384099],\n", 2409 | " [0.96841481, 0.03158519],\n", 2410 | " [0.93445502, 0.06554498],\n", 2411 | " [0.69565772, 0.30434228],\n", 2412 | " [0.27794715, 0.72205285],\n", 2413 | " [0.92601046, 0.07398954],\n", 2414 | " [0.96144666, 0.03855334],\n", 2415 | " [0.59763048, 0.40236952],\n", 2416 | " [0.95776276, 0.04223724],\n", 2417 | " [0.92687147, 0.07312853],\n", 2418 | " [0.96753432, 0.03246568],\n", 2419 | " [0.61570908, 0.38429092],\n", 2420 | " [0.86399975, 0.13600025],\n", 2421 | " [0.98923344, 0.01076656],\n", 2422 | " [0.88397265, 0.11602735],\n", 2423 | " [0.97806179, 0.02193821],\n", 2424 | " [0.79986193, 
0.20013807],\n", 2425 | " [0.80545456, 0.19454544],\n", 2426 | " [0.9721119 , 0.0278881 ],\n", 2427 | " [0.95970082, 0.04029918],\n", 2428 | " [0.86890865, 0.13109135],\n", 2429 | " [0.95813176, 0.04186824],\n", 2430 | " [0.97568072, 0.02431928],\n", 2431 | " [0.69268457, 0.30731543],\n", 2432 | " [0.92598788, 0.07401212],\n", 2433 | " [0.91900926, 0.08099074],\n", 2434 | " [0.91581725, 0.08418275],\n", 2435 | " [0.8345903 , 0.1654097 ],\n", 2436 | " [0.65988095, 0.34011905],\n", 2437 | " [0.94964915, 0.05035085],\n", 2438 | " [0.79333725, 0.20666275],\n", 2439 | " [0.92374445, 0.07625555],\n", 2440 | " [0.96868775, 0.03131225],\n", 2441 | " [0.35561514, 0.64438486],\n", 2442 | " [0.84094598, 0.15905402],\n", 2443 | " [0.38892717, 0.61107283],\n", 2444 | " [0.85167427, 0.14832573],\n", 2445 | " [0.96307142, 0.03692858],\n", 2446 | " [0.88462374, 0.11537626],\n", 2447 | " [0.94908531, 0.05091469],\n", 2448 | " [0.88240951, 0.11759049],\n", 2449 | " [0.79423159, 0.20576841],\n", 2450 | " [0.97788665, 0.02211335],\n", 2451 | " [0.62121259, 0.37878741],\n", 2452 | " [0.86255923, 0.13744077],\n", 2453 | " [0.97136832, 0.02863168],\n", 2454 | " [0.95266044, 0.04733956],\n", 2455 | " [0.96974757, 0.03025243],\n", 2456 | " [0.95422737, 0.04577263],\n", 2457 | " [0.94976773, 0.05023227],\n", 2458 | " [0.96809168, 0.03190832],\n", 2459 | " [0.97957092, 0.02042908],\n", 2460 | " [0.91742816, 0.08257184],\n", 2461 | " [0.74667356, 0.25332644],\n", 2462 | " [0.93221129, 0.06778871],\n", 2463 | " [0.67372023, 0.32627977],\n", 2464 | " [0.66684839, 0.33315161],\n", 2465 | " [0.97330889, 0.02669111],\n", 2466 | " [0.91337794, 0.08662206],\n", 2467 | " [0.77366167, 0.22633833],\n", 2468 | " [0.8832038 , 0.1167962 ],\n", 2469 | " [0.84292213, 0.15707787],\n", 2470 | " [0.99294 , 0.00706 ],\n", 2471 | " [0.94024791, 0.05975209],\n", 2472 | " [0.96593479, 0.03406521],\n", 2473 | " [0.97629651, 0.02370349],\n", 2474 | " [0.93867593, 0.06132407],\n", 2475 | " [0.59379026, 
0.40620974],\n", 2476 | " [0.94611828, 0.05388172],\n", 2477 | " [0.94685051, 0.05314949],\n", 2478 | " [0.92676488, 0.07323512],\n", 2479 | " [0.9703074 , 0.0296926 ],\n", 2480 | " [0.89317638, 0.10682362],\n", 2481 | " [0.79986704, 0.20013296],\n", 2482 | " [0.02633175, 0.97366825],\n", 2483 | " [0.97025032, 0.02974968],\n", 2484 | " [0.89974809, 0.10025191],\n", 2485 | " [0.91638632, 0.08361368],\n", 2486 | " [0.78031964, 0.21968036],\n", 2487 | " [0.91008332, 0.08991668],\n", 2488 | " [0.97953917, 0.02046083],\n", 2489 | " [0.95730388, 0.04269612],\n", 2490 | " [0.7377896 , 0.2622104 ],\n", 2491 | " [0.88001116, 0.11998884],\n", 2492 | " [0.98293977, 0.01706023],\n", 2493 | " [0.96724203, 0.03275797],\n", 2494 | " [0.68747934, 0.31252066],\n", 2495 | " [0.96403622, 0.03596378],\n", 2496 | " [0.8867743 , 0.1132257 ],\n", 2497 | " [0.96202999, 0.03797001],\n", 2498 | " [0.82005794, 0.17994206],\n", 2499 | " [0.92286609, 0.07713391],\n", 2500 | " [0.88474497, 0.11525503],\n", 2501 | " [0.92484127, 0.07515873],\n", 2502 | " [0.70115256, 0.29884744],\n", 2503 | " [0.97183716, 0.02816284],\n", 2504 | " [0.5462985 , 0.4537015 ],\n", 2505 | " [0.9573019 , 0.0426981 ],\n", 2506 | " [0.92033062, 0.07966938],\n", 2507 | " [0.85225757, 0.14774243],\n", 2508 | " [0.95370205, 0.04629795],\n", 2509 | " [0.96747068, 0.03252932],\n", 2510 | " [0.7458445 , 0.2541555 ],\n", 2511 | " [0.74339205, 0.25660795],\n", 2512 | " [0.98721474, 0.01278526],\n", 2513 | " [0.96306272, 0.03693728],\n", 2514 | " [0.9579974 , 0.0420026 ],\n", 2515 | " [0.82413419, 0.17586581],\n", 2516 | " [0.89696197, 0.10303803],\n", 2517 | " [0.24911204, 0.75088796],\n", 2518 | " [0.82281444, 0.17718556],\n", 2519 | " [0.89850766, 0.10149234],\n", 2520 | " [0.92595595, 0.07404405],\n", 2521 | " [0.97562403, 0.02437597],\n", 2522 | " [0.97172923, 0.02827077],\n", 2523 | " [0.93588728, 0.06411272],\n", 2524 | " [0.88409016, 0.11590984],\n", 2525 | " [0.50668915, 0.49331085],\n", 2526 | " [0.93318445, 
0.06681555],\n", 2527 | " [0.93502909, 0.06497091],\n", 2528 | " [0.93028959, 0.06971041],\n", 2529 | " [0.94739208, 0.05260792],\n", 2530 | " [0.95415688, 0.04584312],\n", 2531 | " [0.65663195, 0.34336805],\n", 2532 | " [0.96911857, 0.03088143],\n", 2533 | " [0.7018124 , 0.2981876 ],\n", 2534 | " [0.98190499, 0.01809501],\n", 2535 | " [0.94059124, 0.05940876],\n", 2536 | " [0.92597402, 0.07402598],\n", 2537 | " [0.92941033, 0.07058967],\n", 2538 | " [0.945513 , 0.054487 ],\n", 2539 | " [0.98032438, 0.01967562],\n", 2540 | " [0.95757909, 0.04242091],\n", 2541 | " [0.94447651, 0.05552349],\n", 2542 | " [0.97012588, 0.02987412],\n", 2543 | " [0.44250021, 0.55749979],\n", 2544 | " [0.98552445, 0.01447555],\n", 2545 | " [0.97023514, 0.02976486],\n", 2546 | " [0.96078622, 0.03921378],\n", 2547 | " [0.88696047, 0.11303953],\n", 2548 | " [0.94499311, 0.05500689],\n", 2549 | " [0.86110176, 0.13889824],\n", 2550 | " [0.95385708, 0.04614292],\n", 2551 | " [0.70609297, 0.29390703]])" 2552 | ] 2553 | }, 2554 | "execution_count": 46, 2555 | "metadata": {}, 2556 | "output_type": "execute_result" 2557 | } 2558 | ], 2559 | "source": [ 2560 | "gb_predictions_prob = gb.predict_proba(test)\n", 2561 | "gb_predictions_prob" 2562 | ] 2563 | }, 2564 | { 2565 | "cell_type": "code", 2566 | "execution_count": null, 2567 | "metadata": {}, 2568 | "outputs": [], 2569 | "source": [ 2570 | "# Gradient Boosting Parameters\n", 2571 | "# gb_params ={\n", 2572 | "# 'n_estimators': 500, # no of Trees \n", 2573 | "# 'learning_rate' : 0.2,\n", 2574 | "# 'max_depth': 11,\n", 2575 | "# 'min_samples_leaf': 2,\n", 2576 | "# 'subsample': 1,\n", 2577 | "# 'max_features' : 'sqrt',\n", 2578 | "# 'random_state' : 100,\n", 2579 | "# 'verbose': 0\n", 2580 | "# }\n", 2581 | "\n", 2582 | "#gb = GradientBoostingClassifier(**gb_params) # After Doing HPT , we can pass the paramaters" 2583 | ] 2584 | }, 2585 | { 2586 | "cell_type": "code", 2587 | "execution_count": 47, 2588 | "metadata": { 2589 | "_cell_guid": 
"40c37011-76df-fcc7-9cd0-e689374a8d1a" 2590 | }, 2591 | "outputs": [ 2592 | { 2593 | "data": { 2594 | "text/plain": [ 2595 | "0.8831521739130435" 2596 | ] 2597 | }, 2598 | "execution_count": 47, 2599 | "metadata": {}, 2600 | "output_type": "execute_result" 2601 | } 2602 | ], 2603 | "source": [ 2604 | "accuracy_score(target_test, gb_predictions)" 2605 | ] 2606 | }, 2607 | { 2608 | "cell_type": "markdown", 2609 | "metadata": { 2610 | "_cell_guid": "21cc0476-b03e-731f-97b4-89d81977c3a7" 2611 | }, 2612 | "source": [ 2613 | "### Feature Importance Gradient Boosting Model\n" 2614 | ] 2615 | }, 2616 | { 2617 | "cell_type": "code", 2618 | "execution_count": 48, 2619 | "metadata": {}, 2620 | "outputs": [ 2621 | { 2622 | "data": { 2623 | "text/plain": [ 2624 | "array([0.06652223, 0.0588816 , 0.03073138, 0.00432496, 0. ,\n", 2625 | " 0.03904932, 0.03439457, 0.01631039, 0.03338705, 0.03211924,\n", 2626 | " 0.02490683, 0.10492537, 0.02377928, 0.0315579 , 0.02005508,\n", 2627 | " 0. , 0.01282365, 0. , 0.04186334, 0.04785753,\n", 2628 | " 0.00882029, 0.02921662, 0.02106675, 0.00975893, 0.02529042,\n", 2629 | " 0.05801712, 0.00119296, 0.02512782, 0. , 0. ,\n", 2630 | " 0.00335323, 0.00150605, 0.0042737 , 0.0005249 , 0.00822779,\n", 2631 | " 0.00420823, 0.0016531 , 0.00426846, 0.00052944, 0.00090874,\n", 2632 | " 0.00121754, 0.00014525, 0.01109724, 0. , 0.00334464,\n", 2633 | " 0. , 0.00517715, 0.01111343, 0.01076333, 0.00311429,\n", 2634 | " 0.00155062, 0.01744231, 0. 
, 0.05617885, 0.0474211 ])" 2635 | ] 2636 | }, 2637 | "execution_count": 48, 2638 | "metadata": {}, 2639 | "output_type": "execute_result" 2640 | } 2641 | ], 2642 | "source": [ 2643 | "gb.feature_importances_" 2644 | ] 2645 | }, 2646 | { 2647 | "cell_type": "code", 2648 | "execution_count": 49, 2649 | "metadata": { 2650 | "_cell_guid": "082ca641-ffd2-fc3b-a7b6-9418b08767d9" 2651 | }, 2652 | "outputs": [ 2653 | { 2654 | "data": { 2655 | "application/vnd.plotly.v1+json": { 2656 | "config": { 2657 | "linkText": "Export to plot.ly", 2658 | "plotlyServerURL": "https://plot.ly", 2659 | "showLink": false 2660 | }, 2661 | "data": [ 2662 | { 2663 | "marker": { 2664 | "color": [ 2665 | 0.06652223253695257, 2666 | 0.05888159883551992, 2667 | 0.03073137823187192, 2668 | 0.004324958198515217, 2669 | 0, 2670 | 0.03904932419117631, 2671 | 0.03439456715586092, 2672 | 0.01631038927940496, 2673 | 0.03338704516488361, 2674 | 0.03211924296440046, 2675 | 0.024906829658417066, 2676 | 0.10492536820487945, 2677 | 0.02377927623243685, 2678 | 0.031557897004027106, 2679 | 0.020055083719287114, 2680 | 0, 2681 | 0.012823654096422492, 2682 | 0, 2683 | 0.04186333616035551, 2684 | 0.04785753324320872, 2685 | 0.008820286459808887, 2686 | 0.029216621375744902, 2687 | 0.021066753459770205, 2688 | 0.009758927945108273, 2689 | 0.025290422087271346, 2690 | 0.05801711636083825, 2691 | 0.001192961971420996, 2692 | 0.025127819791960826, 2693 | 0, 2694 | 0, 2695 | 0.0033532256344687922, 2696 | 0.0015060503101848842, 2697 | 0.004273699405845098, 2698 | 0.0005249018883639997, 2699 | 0.00822778740581938, 2700 | 0.004208227048439592, 2701 | 0.0016531018368706766, 2702 | 0.0042684633108618355, 2703 | 0.0005294377436253811, 2704 | 0.000908740704327891, 2705 | 0.0012175398421370041, 2706 | 0.00014524848123212858, 2707 | 0.011097244488818544, 2708 | 0, 2709 | 0.0033446426566392054, 2710 | 0, 2711 | 0.005177148028722981, 2712 | 0.011113429006340464, 2713 | 0.010763326636927086, 2714 | 0.003114285866218408, 2715 | 
0.0015506180803909163, 2716 | 0.017442314085269278, 2717 | 0, 2718 | 0.05617884533997763, 2719 | 0.04742109786897487 2720 | ], 2721 | "colorscale": [ 2722 | [ 2723 | 0, 2724 | "rgb(12,51,131)" 2725 | ], 2726 | [ 2727 | 0.25, 2728 | "rgb(10,136,186)" 2729 | ], 2730 | [ 2731 | 0.5, 2732 | "rgb(242,211,56)" 2733 | ], 2734 | [ 2735 | 0.75, 2736 | "rgb(242,143,56)" 2737 | ], 2738 | [ 2739 | 1, 2740 | "rgb(217,30,30)" 2741 | ] 2742 | ], 2743 | "showscale": true, 2744 | "size": 12, 2745 | "sizemode": "diameter", 2746 | "sizeref": 1.3 2747 | }, 2748 | "mode": "markers", 2749 | "text": [ 2750 | "Age", 2751 | "DailyRate", 2752 | "DistanceFromHome", 2753 | "Education", 2754 | "EmployeeCount", 2755 | "EmployeeNumber", 2756 | "EnvironmentSatisfaction", 2757 | "HourlyRate", 2758 | "JobInvolvement", 2759 | "JobLevel", 2760 | "JobSatisfaction", 2761 | "MonthlyIncome", 2762 | "MonthlyRate", 2763 | "NumCompaniesWorked", 2764 | "PercentSalaryHike", 2765 | "PerformanceRating", 2766 | "RelationshipSatisfaction", 2767 | "StandardHours", 2768 | "StockOptionLevel", 2769 | "TotalWorkingYears", 2770 | "TrainingTimesLastYear", 2771 | "WorkLifeBalance", 2772 | "YearsAtCompany", 2773 | "YearsInCurrentRole", 2774 | "YearsSinceLastPromotion", 2775 | "YearsWithCurrManager", 2776 | "BusinessTravel_Non-Travel", 2777 | "BusinessTravel_Travel_Frequently", 2778 | "BusinessTravel_Travel_Rarely", 2779 | "Department_Human Resources", 2780 | "Department_Research & Development", 2781 | "Department_Sales", 2782 | "EducationField_Human Resources", 2783 | "EducationField_Life Sciences", 2784 | "EducationField_Marketing", 2785 | "EducationField_Medical", 2786 | "EducationField_Other", 2787 | "EducationField_Technical Degree", 2788 | "Gender_Female", 2789 | "Gender_Male", 2790 | "JobRole_Healthcare Representative", 2791 | "JobRole_Human Resources", 2792 | "JobRole_Laboratory Technician", 2793 | "JobRole_Manager", 2794 | "JobRole_Manufacturing Director", 2795 | "JobRole_Research Director", 2796 | 
"JobRole_Research Scientist", 2797 | "JobRole_Sales Executive", 2798 | "JobRole_Sales Representative", 2799 | "MaritalStatus_Divorced", 2800 | "MaritalStatus_Married", 2801 | "MaritalStatus_Single", 2802 | "Over18_Y", 2803 | "OverTime_No", 2804 | "OverTime_Yes" 2805 | ], 2806 | "type": "scatter", 2807 | "x": [ 2808 | "Age", 2809 | "DailyRate", 2810 | "DistanceFromHome", 2811 | "Education", 2812 | "EmployeeCount", 2813 | "EmployeeNumber", 2814 | "EnvironmentSatisfaction", 2815 | "HourlyRate", 2816 | "JobInvolvement", 2817 | "JobLevel", 2818 | "JobSatisfaction", 2819 | "MonthlyIncome", 2820 | "MonthlyRate", 2821 | "NumCompaniesWorked", 2822 | "PercentSalaryHike", 2823 | "PerformanceRating", 2824 | "RelationshipSatisfaction", 2825 | "StandardHours", 2826 | "StockOptionLevel", 2827 | "TotalWorkingYears", 2828 | "TrainingTimesLastYear", 2829 | "WorkLifeBalance", 2830 | "YearsAtCompany", 2831 | "YearsInCurrentRole", 2832 | "YearsSinceLastPromotion", 2833 | "YearsWithCurrManager", 2834 | "BusinessTravel_Non-Travel", 2835 | "BusinessTravel_Travel_Frequently", 2836 | "BusinessTravel_Travel_Rarely", 2837 | "Department_Human Resources", 2838 | "Department_Research & Development", 2839 | "Department_Sales", 2840 | "EducationField_Human Resources", 2841 | "EducationField_Life Sciences", 2842 | "EducationField_Marketing", 2843 | "EducationField_Medical", 2844 | "EducationField_Other", 2845 | "EducationField_Technical Degree", 2846 | "Gender_Female", 2847 | "Gender_Male", 2848 | "JobRole_Healthcare Representative", 2849 | "JobRole_Human Resources", 2850 | "JobRole_Laboratory Technician", 2851 | "JobRole_Manager", 2852 | "JobRole_Manufacturing Director", 2853 | "JobRole_Research Director", 2854 | "JobRole_Research Scientist", 2855 | "JobRole_Sales Executive", 2856 | "JobRole_Sales Representative", 2857 | "MaritalStatus_Divorced", 2858 | "MaritalStatus_Married", 2859 | "MaritalStatus_Single", 2860 | "Over18_Y", 2861 | "OverTime_No", 2862 | "OverTime_Yes" 2863 | ], 2864 | "y": [ 
2865 | 0.06652223253695257, 2866 | 0.05888159883551992, 2867 | 0.03073137823187192, 2868 | 0.004324958198515217, 2869 | 0, 2870 | 0.03904932419117631, 2871 | 0.03439456715586092, 2872 | 0.01631038927940496, 2873 | 0.03338704516488361, 2874 | 0.03211924296440046, 2875 | 0.024906829658417066, 2876 | 0.10492536820487945, 2877 | 0.02377927623243685, 2878 | 0.031557897004027106, 2879 | 0.020055083719287114, 2880 | 0, 2881 | 0.012823654096422492, 2882 | 0, 2883 | 0.04186333616035551, 2884 | 0.04785753324320872, 2885 | 0.008820286459808887, 2886 | 0.029216621375744902, 2887 | 0.021066753459770205, 2888 | 0.009758927945108273, 2889 | 0.025290422087271346, 2890 | 0.05801711636083825, 2891 | 0.001192961971420996, 2892 | 0.025127819791960826, 2893 | 0, 2894 | 0, 2895 | 0.0033532256344687922, 2896 | 0.0015060503101848842, 2897 | 0.004273699405845098, 2898 | 0.0005249018883639997, 2899 | 0.00822778740581938, 2900 | 0.004208227048439592, 2901 | 0.0016531018368706766, 2902 | 0.0042684633108618355, 2903 | 0.0005294377436253811, 2904 | 0.000908740704327891, 2905 | 0.0012175398421370041, 2906 | 0.00014524848123212858, 2907 | 0.011097244488818544, 2908 | 0, 2909 | 0.0033446426566392054, 2910 | 0, 2911 | 0.005177148028722981, 2912 | 0.011113429006340464, 2913 | 0.010763326636927086, 2914 | 0.003114285866218408, 2915 | 0.0015506180803909163, 2916 | 0.017442314085269278, 2917 | 0, 2918 | 0.05617884533997763, 2919 | 0.04742109786897487 2920 | ] 2921 | } 2922 | ], 2923 | "layout": { 2924 | "autosize": true, 2925 | "hovermode": "closest", 2926 | "showlegend": false, 2927 | "template": { 2928 | "data": { 2929 | "bar": [ 2930 | { 2931 | "error_x": { 2932 | "color": "#2a3f5f" 2933 | }, 2934 | "error_y": { 2935 | "color": "#2a3f5f" 2936 | }, 2937 | "marker": { 2938 | "line": { 2939 | "color": "#E5ECF6", 2940 | "width": 0.5 2941 | } 2942 | }, 2943 | "type": "bar" 2944 | } 2945 | ], 2946 | "barpolar": [ 2947 | { 2948 | "marker": { 2949 | "line": { 2950 | "color": "#E5ECF6", 2951 | "width": 0.5 
2952 | } 2953 | }, 2954 | "type": "barpolar" 2955 | } 2956 | ], 2957 | "carpet": [ 2958 | { 2959 | "aaxis": { 2960 | "endlinecolor": "#2a3f5f", 2961 | "gridcolor": "white", 2962 | "linecolor": "white", 2963 | "minorgridcolor": "white", 2964 | "startlinecolor": "#2a3f5f" 2965 | }, 2966 | "baxis": { 2967 | "endlinecolor": "#2a3f5f", 2968 | "gridcolor": "white", 2969 | "linecolor": "white", 2970 | "minorgridcolor": "white", 2971 | "startlinecolor": "#2a3f5f" 2972 | }, 2973 | "type": "carpet" 2974 | } 2975 | ], 2976 | "choropleth": [ 2977 | { 2978 | "colorbar": { 2979 | "outlinewidth": 0, 2980 | "ticks": "" 2981 | }, 2982 | "type": "choropleth" 2983 | } 2984 | ], 2985 | "contour": [ 2986 | { 2987 | "colorbar": { 2988 | "outlinewidth": 0, 2989 | "ticks": "" 2990 | }, 2991 | "colorscale": [ 2992 | [ 2993 | 0, 2994 | "#0d0887" 2995 | ], 2996 | [ 2997 | 0.1111111111111111, 2998 | "#46039f" 2999 | ], 3000 | [ 3001 | 0.2222222222222222, 3002 | "#7201a8" 3003 | ], 3004 | [ 3005 | 0.3333333333333333, 3006 | "#9c179e" 3007 | ], 3008 | [ 3009 | 0.4444444444444444, 3010 | "#bd3786" 3011 | ], 3012 | [ 3013 | 0.5555555555555556, 3014 | "#d8576b" 3015 | ], 3016 | [ 3017 | 0.6666666666666666, 3018 | "#ed7953" 3019 | ], 3020 | [ 3021 | 0.7777777777777778, 3022 | "#fb9f3a" 3023 | ], 3024 | [ 3025 | 0.8888888888888888, 3026 | "#fdca26" 3027 | ], 3028 | [ 3029 | 1, 3030 | "#f0f921" 3031 | ] 3032 | ], 3033 | "type": "contour" 3034 | } 3035 | ], 3036 | "contourcarpet": [ 3037 | { 3038 | "colorbar": { 3039 | "outlinewidth": 0, 3040 | "ticks": "" 3041 | }, 3042 | "type": "contourcarpet" 3043 | } 3044 | ], 3045 | "heatmap": [ 3046 | { 3047 | "colorbar": { 3048 | "outlinewidth": 0, 3049 | "ticks": "" 3050 | }, 3051 | "colorscale": [ 3052 | [ 3053 | 0, 3054 | "#0d0887" 3055 | ], 3056 | [ 3057 | 0.1111111111111111, 3058 | "#46039f" 3059 | ], 3060 | [ 3061 | 0.2222222222222222, 3062 | "#7201a8" 3063 | ], 3064 | [ 3065 | 0.3333333333333333, 3066 | "#9c179e" 3067 | ], 3068 | [ 3069 | 
0.4444444444444444, 3070 | "#bd3786" 3071 | ], 3072 | [ 3073 | 0.5555555555555556, 3074 | "#d8576b" 3075 | ], 3076 | [ 3077 | 0.6666666666666666, 3078 | "#ed7953" 3079 | ], 3080 | [ 3081 | 0.7777777777777778, 3082 | "#fb9f3a" 3083 | ], 3084 | [ 3085 | 0.8888888888888888, 3086 | "#fdca26" 3087 | ], 3088 | [ 3089 | 1, 3090 | "#f0f921" 3091 | ] 3092 | ], 3093 | "type": "heatmap" 3094 | } 3095 | ], 3096 | "heatmapgl": [ 3097 | { 3098 | "colorbar": { 3099 | "outlinewidth": 0, 3100 | "ticks": "" 3101 | }, 3102 | "colorscale": [ 3103 | [ 3104 | 0, 3105 | "#0d0887" 3106 | ], 3107 | [ 3108 | 0.1111111111111111, 3109 | "#46039f" 3110 | ], 3111 | [ 3112 | 0.2222222222222222, 3113 | "#7201a8" 3114 | ], 3115 | [ 3116 | 0.3333333333333333, 3117 | "#9c179e" 3118 | ], 3119 | [ 3120 | 0.4444444444444444, 3121 | "#bd3786" 3122 | ], 3123 | [ 3124 | 0.5555555555555556, 3125 | "#d8576b" 3126 | ], 3127 | [ 3128 | 0.6666666666666666, 3129 | "#ed7953" 3130 | ], 3131 | [ 3132 | 0.7777777777777778, 3133 | "#fb9f3a" 3134 | ], 3135 | [ 3136 | 0.8888888888888888, 3137 | "#fdca26" 3138 | ], 3139 | [ 3140 | 1, 3141 | "#f0f921" 3142 | ] 3143 | ], 3144 | "type": "heatmapgl" 3145 | } 3146 | ], 3147 | "histogram": [ 3148 | { 3149 | "marker": { 3150 | "colorbar": { 3151 | "outlinewidth": 0, 3152 | "ticks": "" 3153 | } 3154 | }, 3155 | "type": "histogram" 3156 | } 3157 | ], 3158 | "histogram2d": [ 3159 | { 3160 | "colorbar": { 3161 | "outlinewidth": 0, 3162 | "ticks": "" 3163 | }, 3164 | "colorscale": [ 3165 | [ 3166 | 0, 3167 | "#0d0887" 3168 | ], 3169 | [ 3170 | 0.1111111111111111, 3171 | "#46039f" 3172 | ], 3173 | [ 3174 | 0.2222222222222222, 3175 | "#7201a8" 3176 | ], 3177 | [ 3178 | 0.3333333333333333, 3179 | "#9c179e" 3180 | ], 3181 | [ 3182 | 0.4444444444444444, 3183 | "#bd3786" 3184 | ], 3185 | [ 3186 | 0.5555555555555556, 3187 | "#d8576b" 3188 | ], 3189 | [ 3190 | 0.6666666666666666, 3191 | "#ed7953" 3192 | ], 3193 | [ 3194 | 0.7777777777777778, 3195 | "#fb9f3a" 3196 | ], 3197 | [ 3198 | 
0.8888888888888888, 3199 | "#fdca26" 3200 | ], 3201 | [ 3202 | 1, 3203 | "#f0f921" 3204 | ] 3205 | ], 3206 | "type": "histogram2d" 3207 | } 3208 | ], 3209 | "histogram2dcontour": [ 3210 | { 3211 | "colorbar": { 3212 | "outlinewidth": 0, 3213 | "ticks": "" 3214 | }, 3215 | "colorscale": [ 3216 | [ 3217 | 0, 3218 | "#0d0887" 3219 | ], 3220 | [ 3221 | 0.1111111111111111, 3222 | "#46039f" 3223 | ], 3224 | [ 3225 | 0.2222222222222222, 3226 | "#7201a8" 3227 | ], 3228 | [ 3229 | 0.3333333333333333, 3230 | "#9c179e" 3231 | ], 3232 | [ 3233 | 0.4444444444444444, 3234 | "#bd3786" 3235 | ], 3236 | [ 3237 | 0.5555555555555556, 3238 | "#d8576b" 3239 | ], 3240 | [ 3241 | 0.6666666666666666, 3242 | "#ed7953" 3243 | ], 3244 | [ 3245 | 0.7777777777777778, 3246 | "#fb9f3a" 3247 | ], 3248 | [ 3249 | 0.8888888888888888, 3250 | "#fdca26" 3251 | ], 3252 | [ 3253 | 1, 3254 | "#f0f921" 3255 | ] 3256 | ], 3257 | "type": "histogram2dcontour" 3258 | } 3259 | ], 3260 | "mesh3d": [ 3261 | { 3262 | "colorbar": { 3263 | "outlinewidth": 0, 3264 | "ticks": "" 3265 | }, 3266 | "type": "mesh3d" 3267 | } 3268 | ], 3269 | "parcoords": [ 3270 | { 3271 | "line": { 3272 | "colorbar": { 3273 | "outlinewidth": 0, 3274 | "ticks": "" 3275 | } 3276 | }, 3277 | "type": "parcoords" 3278 | } 3279 | ], 3280 | "pie": [ 3281 | { 3282 | "automargin": true, 3283 | "type": "pie" 3284 | } 3285 | ], 3286 | "scatter": [ 3287 | { 3288 | "marker": { 3289 | "colorbar": { 3290 | "outlinewidth": 0, 3291 | "ticks": "" 3292 | } 3293 | }, 3294 | "type": "scatter" 3295 | } 3296 | ], 3297 | "scatter3d": [ 3298 | { 3299 | "line": { 3300 | "colorbar": { 3301 | "outlinewidth": 0, 3302 | "ticks": "" 3303 | } 3304 | }, 3305 | "marker": { 3306 | "colorbar": { 3307 | "outlinewidth": 0, 3308 | "ticks": "" 3309 | } 3310 | }, 3311 | "type": "scatter3d" 3312 | } 3313 | ], 3314 | "scattercarpet": [ 3315 | { 3316 | "marker": { 3317 | "colorbar": { 3318 | "outlinewidth": 0, 3319 | "ticks": "" 3320 | } 3321 | }, 3322 | "type": "scattercarpet" 
3323 | } 3324 | ], 3325 | "scattergeo": [ 3326 | { 3327 | "marker": { 3328 | "colorbar": { 3329 | "outlinewidth": 0, 3330 | "ticks": "" 3331 | } 3332 | }, 3333 | "type": "scattergeo" 3334 | } 3335 | ], 3336 | "scattergl": [ 3337 | { 3338 | "marker": { 3339 | "colorbar": { 3340 | "outlinewidth": 0, 3341 | "ticks": "" 3342 | } 3343 | }, 3344 | "type": "scattergl" 3345 | } 3346 | ], 3347 | "scattermapbox": [ 3348 | { 3349 | "marker": { 3350 | "colorbar": { 3351 | "outlinewidth": 0, 3352 | "ticks": "" 3353 | } 3354 | }, 3355 | "type": "scattermapbox" 3356 | } 3357 | ], 3358 | "scatterpolar": [ 3359 | { 3360 | "marker": { 3361 | "colorbar": { 3362 | "outlinewidth": 0, 3363 | "ticks": "" 3364 | } 3365 | }, 3366 | "type": "scatterpolar" 3367 | } 3368 | ], 3369 | "scatterpolargl": [ 3370 | { 3371 | "marker": { 3372 | "colorbar": { 3373 | "outlinewidth": 0, 3374 | "ticks": "" 3375 | } 3376 | }, 3377 | "type": "scatterpolargl" 3378 | } 3379 | ], 3380 | "scatterternary": [ 3381 | { 3382 | "marker": { 3383 | "colorbar": { 3384 | "outlinewidth": 0, 3385 | "ticks": "" 3386 | } 3387 | }, 3388 | "type": "scatterternary" 3389 | } 3390 | ], 3391 | "surface": [ 3392 | { 3393 | "colorbar": { 3394 | "outlinewidth": 0, 3395 | "ticks": "" 3396 | }, 3397 | "colorscale": [ 3398 | [ 3399 | 0, 3400 | "#0d0887" 3401 | ], 3402 | [ 3403 | 0.1111111111111111, 3404 | "#46039f" 3405 | ], 3406 | [ 3407 | 0.2222222222222222, 3408 | "#7201a8" 3409 | ], 3410 | [ 3411 | 0.3333333333333333, 3412 | "#9c179e" 3413 | ], 3414 | [ 3415 | 0.4444444444444444, 3416 | "#bd3786" 3417 | ], 3418 | [ 3419 | 0.5555555555555556, 3420 | "#d8576b" 3421 | ], 3422 | [ 3423 | 0.6666666666666666, 3424 | "#ed7953" 3425 | ], 3426 | [ 3427 | 0.7777777777777778, 3428 | "#fb9f3a" 3429 | ], 3430 | [ 3431 | 0.8888888888888888, 3432 | "#fdca26" 3433 | ], 3434 | [ 3435 | 1, 3436 | "#f0f921" 3437 | ] 3438 | ], 3439 | "type": "surface" 3440 | } 3441 | ], 3442 | "table": [ 3443 | { 3444 | "cells": { 3445 | "fill": { 3446 | "color": 
"#EBF0F8" 3447 | }, 3448 | "line": { 3449 | "color": "white" 3450 | } 3451 | }, 3452 | "header": { 3453 | "fill": { 3454 | "color": "#C8D4E3" 3455 | }, 3456 | "line": { 3457 | "color": "white" 3458 | } 3459 | }, 3460 | "type": "table" 3461 | } 3462 | ] 3463 | }, 3464 | "layout": { 3465 | "annotationdefaults": { 3466 | "arrowcolor": "#2a3f5f", 3467 | "arrowhead": 0, 3468 | "arrowwidth": 1 3469 | }, 3470 | "coloraxis": { 3471 | "colorbar": { 3472 | "outlinewidth": 0, 3473 | "ticks": "" 3474 | } 3475 | }, 3476 | "colorscale": { 3477 | "diverging": [ 3478 | [ 3479 | 0, 3480 | "#8e0152" 3481 | ], 3482 | [ 3483 | 0.1, 3484 | "#c51b7d" 3485 | ], 3486 | [ 3487 | 0.2, 3488 | "#de77ae" 3489 | ], 3490 | [ 3491 | 0.3, 3492 | "#f1b6da" 3493 | ], 3494 | [ 3495 | 0.4, 3496 | "#fde0ef" 3497 | ], 3498 | [ 3499 | 0.5, 3500 | "#f7f7f7" 3501 | ], 3502 | [ 3503 | 0.6, 3504 | "#e6f5d0" 3505 | ], 3506 | [ 3507 | 0.7, 3508 | "#b8e186" 3509 | ], 3510 | [ 3511 | 0.8, 3512 | "#7fbc41" 3513 | ], 3514 | [ 3515 | 0.9, 3516 | "#4d9221" 3517 | ], 3518 | [ 3519 | 1, 3520 | "#276419" 3521 | ] 3522 | ], 3523 | "sequential": [ 3524 | [ 3525 | 0, 3526 | "#0d0887" 3527 | ], 3528 | [ 3529 | 0.1111111111111111, 3530 | "#46039f" 3531 | ], 3532 | [ 3533 | 0.2222222222222222, 3534 | "#7201a8" 3535 | ], 3536 | [ 3537 | 0.3333333333333333, 3538 | "#9c179e" 3539 | ], 3540 | [ 3541 | 0.4444444444444444, 3542 | "#bd3786" 3543 | ], 3544 | [ 3545 | 0.5555555555555556, 3546 | "#d8576b" 3547 | ], 3548 | [ 3549 | 0.6666666666666666, 3550 | "#ed7953" 3551 | ], 3552 | [ 3553 | 0.7777777777777778, 3554 | "#fb9f3a" 3555 | ], 3556 | [ 3557 | 0.8888888888888888, 3558 | "#fdca26" 3559 | ], 3560 | [ 3561 | 1, 3562 | "#f0f921" 3563 | ] 3564 | ], 3565 | "sequentialminus": [ 3566 | [ 3567 | 0, 3568 | "#0d0887" 3569 | ], 3570 | [ 3571 | 0.1111111111111111, 3572 | "#46039f" 3573 | ], 3574 | [ 3575 | 0.2222222222222222, 3576 | "#7201a8" 3577 | ], 3578 | [ 3579 | 0.3333333333333333, 3580 | "#9c179e" 3581 | ], 3582 | [ 3583 | 
0.4444444444444444, 3584 | "#bd3786" 3585 | ], 3586 | [ 3587 | 0.5555555555555556, 3588 | "#d8576b" 3589 | ], 3590 | [ 3591 | 0.6666666666666666, 3592 | "#ed7953" 3593 | ], 3594 | [ 3595 | 0.7777777777777778, 3596 | "#fb9f3a" 3597 | ], 3598 | [ 3599 | 0.8888888888888888, 3600 | "#fdca26" 3601 | ], 3602 | [ 3603 | 1, 3604 | "#f0f921" 3605 | ] 3606 | ] 3607 | }, 3608 | "colorway": [ 3609 | "#636efa", 3610 | "#EF553B", 3611 | "#00cc96", 3612 | "#ab63fa", 3613 | "#FFA15A", 3614 | "#19d3f3", 3615 | "#FF6692", 3616 | "#B6E880", 3617 | "#FF97FF", 3618 | "#FECB52" 3619 | ], 3620 | "font": { 3621 | "color": "#2a3f5f" 3622 | }, 3623 | "geo": { 3624 | "bgcolor": "white", 3625 | "lakecolor": "white", 3626 | "landcolor": "#E5ECF6", 3627 | "showlakes": true, 3628 | "showland": true, 3629 | "subunitcolor": "white" 3630 | }, 3631 | "hoverlabel": { 3632 | "align": "left" 3633 | }, 3634 | "hovermode": "closest", 3635 | "mapbox": { 3636 | "style": "light" 3637 | }, 3638 | "paper_bgcolor": "white", 3639 | "plot_bgcolor": "#E5ECF6", 3640 | "polar": { 3641 | "angularaxis": { 3642 | "gridcolor": "white", 3643 | "linecolor": "white", 3644 | "ticks": "" 3645 | }, 3646 | "bgcolor": "#E5ECF6", 3647 | "radialaxis": { 3648 | "gridcolor": "white", 3649 | "linecolor": "white", 3650 | "ticks": "" 3651 | } 3652 | }, 3653 | "scene": { 3654 | "xaxis": { 3655 | "backgroundcolor": "#E5ECF6", 3656 | "gridcolor": "white", 3657 | "gridwidth": 2, 3658 | "linecolor": "white", 3659 | "showbackground": true, 3660 | "ticks": "", 3661 | "zerolinecolor": "white" 3662 | }, 3663 | "yaxis": { 3664 | "backgroundcolor": "#E5ECF6", 3665 | "gridcolor": "white", 3666 | "gridwidth": 2, 3667 | "linecolor": "white", 3668 | "showbackground": true, 3669 | "ticks": "", 3670 | "zerolinecolor": "white" 3671 | }, 3672 | "zaxis": { 3673 | "backgroundcolor": "#E5ECF6", 3674 | "gridcolor": "white", 3675 | "gridwidth": 2, 3676 | "linecolor": "white", 3677 | "showbackground": true, 3678 | "ticks": "", 3679 | "zerolinecolor": "white" 
3680 | } 3681 | }, 3682 | "shapedefaults": { 3683 | "line": { 3684 | "color": "#2a3f5f" 3685 | } 3686 | }, 3687 | "ternary": { 3688 | "aaxis": { 3689 | "gridcolor": "white", 3690 | "linecolor": "white", 3691 | "ticks": "" 3692 | }, 3693 | "baxis": { 3694 | "gridcolor": "white", 3695 | "linecolor": "white", 3696 | "ticks": "" 3697 | }, 3698 | "bgcolor": "#E5ECF6", 3699 | "caxis": { 3700 | "gridcolor": "white", 3701 | "linecolor": "white", 3702 | "ticks": "" 3703 | } 3704 | }, 3705 | "title": { 3706 | "x": 0.05 3707 | }, 3708 | "xaxis": { 3709 | "automargin": true, 3710 | "gridcolor": "white", 3711 | "linecolor": "white", 3712 | "ticks": "", 3713 | "title": { 3714 | "standoff": 15 3715 | }, 3716 | "zerolinecolor": "white", 3717 | "zerolinewidth": 2 3718 | }, 3719 | "yaxis": { 3720 | "automargin": true, 3721 | "gridcolor": "white", 3722 | "linecolor": "white", 3723 | "ticks": "", 3724 | "title": { 3725 | "standoff": 15 3726 | }, 3727 | "zerolinecolor": "white", 3728 | "zerolinewidth": 2 3729 | } 3730 | } 3731 | }, 3732 | "title": { 3733 | "text": "GBM Model Feature Importance" 3734 | }, 3735 | "xaxis": { 3736 | "showgrid": false, 3737 | "showline": false, 3738 | "ticklen": 5, 3739 | "zeroline": false 3740 | }, 3741 | "yaxis": { 3742 | "gridwidth": 2, 3743 | "showgrid": false, 3744 | "ticklen": 5, 3745 | "title": { 3746 | "text": "Feature Importance" 3747 | }, 3748 | "zeroline": false 3749 | } 3750 | } 3751 | }, 3752 | "text/html": [ 3753 | "
\n", 3754 | " \n", 3755 | " \n", 3756 | "
\n", 3757 | " \n", 3795 | "
" 3796 | ] 3797 | }, 3798 | "metadata": {}, 3799 | "output_type": "display_data" 3800 | } 3801 | ], 3802 | "source": [ 3803 | "# Scatter plot \n", 3804 | "trace = go.Scatter(\n", 3805 | " y = gb.feature_importances_,\n", 3806 | " x = attrition_final.columns.values,\n", 3807 | " mode='markers',\n", 3808 | " marker=dict(\n", 3809 | " sizemode = 'diameter',\n", 3810 | " sizeref = 1.3,\n", 3811 | " size = 12,\n", 3812 | " color = gb.feature_importances_,\n", 3813 | " colorscale='Portland',\n", 3814 | " showscale=True\n", 3815 | " ),\n", 3816 | " text = attrition_final.columns.values\n", 3817 | ")\n", 3818 | "data = [trace]\n", 3819 | "\n", 3820 | "layout= go.Layout(\n", 3821 | " autosize= True,\n", 3822 | " title= 'GBM Model Feature Importance',\n", 3823 | " hovermode= 'closest',\n", 3824 | " xaxis= dict(\n", 3825 | " ticklen= 5,\n", 3826 | " showgrid=False,\n", 3827 | " zeroline=False,\n", 3828 | " showline=False\n", 3829 | " ),\n", 3830 | " yaxis=dict(\n", 3831 | " title= 'Feature Importance',\n", 3832 | " showgrid=False,\n", 3833 | " zeroline=False,\n", 3834 | " ticklen= 5,\n", 3835 | " gridwidth= 2\n", 3836 | " ),\n", 3837 | " showlegend= False\n", 3838 | ")\n", 3839 | "fig = go.Figure(data=data, layout=layout)\n", 3840 | "py.iplot(fig,filename='scatter')" 3841 | ] 3842 | } 3843 | ], 3844 | "metadata": { 3845 | "_change_revision": 2, 3846 | "_is_fork": false, 3847 | "kernelspec": { 3848 | "display_name": "Python 3", 3849 | "language": "python", 3850 | "name": "python3" 3851 | }, 3852 | "language_info": { 3853 | "codemirror_mode": { 3854 | "name": "ipython", 3855 | "version": 3 3856 | }, 3857 | "file_extension": ".py", 3858 | "mimetype": "text/x-python", 3859 | "name": "python", 3860 | "nbconvert_exporter": "python", 3861 | "pygments_lexer": "ipython3", 3862 | "version": "3.7.7" 3863 | } 3864 | }, 3865 | "nbformat": 4, 3866 | "nbformat_minor": 1 3867 | } 3868 | --------------------------------------------------------------------------------