├── README.md
├── Gradient Boosting
├── init
└── GBM-Classification.ipynb
└── Introduction to Boosting
├── init
└── Adaboost-Classifier-Updated.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # Boosting
--------------------------------------------------------------------------------
/Gradient Boosting/init:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Introduction to Boosting/init:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Introduction to Boosting/Adaboost-Classifier-Updated.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#Importing the libraries\n",
10 | "import pandas as pd\n",
11 | "import numpy as np\n",
12 | "import seaborn as sns\n",
13 | "import matplotlib.pyplot as plt\n",
14 | "%matplotlib inline\n",
15 | "import plotly.offline as py\n",
16 | "import plotly.graph_objs as go\n",
17 | "from sklearn.model_selection import train_test_split\n",
18 | "from sklearn.tree import DecisionTreeClassifier\n",
19 | "from sklearn.ensemble import AdaBoostClassifier\n",
20 | "from sklearn.preprocessing import StandardScaler\n",
21 | "from sklearn.metrics import (accuracy_score, log_loss, confusion_matrix)\n",
22 | "#Suppressing warnings\n",
23 | "import warnings\n",
24 | "warnings.filterwarnings('ignore')"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {},
30 | "source": [
31 | "# Step 1 - Loading Dataset"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": null,
37 | "metadata": {
38 | "scrolled": true
39 | },
40 | "outputs": [],
41 | "source": [
42 | "#Importing the Dataset\n",
43 | "df = pd.read_csv('WA_Fn-UseC_-HR-Employee-Attrition.csv')"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": null,
49 | "metadata": {},
50 | "outputs": [],
51 | "source": [
52 | "df.head(3)\n",
53 | "\n",
54 | "# Dependent variable - Attrition (Yes/No) - binary classification problem\n",
55 | "\n",
56 | "# 34 independent variables"
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": null,
62 | "metadata": {},
63 | "outputs": [],
64 | "source": [
65 | "#Bar chart of how many employees fall into each 'Attrition' class ('Yes' / 'No')\n",
66 | "ax = sns.catplot(data=df, x='Attrition', kind='count', palette='ch:.25')\n",
67 | "ax.set(ylabel='Number of Employees', xlabel='Attrition')\n",
68 | "plt.show()"
69 | ]
70 | },
71 | {
72 | "cell_type": "markdown",
73 | "metadata": {},
74 | "source": [
75 | "Checking whether the dataframe contains any missing values."
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": null,
81 | "metadata": {},
82 | "outputs": [],
83 | "source": [
84 | "#Listing the names of the columns that contain at least one missing value\n",
85 | "missing_col = df.loc[:, df.isna().any()].columns.values\n",
86 | "print('The missing columns in the dataset are: ', missing_col)"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": null,
92 | "metadata": {},
93 | "outputs": [],
94 | "source": [
95 | "df.isnull().sum()"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": null,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": [
104 | "df.info()"
105 | ]
106 | },
107 | {
108 | "cell_type": "markdown",
109 | "metadata": {},
110 | "source": [
111 | "## Step 2 - Feature Engineering\n",
112 | "\n",
113 | "The numeric and categorical fields need to be treated separately. The following few steps separate the numeric and categorical fields and drop the target field 'Attrition' from the feature set."
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": null,
119 | "metadata": {},
120 | "outputs": [],
121 | "source": [
122 | "df.dtypes"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": null,
128 | "metadata": {},
129 | "outputs": [],
130 | "source": [
131 | "df['JobRole'].head()"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {},
138 | "outputs": [],
139 | "source": [
140 | "df.shape"
141 | ]
142 | },
143 | {
144 | "cell_type": "code",
145 | "execution_count": null,
146 | "metadata": {},
147 | "outputs": [],
148 | "source": [
149 | "#Extracting the Numeric and Categorical features: every numeric dtype goes to df_num and all remaining columns go to df_cat, so no column is silently lost when its dtype is not exactly int64/object\n",
150 | "df_num = df.select_dtypes(include = ['number'])\n",
151 | "df_cat = df.select_dtypes(exclude = ['number'])\n",
152 | "print(\"Shape of Numeric: \",df_num.shape)\n",
153 | "print(\"Shape of Categorical: \",df_cat.shape)"
154 | ]
155 | },
156 | {
157 | "cell_type": "markdown",
158 | "metadata": {},
159 | "source": [
160 | "### 2.1 Encoding Categorical Fields\n",
161 | "\n",
162 | "The categorical fields have been encoded using the get_dummies() function of Pandas."
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": null,
168 | "metadata": {},
169 | "outputs": [],
170 | "source": [
171 | "#Dropping the target 'Attrition' from df_cat before encoding; errors='ignore' keeps this cell re-runnable once the column has already been removed\n",
172 | "df_cat = df_cat.drop(columns=['Attrition'], errors='ignore')\n",
173 | "\n",
174 | "#One-hot encoding the remaining categorical columns using Pandas' get_dummies\n",
175 | "df_cat_encoded = pd.get_dummies(df_cat)\n",
176 | "df_cat_encoded.head(5)"
177 | ]
178 | },
179 | {
180 | "cell_type": "markdown",
181 | "metadata": {},
182 | "source": [
183 | "### 2.2 Scaling Numeric Fields\n",
184 | "\n",
185 | "The numeric fields have been scaled next for best results. `StandardScaler()` has been used for the same. After scaling the numeric features, they will be merged with the categorical features."
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": null,
191 | "metadata": {},
192 | "outputs": [],
193 | "source": [
194 | "df_num_scaled = pd.DataFrame(StandardScaler().fit_transform(df_num), columns = df_num.columns, index = df_num.index) #Scaling the Numeric features (fitted on all rows, i.e. before the train/test split); keeps df_num's column names and index so the concat below aligns row by row\n",
195 | "df_transformed_final = pd.concat([df_num_scaled,df_cat_encoded], axis = 1) #Combining the Categorical and the scaled Numeric features\n",
196 | "print(\"Shape of final dataframe: \",df_transformed_final.shape)"
197 | ]
198 | },
199 | {
200 | "cell_type": "code",
201 | "execution_count": null,
202 | "metadata": {},
203 | "outputs": [],
204 | "source": [
205 | "#Combining the Categorical and the unscaled Numeric features (this replaces the df_transformed_final built in the previous cell)\n",
206 | "df_transformed_final = pd.concat([df_num,df_cat_encoded], axis = 1)\n",
207 | "print(\"Shape of final dataframe: \",df_transformed_final.shape)"
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "execution_count": null,
213 | "metadata": {},
214 | "outputs": [],
215 | "source": [
216 | "#Extracting the target variable - 'Attrition'\n",
217 | "target = df['Attrition']\n",
218 | "\n",
219 | "#Mapping 'Yes' to 1 and 'No' to 0; the lookup dict is not called `map` so the builtin map() stays usable in later cells. Any other label raises KeyError.\n",
220 | "attrition_codes = {'Yes':1, 'No':0}\n",
221 | "target = target.apply(lambda label: attrition_codes[label])\n",
222 | "\n",
223 | "print(\"Shape of target: \",target.shape)\n",
224 | "\n",
225 | "X = df_transformed_final #Features\n",
226 | "y = target #Target"
227 | ]
228 | },
229 | {
230 | "cell_type": "markdown",
231 | "metadata": {},
232 | "source": [
233 | "### 2.3 Train and Test Split\n",
234 | "\n",
235 | "The data is next split into training and test dataset using the train_test_split functionality of sklearn."
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": null,
241 | "metadata": {},
242 | "outputs": [],
243 | "source": [
244 | "#Stratified split (on y) into Train and Test dataset in 80-20 ratio, then reporting the shape of each part\n",
245 | "X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.8, random_state = 0, stratify = y)\n",
246 | "\n",
247 | "split_shapes = {'X Train': X_train.shape, 'X Test': X_test.shape, 'y Train': y_train.shape, 'y Test': y_test.shape}\n",
248 | "for split_name, split_shape in split_shapes.items():\n",
249 | "    print(\"Shape of \" + split_name + \": \", split_shape)"
250 | ]
251 | },
252 | {
253 | "cell_type": "markdown",
254 | "metadata": {},
255 | "source": [
256 | "## Step 3 - Model Fitting\n",
257 | "\n"
258 | ]
259 | },
260 | {
261 | "cell_type": "markdown",
262 | "metadata": {},
263 | "source": [
264 | "# Adaboost Classifier\n",
265 | "##### The most important parameters are base_estimator, n_estimators and learning_rate.\n",
266 | "\n",
267 | "##### 1. base_estimator - It is the learning algorithm used to train the weak models. The default learning algorithm is DecisionTreeClassifier with a max depth of 1 (a decision stump). Note: scikit-learn 1.2 renamed this parameter to `estimator`.\n",
268 | "\n",
269 | "##### 2. n_estimators - It is the number of models to iteratively train.\n",
270 | "\n",
271 | "##### 3. learning_rate - It is the contribution of each model to the weights, and its default value is 1. There is a trade-off between learning_rate and n_estimators. Reducing the learning rate forces the model to train more slowly (but sometimes results in better performance scores). Decreasing the learning rate L makes the coefficients α_m smaller, which reduces the amplitude of the sample_weights updates at each step (as per the weight formula used at each step for updating the weights)."
272 | ]
273 | },
274 | {
275 | "cell_type": "code",
276 | "execution_count": null,
277 | "metadata": {},
278 | "outputs": [],
279 | "source": [
280 | "#Using adaBoosting to predict 'Attrition' \n",
281 | "adaboost = AdaBoostClassifier(n_estimators=200, random_state=1)\n",
282 | "\n",
283 | "\n",
284 | "# n_estimators is the number of models (weak learners) to train\n",
285 | "\n",
286 | "# from sklearn.ensemble.AdaBoost\n",
287 | "# Accuracy or AUC changes with the number of models (weak models)"
288 | ]
289 | },
290 | {
291 | "cell_type": "code",
292 | "execution_count": null,
293 | "metadata": {},
294 | "outputs": [],
295 | "source": [
296 | "#Fitting Model\n",
297 | "adaboost.fit(X_train, y_train)"
298 | ]
299 | },
300 | {
301 | "cell_type": "code",
302 | "execution_count": null,
303 | "metadata": {},
304 | "outputs": [],
305 | "source": [
306 | "#pred\n",
307 | "y_pred = adaboost.predict(X_test)\n",
308 | "\n",
309 | "\n",
310 | "# from sklearn.metrics."
311 | ]
312 | },
313 | {
314 | "cell_type": "code",
315 | "execution_count": null,
316 | "metadata": {},
317 | "outputs": [],
318 | "source": [
319 | "print('Accuracy of the model is: ',accuracy_score(y_test, y_pred))"
320 | ]
321 | },
322 | {
323 | "cell_type": "code",
324 | "execution_count": null,
325 | "metadata": {},
326 | "outputs": [],
327 | "source": [
328 | "#Confusion Matrix\n",
329 | "cm = confusion_matrix(y_test, y_pred)\n",
330 | "print('The confusion Matrix : \\n',cm)"
331 | ]
332 | }
333 | ],
334 | "metadata": {
335 | "kernelspec": {
336 | "display_name": "Python 3",
337 | "language": "python",
338 | "name": "python3"
339 | },
340 | "language_info": {
341 | "codemirror_mode": {
342 | "name": "ipython",
343 | "version": 3
344 | },
345 | "file_extension": ".py",
346 | "mimetype": "text/x-python",
347 | "name": "python",
348 | "nbconvert_exporter": "python",
349 | "pygments_lexer": "ipython3",
350 | "version": "3.7.7"
351 | }
352 | },
353 | "nbformat": 4,
354 | "nbformat_minor": 1
355 | }
356 |
--------------------------------------------------------------------------------
/Gradient Boosting/GBM-Classification.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {
7 | "_cell_guid": "adfbe30e-7ebb-0f88-d917-d9a8f97c638e"
8 | },
9 | "outputs": [
10 | {
11 | "data": {
12 | "text/html": [
13 | " \n",
28 | " "
29 | ]
30 | },
31 | "metadata": {},
32 | "output_type": "display_data"
33 | }
34 | ],
35 | "source": [
36 | "import numpy as np \n",
37 | "import pandas as pd \n",
38 | "\n",
39 | "\n",
40 | "import plotly.offline as py\n",
41 | "py.init_notebook_mode(connected=True)\n",
42 | "import plotly.graph_objs as go\n",
43 | "import plotly.tools as tls\n",
44 | "import seaborn as sns\n",
45 | "import matplotlib.pyplot as plt\n",
46 | "%matplotlib inline\n",
47 | "\n",
48 | "\n",
49 | "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n",
50 | "from sklearn.linear_model import LogisticRegression\n",
51 | "from sklearn.metrics import accuracy_score, log_loss\n",
52 | "from imblearn.over_sampling import SMOTE\n",
53 | "import xgboost as xgb\n",
54 | "from sklearn.model_selection import train_test_split\n",
55 | "\n",
56 | "\n",
57 | "# Import and suppress warnings\n",
58 | "import warnings\n",
59 | "warnings.filterwarnings('ignore')"
60 | ]
61 | },
62 | {
63 | "cell_type": "markdown",
64 | "metadata": {
65 | "_cell_guid": "5af03c82-cb84-d943-f82c-fc0a15d46b48"
66 | },
67 | "source": [
68 | "# 1. Exploratory Data Analysis\n",
69 | "\n",
70 | "Let us load in the dataset via the trusty Pandas package into a dataframe object which we call **attrition** and have a quick look at the first few rows"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 3,
76 | "metadata": {
77 | "_cell_guid": "e035b071-50f8-43ca-9611-fc47272bb05e"
78 | },
79 | "outputs": [
80 | {
81 | "data": {
82 | "text/html": [
83 | "
\n",
84 | "\n",
97 | "
\n",
98 | " \n",
99 | " \n",
100 | " | \n",
101 | " Age | \n",
102 | " Attrition | \n",
103 | " BusinessTravel | \n",
104 | " DailyRate | \n",
105 | " Department | \n",
106 | " DistanceFromHome | \n",
107 | " Education | \n",
108 | " EducationField | \n",
109 | " EmployeeCount | \n",
110 | " EmployeeNumber | \n",
111 | " ... | \n",
112 | " RelationshipSatisfaction | \n",
113 | " StandardHours | \n",
114 | " StockOptionLevel | \n",
115 | " TotalWorkingYears | \n",
116 | " TrainingTimesLastYear | \n",
117 | " WorkLifeBalance | \n",
118 | " YearsAtCompany | \n",
119 | " YearsInCurrentRole | \n",
120 | " YearsSinceLastPromotion | \n",
121 | " YearsWithCurrManager | \n",
122 | "
\n",
123 | " \n",
124 | " \n",
125 | " \n",
126 | " | 0 | \n",
127 | " 41 | \n",
128 | " Yes | \n",
129 | " Travel_Rarely | \n",
130 | " 1102 | \n",
131 | " Sales | \n",
132 | " 1 | \n",
133 | " 2 | \n",
134 | " Life Sciences | \n",
135 | " 1 | \n",
136 | " 1 | \n",
137 | " ... | \n",
138 | " 1 | \n",
139 | " 80 | \n",
140 | " 0 | \n",
141 | " 8 | \n",
142 | " 0 | \n",
143 | " 1 | \n",
144 | " 6 | \n",
145 | " 4 | \n",
146 | " 0 | \n",
147 | " 5 | \n",
148 | "
\n",
149 | " \n",
150 | " | 1 | \n",
151 | " 49 | \n",
152 | " No | \n",
153 | " Travel_Frequently | \n",
154 | " 279 | \n",
155 | " Research & Development | \n",
156 | " 8 | \n",
157 | " 1 | \n",
158 | " Life Sciences | \n",
159 | " 1 | \n",
160 | " 2 | \n",
161 | " ... | \n",
162 | " 4 | \n",
163 | " 80 | \n",
164 | " 1 | \n",
165 | " 10 | \n",
166 | " 3 | \n",
167 | " 3 | \n",
168 | " 10 | \n",
169 | " 7 | \n",
170 | " 1 | \n",
171 | " 7 | \n",
172 | "
\n",
173 | " \n",
174 | " | 2 | \n",
175 | " 37 | \n",
176 | " Yes | \n",
177 | " Travel_Rarely | \n",
178 | " 1373 | \n",
179 | " Research & Development | \n",
180 | " 2 | \n",
181 | " 2 | \n",
182 | " Other | \n",
183 | " 1 | \n",
184 | " 4 | \n",
185 | " ... | \n",
186 | " 2 | \n",
187 | " 80 | \n",
188 | " 0 | \n",
189 | " 7 | \n",
190 | " 3 | \n",
191 | " 3 | \n",
192 | " 0 | \n",
193 | " 0 | \n",
194 | " 0 | \n",
195 | " 0 | \n",
196 | "
\n",
197 | " \n",
198 | " | 3 | \n",
199 | " 33 | \n",
200 | " No | \n",
201 | " Travel_Frequently | \n",
202 | " 1392 | \n",
203 | " Research & Development | \n",
204 | " 3 | \n",
205 | " 4 | \n",
206 | " Life Sciences | \n",
207 | " 1 | \n",
208 | " 5 | \n",
209 | " ... | \n",
210 | " 3 | \n",
211 | " 80 | \n",
212 | " 0 | \n",
213 | " 8 | \n",
214 | " 3 | \n",
215 | " 3 | \n",
216 | " 8 | \n",
217 | " 7 | \n",
218 | " 3 | \n",
219 | " 0 | \n",
220 | "
\n",
221 | " \n",
222 | " | 4 | \n",
223 | " 27 | \n",
224 | " No | \n",
225 | " Travel_Rarely | \n",
226 | " 591 | \n",
227 | " Research & Development | \n",
228 | " 2 | \n",
229 | " 1 | \n",
230 | " Medical | \n",
231 | " 1 | \n",
232 | " 7 | \n",
233 | " ... | \n",
234 | " 4 | \n",
235 | " 80 | \n",
236 | " 1 | \n",
237 | " 6 | \n",
238 | " 3 | \n",
239 | " 3 | \n",
240 | " 2 | \n",
241 | " 2 | \n",
242 | " 2 | \n",
243 | " 2 | \n",
244 | "
\n",
245 | " \n",
246 | "
\n",
247 | "
5 rows × 35 columns
\n",
248 | "
"
249 | ],
250 | "text/plain": [
251 | " Age Attrition BusinessTravel DailyRate Department \\\n",
252 | "0 41 Yes Travel_Rarely 1102 Sales \n",
253 | "1 49 No Travel_Frequently 279 Research & Development \n",
254 | "2 37 Yes Travel_Rarely 1373 Research & Development \n",
255 | "3 33 No Travel_Frequently 1392 Research & Development \n",
256 | "4 27 No Travel_Rarely 591 Research & Development \n",
257 | "\n",
258 | " DistanceFromHome Education EducationField EmployeeCount EmployeeNumber \\\n",
259 | "0 1 2 Life Sciences 1 1 \n",
260 | "1 8 1 Life Sciences 1 2 \n",
261 | "2 2 2 Other 1 4 \n",
262 | "3 3 4 Life Sciences 1 5 \n",
263 | "4 2 1 Medical 1 7 \n",
264 | "\n",
265 | " ... RelationshipSatisfaction StandardHours StockOptionLevel \\\n",
266 | "0 ... 1 80 0 \n",
267 | "1 ... 4 80 1 \n",
268 | "2 ... 2 80 0 \n",
269 | "3 ... 3 80 0 \n",
270 | "4 ... 4 80 1 \n",
271 | "\n",
272 | " TotalWorkingYears TrainingTimesLastYear WorkLifeBalance YearsAtCompany \\\n",
273 | "0 8 0 1 6 \n",
274 | "1 10 3 3 10 \n",
275 | "2 7 3 3 0 \n",
276 | "3 8 3 3 8 \n",
277 | "4 6 3 3 2 \n",
278 | "\n",
279 | " YearsInCurrentRole YearsSinceLastPromotion YearsWithCurrManager \n",
280 | "0 4 0 5 \n",
281 | "1 7 1 7 \n",
282 | "2 0 0 0 \n",
283 | "3 7 3 0 \n",
284 | "4 2 2 2 \n",
285 | "\n",
286 | "[5 rows x 35 columns]"
287 | ]
288 | },
289 | "execution_count": 3,
290 | "metadata": {},
291 | "output_type": "execute_result"
292 | }
293 | ],
294 | "source": [
295 | "attrition = pd.read_csv('WA_Fn-UseC_-HR-Employee-Attrition.csv')\n",
296 | "attrition.head()"
297 | ]
298 | },
299 | {
300 | "cell_type": "code",
301 | "execution_count": 4,
302 | "metadata": {
303 | "_cell_guid": "57e2bf45-5920-af03-50c1-b5bba334eb11"
304 | },
305 | "outputs": [
306 | {
307 | "data": {
308 | "text/plain": [
309 | "Age False\n",
310 | "Attrition False\n",
311 | "BusinessTravel False\n",
312 | "DailyRate False\n",
313 | "Department False\n",
314 | "DistanceFromHome False\n",
315 | "Education False\n",
316 | "EducationField False\n",
317 | "EmployeeCount False\n",
318 | "EmployeeNumber False\n",
319 | "EnvironmentSatisfaction False\n",
320 | "Gender False\n",
321 | "HourlyRate False\n",
322 | "JobInvolvement False\n",
323 | "JobLevel False\n",
324 | "JobRole False\n",
325 | "JobSatisfaction False\n",
326 | "MaritalStatus False\n",
327 | "MonthlyIncome False\n",
328 | "MonthlyRate False\n",
329 | "NumCompaniesWorked False\n",
330 | "Over18 False\n",
331 | "OverTime False\n",
332 | "PercentSalaryHike False\n",
333 | "PerformanceRating False\n",
334 | "RelationshipSatisfaction False\n",
335 | "StandardHours False\n",
336 | "StockOptionLevel False\n",
337 | "TotalWorkingYears False\n",
338 | "TrainingTimesLastYear False\n",
339 | "WorkLifeBalance False\n",
340 | "YearsAtCompany False\n",
341 | "YearsInCurrentRole False\n",
342 | "YearsSinceLastPromotion False\n",
343 | "YearsWithCurrManager False\n",
344 | "dtype: bool"
345 | ]
346 | },
347 | "execution_count": 4,
348 | "metadata": {},
349 | "output_type": "execute_result"
350 | }
351 | ],
352 | "source": [
353 | "# Looking for NaN\n",
354 | "attrition.isnull().any()"
355 | ]
356 | },
357 | {
358 | "cell_type": "code",
359 | "execution_count": 5,
360 | "metadata": {},
361 | "outputs": [],
362 | "source": [
363 | "# attrition.Age.fillna('')"
364 | ]
365 | },
366 | {
367 | "cell_type": "markdown",
368 | "metadata": {
369 | "_cell_guid": "5c5dc2ed-7608-4d84-c4f6-c591a3be7570"
370 | },
371 | "source": [
372 | "### Correlation of Features\n"
373 | ]
374 | },
375 | {
376 | "cell_type": "code",
377 | "execution_count": 6,
378 | "metadata": {},
379 | "outputs": [
380 | {
381 | "data": {
382 | "text/html": [
383 | "\n",
384 | "\n",
397 | "
\n",
398 | " \n",
399 | " \n",
400 | " | \n",
401 | " Age | \n",
402 | " DailyRate | \n",
403 | " DistanceFromHome | \n",
404 | " Education | \n",
405 | " EmployeeCount | \n",
406 | " EmployeeNumber | \n",
407 | " EnvironmentSatisfaction | \n",
408 | " HourlyRate | \n",
409 | " JobInvolvement | \n",
410 | " JobLevel | \n",
411 | " ... | \n",
412 | " RelationshipSatisfaction | \n",
413 | " StandardHours | \n",
414 | " StockOptionLevel | \n",
415 | " TotalWorkingYears | \n",
416 | " TrainingTimesLastYear | \n",
417 | " WorkLifeBalance | \n",
418 | " YearsAtCompany | \n",
419 | " YearsInCurrentRole | \n",
420 | " YearsSinceLastPromotion | \n",
421 | " YearsWithCurrManager | \n",
422 | "
\n",
423 | " \n",
424 | " \n",
425 | " \n",
426 | " | Age | \n",
427 | " 1.000000 | \n",
428 | " 0.010661 | \n",
429 | " -0.001686 | \n",
430 | " 0.208034 | \n",
431 | " NaN | \n",
432 | " -0.010145 | \n",
433 | " 0.010146 | \n",
434 | " 0.024287 | \n",
435 | " 0.029820 | \n",
436 | " 0.509604 | \n",
437 | " ... | \n",
438 | " 0.053535 | \n",
439 | " NaN | \n",
440 | " 0.037510 | \n",
441 | " 0.680381 | \n",
442 | " -0.019621 | \n",
443 | " -0.021490 | \n",
444 | " 0.311309 | \n",
445 | " 0.212901 | \n",
446 | " 0.216513 | \n",
447 | " 0.202089 | \n",
448 | "
\n",
449 | " \n",
450 | " | DailyRate | \n",
451 | " 0.010661 | \n",
452 | " 1.000000 | \n",
453 | " -0.004985 | \n",
454 | " -0.016806 | \n",
455 | " NaN | \n",
456 | " -0.050990 | \n",
457 | " 0.018355 | \n",
458 | " 0.023381 | \n",
459 | " 0.046135 | \n",
460 | " 0.002966 | \n",
461 | " ... | \n",
462 | " 0.007846 | \n",
463 | " NaN | \n",
464 | " 0.042143 | \n",
465 | " 0.014515 | \n",
466 | " 0.002453 | \n",
467 | " -0.037848 | \n",
468 | " -0.034055 | \n",
469 | " 0.009932 | \n",
470 | " -0.033229 | \n",
471 | " -0.026363 | \n",
472 | "
\n",
473 | " \n",
474 | " | DistanceFromHome | \n",
475 | " -0.001686 | \n",
476 | " -0.004985 | \n",
477 | " 1.000000 | \n",
478 | " 0.021042 | \n",
479 | " NaN | \n",
480 | " 0.032916 | \n",
481 | " -0.016075 | \n",
482 | " 0.031131 | \n",
483 | " 0.008783 | \n",
484 | " 0.005303 | \n",
485 | " ... | \n",
486 | " 0.006557 | \n",
487 | " NaN | \n",
488 | " 0.044872 | \n",
489 | " 0.004628 | \n",
490 | " -0.036942 | \n",
491 | " -0.026556 | \n",
492 | " 0.009508 | \n",
493 | " 0.018845 | \n",
494 | " 0.010029 | \n",
495 | " 0.014406 | \n",
496 | "
\n",
497 | " \n",
498 | " | Education | \n",
499 | " 0.208034 | \n",
500 | " -0.016806 | \n",
501 | " 0.021042 | \n",
502 | " 1.000000 | \n",
503 | " NaN | \n",
504 | " 0.042070 | \n",
505 | " -0.027128 | \n",
506 | " 0.016775 | \n",
507 | " 0.042438 | \n",
508 | " 0.101589 | \n",
509 | " ... | \n",
510 | " -0.009118 | \n",
511 | " NaN | \n",
512 | " 0.018422 | \n",
513 | " 0.148280 | \n",
514 | " -0.025100 | \n",
515 | " 0.009819 | \n",
516 | " 0.069114 | \n",
517 | " 0.060236 | \n",
518 | " 0.054254 | \n",
519 | " 0.069065 | \n",
520 | "
\n",
521 | " \n",
522 | " | EmployeeCount | \n",
523 | " NaN | \n",
524 | " NaN | \n",
525 | " NaN | \n",
526 | " NaN | \n",
527 | " NaN | \n",
528 | " NaN | \n",
529 | " NaN | \n",
530 | " NaN | \n",
531 | " NaN | \n",
532 | " NaN | \n",
533 | " ... | \n",
534 | " NaN | \n",
535 | " NaN | \n",
536 | " NaN | \n",
537 | " NaN | \n",
538 | " NaN | \n",
539 | " NaN | \n",
540 | " NaN | \n",
541 | " NaN | \n",
542 | " NaN | \n",
543 | " NaN | \n",
544 | "
\n",
545 | " \n",
546 | " | EmployeeNumber | \n",
547 | " -0.010145 | \n",
548 | " -0.050990 | \n",
549 | " 0.032916 | \n",
550 | " 0.042070 | \n",
551 | " NaN | \n",
552 | " 1.000000 | \n",
553 | " 0.017621 | \n",
554 | " 0.035179 | \n",
555 | " -0.006888 | \n",
556 | " -0.018519 | \n",
557 | " ... | \n",
558 | " -0.069861 | \n",
559 | " NaN | \n",
560 | " 0.062227 | \n",
561 | " -0.014365 | \n",
562 | " 0.023603 | \n",
563 | " 0.010309 | \n",
564 | " -0.011240 | \n",
565 | " -0.008416 | \n",
566 | " -0.009019 | \n",
567 | " -0.009197 | \n",
568 | "
\n",
569 | " \n",
570 | " | EnvironmentSatisfaction | \n",
571 | " 0.010146 | \n",
572 | " 0.018355 | \n",
573 | " -0.016075 | \n",
574 | " -0.027128 | \n",
575 | " NaN | \n",
576 | " 0.017621 | \n",
577 | " 1.000000 | \n",
578 | " -0.049857 | \n",
579 | " -0.008278 | \n",
580 | " 0.001212 | \n",
581 | " ... | \n",
582 | " 0.007665 | \n",
583 | " NaN | \n",
584 | " 0.003432 | \n",
585 | " -0.002693 | \n",
586 | " -0.019359 | \n",
587 | " 0.027627 | \n",
588 | " 0.001458 | \n",
589 | " 0.018007 | \n",
590 | " 0.016194 | \n",
591 | " -0.004999 | \n",
592 | "
\n",
593 | " \n",
594 | " | HourlyRate | \n",
595 | " 0.024287 | \n",
596 | " 0.023381 | \n",
597 | " 0.031131 | \n",
598 | " 0.016775 | \n",
599 | " NaN | \n",
600 | " 0.035179 | \n",
601 | " -0.049857 | \n",
602 | " 1.000000 | \n",
603 | " 0.042861 | \n",
604 | " -0.027853 | \n",
605 | " ... | \n",
606 | " 0.001330 | \n",
607 | " NaN | \n",
608 | " 0.050263 | \n",
609 | " -0.002334 | \n",
610 | " -0.008548 | \n",
611 | " -0.004607 | \n",
612 | " -0.019582 | \n",
613 | " -0.024106 | \n",
614 | " -0.026716 | \n",
615 | " -0.020123 | \n",
616 | "
\n",
617 | " \n",
618 | " | JobInvolvement | \n",
619 | " 0.029820 | \n",
620 | " 0.046135 | \n",
621 | " 0.008783 | \n",
622 | " 0.042438 | \n",
623 | " NaN | \n",
624 | " -0.006888 | \n",
625 | " -0.008278 | \n",
626 | " 0.042861 | \n",
627 | " 1.000000 | \n",
628 | " -0.012630 | \n",
629 | " ... | \n",
630 | " 0.034297 | \n",
631 | " NaN | \n",
632 | " 0.021523 | \n",
633 | " -0.005533 | \n",
634 | " -0.015338 | \n",
635 | " -0.014617 | \n",
636 | " -0.021355 | \n",
637 | " 0.008717 | \n",
638 | " -0.024184 | \n",
639 | " 0.025976 | \n",
640 | "
\n",
641 | " \n",
642 | " | JobLevel | \n",
643 | " 0.509604 | \n",
644 | " 0.002966 | \n",
645 | " 0.005303 | \n",
646 | " 0.101589 | \n",
647 | " NaN | \n",
648 | " -0.018519 | \n",
649 | " 0.001212 | \n",
650 | " -0.027853 | \n",
651 | " -0.012630 | \n",
652 | " 1.000000 | \n",
653 | " ... | \n",
654 | " 0.021642 | \n",
655 | " NaN | \n",
656 | " 0.013984 | \n",
657 | " 0.782208 | \n",
658 | " -0.018191 | \n",
659 | " 0.037818 | \n",
660 | " 0.534739 | \n",
661 | " 0.389447 | \n",
662 | " 0.353885 | \n",
663 | " 0.375281 | \n",
664 | "
\n",
665 | " \n",
666 | " | JobSatisfaction | \n",
667 | " -0.004892 | \n",
668 | " 0.030571 | \n",
669 | " -0.003669 | \n",
670 | " -0.011296 | \n",
671 | " NaN | \n",
672 | " -0.046247 | \n",
673 | " -0.006784 | \n",
674 | " -0.071335 | \n",
675 | " -0.021476 | \n",
676 | " -0.001944 | \n",
677 | " ... | \n",
678 | " -0.012454 | \n",
679 | " NaN | \n",
680 | " 0.010690 | \n",
681 | " -0.020185 | \n",
682 | " -0.005779 | \n",
683 | " -0.019459 | \n",
684 | " -0.003803 | \n",
685 | " -0.002305 | \n",
686 | " -0.018214 | \n",
687 | " -0.027656 | \n",
688 | "
\n",
689 | " \n",
690 | " | MonthlyIncome | \n",
691 | " 0.497855 | \n",
692 | " 0.007707 | \n",
693 | " -0.017014 | \n",
694 | " 0.094961 | \n",
695 | " NaN | \n",
696 | " -0.014829 | \n",
697 | " -0.006259 | \n",
698 | " -0.015794 | \n",
699 | " -0.015271 | \n",
700 | " 0.950300 | \n",
701 | " ... | \n",
702 | " 0.025873 | \n",
703 | " NaN | \n",
704 | " 0.005408 | \n",
705 | " 0.772893 | \n",
706 | " -0.021736 | \n",
707 | " 0.030683 | \n",
708 | " 0.514285 | \n",
709 | " 0.363818 | \n",
710 | " 0.344978 | \n",
711 | " 0.344079 | \n",
712 | "
\n",
713 | " \n",
714 | " | MonthlyRate | \n",
715 | " 0.028051 | \n",
716 | " -0.032182 | \n",
717 | " 0.027473 | \n",
718 | " -0.026084 | \n",
719 | " NaN | \n",
720 | " 0.012648 | \n",
721 | " 0.037600 | \n",
722 | " -0.015297 | \n",
723 | " -0.016322 | \n",
724 | " 0.039563 | \n",
725 | " ... | \n",
726 | " -0.004085 | \n",
727 | " NaN | \n",
728 | " -0.034323 | \n",
729 | " 0.026442 | \n",
730 | " 0.001467 | \n",
731 | " 0.007963 | \n",
732 | " -0.023655 | \n",
733 | " -0.012815 | \n",
734 | " 0.001567 | \n",
735 | " -0.036746 | \n",
736 | "
\n",
737 | " \n",
738 | " | NumCompaniesWorked | \n",
739 | " 0.299635 | \n",
740 | " 0.038153 | \n",
741 | " -0.029251 | \n",
742 | " 0.126317 | \n",
743 | " NaN | \n",
744 | " -0.001251 | \n",
745 | " 0.012594 | \n",
746 | " 0.022157 | \n",
747 | " 0.015012 | \n",
748 | " 0.142501 | \n",
749 | " ... | \n",
750 | " 0.052733 | \n",
751 | " NaN | \n",
752 | " 0.030075 | \n",
753 | " 0.237639 | \n",
754 | " -0.066054 | \n",
755 | " -0.008366 | \n",
756 | " -0.118421 | \n",
757 | " -0.090754 | \n",
758 | " -0.036814 | \n",
759 | " -0.110319 | \n",
760 | "
\n",
761 | " \n",
762 | " | PercentSalaryHike | \n",
763 | " 0.003634 | \n",
764 | " 0.022704 | \n",
765 | " 0.040235 | \n",
766 | " -0.011111 | \n",
767 | " NaN | \n",
768 | " -0.012944 | \n",
769 | " -0.031701 | \n",
770 | " -0.009062 | \n",
771 | " -0.017205 | \n",
772 | " -0.034730 | \n",
773 | " ... | \n",
774 | " -0.040490 | \n",
775 | " NaN | \n",
776 | " 0.007528 | \n",
777 | " -0.020608 | \n",
778 | " -0.005221 | \n",
779 | " -0.003280 | \n",
780 | " -0.035991 | \n",
781 | " -0.001520 | \n",
782 | " -0.022154 | \n",
783 | " -0.011985 | \n",
784 | "
\n",
785 | " \n",
786 | " | PerformanceRating | \n",
787 | " 0.001904 | \n",
788 | " 0.000473 | \n",
789 | " 0.027110 | \n",
790 | " -0.024539 | \n",
791 | " NaN | \n",
792 | " -0.020359 | \n",
793 | " -0.029548 | \n",
794 | " -0.002172 | \n",
795 | " -0.029071 | \n",
796 | " -0.021222 | \n",
797 | " ... | \n",
798 | " -0.031351 | \n",
799 | " NaN | \n",
800 | " 0.003506 | \n",
801 | " 0.006744 | \n",
802 | " -0.015579 | \n",
803 | " 0.002572 | \n",
804 | " 0.003435 | \n",
805 | " 0.034986 | \n",
806 | " 0.017896 | \n",
807 | " 0.022827 | \n",
808 | "
\n",
809 | " \n",
810 | " | RelationshipSatisfaction | \n",
811 | " 0.053535 | \n",
812 | " 0.007846 | \n",
813 | " 0.006557 | \n",
814 | " -0.009118 | \n",
815 | " NaN | \n",
816 | " -0.069861 | \n",
817 | " 0.007665 | \n",
818 | " 0.001330 | \n",
819 | " 0.034297 | \n",
820 | " 0.021642 | \n",
821 | " ... | \n",
822 | " 1.000000 | \n",
823 | " NaN | \n",
824 | " -0.045952 | \n",
825 | " 0.024054 | \n",
826 | " 0.002497 | \n",
827 | " 0.019604 | \n",
828 | " 0.019367 | \n",
829 | " -0.015123 | \n",
830 | " 0.033493 | \n",
831 | " -0.000867 | \n",
832 | "
\n",
833 | " \n",
834 | " | StandardHours | \n",
835 | " NaN | \n",
836 | " NaN | \n",
837 | " NaN | \n",
838 | " NaN | \n",
839 | " NaN | \n",
840 | " NaN | \n",
841 | " NaN | \n",
842 | " NaN | \n",
843 | " NaN | \n",
844 | " NaN | \n",
845 | " ... | \n",
846 | " NaN | \n",
847 | " NaN | \n",
848 | " NaN | \n",
849 | " NaN | \n",
850 | " NaN | \n",
851 | " NaN | \n",
852 | " NaN | \n",
853 | " NaN | \n",
854 | " NaN | \n",
855 | " NaN | \n",
856 | "
\n",
857 | " \n",
858 | " | StockOptionLevel | \n",
859 | " 0.037510 | \n",
860 | " 0.042143 | \n",
861 | " 0.044872 | \n",
862 | " 0.018422 | \n",
863 | " NaN | \n",
864 | " 0.062227 | \n",
865 | " 0.003432 | \n",
866 | " 0.050263 | \n",
867 | " 0.021523 | \n",
868 | " 0.013984 | \n",
869 | " ... | \n",
870 | " -0.045952 | \n",
871 | " NaN | \n",
872 | " 1.000000 | \n",
873 | " 0.010136 | \n",
874 | " 0.011274 | \n",
875 | " 0.004129 | \n",
876 | " 0.015058 | \n",
877 | " 0.050818 | \n",
878 | " 0.014352 | \n",
879 | " 0.024698 | \n",
880 | "
\n",
881 | " \n",
882 | " | TotalWorkingYears | \n",
883 | " 0.680381 | \n",
884 | " 0.014515 | \n",
885 | " 0.004628 | \n",
886 | " 0.148280 | \n",
887 | " NaN | \n",
888 | " -0.014365 | \n",
889 | " -0.002693 | \n",
890 | " -0.002334 | \n",
891 | " -0.005533 | \n",
892 | " 0.782208 | \n",
893 | " ... | \n",
894 | " 0.024054 | \n",
895 | " NaN | \n",
896 | " 0.010136 | \n",
897 | " 1.000000 | \n",
898 | " -0.035662 | \n",
899 | " 0.001008 | \n",
900 | " 0.628133 | \n",
901 | " 0.460365 | \n",
902 | " 0.404858 | \n",
903 | " 0.459188 | \n",
904 | "
\n",
905 | " \n",
906 | " | TrainingTimesLastYear | \n",
907 | " -0.019621 | \n",
908 | " 0.002453 | \n",
909 | " -0.036942 | \n",
910 | " -0.025100 | \n",
911 | " NaN | \n",
912 | " 0.023603 | \n",
913 | " -0.019359 | \n",
914 | " -0.008548 | \n",
915 | " -0.015338 | \n",
916 | " -0.018191 | \n",
917 | " ... | \n",
918 | " 0.002497 | \n",
919 | " NaN | \n",
920 | " 0.011274 | \n",
921 | " -0.035662 | \n",
922 | " 1.000000 | \n",
923 | " 0.028072 | \n",
924 | " 0.003569 | \n",
925 | " -0.005738 | \n",
926 | " -0.002067 | \n",
927 | " -0.004096 | \n",
928 | "
\n",
929 | " \n",
930 | " | WorkLifeBalance | \n",
931 | " -0.021490 | \n",
932 | " -0.037848 | \n",
933 | " -0.026556 | \n",
934 | " 0.009819 | \n",
935 | " NaN | \n",
936 | " 0.010309 | \n",
937 | " 0.027627 | \n",
938 | " -0.004607 | \n",
939 | " -0.014617 | \n",
940 | " 0.037818 | \n",
941 | " ... | \n",
942 | " 0.019604 | \n",
943 | " NaN | \n",
944 | " 0.004129 | \n",
945 | " 0.001008 | \n",
946 | " 0.028072 | \n",
947 | " 1.000000 | \n",
948 | " 0.012089 | \n",
949 | " 0.049856 | \n",
950 | " 0.008941 | \n",
951 | " 0.002759 | \n",
952 | "
\n",
953 | " \n",
954 | " | YearsAtCompany | \n",
955 | " 0.311309 | \n",
956 | " -0.034055 | \n",
957 | " 0.009508 | \n",
958 | " 0.069114 | \n",
959 | " NaN | \n",
960 | " -0.011240 | \n",
961 | " 0.001458 | \n",
962 | " -0.019582 | \n",
963 | " -0.021355 | \n",
964 | " 0.534739 | \n",
965 | " ... | \n",
966 | " 0.019367 | \n",
967 | " NaN | \n",
968 | " 0.015058 | \n",
969 | " 0.628133 | \n",
970 | " 0.003569 | \n",
971 | " 0.012089 | \n",
972 | " 1.000000 | \n",
973 | " 0.758754 | \n",
974 | " 0.618409 | \n",
975 | " 0.769212 | \n",
976 | "
\n",
977 | " \n",
978 | " | YearsInCurrentRole | \n",
979 | " 0.212901 | \n",
980 | " 0.009932 | \n",
981 | " 0.018845 | \n",
982 | " 0.060236 | \n",
983 | " NaN | \n",
984 | " -0.008416 | \n",
985 | " 0.018007 | \n",
986 | " -0.024106 | \n",
987 | " 0.008717 | \n",
988 | " 0.389447 | \n",
989 | " ... | \n",
990 | " -0.015123 | \n",
991 | " NaN | \n",
992 | " 0.050818 | \n",
993 | " 0.460365 | \n",
994 | " -0.005738 | \n",
995 | " 0.049856 | \n",
996 | " 0.758754 | \n",
997 | " 1.000000 | \n",
998 | " 0.548056 | \n",
999 | " 0.714365 | \n",
1000 | "
\n",
1001 | " \n",
1002 | " | YearsSinceLastPromotion | \n",
1003 | " 0.216513 | \n",
1004 | " -0.033229 | \n",
1005 | " 0.010029 | \n",
1006 | " 0.054254 | \n",
1007 | " NaN | \n",
1008 | " -0.009019 | \n",
1009 | " 0.016194 | \n",
1010 | " -0.026716 | \n",
1011 | " -0.024184 | \n",
1012 | " 0.353885 | \n",
1013 | " ... | \n",
1014 | " 0.033493 | \n",
1015 | " NaN | \n",
1016 | " 0.014352 | \n",
1017 | " 0.404858 | \n",
1018 | " -0.002067 | \n",
1019 | " 0.008941 | \n",
1020 | " 0.618409 | \n",
1021 | " 0.548056 | \n",
1022 | " 1.000000 | \n",
1023 | " 0.510224 | \n",
1024 | "
\n",
1025 | " \n",
1026 | " | YearsWithCurrManager | \n",
1027 | " 0.202089 | \n",
1028 | " -0.026363 | \n",
1029 | " 0.014406 | \n",
1030 | " 0.069065 | \n",
1031 | " NaN | \n",
1032 | " -0.009197 | \n",
1033 | " -0.004999 | \n",
1034 | " -0.020123 | \n",
1035 | " 0.025976 | \n",
1036 | " 0.375281 | \n",
1037 | " ... | \n",
1038 | " -0.000867 | \n",
1039 | " NaN | \n",
1040 | " 0.024698 | \n",
1041 | " 0.459188 | \n",
1042 | " -0.004096 | \n",
1043 | " 0.002759 | \n",
1044 | " 0.769212 | \n",
1045 | " 0.714365 | \n",
1046 | " 0.510224 | \n",
1047 | " 1.000000 | \n",
1048 | "
\n",
1049 | " \n",
1050 | "
\n",
1051 | "
26 rows × 26 columns
\n",
1052 | "
"
1053 | ],
1054 | "text/plain": [
1055 | " Age DailyRate DistanceFromHome Education \\\n",
1056 | "Age 1.000000 0.010661 -0.001686 0.208034 \n",
1057 | "DailyRate 0.010661 1.000000 -0.004985 -0.016806 \n",
1058 | "DistanceFromHome -0.001686 -0.004985 1.000000 0.021042 \n",
1059 | "Education 0.208034 -0.016806 0.021042 1.000000 \n",
1060 | "EmployeeCount NaN NaN NaN NaN \n",
1061 | "EmployeeNumber -0.010145 -0.050990 0.032916 0.042070 \n",
1062 | "EnvironmentSatisfaction 0.010146 0.018355 -0.016075 -0.027128 \n",
1063 | "HourlyRate 0.024287 0.023381 0.031131 0.016775 \n",
1064 | "JobInvolvement 0.029820 0.046135 0.008783 0.042438 \n",
1065 | "JobLevel 0.509604 0.002966 0.005303 0.101589 \n",
1066 | "JobSatisfaction -0.004892 0.030571 -0.003669 -0.011296 \n",
1067 | "MonthlyIncome 0.497855 0.007707 -0.017014 0.094961 \n",
1068 | "MonthlyRate 0.028051 -0.032182 0.027473 -0.026084 \n",
1069 | "NumCompaniesWorked 0.299635 0.038153 -0.029251 0.126317 \n",
1070 | "PercentSalaryHike 0.003634 0.022704 0.040235 -0.011111 \n",
1071 | "PerformanceRating 0.001904 0.000473 0.027110 -0.024539 \n",
1072 | "RelationshipSatisfaction 0.053535 0.007846 0.006557 -0.009118 \n",
1073 | "StandardHours NaN NaN NaN NaN \n",
1074 | "StockOptionLevel 0.037510 0.042143 0.044872 0.018422 \n",
1075 | "TotalWorkingYears 0.680381 0.014515 0.004628 0.148280 \n",
1076 | "TrainingTimesLastYear -0.019621 0.002453 -0.036942 -0.025100 \n",
1077 | "WorkLifeBalance -0.021490 -0.037848 -0.026556 0.009819 \n",
1078 | "YearsAtCompany 0.311309 -0.034055 0.009508 0.069114 \n",
1079 | "YearsInCurrentRole 0.212901 0.009932 0.018845 0.060236 \n",
1080 | "YearsSinceLastPromotion 0.216513 -0.033229 0.010029 0.054254 \n",
1081 | "YearsWithCurrManager 0.202089 -0.026363 0.014406 0.069065 \n",
1082 | "\n",
1083 | " EmployeeCount EmployeeNumber \\\n",
1084 | "Age NaN -0.010145 \n",
1085 | "DailyRate NaN -0.050990 \n",
1086 | "DistanceFromHome NaN 0.032916 \n",
1087 | "Education NaN 0.042070 \n",
1088 | "EmployeeCount NaN NaN \n",
1089 | "EmployeeNumber NaN 1.000000 \n",
1090 | "EnvironmentSatisfaction NaN 0.017621 \n",
1091 | "HourlyRate NaN 0.035179 \n",
1092 | "JobInvolvement NaN -0.006888 \n",
1093 | "JobLevel NaN -0.018519 \n",
1094 | "JobSatisfaction NaN -0.046247 \n",
1095 | "MonthlyIncome NaN -0.014829 \n",
1096 | "MonthlyRate NaN 0.012648 \n",
1097 | "NumCompaniesWorked NaN -0.001251 \n",
1098 | "PercentSalaryHike NaN -0.012944 \n",
1099 | "PerformanceRating NaN -0.020359 \n",
1100 | "RelationshipSatisfaction NaN -0.069861 \n",
1101 | "StandardHours NaN NaN \n",
1102 | "StockOptionLevel NaN 0.062227 \n",
1103 | "TotalWorkingYears NaN -0.014365 \n",
1104 | "TrainingTimesLastYear NaN 0.023603 \n",
1105 | "WorkLifeBalance NaN 0.010309 \n",
1106 | "YearsAtCompany NaN -0.011240 \n",
1107 | "YearsInCurrentRole NaN -0.008416 \n",
1108 | "YearsSinceLastPromotion NaN -0.009019 \n",
1109 | "YearsWithCurrManager NaN -0.009197 \n",
1110 | "\n",
1111 | " EnvironmentSatisfaction HourlyRate JobInvolvement \\\n",
1112 | "Age 0.010146 0.024287 0.029820 \n",
1113 | "DailyRate 0.018355 0.023381 0.046135 \n",
1114 | "DistanceFromHome -0.016075 0.031131 0.008783 \n",
1115 | "Education -0.027128 0.016775 0.042438 \n",
1116 | "EmployeeCount NaN NaN NaN \n",
1117 | "EmployeeNumber 0.017621 0.035179 -0.006888 \n",
1118 | "EnvironmentSatisfaction 1.000000 -0.049857 -0.008278 \n",
1119 | "HourlyRate -0.049857 1.000000 0.042861 \n",
1120 | "JobInvolvement -0.008278 0.042861 1.000000 \n",
1121 | "JobLevel 0.001212 -0.027853 -0.012630 \n",
1122 | "JobSatisfaction -0.006784 -0.071335 -0.021476 \n",
1123 | "MonthlyIncome -0.006259 -0.015794 -0.015271 \n",
1124 | "MonthlyRate 0.037600 -0.015297 -0.016322 \n",
1125 | "NumCompaniesWorked 0.012594 0.022157 0.015012 \n",
1126 | "PercentSalaryHike -0.031701 -0.009062 -0.017205 \n",
1127 | "PerformanceRating -0.029548 -0.002172 -0.029071 \n",
1128 | "RelationshipSatisfaction 0.007665 0.001330 0.034297 \n",
1129 | "StandardHours NaN NaN NaN \n",
1130 | "StockOptionLevel 0.003432 0.050263 0.021523 \n",
1131 | "TotalWorkingYears -0.002693 -0.002334 -0.005533 \n",
1132 | "TrainingTimesLastYear -0.019359 -0.008548 -0.015338 \n",
1133 | "WorkLifeBalance 0.027627 -0.004607 -0.014617 \n",
1134 | "YearsAtCompany 0.001458 -0.019582 -0.021355 \n",
1135 | "YearsInCurrentRole 0.018007 -0.024106 0.008717 \n",
1136 | "YearsSinceLastPromotion 0.016194 -0.026716 -0.024184 \n",
1137 | "YearsWithCurrManager -0.004999 -0.020123 0.025976 \n",
1138 | "\n",
1139 | " JobLevel ... RelationshipSatisfaction \\\n",
1140 | "Age 0.509604 ... 0.053535 \n",
1141 | "DailyRate 0.002966 ... 0.007846 \n",
1142 | "DistanceFromHome 0.005303 ... 0.006557 \n",
1143 | "Education 0.101589 ... -0.009118 \n",
1144 | "EmployeeCount NaN ... NaN \n",
1145 | "EmployeeNumber -0.018519 ... -0.069861 \n",
1146 | "EnvironmentSatisfaction 0.001212 ... 0.007665 \n",
1147 | "HourlyRate -0.027853 ... 0.001330 \n",
1148 | "JobInvolvement -0.012630 ... 0.034297 \n",
1149 | "JobLevel 1.000000 ... 0.021642 \n",
1150 | "JobSatisfaction -0.001944 ... -0.012454 \n",
1151 | "MonthlyIncome 0.950300 ... 0.025873 \n",
1152 | "MonthlyRate 0.039563 ... -0.004085 \n",
1153 | "NumCompaniesWorked 0.142501 ... 0.052733 \n",
1154 | "PercentSalaryHike -0.034730 ... -0.040490 \n",
1155 | "PerformanceRating -0.021222 ... -0.031351 \n",
1156 | "RelationshipSatisfaction 0.021642 ... 1.000000 \n",
1157 | "StandardHours NaN ... NaN \n",
1158 | "StockOptionLevel 0.013984 ... -0.045952 \n",
1159 | "TotalWorkingYears 0.782208 ... 0.024054 \n",
1160 | "TrainingTimesLastYear -0.018191 ... 0.002497 \n",
1161 | "WorkLifeBalance 0.037818 ... 0.019604 \n",
1162 | "YearsAtCompany 0.534739 ... 0.019367 \n",
1163 | "YearsInCurrentRole 0.389447 ... -0.015123 \n",
1164 | "YearsSinceLastPromotion 0.353885 ... 0.033493 \n",
1165 | "YearsWithCurrManager 0.375281 ... -0.000867 \n",
1166 | "\n",
1167 | " StandardHours StockOptionLevel TotalWorkingYears \\\n",
1168 | "Age NaN 0.037510 0.680381 \n",
1169 | "DailyRate NaN 0.042143 0.014515 \n",
1170 | "DistanceFromHome NaN 0.044872 0.004628 \n",
1171 | "Education NaN 0.018422 0.148280 \n",
1172 | "EmployeeCount NaN NaN NaN \n",
1173 | "EmployeeNumber NaN 0.062227 -0.014365 \n",
1174 | "EnvironmentSatisfaction NaN 0.003432 -0.002693 \n",
1175 | "HourlyRate NaN 0.050263 -0.002334 \n",
1176 | "JobInvolvement NaN 0.021523 -0.005533 \n",
1177 | "JobLevel NaN 0.013984 0.782208 \n",
1178 | "JobSatisfaction NaN 0.010690 -0.020185 \n",
1179 | "MonthlyIncome NaN 0.005408 0.772893 \n",
1180 | "MonthlyRate NaN -0.034323 0.026442 \n",
1181 | "NumCompaniesWorked NaN 0.030075 0.237639 \n",
1182 | "PercentSalaryHike NaN 0.007528 -0.020608 \n",
1183 | "PerformanceRating NaN 0.003506 0.006744 \n",
1184 | "RelationshipSatisfaction NaN -0.045952 0.024054 \n",
1185 | "StandardHours NaN NaN NaN \n",
1186 | "StockOptionLevel NaN 1.000000 0.010136 \n",
1187 | "TotalWorkingYears NaN 0.010136 1.000000 \n",
1188 | "TrainingTimesLastYear NaN 0.011274 -0.035662 \n",
1189 | "WorkLifeBalance NaN 0.004129 0.001008 \n",
1190 | "YearsAtCompany NaN 0.015058 0.628133 \n",
1191 | "YearsInCurrentRole NaN 0.050818 0.460365 \n",
1192 | "YearsSinceLastPromotion NaN 0.014352 0.404858 \n",
1193 | "YearsWithCurrManager NaN 0.024698 0.459188 \n",
1194 | "\n",
1195 | " TrainingTimesLastYear WorkLifeBalance \\\n",
1196 | "Age -0.019621 -0.021490 \n",
1197 | "DailyRate 0.002453 -0.037848 \n",
1198 | "DistanceFromHome -0.036942 -0.026556 \n",
1199 | "Education -0.025100 0.009819 \n",
1200 | "EmployeeCount NaN NaN \n",
1201 | "EmployeeNumber 0.023603 0.010309 \n",
1202 | "EnvironmentSatisfaction -0.019359 0.027627 \n",
1203 | "HourlyRate -0.008548 -0.004607 \n",
1204 | "JobInvolvement -0.015338 -0.014617 \n",
1205 | "JobLevel -0.018191 0.037818 \n",
1206 | "JobSatisfaction -0.005779 -0.019459 \n",
1207 | "MonthlyIncome -0.021736 0.030683 \n",
1208 | "MonthlyRate 0.001467 0.007963 \n",
1209 | "NumCompaniesWorked -0.066054 -0.008366 \n",
1210 | "PercentSalaryHike -0.005221 -0.003280 \n",
1211 | "PerformanceRating -0.015579 0.002572 \n",
1212 | "RelationshipSatisfaction 0.002497 0.019604 \n",
1213 | "StandardHours NaN NaN \n",
1214 | "StockOptionLevel 0.011274 0.004129 \n",
1215 | "TotalWorkingYears -0.035662 0.001008 \n",
1216 | "TrainingTimesLastYear 1.000000 0.028072 \n",
1217 | "WorkLifeBalance 0.028072 1.000000 \n",
1218 | "YearsAtCompany 0.003569 0.012089 \n",
1219 | "YearsInCurrentRole -0.005738 0.049856 \n",
1220 | "YearsSinceLastPromotion -0.002067 0.008941 \n",
1221 | "YearsWithCurrManager -0.004096 0.002759 \n",
1222 | "\n",
1223 | " YearsAtCompany YearsInCurrentRole \\\n",
1224 | "Age 0.311309 0.212901 \n",
1225 | "DailyRate -0.034055 0.009932 \n",
1226 | "DistanceFromHome 0.009508 0.018845 \n",
1227 | "Education 0.069114 0.060236 \n",
1228 | "EmployeeCount NaN NaN \n",
1229 | "EmployeeNumber -0.011240 -0.008416 \n",
1230 | "EnvironmentSatisfaction 0.001458 0.018007 \n",
1231 | "HourlyRate -0.019582 -0.024106 \n",
1232 | "JobInvolvement -0.021355 0.008717 \n",
1233 | "JobLevel 0.534739 0.389447 \n",
1234 | "JobSatisfaction -0.003803 -0.002305 \n",
1235 | "MonthlyIncome 0.514285 0.363818 \n",
1236 | "MonthlyRate -0.023655 -0.012815 \n",
1237 | "NumCompaniesWorked -0.118421 -0.090754 \n",
1238 | "PercentSalaryHike -0.035991 -0.001520 \n",
1239 | "PerformanceRating 0.003435 0.034986 \n",
1240 | "RelationshipSatisfaction 0.019367 -0.015123 \n",
1241 | "StandardHours NaN NaN \n",
1242 | "StockOptionLevel 0.015058 0.050818 \n",
1243 | "TotalWorkingYears 0.628133 0.460365 \n",
1244 | "TrainingTimesLastYear 0.003569 -0.005738 \n",
1245 | "WorkLifeBalance 0.012089 0.049856 \n",
1246 | "YearsAtCompany 1.000000 0.758754 \n",
1247 | "YearsInCurrentRole 0.758754 1.000000 \n",
1248 | "YearsSinceLastPromotion 0.618409 0.548056 \n",
1249 | "YearsWithCurrManager 0.769212 0.714365 \n",
1250 | "\n",
1251 | " YearsSinceLastPromotion YearsWithCurrManager \n",
1252 | "Age 0.216513 0.202089 \n",
1253 | "DailyRate -0.033229 -0.026363 \n",
1254 | "DistanceFromHome 0.010029 0.014406 \n",
1255 | "Education 0.054254 0.069065 \n",
1256 | "EmployeeCount NaN NaN \n",
1257 | "EmployeeNumber -0.009019 -0.009197 \n",
1258 | "EnvironmentSatisfaction 0.016194 -0.004999 \n",
1259 | "HourlyRate -0.026716 -0.020123 \n",
1260 | "JobInvolvement -0.024184 0.025976 \n",
1261 | "JobLevel 0.353885 0.375281 \n",
1262 | "JobSatisfaction -0.018214 -0.027656 \n",
1263 | "MonthlyIncome 0.344978 0.344079 \n",
1264 | "MonthlyRate 0.001567 -0.036746 \n",
1265 | "NumCompaniesWorked -0.036814 -0.110319 \n",
1266 | "PercentSalaryHike -0.022154 -0.011985 \n",
1267 | "PerformanceRating 0.017896 0.022827 \n",
1268 | "RelationshipSatisfaction 0.033493 -0.000867 \n",
1269 | "StandardHours NaN NaN \n",
1270 | "StockOptionLevel 0.014352 0.024698 \n",
1271 | "TotalWorkingYears 0.404858 0.459188 \n",
1272 | "TrainingTimesLastYear -0.002067 -0.004096 \n",
1273 | "WorkLifeBalance 0.008941 0.002759 \n",
1274 | "YearsAtCompany 0.618409 0.769212 \n",
1275 | "YearsInCurrentRole 0.548056 0.714365 \n",
1276 | "YearsSinceLastPromotion 1.000000 0.510224 \n",
1277 | "YearsWithCurrManager 0.510224 1.000000 \n",
1278 | "\n",
1279 | "[26 rows x 26 columns]"
1280 | ]
1281 | },
1282 | "execution_count": 6,
1283 | "metadata": {},
1284 | "output_type": "execute_result"
1285 | }
1286 | ],
1287 | "source": [
1288 | "attrition.select_dtypes(include='number').corr()  # numeric columns only: pandas >= 2.0 raises on object columns instead of dropping them"
1289 | ]
1290 | },
1291 | {
1292 | "cell_type": "markdown",
1293 | "metadata": {
1294 | "_cell_guid": "112cef65-78b8-7790-e705-b173beea6986"
1295 | },
1296 | "source": [
1297 | "# Feature Engineering & Categorical Encoding\n",
1298 | "\n",
1299 | "In this section we engineer features and numerically encode the categorical values in our dataset."
1300 | ]
1301 | },
1302 | {
1303 | "cell_type": "code",
1304 | "execution_count": 7,
1305 | "metadata": {},
1306 | "outputs": [],
1307 | "source": [
1308 | "# attrition.shape"
1309 | ]
1310 | },
1311 | {
1312 | "cell_type": "code",
1313 | "execution_count": 8,
1314 | "metadata": {},
1315 | "outputs": [
1316 | {
1317 | "data": {
1318 | "text/plain": [
1319 | "Age int64\n",
1320 | "Attrition object\n",
1321 | "BusinessTravel object\n",
1322 | "DailyRate int64\n",
1323 | "Department object\n",
1324 | "DistanceFromHome int64\n",
1325 | "Education int64\n",
1326 | "EducationField object\n",
1327 | "EmployeeCount int64\n",
1328 | "EmployeeNumber int64\n",
1329 | "EnvironmentSatisfaction int64\n",
1330 | "Gender object\n",
1331 | "HourlyRate int64\n",
1332 | "JobInvolvement int64\n",
1333 | "JobLevel int64\n",
1334 | "JobRole object\n",
1335 | "JobSatisfaction int64\n",
1336 | "MaritalStatus object\n",
1337 | "MonthlyIncome int64\n",
1338 | "MonthlyRate int64\n",
1339 | "NumCompaniesWorked int64\n",
1340 | "Over18 object\n",
1341 | "OverTime object\n",
1342 | "PercentSalaryHike int64\n",
1343 | "PerformanceRating int64\n",
1344 | "RelationshipSatisfaction int64\n",
1345 | "StandardHours int64\n",
1346 | "StockOptionLevel int64\n",
1347 | "TotalWorkingYears int64\n",
1348 | "TrainingTimesLastYear int64\n",
1349 | "WorkLifeBalance int64\n",
1350 | "YearsAtCompany int64\n",
1351 | "YearsInCurrentRole int64\n",
1352 | "YearsSinceLastPromotion int64\n",
1353 | "YearsWithCurrManager int64\n",
1354 | "dtype: object"
1355 | ]
1356 | },
1357 | "execution_count": 8,
1358 | "metadata": {},
1359 | "output_type": "execute_result"
1360 | }
1361 | ],
1362 | "source": [
1363 | "attrition.dtypes"
1364 | ]
1365 | },
1366 | {
1367 | "cell_type": "code",
1368 | "execution_count": 9,
1369 | "metadata": {
1370 | "_cell_guid": "937385c7-7b7f-f6d0-d974-0527a7118e98"
1371 | },
1372 | "outputs": [],
1373 | "source": [
1374 | "# Collect the names of the object-dtype (categorical) columns\n",
1375 | "categorical = []\n",
1376 | "for col, value in attrition.items():  # items() replaces iteritems(), which was removed in pandas 2.0\n",
1377 | "    if value.dtype == 'object':\n",
1378 | "        categorical.append(col)\n",
1379 | "\n",
1380 | "# Every remaining column is treated as numerical\n",
1381 | "numerical = attrition.columns.difference(categorical)"
1382 | ]
1383 | },
1384 | {
1385 | "cell_type": "code",
1386 | "execution_count": 10,
1387 | "metadata": {},
1388 | "outputs": [
1389 | {
1390 | "data": {
1391 | "text/plain": [
1392 | "Index(['Age', 'DailyRate', 'DistanceFromHome', 'Education', 'EmployeeCount',\n",
1393 | " 'EmployeeNumber', 'EnvironmentSatisfaction', 'HourlyRate',\n",
1394 | " 'JobInvolvement', 'JobLevel', 'JobSatisfaction', 'MonthlyIncome',\n",
1395 | " 'MonthlyRate', 'NumCompaniesWorked', 'PercentSalaryHike',\n",
1396 | " 'PerformanceRating', 'RelationshipSatisfaction', 'StandardHours',\n",
1397 | " 'StockOptionLevel', 'TotalWorkingYears', 'TrainingTimesLastYear',\n",
1398 | " 'WorkLifeBalance', 'YearsAtCompany', 'YearsInCurrentRole',\n",
1399 | " 'YearsSinceLastPromotion', 'YearsWithCurrManager'],\n",
1400 | " dtype='object')"
1401 | ]
1402 | },
1403 | "execution_count": 10,
1404 | "metadata": {},
1405 | "output_type": "execute_result"
1406 | }
1407 | ],
1408 | "source": [
1409 | "numerical"
1410 | ]
1411 | },
1412 | {
1413 | "cell_type": "code",
1414 | "execution_count": 11,
1415 | "metadata": {},
1416 | "outputs": [
1417 | {
1418 | "data": {
1419 | "text/plain": [
1420 | "['Attrition',\n",
1421 | " 'BusinessTravel',\n",
1422 | " 'Department',\n",
1423 | " 'EducationField',\n",
1424 | " 'Gender',\n",
1425 | " 'JobRole',\n",
1426 | " 'MaritalStatus',\n",
1427 | " 'Over18',\n",
1428 | " 'OverTime']"
1429 | ]
1430 | },
1431 | "execution_count": 11,
1432 | "metadata": {},
1433 | "output_type": "execute_result"
1434 | }
1435 | ],
1436 | "source": [
1437 | "categorical"
1438 | ]
1439 | },
1440 | {
1441 | "cell_type": "code",
1442 | "execution_count": 12,
1443 | "metadata": {
1444 | "_cell_guid": "5ec5cd49-f8b3-e36b-75dd-ac95fe0373ac"
1445 | },
1446 | "outputs": [],
1447 | "source": [
1448 | "# Build attrition_cat from the categorical columns only, leaving out\n",
1449 | "# 'Attrition' because it is the target rather than a feature\n",
1450 | "attrition_cat = attrition[categorical].drop(columns=['Attrition'])"
1451 | ]
1452 | },
1453 | {
1454 | "cell_type": "code",
1455 | "execution_count": 13,
1456 | "metadata": {},
1457 | "outputs": [
1458 | {
1459 | "data": {
1460 | "text/html": [
1461 | "\n",
1462 | "\n",
1475 | "
\n",
1476 | " \n",
1477 | " \n",
1478 | " | \n",
1479 | " BusinessTravel | \n",
1480 | " Department | \n",
1481 | " EducationField | \n",
1482 | " Gender | \n",
1483 | " JobRole | \n",
1484 | " MaritalStatus | \n",
1485 | " Over18 | \n",
1486 | " OverTime | \n",
1487 | "
\n",
1488 | " \n",
1489 | " \n",
1490 | " \n",
1491 | " | 0 | \n",
1492 | " Travel_Rarely | \n",
1493 | " Sales | \n",
1494 | " Life Sciences | \n",
1495 | " Female | \n",
1496 | " Sales Executive | \n",
1497 | " Single | \n",
1498 | " Y | \n",
1499 | " Yes | \n",
1500 | "
\n",
1501 | " \n",
1502 | " | 1 | \n",
1503 | " Travel_Frequently | \n",
1504 | " Research & Development | \n",
1505 | " Life Sciences | \n",
1506 | " Male | \n",
1507 | " Research Scientist | \n",
1508 | " Married | \n",
1509 | " Y | \n",
1510 | " No | \n",
1511 | "
\n",
1512 | " \n",
1513 | " | 2 | \n",
1514 | " Travel_Rarely | \n",
1515 | " Research & Development | \n",
1516 | " Other | \n",
1517 | " Male | \n",
1518 | " Laboratory Technician | \n",
1519 | " Single | \n",
1520 | " Y | \n",
1521 | " Yes | \n",
1522 | "
\n",
1523 | " \n",
1524 | " | 3 | \n",
1525 | " Travel_Frequently | \n",
1526 | " Research & Development | \n",
1527 | " Life Sciences | \n",
1528 | " Female | \n",
1529 | " Research Scientist | \n",
1530 | " Married | \n",
1531 | " Y | \n",
1532 | " Yes | \n",
1533 | "
\n",
1534 | " \n",
1535 | " | 4 | \n",
1536 | " Travel_Rarely | \n",
1537 | " Research & Development | \n",
1538 | " Medical | \n",
1539 | " Male | \n",
1540 | " Laboratory Technician | \n",
1541 | " Married | \n",
1542 | " Y | \n",
1543 | " No | \n",
1544 | "
\n",
1545 | " \n",
1546 | " | ... | \n",
1547 | " ... | \n",
1548 | " ... | \n",
1549 | " ... | \n",
1550 | " ... | \n",
1551 | " ... | \n",
1552 | " ... | \n",
1553 | " ... | \n",
1554 | " ... | \n",
1555 | "
\n",
1556 | " \n",
1557 | " | 1465 | \n",
1558 | " Travel_Frequently | \n",
1559 | " Research & Development | \n",
1560 | " Medical | \n",
1561 | " Male | \n",
1562 | " Laboratory Technician | \n",
1563 | " Married | \n",
1564 | " Y | \n",
1565 | " No | \n",
1566 | "
\n",
1567 | " \n",
1568 | " | 1466 | \n",
1569 | " Travel_Rarely | \n",
1570 | " Research & Development | \n",
1571 | " Medical | \n",
1572 | " Male | \n",
1573 | " Healthcare Representative | \n",
1574 | " Married | \n",
1575 | " Y | \n",
1576 | " No | \n",
1577 | "
\n",
1578 | " \n",
1579 | " | 1467 | \n",
1580 | " Travel_Rarely | \n",
1581 | " Research & Development | \n",
1582 | " Life Sciences | \n",
1583 | " Male | \n",
1584 | " Manufacturing Director | \n",
1585 | " Married | \n",
1586 | " Y | \n",
1587 | " Yes | \n",
1588 | "
\n",
1589 | " \n",
1590 | " | 1468 | \n",
1591 | " Travel_Frequently | \n",
1592 | " Sales | \n",
1593 | " Medical | \n",
1594 | " Male | \n",
1595 | " Sales Executive | \n",
1596 | " Married | \n",
1597 | " Y | \n",
1598 | " No | \n",
1599 | "
\n",
1600 | " \n",
1601 | " | 1469 | \n",
1602 | " Travel_Rarely | \n",
1603 | " Research & Development | \n",
1604 | " Medical | \n",
1605 | " Male | \n",
1606 | " Laboratory Technician | \n",
1607 | " Married | \n",
1608 | " Y | \n",
1609 | " No | \n",
1610 | "
\n",
1611 | " \n",
1612 | "
\n",
1613 | "
1470 rows × 8 columns
\n",
1614 | "
"
1615 | ],
1616 | "text/plain": [
1617 | " BusinessTravel Department EducationField Gender \\\n",
1618 | "0 Travel_Rarely Sales Life Sciences Female \n",
1619 | "1 Travel_Frequently Research & Development Life Sciences Male \n",
1620 | "2 Travel_Rarely Research & Development Other Male \n",
1621 | "3 Travel_Frequently Research & Development Life Sciences Female \n",
1622 | "4 Travel_Rarely Research & Development Medical Male \n",
1623 | "... ... ... ... ... \n",
1624 | "1465 Travel_Frequently Research & Development Medical Male \n",
1625 | "1466 Travel_Rarely Research & Development Medical Male \n",
1626 | "1467 Travel_Rarely Research & Development Life Sciences Male \n",
1627 | "1468 Travel_Frequently Sales Medical Male \n",
1628 | "1469 Travel_Rarely Research & Development Medical Male \n",
1629 | "\n",
1630 | " JobRole MaritalStatus Over18 OverTime \n",
1631 | "0 Sales Executive Single Y Yes \n",
1632 | "1 Research Scientist Married Y No \n",
1633 | "2 Laboratory Technician Single Y Yes \n",
1634 | "3 Research Scientist Married Y Yes \n",
1635 | "4 Laboratory Technician Married Y No \n",
1636 | "... ... ... ... ... \n",
1637 | "1465 Laboratory Technician Married Y No \n",
1638 | "1466 Healthcare Representative Married Y No \n",
1639 | "1467 Manufacturing Director Married Y Yes \n",
1640 | "1468 Sales Executive Married Y No \n",
1641 | "1469 Laboratory Technician Married Y No \n",
1642 | "\n",
1643 | "[1470 rows x 8 columns]"
1644 | ]
1645 | },
1646 | "execution_count": 13,
1647 | "metadata": {},
1648 | "output_type": "execute_result"
1649 | }
1650 | ],
1651 | "source": [
1652 | "attrition_cat"
1653 | ]
1654 | },
1655 | {
1656 | "cell_type": "markdown",
1657 | "metadata": {
1658 | "_cell_guid": "7c3c0c95-3725-80dd-0a73-5c840451a438"
1659 | },
1660 | "source": [
1661 | "Applying the **get_dummies** method"
1662 | ]
1663 | },
1664 | {
1665 | "cell_type": "code",
1666 | "execution_count": 14,
1667 | "metadata": {},
1668 | "outputs": [],
1669 | "source": [
1670 | "# How can you convert categorical (string / object) data into a numerical format?\n",
1671 | "\n",
1672 | "# Process of converting your cat data into numerical format - Encoding process \n",
1673 | "\n",
1674 | "# Encoding (15 More )\n",
1675 | "\n",
1676 | "# Label Encoding \n",
1677 | "\n",
1678 | "# One Hot Encoding ( OHE)\n",
1679 | "\n",
1680 | "# Cat_A \n",
1681 | "\n",
1682 | "# Male\n",
1683 | "#Female \n",
1684 | "#Male\n",
1685 | "#Female\n",
1686 | "# Prefer_not_to_say\n",
1687 | "# Male \n",
1688 | "\n",
1689 | "# OHE \n",
1690 | "\n",
1691 | " # Cat_A_Male #Cat_A_Female #Cat_A_Prefer_not_to_say\n",
1692 | "#1# Male 1 0 0 \n",
1693 | "#2#Female 0 1 0\n",
1694 | "#3#Male 1 0 0\n",
1695 | "#4#Female 0 1 0\n",
1696 | "#5# Prefer_not_to_say 0 0 1\n",
1697 | "#6# Male 1 0 0\n",
1698 | "\n",
1699 | "\n",
1700 | "\n",
1701 | "# Label Encoding \n",
1702 | "\n",
1703 | "# Cat_A \n",
1704 | "\n",
1705 | "# Male 2 \n",
1706 | "#Female 1\n",
1707 | "#Male 2\n",
1708 | "#Female 1\n",
1709 | "# Prefer_not_to_say 3\n",
1710 | "# Male 2\n",
1711 | "\n",
1712 | "# Target Encoding \n",
1713 | "# Mean Encoding \n",
1714 | "\n",
1715 | "\n",
1716 | "\n",
1717 | "\n",
1718 | "\n",
1719 | "\n",
1720 | "\n",
1721 | "\n",
1722 | "\n",
1723 | "\n",
1724 | "\n",
1725 | "\n",
1726 | "\n",
1727 | "\n",
1728 | "\n",
1729 | "\n",
1730 | "\n",
1731 | "\n",
1732 | "\n",
1733 | "\n",
1734 | "\n",
1735 | "\n",
1736 | "\n",
1737 | "\n",
1738 | "\n",
1739 | "\n",
1740 | "\n",
1741 | "\n",
1742 | "\n"
1743 | ]
1744 | },
1745 | {
1746 | "cell_type": "code",
1747 | "execution_count": 15,
1748 | "metadata": {},
1749 | "outputs": [
1750 | {
1751 | "data": {
1752 | "text/plain": [
1753 | "array([2, 2, 1, 0], dtype=int64)"
1754 | ]
1755 | },
1756 | "execution_count": 15,
1757 | "metadata": {},
1758 | "output_type": "execute_result"
1759 | }
1760 | ],
1761 | "source": [
1762 | "# Label-encoding demo: LabelEncoder numbers the distinct labels in sorted order\n",
1763 | "\n",
1764 | "from sklearn import preprocessing\n",
1765 | "\n",
1766 | "le = preprocessing.LabelEncoder()\n",
1767 | "le.fit([\"paris\", \"paris\", \"tokyo\", \"amsterdam\"])\n",
1768 | "le.transform([\"tokyo\", \"tokyo\", \"paris\",\"amsterdam\"])\n",
1769 | "\n",
1770 | "# list(le.classes_)\n",
1771 | "\n",
1772 | "\n",
1773 | "# Resulting codes: amsterdam -> 0, paris -> 1, tokyo -> 2"
1774 | ]
1775 | },
1776 | {
1777 | "cell_type": "code",
1778 | "execution_count": null,
1779 | "metadata": {},
1780 | "outputs": [],
1781 | "source": []
1782 | },
1783 | {
1784 | "cell_type": "code",
1785 | "execution_count": 16,
1786 | "metadata": {
1787 | "_cell_guid": "7ea5b0d8-1f13-e56b-72cf-bcbe7dd6fad2"
1788 | },
1789 | "outputs": [
1790 | {
1791 | "data": {
1792 | "text/html": [
1793 | "\n",
1794 | "\n",
1807 | "
\n",
1808 | " \n",
1809 | " \n",
1810 | " | \n",
1811 | " BusinessTravel_Non-Travel | \n",
1812 | " BusinessTravel_Travel_Frequently | \n",
1813 | " BusinessTravel_Travel_Rarely | \n",
1814 | " Department_Human Resources | \n",
1815 | " Department_Research & Development | \n",
1816 | " Department_Sales | \n",
1817 | " EducationField_Human Resources | \n",
1818 | " EducationField_Life Sciences | \n",
1819 | " EducationField_Marketing | \n",
1820 | " EducationField_Medical | \n",
1821 | " ... | \n",
1822 | " JobRole_Research Director | \n",
1823 | " JobRole_Research Scientist | \n",
1824 | " JobRole_Sales Executive | \n",
1825 | " JobRole_Sales Representative | \n",
1826 | " MaritalStatus_Divorced | \n",
1827 | " MaritalStatus_Married | \n",
1828 | " MaritalStatus_Single | \n",
1829 | " Over18_Y | \n",
1830 | " OverTime_No | \n",
1831 | " OverTime_Yes | \n",
1832 | "
\n",
1833 | " \n",
1834 | " \n",
1835 | " \n",
1836 | " | 0 | \n",
1837 | " 0 | \n",
1838 | " 0 | \n",
1839 | " 1 | \n",
1840 | " 0 | \n",
1841 | " 0 | \n",
1842 | " 1 | \n",
1843 | " 0 | \n",
1844 | " 1 | \n",
1845 | " 0 | \n",
1846 | " 0 | \n",
1847 | " ... | \n",
1848 | " 0 | \n",
1849 | " 0 | \n",
1850 | " 1 | \n",
1851 | " 0 | \n",
1852 | " 0 | \n",
1853 | " 0 | \n",
1854 | " 1 | \n",
1855 | " 1 | \n",
1856 | " 0 | \n",
1857 | " 1 | \n",
1858 | "
\n",
1859 | " \n",
1860 | " | 1 | \n",
1861 | " 0 | \n",
1862 | " 1 | \n",
1863 | " 0 | \n",
1864 | " 0 | \n",
1865 | " 1 | \n",
1866 | " 0 | \n",
1867 | " 0 | \n",
1868 | " 1 | \n",
1869 | " 0 | \n",
1870 | " 0 | \n",
1871 | " ... | \n",
1872 | " 0 | \n",
1873 | " 1 | \n",
1874 | " 0 | \n",
1875 | " 0 | \n",
1876 | " 0 | \n",
1877 | " 1 | \n",
1878 | " 0 | \n",
1879 | " 1 | \n",
1880 | " 1 | \n",
1881 | " 0 | \n",
1882 | "
\n",
1883 | " \n",
1884 | " | 2 | \n",
1885 | " 0 | \n",
1886 | " 0 | \n",
1887 | " 1 | \n",
1888 | " 0 | \n",
1889 | " 1 | \n",
1890 | " 0 | \n",
1891 | " 0 | \n",
1892 | " 0 | \n",
1893 | " 0 | \n",
1894 | " 0 | \n",
1895 | " ... | \n",
1896 | " 0 | \n",
1897 | " 0 | \n",
1898 | " 0 | \n",
1899 | " 0 | \n",
1900 | " 0 | \n",
1901 | " 0 | \n",
1902 | " 1 | \n",
1903 | " 1 | \n",
1904 | " 0 | \n",
1905 | " 1 | \n",
1906 | "
\n",
1907 | " \n",
1908 | "
\n",
1909 | "
3 rows × 29 columns
\n",
1910 | "
"
1911 | ],
1912 | "text/plain": [
1913 | " BusinessTravel_Non-Travel BusinessTravel_Travel_Frequently \\\n",
1914 | "0 0 0 \n",
1915 | "1 0 1 \n",
1916 | "2 0 0 \n",
1917 | "\n",
1918 | " BusinessTravel_Travel_Rarely Department_Human Resources \\\n",
1919 | "0 1 0 \n",
1920 | "1 0 0 \n",
1921 | "2 1 0 \n",
1922 | "\n",
1923 | " Department_Research & Development Department_Sales \\\n",
1924 | "0 0 1 \n",
1925 | "1 1 0 \n",
1926 | "2 1 0 \n",
1927 | "\n",
1928 | " EducationField_Human Resources EducationField_Life Sciences \\\n",
1929 | "0 0 1 \n",
1930 | "1 0 1 \n",
1931 | "2 0 0 \n",
1932 | "\n",
1933 | " EducationField_Marketing EducationField_Medical ... \\\n",
1934 | "0 0 0 ... \n",
1935 | "1 0 0 ... \n",
1936 | "2 0 0 ... \n",
1937 | "\n",
1938 | " JobRole_Research Director JobRole_Research Scientist \\\n",
1939 | "0 0 0 \n",
1940 | "1 0 1 \n",
1941 | "2 0 0 \n",
1942 | "\n",
1943 | " JobRole_Sales Executive JobRole_Sales Representative \\\n",
1944 | "0 1 0 \n",
1945 | "1 0 0 \n",
1946 | "2 0 0 \n",
1947 | "\n",
1948 | " MaritalStatus_Divorced MaritalStatus_Married MaritalStatus_Single \\\n",
1949 | "0 0 0 1 \n",
1950 | "1 0 1 0 \n",
1951 | "2 0 0 1 \n",
1952 | "\n",
1953 | " Over18_Y OverTime_No OverTime_Yes \n",
1954 | "0 1 0 1 \n",
1955 | "1 1 1 0 \n",
1956 | "2 1 0 1 \n",
1957 | "\n",
1958 | "[3 rows x 29 columns]"
1959 | ]
1960 | },
1961 | "execution_count": 16,
1962 | "metadata": {},
1963 | "output_type": "execute_result"
1964 | }
1965 | ],
1966 | "source": [
1967 | "attrition_cat = pd.get_dummies(attrition_cat)\n",
1968 | "attrition_cat.head(3)"
1969 | ]
1970 | },
1971 | {
1972 | "cell_type": "code",
1973 | "execution_count": 17,
1974 | "metadata": {
1975 | "_cell_guid": "de8b3a57-6aba-eae7-2be3-dbe0ae761d6a"
1976 | },
1977 | "outputs": [],
1978 | "source": [
1979 | "# Store the numerical features to a dataframe attrition_num\n",
1980 | "attrition_num = attrition[numerical]"
1981 | ]
1982 | },
1983 | {
1984 | "cell_type": "markdown",
1985 | "metadata": {
1986 | "_cell_guid": "9de23a93-10b6-33b8-eea8-0cf44c6e5e08"
1987 | },
1988 | "source": [
1989 | "Let's concatenate the numerical and categorical DataFrames."
1990 | ]
1991 | },
1992 | {
1993 | "cell_type": "code",
1994 | "execution_count": 18,
1995 | "metadata": {
1996 | "_cell_guid": "b90b69ba-f19d-0707-7c2c-183b8d01130f"
1997 | },
1998 | "outputs": [],
1999 | "source": [
2000 | "# Concat the two dataframes together columnwise\n",
2001 | "attrition_final = pd.concat([attrition_num, attrition_cat], axis=1)"
2002 | ]
2003 | },
2004 | {
2005 | "cell_type": "code",
2006 | "execution_count": 19,
2007 | "metadata": {},
2008 | "outputs": [
2009 | {
2010 | "data": {
2011 | "text/plain": [
2012 | "(1470, 55)"
2013 | ]
2014 | },
2015 | "execution_count": 19,
2016 | "metadata": {},
2017 | "output_type": "execute_result"
2018 | }
2019 | ],
2020 | "source": [
2021 | "attrition_final.shape"
2022 | ]
2023 | },
2024 | {
2025 | "cell_type": "markdown",
2026 | "metadata": {
2027 | "_cell_guid": "1a295568-fab4-b79a-bc0d-be32ad032b3e"
2028 | },
2029 | "source": [
2030 | "**Target variable**\n",
2031 | "\n",
2032 | "The target in this case is given by the column **Attrition**, which contains categorical values and therefore requires numerical encoding. We encode it with a dictionary that maps Yes to 1 and No to 0."
2033 | ]
2034 | },
2035 | {
2036 | "cell_type": "code",
2037 | "execution_count": 20,
2038 | "metadata": {
2039 | "_cell_guid": "bfa5e82f-2dd3-1bee-5b2b-367468be7040"
2040 | },
2041 | "outputs": [
2042 | {
2043 | "data": {
2044 | "text/plain": [
2045 | "0 1\n",
2046 | "1 0\n",
2047 | "2 1\n",
2048 | "Name: Attrition, dtype: int64"
2049 | ]
2050 | },
2051 | "execution_count": 20,
2052 | "metadata": {},
2053 | "output_type": "execute_result"
2054 | }
2055 | ],
2056 | "source": [
2057 | "# Define a dictionary for the target mapping\n",
2058 | "target_map = {'Yes':1, 'No':0}\n",
2059 | "# Use the pandas apply method to numerically encode our attrition target variable\n",
2060 | "target = attrition[\"Attrition\"].apply(lambda x: target_map[x])\n",
2061 | "target.head(3)"
2062 | ]
2063 | },
2064 | {
2065 | "cell_type": "markdown",
2066 | "metadata": {
2067 | "_cell_guid": "5564e6e1-83ed-75de-2540-0d037e31291b"
2068 | },
2069 | "source": [
2070 | "\n",
2071 | "**Splitting Data into Train and Test sets**\n"
2072 | ]
2073 | },
2074 | {
2075 | "cell_type": "code",
2076 | "execution_count": 21,
2077 | "metadata": {
2078 | "_cell_guid": "c197f8ee-76b0-7137-f001-83f969637521"
2079 | },
2080 | "outputs": [],
2081 | "source": [
2082 | "# Split the features and target into train (75%) and test (remaining 25%) sets\n",
2083 | "train, test, target_train, target_test = train_test_split(attrition_final, target, train_size= 0.75,random_state=0);"
2084 | ]
2085 | },
2086 | {
2087 | "cell_type": "markdown",
2088 | "metadata": {},
2089 | "source": [
2090 | "# Implementing Machine Learning Models\n"
2091 | ]
2092 | },
2093 | {
2094 | "cell_type": "markdown",
2095 | "metadata": {
2096 | "_cell_guid": "610cfa87-0b9d-4671-cd51-c99ef9c9151d"
2097 | },
2098 | "source": [
2099 | "## GBM Classifier\n",
2100 | "\n"
2101 | ]
2102 | },
2103 | {
2104 | "cell_type": "markdown",
2105 | "metadata": {},
2106 | "source": [
2107 | "### 1. n_estimators - Number of trees in the model\n",
2108 | "\n",
2109 | "### 2. max_features - The number of features to consider when searching for the best split. A common rule of thumb is the square root of the number of columns\n",
2110 | "\n",
2111 | "### 3. max_depth - Maximum depth of each tree; can be used to control overfitting\n",
2112 | "\n",
2113 | "### 4. min_samples_leaf - Minimum number of samples (observations) required in a terminal node (leaf). In general, lower values are needed for imbalanced problems\n",
2114 | "\n",
2115 | "### 5. subsample - The fraction of samples used for fitting each individual base learner\n",
2116 | "\n",
2117 | "### 6. learning_rate - Shrinks the contribution of each tree by learning_rate. There is a trade-off between learning_rate and n_estimators"
2118 | ]
2119 | },
2120 | {
2121 | "cell_type": "code",
2122 | "execution_count": 44,
2123 | "metadata": {},
2124 | "outputs": [
2125 | {
2126 | "data": {
2127 | "text/plain": [
2128 | "{'ccp_alpha': 0.0,\n",
2129 | " 'criterion': 'friedman_mse',\n",
2130 | " 'init': None,\n",
2131 | " 'learning_rate': 0.1,\n",
2132 | " 'loss': 'deviance',\n",
2133 | " 'max_depth': 3,\n",
2134 | " 'max_features': None,\n",
2135 | " 'max_leaf_nodes': None,\n",
2136 | " 'min_impurity_decrease': 0.0,\n",
2137 | " 'min_impurity_split': None,\n",
2138 | " 'min_samples_leaf': 1,\n",
2139 | " 'min_samples_split': 2,\n",
2140 | " 'min_weight_fraction_leaf': 0.0,\n",
2141 | " 'n_estimators': 100,\n",
2142 | " 'n_iter_no_change': None,\n",
2143 | " 'presort': 'deprecated',\n",
2144 | " 'random_state': 100,\n",
2145 | " 'subsample': 1.0,\n",
2146 | " 'tol': 0.0001,\n",
2147 | " 'validation_fraction': 0.1,\n",
2148 | " 'verbose': 0,\n",
2149 | " 'warm_start': False}"
2150 | ]
2151 | },
2152 | "execution_count": 44,
2153 | "metadata": {},
2154 | "output_type": "execute_result"
2155 | }
2156 | ],
2157 | "source": [
2158 | "gb = GradientBoostingClassifier(random_state=100) # default \n",
2159 | "gb.get_params()"
2160 | ]
2161 | },
2162 | {
2163 | "cell_type": "code",
2164 | "execution_count": 45,
2165 | "metadata": {
2166 | "_cell_guid": "ed6a837e-2864-291c-be8d-3c8e9ed900b7"
2167 | },
2168 | "outputs": [],
2169 | "source": [
2170 | "# Fit the model to our train and target\n",
2171 | "gb.fit(train, target_train)\n",
2172 | "# Get our predictions\n",
2173 | "gb_predictions = gb.predict(test)"
2174 | ]
2175 | },
2176 | {
2177 | "cell_type": "code",
2178 | "execution_count": 46,
2179 | "metadata": {},
2180 | "outputs": [
2181 | {
2182 | "data": {
2183 | "text/plain": [
2184 | "array([[0.95425715, 0.04574285],\n",
2185 | " [0.96266164, 0.03733836],\n",
2186 | " [0.86753512, 0.13246488],\n",
2187 | " [0.93446343, 0.06553657],\n",
2188 | " [0.11595517, 0.88404483],\n",
2189 | " [0.69684147, 0.30315853],\n",
2190 | " [0.64887562, 0.35112438],\n",
2191 | " [0.95196738, 0.04803262],\n",
2192 | " [0.97038412, 0.02961588],\n",
2193 | " [0.86642399, 0.13357601],\n",
2194 | " [0.94437031, 0.05562969],\n",
2195 | " [0.91453905, 0.08546095],\n",
2196 | " [0.97008856, 0.02991144],\n",
2197 | " [0.30172989, 0.69827011],\n",
2198 | " [0.94679291, 0.05320709],\n",
2199 | " [0.98937899, 0.01062101],\n",
2200 | " [0.94587557, 0.05412443],\n",
2201 | " [0.93925719, 0.06074281],\n",
2202 | " [0.94572155, 0.05427845],\n",
2203 | " [0.92824768, 0.07175232],\n",
2204 | " [0.63031267, 0.36968733],\n",
2205 | " [0.95095032, 0.04904968],\n",
2206 | " [0.96466225, 0.03533775],\n",
2207 | " [0.97249008, 0.02750992],\n",
2208 | " [0.58256377, 0.41743623],\n",
2209 | " [0.75837581, 0.24162419],\n",
2210 | " [0.95692266, 0.04307734],\n",
2211 | " [0.97394765, 0.02605235],\n",
2212 | " [0.25660257, 0.74339743],\n",
2213 | " [0.96627476, 0.03372524],\n",
2214 | " [0.9370103 , 0.0629897 ],\n",
2215 | " [0.97054113, 0.02945887],\n",
2216 | " [0.72602826, 0.27397174],\n",
2217 | " [0.94088637, 0.05911363],\n",
2218 | " [0.8540458 , 0.1459542 ],\n",
2219 | " [0.95643054, 0.04356946],\n",
2220 | " [0.92795664, 0.07204336],\n",
2221 | " [0.95077212, 0.04922788],\n",
2222 | " [0.96403289, 0.03596711],\n",
2223 | " [0.89832621, 0.10167379],\n",
2224 | " [0.93184448, 0.06815552],\n",
2225 | " [0.98246084, 0.01753916],\n",
2226 | " [0.98249482, 0.01750518],\n",
2227 | " [0.9445623 , 0.0554377 ],\n",
2228 | " [0.9739705 , 0.0260295 ],\n",
2229 | " [0.39098348, 0.60901652],\n",
2230 | " [0.84584692, 0.15415308],\n",
2231 | " [0.98543184, 0.01456816],\n",
2232 | " [0.12275346, 0.87724654],\n",
2233 | " [0.41831239, 0.58168761],\n",
2234 | " [0.88842615, 0.11157385],\n",
2235 | " [0.45888689, 0.54111311],\n",
2236 | " [0.94905763, 0.05094237],\n",
2237 | " [0.95377671, 0.04622329],\n",
2238 | " [0.38737978, 0.61262022],\n",
2239 | " [0.81782947, 0.18217053],\n",
2240 | " [0.98358002, 0.01641998],\n",
2241 | " [0.90739507, 0.09260493],\n",
2242 | " [0.98341059, 0.01658941],\n",
2243 | " [0.52594093, 0.47405907],\n",
2244 | " [0.98112677, 0.01887323],\n",
2245 | " [0.90242903, 0.09757097],\n",
2246 | " [0.76861349, 0.23138651],\n",
2247 | " [0.94486355, 0.05513645],\n",
2248 | " [0.53386057, 0.46613943],\n",
2249 | " [0.96150163, 0.03849837],\n",
2250 | " [0.8207475 , 0.1792525 ],\n",
2251 | " [0.92160034, 0.07839966],\n",
2252 | " [0.94286369, 0.05713631],\n",
2253 | " [0.95425839, 0.04574161],\n",
2254 | " [0.94541621, 0.05458379],\n",
2255 | " [0.70456971, 0.29543029],\n",
2256 | " [0.89822164, 0.10177836],\n",
2257 | " [0.97057804, 0.02942196],\n",
2258 | " [0.9116692 , 0.0883308 ],\n",
2259 | " [0.96863902, 0.03136098],\n",
2260 | " [0.97495523, 0.02504477],\n",
2261 | " [0.92495345, 0.07504655],\n",
2262 | " [0.41259222, 0.58740778],\n",
2263 | " [0.98987829, 0.01012171],\n",
2264 | " [0.98695265, 0.01304735],\n",
2265 | " [0.97879634, 0.02120366],\n",
2266 | " [0.9380963 , 0.0619037 ],\n",
2267 | " [0.95352286, 0.04647714],\n",
2268 | " [0.93758617, 0.06241383],\n",
2269 | " [0.955901 , 0.044099 ],\n",
2270 | " [0.97398947, 0.02601053],\n",
2271 | " [0.82717248, 0.17282752],\n",
2272 | " [0.93887772, 0.06112228],\n",
2273 | " [0.96171254, 0.03828746],\n",
2274 | " [0.32999572, 0.67000428],\n",
2275 | " [0.96346516, 0.03653484],\n",
2276 | " [0.88623647, 0.11376353],\n",
2277 | " [0.73941826, 0.26058174],\n",
2278 | " [0.98771043, 0.01228957],\n",
2279 | " [0.9712543 , 0.0287457 ],\n",
2280 | " [0.89449503, 0.10550497],\n",
2281 | " [0.58046794, 0.41953206],\n",
2282 | " [0.97774197, 0.02225803],\n",
2283 | " [0.95302046, 0.04697954],\n",
2284 | " [0.71342454, 0.28657546],\n",
2285 | " [0.8061877 , 0.1938123 ],\n",
2286 | " [0.49475417, 0.50524583],\n",
2287 | " [0.97057666, 0.02942334],\n",
2288 | " [0.96443989, 0.03556011],\n",
2289 | " [0.96042126, 0.03957874],\n",
2290 | " [0.92572113, 0.07427887],\n",
2291 | " [0.62023522, 0.37976478],\n",
2292 | " [0.48119761, 0.51880239],\n",
2293 | " [0.84650471, 0.15349529],\n",
2294 | " [0.62243893, 0.37756107],\n",
2295 | " [0.85932631, 0.14067369],\n",
2296 | " [0.95462151, 0.04537849],\n",
2297 | " [0.96619621, 0.03380379],\n",
2298 | " [0.94581538, 0.05418462],\n",
2299 | " [0.91689517, 0.08310483],\n",
2300 | " [0.95839369, 0.04160631],\n",
2301 | " [0.8794354 , 0.1205646 ],\n",
2302 | " [0.98362608, 0.01637392],\n",
2303 | " [0.98281878, 0.01718122],\n",
2304 | " [0.96947689, 0.03052311],\n",
2305 | " [0.97334027, 0.02665973],\n",
2306 | " [0.95974568, 0.04025432],\n",
2307 | " [0.12036692, 0.87963308],\n",
2308 | " [0.82297479, 0.17702521],\n",
2309 | " [0.98153225, 0.01846775],\n",
2310 | " [0.98694682, 0.01305318],\n",
2311 | " [0.91438861, 0.08561139],\n",
2312 | " [0.89250181, 0.10749819],\n",
2313 | " [0.9602713 , 0.0397287 ],\n",
2314 | " [0.98262507, 0.01737493],\n",
2315 | " [0.48009381, 0.51990619],\n",
2316 | " [0.33955706, 0.66044294],\n",
2317 | " [0.64491347, 0.35508653],\n",
2318 | " [0.8168872 , 0.1831128 ],\n",
2319 | " [0.50657794, 0.49342206],\n",
2320 | " [0.56711104, 0.43288896],\n",
2321 | " [0.9007447 , 0.0992553 ],\n",
2322 | " [0.90199207, 0.09800793],\n",
2323 | " [0.95356113, 0.04643887],\n",
2324 | " [0.96705584, 0.03294416],\n",
2325 | " [0.90884378, 0.09115622],\n",
2326 | " [0.9349426 , 0.0650574 ],\n",
2327 | " [0.91939924, 0.08060076],\n",
2328 | " [0.84578704, 0.15421296],\n",
2329 | " [0.97456423, 0.02543577],\n",
2330 | " [0.84751971, 0.15248029],\n",
2331 | " [0.96033593, 0.03966407],\n",
2332 | " [0.92545517, 0.07454483],\n",
2333 | " [0.97048199, 0.02951801],\n",
2334 | " [0.97032391, 0.02967609],\n",
2335 | " [0.91630897, 0.08369103],\n",
2336 | " [0.93079044, 0.06920956],\n",
2337 | " [0.40456246, 0.59543754],\n",
2338 | " [0.947625 , 0.052375 ],\n",
2339 | " [0.91449881, 0.08550119],\n",
2340 | " [0.98636154, 0.01363846],\n",
2341 | " [0.71405454, 0.28594546],\n",
2342 | " [0.96510197, 0.03489803],\n",
2343 | " [0.97168808, 0.02831192],\n",
2344 | " [0.77305162, 0.22694838],\n",
2345 | " [0.98524719, 0.01475281],\n",
2346 | " [0.56610324, 0.43389676],\n",
2347 | " [0.06973742, 0.93026258],\n",
2348 | " [0.96669022, 0.03330978],\n",
2349 | " [0.86957648, 0.13042352],\n",
2350 | " [0.85638175, 0.14361825],\n",
2351 | " [0.96746578, 0.03253422],\n",
2352 | " [0.96326231, 0.03673769],\n",
2353 | " [0.98008622, 0.01991378],\n",
2354 | " [0.9631766 , 0.0368234 ],\n",
2355 | " [0.94571755, 0.05428245],\n",
2356 | " [0.95353727, 0.04646273],\n",
2357 | " [0.76749272, 0.23250728],\n",
2358 | " [0.97156023, 0.02843977],\n",
2359 | " [0.84776155, 0.15223845],\n",
2360 | " [0.93983408, 0.06016592],\n",
2361 | " [0.7128298 , 0.2871702 ],\n",
2362 | " [0.87254419, 0.12745581],\n",
2363 | " [0.91867668, 0.08132332],\n",
2364 | " [0.96659066, 0.03340934],\n",
2365 | " [0.96842656, 0.03157344],\n",
2366 | " [0.98440216, 0.01559784],\n",
2367 | " [0.52822561, 0.47177439],\n",
2368 | " [0.97013695, 0.02986305],\n",
2369 | " [0.975773 , 0.024227 ],\n",
2370 | " [0.72460519, 0.27539481],\n",
2371 | " [0.92948195, 0.07051805],\n",
2372 | " [0.92364724, 0.07635276],\n",
2373 | " [0.8141039 , 0.1858961 ],\n",
2374 | " [0.81531336, 0.18468664],\n",
2375 | " [0.34193098, 0.65806902],\n",
2376 | " [0.94062026, 0.05937974],\n",
2377 | " [0.98605656, 0.01394344],\n",
2378 | " [0.96238386, 0.03761614],\n",
2379 | " [0.89240038, 0.10759962],\n",
2380 | " [0.96629132, 0.03370868],\n",
2381 | " [0.96743579, 0.03256421],\n",
2382 | " [0.47861277, 0.52138723],\n",
2383 | " [0.95445889, 0.04554111],\n",
2384 | " [0.77024837, 0.22975163],\n",
2385 | " [0.93913958, 0.06086042],\n",
2386 | " [0.81173832, 0.18826168],\n",
2387 | " [0.97146037, 0.02853963],\n",
2388 | " [0.94145861, 0.05854139],\n",
2389 | " [0.93356602, 0.06643398],\n",
2390 | " [0.75972578, 0.24027422],\n",
2391 | " [0.98540422, 0.01459578],\n",
2392 | " [0.95597808, 0.04402192],\n",
2393 | " [0.97471758, 0.02528242],\n",
2394 | " [0.94873919, 0.05126081],\n",
2395 | " [0.97093324, 0.02906676],\n",
2396 | " [0.86302403, 0.13697597],\n",
2397 | " [0.83032931, 0.16967069],\n",
2398 | " [0.96277411, 0.03722589],\n",
2399 | " [0.97446945, 0.02553055],\n",
2400 | " [0.98062402, 0.01937598],\n",
2401 | " [0.87761169, 0.12238831],\n",
2402 | " [0.97251425, 0.02748575],\n",
2403 | " [0.63691449, 0.36308551],\n",
2404 | " [0.72636885, 0.27363115],\n",
2405 | " [0.77240465, 0.22759535],\n",
2406 | " [0.57649195, 0.42350805],\n",
2407 | " [0.67980935, 0.32019065],\n",
2408 | " [0.82615901, 0.17384099],\n",
2409 | " [0.96841481, 0.03158519],\n",
2410 | " [0.93445502, 0.06554498],\n",
2411 | " [0.69565772, 0.30434228],\n",
2412 | " [0.27794715, 0.72205285],\n",
2413 | " [0.92601046, 0.07398954],\n",
2414 | " [0.96144666, 0.03855334],\n",
2415 | " [0.59763048, 0.40236952],\n",
2416 | " [0.95776276, 0.04223724],\n",
2417 | " [0.92687147, 0.07312853],\n",
2418 | " [0.96753432, 0.03246568],\n",
2419 | " [0.61570908, 0.38429092],\n",
2420 | " [0.86399975, 0.13600025],\n",
2421 | " [0.98923344, 0.01076656],\n",
2422 | " [0.88397265, 0.11602735],\n",
2423 | " [0.97806179, 0.02193821],\n",
2424 | " [0.79986193, 0.20013807],\n",
2425 | " [0.80545456, 0.19454544],\n",
2426 | " [0.9721119 , 0.0278881 ],\n",
2427 | " [0.95970082, 0.04029918],\n",
2428 | " [0.86890865, 0.13109135],\n",
2429 | " [0.95813176, 0.04186824],\n",
2430 | " [0.97568072, 0.02431928],\n",
2431 | " [0.69268457, 0.30731543],\n",
2432 | " [0.92598788, 0.07401212],\n",
2433 | " [0.91900926, 0.08099074],\n",
2434 | " [0.91581725, 0.08418275],\n",
2435 | " [0.8345903 , 0.1654097 ],\n",
2436 | " [0.65988095, 0.34011905],\n",
2437 | " [0.94964915, 0.05035085],\n",
2438 | " [0.79333725, 0.20666275],\n",
2439 | " [0.92374445, 0.07625555],\n",
2440 | " [0.96868775, 0.03131225],\n",
2441 | " [0.35561514, 0.64438486],\n",
2442 | " [0.84094598, 0.15905402],\n",
2443 | " [0.38892717, 0.61107283],\n",
2444 | " [0.85167427, 0.14832573],\n",
2445 | " [0.96307142, 0.03692858],\n",
2446 | " [0.88462374, 0.11537626],\n",
2447 | " [0.94908531, 0.05091469],\n",
2448 | " [0.88240951, 0.11759049],\n",
2449 | " [0.79423159, 0.20576841],\n",
2450 | " [0.97788665, 0.02211335],\n",
2451 | " [0.62121259, 0.37878741],\n",
2452 | " [0.86255923, 0.13744077],\n",
2453 | " [0.97136832, 0.02863168],\n",
2454 | " [0.95266044, 0.04733956],\n",
2455 | " [0.96974757, 0.03025243],\n",
2456 | " [0.95422737, 0.04577263],\n",
2457 | " [0.94976773, 0.05023227],\n",
2458 | " [0.96809168, 0.03190832],\n",
2459 | " [0.97957092, 0.02042908],\n",
2460 | " [0.91742816, 0.08257184],\n",
2461 | " [0.74667356, 0.25332644],\n",
2462 | " [0.93221129, 0.06778871],\n",
2463 | " [0.67372023, 0.32627977],\n",
2464 | " [0.66684839, 0.33315161],\n",
2465 | " [0.97330889, 0.02669111],\n",
2466 | " [0.91337794, 0.08662206],\n",
2467 | " [0.77366167, 0.22633833],\n",
2468 | " [0.8832038 , 0.1167962 ],\n",
2469 | " [0.84292213, 0.15707787],\n",
2470 | " [0.99294 , 0.00706 ],\n",
2471 | " [0.94024791, 0.05975209],\n",
2472 | " [0.96593479, 0.03406521],\n",
2473 | " [0.97629651, 0.02370349],\n",
2474 | " [0.93867593, 0.06132407],\n",
2475 | " [0.59379026, 0.40620974],\n",
2476 | " [0.94611828, 0.05388172],\n",
2477 | " [0.94685051, 0.05314949],\n",
2478 | " [0.92676488, 0.07323512],\n",
2479 | " [0.9703074 , 0.0296926 ],\n",
2480 | " [0.89317638, 0.10682362],\n",
2481 | " [0.79986704, 0.20013296],\n",
2482 | " [0.02633175, 0.97366825],\n",
2483 | " [0.97025032, 0.02974968],\n",
2484 | " [0.89974809, 0.10025191],\n",
2485 | " [0.91638632, 0.08361368],\n",
2486 | " [0.78031964, 0.21968036],\n",
2487 | " [0.91008332, 0.08991668],\n",
2488 | " [0.97953917, 0.02046083],\n",
2489 | " [0.95730388, 0.04269612],\n",
2490 | " [0.7377896 , 0.2622104 ],\n",
2491 | " [0.88001116, 0.11998884],\n",
2492 | " [0.98293977, 0.01706023],\n",
2493 | " [0.96724203, 0.03275797],\n",
2494 | " [0.68747934, 0.31252066],\n",
2495 | " [0.96403622, 0.03596378],\n",
2496 | " [0.8867743 , 0.1132257 ],\n",
2497 | " [0.96202999, 0.03797001],\n",
2498 | " [0.82005794, 0.17994206],\n",
2499 | " [0.92286609, 0.07713391],\n",
2500 | " [0.88474497, 0.11525503],\n",
2501 | " [0.92484127, 0.07515873],\n",
2502 | " [0.70115256, 0.29884744],\n",
2503 | " [0.97183716, 0.02816284],\n",
2504 | " [0.5462985 , 0.4537015 ],\n",
2505 | " [0.9573019 , 0.0426981 ],\n",
2506 | " [0.92033062, 0.07966938],\n",
2507 | " [0.85225757, 0.14774243],\n",
2508 | " [0.95370205, 0.04629795],\n",
2509 | " [0.96747068, 0.03252932],\n",
2510 | " [0.7458445 , 0.2541555 ],\n",
2511 | " [0.74339205, 0.25660795],\n",
2512 | " [0.98721474, 0.01278526],\n",
2513 | " [0.96306272, 0.03693728],\n",
2514 | " [0.9579974 , 0.0420026 ],\n",
2515 | " [0.82413419, 0.17586581],\n",
2516 | " [0.89696197, 0.10303803],\n",
2517 | " [0.24911204, 0.75088796],\n",
2518 | " [0.82281444, 0.17718556],\n",
2519 | " [0.89850766, 0.10149234],\n",
2520 | " [0.92595595, 0.07404405],\n",
2521 | " [0.97562403, 0.02437597],\n",
2522 | " [0.97172923, 0.02827077],\n",
2523 | " [0.93588728, 0.06411272],\n",
2524 | " [0.88409016, 0.11590984],\n",
2525 | " [0.50668915, 0.49331085],\n",
2526 | " [0.93318445, 0.06681555],\n",
2527 | " [0.93502909, 0.06497091],\n",
2528 | " [0.93028959, 0.06971041],\n",
2529 | " [0.94739208, 0.05260792],\n",
2530 | " [0.95415688, 0.04584312],\n",
2531 | " [0.65663195, 0.34336805],\n",
2532 | " [0.96911857, 0.03088143],\n",
2533 | " [0.7018124 , 0.2981876 ],\n",
2534 | " [0.98190499, 0.01809501],\n",
2535 | " [0.94059124, 0.05940876],\n",
2536 | " [0.92597402, 0.07402598],\n",
2537 | " [0.92941033, 0.07058967],\n",
2538 | " [0.945513 , 0.054487 ],\n",
2539 | " [0.98032438, 0.01967562],\n",
2540 | " [0.95757909, 0.04242091],\n",
2541 | " [0.94447651, 0.05552349],\n",
2542 | " [0.97012588, 0.02987412],\n",
2543 | " [0.44250021, 0.55749979],\n",
2544 | " [0.98552445, 0.01447555],\n",
2545 | " [0.97023514, 0.02976486],\n",
2546 | " [0.96078622, 0.03921378],\n",
2547 | " [0.88696047, 0.11303953],\n",
2548 | " [0.94499311, 0.05500689],\n",
2549 | " [0.86110176, 0.13889824],\n",
2550 | " [0.95385708, 0.04614292],\n",
2551 | " [0.70609297, 0.29390703]])"
2552 | ]
2553 | },
2554 | "execution_count": 46,
2555 | "metadata": {},
2556 | "output_type": "execute_result"
2557 | }
2558 | ],
2559 | "source": [
2560 | "gb_predictions_prob = gb.predict_proba(test)\n",
2561 | "gb_predictions_prob"
2562 | ]
2563 | },
2564 | {
2565 | "cell_type": "code",
2566 | "execution_count": null,
2567 | "metadata": {},
2568 | "outputs": [],
2569 | "source": [
2570 | "# Gradient Boosting Parameters\n",
2571 | "# gb_params ={\n",
2572 | "# 'n_estimators': 500, # no of Trees \n",
2573 | "# 'learning_rate' : 0.2,\n",
2574 | "# 'max_depth': 11,\n",
2575 | "# 'min_samples_leaf': 2,\n",
2576 | "# 'subsample': 1,\n",
2577 | "# 'max_features' : 'sqrt',\n",
2578 | "# 'random_state' : 100,\n",
2579 | "# 'verbose': 0\n",
2580 | "# }\n",
2581 | "\n",
2582 | "#gb = GradientBoostingClassifier(**gb_params) # After hyperparameter tuning (HPT), we can pass the tuned parameters"
2583 | ]
2584 | },
2585 | {
2586 | "cell_type": "code",
2587 | "execution_count": 47,
2588 | "metadata": {
2589 | "_cell_guid": "40c37011-76df-fcc7-9cd0-e689374a8d1a"
2590 | },
2591 | "outputs": [
2592 | {
2593 | "data": {
2594 | "text/plain": [
2595 | "0.8831521739130435"
2596 | ]
2597 | },
2598 | "execution_count": 47,
2599 | "metadata": {},
2600 | "output_type": "execute_result"
2601 | }
2602 | ],
2603 | "source": [
2604 | "accuracy_score(target_test, gb_predictions)"
2605 | ]
2606 | },
2607 | {
2608 | "cell_type": "markdown",
2609 | "metadata": {
2610 | "_cell_guid": "21cc0476-b03e-731f-97b4-89d81977c3a7"
2611 | },
2612 | "source": [
2613 | "### Feature Importance Gradient Boosting Model\n"
2614 | ]
2615 | },
2616 | {
2617 | "cell_type": "code",
2618 | "execution_count": 48,
2619 | "metadata": {},
2620 | "outputs": [
2621 | {
2622 | "data": {
2623 | "text/plain": [
2624 | "array([0.06652223, 0.0588816 , 0.03073138, 0.00432496, 0. ,\n",
2625 | " 0.03904932, 0.03439457, 0.01631039, 0.03338705, 0.03211924,\n",
2626 | " 0.02490683, 0.10492537, 0.02377928, 0.0315579 , 0.02005508,\n",
2627 | " 0. , 0.01282365, 0. , 0.04186334, 0.04785753,\n",
2628 | " 0.00882029, 0.02921662, 0.02106675, 0.00975893, 0.02529042,\n",
2629 | " 0.05801712, 0.00119296, 0.02512782, 0. , 0. ,\n",
2630 | " 0.00335323, 0.00150605, 0.0042737 , 0.0005249 , 0.00822779,\n",
2631 | " 0.00420823, 0.0016531 , 0.00426846, 0.00052944, 0.00090874,\n",
2632 | " 0.00121754, 0.00014525, 0.01109724, 0. , 0.00334464,\n",
2633 | " 0. , 0.00517715, 0.01111343, 0.01076333, 0.00311429,\n",
2634 | " 0.00155062, 0.01744231, 0. , 0.05617885, 0.0474211 ])"
2635 | ]
2636 | },
2637 | "execution_count": 48,
2638 | "metadata": {},
2639 | "output_type": "execute_result"
2640 | }
2641 | ],
2642 | "source": [
2643 | "gb.feature_importances_"
2644 | ]
2645 | },
2646 | {
2647 | "cell_type": "code",
2648 | "execution_count": 49,
2649 | "metadata": {
2650 | "_cell_guid": "082ca641-ffd2-fc3b-a7b6-9418b08767d9"
2651 | },
2652 | "outputs": [
2653 | {
2654 | "data": {
2655 | "application/vnd.plotly.v1+json": {
2656 | "config": {
2657 | "linkText": "Export to plot.ly",
2658 | "plotlyServerURL": "https://plot.ly",
2659 | "showLink": false
2660 | },
2661 | "data": [
2662 | {
2663 | "marker": {
2664 | "color": [
2665 | 0.06652223253695257,
2666 | 0.05888159883551992,
2667 | 0.03073137823187192,
2668 | 0.004324958198515217,
2669 | 0,
2670 | 0.03904932419117631,
2671 | 0.03439456715586092,
2672 | 0.01631038927940496,
2673 | 0.03338704516488361,
2674 | 0.03211924296440046,
2675 | 0.024906829658417066,
2676 | 0.10492536820487945,
2677 | 0.02377927623243685,
2678 | 0.031557897004027106,
2679 | 0.020055083719287114,
2680 | 0,
2681 | 0.012823654096422492,
2682 | 0,
2683 | 0.04186333616035551,
2684 | 0.04785753324320872,
2685 | 0.008820286459808887,
2686 | 0.029216621375744902,
2687 | 0.021066753459770205,
2688 | 0.009758927945108273,
2689 | 0.025290422087271346,
2690 | 0.05801711636083825,
2691 | 0.001192961971420996,
2692 | 0.025127819791960826,
2693 | 0,
2694 | 0,
2695 | 0.0033532256344687922,
2696 | 0.0015060503101848842,
2697 | 0.004273699405845098,
2698 | 0.0005249018883639997,
2699 | 0.00822778740581938,
2700 | 0.004208227048439592,
2701 | 0.0016531018368706766,
2702 | 0.0042684633108618355,
2703 | 0.0005294377436253811,
2704 | 0.000908740704327891,
2705 | 0.0012175398421370041,
2706 | 0.00014524848123212858,
2707 | 0.011097244488818544,
2708 | 0,
2709 | 0.0033446426566392054,
2710 | 0,
2711 | 0.005177148028722981,
2712 | 0.011113429006340464,
2713 | 0.010763326636927086,
2714 | 0.003114285866218408,
2715 | 0.0015506180803909163,
2716 | 0.017442314085269278,
2717 | 0,
2718 | 0.05617884533997763,
2719 | 0.04742109786897487
2720 | ],
2721 | "colorscale": [
2722 | [
2723 | 0,
2724 | "rgb(12,51,131)"
2725 | ],
2726 | [
2727 | 0.25,
2728 | "rgb(10,136,186)"
2729 | ],
2730 | [
2731 | 0.5,
2732 | "rgb(242,211,56)"
2733 | ],
2734 | [
2735 | 0.75,
2736 | "rgb(242,143,56)"
2737 | ],
2738 | [
2739 | 1,
2740 | "rgb(217,30,30)"
2741 | ]
2742 | ],
2743 | "showscale": true,
2744 | "size": 12,
2745 | "sizemode": "diameter",
2746 | "sizeref": 1.3
2747 | },
2748 | "mode": "markers",
2749 | "text": [
2750 | "Age",
2751 | "DailyRate",
2752 | "DistanceFromHome",
2753 | "Education",
2754 | "EmployeeCount",
2755 | "EmployeeNumber",
2756 | "EnvironmentSatisfaction",
2757 | "HourlyRate",
2758 | "JobInvolvement",
2759 | "JobLevel",
2760 | "JobSatisfaction",
2761 | "MonthlyIncome",
2762 | "MonthlyRate",
2763 | "NumCompaniesWorked",
2764 | "PercentSalaryHike",
2765 | "PerformanceRating",
2766 | "RelationshipSatisfaction",
2767 | "StandardHours",
2768 | "StockOptionLevel",
2769 | "TotalWorkingYears",
2770 | "TrainingTimesLastYear",
2771 | "WorkLifeBalance",
2772 | "YearsAtCompany",
2773 | "YearsInCurrentRole",
2774 | "YearsSinceLastPromotion",
2775 | "YearsWithCurrManager",
2776 | "BusinessTravel_Non-Travel",
2777 | "BusinessTravel_Travel_Frequently",
2778 | "BusinessTravel_Travel_Rarely",
2779 | "Department_Human Resources",
2780 | "Department_Research & Development",
2781 | "Department_Sales",
2782 | "EducationField_Human Resources",
2783 | "EducationField_Life Sciences",
2784 | "EducationField_Marketing",
2785 | "EducationField_Medical",
2786 | "EducationField_Other",
2787 | "EducationField_Technical Degree",
2788 | "Gender_Female",
2789 | "Gender_Male",
2790 | "JobRole_Healthcare Representative",
2791 | "JobRole_Human Resources",
2792 | "JobRole_Laboratory Technician",
2793 | "JobRole_Manager",
2794 | "JobRole_Manufacturing Director",
2795 | "JobRole_Research Director",
2796 | "JobRole_Research Scientist",
2797 | "JobRole_Sales Executive",
2798 | "JobRole_Sales Representative",
2799 | "MaritalStatus_Divorced",
2800 | "MaritalStatus_Married",
2801 | "MaritalStatus_Single",
2802 | "Over18_Y",
2803 | "OverTime_No",
2804 | "OverTime_Yes"
2805 | ],
2806 | "type": "scatter",
2807 | "x": [
2808 | "Age",
2809 | "DailyRate",
2810 | "DistanceFromHome",
2811 | "Education",
2812 | "EmployeeCount",
2813 | "EmployeeNumber",
2814 | "EnvironmentSatisfaction",
2815 | "HourlyRate",
2816 | "JobInvolvement",
2817 | "JobLevel",
2818 | "JobSatisfaction",
2819 | "MonthlyIncome",
2820 | "MonthlyRate",
2821 | "NumCompaniesWorked",
2822 | "PercentSalaryHike",
2823 | "PerformanceRating",
2824 | "RelationshipSatisfaction",
2825 | "StandardHours",
2826 | "StockOptionLevel",
2827 | "TotalWorkingYears",
2828 | "TrainingTimesLastYear",
2829 | "WorkLifeBalance",
2830 | "YearsAtCompany",
2831 | "YearsInCurrentRole",
2832 | "YearsSinceLastPromotion",
2833 | "YearsWithCurrManager",
2834 | "BusinessTravel_Non-Travel",
2835 | "BusinessTravel_Travel_Frequently",
2836 | "BusinessTravel_Travel_Rarely",
2837 | "Department_Human Resources",
2838 | "Department_Research & Development",
2839 | "Department_Sales",
2840 | "EducationField_Human Resources",
2841 | "EducationField_Life Sciences",
2842 | "EducationField_Marketing",
2843 | "EducationField_Medical",
2844 | "EducationField_Other",
2845 | "EducationField_Technical Degree",
2846 | "Gender_Female",
2847 | "Gender_Male",
2848 | "JobRole_Healthcare Representative",
2849 | "JobRole_Human Resources",
2850 | "JobRole_Laboratory Technician",
2851 | "JobRole_Manager",
2852 | "JobRole_Manufacturing Director",
2853 | "JobRole_Research Director",
2854 | "JobRole_Research Scientist",
2855 | "JobRole_Sales Executive",
2856 | "JobRole_Sales Representative",
2857 | "MaritalStatus_Divorced",
2858 | "MaritalStatus_Married",
2859 | "MaritalStatus_Single",
2860 | "Over18_Y",
2861 | "OverTime_No",
2862 | "OverTime_Yes"
2863 | ],
2864 | "y": [
2865 | 0.06652223253695257,
2866 | 0.05888159883551992,
2867 | 0.03073137823187192,
2868 | 0.004324958198515217,
2869 | 0,
2870 | 0.03904932419117631,
2871 | 0.03439456715586092,
2872 | 0.01631038927940496,
2873 | 0.03338704516488361,
2874 | 0.03211924296440046,
2875 | 0.024906829658417066,
2876 | 0.10492536820487945,
2877 | 0.02377927623243685,
2878 | 0.031557897004027106,
2879 | 0.020055083719287114,
2880 | 0,
2881 | 0.012823654096422492,
2882 | 0,
2883 | 0.04186333616035551,
2884 | 0.04785753324320872,
2885 | 0.008820286459808887,
2886 | 0.029216621375744902,
2887 | 0.021066753459770205,
2888 | 0.009758927945108273,
2889 | 0.025290422087271346,
2890 | 0.05801711636083825,
2891 | 0.001192961971420996,
2892 | 0.025127819791960826,
2893 | 0,
2894 | 0,
2895 | 0.0033532256344687922,
2896 | 0.0015060503101848842,
2897 | 0.004273699405845098,
2898 | 0.0005249018883639997,
2899 | 0.00822778740581938,
2900 | 0.004208227048439592,
2901 | 0.0016531018368706766,
2902 | 0.0042684633108618355,
2903 | 0.0005294377436253811,
2904 | 0.000908740704327891,
2905 | 0.0012175398421370041,
2906 | 0.00014524848123212858,
2907 | 0.011097244488818544,
2908 | 0,
2909 | 0.0033446426566392054,
2910 | 0,
2911 | 0.005177148028722981,
2912 | 0.011113429006340464,
2913 | 0.010763326636927086,
2914 | 0.003114285866218408,
2915 | 0.0015506180803909163,
2916 | 0.017442314085269278,
2917 | 0,
2918 | 0.05617884533997763,
2919 | 0.04742109786897487
2920 | ]
2921 | }
2922 | ],
2923 | "layout": {
2924 | "autosize": true,
2925 | "hovermode": "closest",
2926 | "showlegend": false,
2927 | "template": {
2928 | "data": {
2929 | "bar": [
2930 | {
2931 | "error_x": {
2932 | "color": "#2a3f5f"
2933 | },
2934 | "error_y": {
2935 | "color": "#2a3f5f"
2936 | },
2937 | "marker": {
2938 | "line": {
2939 | "color": "#E5ECF6",
2940 | "width": 0.5
2941 | }
2942 | },
2943 | "type": "bar"
2944 | }
2945 | ],
2946 | "barpolar": [
2947 | {
2948 | "marker": {
2949 | "line": {
2950 | "color": "#E5ECF6",
2951 | "width": 0.5
2952 | }
2953 | },
2954 | "type": "barpolar"
2955 | }
2956 | ],
2957 | "carpet": [
2958 | {
2959 | "aaxis": {
2960 | "endlinecolor": "#2a3f5f",
2961 | "gridcolor": "white",
2962 | "linecolor": "white",
2963 | "minorgridcolor": "white",
2964 | "startlinecolor": "#2a3f5f"
2965 | },
2966 | "baxis": {
2967 | "endlinecolor": "#2a3f5f",
2968 | "gridcolor": "white",
2969 | "linecolor": "white",
2970 | "minorgridcolor": "white",
2971 | "startlinecolor": "#2a3f5f"
2972 | },
2973 | "type": "carpet"
2974 | }
2975 | ],
2976 | "choropleth": [
2977 | {
2978 | "colorbar": {
2979 | "outlinewidth": 0,
2980 | "ticks": ""
2981 | },
2982 | "type": "choropleth"
2983 | }
2984 | ],
2985 | "contour": [
2986 | {
2987 | "colorbar": {
2988 | "outlinewidth": 0,
2989 | "ticks": ""
2990 | },
2991 | "colorscale": [
2992 | [
2993 | 0,
2994 | "#0d0887"
2995 | ],
2996 | [
2997 | 0.1111111111111111,
2998 | "#46039f"
2999 | ],
3000 | [
3001 | 0.2222222222222222,
3002 | "#7201a8"
3003 | ],
3004 | [
3005 | 0.3333333333333333,
3006 | "#9c179e"
3007 | ],
3008 | [
3009 | 0.4444444444444444,
3010 | "#bd3786"
3011 | ],
3012 | [
3013 | 0.5555555555555556,
3014 | "#d8576b"
3015 | ],
3016 | [
3017 | 0.6666666666666666,
3018 | "#ed7953"
3019 | ],
3020 | [
3021 | 0.7777777777777778,
3022 | "#fb9f3a"
3023 | ],
3024 | [
3025 | 0.8888888888888888,
3026 | "#fdca26"
3027 | ],
3028 | [
3029 | 1,
3030 | "#f0f921"
3031 | ]
3032 | ],
3033 | "type": "contour"
3034 | }
3035 | ],
3036 | "contourcarpet": [
3037 | {
3038 | "colorbar": {
3039 | "outlinewidth": 0,
3040 | "ticks": ""
3041 | },
3042 | "type": "contourcarpet"
3043 | }
3044 | ],
3045 | "heatmap": [
3046 | {
3047 | "colorbar": {
3048 | "outlinewidth": 0,
3049 | "ticks": ""
3050 | },
3051 | "colorscale": [
3052 | [
3053 | 0,
3054 | "#0d0887"
3055 | ],
3056 | [
3057 | 0.1111111111111111,
3058 | "#46039f"
3059 | ],
3060 | [
3061 | 0.2222222222222222,
3062 | "#7201a8"
3063 | ],
3064 | [
3065 | 0.3333333333333333,
3066 | "#9c179e"
3067 | ],
3068 | [
3069 | 0.4444444444444444,
3070 | "#bd3786"
3071 | ],
3072 | [
3073 | 0.5555555555555556,
3074 | "#d8576b"
3075 | ],
3076 | [
3077 | 0.6666666666666666,
3078 | "#ed7953"
3079 | ],
3080 | [
3081 | 0.7777777777777778,
3082 | "#fb9f3a"
3083 | ],
3084 | [
3085 | 0.8888888888888888,
3086 | "#fdca26"
3087 | ],
3088 | [
3089 | 1,
3090 | "#f0f921"
3091 | ]
3092 | ],
3093 | "type": "heatmap"
3094 | }
3095 | ],
3096 | "heatmapgl": [
3097 | {
3098 | "colorbar": {
3099 | "outlinewidth": 0,
3100 | "ticks": ""
3101 | },
3102 | "colorscale": [
3103 | [
3104 | 0,
3105 | "#0d0887"
3106 | ],
3107 | [
3108 | 0.1111111111111111,
3109 | "#46039f"
3110 | ],
3111 | [
3112 | 0.2222222222222222,
3113 | "#7201a8"
3114 | ],
3115 | [
3116 | 0.3333333333333333,
3117 | "#9c179e"
3118 | ],
3119 | [
3120 | 0.4444444444444444,
3121 | "#bd3786"
3122 | ],
3123 | [
3124 | 0.5555555555555556,
3125 | "#d8576b"
3126 | ],
3127 | [
3128 | 0.6666666666666666,
3129 | "#ed7953"
3130 | ],
3131 | [
3132 | 0.7777777777777778,
3133 | "#fb9f3a"
3134 | ],
3135 | [
3136 | 0.8888888888888888,
3137 | "#fdca26"
3138 | ],
3139 | [
3140 | 1,
3141 | "#f0f921"
3142 | ]
3143 | ],
3144 | "type": "heatmapgl"
3145 | }
3146 | ],
3147 | "histogram": [
3148 | {
3149 | "marker": {
3150 | "colorbar": {
3151 | "outlinewidth": 0,
3152 | "ticks": ""
3153 | }
3154 | },
3155 | "type": "histogram"
3156 | }
3157 | ],
3158 | "histogram2d": [
3159 | {
3160 | "colorbar": {
3161 | "outlinewidth": 0,
3162 | "ticks": ""
3163 | },
3164 | "colorscale": [
3165 | [
3166 | 0,
3167 | "#0d0887"
3168 | ],
3169 | [
3170 | 0.1111111111111111,
3171 | "#46039f"
3172 | ],
3173 | [
3174 | 0.2222222222222222,
3175 | "#7201a8"
3176 | ],
3177 | [
3178 | 0.3333333333333333,
3179 | "#9c179e"
3180 | ],
3181 | [
3182 | 0.4444444444444444,
3183 | "#bd3786"
3184 | ],
3185 | [
3186 | 0.5555555555555556,
3187 | "#d8576b"
3188 | ],
3189 | [
3190 | 0.6666666666666666,
3191 | "#ed7953"
3192 | ],
3193 | [
3194 | 0.7777777777777778,
3195 | "#fb9f3a"
3196 | ],
3197 | [
3198 | 0.8888888888888888,
3199 | "#fdca26"
3200 | ],
3201 | [
3202 | 1,
3203 | "#f0f921"
3204 | ]
3205 | ],
3206 | "type": "histogram2d"
3207 | }
3208 | ],
3209 | "histogram2dcontour": [
3210 | {
3211 | "colorbar": {
3212 | "outlinewidth": 0,
3213 | "ticks": ""
3214 | },
3215 | "colorscale": [
3216 | [
3217 | 0,
3218 | "#0d0887"
3219 | ],
3220 | [
3221 | 0.1111111111111111,
3222 | "#46039f"
3223 | ],
3224 | [
3225 | 0.2222222222222222,
3226 | "#7201a8"
3227 | ],
3228 | [
3229 | 0.3333333333333333,
3230 | "#9c179e"
3231 | ],
3232 | [
3233 | 0.4444444444444444,
3234 | "#bd3786"
3235 | ],
3236 | [
3237 | 0.5555555555555556,
3238 | "#d8576b"
3239 | ],
3240 | [
3241 | 0.6666666666666666,
3242 | "#ed7953"
3243 | ],
3244 | [
3245 | 0.7777777777777778,
3246 | "#fb9f3a"
3247 | ],
3248 | [
3249 | 0.8888888888888888,
3250 | "#fdca26"
3251 | ],
3252 | [
3253 | 1,
3254 | "#f0f921"
3255 | ]
3256 | ],
3257 | "type": "histogram2dcontour"
3258 | }
3259 | ],
3260 | "mesh3d": [
3261 | {
3262 | "colorbar": {
3263 | "outlinewidth": 0,
3264 | "ticks": ""
3265 | },
3266 | "type": "mesh3d"
3267 | }
3268 | ],
3269 | "parcoords": [
3270 | {
3271 | "line": {
3272 | "colorbar": {
3273 | "outlinewidth": 0,
3274 | "ticks": ""
3275 | }
3276 | },
3277 | "type": "parcoords"
3278 | }
3279 | ],
3280 | "pie": [
3281 | {
3282 | "automargin": true,
3283 | "type": "pie"
3284 | }
3285 | ],
3286 | "scatter": [
3287 | {
3288 | "marker": {
3289 | "colorbar": {
3290 | "outlinewidth": 0,
3291 | "ticks": ""
3292 | }
3293 | },
3294 | "type": "scatter"
3295 | }
3296 | ],
3297 | "scatter3d": [
3298 | {
3299 | "line": {
3300 | "colorbar": {
3301 | "outlinewidth": 0,
3302 | "ticks": ""
3303 | }
3304 | },
3305 | "marker": {
3306 | "colorbar": {
3307 | "outlinewidth": 0,
3308 | "ticks": ""
3309 | }
3310 | },
3311 | "type": "scatter3d"
3312 | }
3313 | ],
3314 | "scattercarpet": [
3315 | {
3316 | "marker": {
3317 | "colorbar": {
3318 | "outlinewidth": 0,
3319 | "ticks": ""
3320 | }
3321 | },
3322 | "type": "scattercarpet"
3323 | }
3324 | ],
3325 | "scattergeo": [
3326 | {
3327 | "marker": {
3328 | "colorbar": {
3329 | "outlinewidth": 0,
3330 | "ticks": ""
3331 | }
3332 | },
3333 | "type": "scattergeo"
3334 | }
3335 | ],
3336 | "scattergl": [
3337 | {
3338 | "marker": {
3339 | "colorbar": {
3340 | "outlinewidth": 0,
3341 | "ticks": ""
3342 | }
3343 | },
3344 | "type": "scattergl"
3345 | }
3346 | ],
3347 | "scattermapbox": [
3348 | {
3349 | "marker": {
3350 | "colorbar": {
3351 | "outlinewidth": 0,
3352 | "ticks": ""
3353 | }
3354 | },
3355 | "type": "scattermapbox"
3356 | }
3357 | ],
3358 | "scatterpolar": [
3359 | {
3360 | "marker": {
3361 | "colorbar": {
3362 | "outlinewidth": 0,
3363 | "ticks": ""
3364 | }
3365 | },
3366 | "type": "scatterpolar"
3367 | }
3368 | ],
3369 | "scatterpolargl": [
3370 | {
3371 | "marker": {
3372 | "colorbar": {
3373 | "outlinewidth": 0,
3374 | "ticks": ""
3375 | }
3376 | },
3377 | "type": "scatterpolargl"
3378 | }
3379 | ],
3380 | "scatterternary": [
3381 | {
3382 | "marker": {
3383 | "colorbar": {
3384 | "outlinewidth": 0,
3385 | "ticks": ""
3386 | }
3387 | },
3388 | "type": "scatterternary"
3389 | }
3390 | ],
3391 | "surface": [
3392 | {
3393 | "colorbar": {
3394 | "outlinewidth": 0,
3395 | "ticks": ""
3396 | },
3397 | "colorscale": [
3398 | [
3399 | 0,
3400 | "#0d0887"
3401 | ],
3402 | [
3403 | 0.1111111111111111,
3404 | "#46039f"
3405 | ],
3406 | [
3407 | 0.2222222222222222,
3408 | "#7201a8"
3409 | ],
3410 | [
3411 | 0.3333333333333333,
3412 | "#9c179e"
3413 | ],
3414 | [
3415 | 0.4444444444444444,
3416 | "#bd3786"
3417 | ],
3418 | [
3419 | 0.5555555555555556,
3420 | "#d8576b"
3421 | ],
3422 | [
3423 | 0.6666666666666666,
3424 | "#ed7953"
3425 | ],
3426 | [
3427 | 0.7777777777777778,
3428 | "#fb9f3a"
3429 | ],
3430 | [
3431 | 0.8888888888888888,
3432 | "#fdca26"
3433 | ],
3434 | [
3435 | 1,
3436 | "#f0f921"
3437 | ]
3438 | ],
3439 | "type": "surface"
3440 | }
3441 | ],
3442 | "table": [
3443 | {
3444 | "cells": {
3445 | "fill": {
3446 | "color": "#EBF0F8"
3447 | },
3448 | "line": {
3449 | "color": "white"
3450 | }
3451 | },
3452 | "header": {
3453 | "fill": {
3454 | "color": "#C8D4E3"
3455 | },
3456 | "line": {
3457 | "color": "white"
3458 | }
3459 | },
3460 | "type": "table"
3461 | }
3462 | ]
3463 | },
3464 | "layout": {
3465 | "annotationdefaults": {
3466 | "arrowcolor": "#2a3f5f",
3467 | "arrowhead": 0,
3468 | "arrowwidth": 1
3469 | },
3470 | "coloraxis": {
3471 | "colorbar": {
3472 | "outlinewidth": 0,
3473 | "ticks": ""
3474 | }
3475 | },
3476 | "colorscale": {
3477 | "diverging": [
3478 | [
3479 | 0,
3480 | "#8e0152"
3481 | ],
3482 | [
3483 | 0.1,
3484 | "#c51b7d"
3485 | ],
3486 | [
3487 | 0.2,
3488 | "#de77ae"
3489 | ],
3490 | [
3491 | 0.3,
3492 | "#f1b6da"
3493 | ],
3494 | [
3495 | 0.4,
3496 | "#fde0ef"
3497 | ],
3498 | [
3499 | 0.5,
3500 | "#f7f7f7"
3501 | ],
3502 | [
3503 | 0.6,
3504 | "#e6f5d0"
3505 | ],
3506 | [
3507 | 0.7,
3508 | "#b8e186"
3509 | ],
3510 | [
3511 | 0.8,
3512 | "#7fbc41"
3513 | ],
3514 | [
3515 | 0.9,
3516 | "#4d9221"
3517 | ],
3518 | [
3519 | 1,
3520 | "#276419"
3521 | ]
3522 | ],
3523 | "sequential": [
3524 | [
3525 | 0,
3526 | "#0d0887"
3527 | ],
3528 | [
3529 | 0.1111111111111111,
3530 | "#46039f"
3531 | ],
3532 | [
3533 | 0.2222222222222222,
3534 | "#7201a8"
3535 | ],
3536 | [
3537 | 0.3333333333333333,
3538 | "#9c179e"
3539 | ],
3540 | [
3541 | 0.4444444444444444,
3542 | "#bd3786"
3543 | ],
3544 | [
3545 | 0.5555555555555556,
3546 | "#d8576b"
3547 | ],
3548 | [
3549 | 0.6666666666666666,
3550 | "#ed7953"
3551 | ],
3552 | [
3553 | 0.7777777777777778,
3554 | "#fb9f3a"
3555 | ],
3556 | [
3557 | 0.8888888888888888,
3558 | "#fdca26"
3559 | ],
3560 | [
3561 | 1,
3562 | "#f0f921"
3563 | ]
3564 | ],
3565 | "sequentialminus": [
3566 | [
3567 | 0,
3568 | "#0d0887"
3569 | ],
3570 | [
3571 | 0.1111111111111111,
3572 | "#46039f"
3573 | ],
3574 | [
3575 | 0.2222222222222222,
3576 | "#7201a8"
3577 | ],
3578 | [
3579 | 0.3333333333333333,
3580 | "#9c179e"
3581 | ],
3582 | [
3583 | 0.4444444444444444,
3584 | "#bd3786"
3585 | ],
3586 | [
3587 | 0.5555555555555556,
3588 | "#d8576b"
3589 | ],
3590 | [
3591 | 0.6666666666666666,
3592 | "#ed7953"
3593 | ],
3594 | [
3595 | 0.7777777777777778,
3596 | "#fb9f3a"
3597 | ],
3598 | [
3599 | 0.8888888888888888,
3600 | "#fdca26"
3601 | ],
3602 | [
3603 | 1,
3604 | "#f0f921"
3605 | ]
3606 | ]
3607 | },
3608 | "colorway": [
3609 | "#636efa",
3610 | "#EF553B",
3611 | "#00cc96",
3612 | "#ab63fa",
3613 | "#FFA15A",
3614 | "#19d3f3",
3615 | "#FF6692",
3616 | "#B6E880",
3617 | "#FF97FF",
3618 | "#FECB52"
3619 | ],
3620 | "font": {
3621 | "color": "#2a3f5f"
3622 | },
3623 | "geo": {
3624 | "bgcolor": "white",
3625 | "lakecolor": "white",
3626 | "landcolor": "#E5ECF6",
3627 | "showlakes": true,
3628 | "showland": true,
3629 | "subunitcolor": "white"
3630 | },
3631 | "hoverlabel": {
3632 | "align": "left"
3633 | },
3634 | "hovermode": "closest",
3635 | "mapbox": {
3636 | "style": "light"
3637 | },
3638 | "paper_bgcolor": "white",
3639 | "plot_bgcolor": "#E5ECF6",
3640 | "polar": {
3641 | "angularaxis": {
3642 | "gridcolor": "white",
3643 | "linecolor": "white",
3644 | "ticks": ""
3645 | },
3646 | "bgcolor": "#E5ECF6",
3647 | "radialaxis": {
3648 | "gridcolor": "white",
3649 | "linecolor": "white",
3650 | "ticks": ""
3651 | }
3652 | },
3653 | "scene": {
3654 | "xaxis": {
3655 | "backgroundcolor": "#E5ECF6",
3656 | "gridcolor": "white",
3657 | "gridwidth": 2,
3658 | "linecolor": "white",
3659 | "showbackground": true,
3660 | "ticks": "",
3661 | "zerolinecolor": "white"
3662 | },
3663 | "yaxis": {
3664 | "backgroundcolor": "#E5ECF6",
3665 | "gridcolor": "white",
3666 | "gridwidth": 2,
3667 | "linecolor": "white",
3668 | "showbackground": true,
3669 | "ticks": "",
3670 | "zerolinecolor": "white"
3671 | },
3672 | "zaxis": {
3673 | "backgroundcolor": "#E5ECF6",
3674 | "gridcolor": "white",
3675 | "gridwidth": 2,
3676 | "linecolor": "white",
3677 | "showbackground": true,
3678 | "ticks": "",
3679 | "zerolinecolor": "white"
3680 | }
3681 | },
3682 | "shapedefaults": {
3683 | "line": {
3684 | "color": "#2a3f5f"
3685 | }
3686 | },
3687 | "ternary": {
3688 | "aaxis": {
3689 | "gridcolor": "white",
3690 | "linecolor": "white",
3691 | "ticks": ""
3692 | },
3693 | "baxis": {
3694 | "gridcolor": "white",
3695 | "linecolor": "white",
3696 | "ticks": ""
3697 | },
3698 | "bgcolor": "#E5ECF6",
3699 | "caxis": {
3700 | "gridcolor": "white",
3701 | "linecolor": "white",
3702 | "ticks": ""
3703 | }
3704 | },
3705 | "title": {
3706 | "x": 0.05
3707 | },
3708 | "xaxis": {
3709 | "automargin": true,
3710 | "gridcolor": "white",
3711 | "linecolor": "white",
3712 | "ticks": "",
3713 | "title": {
3714 | "standoff": 15
3715 | },
3716 | "zerolinecolor": "white",
3717 | "zerolinewidth": 2
3718 | },
3719 | "yaxis": {
3720 | "automargin": true,
3721 | "gridcolor": "white",
3722 | "linecolor": "white",
3723 | "ticks": "",
3724 | "title": {
3725 | "standoff": 15
3726 | },
3727 | "zerolinecolor": "white",
3728 | "zerolinewidth": 2
3729 | }
3730 | }
3731 | },
3732 | "title": {
3733 | "text": "GBM Model Feature Importance"
3734 | },
3735 | "xaxis": {
3736 | "showgrid": false,
3737 | "showline": false,
3738 | "ticklen": 5,
3739 | "zeroline": false
3740 | },
3741 | "yaxis": {
3742 | "gridwidth": 2,
3743 | "showgrid": false,
3744 | "ticklen": 5,
3745 | "title": {
3746 | "text": "Feature Importance"
3747 | },
3748 | "zeroline": false
3749 | }
3750 | }
3751 | },
3752 | "text/html": [
3753 | "\n",
3754 | " \n",
3755 | " \n",
3756 | "
\n",
3757 | " \n",
3795 | "
"
3796 | ]
3797 | },
3798 | "metadata": {},
3799 | "output_type": "display_data"
3800 | }
3801 | ],
3802 | "source": [
3803 | "# Plot each column's GBM feature importance as a colour-coded marker\n",
3804 | "trace = go.Scatter(\n",
3805 | "    x=attrition_final.columns.values,\n",
3806 | "    y=gb.feature_importances_,\n",
3807 | "    text=attrition_final.columns.values,  # column name attached to each marker\n",
3808 | "    mode='markers',\n",
3809 | "    marker={\n",
3810 | "        'sizemode': 'diameter',\n",
3811 | "        'sizeref': 1.3,\n",
3812 | "        'size': 12,\n",
3813 | "        'color': gb.feature_importances_,  # colour follows the importance value\n",
3814 | "        'colorscale': 'Portland',\n",
3815 | "        'showscale': True,\n",
3816 | "    },\n",
3817 | ")\n",
3818 | "data = [trace]\n",
3819 | "# Layout: titled figure, no legend, grid and zero lines switched off on both axes\n",
3820 | "layout = go.Layout(\n",
3821 | "    title='GBM Model Feature Importance',\n",
3822 | "    autosize=True,\n",
3823 | "    showlegend=False,\n",
3824 | "    hovermode='closest',\n",
3825 | "    xaxis={\n",
3826 | "        'ticklen': 5,\n",
3827 | "        'showgrid': False,\n",
3828 | "        'zeroline': False,\n",
3829 | "        'showline': False,\n",
3830 | "    },\n",
3831 | "    yaxis={\n",
3832 | "        'title': 'Feature Importance',\n",
3833 | "        'ticklen': 5,\n",
3834 | "        'gridwidth': 2,\n",
3835 | "        'showgrid': False,\n",
3836 | "        'zeroline': False,\n",
3837 | "    },\n",
3838 | ")\n",
3839 | "fig = go.Figure(data=data, layout=layout)\n",
3840 | "py.iplot(fig, filename='scatter')"
3841 | ]
3842 | }
3843 | ],
3844 | "metadata": {
3845 | "_change_revision": 2,
3846 | "_is_fork": false,
3847 | "kernelspec": {
3848 | "display_name": "Python 3",
3849 | "language": "python",
3850 | "name": "python3"
3851 | },
3852 | "language_info": {
3853 | "codemirror_mode": {
3854 | "name": "ipython",
3855 | "version": 3
3856 | },
3857 | "file_extension": ".py",
3858 | "mimetype": "text/x-python",
3859 | "name": "python",
3860 | "nbconvert_exporter": "python",
3861 | "pygments_lexer": "ipython3",
3862 | "version": "3.7.7"
3863 | }
3864 | },
3865 | "nbformat": 4,
3866 | "nbformat_minor": 1
3867 | }
3868 |
--------------------------------------------------------------------------------