├── .gitattributes
├── .ipynb_checkpoints
│   ├── bank churn model - modeling(GBDT)-checkpoint.ipynb
│   ├── bank churn model - preprocessing-checkpoint.ipynb
│   └── bank churn model - preview-checkpoint.ipynb
├── ExternalData.csv
├── bank churn model - modeling(GBDT).ipynb
├── bank churn model - preprocessing.ipynb
├── bank churn model - preview.ipynb
├── bankChurn.csv
└── model_data.csv
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/bank churn model - modeling(GBDT)-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stderr",
10 | "output_type": "stream",
11 | "text": [
12 | "D:\\Anaconda3\\lib\\site-packages\\sklearn\\cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
13 | " \"This module will be removed in 0.20.\", DeprecationWarning)\n"
14 | ]
15 | }
16 | ],
17 | "source": [
18 | "import pandas as pd\n",
19 | "from sklearn import ensemble, cross_validation, metrics\n",
20 | "from sklearn.ensemble import GradientBoostingClassifier\n",
21 | "from sklearn.cross_validation import train_test_split\n",
22 | "from sklearn.cross_validation import KFold\n",
23 | "from sklearn.model_selection import GridSearchCV"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 2,
29 | "metadata": {},
30 | "outputs": [
31 | {
32 | "data": {
33 | "text/html": [
34 | "
\n",
35 | "\n",
48 | "
\n",
49 | " \n",
50 | " \n",
51 | " | \n",
52 | " LOCAL_CUR_SAV_SLOPE | \n",
53 | " LOCAL_CUR_MON_AVG_BAL | \n",
54 | " LOCAL_OVEONEYR_FF_MON_AVG_BAL | \n",
55 | " LOCAL_FIX_MON_AVG_BAL | \n",
56 | " LOCAL_FIX_MON_AVG_BAL_PROP | \n",
57 | " LOCAL_BELONEYR_FF_SLOPE | \n",
58 | " LOCAL_BELONEYR_FF_MON_AVG_BAL | \n",
59 | " LOCAL_OVEONEYR_FF_SLOPE | \n",
60 | " LOCAL_SAV_SLOPE | \n",
61 | " LOCAL_SAV_CUR_ALL_BAL | \n",
62 | " ... | \n",
63 | " kid6_10_nan | \n",
64 | " kid11_15_U | \n",
65 | " kid11_15_Y | \n",
66 | " kid11_15_nan | \n",
67 | " kid16_17_U | \n",
68 | " kid16_17_Y | \n",
69 | " kid16_17_nan | \n",
70 | " car_buy_New | \n",
71 | " car_buy_UNKNOWN | \n",
72 | " car_buy_nan | \n",
73 | "
\n",
74 | " \n",
75 | " \n",
76 | " \n",
77 | " 0 | \n",
78 | " 0.333333 | \n",
79 | " 0.000000 | \n",
80 | " 0.009068 | \n",
81 | " 0.001157 | \n",
82 | " 1.000000 | \n",
83 | " 0.344437 | \n",
84 | " 0.0 | \n",
85 | " 0.333284 | \n",
86 | " 0.333196 | \n",
87 | " 9.277653e-04 | \n",
88 | " ... | \n",
89 | " 0.0 | \n",
90 | " 1.0 | \n",
91 | " 0.0 | \n",
92 | " 0.0 | \n",
93 | " 1.0 | \n",
94 | " 0.0 | \n",
95 | " 0.0 | \n",
96 | " 1.0 | \n",
97 | " 0.0 | \n",
98 | " 0.0 | \n",
99 | "
\n",
100 | " \n",
101 | " 1 | \n",
102 | " 0.668564 | \n",
103 | " 0.002648 | \n",
104 | " 0.010078 | \n",
105 | " 0.001286 | \n",
106 | " 0.735565 | \n",
107 | " 0.344437 | \n",
108 | " 0.0 | \n",
109 | " 0.293169 | \n",
110 | " 0.345716 | \n",
111 | " 7.225051e-04 | \n",
112 | " ... | \n",
113 | " 0.0 | \n",
114 | " 1.0 | \n",
115 | " 0.0 | \n",
116 | " 0.0 | \n",
117 | " 1.0 | \n",
118 | " 0.0 | \n",
119 | " 0.0 | \n",
120 | " 1.0 | \n",
121 | " 0.0 | \n",
122 | " 0.0 | \n",
123 | "
\n",
124 | " \n",
125 | " 2 | \n",
126 | " 0.333521 | \n",
127 | " 0.000002 | \n",
128 | " 0.000000 | \n",
129 | " 0.000000 | \n",
130 | " 0.000000 | \n",
131 | " 0.344437 | \n",
132 | " 0.0 | \n",
133 | " 0.344442 | \n",
134 | " 0.344545 | \n",
135 | " 4.032385e-07 | \n",
136 | " ... | \n",
137 | " 0.0 | \n",
138 | " 1.0 | \n",
139 | " 0.0 | \n",
140 | " 0.0 | \n",
141 | " 1.0 | \n",
142 | " 0.0 | \n",
143 | " 0.0 | \n",
144 | " 1.0 | \n",
145 | " 0.0 | \n",
146 | " 0.0 | \n",
147 | "
\n",
148 | " \n",
149 | " 3 | \n",
150 | " 0.149696 | \n",
151 | " 0.003858 | \n",
152 | " 0.000000 | \n",
153 | " 0.000000 | \n",
154 | " 0.000000 | \n",
155 | " 0.344437 | \n",
156 | " 0.0 | \n",
157 | " 0.344442 | \n",
158 | " 0.154644 | \n",
159 | " 7.529284e-04 | \n",
160 | " ... | \n",
161 | " 0.0 | \n",
162 | " 1.0 | \n",
163 | " 0.0 | \n",
164 | " 0.0 | \n",
165 | " 1.0 | \n",
166 | " 0.0 | \n",
167 | " 0.0 | \n",
168 | " 0.0 | \n",
169 | " 1.0 | \n",
170 | " 0.0 | \n",
171 | "
\n",
172 | " \n",
173 | " 4 | \n",
174 | " 0.333429 | \n",
175 | " 0.000005 | \n",
176 | " 0.000000 | \n",
177 | " 0.000000 | \n",
178 | " 0.000000 | \n",
179 | " 0.344437 | \n",
180 | " 0.0 | \n",
181 | " 0.344442 | \n",
182 | " 0.344450 | \n",
183 | " 7.852062e-07 | \n",
184 | " ... | \n",
185 | " 0.0 | \n",
186 | " 1.0 | \n",
187 | " 0.0 | \n",
188 | " 0.0 | \n",
189 | " 0.0 | \n",
190 | " 1.0 | \n",
191 | " 0.0 | \n",
192 | " 1.0 | \n",
193 | " 0.0 | \n",
194 | " 0.0 | \n",
195 | "
\n",
196 | " \n",
197 | "
\n",
198 | "
5 rows × 178 columns
\n",
199 | "
"
200 | ],
201 | "text/plain": [
202 | " LOCAL_CUR_SAV_SLOPE LOCAL_CUR_MON_AVG_BAL LOCAL_OVEONEYR_FF_MON_AVG_BAL \\\n",
203 | "0 0.333333 0.000000 0.009068 \n",
204 | "1 0.668564 0.002648 0.010078 \n",
205 | "2 0.333521 0.000002 0.000000 \n",
206 | "3 0.149696 0.003858 0.000000 \n",
207 | "4 0.333429 0.000005 0.000000 \n",
208 | "\n",
209 | " LOCAL_FIX_MON_AVG_BAL LOCAL_FIX_MON_AVG_BAL_PROP LOCAL_BELONEYR_FF_SLOPE \\\n",
210 | "0 0.001157 1.000000 0.344437 \n",
211 | "1 0.001286 0.735565 0.344437 \n",
212 | "2 0.000000 0.000000 0.344437 \n",
213 | "3 0.000000 0.000000 0.344437 \n",
214 | "4 0.000000 0.000000 0.344437 \n",
215 | "\n",
216 | " LOCAL_BELONEYR_FF_MON_AVG_BAL LOCAL_OVEONEYR_FF_SLOPE LOCAL_SAV_SLOPE \\\n",
217 | "0 0.0 0.333284 0.333196 \n",
218 | "1 0.0 0.293169 0.345716 \n",
219 | "2 0.0 0.344442 0.344545 \n",
220 | "3 0.0 0.344442 0.154644 \n",
221 | "4 0.0 0.344442 0.344450 \n",
222 | "\n",
223 | " LOCAL_SAV_CUR_ALL_BAL ... kid6_10_nan kid11_15_U kid11_15_Y \\\n",
224 | "0 9.277653e-04 ... 0.0 1.0 0.0 \n",
225 | "1 7.225051e-04 ... 0.0 1.0 0.0 \n",
226 | "2 4.032385e-07 ... 0.0 1.0 0.0 \n",
227 | "3 7.529284e-04 ... 0.0 1.0 0.0 \n",
228 | "4 7.852062e-07 ... 0.0 1.0 0.0 \n",
229 | "\n",
230 | " kid11_15_nan kid16_17_U kid16_17_Y kid16_17_nan car_buy_New \\\n",
231 | "0 0.0 1.0 0.0 0.0 1.0 \n",
232 | "1 0.0 1.0 0.0 0.0 1.0 \n",
233 | "2 0.0 1.0 0.0 0.0 1.0 \n",
234 | "3 0.0 1.0 0.0 0.0 0.0 \n",
235 | "4 0.0 0.0 1.0 0.0 1.0 \n",
236 | "\n",
237 | " car_buy_UNKNOWN car_buy_nan \n",
238 | "0 0.0 0.0 \n",
239 | "1 0.0 0.0 \n",
240 | "2 0.0 0.0 \n",
241 | "3 1.0 0.0 \n",
242 | "4 0.0 0.0 \n",
243 | "\n",
244 | "[5 rows x 178 columns]"
245 | ]
246 | },
247 | "execution_count": 2,
248 | "metadata": {},
249 | "output_type": "execute_result"
250 | }
251 | ],
252 | "source": [
253 | "model_data = pd.read_csv('./model_data.csv', encoding='utf-8')\n",
254 | "model_data.head()"
255 | ]
256 | },
257 | {
258 | "cell_type": "code",
259 | "execution_count": 3,
260 | "metadata": {
261 | "collapsed": true
262 | },
263 | "outputs": [],
264 | "source": [
265 | "all_features = list(model_data.columns)\n",
266 | "all_features.remove('CHURN_CUST_IND')"
267 | ]
268 | },
269 | {
270 | "cell_type": "code",
271 | "execution_count": 4,
272 | "metadata": {
273 | "collapsed": true
274 | },
275 | "outputs": [],
276 | "source": [
277 | "X_train, X_test, y_train, y_test = train_test_split(model_data[all_features], model_data['CHURN_CUST_IND'], test_size=0.3, random_state=1)"
278 | ]
279 | },
280 | {
281 | "cell_type": "code",
282 | "execution_count": 5,
283 | "metadata": {},
284 | "outputs": [
285 | {
286 | "name": "stdout",
287 | "output_type": "stream",
288 | "text": [
289 | "y_train_count: 0.0 10851\n",
290 | "1.0 1217\n",
291 | "Name: CHURN_CUST_IND, dtype: int64\n",
292 | "y_test_count: 0.0 4649\n",
293 | "1.0 524\n",
294 | "Name: CHURN_CUST_IND, dtype: int64\n"
295 | ]
296 | }
297 | ],
298 | "source": [
299 | "print('y_train_count: ', y_train.value_counts())\n",
300 | "print('y_test_count: ', y_test.value_counts())"
301 | ]
302 | },
303 | {
304 | "cell_type": "code",
305 | "execution_count": 6,
306 | "metadata": {},
307 | "outputs": [
308 | {
309 | "name": "stdout",
310 | "output_type": "stream",
311 | "text": [
312 | "Accuracy : 0.9115\n",
313 | "AUC Score (Testing): 0.855843\n"
314 | ]
315 | }
316 | ],
317 | "source": [
318 | "# Train using the default parameters of the GBDT module\n",
319 | "gbm0 = GradientBoostingClassifier(random_state=10)\n",
320 | "gbm0.fit(X_train,y_train)\n",
321 | "y_pred = gbm0.predict(X_test)\n",
322 | "y_predprob = gbm0.predict_proba(X_test)[:,1]\n",
323 | "print(\"Accuracy : %.4g\" % metrics.accuracy_score(y_test, y_pred))\n",
324 | "print (\"AUC Score (Testing): %f\" % metrics.roc_auc_score(y_test, y_predprob))"
325 | ]
326 | },
327 | {
328 | "cell_type": "code",
329 | "execution_count": null,
330 | "metadata": {
331 | "collapsed": true
332 | },
333 | "outputs": [],
334 | "source": []
335 | }
336 | ],
337 | "metadata": {
338 | "kernelspec": {
339 | "display_name": "Python 3",
340 | "language": "python",
341 | "name": "python3"
342 | },
343 | "language_info": {
344 | "codemirror_mode": {
345 | "name": "ipython",
346 | "version": 3
347 | },
348 | "file_extension": ".py",
349 | "mimetype": "text/x-python",
350 | "name": "python",
351 | "nbconvert_exporter": "python",
352 | "pygments_lexer": "ipython3",
353 | "version": "3.6.3"
354 | }
355 | },
356 | "nbformat": 4,
357 | "nbformat_minor": 2
358 | }
359 |
--------------------------------------------------------------------------------
/bank churn model - modeling(GBDT).ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 47,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "from sklearn import ensemble, metrics\n",
11 | "from sklearn.ensemble import GradientBoostingClassifier\n",
12 | "from sklearn.model_selection import train_test_split  # model_selection replaces the deprecated sklearn.cross_validation module\n",
13 | "from sklearn.model_selection import KFold\n",
14 | "from sklearn.model_selection import GridSearchCV\n",
15 | "from sklearn.metrics import roc_curve, auc\n",
16 | "import matplotlib.pyplot as plt"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 2,
22 | "metadata": {},
23 | "outputs": [
24 | {
25 | "data": {
26 | "text/html": [
27 | "\n",
28 | "\n",
41 | "
\n",
42 | " \n",
43 | " \n",
44 | " | \n",
45 | " LOCAL_CUR_SAV_SLOPE | \n",
46 | " LOCAL_CUR_MON_AVG_BAL | \n",
47 | " LOCAL_OVEONEYR_FF_MON_AVG_BAL | \n",
48 | " LOCAL_FIX_MON_AVG_BAL | \n",
49 | " LOCAL_FIX_MON_AVG_BAL_PROP | \n",
50 | " LOCAL_BELONEYR_FF_SLOPE | \n",
51 | " LOCAL_BELONEYR_FF_MON_AVG_BAL | \n",
52 | " LOCAL_OVEONEYR_FF_SLOPE | \n",
53 | " LOCAL_SAV_SLOPE | \n",
54 | " LOCAL_SAV_CUR_ALL_BAL | \n",
55 | " ... | \n",
56 | " kid6_10_nan | \n",
57 | " kid11_15_U | \n",
58 | " kid11_15_Y | \n",
59 | " kid11_15_nan | \n",
60 | " kid16_17_U | \n",
61 | " kid16_17_Y | \n",
62 | " kid16_17_nan | \n",
63 | " car_buy_New | \n",
64 | " car_buy_UNKNOWN | \n",
65 | " car_buy_nan | \n",
66 | "
\n",
67 | " \n",
68 | " \n",
69 | " \n",
70 | " 0 | \n",
71 | " 0.333333 | \n",
72 | " 0.000000 | \n",
73 | " 0.009068 | \n",
74 | " 0.001157 | \n",
75 | " 1.000000 | \n",
76 | " 0.344437 | \n",
77 | " 0.0 | \n",
78 | " 0.333284 | \n",
79 | " 0.333196 | \n",
80 | " 9.277653e-04 | \n",
81 | " ... | \n",
82 | " 0.0 | \n",
83 | " 1.0 | \n",
84 | " 0.0 | \n",
85 | " 0.0 | \n",
86 | " 1.0 | \n",
87 | " 0.0 | \n",
88 | " 0.0 | \n",
89 | " 1.0 | \n",
90 | " 0.0 | \n",
91 | " 0.0 | \n",
92 | "
\n",
93 | " \n",
94 | " 1 | \n",
95 | " 0.668564 | \n",
96 | " 0.002648 | \n",
97 | " 0.010078 | \n",
98 | " 0.001286 | \n",
99 | " 0.735565 | \n",
100 | " 0.344437 | \n",
101 | " 0.0 | \n",
102 | " 0.293169 | \n",
103 | " 0.345716 | \n",
104 | " 7.225051e-04 | \n",
105 | " ... | \n",
106 | " 0.0 | \n",
107 | " 1.0 | \n",
108 | " 0.0 | \n",
109 | " 0.0 | \n",
110 | " 1.0 | \n",
111 | " 0.0 | \n",
112 | " 0.0 | \n",
113 | " 1.0 | \n",
114 | " 0.0 | \n",
115 | " 0.0 | \n",
116 | "
\n",
117 | " \n",
118 | " 2 | \n",
119 | " 0.333521 | \n",
120 | " 0.000002 | \n",
121 | " 0.000000 | \n",
122 | " 0.000000 | \n",
123 | " 0.000000 | \n",
124 | " 0.344437 | \n",
125 | " 0.0 | \n",
126 | " 0.344442 | \n",
127 | " 0.344545 | \n",
128 | " 4.032385e-07 | \n",
129 | " ... | \n",
130 | " 0.0 | \n",
131 | " 1.0 | \n",
132 | " 0.0 | \n",
133 | " 0.0 | \n",
134 | " 1.0 | \n",
135 | " 0.0 | \n",
136 | " 0.0 | \n",
137 | " 1.0 | \n",
138 | " 0.0 | \n",
139 | " 0.0 | \n",
140 | "
\n",
141 | " \n",
142 | " 3 | \n",
143 | " 0.149696 | \n",
144 | " 0.003858 | \n",
145 | " 0.000000 | \n",
146 | " 0.000000 | \n",
147 | " 0.000000 | \n",
148 | " 0.344437 | \n",
149 | " 0.0 | \n",
150 | " 0.344442 | \n",
151 | " 0.154644 | \n",
152 | " 7.529284e-04 | \n",
153 | " ... | \n",
154 | " 0.0 | \n",
155 | " 1.0 | \n",
156 | " 0.0 | \n",
157 | " 0.0 | \n",
158 | " 1.0 | \n",
159 | " 0.0 | \n",
160 | " 0.0 | \n",
161 | " 0.0 | \n",
162 | " 1.0 | \n",
163 | " 0.0 | \n",
164 | "
\n",
165 | " \n",
166 | " 4 | \n",
167 | " 0.333429 | \n",
168 | " 0.000005 | \n",
169 | " 0.000000 | \n",
170 | " 0.000000 | \n",
171 | " 0.000000 | \n",
172 | " 0.344437 | \n",
173 | " 0.0 | \n",
174 | " 0.344442 | \n",
175 | " 0.344450 | \n",
176 | " 7.852062e-07 | \n",
177 | " ... | \n",
178 | " 0.0 | \n",
179 | " 1.0 | \n",
180 | " 0.0 | \n",
181 | " 0.0 | \n",
182 | " 0.0 | \n",
183 | " 1.0 | \n",
184 | " 0.0 | \n",
185 | " 1.0 | \n",
186 | " 0.0 | \n",
187 | " 0.0 | \n",
188 | "
\n",
189 | " \n",
190 | "
\n",
191 | "
5 rows × 178 columns
\n",
192 | "
"
193 | ],
194 | "text/plain": [
195 | " LOCAL_CUR_SAV_SLOPE LOCAL_CUR_MON_AVG_BAL LOCAL_OVEONEYR_FF_MON_AVG_BAL \\\n",
196 | "0 0.333333 0.000000 0.009068 \n",
197 | "1 0.668564 0.002648 0.010078 \n",
198 | "2 0.333521 0.000002 0.000000 \n",
199 | "3 0.149696 0.003858 0.000000 \n",
200 | "4 0.333429 0.000005 0.000000 \n",
201 | "\n",
202 | " LOCAL_FIX_MON_AVG_BAL LOCAL_FIX_MON_AVG_BAL_PROP LOCAL_BELONEYR_FF_SLOPE \\\n",
203 | "0 0.001157 1.000000 0.344437 \n",
204 | "1 0.001286 0.735565 0.344437 \n",
205 | "2 0.000000 0.000000 0.344437 \n",
206 | "3 0.000000 0.000000 0.344437 \n",
207 | "4 0.000000 0.000000 0.344437 \n",
208 | "\n",
209 | " LOCAL_BELONEYR_FF_MON_AVG_BAL LOCAL_OVEONEYR_FF_SLOPE LOCAL_SAV_SLOPE \\\n",
210 | "0 0.0 0.333284 0.333196 \n",
211 | "1 0.0 0.293169 0.345716 \n",
212 | "2 0.0 0.344442 0.344545 \n",
213 | "3 0.0 0.344442 0.154644 \n",
214 | "4 0.0 0.344442 0.344450 \n",
215 | "\n",
216 | " LOCAL_SAV_CUR_ALL_BAL ... kid6_10_nan kid11_15_U kid11_15_Y \\\n",
217 | "0 9.277653e-04 ... 0.0 1.0 0.0 \n",
218 | "1 7.225051e-04 ... 0.0 1.0 0.0 \n",
219 | "2 4.032385e-07 ... 0.0 1.0 0.0 \n",
220 | "3 7.529284e-04 ... 0.0 1.0 0.0 \n",
221 | "4 7.852062e-07 ... 0.0 1.0 0.0 \n",
222 | "\n",
223 | " kid11_15_nan kid16_17_U kid16_17_Y kid16_17_nan car_buy_New \\\n",
224 | "0 0.0 1.0 0.0 0.0 1.0 \n",
225 | "1 0.0 1.0 0.0 0.0 1.0 \n",
226 | "2 0.0 1.0 0.0 0.0 1.0 \n",
227 | "3 0.0 1.0 0.0 0.0 0.0 \n",
228 | "4 0.0 0.0 1.0 0.0 1.0 \n",
229 | "\n",
230 | " car_buy_UNKNOWN car_buy_nan \n",
231 | "0 0.0 0.0 \n",
232 | "1 0.0 0.0 \n",
233 | "2 0.0 0.0 \n",
234 | "3 1.0 0.0 \n",
235 | "4 0.0 0.0 \n",
236 | "\n",
237 | "[5 rows x 178 columns]"
238 | ]
239 | },
240 | "execution_count": 2,
241 | "metadata": {},
242 | "output_type": "execute_result"
243 | }
244 | ],
245 | "source": [
246 | "model_data = pd.read_csv('./model_data.csv', encoding='utf-8')\n",
247 | "model_data.head()"
248 | ]
249 | },
250 | {
251 | "cell_type": "code",
252 | "execution_count": 3,
253 | "metadata": {
254 | "collapsed": true
255 | },
256 | "outputs": [],
257 | "source": [
258 | "all_features = list(model_data.columns)\n",
259 | "all_features.remove('CHURN_CUST_IND')"
260 | ]
261 | },
262 | {
263 | "cell_type": "code",
264 | "execution_count": 4,
265 | "metadata": {
266 | "collapsed": true
267 | },
268 | "outputs": [],
269 | "source": [
270 | "X_train, X_test, y_train, y_test = train_test_split(model_data[all_features], model_data['CHURN_CUST_IND'], test_size=0.3, random_state=1)"
271 | ]
272 | },
273 | {
274 | "cell_type": "code",
275 | "execution_count": 5,
276 | "metadata": {},
277 | "outputs": [
278 | {
279 | "name": "stdout",
280 | "output_type": "stream",
281 | "text": [
282 | "y_train_count: 0.0 10851\n",
283 | "1.0 1217\n",
284 | "Name: CHURN_CUST_IND, dtype: int64\n",
285 | "y_test_count: 0.0 4649\n",
286 | "1.0 524\n",
287 | "Name: CHURN_CUST_IND, dtype: int64\n"
288 | ]
289 | }
290 | ],
291 | "source": [
292 | "print('y_train_count: ', y_train.value_counts())\n",
293 | "print('y_test_count: ', y_test.value_counts())"
294 | ]
295 | },
296 | {
297 | "cell_type": "code",
298 | "execution_count": 6,
299 | "metadata": {},
300 | "outputs": [
301 | {
302 | "name": "stdout",
303 | "output_type": "stream",
304 | "text": [
305 | "Accuracy : 0.9115\n",
306 | "AUC Score (Testing): 0.855843\n"
307 | ]
308 | }
309 | ],
310 | "source": [
311 | "# Train using the default parameters of the GBDT module\n",
312 | "gbm0 = GradientBoostingClassifier(random_state=10)\n",
313 | "gbm0.fit(X_train,y_train)\n",
314 | "y_pred = gbm0.predict(X_test)\n",
315 | "y_predprob = gbm0.predict_proba(X_test)[:,1]\n",
316 | "print(\"Accuracy : %.4g\" % metrics.accuracy_score(y_test, y_pred))\n",
317 | "print (\"AUC Score (Testing): %f\" % metrics.roc_auc_score(y_test, y_predprob))"
318 | ]
319 | },
320 | {
321 | "cell_type": "code",
322 | "execution_count": 12,
323 | "metadata": {},
324 | "outputs": [
325 | {
326 | "data": {
327 | "text/plain": [
328 | ""
336 | ]
337 | },
338 | "execution_count": 12,
339 | "metadata": {},
340 | "output_type": "execute_result"
341 | }
342 | ],
343 | "source": [
344 | "gbm0.get_params()  # call the method so the parameter dict is displayed, not the bound method"
345 | ]
346 | },
347 | {
348 | "cell_type": "code",
349 | "execution_count": 20,
350 | "metadata": {},
351 | "outputs": [
352 | {
353 | "data": {
354 | "text/plain": [
355 | "GridSearchCV(cv=5, error_score='raise',\n",
356 | " estimator=GradientBoostingClassifier(criterion='friedman_mse', init=None,\n",
357 | " learning_rate=0.1, loss='deviance', max_depth=5,\n",
358 | " max_features=1.0, max_leaf_nodes=None,\n",
359 | " min_impurity_decrease=0.0, min_impurity_split=None,\n",
360 | " min_samples_leaf=20, min_samples_split=300,\n",
361 | " min_weight_fraction_leaf=0.0, n_estimators=100,\n",
362 | " presort='auto', random_state=10, subsample=0.8, verbose=0,\n",
363 | " warm_start=False),\n",
364 | " fit_params=None, iid=False, n_jobs=1,\n",
365 | " param_grid={'n_estimators': range(50, 201, 10)},\n",
366 | " pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n",
367 | " scoring='roc_auc', verbose=0)"
368 | ]
369 | },
370 | "execution_count": 20,
371 | "metadata": {},
372 | "output_type": "execute_result"
373 | }
374 | ],
375 | "source": [
376 | "# Tune the number of estimators (n_estimators)\n",
377 | "param_test1 = {'n_estimators': range(50, 201, 10)}\n",
378 | "est1 = GridSearchCV(estimator=GradientBoostingClassifier(learning_rate=0.1, min_samples_split=300, min_samples_leaf=20, max_depth=5, \n",
379 | " max_features=1.0, subsample=0.8, random_state=10), \n",
380 | " param_grid = param_test1, scoring='roc_auc', iid=False, cv=5)\n",
381 | "est1.fit(X_train, y_train)"
382 | ]
383 | },
384 | {
385 | "cell_type": "code",
386 | "execution_count": 21,
387 | "metadata": {},
388 | "outputs": [
389 | {
390 | "name": "stderr",
391 | "output_type": "stream",
392 | "text": [
393 | "D:\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n",
394 | " DeprecationWarning)\n"
395 | ]
396 | },
397 | {
398 | "data": {
399 | "text/plain": [
400 | "[mean: 0.85970, std: 0.01218, params: {'n_estimators': 50},\n",
401 | " mean: 0.85998, std: 0.01228, params: {'n_estimators': 60},\n",
402 | " mean: 0.86043, std: 0.01162, params: {'n_estimators': 70},\n",
403 | " mean: 0.86064, std: 0.01188, params: {'n_estimators': 80},\n",
404 | " mean: 0.86067, std: 0.01115, params: {'n_estimators': 90},\n",
405 | " mean: 0.86028, std: 0.01136, params: {'n_estimators': 100},\n",
406 | " mean: 0.85983, std: 0.01133, params: {'n_estimators': 110},\n",
407 | " mean: 0.85983, std: 0.01136, params: {'n_estimators': 120},\n",
408 | " mean: 0.85962, std: 0.01143, params: {'n_estimators': 130},\n",
409 | " mean: 0.85910, std: 0.01211, params: {'n_estimators': 140},\n",
410 | " mean: 0.85833, std: 0.01207, params: {'n_estimators': 150},\n",
411 | " mean: 0.85808, std: 0.01135, params: {'n_estimators': 160},\n",
412 | " mean: 0.85829, std: 0.01167, params: {'n_estimators': 170},\n",
413 | " mean: 0.85774, std: 0.01135, params: {'n_estimators': 180},\n",
414 | " mean: 0.85699, std: 0.01170, params: {'n_estimators': 190},\n",
415 | " mean: 0.85577, std: 0.01126, params: {'n_estimators': 200}]"
416 | ]
417 | },
418 | "execution_count": 21,
419 | "metadata": {},
420 | "output_type": "execute_result"
421 | }
422 | ],
423 | "source": [
424 | "pd.DataFrame(est1.cv_results_)[['params', 'mean_test_score', 'std_test_score']]  # cv_results_ replaces the deprecated grid_scores_ (unavailable from scikit-learn 0.20)"
425 | ]
426 | },
427 | {
428 | "cell_type": "code",
429 | "execution_count": 22,
430 | "metadata": {},
431 | "outputs": [
432 | {
433 | "data": {
434 | "text/plain": [
435 | "{'n_estimators': 90}"
436 | ]
437 | },
438 | "execution_count": 22,
439 | "metadata": {},
440 | "output_type": "execute_result"
441 | }
442 | ],
443 | "source": [
444 | "est1.best_params_"
445 | ]
446 | },
447 | {
448 | "cell_type": "code",
449 | "execution_count": 23,
450 | "metadata": {},
451 | "outputs": [
452 | {
453 | "data": {
454 | "text/plain": [
455 | "0.8606705972896014"
456 | ]
457 | },
458 | "execution_count": 23,
459 | "metadata": {},
460 | "output_type": "execute_result"
461 | }
462 | ],
463 | "source": [
464 | "est1.best_score_"
465 | ]
466 | },
467 | {
468 | "cell_type": "code",
469 | "execution_count": 25,
470 | "metadata": {},
471 | "outputs": [
472 | {
473 | "name": "stdout",
474 | "output_type": "stream",
475 | "text": [
476 | "Accuracy : 0.9103\n",
477 | "AUC Score (Testing): 0.858071\n"
478 | ]
479 | }
480 | ],
481 | "source": [
482 | "gbm1 = GradientBoostingClassifier(n_estimators=90, learning_rate=0.1, min_samples_split=300, min_samples_leaf=20, max_depth=8, \n",
483 | " max_features=1.0, subsample=0.8, random_state=10)\n",
484 | "gbm1.fit(X_train,y_train)\n",
485 | "y_pred = gbm1.predict(X_test)\n",
486 | "y_predprob = gbm1.predict_proba(X_test)[:,1]\n",
487 | "print(\"Accuracy : %.4g\" % metrics.accuracy_score(y_test, y_pred))\n",
488 | "print (\"AUC Score (Testing): %f\" % metrics.roc_auc_score(y_test, y_predprob))"
489 | ]
490 | },
491 | {
492 | "cell_type": "code",
493 | "execution_count": 31,
494 | "metadata": {},
495 | "outputs": [
496 | {
497 | "data": {
498 | "text/plain": [
499 | "GridSearchCV(cv=5, error_score='raise',\n",
500 | " estimator=GradientBoostingClassifier(criterion='friedman_mse', init=None,\n",
501 | " learning_rate=0.1, loss='deviance', max_depth=3,\n",
502 | " max_features=1.0, max_leaf_nodes=None,\n",
503 | " min_impurity_decrease=0.0, min_impurity_split=None,\n",
504 | " min_samples_leaf=20, min_samples_split=300,\n",
505 | " min_weight_fraction_leaf=0.0, n_estimators=90,\n",
506 | " presort='auto', random_state=10, subsample=0.8, verbose=0,\n",
507 | " warm_start=False),\n",
508 | " fit_params=None, iid=False, n_jobs=1,\n",
509 | " param_grid={'max_depth': range(5, 16, 2)}, pre_dispatch='2*n_jobs',\n",
510 | " refit=True, return_train_score='warn', scoring='roc_auc', verbose=0)"
511 | ]
512 | },
513 | "execution_count": 31,
514 | "metadata": {},
515 | "output_type": "execute_result"
516 | }
517 | ],
518 | "source": [
519 | "# Tune max_depth\n",
520 | "param_test2 = {'max_depth': range(5, 16, 2)}\n",
521 | "est2 = GridSearchCV(estimator=GradientBoostingClassifier(n_estimators=90, learning_rate=0.1, min_samples_split=300, min_samples_leaf=20,\n",
522 | " max_features=1.0, subsample=0.8, random_state=10),\n",
523 | " param_grid=param_test2, scoring='roc_auc', iid=False, cv=5)\n",
524 | "est2.fit(X_train, y_train)"
525 | ]
526 | },
527 | {
528 | "cell_type": "code",
529 | "execution_count": 32,
530 | "metadata": {},
531 | "outputs": [
532 | {
533 | "name": "stderr",
534 | "output_type": "stream",
535 | "text": [
536 | "D:\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n",
537 | " DeprecationWarning)\n"
538 | ]
539 | },
540 | {
541 | "data": {
542 | "text/plain": [
543 | "[mean: 0.86067, std: 0.01115, params: {'max_depth': 5},\n",
544 | " mean: 0.85743, std: 0.00934, params: {'max_depth': 7},\n",
545 | " mean: 0.85636, std: 0.01054, params: {'max_depth': 9},\n",
546 | " mean: 0.85099, std: 0.00972, params: {'max_depth': 11},\n",
547 | " mean: 0.85169, std: 0.01273, params: {'max_depth': 13},\n",
548 | " mean: 0.84906, std: 0.01225, params: {'max_depth': 15}]"
549 | ]
550 | },
551 | "execution_count": 32,
552 | "metadata": {},
553 | "output_type": "execute_result"
554 | }
555 | ],
556 | "source": [
557 | "pd.DataFrame(est2.cv_results_)[['params', 'mean_test_score', 'std_test_score']]  # cv_results_ replaces the deprecated grid_scores_ (unavailable from scikit-learn 0.20)"
558 | ]
559 | },
560 | {
561 | "cell_type": "code",
562 | "execution_count": 33,
563 | "metadata": {},
564 | "outputs": [
565 | {
566 | "data": {
567 | "text/plain": [
568 | "{'max_depth': 5}"
569 | ]
570 | },
571 | "execution_count": 33,
572 | "metadata": {},
573 | "output_type": "execute_result"
574 | }
575 | ],
576 | "source": [
577 | "est2.best_params_"
578 | ]
579 | },
580 | {
581 | "cell_type": "code",
582 | "execution_count": 34,
583 | "metadata": {},
584 | "outputs": [
585 | {
586 | "data": {
587 | "text/plain": [
588 | "0.8606705972896014"
589 | ]
590 | },
591 | "execution_count": 34,
592 | "metadata": {},
593 | "output_type": "execute_result"
594 | }
595 | ],
596 | "source": [
597 | "est2.best_score_"
598 | ]
599 | },
600 | {
601 | "cell_type": "code",
602 | "execution_count": 35,
603 | "metadata": {},
604 | "outputs": [
605 | {
606 | "name": "stdout",
607 | "output_type": "stream",
608 | "text": [
609 | "Accuracy : 0.9126\n",
610 | "AUC Score (Testing): 0.859257\n"
611 | ]
612 | }
613 | ],
614 | "source": [
615 | "gbm2 = GradientBoostingClassifier(n_estimators=90, learning_rate=0.1, min_samples_split=300, min_samples_leaf=20, max_depth=5, \n",
616 | " max_features=1.0, subsample=0.8, random_state=10)\n",
617 | "gbm2.fit(X_train,y_train)\n",
618 | "y_pred = gbm2.predict(X_test)\n",
619 | "y_predprob = gbm2.predict_proba(X_test)[:,1]\n",
620 | "print(\"Accuracy : %.4g\" % metrics.accuracy_score(y_test, y_pred))\n",
621 | "print (\"AUC Score (Testing): %f\" % metrics.roc_auc_score(y_test, y_predprob))"
622 | ]
623 | },
624 | {
625 | "cell_type": "code",
626 | "execution_count": 37,
627 | "metadata": {},
628 | "outputs": [
629 | {
630 | "data": {
631 | "text/plain": [
632 | "GridSearchCV(cv=5, error_score='raise',\n",
633 | " estimator=GradientBoostingClassifier(criterion='friedman_mse', init=None,\n",
634 | " learning_rate=0.1, loss='deviance', max_depth=5,\n",
635 | " max_features=1.0, max_leaf_nodes=None,\n",
636 | " min_impurity_decrease=0.0, min_impurity_split=None,\n",
637 | " min_samples_leaf=20, min_samples_split=300,\n",
638 | " min_weight_fraction_leaf=0.0, n_estimators=90,\n",
639 | " presort='auto', random_state=10, subsample=0.8, verbose=0,\n",
640 | " warm_start=False),\n",
641 | " fit_params=None, iid=False, n_jobs=1,\n",
642 | " param_grid={'learning_rate': [0.01, 0.05, 0.1, 0.15]},\n",
643 | " pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n",
644 | " scoring='roc_auc', verbose=0)"
645 | ]
646 | },
647 | "execution_count": 37,
648 | "metadata": {},
649 | "output_type": "execute_result"
650 | }
651 | ],
652 | "source": [
653 | "# Tune learning_rate\n",
654 | "param_test3 = {'learning_rate': [0.01, 0.05, 0.1, 0.15]}\n",
655 | "est3 = GridSearchCV(estimator=GradientBoostingClassifier(n_estimators=90, max_depth=5, min_samples_split=300, min_samples_leaf=20,\n",
656 | " max_features=1.0, subsample=0.8, random_state=10),\n",
657 | " param_grid=param_test3, scoring='roc_auc', iid=False, cv=5)\n",
658 | "est3.fit(X_train, y_train)"
659 | ]
660 | },
661 | {
662 | "cell_type": "code",
663 | "execution_count": 38,
664 | "metadata": {},
665 | "outputs": [
666 | {
667 | "name": "stderr",
668 | "output_type": "stream",
669 | "text": [
670 | "D:\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n",
671 | " DeprecationWarning)\n"
672 | ]
673 | },
674 | {
675 | "data": {
676 | "text/plain": [
677 | "[mean: 0.85457, std: 0.01371, params: {'learning_rate': 0.01},\n",
678 | " mean: 0.86058, std: 0.01204, params: {'learning_rate': 0.05},\n",
679 | " mean: 0.86067, std: 0.01115, params: {'learning_rate': 0.1},\n",
680 | " mean: 0.85709, std: 0.01315, params: {'learning_rate': 0.15}]"
681 | ]
682 | },
683 | "execution_count": 38,
684 | "metadata": {},
685 | "output_type": "execute_result"
686 | }
687 | ],
688 | "source": [
689 | "pd.DataFrame(est3.cv_results_)[['params', 'mean_test_score', 'std_test_score']]  # cv_results_ replaces the deprecated grid_scores_ (unavailable from scikit-learn 0.20)"
690 | ]
691 | },
692 | {
693 | "cell_type": "code",
694 | "execution_count": 39,
695 | "metadata": {},
696 | "outputs": [
697 | {
698 | "data": {
699 | "text/plain": [
700 | "{'learning_rate': 0.1}"
701 | ]
702 | },
703 | "execution_count": 39,
704 | "metadata": {},
705 | "output_type": "execute_result"
706 | }
707 | ],
708 | "source": [
709 | "est3.best_params_"
710 | ]
711 | },
712 | {
713 | "cell_type": "code",
714 | "execution_count": 40,
715 | "metadata": {},
716 | "outputs": [
717 | {
718 | "data": {
719 | "text/plain": [
720 | "0.8606705972896014"
721 | ]
722 | },
723 | "execution_count": 40,
724 | "metadata": {},
725 | "output_type": "execute_result"
726 | }
727 | ],
728 | "source": [
729 | "est3.best_score_"
730 | ]
731 | },
732 | {
733 | "cell_type": "code",
734 | "execution_count": 41,
735 | "metadata": {},
736 | "outputs": [
737 | {
738 | "data": {
739 | "text/plain": [
740 | "GridSearchCV(cv=5, error_score='raise',\n",
741 | " estimator=GradientBoostingClassifier(criterion='friedman_mse', init=None,\n",
742 | " learning_rate=0.1, loss='deviance', max_depth=5,\n",
743 | " max_features=None, max_leaf_nodes=None,\n",
744 | " min_impurity_decrease=0.0, min_impurity_split=None,\n",
745 | " min_samples_leaf=20, min_samples_split=300,\n",
746 | " min_weight_fraction_leaf=0.0, n_estimators=90,\n",
747 | " presort='auto', random_state=10, subsample=0.8, verbose=0,\n",
748 | " warm_start=False),\n",
749 | " fit_params=None, iid=False, n_jobs=1,\n",
750 | " param_grid={'max_features': (0.3, 0.5, 0.8, 1.0)},\n",
751 | " pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n",
752 | " scoring='roc_auc', verbose=0)"
753 | ]
754 | },
755 | "execution_count": 41,
756 | "metadata": {},
757 | "output_type": "execute_result"
758 | }
759 | ],
760 | "source": [
761 | "# Tune max_features\n",
762 | "param_test4 = {'max_features': (0.3, 0.5, 0.8, 1.0)}\n",
763 | "est4 = GridSearchCV(estimator=GradientBoostingClassifier(n_estimators=90, learning_rate=0.1, min_samples_split=300, min_samples_leaf=20,\n",
764 | " max_depth=5, subsample=0.8, random_state=10),\n",
765 | " param_grid=param_test4, scoring='roc_auc', iid=False, cv=5)\n",
766 | "est4.fit(X_train, y_train)"
767 | ]
768 | },
769 | {
770 | "cell_type": "code",
771 | "execution_count": 42,
772 | "metadata": {},
773 | "outputs": [
774 | {
775 | "name": "stderr",
776 | "output_type": "stream",
777 | "text": [
778 | "D:\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n",
779 | " DeprecationWarning)\n"
780 | ]
781 | },
782 | {
783 | "data": {
784 | "text/plain": [
785 | "[mean: 0.85937, std: 0.01201, params: {'max_features': 0.3},\n",
786 | " mean: 0.85815, std: 0.01321, params: {'max_features': 0.5},\n",
787 | " mean: 0.85989, std: 0.01139, params: {'max_features': 0.8},\n",
788 | " mean: 0.86067, std: 0.01115, params: {'max_features': 1.0}]"
789 | ]
790 | },
791 | "execution_count": 42,
792 | "metadata": {},
793 | "output_type": "execute_result"
794 | }
795 | ],
796 | "source": [
797 | "est4.grid_scores_"
798 | ]
799 | },
800 | {
801 | "cell_type": "code",
802 | "execution_count": 43,
803 | "metadata": {},
804 | "outputs": [
805 | {
806 | "data": {
807 | "text/plain": [
808 | "{'max_features': 1.0}"
809 | ]
810 | },
811 | "execution_count": 43,
812 | "metadata": {},
813 | "output_type": "execute_result"
814 | }
815 | ],
816 | "source": [
817 | "est4.best_params_"
818 | ]
819 | },
820 | {
821 | "cell_type": "code",
822 | "execution_count": 44,
823 | "metadata": {},
824 | "outputs": [
825 | {
826 | "data": {
827 | "text/plain": [
828 | "0.8606705972896014"
829 | ]
830 | },
831 | "execution_count": 44,
832 | "metadata": {},
833 | "output_type": "execute_result"
834 | }
835 | ],
836 | "source": [
837 | "est4.best_score_"
838 | ]
839 | },
840 | {
841 | "cell_type": "code",
842 | "execution_count": 51,
843 | "metadata": {},
844 | "outputs": [
845 | {
846 | "data": {
847 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAGDCAYAAADEegxVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3XeYVOXZx/HvDYggzQI2QAHFShFd\nsTeKIgoao4INu0ZfNNaoaMSWSIy9S+wVIlHBCiogsaCAoAgKAjYQFZQuZVnu94/nsBmWLcPuzpwp\nv891zbVzysy5z8zs3POU8zzm7oiIiADUiDsAERHJHEoKIiJSTElBRESKKSmIiEgxJQURESmmpCAi\nIsWUFKRSzOwUMxsRdxxxMbOlZtYqhuO2MDM3s1rpPnYqmNkUMzu0Eo/L689fKikp5AAz+9bMlkdf\nVD+Z2ZNmVj+Vx3T359z98FQ8t5ntb2YjzWyJmS0ys1fNbLdUHCvJeEab2TmJ69y9vrvPStHxdjKz\nF81sfnT+n5vZZWZWMxXHq6woOe1Yledw993dfXQFx1kvEaby85fvlBRyRw93rw/sAXQArok5nkox\ns/2AEcBQYFugJfAZ8EEqfpln2i9uM9sB+Bj4AWjr7o2AE4ACoEE1Hyu2c8+0110SuLtuWX4DvgW6\nJCzfBryesLwxcDvwPfAz8DBQN2H7McAkYDEwE+gWrW8EPAbMBeYAtwA1o21nAO9H9x8Gbi8R01Dg\nsuj+tsB/gHnAN8DF5ZzLf4EHS1n/JvB0dP9QYDbQD5gfnf8pyZxvwmOvAn4CngE2A16L4lsQ3W8W\n7f83oAhYASwF7o/WO7BjdP9J4AHgdWAJ4Ut9h4R4DgemAYuAB4H3gHPKOP9nE9+7Ura3iI59enR+\n84FrE7Z3BD4CFkbv2/1A7YTtDvwf8DXwTbTuHkISWgxMAA5K2L9m9DrPjM5tAtAcGBM917LodekV\n7X804bO0EPgQaFfic3oV8DmwEqhFwmc3in18FMfPwJ3R+u+jYy2NbvuR8PmL9tkdeBv4LXpsv7j/\nL7P1FnsAulXDm7juP1YzYDJwT8L2u4FhwOaEX5uvArdG2zpGX1ZdCSXHpsAu0bZXgEeAesCWwCfA\n+dG24n9K4ODoS8Wi5c2A5YRkUCP6IrkeqA20AmYBR5RyHpsQvoAPK2XbmcDc6P6hwGrgTkICOCT6\ncto5ifNd+9h/RI+tC2wB/DE6fgPgReCVhGOPpsSXOOsnhd+i17IW8BwwKNrWOPqSOy7a9megsOTz\nJTzvT8CZ5bzXLaJj/yuKvT3hC3bXaPtewL7RsVoAXwKXlIj77ei1WZsoT41eg1rA5VEMdaJtVxI+\nTzsDFh1vi5KvQbS8J/ALsA8hmZxO+GxunPA5nURIKnUT1q397H4EnBbdrw/sW+KcayUc6wz+9/lr\nQEiAlwN1ouV94v6/zNZb7AHoVg1vYvjHWkr4JefAu8Cm0TYjfGEm/nLdj//9SnwEuKuU59wq+rJJ\nLFGcBIyK7if+Uxrh19zB0fK5wMjo/j7A9yWe+xrgiVKO2SyKf5dStnUDCqP7hxK+2OslbP838Nck\nzvdQYNXaL70yXs89gAUJy6OpOCk8mrCtO/BVdL8P8FHCNiMk0LKSQiFRSa2M7Wu/IJslrPsE6F3G\n/pcAL5eIu1MFn6cFQPvo/jTgmDL2K5kUHgJuLrHPNOCQhM/pWaV8dtcmhTHAjUDjMs65rKRwEjAx\nXf9vuX5TvV7uONbd3zGzQ4DnCb9QFwJNCL+AJ5jZ2n2N8EsOwq+2N0p5vu2BjYC5CY+rQfhCW4e7\nu5kNIvxzjgFOJlSDrH2ebc1sYcJDahKqiUpaAKwBtgG+KrFtG0JVSfG+7r4sYfk7QsmkovMFmOfu\nK4o3mm0C3EVIPJtFqxuYWU13Lyolzt
L8lHD/d8IvXaKYil+z6LWaXc7z/Eo410odz8x2IpSgCgiv\nQy1CSS3ROu+hmV0OnBPF6kBDwucHwudjZhLxQHivTzezixLW1Y6et9Rjl3A2cBPwlZl9A9zo7q8l\ncdwNiVEqoIbmHOPu7xF+ud4erZpPqMrZ3d03jW6NPDRKQ/gn3aGUp/qBUFJonPC4hu6+exmHfgE4\n3sy2J5QO/pPwPN8kPMem7t7A3buXEvsyQhXCCaU8/4mEEtBam5lZvYTl7YAfkzhfCF98iS4nVI/s\n4+4NCdVhEJJJaftviLmEElB4wpCpmpW9O+8QqrIq6yFCQm0dnUs//nceaxWfj5kdRKjnPxHYzN03\nJVQnrn1MWZ+P0vwA/K3Ee72Ju79Q2rFLcvev3f0kQlXlP4Ah0Xtc0eu/ITFKBZQUctPdQFcz28Pd\n1xDqn+8ysy0BzKypmR0R7fsYcKaZdTazGtG2Xdx9LqEX0B1m1jDatkNUElmPu08kNNQ+Cgx397Ul\ng0+AxWZ2lZnVNbOaZtbGzPYuI/arCb82LzazBma2mZndQqgCurHEvjeaWe3oi+1o4MUkzrc0DQiJ\nZKGZbQ70L7H9Z0JbSGW8DrQ1s2OjHjf/B2xdzv79gf3N7J9mtnUU/45m9qyZbZrE8RoQ2jCWmtku\nwAVJ7L+a8N7VMrPrCSWFtR4Fbjaz1ha0M7Mtom0lX5d/AX8ys32ifeuZ2VFmllSvKTM71cyaRO/h\n2s9PURTbGsp+D14DtjazS8xs4+hzs08yx5T1KSnkIHefBzxNqGOH8EtwBjDWzBYTfo3uHO37CaER\n9y7CL8T3CNUAEOrDawNTCVU7Qyi/auMFoAuh+mptLEVAD0I9/TeEX/KPEno2lRb7+8ARhIbZuYRq\noQ7Age7+dcKuP0Ux/Uho2P2Tu6+tcirzfMtwN6HRdj4wFnirxPZ7CKWgBWZ2bznPU9r5zCeUfG4j\nVA3tRuhhs7KM/WcSEmALYIqZLSKUusYT2owqcgWh+m4J4Ut6cAX7Dyf07JpOeK1XsG4Vz52E9poR\nhGTzGOG1ArgBeMrMFprZie4+ntCedD/hvZlBqPtPVjfCOS8lvOa93X2Fu/9O6AX2QXSsfRMf5O5L\nCB0lehA+F18Dh23AcSXB2t4iIlkjugL2WXcvrxomI5lZDUKX2FPcfVTc8YiUpJKCSIqZ2RFmtqmZ\nbcz/6vjHxhyWSKmUFERSbz9C75j5hCqOY919ebwhiZRO1UciIlJMJQURESmmpCAiIsWy7ormxo0b\ne4sWLeIOQ0Qkq0yYMGG+uzepaL+sSwotWrRg/PjxcYchIpJVzOy7ZPZT9ZGIiBRTUhARkWJKCiIi\nUkxJQUREiikpiIhIMSUFEREppqQgIiLFlBRERKSYkoKIiBRLWVIws8fN7Bcz+6KM7WZm95rZDDP7\n3Mz2TFUsIiKSnFSWFJ4kTK9XliOB1tHtPMKE4yIiEqOUJQV3HwP8Vs4uxwBPezAW2NTMypv/V0Qk\nf61Zk5bDxDkgXlPWnSB8drRubskdzew8QmmC7bbbLi3BiYikQvsbR7BoeeEGPeb8j4fQ/sfpXNzz\nL8z45zEpiiyIMylYKetKnQbO3QcCAwEKCgo0VZyIVIvKfEFXVaO6G/HtgKOSf8DNN8PoJ6F3b7r/\nvXvK4lorzqQwG2iesNwM+DGmWEQkxeL4Aq7IBn9Bp9vnn0P//nDaafDEE1CzZsoPGWdSGAb0NbNB\nwD7AIndfr+pIRLJXYiLI+C/gTNSuHYwZA/vtl5aEAClMCmb2AnAo0NjMZgP9gY0A3P1h4A2gOzAD\n+B04M1WxiEhqlVUKUCKoBHe46io47DA48kg48MC0Hj5lScHdT6pguwP/l6rji0h6tL9xBIC+/KvD\nmjXQty88FPXQP/LItIeQddNxikg8yisNfNb/8BgiyjFFRXD++fDYY/CXv8CAAbGEoaQgIuVamwxU\nFZ
RCRUVw5pnwzDPw17/CjTeCldZBM/WUFESkWGmlASWDNKhRAzbZJHQ/ve66WENRUhDJYyWTgBJA\nmq1aBb/8As2ahXaEmEoHiZQURPKQqoQywMqVcMIJ8NlnMGUK1K8fd0SAkoJIVqrqhWBKBjFbvhz+\n8AcYPhwefDBjEgIoKYhkBVXz5JBly6BnTxg1Ch59FM4+O+6I1qGkIJKhdDVwjurXD0aPhqeeCsNX\nZBglBZGYVFQFpESQo266CY44ArqnfnC7ylBSEEkzNfLmoQULwrUHt94KjRplbEIAJQWRaqcSgKxj\n/nzo2hWmTg29jQ44IO6IyqWkIFINVP8vpfrlF+jcGWbMgKFDMz4hgJKCSJWoKkjKNHduSAjffguv\nvRbuZwElBZFK0uigUq6FC8MFam+9BQcfHHc0SVNSEKmARgeVDfLbb7DZZrDrrvDVV7DRRnFHtEGU\nFEQSaEA4qZKZM6FTJzj33DCwXZYlBFBSEFEjsVSPadNCu8GKFXBU9n6GlBQk56mLqKTc1KkhIaxZ\nE4avaNs27ogqTUlBcpZ6BklaLFsGXbqE+6NHh7aELKakIDlFVUGSdvXqwX33hdLBTjvFHU2VKSlI\nTlCpQNLuk0/CtQjHHAN//GPc0VQbJQXJerpeQNLugw/gyCOhadMwjlEW9jIqi5KCZK3E0oGuF5C0\nGT0ajj46JIR33smphABKCpJl1GYgsXrnnTBBTsuW8O67sPXWcUdU7ZQUJGuomkhiN2IE7LhjSA5b\nbhl3NClRI+4ARJK1aHmhqokkHitWhL//+EdoT8jRhAAqKUiGK1ldJJJ2Q4bA5ZeHi9JatYIGDeKO\nKKWUFCQjqYupZITnn4c+fWCffaBx47ijSQslBckoSgaSMZ56Cs48Mwx7/dprUL9+3BGlhZKCZAw1\nJEvGGDYsJITOncOMaZtsEndEaaOkILHT9QaScTp1gquugv79oU6duKNJKyUFiY2qiiTjDBoULkyr\nXx9uvTXuaGKhLqkSi8SqIpUOJCMMGAAnnQR33x13JLFSSUHSpmT3UiUDyQjucPPNoaropJPg6qvj\njihWSgqScqomkozlHqbN/Pvf4fTT4bHHoGbNuKOKlZKCpNyi5YVKBpKZ5s+HJ54Icyo//DDUUI26\nkoKkVPsbR+hKZMk87uFvkyYwfnwY2E4JAVBSkBRRN1PJWGvWwIUXQt26cOedsO22cUeUUZQUJCVU\nZSQZqagoVBU98UTeNyiXReUlqXaqMpKMtHo1nHFGSAj9+4fGZbO4o8o4KilItVp7/YGqjCTjnHUW\nPPss/O1v0K9f3NFkLCUFqVaqNpKMddxx0L59GAZbyqSkINVG1UaScVasgI8+gsMOg2OPjTuarKCk\nIFWiq5QlYy1fHhLBqFEwfTq0aBF3RFlBSUEqRVcpS0Zbtgx69IDRo+HRR5UQNoCSgmwwzXsgGW3J\nEjjqqDCX8tNPw6mnxh1RVlFSkA2i3kWS8Z57Dj78MEyl2atX3NFkHSUFSYquUJascf75sP/+0K5d\n3JFkJSUFKZfaDiQrzJ8PJ58c5kLYbTclhCpQUpBy6boDyXg//xzmUp45E+bMCUlBKi2lw1yYWTcz\nm2ZmM8xsvYFGzGw7MxtlZhPN7HMz657KeGTD6LoDyXg//giHHgrffAOvvw5du8YdUdZLWVIws5rA\nA8CRwG7ASWZWMoVfB/zb3TsAvYEHUxWPbLhFywvVfiCZ68cf4ZBDYPZseOst6NQp7ohyQipLCh2B\nGe4+y91XAYOAY0rs40DD6H4j4McUxiMbQKUEyXibbgpt2sCIEXDQQXFHkzNS2abQFPghYXk2sE+J\nfW4ARpjZRUA9oEsK45ENoLYEyVizZsEWW0CjRvDyy3FHk3NSWVIobUxaL7F8EvCkuzcDugPPmNl6\nMZnZeWY23szGz5s3LwWhSiKVEiRjffVVKBWcdlrckeSsVCaF2UDz
hOVmrF89dDbwbwB3/wioAzQu\n+UTuPtDdC9y9oEmTJikKV9ZSW4JkpC++CI3KRUVhLgRJiVQmhXFAazNraWa1CQ3Jw0rs8z3QGcDM\ndiUkBRUFYqRSgmSkzz4LI53WqBHGM2rTJu6IclbK2hTcfbWZ9QWGAzWBx919ipndBIx392HA5cC/\nzOxSQtXSGe5esopJ0kBXLEvGcg8zptWpAyNHQuvWcUeU0yzbvoMLCgp8/PjxcYeRUzSekWS8GTOg\nZk1o2TLuSLKWmU1w94KK9tMczaI2BMlM778fZklzhx13VEJIEyWFPKc2BMlIo0dDt27w2muwYEHc\n0eQVJYU8pmojyUhvvw3du8P228N778Hmm8cdUV5RUshTSgiSkd54I8yYttNOobSw9dZxR5R3NEpq\nntIVy5Kx9toLXn1VJYSYqKSQh9SOIBnnh2hEnO7dQwOzEkJslBTyjKqNJOM89xzssEMY6RTAShsh\nR9JFSSGPKCFIxnnyyTCO0YEHhpvETkkhTyghSMYZOBDOPBO6dAldT+vXjzsiQUkhLyghSMYZNw7O\nPz+0IQwbBptsEndEElFSyHFKCJKR9t47tCW89FIY00gyhpJCDlNCkIxz110waVK4f/LJsPHG8cYj\n61FSyGEa00gyhjvceCNcdhk8+mjc0Ug5dPGaiKSWO1x7Ldx6axgC+5574o5IyqGkkIMS50YQiZU7\nXHEF3HknnHcePPRQmChHMpaSQo5Z246gISwkI6xeDV9+CX37wr336sK0LKCkkGM0ppFkhDVrYOlS\naNgQXnkFNtpICSFLqBwnItWrqAjOPjvMqbx8OdSurYSQRVRSyBFqR5CMsHo1nH46PP883HCDrkHI\nQkoKOULVRhK7wkI45RR48UX4+9/hmmvijkgqQUlBRKrHZZeFhHDHHeG+ZCUlhRyg+REkI1xxBXTo\nAGedFXckUgVqaM4BunJZYvP772HoijVrwpzKSghZT0khy6mUILFZuhSOOgouvxw++ijuaKSaqPoo\ni2nAO4nN4sVh2OuPPoJnn4UDDog7IqkmSgpZTD2OJBYLF0K3bjBhAgwaBCecEHdEUo2UFLKUqo0k\nNlOmhKErhgyBY46JOxqpZkoKWUjVRhKLVavC1ckHHADffgubbRZ3RJICamjOMkoIEouffoK99oLH\nHw/LSgg5SyWFLKKEILGYMwc6dQp/W7WKOxpJMSWFLKGEILH4/vuQEH75Bd56Cw48MO6IJMWUFLKE\nehpJ2i1eDIccAgsWwIgRsO++cUckaaCkICKla9gQLr4YDjoICgrijkbSREkhC6j7qaTVV1+Fq5UL\nCuDSS+OORtJMSSELqOpI0uaLL6BzZ9h883C/Zs24I5I0U5dUEQkmTYJDD4VatcIUmkoIeUlJQURg\n/PjQy2iTTeC992DnneOOSGKi6qMMp/YESYv77oNGjWDUKGjRIu5oJEZKChlM1yZIyrmDGQwcCL/9\nBttsE3dEEjNVH2UwTZ4jKTVyJOy/P8yfDxtvrIQgQJJJwcxqm9mOqQ5GRNJk+PAwQc6SJVBUFHc0\nkkEqTApmdhQwGXg7Wt7DzF5OdWD5Tm0JkjKvvQY9e4bG5FGjYKut4o5IMkgyJYWbgH2AhQDuPglQ\nqSHFVHUkKfHmm3DccdCuXag+atIk7ogkwySTFArdfWGJdZ6KYEQkxdq3hxNPhHfeCReoiZSQTFL4\n0sxOBGqYWUszuxsYm+K48pqqjqTavfcerF4N224b5lRu1CjuiCRDJZMU+gJ7AWuAl4AVwJ9TGVS+\nU9WRVKvHH4fDDoO77oo7EskCySSFI9z9KnfvEN2uBo5MdWAiUg0efhjOPhu6doX/+7+4o5EskExS\nuK6UdddWdyASqOpIqs2998IFF4Sup0OHhiEsRCpQ5hXNZnYE0A1oamZ3JmxqSKhKkhTQiKhSLX78\nEfr1gz/8AQYNgtq1445IskR5
w1z8AnxBaEOYkrB+CXB1KoMSkSradlv48EPYdVfYSCVPSV6ZScHd\nJwITzew5d1+RxpjyUvsbR7BoeaGqjqTy3OGGG2DLLUP7Qbt2cUckWSiZNoWmZjbIzD43s+lrb8k8\nuZl1M7NpZjbDzEotXZjZiWY21cymmNnzGxR9jlg78N23A45SryOpHHe45hq46SaYODEsi1RCMqOk\nPgncAtxO6HV0Jkm0KZhZTeABoCswGxhnZsPcfWrCPq2Ba4AD3H2BmW25wWeQA9SOIFXiDpddBnff\nDX/6EzzwQBj5VKQSkikpbOLuwwHcfaa7XwcclsTjOgIz3H2Wu68CBgHHlNjnXOABd18QPf8vyYee\nG9TbSKrEHS66KCSEiy+GBx+EGhr8WCovmU/PSjMzYKaZ/cnMegDJ/KJvCvyQsDw7WpdoJ2AnM/vA\nzMaaWbfSnsjMzjOz8WY2ft68eUkcOjtovgSpMjPYYQe48sqQGFRCkCpKpvroUqA+cDHwN6ARcFYS\njyvt01myorMW0Bo4FGgG/NfM2pQca8ndBwIDAQoKCnKmslTVRlJpRUUwfXroXXTppXFHIzmkwpKC\nu3/s7kvc/Xt3P83dewLfJfHcs4HmCcvNgB9L2Weouxe6+zfANEKSEJGyrF4Np50G++wDc+bEHY3k\nmHKTgpntbWbHmlnjaHl3M3ua5AbEGwe0jgbRqw30BoaV2OcVovaJ6Bg7AbM28ByyktoSpFIKC6F3\nb3jhBbj2WmhaskZWpGrKTApmdivwHHAK8JaZXQuMAj4jfHmXy91XEwbTGw58Cfzb3aeY2U1m1jPa\nbTjwq5lNjZ77Snf/tSonlC006J1ssJUr4fjj4T//gTvvhKuuijsiyUHltSkcA7R39+Vmtjmh6qe9\nu09L9snd/Q3gjRLrrk+478Bl0S0v6CI1qbT77oNhw+D++zW4naRMeUlhhbsvB3D338zsqw1JCFI6\nNS5Lpf35z9C2LRxxRNyRSA4rr02hlZm9FN1eBlokLL+UrgBF8trSpXDOOfDTT2EMIyUESbHySgp/\nLLF8fyoDEZESFi+G7t1h7Fg45hjo0SPuiCQPlDcg3rvpDCQfqMeRJG3BAujWDT79FAYPVkKQtEnm\n4jWpJmpPkKT8+iscfjhMnhx6GvXsWfFjRKqJkoJIpikqCn+HDoUjNfOtpFfSScHMNnb3lakMJpep\n6kgqNG8ebLppmA9h3DgNbCexqPBTZ2YdzWwy8HW03N7M7kt5ZDlGF6tJuebMgQMPhPPOC8tKCBKT\nZD559wJHA78CuPtnJDd0tkRUSpByffcdHHwwzJ0bup+KxCiZ6qMa7v6drTskb1GK4sk5Gh5byjVr\nFhx2GCxaBG+/HQa5E4lRMknhBzPrCHg0m9pFQFLTceY7JQQpV1FR6Fm0dCmMHAl77hl3RCJJJYUL\nCFVI2wE/A+9E66QC6oIq5apZE/71L6hXD9q1izsaESC5pLDa3XunPJIco3YEKdPkyTBmTBjUbr/9\n4o5GZB3JJIVxZjYNGAy85O5LUhxTTlApQUo1cSJ07Qp16sCpp0KjRnFHJLKOZGZe2wG4BdgLmGxm\nr5iZSg7lUClBSvXJJ9CpU6gueu89JQTJSEl1hnb3D939YmBPYDFh8h0pg65JkPV8+CF06QKbbRaq\njnbYIe6IREqVzMVr9c3sFDN7FfgEmAfsn/LIspRKCVKqKVNgm21CQth++7ijESlTMm0KXwCvAre5\n+39THE/WU1uCrGPJEmjQAM49N7Qh1K0bd0Qi5Uqm+qiVu1+khCCygd56C1q2hI8+CstKCJIFyiwp\nmNkd7n458B8z85Lb3f24lEaWhVR1JMVefRWOPx523x122inuaESSVl710eDor2ZcS5KqjgQIcyD0\n7g0dOsDw4aFxWSRLlDfz2ifR3V3dfZ3EYGZ9Ac3MlkClBAFCVVGvXmEMozfeULdTyTrJtCmcVc
q6\ns6s7kGynbqgCQMeOcPPNoT1BCUGyUHltCr2A3kBLM3spYVMDYGGqAxPJKi+8AAcdBM2awTXXxB2N\nSKWV16bwCWEOhWbAAwnrlwATUxlUtlHVUZ576CG48EK44AJ48MG4oxGpkvLaFL4BviGMiirlUANz\nHrvnHrjkEujRA+66K+5oRKqszDYFM3sv+rvAzH5LuC0ws9/SF6JIhvrnP0NCOO44GDIENt447ohE\nqqy86qO1U242TkcgIlllxQp49tnQ9fTpp2EjVR9Kbiiv+mhNdLc58KO7rzKzA4F2wLOEgfFE8ot7\nmDGtTh0YPToMYVErmdFiRLJDMl1SXyFMxbkD8DSwK/B8SqMSyUTucPXV8Ic/QGFhuChNCUFyTDJJ\nYY27FwLHAXe7+0VA09SGJZJh3OHSS+G226B58zCVpkgOSiYprDazE4DTgNeidapAjag7ah5YsyZM\nnXnPPfDnP8MDD0CNpKYiEck6yV7RfBhh6OxZZtYSeCG1YWUPXcmcB668MlyL8Je/hG6nZnFHJJIy\nFVaIuvsXZnYxsKOZ7QLMcPe/pT40kQxx8smw+ebQr58SguS8CpOCmR0EPAPMAQzY2sxOc/cPUh2c\nSGwKC8Pw18cdB3vtFW4ieSCZ6qO7gO7ufoC77w8cBdyT2rBEYrRqVbj+4I9/hI8/jjsakbRKpj9d\nbXefunbB3b80s9opjClrqJE5B61cCSecEEoJd98dhsAWySPJJIVPzewRQhUSwCloQDxAYx7lnOXL\nwzUIw4eHge0uuCDuiETSLpmk8CfgYuAvhDaFMcB9qQxKJBajR8M778Cjj8LZmjJE8lO5ScHM2gI7\nAC+7+23pCSk7qOooh7iHXkVHHglffQU77hh3RCKxKW+U1H6EIS5OAd42s9JmYMtbuj4hRyxaBF26\nhBICKCFI3iuvpHAK0M7dl5lZE+AN4PH0hJW52t84gkXLC1VKyAULFsARR8DEieGKZREpNymsdPdl\nAO4+z8zy/rr+9jeOAFDjci6YPx+6doWpU+Gll8IkOSJSblJolTA3swE7JM7V7O7HpTSyDKTeRjli\n4UI47DCYMQOGDoVu3eKOSCRjlJcU/lhi+f5UBpLp1LCcQxo2hEMOCdchdO4cdzQiGaW8SXbeTWcg\nmU6lhBwwezasXg0tWsD9ef0bR6RMmiEkCSol5IBvv4VOnaBRI5gwQUNfi5RBSSEJKiVkuZkzQ0JY\nvBgGD1ZCEClH0v8dZrZxKgMRSYlp0+Dgg2HZMhg5EvbeO+6IRDJahUnBzDqa2WTg62i5vZlpmAvJ\nDldeGdoRRo2CDh3ijkYk4yVTUrgXOBr4FcDdPyPMxFYhM+tmZtPMbIaZXV3OfsebmZtZQTLPm05q\nT8hyTz0FY8ZA27ZxRyKSFZJnbDF9AAAeFElEQVRJCjXc/bsS64oqepCZ1QQeAI4EdgNOMrPdStmv\nAWHAvYwcuF7DWWShTz+Fk06CFStgs81g553jjkgkaySTFH4ws46Am1lNM7sEmJ7E4zoSpu6c5e6r\ngEHAMaXsdzNwG7Ai2aBFyvTxx6FR+aOPYN68uKMRyTrJJIULgMuA7YCfgX2jdRVpCvyQsDw7WlfM\nzDoAzd39tfKeyMzOM7PxZjZ+nv7RpSwffBCGrthiC3jvPWjePO6IRLJOhV1S3f0XoHclnru0Gc69\neGMYS+ku4IwkYhgIDAQoKCjwCnavNmpPyCJjxkD37tC0aehl1LRpxY8RkfVUmBTM7F8kfJmv5e7n\nVfDQ2UDiT7VmwI8Jyw2ANsBoMwPYGhhmZj3dfXxFcaWDrk/IIpttBnvtFa5D2HrruKMRyVrJXLz2\nTsL9OsAfWLdaqCzjgNZm1hKYQyhtnLx2o7svAhqvXTaz0cAVmZIQJEtMmwY77RR6F40eHSbLEZFK\nq7BNwd0HJ9yeAo4j9Caq6HGrgb7AcOBL4N/uPsXMbjKznl
UNXIRhw6BdOxg4MCwrIYhUWWWGuWgJ\nbJ/Mju7+BmFynsR115ex76GViEXy1ZAhodvpnntCr15xRyOSM5JpU1jA/9oUagC/AWVeiCaSci+8\nAKedBvvsA2++GYbCFpFqUW5SsNAC3J7QJgCwxt3T1vtHZD3ffw+nnw4HHgivvQb168cdkUhOKbdN\nIUoAL7t7UXTLm4Sg7qgZarvtQlvCG28oIYikQDIXr31iZnumPJIMo+EtMsxDD4WSAYTpMzfZJN54\nRHJUmUnBzNZWLR1ISAzTzOxTM5toZp+mJzwR4K674MIL4Zln4o5EJOeV16bwCbAncGyaYhFZ34AB\ncM01cPzx8OyzcUcjkvPKSwoG4O4z0xSLyLpuugn69w9dT59+GmppokCRVCvvv6yJmV1W1kZ3vzMF\n8YgE7vDzz6Gn0WOPQc2acUckkhfKSwo1gfqUPrBdTlPPoxi5hyGvt9wS7osm+NOcyiJpU15SmOvu\nN6UtkgyigfBi4g6XXAIvvQQTJoTEICJpVd5PsLwrIUiM1qyBCy6Ae++FE06AJk3ijkgkL5WXFDqn\nLYoMoqqjGBQVwTnnwCOPwNVXwx13aHA7kZiUWX3k7r+lM5BMoaqjGPzjH/DEE6GnUf/+SggiMVIf\nP4lf376wzTZw5plxRyKS99StQ+KxalW4DuH338Mop0oIIhlBSUHSb8UK+OMfQ1XRW2/FHY2IJFD1\nUQI1MqfB8uVw7LEwYgQ8/DAcd1zcEYlIAiWFBGpkTrFly6BHjzCX8uOPq8pIJAMpKUj6zJ0L06aF\ncYxOPTXuaESkFEoKknq//w5168KOO8L06VCvXtwRiUgZ1NAsqfXbb3DwwXDddWFZCUEkoykpRNTI\nnALz50OnTjB5Muy3X9zRiEgSVH1ESAiApt+sTj//DJ07w8yZ8OqrcLheW5FsoKSAeh1Vu9WroWtX\n+OYbeP31UFoQkaygpCDVr1YtuP562GorOOiguKMRkQ2gpCDV59tvQ/tBjx5hTmURyTp5nxTUwFxN\nZswI1UQrV4Z2hPr1445IRCoh75OC2hOqwVdfhUbllSvhnXeUEESyWN4nBamiL76ALl3CVJqjR0Ob\nNnFHJCJVoKQgVfOf/0CNGjByJOyyS9zRiEgV6eI1qZyiovD3+uth4kQlBJEcoaQgG27sWNh99zC4\nnVnoeioiOUFJQTbM+++HC9NWrw6D3IlITlFSkOSNHg1HHAFNm8J778F228UdkYhUs7xOCrpGYQOM\nHQvdu0OLFiE5NG0ad0QikgJ5nRQWLS/UIHjJatsW+vQJCWHrreOORkRSJK+TgiRh5EhYsiTMg/Dw\nw9CkSdwRiUgKKSlI2V58MbQhXHtt3JGISJooKUjpnnsOeveGffeFW26JOxoRSRMlBVnfk0/CaafB\nIYfAm29Cw4ZxRyQiaaKkIOtatgz++tcwntFrr2lwO5E8o7GP5H/cQ4PymDGwzTZQp07cEYlImqmk\nIMEdd8All4TE0LKlEoJInlJSELj1VrjiCpg7938D3YlIXlJSyGfucOON0K8fnHwyPP98mF9ZRPJW\n3iYFDXEB3HBDuJ1xBjz9tBKCiORvUtAQF0BBAVx4ITz2GNSsGXc0IpIB8jYp5K01a2DcuHC/Rw94\n4IEwc5qICEoK+WXNGrjggnCV8qRJcUcjIhlIlcj5oqgIzjknXK3crx+0bx93RCKSgVJaUjCzbmY2\nzcxmmNnVpWy/zMymmtnnZvaumW2fynjy1urVYdjrJ58MvY1uuSVMoykiUkLKkoKZ1QQeAI4EdgNO\nMrPdSuw2EShw93bAEOC2VMWT1156KXQ3/fvf4frrlRBEpEyprD7qCMxw91kAZjYIOAaYunYHdx+V\nsP9Y4NQUxpO/TjghDFtx0EFxRyIiGS6V1UdNgR8SlmdH68pyNvBmCuPJLytWhCqjL74IJQMlBBFJ\nQiqTQml1FF7qjmanAg
XAP8vYfp6ZjTez8fPmzavGEHPU779Dz57wzDMwfnzc0YhIFkllUpgNNE9Y\nbgb8WHInM+sCXAv0dPeVpT2Ruw909wJ3L2ii6SDLt3QpHHUUvPMOPP54uFpZRCRJqUwK44DWZtbS\nzGoDvYFhiTuYWQfgEUJC+CWFseSHJUugW7cw9PUzz8CZZ8YdkYhkmZQ1NLv7ajPrCwwHagKPu/sU\nM7sJGO/uwwjVRfWBFy30iPne3XumKqacV6tWmCVt0KDQuCwisoFSevGau78BvFFi3fUJ97uk8vh5\n47ffwlAVm24Kr7+uLqciUml5OcxFTo2QOm8eHHYYHHNMGApbCUFEqiAvh7lYtLyQbwccFXcYVffT\nT9C5M8yaBcOGKSGISJXlZVLICXPmQKdOMHs2vPFGKC2IiFSRkkK2Ov30MH3m8OFw4IFxRyMiOUJJ\nIVsNHAjz50PHjnFHIiI5JC8bmrPW11+HYa/XrIFWrZQQRKTaKSlkiy+/hEMOgX/9K7QjiIikgJJC\nNvjiCzj00FBCGD0attsu7ohEJEcpKWS6SZNCQqhVC957D3bfPe6IRCSHKSlkul9+gc03Dwlh553j\njkZEcpx6H2WqX3+FLbaAww+HKVNgoxy5AltEMppKCplozJjQu2jIkLCshCAiaaKkkGlGjoQjj4Rt\nt4X99487GhHJM0oKmWT48DBBTqtWoZfRttvGHZGI5BklhUwxc2aYQnOXXWDUKNhqq7gjEpE8pKSQ\nKXbYAe6/H959Fxo3jjsaEclTSgpxGzIEPv003D/33ND9VEQkJkoKcXr2WejVC26+Oe5IREQAJYX4\nPP449OkTrlZ+9tm4oxERAZQU4vHII3D22dC1K7z2GtSrF3dEIiKAkkL6uYdEcNRRMHQo1K0bd0Qi\nIsU0zEU6rVgBderAiy9CjRpQu3bcEYmIrEMlhXT5299gv/1g0aKQGJQQRCQD5V1SaH/jCBrVTeNY\nQu7Qvz9cdx20aaP2AxHJaHlXfbRoeSHfDjgqPQdzh2uugX/8A848M8yaVrNmeo4tIlIJeVdSSKvb\nbgsJ4U9/gkcfVUIQkYyXdyWFtDr5ZCgshGuvBbO4oxERqZBKCtVtzRp44gkoKoLmzUNbghKCiGQJ\nJYXqVFQEZ50VbsOGxR2NiMgGU/VRdVm9Ogxb8cILcNNN8Ic/xB2RiMgGU1KoDqtWhfaD//wHBgyA\nq66KOyIRkUpRUqgOU6fCm2/CnXfCpZfGHY0IhYWFzJ49mxUrVsQdiqRZnTp1aNasGRtVcm53JYWq\nWLMmDFexxx4wfTo0bRp3RCIAzJ49mwYNGtCiRQtMHR3yhrvz66+/Mnv2bFq2bFmp51BDc2X9/jt0\n6wYDB4ZlJQTJICtWrGCLLbZQQsgzZsYWW2xRpRKikkJlLF0K3buHqTPr1Ik7GpFSKSHkp6q+76o+\n2lCLFoWE8PHHYXKck06KOyIRkWqjksKGWLUKDj8cPvkEBg9WQhApR82aNdljjz1o06YNPXr0YOHC\nhcXbpkyZQqdOndhpp51o3bo1N998M+5evP3NN9+koKCAXXfdlV122YUrrrii1GO88sor3HTTTSk/\nl8pydy6++GJ23HFH2rVrx6dr52Mv4YUXXqBt27a0a9eObt26MX/+/OJt9913HzvvvDO77747f/nL\nXwCYPHkyZ5xxRuqCzqbbXnvt5VWx/VWvVenx/s9/ug8dWrXnEEmxqVOnxh2C16tXr/h+nz59/JZb\nbnF3999//91btWrlw4cPd3f3ZcuWebdu3fz+++93d/fJkyd7q1at/Msvv3R398LCQn/ggQdKPcZ+\n++3n8+bNSzqmwsLCSp1LZb3++uverVs3X7NmjX/00UfesWPHUmNq0qRJ8XlceeWV3r9/f3d3Hzly\npHfu3NlXrFjh7u4///xz8eM6d+7s3333XanHLe39B8Z7Et+xeVVSqPSw2b/8AhMmhPtX
XAE9e1Zv\nYCI5br/99mPOnDkAPP/88xxwwAEcfvjhAGyyySbcf//9DBgwAIDbbruNa6+9ll122QWAWrVqceGF\nF673nNOnT2fjjTemcePGALz66qvss88+dOjQgS5duvDzzz8DcMMNN3Deeedx+OGH06dPH4qKirjy\nyivZe++9adeuHY888ggAS5cupXPnzuy55560bduWoUOHVvm8hw4dSp8+fTAz9t13XxYuXMjcuXPX\n2Wftl/GyZctwdxYvXsy2224LwEMPPcTVV1/NxhtvDMCWW25Z/LgePXowaNCgKsdYUl61KVRq2Oy5\nc6FzZ1i4EGbO1PSZkpVaXP16tT9nsv9LRUVFvPvuu5x99tlAqDraa6+91tlnhx12YOnSpSxevJgv\nvviCyy+/vMLn/eCDD9hzzz2Llw888EDGjh2LmfHoo49y2223cccddwAwYcIE3n//ferWrcvAgQNp\n1KgR48aNY+XKlcUJqnnz5rz88ss0bNiQ+fPns++++9KzZ8/1Gm579erFtGnT1ovnsssuo0+fPuus\nmzNnDs2bNy9ebtasGXPmzGGbbbYpXrfRRhvx0EMP0bZtW+rVq0fr1q154IEHgJD4/vvf/3LttddS\np04dbr/9dvbee28ACgoKGDBgQHGVUnXJq6SwwebMgU6dwt/XX1dCkKyVtjlEEixfvpw99tiDb7/9\nlr322ouuXbsC4ZdxWT1kNqTnzNy5c2nSpEnx8uzZs+nVqxdz585l1apV6/TT79mzJ3Wj/98RI0bw\n+eefM2TIEAAWLVrE119/TbNmzejXrx9jxoyhRo0azJkzh59//pmtt956neMOHjw46Rg9oZ2krHMs\nLCzkoYceYuLEibRq1YqLLrqIW2+9leuuu47Vq1ezYMECxo4dy7hx4zjxxBOZNWsWZsaWW27Jjz/+\nmHQsycqr6qMN8t13cPDBoaQwfDgcckjcEYlklbp16zJp0iS+++47Vq1aVfzrd/fdd2f8+PHr7Dtr\n1izq169PgwYN2H333Zmwtrq2gudP7I9/0UUX0bdvXyZPnswjjzyyzrZ6CTMeujv33XcfkyZNYtKk\nSXzzzTccfvjhPPfcc8ybN48JEyYwadIkttpqq1L7+/fq1Ys99thjvdvTTz+93r7NmjXjhx9+KF6e\nPXt2cdXQWpMmTQJCacnMOPHEE/nwww+LH3/cccdhZnTs2JEaNWoUN0KvWLGiONFVJyWFstx+O/z6\nK7z9NhxwQNzRiGStRo0ace+993L77bdTWFjIKaecwvvvv88777wDhBLFxRdfXFwNcuWVV/L3v/+d\n6dOnA7BmzRruvPPO9Z531113ZcaMGcXLixYtoml0EelTTz1VZjxHHHEEDz30EIWFhUCoolm2bBmL\nFi1iyy23ZKONNmLUqFF89913pT5+8ODBxQkl8Vay6ghCCeXpp5/G3Rk7diyNGjVap+oIoGnTpkyd\nOpV58+YB8Pbbb7PrrrsCcOyxxzJy5MjiOFetWlXchjJ9+nTatGlT5nlWlpJCWe64A8aOhX32iTsS\nkazXoUMH2rdvz6BBg6hbty5Dhw7llltuYeedd6Zt27bsvffe9O3bF4B27dpx9913c9JJJ7HrrrvS\npk2b9RpnAQ4++GAmTpxYXEVzww03cMIJJ3DQQQcVf3GW5pxzzmG33XZjzz33pE2bNpx//vmsXr2a\nU045hfHjx1NQUMBzzz1X3NBdFd27d6dVq1bsuOOOnHvuuTz44IPF2/bYYw8Att12W/r378/BBx9M\nu3btmDRpEv369QPgrLPOYtasWbRp04bevXvz1FNPFVc/jRo1iqOOqv5qQSutziuTFRQUeMmiZ7Ja\nXP16+XWrX34JF18Mzz8PCXWVItnmyy+/LP61mcv+/Oc/06NHD7p06RJ3KGm1cuVKDjnkEN5//31q\n1Vq/abi099/MJrh7QUXPrZLCWpMnh3aDL74I1UYi
kvH69evH77//HncYaff9998zYMCAUhNCVan3\nEcDEidC1axjHaORI2GmnuCMSkSRstdVW9MzD64Zat25N69atU/LcKil8+mnodlqvHrz3nhKCiOQ1\nJYVttoF994UxY2CHHeKORkQkVvmbFCZPDvMqb7NNmDVt++3jjkhEJHb5mRTefTd0Nf3rX+OORCRn\n1a9fv1KPe/LJJ4u7p0r6pTQpmFk3M5tmZjPM7OpStm9sZoOj7R+bWYtUxgOEq5OPPjpUFV1yScoP\nJyKSTVKWFMysJvAAcCSwG3CSme1WYrezgQXuviNwF/CPVMUD0HnGx2GE0112gVGjYKutUnk4EQFG\njx7N0UcfXbzct29fnnzySQDGjRvH/vvvT/v27enYsSNLlixZ57Gvv/46++233zrzC0hqpbJLakdg\nhrvPAjCzQcAxwNSEfY4BbojuDwHuNzPzVFxRt3gxt79+N7RvH0oLm21W7YcQyViHHrr+uhNPhAsv\nDPONd+++/vYzzgi3+fPh+OPX3TZ6dJVDWrVqFb169WLw4MHsvffeLF68eJ2xfF5++WXuvPNO3njj\nDTbT/2vapDIpNAV+SFieDZQcM6J4H3dfbWaLgC2AdX4WmNl5wHkA2223XeWiadiQzca8G7qcNmpU\nuecQkWozbdo0ttlmm+KhoBs2bFi8bdSoUYwfP54RI0ass15SL5VJobQxcEuWAJLZB3cfCAyEMMxF\npSOKPnwieae8X/abbFL+9saNq1QyqFWrFmvWrCleXjvyaHlDaLdq1YpZs2Yxffp0CgoqHJlBqlEq\nG5pnA80TlpsBJQf/Lt7HzGoBjYDfUhiTiKTZ9ttvz9SpU1m5ciWLFi3i3XffBWCXXXbhxx9/ZNy4\ncQAsWbKE1atXFz/mpZdeok+fPkyZMiW22PNRKpPCOKC1mbU0s9pAb2BYiX2GAadH948HRqakPUFE\nYtO8eXNOPPFE2rVrxymnnEKHDh0AqF27NoMHD+aiiy6iffv2dO3adZ35C3beeWeee+45TjjhBGbO\nnBlX+HknpaOkmll34G6gJvC4u//NzG4iTCA9zMzqAM8AHQglhN5rG6bLUpVRUkXyRb6Mkiqlq8oo\nqSkdEM/d3wDeKLHu+oT7K4ATUhmDiIgkLz+vaBYRkVIpKYiISDElBZEcpT4b+amq77uSgkgOqlOn\nDr/++qsSQ55xd3799Vfq1KlT6efQzGsiOahZs2bMnj2befPmxR2KpFmdOnVo1qxZpR+vpCCSgzba\naCNatmwZdxiShVR9JCIixZQURESkmJKCiIgUS+kwF6lgZvOA7yr58MaUGJY7D+ic84POOT9U5Zy3\nd/cmFe2UdUmhKsxsfDJjf+QSnXN+0Dnnh3Scs6qPRESkmJKCiIgUy7ekMDDuAGKgc84POuf8kPJz\nzqs2BRERKV++lRRERKQcOZkUzKybmU0zsxlmdnUp2zc2s8HR9o/NrEX6o6xeSZzzZWY21cw+N7N3\nzWz7OOKsThWdc8J+x5uZm1nW91RJ5pzN7MTovZ5iZs+nO8bqlsRnezszG2VmE6PPd/c44qwuZva4\nmf1iZl+Usd3M7N7o9fjczPas1gDcPaduhKk/ZwKtgNrAZ8BuJfa5EHg4ut8bGBx33Gk458OATaL7\nF+TDOUf7NQDGAGOBgrjjTsP73BqYCGwWLW8Zd9xpOOeBwAXR/d2Ab+OOu4rnfDCwJ/BFGdu7A28C\nBuwLfFydx8/FkkJHYIa7z3L3VcAg4JgS+xwDPBXdHwJ0NjNLY4zVrcJzdvdR7v57tDgWqPwwipkh\nmfcZ4GbgNmBFKduyTTLnfC7wgLsvAHD3X9IcY3VL5pwdaBjdbwT8mMb4qp27jyHMWV+WY4CnPRgL\nbGpm21TX8XMxKTQFfkhYnh2tK3Ufd18NLAK2SEt0qZHMOSc6m/BLI5tVeM5m1gFo7u6vpTOwFErm\nfd4J2MnMPjCz
sWbWLW3RpUYy53wDcKqZzSbMCX9RekKLzYb+v2+QXBw6u7Rf/CW7WCWzTzZJ+nzM\n7FSgADgkpRGlXrnnbGY1gLuAM9IVUBok8z7XIlQhHUooDf7XzNq4+8IUx5YqyZzzScCT7n6Hme0H\nPBOd85rUhxeLlH5/5WJJYTbQPGG5GesXJ4v3MbNahCJnecW1TJfMOWNmXYBrgZ7uvjJNsaVKRefc\nAGgDjDazbwl1r8OyvLE52c/2UHcvdPdvgGmEJJGtkjnns4F/A7j7R0AdwhhBuSqp//fKysWkMA5o\nbWYtzaw2oSF5WIl9hgGnR/ePB0Z61IKTpSo856gq5RFCQsj2emao4JzdfZG7N3b3Fu7egtCO0tPd\nx8cTbrVI5rP9CqFTAWbWmFCdNCutUVavZM75e6AzgJntSkgKuTzl3DCgT9QLaV9gkbvPra4nz7nq\nI3dfbWZ9geGEnguPu/sUM7sJGO/uw4DHCEXMGYQSQu/4Iq66JM/5n0B94MWoTf17d+8ZW9BVlOQ5\n55Qkz3k4cLiZTQWKgCvd/df4oq6aJM/5cuBfZnYpoRrljGz+kWdmLxCq/xpH7ST9gY0A3P1hQrtJ\nd2AG8DtwZrUeP4tfOxERqWa5WH0kIiKVpKQgIiLFlBRERKSYkoKIiBRTUhARkWJKCpJxzKzIzCYl\n3FqUs2+LskaT3MBjjo5G4vwsGiJi50o8x5/MrE90/wwz2zZh26Nmtls1xznOzPZI4jGXmNkmVT22\n5AclBclEy919j4Tbt2k67inu3p4wWOI/N/TB7v6wuz8dLZ4BbJuw7Rx3n1otUf4vzgdJLs5LACUF\nSYqSgmSFqETwXzP7NLrtX8o+u5vZJ1Hp4nMzax2tPzVh/SNmVrOCw40Bdowe2zkap39yNM79xtH6\nAfa/+Sluj9bdYGZXmNnxhPGlnouOWTf6hV9gZheY2W0JMZ9hZvdVMs6PSBgIzcweMrPxFuZRuDFa\ndzEhOY0ys1HRusPN7KPodXzRzOpXcBzJI0oKkonqJlQdvRyt+wXo6u57Ar2Ae0t53J+Ae9x9D8KX\n8uxo2INewAHR+iLglAqO3wOYbGZ1gCeBXu7eljACwAVmtjnwB2B3d28H3JL4YHcfAown/KLfw92X\nJ2weAhyXsNwLGFzJOLsRhrVY61p3LwDaAYeYWTt3v5cwLs5h7n5YNPTFdUCX6LUcD1xWwXEkj+Tc\nMBeSE5ZHX4yJNgLuj+rQiwhj+pT0EXCtmTUDXnL3r82sM7AXMC4a3qMuIcGU5jkzWw58Sxh+eWfg\nG3efHm1/Cvg/4H7C/AyPmtnrQNJDc7v7PDObFY1Z83V0jA+i592QOOsRhn1InHXrRDM7j/B/vQ1h\nwpnPSzx232j9B9FxahNeNxFASUGyx6XAz0B7Qgl3vUlz3P15M/sYOAoYbmbnEIYZfsrdr0niGKck\nDphnZqXOsRGNx9ORMAhbb6Av0GkDzmUwcCLwFfCyu7uFb+ik4yTMQDYAeAA4zsxaAlcAe7v7AjN7\nkjAwXEkGvO3uJ21AvJJHVH0k2aIRMDcaI/80wq/kdZhZK2BWVGUyjFCN8i5wvJltGe2zuSU/P/VX\nQAsz2zFaPg14L6qDb+TubxAacUvrAbSEMHx3aV4CjiXMAzA4WrdBcbp7IaEaaN+o6qkhsAxYZGZb\nAUeWEctY4IC152Rmm5hZaaUuyVNKCpItHgRON7OxhKqjZaXs0wv4wswmAbsQpiycSvjyHGFmnwNv\nE6pWKuTuKwgjUL5oZpOBNcDDhC/Y16Lne49QiinpSeDhtQ3NJZ53ATAV2N7dP4nWbXCcUVvFHcAV\n7v4ZYW7mKcDjhCqptQYCb5rZKHefR+gZ9UJ0nLGE10oE0CipIiKSQCUFEREppqQgIiLFlBRERKSY\nkoKIiBRTUhARkWJKCiIiUkxJQUREiikpiIhIsf8Hs5uwXg7/QzkAAAAASUVORK
5CYII=\n",
848 | "text/plain": [
849 | ""
850 | ]
851 | },
852 | "metadata": {},
853 | "output_type": "display_data"
854 | }
855 | ],
856 | "source": [
857 | "# Combine the tuned parameters from the searches above and build a new model\n",
858 | "clf = GradientBoostingClassifier(n_estimators=90, learning_rate=0.1, min_samples_split=300, min_samples_leaf=20,\n",
859 | " max_depth=5, max_features=1.0, subsample=0.8, random_state=10)\n",
860 | "clf.fit(X_train, y_train)\n",
861 | "y_predict_proba = clf.predict_proba(X_test)[:,1]\n",
862 | "fpr, tpr, thresholds = roc_curve(y_test, y_predict_proba)\n",
863 | "roc_auc = auc(fpr, tpr)\n",
864 | "fig = plt.figure(figsize=(6, 6))\n",
865 | "ax = fig.add_subplot(111)\n",
866 | "ax.plot(fpr, tpr, lw=1, label='ROC (area = %.2f)' % (roc_auc))\n",
867 | "ax.plot([0, 1], [0, 1], '--', color='red', label='luck')\n",
868 | "ax.set_xlabel('False Positive Rate')\n",
869 | "ax.set_ylabel('True Positive Rate')\n",
870 | "ax.set_title('Receiver Operating Characteristic')\n",
871 | "ax.legend(loc='lower right')\n",
872 | "plt.show()"
873 | ]
874 | },
875 | {
876 | "cell_type": "code",
877 | "execution_count": null,
878 | "metadata": {
879 | "collapsed": true
880 | },
881 | "outputs": [],
882 | "source": []
883 | }
884 | ],
885 | "metadata": {
886 | "kernelspec": {
887 | "display_name": "Python 3",
888 | "language": "python",
889 | "name": "python3"
890 | },
891 | "language_info": {
892 | "codemirror_mode": {
893 | "name": "ipython",
894 | "version": 3
895 | },
896 | "file_extension": ".py",
897 | "mimetype": "text/x-python",
898 | "name": "python",
899 | "nbconvert_exporter": "python",
900 | "pygments_lexer": "ipython3",
901 | "version": "3.6.3"
902 | }
903 | },
904 | "nbformat": 4,
905 | "nbformat_minor": 2
906 | }
907 |
--------------------------------------------------------------------------------
/bank churn model - preprocessing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import numpy as np\n",
12 | "import pandas as pd\n",
13 | "import random\n",
14 | "import operator\n",
15 | "import numbers"
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": 2,
21 | "metadata": {},
22 | "outputs": [
23 | {
24 | "data": {
25 | "text/html": [
26 | "\n",
27 | "\n",
40 | "
\n",
41 | " \n",
42 | " \n",
43 | " | \n",
44 | " CUST_ID | \n",
45 | " OPEN_ACC_DUR | \n",
46 | " AGE | \n",
47 | " GENDER_CD | \n",
48 | " HASNT_HOME_ADDRESS_INF | \n",
49 | " HASNT_MOBILE_TEL_NUM_INF | \n",
50 | " LOCAL_CUR_SAV_SLOPE | \n",
51 | " LOCAL_CUR_MON_AVG_BAL | \n",
52 | " LOCAL_CUR_MON_AVG_BAL_PROP | \n",
53 | " LOCAL_CUR_ACCT_NUM | \n",
54 | " ... | \n",
55 | " educ1 | \n",
56 | " proptype | \n",
57 | " pcowner | \n",
58 | " ethnic | \n",
59 | " kid0_2 | \n",
60 | " kid3_5 | \n",
61 | " kid6_10 | \n",
62 | " kid11_15 | \n",
63 | " kid16_17 | \n",
64 | " car_buy | \n",
65 | "
\n",
66 | " \n",
67 | " \n",
68 | " \n",
69 | " 0 | \n",
70 | " 1 | \n",
71 | " 231 | \n",
72 | " 82 | \n",
73 | " 1 | \n",
74 | " Y | \n",
75 | " N | \n",
76 | " 0.000000 | \n",
77 | " 0.00 | \n",
78 | " 0.000000 | \n",
79 | " 0 | \n",
80 | " ... | \n",
81 | " 2.0 | \n",
82 | " NaN | \n",
83 | " Y | \n",
84 | " S | \n",
85 | " U | \n",
86 | " U | \n",
87 | " U | \n",
88 | " U | \n",
89 | " U | \n",
90 | " New | \n",
91 | "
\n",
92 | " \n",
93 | " 1 | \n",
94 | " 2 | \n",
95 | " 48 | \n",
96 | " 42 | \n",
97 | " 1 | \n",
98 | " Y | \n",
99 | " N | \n",
100 | " 1.005692 | \n",
101 | " 20149.04 | \n",
102 | " 0.264435 | \n",
103 | " 3 | \n",
104 | " ... | \n",
105 | " NaN | \n",
106 | " NaN | \n",
107 | " NaN | \n",
108 | " N | \n",
109 | " U | \n",
110 | " U | \n",
111 | " U | \n",
112 | " U | \n",
113 | " U | \n",
114 | " New | \n",
115 | "
\n",
116 | " \n",
117 | " 2 | \n",
118 | " 3 | \n",
119 | " 102 | \n",
120 | " 31 | \n",
121 | " 2 | \n",
122 | " Y | \n",
123 | " N | \n",
124 | " 0.000562 | \n",
125 | " 17.81 | \n",
126 | " 1.000000 | \n",
127 | " 1 | \n",
128 | " ... | \n",
129 | " NaN | \n",
130 | " A | \n",
131 | " Y | \n",
132 | " F | \n",
133 | " U | \n",
134 | " U | \n",
135 | " Y | \n",
136 | " U | \n",
137 | " U | \n",
138 | " New | \n",
139 | "
\n",
140 | " \n",
141 | " 3 | \n",
142 | " 4 | \n",
143 | " 62 | \n",
144 | " 78 | \n",
145 | " 2 | \n",
146 | " Y | \n",
147 | " N | \n",
148 | " -0.550912 | \n",
149 | " 29359.21 | \n",
150 | " 1.000000 | \n",
151 | " 1 | \n",
152 | " ... | \n",
153 | " NaN | \n",
154 | " NaN | \n",
155 | " NaN | \n",
156 | " N | \n",
157 | " U | \n",
158 | " U | \n",
159 | " U | \n",
160 | " U | \n",
161 | " U | \n",
162 | " UNKNOWN | \n",
163 | "
\n",
164 | " \n",
165 | " 4 | \n",
166 | " 5 | \n",
167 | " 79 | \n",
168 | " 46 | \n",
169 | " 1 | \n",
170 | " Y | \n",
171 | " N | \n",
172 | " 0.000288 | \n",
173 | " 34.68 | \n",
174 | " 1.000000 | \n",
175 | " 1 | \n",
176 | " ... | \n",
177 | " NaN | \n",
178 | " A | \n",
179 | " Y | \n",
180 | " U | \n",
181 | " U | \n",
182 | " U | \n",
183 | " U | \n",
184 | " U | \n",
185 | " Y | \n",
186 | " New | \n",
187 | "
\n",
188 | " \n",
189 | "
\n",
190 | "
5 rows × 127 columns
\n",
191 | "
"
192 | ],
193 | "text/plain": [
194 | " CUST_ID OPEN_ACC_DUR AGE GENDER_CD HASNT_HOME_ADDRESS_INF \\\n",
195 | "0 1 231 82 1 Y \n",
196 | "1 2 48 42 1 Y \n",
197 | "2 3 102 31 2 Y \n",
198 | "3 4 62 78 2 Y \n",
199 | "4 5 79 46 1 Y \n",
200 | "\n",
201 | " HASNT_MOBILE_TEL_NUM_INF LOCAL_CUR_SAV_SLOPE LOCAL_CUR_MON_AVG_BAL \\\n",
202 | "0 N 0.000000 0.00 \n",
203 | "1 N 1.005692 20149.04 \n",
204 | "2 N 0.000562 17.81 \n",
205 | "3 N -0.550912 29359.21 \n",
206 | "4 N 0.000288 34.68 \n",
207 | "\n",
208 | " LOCAL_CUR_MON_AVG_BAL_PROP LOCAL_CUR_ACCT_NUM ... educ1 proptype \\\n",
209 | "0 0.000000 0 ... 2.0 NaN \n",
210 | "1 0.264435 3 ... NaN NaN \n",
211 | "2 1.000000 1 ... NaN A \n",
212 | "3 1.000000 1 ... NaN NaN \n",
213 | "4 1.000000 1 ... NaN A \n",
214 | "\n",
215 | " pcowner ethnic kid0_2 kid3_5 kid6_10 kid11_15 kid16_17 car_buy \n",
216 | "0 Y S U U U U U New \n",
217 | "1 NaN N U U U U U New \n",
218 | "2 Y F U U Y U U New \n",
219 | "3 NaN N U U U U U UNKNOWN \n",
220 | "4 Y U U U U U Y New \n",
221 | "\n",
222 | "[5 rows x 127 columns]"
223 | ]
224 | },
225 | "execution_count": 2,
226 | "metadata": {},
227 | "output_type": "execute_result"
228 | }
229 | ],
230 | "source": [
231 | "bank_churn = pd.read_csv('./bankChurn.csv', encoding='utf-8')\n",
232 | "external_data = pd.read_csv('./ExternalData.csv', encoding='utf-8')\n",
233 | "df = pd.merge(bank_churn, external_data, on='CUST_ID')\n",
234 | "df.head()"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "execution_count": 3,
240 | "metadata": {
241 | "collapsed": true
242 | },
243 | "outputs": [],
244 | "source": [
245 | "model_data = df.copy()\n",
246 | "indep_cols = list(model_data.columns)\n",
247 | "indep_cols.remove('CHURN_CUST_IND')\n",
248 | "indep_cols.remove('CUST_ID')"
249 | ]
250 | },
251 | {
252 | "cell_type": "code",
253 | "execution_count": 4,
254 | "metadata": {},
255 | "outputs": [
256 | {
257 | "data": {
258 | "text/html": [
259 | "\n",
260 | "\n",
273 | "
\n",
274 | " \n",
275 | " \n",
276 | " | \n",
277 | " count | \n",
278 | " unique | \n",
279 | " top | \n",
280 | " freq | \n",
281 | " missing_pct | \n",
282 | "
\n",
283 | " \n",
284 | " \n",
285 | " \n",
286 | " GENDER_CD | \n",
287 | " 17241 | \n",
288 | " 3 | \n",
289 | " 2 | \n",
290 | " 8875 | \n",
291 | " 0.000000 | \n",
292 | "
\n",
293 | " \n",
294 | " HASNT_HOME_ADDRESS_INF | \n",
295 | " 17241 | \n",
296 | " 2 | \n",
297 | " Y | \n",
298 | " 14765 | \n",
299 | " 0.000000 | \n",
300 | "
\n",
301 | " \n",
302 | " HASNT_MOBILE_TEL_NUM_INF | \n",
303 | " 17241 | \n",
304 | " 2 | \n",
305 | " N | \n",
306 | " 11753 | \n",
307 | " 0.000000 | \n",
308 | "
\n",
309 | " \n",
310 | " crclscod | \n",
311 | " 17241 | \n",
312 | " 47 | \n",
313 | " AA | \n",
314 | " 6287 | \n",
315 | " 0.000000 | \n",
316 | "
\n",
317 | " \n",
318 | " asl_flag | \n",
319 | " 17241 | \n",
320 | " 2 | \n",
321 | " N | \n",
322 | " 14576 | \n",
323 | " 0.000000 | \n",
324 | "
\n",
325 | " \n",
326 | " last_swap | \n",
327 | " 7690 | \n",
328 | " 980 | \n",
329 | " 7/5/2001 | \n",
330 | " 35 | \n",
331 | " 0.553970 | \n",
332 | "
\n",
333 | " \n",
334 | " dwlltype | \n",
335 | " 11952 | \n",
336 | " 2 | \n",
337 | " S | \n",
338 | " 8595 | \n",
339 | " 0.306769 | \n",
340 | "
\n",
341 | " \n",
342 | " marital | \n",
343 | " 16935 | \n",
344 | " 5 | \n",
345 | " U | \n",
346 | " 6237 | \n",
347 | " 0.017748 | \n",
348 | "
\n",
349 | " \n",
350 | " wrkwoman | \n",
351 | " 2230 | \n",
352 | " 1 | \n",
353 | " Y | \n",
354 | " 2230 | \n",
355 | " 0.870657 | \n",
356 | "
\n",
357 | " \n",
358 | " proptype | \n",
359 | " 4965 | \n",
360 | " 6 | \n",
361 | " A | \n",
362 | " 4467 | \n",
363 | " 0.712024 | \n",
364 | "
\n",
365 | " \n",
366 | " pcowner | \n",
367 | " 3232 | \n",
368 | " 1 | \n",
369 | " Y | \n",
370 | " 3232 | \n",
371 | " 0.812540 | \n",
372 | "
\n",
373 | " \n",
374 | " ethnic | \n",
375 | " 16935 | \n",
376 | " 17 | \n",
377 | " N | \n",
378 | " 5866 | \n",
379 | " 0.017748 | \n",
380 | "
\n",
381 | " \n",
382 | " kid0_2 | \n",
383 | " 16935 | \n",
384 | " 2 | \n",
385 | " U | \n",
386 | " 16269 | \n",
387 | " 0.017748 | \n",
388 | "
\n",
389 | " \n",
390 | " kid3_5 | \n",
391 | " 16935 | \n",
392 | " 2 | \n",
393 | " U | \n",
394 | " 16165 | \n",
395 | " 0.017748 | \n",
396 | "
\n",
397 | " \n",
398 | " kid6_10 | \n",
399 | " 16935 | \n",
400 | " 2 | \n",
401 | " U | \n",
402 | " 15500 | \n",
403 | " 0.017748 | \n",
404 | "
\n",
405 | " \n",
406 | " kid11_15 | \n",
407 | " 16935 | \n",
408 | " 2 | \n",
409 | " U | \n",
410 | " 15378 | \n",
411 | " 0.017748 | \n",
412 | "
\n",
413 | " \n",
414 | " kid16_17 | \n",
415 | " 16935 | \n",
416 | " 2 | \n",
417 | " U | \n",
418 | " 15277 | \n",
419 | " 0.017748 | \n",
420 | "
\n",
421 | " \n",
422 | " car_buy | \n",
423 | " 16935 | \n",
424 | " 2 | \n",
425 | " UNKNOWN | \n",
426 | " 9659 | \n",
427 | " 0.017748 | \n",
428 | "
\n",
429 | " \n",
430 | "
\n",
431 | "
"
432 | ],
433 | "text/plain": [
434 | " count unique top freq missing_pct\n",
435 | "GENDER_CD 17241 3 2 8875 0.000000\n",
436 | "HASNT_HOME_ADDRESS_INF 17241 2 Y 14765 0.000000\n",
437 | "HASNT_MOBILE_TEL_NUM_INF 17241 2 N 11753 0.000000\n",
438 | "crclscod 17241 47 AA 6287 0.000000\n",
439 | "asl_flag 17241 2 N 14576 0.000000\n",
440 | "last_swap 7690 980 7/5/2001 35 0.553970\n",
441 | "dwlltype 11952 2 S 8595 0.306769\n",
442 | "marital 16935 5 U 6237 0.017748\n",
443 | "wrkwoman 2230 1 Y 2230 0.870657\n",
444 | "proptype 4965 6 A 4467 0.712024\n",
445 | "pcowner 3232 1 Y 3232 0.812540\n",
446 | "ethnic 16935 17 N 5866 0.017748\n",
447 | "kid0_2 16935 2 U 16269 0.017748\n",
448 | "kid3_5 16935 2 U 16165 0.017748\n",
449 | "kid6_10 16935 2 U 15500 0.017748\n",
450 | "kid11_15 16935 2 U 15378 0.017748\n",
451 | "kid16_17 16935 2 U 15277 0.017748\n",
452 | "car_buy 16935 2 UNKNOWN 9659 0.017748"
453 | ]
454 | },
455 | "execution_count": 4,
456 | "metadata": {},
457 | "output_type": "execute_result"
458 | }
459 | ],
460 | "source": [
461 | "object_df = model_data.select_dtypes(include=['object']).describe().T.assign(missing_pct = model_data.apply(lambda x: (len(x) - x.count()) / float(len(x))))\n",
462 | "object_df"
463 | ]
464 | },
465 | {
466 | "cell_type": "code",
467 | "execution_count": 5,
468 | "metadata": {},
469 | "outputs": [],
470 | "source": [
471 | "# 去除属性值过多的属性\n",
472 | "model_data.drop(['crclscod', 'ethnic'], axis=1, inplace=True)\n",
473 | "# 去除缺失值过多或无意义的属性\n",
474 | "model_data.drop(['last_swap'], axis=1, inplace=True)\n",
475 | "# wrkwoman表示是否有职场女性、proptype表示房屋类型、pcowner表示家里是否有电脑,这些属性在实际中均有很强的决策意义,因此不能删除\n",
476 | "# 针对这些变量,可以用一个哑变量来表示缺失值"
477 | ]
478 | },
479 | {
480 | "cell_type": "code",
481 | "execution_count": 6,
482 | "metadata": {},
483 | "outputs": [
484 | {
485 | "data": {
486 | "text/html": [
487 | "\n",
488 | "\n",
501 | "
\n",
502 | " \n",
503 | " \n",
504 | " | \n",
505 | " count | \n",
506 | " mean | \n",
507 | " std | \n",
508 | " min | \n",
509 | " 25% | \n",
510 | " 50% | \n",
511 | " 75% | \n",
512 | " max | \n",
513 | " missing_pct | \n",
514 | "
\n",
515 | " \n",
516 | " \n",
517 | " \n",
518 | " CUST_ID | \n",
519 | " 17241.0 | \n",
520 | " 8621.000000 | \n",
521 | " 4977.192331 | \n",
522 | " 1.0 | \n",
523 | " 4311.0 | \n",
524 | " 8621.0 | \n",
525 | " 12931.0 | \n",
526 | " 17241.0 | \n",
527 | " 0.0 | \n",
528 | "
\n",
529 | " \n",
530 | " OPEN_ACC_DUR | \n",
531 | " 17241.0 | \n",
532 | " 80.104750 | \n",
533 | " 34.944320 | \n",
534 | " 0.0 | \n",
535 | " 56.0 | \n",
536 | " 75.0 | \n",
537 | " 104.0 | \n",
538 | " 278.0 | \n",
539 | " 0.0 | \n",
540 | "
\n",
541 | " \n",
542 | " AGE | \n",
543 | " 17241.0 | \n",
544 | " 49.339481 | \n",
545 | " 15.431282 | \n",
546 | " 4.0 | \n",
547 | " 38.0 | \n",
548 | " 47.0 | \n",
549 | " 59.0 | \n",
550 | " 110.0 | \n",
551 | " 0.0 | \n",
552 | "
\n",
553 | " \n",
554 | " LOCAL_CUR_ACCT_NUM | \n",
555 | " 17241.0 | \n",
556 | " 1.796880 | \n",
557 | " 3.503054 | \n",
558 | " 0.0 | \n",
559 | " 1.0 | \n",
560 | " 1.0 | \n",
561 | " 2.0 | \n",
562 | " 399.0 | \n",
563 | " 0.0 | \n",
564 | "
\n",
565 | " \n",
566 | " LOCAL_CUR_TRANS_TX_NUM | \n",
567 | " 17241.0 | \n",
568 | " 0.177890 | \n",
569 | " 4.125816 | \n",
570 | " 0.0 | \n",
571 | " 0.0 | \n",
572 | " 0.0 | \n",
573 | " 0.0 | \n",
574 | " 429.0 | \n",
575 | " 0.0 | \n",
576 | "
\n",
577 | " \n",
578 | " LOCAL_CUR_LASTSAV_TX_NUM | \n",
579 | " 17241.0 | \n",
580 | " 0.425381 | \n",
581 | " 2.319546 | \n",
582 | " 0.0 | \n",
583 | " 0.0 | \n",
584 | " 0.0 | \n",
585 | " 0.0 | \n",
586 | " 112.0 | \n",
587 | " 0.0 | \n",
588 | "
\n",
589 | " \n",
590 | " LOCAL_CUR_WITHDRAW_TX_NUM | \n",
591 | " 17241.0 | \n",
592 | " 0.823560 | \n",
593 | " 2.577986 | \n",
594 | " 0.0 | \n",
595 | " 0.0 | \n",
596 | " 0.0 | \n",
597 | " 1.0 | \n",
598 | " 138.0 | \n",
599 | " 0.0 | \n",
600 | "
\n",
601 | " \n",
602 | " LOCAL_FIX_OPEN_ACC_TX_NUM | \n",
603 | " 17241.0 | \n",
604 | " 0.159910 | \n",
605 | " 0.470605 | \n",
606 | " 0.0 | \n",
607 | " 0.0 | \n",
608 | " 0.0 | \n",
609 | " 0.0 | \n",
610 | " 7.0 | \n",
611 | " 0.0 | \n",
612 | "
\n",
613 | " \n",
614 | " LOCAL_FIX_WITHDRAW_TX_NUM | \n",
615 | " 17241.0 | \n",
616 | " 0.061539 | \n",
617 | " 0.496607 | \n",
618 | " 0.0 | \n",
619 | " 0.0 | \n",
620 | " 0.0 | \n",
621 | " 0.0 | \n",
622 | " 32.0 | \n",
623 | " 0.0 | \n",
624 | "
\n",
625 | " \n",
626 | " LOCAL_FIX_CLOSE_ACC_TX_NUM | \n",
627 | " 17241.0 | \n",
628 | " 0.108578 | \n",
629 | " 0.452035 | \n",
630 | " 0.0 | \n",
631 | " 0.0 | \n",
632 | " 0.0 | \n",
633 | " 0.0 | \n",
634 | " 15.0 | \n",
635 | " 0.0 | \n",
636 | "
\n",
637 | " \n",
638 | " L6M_INDFINA_ALL_TX_NUM | \n",
639 | " 17241.0 | \n",
640 | " 0.067571 | \n",
641 | " 0.923849 | \n",
642 | " 0.0 | \n",
643 | " 0.0 | \n",
644 | " 0.0 | \n",
645 | " 0.0 | \n",
646 | " 82.0 | \n",
647 | " 0.0 | \n",
648 | "
\n",
649 | " \n",
650 | " POS_CONSUME_TX_AMT | \n",
651 | " 17241.0 | \n",
652 | " 1050.976915 | \n",
653 | " 23755.577944 | \n",
654 | " 0.0 | \n",
655 | " 0.0 | \n",
656 | " 0.0 | \n",
657 | " 0.0 | \n",
658 | " 1794863.0 | \n",
659 | " 0.0 | \n",
660 | "
\n",
661 | " \n",
662 | " POS_CONSUME_TX_NUM | \n",
663 | " 17241.0 | \n",
664 | " 0.106896 | \n",
665 | " 0.819569 | \n",
666 | " 0.0 | \n",
667 | " 0.0 | \n",
668 | " 0.0 | \n",
669 | " 0.0 | \n",
670 | " 50.0 | \n",
671 | " 0.0 | \n",
672 | "
\n",
673 | " \n",
674 | " ATM_ACCT_TX_NUM | \n",
675 | " 17241.0 | \n",
676 | " 0.409605 | \n",
677 | " 4.681921 | \n",
678 | " 0.0 | \n",
679 | " 0.0 | \n",
680 | " 0.0 | \n",
681 | " 0.0 | \n",
682 | " 257.0 | \n",
683 | " 0.0 | \n",
684 | "
\n",
685 | " \n",
686 | " ATM_ACCT_TX_AMT | \n",
687 | " 17241.0 | \n",
688 | " 5632.666551 | \n",
689 | " 208094.147877 | \n",
690 | " 0.0 | \n",
691 | " 0.0 | \n",
692 | " 0.0 | \n",
693 | " 0.0 | \n",
694 | " 18501490.0 | \n",
695 | " 0.0 | \n",
696 | "
\n",
697 | " \n",
698 | " ATM_NOT_ACCT_TX_NUM | \n",
699 | " 17241.0 | \n",
700 | " 0.188156 | \n",
701 | " 0.872419 | \n",
702 | " 0.0 | \n",
703 | " 0.0 | \n",
704 | " 0.0 | \n",
705 | " 0.0 | \n",
706 | " 31.0 | \n",
707 | " 0.0 | \n",
708 | "
\n",
709 | " \n",
710 | " ATM_ALL_TX_NUM | \n",
711 | " 17241.0 | \n",
712 | " 0.597761 | \n",
713 | " 4.974549 | \n",
714 | " 0.0 | \n",
715 | " 0.0 | \n",
716 | " 0.0 | \n",
717 | " 0.0 | \n",
718 | " 261.0 | \n",
719 | " 0.0 | \n",
720 | "
\n",
721 | " \n",
722 | " COUNTER_NOT_ACCT_TX_NUM | \n",
723 | " 17241.0 | \n",
724 | " 0.322313 | \n",
725 | " 0.870298 | \n",
726 | " 0.0 | \n",
727 | " 0.0 | \n",
728 | " 0.0 | \n",
729 | " 0.0 | \n",
730 | " 24.0 | \n",
731 | " 0.0 | \n",
732 | "
\n",
733 | " \n",
734 | " COUNTER_ACCT_TX_AMT | \n",
735 | " 17241.0 | \n",
736 | " 32726.390987 | \n",
737 | " 153197.540483 | \n",
738 | " 0.0 | \n",
739 | " 0.0 | \n",
740 | " 0.0 | \n",
741 | " 12000.0 | \n",
742 | " 6402993.0 | \n",
743 | " 0.0 | \n",
744 | "
\n",
745 | " \n",
746 | " COUNTER_ACCT_TX_NUM | \n",
747 | " 17241.0 | \n",
748 | " 1.303347 | \n",
749 | " 4.093381 | \n",
750 | " 0.0 | \n",
751 | " 0.0 | \n",
752 | " 0.0 | \n",
753 | " 2.0 | \n",
754 | " 307.0 | \n",
755 | " 0.0 | \n",
756 | "
\n",
757 | " \n",
758 | " COUNTER_ALL_TX_NUM | \n",
759 | " 17241.0 | \n",
760 | " 1.625660 | \n",
761 | " 4.560893 | \n",
762 | " 0.0 | \n",
763 | " 0.0 | \n",
764 | " 0.0 | \n",
765 | " 2.0 | \n",
766 | " 331.0 | \n",
767 | " 0.0 | \n",
768 | "
\n",
769 | " \n",
770 | " NAT_DEBT_OPEN_ACC_DUR | \n",
771 | " 17241.0 | \n",
772 | " -1262.148367 | \n",
773 | " 10929.030591 | \n",
774 | " -95877.0 | \n",
775 | " 0.0 | \n",
776 | " 0.0 | \n",
777 | " 0.0 | \n",
778 | " 85.0 | \n",
779 | " 0.0 | \n",
780 | "
\n",
781 | " \n",
782 | " FINA_OPEN_ACC_DUR | \n",
783 | " 17241.0 | \n",
784 | " -1017.557914 | \n",
785 | " 9825.499880 | \n",
786 | " -95877.0 | \n",
787 | " 0.0 | \n",
788 | " 0.0 | \n",
789 | " 0.0 | \n",
790 | " 24.0 | \n",
791 | " 0.0 | \n",
792 | "
\n",
793 | " \n",
794 | " FUND_OPEN_ACC_DUR | \n",
795 | " 17241.0 | \n",
796 | " -967.307001 | \n",
797 | " 9583.391244 | \n",
798 | " -95877.0 | \n",
799 | " 0.0 | \n",
800 | " 0.0 | \n",
801 | " 0.0 | \n",
802 | " 43.0 | \n",
803 | " 0.0 | \n",
804 | "
\n",
805 | " \n",
806 | " TELEBANK_ALL_TX_NUM | \n",
807 | " 17241.0 | \n",
808 | " 0.000000 | \n",
809 | " 0.000000 | \n",
810 | " 0.0 | \n",
811 | " 0.0 | \n",
812 | " 0.0 | \n",
813 | " 0.0 | \n",
814 | " 0.0 | \n",
815 | " 0.0 | \n",
816 | "
\n",
817 | " \n",
818 | " CHURN_CUST_IND | \n",
819 | " 17241.0 | \n",
820 | " 0.100980 | \n",
821 | " 0.301311 | \n",
822 | " 0.0 | \n",
823 | " 0.0 | \n",
824 | " 0.0 | \n",
825 | " 0.0 | \n",
826 | " 1.0 | \n",
827 | " 0.0 | \n",
828 | "
\n",
829 | " \n",
830 | " unan_vce_Range | \n",
831 | " 17241.0 | \n",
832 | " 21.107650 | \n",
833 | " 35.917786 | \n",
834 | " 0.0 | \n",
835 | " 4.0 | \n",
836 | " 11.0 | \n",
837 | " 25.0 | \n",
838 | " 1395.0 | \n",
839 | " 0.0 | \n",
840 | "
\n",
841 | " \n",
842 | " unan_dat_Range | \n",
843 | " 17241.0 | \n",
844 | " 0.064961 | \n",
845 | " 0.899576 | \n",
846 | " 0.0 | \n",
847 | " 0.0 | \n",
848 | " 0.0 | \n",
849 | " 0.0 | \n",
850 | " 83.0 | \n",
851 | " 0.0 | \n",
852 | "
\n",
853 | " \n",
854 | " callfwdv_Range | \n",
855 | " 17241.0 | \n",
856 | " 0.025927 | \n",
857 | " 0.809077 | \n",
858 | " 0.0 | \n",
859 | " 0.0 | \n",
860 | " 0.0 | \n",
861 | " 0.0 | \n",
862 | " 59.0 | \n",
863 | " 0.0 | \n",
864 | "
\n",
865 | " \n",
866 | " totcalls | \n",
867 | " 17241.0 | \n",
868 | " 2976.027377 | \n",
869 | " 4147.062069 | \n",
870 | " 0.0 | \n",
871 | " 853.0 | \n",
872 | " 1792.0 | \n",
873 | " 3564.0 | \n",
874 | " 98874.0 | \n",
875 | " 0.0 | \n",
876 | "
\n",
877 | " \n",
878 | " avg3mou | \n",
879 | " 17241.0 | \n",
880 | " 546.483441 | \n",
881 | " 554.652467 | \n",
882 | " 0.0 | \n",
883 | " 166.0 | \n",
884 | " 376.0 | \n",
885 | " 746.0 | \n",
886 | " 5821.0 | \n",
887 | " 0.0 | \n",
888 | "
\n",
889 | " \n",
890 | " avg3qty | \n",
891 | " 17241.0 | \n",
892 | " 190.337799 | \n",
893 | " 205.371296 | \n",
894 | " 0.0 | \n",
895 | " 59.0 | \n",
896 | " 131.0 | \n",
897 | " 249.0 | \n",
898 | " 3261.0 | \n",
899 | " 0.0 | \n",
900 | "
\n",
901 | " \n",
902 | " avg3rev | \n",
903 | " 17241.0 | \n",
904 | " 60.086074 | \n",
905 | " 46.046675 | \n",
906 | " 1.0 | \n",
907 | " 34.0 | \n",
908 | " 49.0 | \n",
909 | " 72.0 | \n",
910 | " 835.0 | \n",
911 | " 0.0 | \n",
912 | "
\n",
913 | " \n",
914 | "
\n",
915 | "
"
916 | ],
917 | "text/plain": [
918 | " count mean std min \\\n",
919 | "CUST_ID 17241.0 8621.000000 4977.192331 1.0 \n",
920 | "OPEN_ACC_DUR 17241.0 80.104750 34.944320 0.0 \n",
921 | "AGE 17241.0 49.339481 15.431282 4.0 \n",
922 | "LOCAL_CUR_ACCT_NUM 17241.0 1.796880 3.503054 0.0 \n",
923 | "LOCAL_CUR_TRANS_TX_NUM 17241.0 0.177890 4.125816 0.0 \n",
924 | "LOCAL_CUR_LASTSAV_TX_NUM 17241.0 0.425381 2.319546 0.0 \n",
925 | "LOCAL_CUR_WITHDRAW_TX_NUM 17241.0 0.823560 2.577986 0.0 \n",
926 | "LOCAL_FIX_OPEN_ACC_TX_NUM 17241.0 0.159910 0.470605 0.0 \n",
927 | "LOCAL_FIX_WITHDRAW_TX_NUM 17241.0 0.061539 0.496607 0.0 \n",
928 | "LOCAL_FIX_CLOSE_ACC_TX_NUM 17241.0 0.108578 0.452035 0.0 \n",
929 | "L6M_INDFINA_ALL_TX_NUM 17241.0 0.067571 0.923849 0.0 \n",
930 | "POS_CONSUME_TX_AMT 17241.0 1050.976915 23755.577944 0.0 \n",
931 | "POS_CONSUME_TX_NUM 17241.0 0.106896 0.819569 0.0 \n",
932 | "ATM_ACCT_TX_NUM 17241.0 0.409605 4.681921 0.0 \n",
933 | "ATM_ACCT_TX_AMT 17241.0 5632.666551 208094.147877 0.0 \n",
934 | "ATM_NOT_ACCT_TX_NUM 17241.0 0.188156 0.872419 0.0 \n",
935 | "ATM_ALL_TX_NUM 17241.0 0.597761 4.974549 0.0 \n",
936 | "COUNTER_NOT_ACCT_TX_NUM 17241.0 0.322313 0.870298 0.0 \n",
937 | "COUNTER_ACCT_TX_AMT 17241.0 32726.390987 153197.540483 0.0 \n",
938 | "COUNTER_ACCT_TX_NUM 17241.0 1.303347 4.093381 0.0 \n",
939 | "COUNTER_ALL_TX_NUM 17241.0 1.625660 4.560893 0.0 \n",
940 | "NAT_DEBT_OPEN_ACC_DUR 17241.0 -1262.148367 10929.030591 -95877.0 \n",
941 | "FINA_OPEN_ACC_DUR 17241.0 -1017.557914 9825.499880 -95877.0 \n",
942 | "FUND_OPEN_ACC_DUR 17241.0 -967.307001 9583.391244 -95877.0 \n",
943 | "TELEBANK_ALL_TX_NUM 17241.0 0.000000 0.000000 0.0 \n",
944 | "CHURN_CUST_IND 17241.0 0.100980 0.301311 0.0 \n",
945 | "unan_vce_Range 17241.0 21.107650 35.917786 0.0 \n",
946 | "unan_dat_Range 17241.0 0.064961 0.899576 0.0 \n",
947 | "callfwdv_Range 17241.0 0.025927 0.809077 0.0 \n",
948 | "totcalls 17241.0 2976.027377 4147.062069 0.0 \n",
949 | "avg3mou 17241.0 546.483441 554.652467 0.0 \n",
950 | "avg3qty 17241.0 190.337799 205.371296 0.0 \n",
951 | "avg3rev 17241.0 60.086074 46.046675 1.0 \n",
952 | "\n",
953 | " 25% 50% 75% max missing_pct \n",
954 | "CUST_ID 4311.0 8621.0 12931.0 17241.0 0.0 \n",
955 | "OPEN_ACC_DUR 56.0 75.0 104.0 278.0 0.0 \n",
956 | "AGE 38.0 47.0 59.0 110.0 0.0 \n",
957 | "LOCAL_CUR_ACCT_NUM 1.0 1.0 2.0 399.0 0.0 \n",
958 | "LOCAL_CUR_TRANS_TX_NUM 0.0 0.0 0.0 429.0 0.0 \n",
959 | "LOCAL_CUR_LASTSAV_TX_NUM 0.0 0.0 0.0 112.0 0.0 \n",
960 | "LOCAL_CUR_WITHDRAW_TX_NUM 0.0 0.0 1.0 138.0 0.0 \n",
961 | "LOCAL_FIX_OPEN_ACC_TX_NUM 0.0 0.0 0.0 7.0 0.0 \n",
962 | "LOCAL_FIX_WITHDRAW_TX_NUM 0.0 0.0 0.0 32.0 0.0 \n",
963 | "LOCAL_FIX_CLOSE_ACC_TX_NUM 0.0 0.0 0.0 15.0 0.0 \n",
964 | "L6M_INDFINA_ALL_TX_NUM 0.0 0.0 0.0 82.0 0.0 \n",
965 | "POS_CONSUME_TX_AMT 0.0 0.0 0.0 1794863.0 0.0 \n",
966 | "POS_CONSUME_TX_NUM 0.0 0.0 0.0 50.0 0.0 \n",
967 | "ATM_ACCT_TX_NUM 0.0 0.0 0.0 257.0 0.0 \n",
968 | "ATM_ACCT_TX_AMT 0.0 0.0 0.0 18501490.0 0.0 \n",
969 | "ATM_NOT_ACCT_TX_NUM 0.0 0.0 0.0 31.0 0.0 \n",
970 | "ATM_ALL_TX_NUM 0.0 0.0 0.0 261.0 0.0 \n",
971 | "COUNTER_NOT_ACCT_TX_NUM 0.0 0.0 0.0 24.0 0.0 \n",
972 | "COUNTER_ACCT_TX_AMT 0.0 0.0 12000.0 6402993.0 0.0 \n",
973 | "COUNTER_ACCT_TX_NUM 0.0 0.0 2.0 307.0 0.0 \n",
974 | "COUNTER_ALL_TX_NUM 0.0 0.0 2.0 331.0 0.0 \n",
975 | "NAT_DEBT_OPEN_ACC_DUR 0.0 0.0 0.0 85.0 0.0 \n",
976 | "FINA_OPEN_ACC_DUR 0.0 0.0 0.0 24.0 0.0 \n",
977 | "FUND_OPEN_ACC_DUR 0.0 0.0 0.0 43.0 0.0 \n",
978 | "TELEBANK_ALL_TX_NUM 0.0 0.0 0.0 0.0 0.0 \n",
979 | "CHURN_CUST_IND 0.0 0.0 0.0 1.0 0.0 \n",
980 | "unan_vce_Range 4.0 11.0 25.0 1395.0 0.0 \n",
981 | "unan_dat_Range 0.0 0.0 0.0 83.0 0.0 \n",
982 | "callfwdv_Range 0.0 0.0 0.0 59.0 0.0 \n",
983 | "totcalls 853.0 1792.0 3564.0 98874.0 0.0 \n",
984 | "avg3mou 166.0 376.0 746.0 5821.0 0.0 \n",
985 | "avg3qty 59.0 131.0 249.0 3261.0 0.0 \n",
986 | "avg3rev 34.0 49.0 72.0 835.0 0.0 "
987 | ]
988 | },
989 | "execution_count": 6,
990 | "metadata": {},
991 | "output_type": "execute_result"
992 | }
993 | ],
994 | "source": [
995 | "int_df = model_data.select_dtypes(include=['int64']).describe().T.assign(missing_pct=model_data.apply(lambda x: (len(x) - x.count())/float(len(x))))\n",
996 | "int_df"
997 | ]
998 | },
999 | {
1000 | "cell_type": "code",
1001 | "execution_count": 7,
1002 | "metadata": {},
1003 | "outputs": [],
1004 | "source": [
1005 | "del model_data['CUST_ID']"
1006 | ]
1007 | },
1008 | {
1009 | "cell_type": "code",
1010 | "execution_count": 8,
1011 | "metadata": {},
1012 | "outputs": [
1013 | {
1014 | "data": {
1015 | "text/html": [
1016 | "\n",
1017 | "\n",
1030 | "
\n",
1031 | " \n",
1032 | " \n",
1033 | " | \n",
1034 | " count | \n",
1035 | " mean | \n",
1036 | " std | \n",
1037 | " min | \n",
1038 | " 25% | \n",
1039 | " 50% | \n",
1040 | " 75% | \n",
1041 | " max | \n",
1042 | " missing_pct | \n",
1043 | "
\n",
1044 | " \n",
1045 | " \n",
1046 | " \n",
1047 | " LOCAL_CUR_SAV_SLOPE | \n",
1048 | " 17241.0 | \n",
1049 | " -0.011627 | \n",
1050 | " 0.407344 | \n",
1051 | " -1.000000 | \n",
1052 | " -0.087996 | \n",
1053 | " 0.000000 | \n",
1054 | " 0.074506 | \n",
1055 | " 2.000000e+00 | \n",
1056 | " 0.000000 | \n",
1057 | "
\n",
1058 | " \n",
1059 | " totmou | \n",
1060 | " 17241.0 | \n",
1061 | " 7842.995152 | \n",
1062 | " 9244.876680 | \n",
1063 | " 0.000000 | \n",
1064 | " 2450.000000 | \n",
1065 | " 5098.000000 | \n",
1066 | " 9868.000000 | \n",
1067 | " 1.736084e+05 | \n",
1068 | " 0.000000 | \n",
1069 | "
\n",
1070 | " \n",
1071 | " mou_opkd_Mean | \n",
1072 | " 17241.0 | \n",
1073 | " 1.230280 | \n",
1074 | " 24.995118 | \n",
1075 | " 0.000000 | \n",
1076 | " 0.000000 | \n",
1077 | " 0.000000 | \n",
1078 | " 0.000000 | \n",
1079 | " 2.922043e+03 | \n",
1080 | " 0.000000 | \n",
1081 | "
\n",
1082 | " \n",
1083 | " mou_opkv_Mean | \n",
1084 | " 17241.0 | \n",
1085 | " 175.394446 | \n",
1086 | " 243.886548 | \n",
1087 | " 0.000000 | \n",
1088 | " 21.876667 | \n",
1089 | " 83.176667 | \n",
1090 | " 229.656667 | \n",
1091 | " 2.687313e+03 | \n",
1092 | " 0.000000 | \n",
1093 | "
\n",
1094 | " \n",
1095 | " opk_dat_Mean | \n",
1096 | " 17241.0 | \n",
1097 | " 0.418402 | \n",
1098 | " 4.264581 | \n",
1099 | " 0.000000 | \n",
1100 | " 0.000000 | \n",
1101 | " 0.000000 | \n",
1102 | " 0.000000 | \n",
1103 | " 2.456667e+02 | \n",
1104 | " 0.000000 | \n",
1105 | "
\n",
1106 | " \n",
1107 | " opk_vce_Mean | \n",
1108 | " 17241.0 | \n",
1109 | " 70.563444 | \n",
1110 | " 98.128628 | \n",
1111 | " 0.000000 | \n",
1112 | " 11.666667 | \n",
1113 | " 36.666667 | \n",
1114 | " 91.666667 | \n",
1115 | " 1.438000e+03 | \n",
1116 | " 0.000000 | \n",
1117 | "
\n",
1118 | " \n",
1119 | " mou_pead_Mean | \n",
1120 | " 17241.0 | \n",
1121 | " 0.697439 | \n",
1122 | " 6.788500 | \n",
1123 | " 0.000000 | \n",
1124 | " 0.000000 | \n",
1125 | " 0.000000 | \n",
1126 | " 0.000000 | \n",
1127 | " 2.902433e+02 | \n",
1128 | " 0.000000 | \n",
1129 | "
\n",
1130 | " \n",
1131 | " mou_peav_Mean | \n",
1132 | " 17241.0 | \n",
1133 | " 183.094184 | \n",
1134 | " 213.011899 | \n",
1135 | " 0.000000 | \n",
1136 | " 43.940000 | \n",
1137 | " 122.703333 | \n",
1138 | " 242.296667 | \n",
1139 | " 2.994580e+03 | \n",
1140 | " 0.000000 | \n",
1141 | "
\n",
1142 | " \n",
1143 | " peak_dat_Mean | \n",
1144 | " 17241.0 | \n",
1145 | " 0.357829 | \n",
1146 | " 3.878087 | \n",
1147 | " 0.000000 | \n",
1148 | " 0.000000 | \n",
1149 | " 0.000000 | \n",
1150 | " 0.000000 | \n",
1151 | " 2.806667e+02 | \n",
1152 | " 0.000000 | \n",
1153 | "
\n",
1154 | " \n",
1155 | " peak_vce_Mean | \n",
1156 | " 17241.0 | \n",
1157 | " 93.384316 | \n",
1158 | " 107.881729 | \n",
1159 | " 0.000000 | \n",
1160 | " 24.333333 | \n",
1161 | " 63.666667 | \n",
1162 | " 124.333333 | \n",
1163 | " 1.921333e+03 | \n",
1164 | " 0.000000 | \n",
1165 | "
\n",
1166 | " \n",
1167 | " mouiwylisv_Mean | \n",
1168 | " 17241.0 | \n",
1169 | " 19.708691 | \n",
1170 | " 42.675059 | \n",
1171 | " 0.000000 | \n",
1172 | " 0.000000 | \n",
1173 | " 3.983333 | \n",
1174 | " 20.666667 | \n",
1175 | " 9.210700e+02 | \n",
1176 | " 0.000000 | \n",
1177 | "
\n",
1178 | " \n",
1179 | " iwylis_vce_Mean | \n",
1180 | " 17241.0 | \n",
1181 | " 8.615722 | \n",
1182 | " 17.234011 | \n",
1183 | " 0.000000 | \n",
1184 | " 0.000000 | \n",
1185 | " 2.333333 | \n",
1186 | " 9.666667 | \n",
1187 | " 3.446667e+02 | \n",
1188 | " 0.000000 | \n",
1189 | "
\n",
1190 | " \n",
1191 | " mouowylisv_Mean | \n",
1192 | " 17241.0 | \n",
1193 | " 30.144799 | \n",
1194 | " 50.859667 | \n",
1195 | " 0.000000 | \n",
1196 | " 2.860000 | \n",
1197 | " 13.016667 | \n",
1198 | " 36.880000 | \n",
1199 | " 1.490253e+03 | \n",
1200 | " 0.000000 | \n",
1201 | "
\n",
1202 | " \n",
1203 | " owylis_vce_Mean | \n",
1204 | " 17241.0 | \n",
1205 | " 26.186648 | \n",
1206 | " 35.982014 | \n",
1207 | " 0.000000 | \n",
1208 | " 3.666667 | \n",
1209 | " 14.000000 | \n",
1210 | " 35.000000 | \n",
1211 | " 6.443333e+02 | \n",
1212 | " 0.000000 | \n",
1213 | "
\n",
1214 | " \n",
1215 | " mou_rvce_Mean | \n",
1216 | " 17241.0 | \n",
1217 | " 119.739875 | \n",
1218 | " 170.938653 | \n",
1219 | " 0.000000 | \n",
1220 | " 9.466667 | \n",
1221 | " 54.290000 | \n",
1222 | " 161.336667 | \n",
1223 | " 2.138510e+03 | \n",
1224 | " 0.000000 | \n",
1225 | "
\n",
1226 | " \n",
1227 | " mou_cdat_Mean | \n",
1228 | " 17241.0 | \n",
1229 | " 1.930040 | \n",
1230 | " 28.330685 | \n",
1231 | " 0.000000 | \n",
1232 | " 0.000000 | \n",
1233 | " 0.000000 | \n",
1234 | " 0.000000 | \n",
1235 | " 3.032050e+03 | \n",
1236 | " 0.000000 | \n",
1237 | "
\n",
1238 | " \n",
1239 | " unan_vce_Mean | \n",
1240 | " 17241.0 | \n",
1241 | " 28.921389 | \n",
1242 | " 39.442498 | \n",
1243 | " 0.000000 | \n",
1244 | " 5.333333 | \n",
1245 | " 16.666667 | \n",
1246 | " 37.000000 | \n",
1247 | " 8.143333e+02 | \n",
1248 | " 0.000000 | \n",
1249 | "
\n",
1250 | " \n",
1251 | " unan_dat_Mean | \n",
1252 | " 17241.0 | \n",
1253 | " 0.031417 | \n",
1254 | " 0.458737 | \n",
1255 | " 0.000000 | \n",
1256 | " 0.000000 | \n",
1257 | " 0.000000 | \n",
1258 | " 0.000000 | \n",
1259 | " 4.133333e+01 | \n",
1260 | " 0.000000 | \n",
1261 | "
\n",
1262 | " \n",
1263 | " comp_vce_Mean | \n",
1264 | " 17241.0 | \n",
1265 | " 114.545889 | \n",
1266 | " 122.214639 | \n",
1267 | " 0.000000 | \n",
1268 | " 31.666667 | \n",
1269 | " 79.666667 | \n",
1270 | " 157.333333 | \n",
1271 | " 1.376667e+03 | \n",
1272 | " 0.000000 | \n",
1273 | "
\n",
1274 | " \n",
1275 | " comp_dat_Mean | \n",
1276 | " 17241.0 | \n",
1277 | " 0.776231 | \n",
1278 | " 7.644959 | \n",
1279 | " 0.000000 | \n",
1280 | " 0.000000 | \n",
1281 | " 0.000000 | \n",
1282 | " 0.000000 | \n",
1283 | " 5.263333e+02 | \n",
1284 | " 0.000000 | \n",
1285 | "
\n",
1286 | " \n",
1287 | " custcare_Mean | \n",
1288 | " 17241.0 | \n",
1289 | " 1.949056 | \n",
1290 | " 5.958278 | \n",
1291 | " 0.000000 | \n",
1292 | " 0.000000 | \n",
1293 | " 0.000000 | \n",
1294 | " 1.666667 | \n",
1295 | " 3.656667e+02 | \n",
1296 | " 0.000000 | \n",
1297 | "
\n",
1298 | " \n",
1299 | " cc_mou_Mean | \n",
1300 | " 17241.0 | \n",
1301 | " 3.995940 | \n",
1302 | " 11.376720 | \n",
1303 | " 0.000000 | \n",
1304 | " 0.000000 | \n",
1305 | " 0.000000 | \n",
1306 | " 3.190000 | \n",
1307 | " 3.091133e+02 | \n",
1308 | " 0.000000 | \n",
1309 | "
\n",
1310 | " \n",
1311 | " avgmou | \n",
1312 | " 17241.0 | \n",
1313 | " 494.788836 | \n",
1314 | " 452.298412 | \n",
1315 | " 0.000000 | \n",
1316 | " 179.710000 | \n",
1317 | " 365.000000 | \n",
1318 | " 667.330000 | \n",
1319 | " 5.164290e+03 | \n",
1320 | " 0.000000 | \n",
1321 | "
\n",
1322 | " \n",
1323 | " avgqty | \n",
1324 | " 17241.0 | \n",
1325 | " 179.168816 | \n",
1326 | " 179.200014 | \n",
1327 | " 0.000000 | \n",
1328 | " 64.300000 | \n",
1329 | " 129.170000 | \n",
1330 | " 234.680000 | \n",
1331 | " 2.500890e+03 | \n",
1332 | " 0.000000 | \n",
1333 | "
\n",
1334 | " \n",
1335 | " L6M_INDFINA_ALL_TX_AMT | \n",
1336 | " 17241.0 | \n",
1337 | " 4303.363895 | \n",
1338 | " 76935.023181 | \n",
1339 | " 0.000000 | \n",
1340 | " 0.000000 | \n",
1341 | " 0.000000 | \n",
1342 | " 0.000000 | \n",
1343 | " 3.987167e+06 | \n",
1344 | " 0.000000 | \n",
1345 | "
\n",
1346 | " \n",
1347 | " LOCAL_FIX_CLOSE_ACC_TX_AMT | \n",
1348 | " 17241.0 | \n",
1349 | " 3453.773522 | \n",
1350 | " 20665.105788 | \n",
1351 | " 0.000000 | \n",
1352 | " 0.000000 | \n",
1353 | " 0.000000 | \n",
1354 | " 0.000000 | \n",
1355 | " 7.945000e+05 | \n",
1356 | " 0.000000 | \n",
1357 | "
\n",
1358 | " \n",
1359 | " LOCAL_CUR_MON_AVG_BAL | \n",
1360 | " 17241.0 | \n",
1361 | " 27735.417899 | \n",
1362 | " 98608.195518 | \n",
1363 | " 0.000000 | \n",
1364 | " 51.530000 | \n",
1365 | " 3232.020000 | \n",
1366 | " 23113.460000 | \n",
1367 | " 7.610110e+06 | \n",
1368 | " 0.000000 | \n",
1369 | "
\n",
1370 | " \n",
1371 | " LOCAL_CUR_MON_AVG_BAL_PROP | \n",
1372 | " 17241.0 | \n",
1373 | " 0.373216 | \n",
1374 | " 0.438130 | \n",
1375 | " 0.000000 | \n",
1376 | " 0.005796 | \n",
1377 | " 0.091914 | \n",
1378 | " 1.000000 | \n",
1379 | " 1.000000e+00 | \n",
1380 | " 0.000000 | \n",
1381 | "
\n",
1382 | " \n",
1383 | " LOCAL_OVEONEYR_FF_MON_AVG_BAL | \n",
1384 | " 17241.0 | \n",
1385 | " 72023.920103 | \n",
1386 | " 128981.784041 | \n",
1387 | " 0.000000 | \n",
1388 | " 0.000000 | \n",
1389 | " 50000.000000 | \n",
1390 | " 95000.000000 | \n",
1391 | " 5.561334e+06 | \n",
1392 | " 0.000000 | \n",
1393 | "
\n",
1394 | " \n",
1395 | " LOCAL_FIX_MON_AVG_BAL | \n",
1396 | " 17241.0 | \n",
1397 | " 83907.649074 | \n",
1398 | " 360269.646860 | \n",
1399 | " 0.000000 | \n",
1400 | " 0.000000 | \n",
1401 | " 56677.270000 | \n",
1402 | " 101125.000000 | \n",
1403 | " 4.358232e+07 | \n",
1404 | " 0.000000 | \n",
1405 | "
\n",
1406 | " \n",
1407 | " ... | \n",
1408 | " ... | \n",
1409 | " ... | \n",
1410 | " ... | \n",
1411 | " ... | \n",
1412 | " ... | \n",
1413 | " ... | \n",
1414 | " ... | \n",
1415 | " ... | \n",
1416 | " ... | \n",
1417 | "
\n",
1418 | " \n",
1419 | " LOCAL_FIX_OPEN_ACC_TX_AMT | \n",
1420 | " 17241.0 | \n",
1421 | " 5528.391594 | \n",
1422 | " 33922.675509 | \n",
1423 | " 0.000000 | \n",
1424 | " 0.000000 | \n",
1425 | " 0.000000 | \n",
1426 | " 0.000000 | \n",
1427 | " 2.586000e+06 | \n",
1428 | " 0.000000 | \n",
1429 | "
\n",
1430 | " \n",
1431 | " LOCAL_FIX_WITHDRAW_TX_AMT | \n",
1432 | " 17241.0 | \n",
1433 | " 1706.323595 | \n",
1434 | " 18679.511773 | \n",
1435 | " 0.000000 | \n",
1436 | " 0.000000 | \n",
1437 | " 0.000000 | \n",
1438 | " 0.000000 | \n",
1439 | " 1.398000e+06 | \n",
1440 | " 0.000000 | \n",
1441 | "
\n",
1442 | " \n",
1443 | " SAV_SLOPE | \n",
1444 | " 17241.0 | \n",
1445 | " -0.004043 | \n",
1446 | " 0.256375 | \n",
1447 | " -1.000000 | \n",
1448 | " -0.000550 | \n",
1449 | " 0.000533 | \n",
1450 | " 0.035337 | \n",
1451 | " 1.904013e+00 | \n",
1452 | " 0.000000 | \n",
1453 | "
\n",
1454 | " \n",
1455 | " mou_cvce_Mean | \n",
1456 | " 17241.0 | \n",
1457 | " 238.802099 | \n",
1458 | " 267.274943 | \n",
1459 | " 0.000000 | \n",
1460 | " 56.900000 | \n",
1461 | " 157.183333 | \n",
1462 | " 329.523333 | \n",
1463 | " 3.661677e+03 | \n",
1464 | " 0.000000 | \n",
1465 | "
\n",
1466 | " \n",
1467 | " vceovr_Range | \n",
1468 | " 17205.0 | \n",
1469 | " 29.374902 | \n",
1470 | " 58.098543 | \n",
1471 | " 0.000000 | \n",
1472 | " 0.000000 | \n",
1473 | " 2.100000 | \n",
1474 | " 37.000000 | \n",
1475 | " 1.215550e+03 | \n",
1476 | " 0.002088 | \n",
1477 | "
\n",
1478 | " \n",
1479 | " vceovr_Mean | \n",
1480 | " 17205.0 | \n",
1481 | " 12.848716 | \n",
1482 | " 29.276031 | \n",
1483 | " 0.000000 | \n",
1484 | " 0.000000 | \n",
1485 | " 0.600000 | \n",
1486 | " 13.275000 | \n",
1487 | " 6.012125e+02 | \n",
1488 | " 0.002088 | \n",
1489 | "
\n",
1490 | " \n",
1491 | " roam_Range | \n",
1492 | " 17205.0 | \n",
1493 | " 3.127544 | \n",
1494 | " 19.692447 | \n",
1495 | " 0.000000 | \n",
1496 | " 0.000000 | \n",
1497 | " 0.000000 | \n",
1498 | " 0.780000 | \n",
1499 | " 1.561470e+03 | \n",
1500 | " 0.002088 | \n",
1501 | "
\n",
1502 | " \n",
1503 | " datovr_Range | \n",
1504 | " 17205.0 | \n",
1505 | " 0.717182 | \n",
1506 | " 5.964260 | \n",
1507 | " 0.000000 | \n",
1508 | " 0.000000 | \n",
1509 | " 0.000000 | \n",
1510 | " 0.000000 | \n",
1511 | " 3.032000e+02 | \n",
1512 | " 0.002088 | \n",
1513 | "
\n",
1514 | " \n",
1515 | " da_Mean | \n",
1516 | " 17205.0 | \n",
1517 | " 0.920998 | \n",
1518 | " 2.118237 | \n",
1519 | " 0.000000 | \n",
1520 | " 0.000000 | \n",
1521 | " 0.247500 | \n",
1522 | " 0.990000 | \n",
1523 | " 5.766750e+01 | \n",
1524 | " 0.002088 | \n",
1525 | "
\n",
1526 | " \n",
1527 | " ovrrev_Range | \n",
1528 | " 17205.0 | \n",
1529 | " 29.844381 | \n",
1530 | " 58.365586 | \n",
1531 | " 0.000000 | \n",
1532 | " 0.000000 | \n",
1533 | " 3.300000 | \n",
1534 | " 37.450000 | \n",
1535 | " 1.215550e+03 | \n",
1536 | " 0.002088 | \n",
1537 | "
\n",
1538 | " \n",
1539 | " ovrmou_Range | \n",
1540 | " 17205.0 | \n",
1541 | " 89.809416 | \n",
1542 | " 177.656676 | \n",
1543 | " 0.000000 | \n",
1544 | " 0.000000 | \n",
1545 | " 9.000000 | \n",
1546 | " 112.000000 | \n",
1547 | " 3.473000e+03 | \n",
1548 | " 0.002088 | \n",
1549 | "
\n",
1550 | " \n",
1551 | " da_Range | \n",
1552 | " 17205.0 | \n",
1553 | " 1.671578 | \n",
1554 | " 2.987940 | \n",
1555 | " 0.000000 | \n",
1556 | " 0.000000 | \n",
1557 | " 0.990000 | \n",
1558 | " 1.980000 | \n",
1559 | " 6.732000e+01 | \n",
1560 | " 0.002088 | \n",
1561 | "
\n",
1562 | " \n",
1563 | " totmrc_Range | \n",
1564 | " 17205.0 | \n",
1565 | " 8.801175 | \n",
1566 | " 27.175065 | \n",
1567 | " 0.000000 | \n",
1568 | " 0.000000 | \n",
1569 | " 0.000000 | \n",
1570 | " 0.000000 | \n",
1571 | " 5.999800e+02 | \n",
1572 | " 0.002088 | \n",
1573 | "
\n",
1574 | " \n",
1575 | " mou_Range | \n",
1576 | " 17205.0 | \n",
1577 | " 369.560622 | \n",
1578 | " 420.042160 | \n",
1579 | " 0.000000 | \n",
1580 | " 116.000000 | \n",
1581 | " 242.000000 | \n",
1582 | " 468.000000 | \n",
1583 | " 6.865000e+03 | \n",
1584 | " 0.002088 | \n",
1585 | "
\n",
1586 | " \n",
1587 | " rev_Range | \n",
1588 | " 17205.0 | \n",
1589 | " 42.799289 | \n",
1590 | " 70.035631 | \n",
1591 | " 0.000000 | \n",
1592 | " 1.980000 | \n",
1593 | " 15.750000 | \n",
1594 | " 55.570000 | \n",
1595 | " 1.527970e+03 | \n",
1596 | " 0.002088 | \n",
1597 | "
\n",
1598 | " \n",
1599 | " roam_Mean | \n",
1600 | " 17205.0 | \n",
1601 | " 1.142233 | \n",
1602 | " 6.544958 | \n",
1603 | " 0.000000 | \n",
1604 | " 0.000000 | \n",
1605 | " 0.000000 | \n",
1606 | " 0.257500 | \n",
1607 | " 4.162575e+02 | \n",
1608 | " 0.002088 | \n",
1609 | "
\n",
1610 | " \n",
1611 | " datovr_Mean | \n",
1612 | " 17205.0 | \n",
1613 | " 0.249300 | \n",
1614 | " 2.132651 | \n",
1615 | " 0.000000 | \n",
1616 | " 0.000000 | \n",
1617 | " 0.000000 | \n",
1618 | " 0.000000 | \n",
1619 | " 1.007000e+02 | \n",
1620 | " 0.002088 | \n",
1621 | "
\n",
1622 | " \n",
1623 | " ovrmou_Mean | \n",
1624 | " 17205.0 | \n",
1625 | " 39.815027 | \n",
1626 | " 92.879658 | \n",
1627 | " 0.000000 | \n",
1628 | " 0.000000 | \n",
1629 | " 2.750000 | \n",
1630 | " 41.000000 | \n",
1631 | " 1.887250e+03 | \n",
1632 | " 0.002088 | \n",
1633 | "
\n",
1634 | " \n",
1635 | " ovrrev_Mean | \n",
1636 | " 17205.0 | \n",
1637 | " 13.101876 | \n",
1638 | " 29.493960 | \n",
1639 | " 0.000000 | \n",
1640 | " 0.000000 | \n",
1641 | " 0.975000 | \n",
1642 | " 13.700000 | \n",
1643 | " 6.012125e+02 | \n",
1644 | " 0.002088 | \n",
1645 | "
\n",
1646 | " \n",
1647 | " change_mou | \n",
1648 | " 17153.0 | \n",
1649 | " -8.395409 | \n",
1650 | " 248.516433 | \n",
1651 | " -3875.000000 | \n",
1652 | " -78.250000 | \n",
1653 | " -3.500000 | \n",
1654 | " 70.000000 | \n",
1655 | " 3.712250e+03 | \n",
1656 | " 0.005104 | \n",
1657 | "
\n",
1658 | " \n",
1659 | " change_rev | \n",
1660 | " 17153.0 | \n",
1661 | " -1.693052 | \n",
1662 | " 34.284273 | \n",
1663 | " -626.357500 | \n",
1664 | " -7.082500 | \n",
1665 | " -0.312500 | \n",
1666 | " 1.612500 | \n",
1667 | " 4.191725e+02 | \n",
1668 | " 0.005104 | \n",
1669 | "
\n",
1670 | " \n",
1671 | " hnd_price | \n",
1672 | " 17043.0 | \n",
1673 | " 106.805692 | \n",
1674 | " 61.622379 | \n",
1675 | " 9.989998 | \n",
1676 | " 59.989990 | \n",
1677 | " 129.989990 | \n",
1678 | " 149.989990 | \n",
1679 | " 4.999900e+02 | \n",
1680 | " 0.011484 | \n",
1681 | "
\n",
1682 | " \n",
1683 | " forgntvl | \n",
1684 | " 16935.0 | \n",
1685 | " 0.058636 | \n",
1686 | " 0.234949 | \n",
1687 | " 0.000000 | \n",
1688 | " 0.000000 | \n",
1689 | " 0.000000 | \n",
1690 | " 0.000000 | \n",
1691 | " 1.000000e+00 | \n",
1692 | " 0.017748 | \n",
1693 | "
\n",
1694 | " \n",
1695 | " age1 | \n",
1696 | " 16935.0 | \n",
1697 | " 31.669324 | \n",
1698 | " 22.129690 | \n",
1699 | " 0.000000 | \n",
1700 | " 0.000000 | \n",
1701 | " 36.000000 | \n",
1702 | " 48.000000 | \n",
1703 | " 9.600000e+01 | \n",
1704 | " 0.017748 | \n",
1705 | "
\n",
1706 | " \n",
1707 | " age2 | \n",
1708 | " 16935.0 | \n",
1709 | " 21.225037 | \n",
1710 | " 23.903794 | \n",
1711 | " 0.000000 | \n",
1712 | " 0.000000 | \n",
1713 | " 0.000000 | \n",
1714 | " 42.000000 | \n",
1715 | " 9.800000e+01 | \n",
1716 | " 0.017748 | \n",
1717 | "
\n",
1718 | " \n",
1719 | " avg6qty | \n",
1720 | " 16688.0 | \n",
1721 | " 187.114334 | \n",
1722 | " 195.036224 | \n",
1723 | " 0.000000 | \n",
1724 | " 61.000000 | \n",
1725 | " 131.000000 | \n",
1726 | " 246.000000 | \n",
1727 | " 2.673000e+03 | \n",
1728 | " 0.032075 | \n",
1729 | "
\n",
1730 | " \n",
1731 | " avg6mou | \n",
1732 | " 16688.0 | \n",
1733 | " 531.920662 | \n",
1734 | " 517.294774 | \n",
1735 | " 0.000000 | \n",
1736 | " 173.000000 | \n",
1737 | " 374.000000 | \n",
1738 | " 726.250000 | \n",
1739 | " 5.347000e+03 | \n",
1740 | " 0.032075 | \n",
1741 | "
\n",
1742 | " \n",
1743 | " lor | \n",
1744 | " 12230.0 | \n",
1745 | " 6.377514 | \n",
1746 | " 4.781650 | \n",
1747 | " 0.000000 | \n",
1748 | " 2.000000 | \n",
1749 | " 5.000000 | \n",
1750 | " 10.000000 | \n",
1751 | " 1.500000e+01 | \n",
1752 | " 0.290644 | \n",
1753 | "
\n",
1754 | " \n",
1755 | " pre_hnd_price | \n",
1756 | " 7649.0 | \n",
1757 | " 82.761595 | \n",
1758 | " 60.803160 | \n",
1759 | " 9.989998 | \n",
1760 | " 29.989990 | \n",
1761 | " 59.989990 | \n",
1762 | " 129.989990 | \n",
1763 | " 4.999900e+02 | \n",
1764 | " 0.556348 | \n",
1765 | "
\n",
1766 | " \n",
1767 | " educ1 | \n",
1768 | " 2419.0 | \n",
1769 | " 1.868541 | \n",
1770 | " 0.847854 | \n",
1771 | " 1.000000 | \n",
1772 | " 1.000000 | \n",
1773 | " 2.000000 | \n",
1774 | " 2.000000 | \n",
1775 | " 4.000000e+00 | \n",
1776 | " 0.859695 | \n",
1777 | "
\n",
1778 | " \n",
1779 | "
\n",
1780 | "
76 rows × 9 columns
\n",
1781 | "
"
1782 | ],
1783 | "text/plain": [
1784 | " count mean std \\\n",
1785 | "LOCAL_CUR_SAV_SLOPE 17241.0 -0.011627 0.407344 \n",
1786 | "totmou 17241.0 7842.995152 9244.876680 \n",
1787 | "mou_opkd_Mean 17241.0 1.230280 24.995118 \n",
1788 | "mou_opkv_Mean 17241.0 175.394446 243.886548 \n",
1789 | "opk_dat_Mean 17241.0 0.418402 4.264581 \n",
1790 | "opk_vce_Mean 17241.0 70.563444 98.128628 \n",
1791 | "mou_pead_Mean 17241.0 0.697439 6.788500 \n",
1792 | "mou_peav_Mean 17241.0 183.094184 213.011899 \n",
1793 | "peak_dat_Mean 17241.0 0.357829 3.878087 \n",
1794 | "peak_vce_Mean 17241.0 93.384316 107.881729 \n",
1795 | "mouiwylisv_Mean 17241.0 19.708691 42.675059 \n",
1796 | "iwylis_vce_Mean 17241.0 8.615722 17.234011 \n",
1797 | "mouowylisv_Mean 17241.0 30.144799 50.859667 \n",
1798 | "owylis_vce_Mean 17241.0 26.186648 35.982014 \n",
1799 | "mou_rvce_Mean 17241.0 119.739875 170.938653 \n",
1800 | "mou_cdat_Mean 17241.0 1.930040 28.330685 \n",
1801 | "unan_vce_Mean 17241.0 28.921389 39.442498 \n",
1802 | "unan_dat_Mean 17241.0 0.031417 0.458737 \n",
1803 | "comp_vce_Mean 17241.0 114.545889 122.214639 \n",
1804 | "comp_dat_Mean 17241.0 0.776231 7.644959 \n",
1805 | "custcare_Mean 17241.0 1.949056 5.958278 \n",
1806 | "cc_mou_Mean 17241.0 3.995940 11.376720 \n",
1807 | "avgmou 17241.0 494.788836 452.298412 \n",
1808 | "avgqty 17241.0 179.168816 179.200014 \n",
1809 | "L6M_INDFINA_ALL_TX_AMT 17241.0 4303.363895 76935.023181 \n",
1810 | "LOCAL_FIX_CLOSE_ACC_TX_AMT 17241.0 3453.773522 20665.105788 \n",
1811 | "LOCAL_CUR_MON_AVG_BAL 17241.0 27735.417899 98608.195518 \n",
1812 | "LOCAL_CUR_MON_AVG_BAL_PROP 17241.0 0.373216 0.438130 \n",
1813 | "LOCAL_OVEONEYR_FF_MON_AVG_BAL 17241.0 72023.920103 128981.784041 \n",
1814 | "LOCAL_FIX_MON_AVG_BAL 17241.0 83907.649074 360269.646860 \n",
1815 | "... ... ... ... \n",
1816 | "LOCAL_FIX_OPEN_ACC_TX_AMT 17241.0 5528.391594 33922.675509 \n",
1817 | "LOCAL_FIX_WITHDRAW_TX_AMT 17241.0 1706.323595 18679.511773 \n",
1818 | "SAV_SLOPE 17241.0 -0.004043 0.256375 \n",
1819 | "mou_cvce_Mean 17241.0 238.802099 267.274943 \n",
1820 | "vceovr_Range 17205.0 29.374902 58.098543 \n",
1821 | "vceovr_Mean 17205.0 12.848716 29.276031 \n",
1822 | "roam_Range 17205.0 3.127544 19.692447 \n",
1823 | "datovr_Range 17205.0 0.717182 5.964260 \n",
1824 | "da_Mean 17205.0 0.920998 2.118237 \n",
1825 | "ovrrev_Range 17205.0 29.844381 58.365586 \n",
1826 | "ovrmou_Range 17205.0 89.809416 177.656676 \n",
1827 | "da_Range 17205.0 1.671578 2.987940 \n",
1828 | "totmrc_Range 17205.0 8.801175 27.175065 \n",
1829 | "mou_Range 17205.0 369.560622 420.042160 \n",
1830 | "rev_Range 17205.0 42.799289 70.035631 \n",
1831 | "roam_Mean 17205.0 1.142233 6.544958 \n",
1832 | "datovr_Mean 17205.0 0.249300 2.132651 \n",
1833 | "ovrmou_Mean 17205.0 39.815027 92.879658 \n",
1834 | "ovrrev_Mean 17205.0 13.101876 29.493960 \n",
1835 | "change_mou 17153.0 -8.395409 248.516433 \n",
1836 | "change_rev 17153.0 -1.693052 34.284273 \n",
1837 | "hnd_price 17043.0 106.805692 61.622379 \n",
1838 | "forgntvl 16935.0 0.058636 0.234949 \n",
1839 | "age1 16935.0 31.669324 22.129690 \n",
1840 | "age2 16935.0 21.225037 23.903794 \n",
1841 | "avg6qty 16688.0 187.114334 195.036224 \n",
1842 | "avg6mou 16688.0 531.920662 517.294774 \n",
1843 | "lor 12230.0 6.377514 4.781650 \n",
1844 | "pre_hnd_price 7649.0 82.761595 60.803160 \n",
1845 | "educ1 2419.0 1.868541 0.847854 \n",
1846 | "\n",
1847 | " min 25% 50% \\\n",
1848 | "LOCAL_CUR_SAV_SLOPE -1.000000 -0.087996 0.000000 \n",
1849 | "totmou 0.000000 2450.000000 5098.000000 \n",
1850 | "mou_opkd_Mean 0.000000 0.000000 0.000000 \n",
1851 | "mou_opkv_Mean 0.000000 21.876667 83.176667 \n",
1852 | "opk_dat_Mean 0.000000 0.000000 0.000000 \n",
1853 | "opk_vce_Mean 0.000000 11.666667 36.666667 \n",
1854 | "mou_pead_Mean 0.000000 0.000000 0.000000 \n",
1855 | "mou_peav_Mean 0.000000 43.940000 122.703333 \n",
1856 | "peak_dat_Mean 0.000000 0.000000 0.000000 \n",
1857 | "peak_vce_Mean 0.000000 24.333333 63.666667 \n",
1858 | "mouiwylisv_Mean 0.000000 0.000000 3.983333 \n",
1859 | "iwylis_vce_Mean 0.000000 0.000000 2.333333 \n",
1860 | "mouowylisv_Mean 0.000000 2.860000 13.016667 \n",
1861 | "owylis_vce_Mean 0.000000 3.666667 14.000000 \n",
1862 | "mou_rvce_Mean 0.000000 9.466667 54.290000 \n",
1863 | "mou_cdat_Mean 0.000000 0.000000 0.000000 \n",
1864 | "unan_vce_Mean 0.000000 5.333333 16.666667 \n",
1865 | "unan_dat_Mean 0.000000 0.000000 0.000000 \n",
1866 | "comp_vce_Mean 0.000000 31.666667 79.666667 \n",
1867 | "comp_dat_Mean 0.000000 0.000000 0.000000 \n",
1868 | "custcare_Mean 0.000000 0.000000 0.000000 \n",
1869 | "cc_mou_Mean 0.000000 0.000000 0.000000 \n",
1870 | "avgmou 0.000000 179.710000 365.000000 \n",
1871 | "avgqty 0.000000 64.300000 129.170000 \n",
1872 | "L6M_INDFINA_ALL_TX_AMT 0.000000 0.000000 0.000000 \n",
1873 | "LOCAL_FIX_CLOSE_ACC_TX_AMT 0.000000 0.000000 0.000000 \n",
1874 | "LOCAL_CUR_MON_AVG_BAL 0.000000 51.530000 3232.020000 \n",
1875 | "LOCAL_CUR_MON_AVG_BAL_PROP 0.000000 0.005796 0.091914 \n",
1876 | "LOCAL_OVEONEYR_FF_MON_AVG_BAL 0.000000 0.000000 50000.000000 \n",
1877 | "LOCAL_FIX_MON_AVG_BAL 0.000000 0.000000 56677.270000 \n",
1878 | "... ... ... ... \n",
1879 | "LOCAL_FIX_OPEN_ACC_TX_AMT 0.000000 0.000000 0.000000 \n",
1880 | "LOCAL_FIX_WITHDRAW_TX_AMT 0.000000 0.000000 0.000000 \n",
1881 | "SAV_SLOPE -1.000000 -0.000550 0.000533 \n",
1882 | "mou_cvce_Mean 0.000000 56.900000 157.183333 \n",
1883 | "vceovr_Range 0.000000 0.000000 2.100000 \n",
1884 | "vceovr_Mean 0.000000 0.000000 0.600000 \n",
1885 | "roam_Range 0.000000 0.000000 0.000000 \n",
1886 | "datovr_Range 0.000000 0.000000 0.000000 \n",
1887 | "da_Mean 0.000000 0.000000 0.247500 \n",
1888 | "ovrrev_Range 0.000000 0.000000 3.300000 \n",
1889 | "ovrmou_Range 0.000000 0.000000 9.000000 \n",
1890 | "da_Range 0.000000 0.000000 0.990000 \n",
1891 | "totmrc_Range 0.000000 0.000000 0.000000 \n",
1892 | "mou_Range 0.000000 116.000000 242.000000 \n",
1893 | "rev_Range 0.000000 1.980000 15.750000 \n",
1894 | "roam_Mean 0.000000 0.000000 0.000000 \n",
1895 | "datovr_Mean 0.000000 0.000000 0.000000 \n",
1896 | "ovrmou_Mean 0.000000 0.000000 2.750000 \n",
1897 | "ovrrev_Mean 0.000000 0.000000 0.975000 \n",
1898 | "change_mou -3875.000000 -78.250000 -3.500000 \n",
1899 | "change_rev -626.357500 -7.082500 -0.312500 \n",
1900 | "hnd_price 9.989998 59.989990 129.989990 \n",
1901 | "forgntvl 0.000000 0.000000 0.000000 \n",
1902 | "age1 0.000000 0.000000 36.000000 \n",
1903 | "age2 0.000000 0.000000 0.000000 \n",
1904 | "avg6qty 0.000000 61.000000 131.000000 \n",
1905 | "avg6mou 0.000000 173.000000 374.000000 \n",
1906 | "lor 0.000000 2.000000 5.000000 \n",
1907 | "pre_hnd_price 9.989998 29.989990 59.989990 \n",
1908 | "educ1 1.000000 1.000000 2.000000 \n",
1909 | "\n",
1910 | " 75% max missing_pct \n",
1911 | "LOCAL_CUR_SAV_SLOPE 0.074506 2.000000e+00 0.000000 \n",
1912 | "totmou 9868.000000 1.736084e+05 0.000000 \n",
1913 | "mou_opkd_Mean 0.000000 2.922043e+03 0.000000 \n",
1914 | "mou_opkv_Mean 229.656667 2.687313e+03 0.000000 \n",
1915 | "opk_dat_Mean 0.000000 2.456667e+02 0.000000 \n",
1916 | "opk_vce_Mean 91.666667 1.438000e+03 0.000000 \n",
1917 | "mou_pead_Mean 0.000000 2.902433e+02 0.000000 \n",
1918 | "mou_peav_Mean 242.296667 2.994580e+03 0.000000 \n",
1919 | "peak_dat_Mean 0.000000 2.806667e+02 0.000000 \n",
1920 | "peak_vce_Mean 124.333333 1.921333e+03 0.000000 \n",
1921 | "mouiwylisv_Mean 20.666667 9.210700e+02 0.000000 \n",
1922 | "iwylis_vce_Mean 9.666667 3.446667e+02 0.000000 \n",
1923 | "mouowylisv_Mean 36.880000 1.490253e+03 0.000000 \n",
1924 | "owylis_vce_Mean 35.000000 6.443333e+02 0.000000 \n",
1925 | "mou_rvce_Mean 161.336667 2.138510e+03 0.000000 \n",
1926 | "mou_cdat_Mean 0.000000 3.032050e+03 0.000000 \n",
1927 | "unan_vce_Mean 37.000000 8.143333e+02 0.000000 \n",
1928 | "unan_dat_Mean 0.000000 4.133333e+01 0.000000 \n",
1929 | "comp_vce_Mean 157.333333 1.376667e+03 0.000000 \n",
1930 | "comp_dat_Mean 0.000000 5.263333e+02 0.000000 \n",
1931 | "custcare_Mean 1.666667 3.656667e+02 0.000000 \n",
1932 | "cc_mou_Mean 3.190000 3.091133e+02 0.000000 \n",
1933 | "avgmou 667.330000 5.164290e+03 0.000000 \n",
1934 | "avgqty 234.680000 2.500890e+03 0.000000 \n",
1935 | "L6M_INDFINA_ALL_TX_AMT 0.000000 3.987167e+06 0.000000 \n",
1936 | "LOCAL_FIX_CLOSE_ACC_TX_AMT 0.000000 7.945000e+05 0.000000 \n",
1937 | "LOCAL_CUR_MON_AVG_BAL 23113.460000 7.610110e+06 0.000000 \n",
1938 | "LOCAL_CUR_MON_AVG_BAL_PROP 1.000000 1.000000e+00 0.000000 \n",
1939 | "LOCAL_OVEONEYR_FF_MON_AVG_BAL 95000.000000 5.561334e+06 0.000000 \n",
1940 | "LOCAL_FIX_MON_AVG_BAL 101125.000000 4.358232e+07 0.000000 \n",
1941 | "... ... ... ... \n",
1942 | "LOCAL_FIX_OPEN_ACC_TX_AMT 0.000000 2.586000e+06 0.000000 \n",
1943 | "LOCAL_FIX_WITHDRAW_TX_AMT 0.000000 1.398000e+06 0.000000 \n",
1944 | "SAV_SLOPE 0.035337 1.904013e+00 0.000000 \n",
1945 | "mou_cvce_Mean 329.523333 3.661677e+03 0.000000 \n",
1946 | "vceovr_Range 37.000000 1.215550e+03 0.002088 \n",
1947 | "vceovr_Mean 13.275000 6.012125e+02 0.002088 \n",
1948 | "roam_Range 0.780000 1.561470e+03 0.002088 \n",
1949 | "datovr_Range 0.000000 3.032000e+02 0.002088 \n",
1950 | "da_Mean 0.990000 5.766750e+01 0.002088 \n",
1951 | "ovrrev_Range 37.450000 1.215550e+03 0.002088 \n",
1952 | "ovrmou_Range 112.000000 3.473000e+03 0.002088 \n",
1953 | "da_Range 1.980000 6.732000e+01 0.002088 \n",
1954 | "totmrc_Range 0.000000 5.999800e+02 0.002088 \n",
1955 | "mou_Range 468.000000 6.865000e+03 0.002088 \n",
1956 | "rev_Range 55.570000 1.527970e+03 0.002088 \n",
1957 | "roam_Mean 0.257500 4.162575e+02 0.002088 \n",
1958 | "datovr_Mean 0.000000 1.007000e+02 0.002088 \n",
1959 | "ovrmou_Mean 41.000000 1.887250e+03 0.002088 \n",
1960 | "ovrrev_Mean 13.700000 6.012125e+02 0.002088 \n",
1961 | "change_mou 70.000000 3.712250e+03 0.005104 \n",
1962 | "change_rev 1.612500 4.191725e+02 0.005104 \n",
1963 | "hnd_price 149.989990 4.999900e+02 0.011484 \n",
1964 | "forgntvl 0.000000 1.000000e+00 0.017748 \n",
1965 | "age1 48.000000 9.600000e+01 0.017748 \n",
1966 | "age2 42.000000 9.800000e+01 0.017748 \n",
1967 | "avg6qty 246.000000 2.673000e+03 0.032075 \n",
1968 | "avg6mou 726.250000 5.347000e+03 0.032075 \n",
1969 | "lor 10.000000 1.500000e+01 0.290644 \n",
1970 | "pre_hnd_price 129.989990 4.999900e+02 0.556348 \n",
1971 | "educ1 2.000000 4.000000e+00 0.859695 \n",
1972 | "\n",
1973 | "[76 rows x 9 columns]"
1974 | ]
1975 | },
1976 | "execution_count": 8,
1977 | "metadata": {},
1978 | "output_type": "execute_result"
1979 | }
1980 | ],
1981 | "source": [
1982 | "float_df = model_data.select_dtypes(include=['float']).describe().T  # summary statistics, one row per float column\n",
1983 | "float_df = float_df.assign(missing_pct=model_data.isnull().mean()).sort_values(['missing_pct'])  # missing_pct = fraction of null rows per column\n",
1984 | "float_df"
1985 | ]
1986 | },
1987 | {
1988 | "cell_type": "code",
1989 | "execution_count": 9,
1990 | "metadata": {},
1991 | "outputs": [],
1992 | "source": [
1993 | "# Feature construction: each *_TX_AMT column divided by its matching *_TX_NUM column (a zero divisor yields inf/NaN here).\n",
1994 | "model_data['AVG_LOCAL_CUR_TRANS_TX_AMT'] = model_data['LOCAL_CUR_TRANS_TX_AMT'].div(model_data['LOCAL_CUR_TRANS_TX_NUM'])\n",
1995 | "model_data['AVG_LOCAL_CUR_LASTSAV_TX_AMT'] = model_data['LOCAL_CUR_LASTSAV_TX_AMT'].div(model_data['LOCAL_CUR_LASTSAV_TX_NUM'])"
1996 | ]
1997 | },
1998 | {
1999 | "cell_type": "code",
2000 | "execution_count": 10,
2001 | "metadata": {},
2002 | "outputs": [],
2003 | "source": [
2004 | "maxValueFeatures = ['LOCAL_CUR_SAV_SLOPE','LOCAL_BELONEYR_FF_SLOPE','LOCAL_OVEONEYR_FF_SLOPE','LOCAL_SAV_SLOPE','SAV_SLOPE']\n",
2005 | "model_data['volatilityMax'] = model_data[maxValueFeatures].max(axis=1)  # row-wise maximum of the slope features; vectorised, and NaN entries are skipped instead of giving an order-dependent result"
2006 | ]
2007 | },
2008 | {
2009 | "cell_type": "code",
2010 | "execution_count": 11,
2011 | "metadata": {},
2012 | "outputs": [],
2013 | "source": [
2014 | "# Local-currency current-deposit share of the monthly average balance = 1 - the local-currency fixed-deposit share,\n",
2015 | "# so the column below is redundant and is removed.\n",
2016 | "model_data.drop('LOCAL_CUR_MON_AVG_BAL_PROP', axis=1, inplace=True)"
2017 | ]
2018 | },
2019 | {
2020 | "cell_type": "code",
2021 | "execution_count": 12,
2022 | "metadata": {
2023 | "collapsed": true
2024 | },
2025 | "outputs": [],
2026 | "source": [
2027 | "# Some features can be added together: each TOTAL_* column is the row-wise sum of one column pair.\n",
2028 | "sumup_cols0 = ['LOCAL_CUR_MON_AVG_BAL','LOCAL_FIX_MON_AVG_BAL']\n",
2029 | "sumup_cols1 = ['LOCAL_CUR_WITHDRAW_TX_NUM','LOCAL_FIX_WITHDRAW_TX_NUM']\n",
2030 | "sumup_cols2 = ['LOCAL_CUR_WITHDRAW_TX_AMT','LOCAL_FIX_WITHDRAW_TX_AMT']\n",
2031 | "sumup_cols3 = ['COUNTER_NOT_ACCT_TX_NUM','COUNTER_ACCT_TX_NUM']\n",
2032 | "sumup_cols4 = ['ATM_ALL_TX_NUM','COUNTER_ALL_TX_NUM']\n",
2033 | "sumup_cols5 = ['ATM_ACCT_TX_NUM','COUNTER_ACCT_TX_NUM']\n",
2034 | "sumup_cols6 = ['ATM_ACCT_TX_AMT','COUNTER_ACCT_TX_AMT']\n",
2035 | "sumup_cols7 = ['ATM_NOT_ACCT_TX_NUM','COUNTER_NOT_ACCT_TX_NUM']\n",
2036 | "\n",
2037 | "model_data['TOTAL_LOCAL_MON_AVG_BAL'] = model_data[sumup_cols0].sum(axis=1, skipna=False)  # skipna=False: a NaN in either column gives NaN\n",
2038 | "model_data['TOTAL_WITHDRAW_TX_NUM'] = model_data[sumup_cols1].sum(axis=1, skipna=False)\n",
2039 | "model_data['TOTAL_WITHDRAW_TX_AMT'] = model_data[sumup_cols2].sum(axis=1, skipna=False)\n",
2040 | "model_data['TOTAL_COUNTER_TX_NUM'] = model_data[sumup_cols3].sum(axis=1, skipna=False)\n",
2041 | "model_data['TOTAL_ALL_TX_NUM'] = model_data[sumup_cols4].sum(axis=1, skipna=False)\n",
2042 | "model_data['TOTAL_ACCT_TX_NUM'] = model_data[sumup_cols5].sum(axis=1, skipna=False)\n",
2043 | "model_data['TOTAL_ACCT_TX_AMT'] = model_data[sumup_cols6].sum(axis=1, skipna=False)\n",
2044 | "model_data['TOTAL_NOT_ACCT_TX_NUM'] = model_data[sumup_cols7].sum(axis=1, skipna=False)"
2045 | ]
2046 | },
2047 | {
2048 | "cell_type": "code",
2049 | "execution_count": 13,
2050 | "metadata": {},
2051 | "outputs": [
2052 | {
2053 | "data": {
2054 | "text/plain": [
2055 | "(17241, 133)"
2056 | ]
2057 | },
2058 | "execution_count": 13,
2059 | "metadata": {},
2060 | "output_type": "execute_result"
2061 | }
2062 | ],
2063 | "source": [
2064 | "model_data.shape"
2065 | ]
2066 | },
2067 | {
2068 | "cell_type": "code",
2069 | "execution_count": 14,
2070 | "metadata": {
2071 | "collapsed": true
2072 | },
2073 | "outputs": [],
2074 | "source": [
2075 | "### Feature construction: ratios. RATIO_i = numerator_cols[i] / denominator_cols[i] (the two lists are paired by position).\n",
2076 | "numerator_cols = ['LOCAL_SAV_CUR_ALL_BAL','SAV_CUR_ALL_BAL','ASSET_CUR_ALL_BAL','LOCAL_CUR_WITHDRAW_TX_NUM','LOCAL_CUR_WITHDRAW_TX_AMT','COUNTER_NOT_ACCT_TX_NUM',\n",
2077 | "                  'ATM_ALL_TX_NUM','ATM_ACCT_TX_AMT','ATM_NOT_ACCT_TX_NUM']\n",
2078 | "# NOTE(review): ATM_ALL_TX_NUM is divided by TOTAL_ALL_TX_NUM (ATM_ALL_TX_NUM + COUNTER_ALL_TX_NUM) so the ratio is a share of its own total; dividing by TOTAL_ACCT_TX_NUM mixed two different transaction scopes. Confirm this is the intended pairing.\n",
2079 | "denominator_cols = ['LOCAL_SAV_MON_AVG_BAL','SAV_MON_AVG_BAL','ASSET_MON_AVG_BAL','TOTAL_WITHDRAW_TX_NUM','TOTAL_WITHDRAW_TX_AMT','TOTAL_COUNTER_TX_NUM',\n",
2080 | "                    'TOTAL_ALL_TX_NUM','TOTAL_ACCT_TX_AMT','TOTAL_NOT_ACCT_TX_NUM']\n",
2081 | "new_col_name = [\"RATIO_\"+str(i) for i in range(len(numerator_cols))]\n",
2082 | "for i in range(len(numerator_cols)):\n",
2083 | "    model_data[new_col_name[i]] = model_data[numerator_cols[i]] / model_data[denominator_cols[i]]  # a zero denominator yields inf/NaN"
2084 | ]
2085 | },
2086 | {
2087 | "cell_type": "code",
2088 | "execution_count": 15,
2089 | "metadata": {},
2090 | "outputs": [
2091 | {
2092 | "data": {
2093 | "text/plain": [
2094 | "(17241, 142)"
2095 | ]
2096 | },
2097 | "execution_count": 15,
2098 | "metadata": {},
2099 | "output_type": "execute_result"
2100 | }
2101 | ],
2102 | "source": [
2103 | "model_data.shape"
2104 | ]
2105 | },
2106 | {
2107 | "cell_type": "code",
2108 | "execution_count": 16,
2109 | "metadata": {
2110 | "collapsed": true
2111 | },
2112 | "outputs": [],
2113 | "source": [
2114 | "object_df = model_data.select_dtypes(include=['object']).describe().transpose()  # one row per object column\n",
2115 | "int_df = model_data.select_dtypes(include=['int64']).describe().transpose()  # one row per int64 column\n",
2116 | "float_df = model_data.select_dtypes(include=['float']).describe().transpose()  # one row per float column"
2117 | ]
2118 | },
2119 | {
2120 | "cell_type": "code",
2121 | "execution_count": 17,
2122 | "metadata": {},
2123 | "outputs": [],
2124 | "source": [
2125 | "# Clean the float and int features: map +inf (e.g. from a division by zero) to 1, then fill missing values with 0.\n",
2126 | "model_data_to_fillna = pd.concat([model_data[float_df.index], model_data[int_df.index]], axis=1)\n",
2127 | "model_data_to_fillna = model_data_to_fillna.replace(float('inf'), 1)\n",
2128 | "model_data_to_fillna = model_data_to_fillna.fillna(0)"
2129 | ]
2130 | },
2131 | {
2132 | "cell_type": "code",
2133 | "execution_count": 18,
2134 | "metadata": {},
2135 | "outputs": [],
2136 | "source": [
2137 | "model_data = pd.concat([model_data[object_df.index], model_data_to_fillna], axis=1)"
2138 | ]
2139 | },
2140 | {
2141 | "cell_type": "code",
2142 | "execution_count": 19,
2143 | "metadata": {},
2144 | "outputs": [],
2145 | "source": [
2146 | "# 进行One_hot编码,并且对object类型的特征中的缺失值都设定一个哑变量\n",
2147 | "model_data = pd.get_dummies(model_data, dummy_na=True)"
2148 | ]
2149 | },
2150 | {
2151 | "cell_type": "code",
2152 | "execution_count": 20,
2153 | "metadata": {},
2154 | "outputs": [
2155 | {
2156 | "data": {
2157 | "text/html": [
2158 | "\n",
2159 | "\n",
2172 | "
\n",
2173 | " \n",
2174 | " \n",
2175 | " | \n",
2176 | " LOCAL_CUR_SAV_SLOPE | \n",
2177 | " LOCAL_CUR_MON_AVG_BAL | \n",
2178 | " LOCAL_OVEONEYR_FF_MON_AVG_BAL | \n",
2179 | " LOCAL_FIX_MON_AVG_BAL | \n",
2180 | " LOCAL_FIX_MON_AVG_BAL_PROP | \n",
2181 | " LOCAL_BELONEYR_FF_SLOPE | \n",
2182 | " LOCAL_BELONEYR_FF_MON_AVG_BAL | \n",
2183 | " LOCAL_OVEONEYR_FF_SLOPE | \n",
2184 | " LOCAL_SAV_SLOPE | \n",
2185 | " LOCAL_SAV_CUR_ALL_BAL | \n",
2186 | " ... | \n",
2187 | " kid6_10_nan | \n",
2188 | " kid11_15_U | \n",
2189 | " kid11_15_Y | \n",
2190 | " kid11_15_nan | \n",
2191 | " kid16_17_U | \n",
2192 | " kid16_17_Y | \n",
2193 | " kid16_17_nan | \n",
2194 | " car_buy_New | \n",
2195 | " car_buy_UNKNOWN | \n",
2196 | " car_buy_nan | \n",
2197 | "
\n",
2198 | " \n",
2199 | " \n",
2200 | " \n",
2201 | " 0 | \n",
2202 | " 0.000000 | \n",
2203 | " 0.00 | \n",
2204 | " 50429.68 | \n",
2205 | " 50429.68 | \n",
2206 | " 1.000000 | \n",
2207 | " 0.0 | \n",
2208 | " 0.0 | \n",
2209 | " -0.032395 | \n",
2210 | " -0.032395 | \n",
2211 | " 41000.00 | \n",
2212 | " ... | \n",
2213 | " 0 | \n",
2214 | " 1 | \n",
2215 | " 0 | \n",
2216 | " 0 | \n",
2217 | " 1 | \n",
2218 | " 0 | \n",
2219 | " 0 | \n",
2220 | " 1 | \n",
2221 | " 0 | \n",
2222 | " 0 | \n",
2223 | "
\n",
2224 | " \n",
2225 | " 1 | \n",
2226 | " 1.005692 | \n",
2227 | " 20149.04 | \n",
2228 | " 56047.50 | \n",
2229 | " 56047.50 | \n",
2230 | " 0.735565 | \n",
2231 | " 0.0 | \n",
2232 | " 0.0 | \n",
2233 | " -0.148857 | \n",
2234 | " 0.003965 | \n",
2235 | " 31929.10 | \n",
2236 | " ... | \n",
2237 | " 0 | \n",
2238 | " 1 | \n",
2239 | " 0 | \n",
2240 | " 0 | \n",
2241 | " 1 | \n",
2242 | " 0 | \n",
2243 | " 0 | \n",
2244 | " 1 | \n",
2245 | " 0 | \n",
2246 | " 0 | \n",
2247 | "
\n",
2248 | " \n",
2249 | " 2 | \n",
2250 | " 0.000562 | \n",
2251 | " 17.81 | \n",
2252 | " 0.00 | \n",
2253 | " 0.00 | \n",
2254 | " 0.000000 | \n",
2255 | " 0.0 | \n",
2256 | " 0.0 | \n",
2257 | " 0.000000 | \n",
2258 | " 0.000562 | \n",
2259 | " 17.82 | \n",
2260 | " ... | \n",
2261 | " 0 | \n",
2262 | " 1 | \n",
2263 | " 0 | \n",
2264 | " 0 | \n",
2265 | " 1 | \n",
2266 | " 0 | \n",
2267 | " 0 | \n",
2268 | " 1 | \n",
2269 | " 0 | \n",
2270 | " 0 | \n",
2271 | "
\n",
2272 | " \n",
2273 | " 3 | \n",
2274 | " -0.550912 | \n",
2275 | " 29359.21 | \n",
2276 | " 0.00 | \n",
2277 | " 0.00 | \n",
2278 | " 0.000000 | \n",
2279 | " 0.0 | \n",
2280 | " 0.0 | \n",
2281 | " 0.000000 | \n",
2282 | " -0.550912 | \n",
2283 | " 33273.57 | \n",
2284 | " ... | \n",
2285 | " 0 | \n",
2286 | " 1 | \n",
2287 | " 0 | \n",
2288 | " 0 | \n",
2289 | " 1 | \n",
2290 | " 0 | \n",
2291 | " 0 | \n",
2292 | " 0 | \n",
2293 | " 1 | \n",
2294 | " 0 | \n",
2295 | "
\n",
2296 | " \n",
2297 | " 4 | \n",
2298 | " 0.000288 | \n",
2299 | " 34.68 | \n",
2300 | " 0.00 | \n",
2301 | " 0.00 | \n",
2302 | " 0.000000 | \n",
2303 | " 0.0 | \n",
2304 | " 0.0 | \n",
2305 | " 0.000000 | \n",
2306 | " 0.000288 | \n",
2307 | " 34.70 | \n",
2308 | " ... | \n",
2309 | " 0 | \n",
2310 | " 1 | \n",
2311 | " 0 | \n",
2312 | " 0 | \n",
2313 | " 0 | \n",
2314 | " 1 | \n",
2315 | " 0 | \n",
2316 | " 1 | \n",
2317 | " 0 | \n",
2318 | " 0 | \n",
2319 | "
\n",
2320 | " \n",
2321 | "
\n",
2322 | "
5 rows × 178 columns
\n",
2323 | "
"
2324 | ],
2325 | "text/plain": [
2326 | " LOCAL_CUR_SAV_SLOPE LOCAL_CUR_MON_AVG_BAL LOCAL_OVEONEYR_FF_MON_AVG_BAL \\\n",
2327 | "0 0.000000 0.00 50429.68 \n",
2328 | "1 1.005692 20149.04 56047.50 \n",
2329 | "2 0.000562 17.81 0.00 \n",
2330 | "3 -0.550912 29359.21 0.00 \n",
2331 | "4 0.000288 34.68 0.00 \n",
2332 | "\n",
2333 | " LOCAL_FIX_MON_AVG_BAL LOCAL_FIX_MON_AVG_BAL_PROP LOCAL_BELONEYR_FF_SLOPE \\\n",
2334 | "0 50429.68 1.000000 0.0 \n",
2335 | "1 56047.50 0.735565 0.0 \n",
2336 | "2 0.00 0.000000 0.0 \n",
2337 | "3 0.00 0.000000 0.0 \n",
2338 | "4 0.00 0.000000 0.0 \n",
2339 | "\n",
2340 | " LOCAL_BELONEYR_FF_MON_AVG_BAL LOCAL_OVEONEYR_FF_SLOPE LOCAL_SAV_SLOPE \\\n",
2341 | "0 0.0 -0.032395 -0.032395 \n",
2342 | "1 0.0 -0.148857 0.003965 \n",
2343 | "2 0.0 0.000000 0.000562 \n",
2344 | "3 0.0 0.000000 -0.550912 \n",
2345 | "4 0.0 0.000000 0.000288 \n",
2346 | "\n",
2347 | " LOCAL_SAV_CUR_ALL_BAL ... kid6_10_nan kid11_15_U kid11_15_Y \\\n",
2348 | "0 41000.00 ... 0 1 0 \n",
2349 | "1 31929.10 ... 0 1 0 \n",
2350 | "2 17.82 ... 0 1 0 \n",
2351 | "3 33273.57 ... 0 1 0 \n",
2352 | "4 34.70 ... 0 1 0 \n",
2353 | "\n",
2354 | " kid11_15_nan kid16_17_U kid16_17_Y kid16_17_nan car_buy_New \\\n",
2355 | "0 0 1 0 0 1 \n",
2356 | "1 0 1 0 0 1 \n",
2357 | "2 0 1 0 0 1 \n",
2358 | "3 0 1 0 0 0 \n",
2359 | "4 0 0 1 0 1 \n",
2360 | "\n",
2361 | " car_buy_UNKNOWN car_buy_nan \n",
2362 | "0 0 0 \n",
2363 | "1 0 0 \n",
2364 | "2 0 0 \n",
2365 | "3 1 0 \n",
2366 | "4 0 0 \n",
2367 | "\n",
2368 | "[5 rows x 178 columns]"
2369 | ]
2370 | },
2371 | "execution_count": 20,
2372 | "metadata": {},
2373 | "output_type": "execute_result"
2374 | }
2375 | ],
2376 | "source": [
2377 | "model_data.head()"
2378 | ]
2379 | },
2380 | {
2381 | "cell_type": "code",
2382 | "execution_count": 21,
2383 | "metadata": {},
2384 | "outputs": [
2385 | {
2386 | "data": {
2387 | "text/plain": [
2388 | "(17241, 178)"
2389 | ]
2390 | },
2391 | "execution_count": 21,
2392 | "metadata": {},
2393 | "output_type": "execute_result"
2394 | }
2395 | ],
2396 | "source": [
2397 | "model_data.shape"
2398 | ]
2399 | },
2400 | {
2401 | "cell_type": "code",
2402 | "execution_count": 22,
2403 | "metadata": {
2404 | "collapsed": true
2405 | },
2406 | "outputs": [],
2407 | "source": [
2408 | "# The features have inconsistent value ranges, so min-max scale every column into [0, 1].\n",
2409 | "# A constant column (max == min) gives 0/0 = NaN and therefore ends up as 0 after the fill below.\n",
2410 | "model_data = model_data.sub(model_data.min()).div(model_data.max() - model_data.min())\n",
2411 | "model_data = model_data.replace(float('inf'), 1)\n",
2412 | "model_data = model_data.fillna(0)"
2413 | ]
2414 | },
2415 | {
2416 | "cell_type": "code",
2417 | "execution_count": 23,
2418 | "metadata": {},
2419 | "outputs": [
2420 | {
2421 | "name": "stdout",
2422 | "output_type": "stream",
2423 | "text": [
2424 | "False\n",
2425 | "False\n",
2426 | "False\n",
2427 | "False\n",
2428 | "False\n",
2429 | "False\n",
2430 | "False\n",
2431 | "False\n",
2432 | "False\n",
2433 | "False\n",
2434 | "False\n",
2435 | "False\n",
2436 | "False\n",
2437 | "False\n",
2438 | "False\n",
2439 | "False\n",
2440 | "False\n",
2441 | "False\n",
2442 | "False\n",
2443 | "False\n",
2444 | "False\n",
2445 | "False\n",
2446 | "False\n",
2447 | "False\n",
2448 | "False\n",
2449 | "False\n",
2450 | "False\n",
2451 | "False\n",
2452 | "False\n",
2453 | "False\n",
2454 | "False\n",
2455 | "False\n",
2456 | "False\n",
2457 | "False\n",
2458 | "False\n",
2459 | "False\n",
2460 | "False\n",
2461 | "False\n",
2462 | "False\n",
2463 | "False\n",
2464 | "False\n",
2465 | "False\n",
2466 | "False\n",
2467 | "False\n",
2468 | "False\n",
2469 | "False\n",
2470 | "False\n",
2471 | "False\n",
2472 | "False\n",
2473 | "False\n",
2474 | "False\n",
2475 | "False\n",
2476 | "False\n",
2477 | "False\n",
2478 | "False\n",
2479 | "False\n",
2480 | "False\n",
2481 | "False\n",
2482 | "False\n",
2483 | "False\n",
2484 | "False\n",
2485 | "False\n",
2486 | "False\n",
2487 | "False\n",
2488 | "False\n",
2489 | "False\n",
2490 | "False\n",
2491 | "False\n",
2492 | "False\n",
2493 | "False\n",
2494 | "False\n",
2495 | "False\n",
2496 | "False\n",
2497 | "False\n",
2498 | "False\n",
2499 | "False\n",
2500 | "False\n",
2501 | "False\n",
2502 | "False\n",
2503 | "False\n",
2504 | "False\n",
2505 | "False\n",
2506 | "False\n",
2507 | "False\n",
2508 | "False\n",
2509 | "False\n",
2510 | "False\n",
2511 | "False\n",
2512 | "False\n",
2513 | "False\n",
2514 | "False\n",
2515 | "False\n",
2516 | "False\n",
2517 | "False\n",
2518 | "False\n",
2519 | "False\n",
2520 | "False\n",
2521 | "False\n",
2522 | "False\n",
2523 | "False\n",
2524 | "False\n",
2525 | "False\n",
2526 | "False\n",
2527 | "False\n",
2528 | "False\n",
2529 | "False\n",
2530 | "False\n",
2531 | "False\n",
2532 | "False\n",
2533 | "False\n",
2534 | "False\n",
2535 | "False\n",
2536 | "False\n",
2537 | "False\n",
2538 | "False\n",
2539 | "False\n",
2540 | "False\n",
2541 | "False\n",
2542 | "False\n",
2543 | "False\n",
2544 | "False\n",
2545 | "False\n",
2546 | "False\n",
2547 | "False\n",
2548 | "False\n",
2549 | "False\n",
2550 | "False\n",
2551 | "False\n",
2552 | "False\n",
2553 | "False\n",
2554 | "False\n",
2555 | "False\n",
2556 | "False\n",
2557 | "False\n",
2558 | "False\n",
2559 | "False\n",
2560 | "False\n",
2561 | "False\n",
2562 | "False\n",
2563 | "False\n",
2564 | "False\n",
2565 | "False\n",
2566 | "False\n",
2567 | "False\n",
2568 | "False\n",
2569 | "False\n",
2570 | "False\n",
2571 | "False\n",
2572 | "False\n",
2573 | "False\n",
2574 | "False\n",
2575 | "False\n",
2576 | "False\n",
2577 | "False\n",
2578 | "False\n",
2579 | "False\n",
2580 | "False\n",
2581 | "False\n",
2582 | "False\n",
2583 | "False\n",
2584 | "False\n",
2585 | "False\n",
2586 | "False\n",
2587 | "False\n",
2588 | "False\n",
2589 | "False\n",
2590 | "False\n",
2591 | "False\n",
2592 | "False\n",
2593 | "False\n",
2594 | "False\n",
2595 | "False\n",
2596 | "False\n",
2597 | "False\n",
2598 | "False\n",
2599 | "False\n",
2600 | "False\n",
2601 | "False\n"
2602 | ]
2603 | }
2604 | ],
2605 | "source": [
2606 | "null_test = model_data.isnull().any()  # per column: True if any value is still null\n",
2607 | "for i in null_test.values:\n",
2608 | "    print(i)"
2609 | ]
2610 | },
2611 | {
2612 | "cell_type": "code",
2613 | "execution_count": 24,
2614 | "metadata": {
2615 | "collapsed": true
2616 | },
2617 | "outputs": [],
2618 | "source": [
2619 | "model_data.to_csv('./model_data.csv', encoding='utf-8', index=False)"
2620 | ]
2621 | },
2622 | {
2623 | "cell_type": "code",
2624 | "execution_count": 25,
2625 | "metadata": {},
2626 | "outputs": [
2627 | {
2628 | "data": {
2629 | "text/plain": [
2630 | "0.0 15500\n",
2631 | "1.0 1741\n",
2632 | "Name: CHURN_CUST_IND, dtype: int64"
2633 | ]
2634 | },
2635 | "execution_count": 25,
2636 | "metadata": {},
2637 | "output_type": "execute_result"
2638 | }
2639 | ],
2640 | "source": [
2641 | "model_data['CHURN_CUST_IND'].value_counts()"
2642 | ]
2643 | },
2644 | {
2645 | "cell_type": "code",
2646 | "execution_count": 26,
2647 | "metadata": {
2648 | "collapsed": true
2649 | },
2650 | "outputs": [],
2651 | "source": [
2652 | "model_data_des = model_data.describe().T"
2653 | ]
2654 | },
2655 | {
2656 | "cell_type": "code",
2657 | "execution_count": 27,
2658 | "metadata": {},
2659 | "outputs": [
2660 | {
2661 | "data": {
2662 | "text/html": [
2663 | "\n",
2664 | "\n",
2677 | "
\n",
2678 | " \n",
2679 | " \n",
2680 | " | \n",
2681 | " count | \n",
2682 | " mean | \n",
2683 | " std | \n",
2684 | " min | \n",
2685 | " 25% | \n",
2686 | " 50% | \n",
2687 | " 75% | \n",
2688 | " max | \n",
2689 | "
\n",
2690 | " \n",
2691 | " \n",
2692 | " \n",
2693 | " LOCAL_CUR_SAV_SLOPE | \n",
2694 | " 17241.0 | \n",
2695 | " 0.329458 | \n",
2696 | " 0.135781 | \n",
2697 | " 0.0 | \n",
2698 | " 0.304001 | \n",
2699 | " 0.333333 | \n",
2700 | " 0.358169 | \n",
2701 | " 1.0 | \n",
2702 | "
\n",
2703 | " \n",
2704 | " LOCAL_CUR_MON_AVG_BAL | \n",
2705 | " 17241.0 | \n",
2706 | " 0.003645 | \n",
2707 | " 0.012958 | \n",
2708 | " 0.0 | \n",
2709 | " 0.000007 | \n",
2710 | " 0.000425 | \n",
2711 | " 0.003037 | \n",
2712 | " 1.0 | \n",
2713 | "
\n",
2714 | " \n",
2715 | " LOCAL_OVEONEYR_FF_MON_AVG_BAL | \n",
2716 | " 17241.0 | \n",
2717 | " 0.012951 | \n",
2718 | " 0.023193 | \n",
2719 | " 0.0 | \n",
2720 | " 0.000000 | \n",
2721 | " 0.008991 | \n",
2722 | " 0.017082 | \n",
2723 | " 1.0 | \n",
2724 | "
\n",
2725 | " \n",
2726 | " LOCAL_FIX_MON_AVG_BAL | \n",
2727 | " 17241.0 | \n",
2728 | " 0.001925 | \n",
2729 | " 0.008266 | \n",
2730 | " 0.0 | \n",
2731 | " 0.000000 | \n",
2732 | " 0.001300 | \n",
2733 | " 0.002320 | \n",
2734 | " 1.0 | \n",
2735 | "
\n",
2736 | " \n",
2737 | " LOCAL_FIX_MON_AVG_BAL_PROP | \n",
2738 | " 17241.0 | \n",
2739 | " 0.587804 | \n",
2740 | " 0.443246 | \n",
2741 | " 0.0 | \n",
2742 | " 0.000000 | \n",
2743 | " 0.854396 | \n",
2744 | " 0.990436 | \n",
2745 | " 1.0 | \n",
2746 | "
\n",
2747 | " \n",
2748 | " LOCAL_BELONEYR_FF_SLOPE | \n",
2749 | " 17241.0 | \n",
2750 | " 0.350814 | \n",
2751 | " 0.088405 | \n",
2752 | " 0.0 | \n",
2753 | " 0.344437 | \n",
2754 | " 0.344437 | \n",
2755 | " 0.344437 | \n",
2756 | " 1.0 | \n",
2757 | "
\n",
2758 | " \n",
2759 | " LOCAL_BELONEYR_FF_MON_AVG_BAL | \n",
2760 | " 17241.0 | \n",
2761 | " 0.000271 | \n",
2762 | " 0.007734 | \n",
2763 | " 0.0 | \n",
2764 | " 0.000000 | \n",
2765 | " 0.000000 | \n",
2766 | " 0.000000 | \n",
2767 | " 1.0 | \n",
2768 | "
\n",
2769 | " \n",
2770 | " LOCAL_OVEONEYR_FF_SLOPE | \n",
2771 | " 17241.0 | \n",
2772 | " 0.350590 | \n",
2773 | " 0.070418 | \n",
2774 | " 0.0 | \n",
2775 | " 0.344442 | \n",
2776 | " 0.344442 | \n",
2777 | " 0.345026 | \n",
2778 | " 1.0 | \n",
2779 | "
\n",
2780 | " \n",
2781 | " LOCAL_SAV_SLOPE | \n",
2782 | " 17241.0 | \n",
2783 | " 0.342997 | \n",
2784 | " 0.088393 | \n",
2785 | " 0.0 | \n",
2786 | " 0.344164 | \n",
2787 | " 0.344535 | \n",
2788 | " 0.356539 | \n",
2789 | " 1.0 | \n",
2790 | "
\n",
2791 | " \n",
2792 | " LOCAL_SAV_CUR_ALL_BAL | \n",
2793 | " 17241.0 | \n",
2794 | " 0.002509 | \n",
2795 | " 0.008524 | \n",
2796 | " 0.0 | \n",
2797 | " 0.001131 | \n",
2798 | " 0.001710 | \n",
2799 | " 0.002807 | \n",
2800 | " 1.0 | \n",
2801 | "
\n",
2802 | " \n",
2803 | " LOCAL_SAV_MON_AVG_BAL | \n",
2804 | " 17241.0 | \n",
2805 | " 0.002562 | \n",
2806 | " 0.008540 | \n",
2807 | " 0.0 | \n",
2808 | " 0.001160 | \n",
2809 | " 0.001749 | \n",
2810 | " 0.002861 | \n",
2811 | " 1.0 | \n",
2812 | "
\n",
2813 | " \n",
2814 | " SAV_SLOPE | \n",
2815 | " 17241.0 | \n",
2816 | " 0.342959 | \n",
2817 | " 0.088283 | \n",
2818 | " 0.0 | \n",
2819 | " 0.344162 | \n",
2820 | " 0.344535 | \n",
2821 | " 0.356519 | \n",
2822 | " 1.0 | \n",
2823 | "
\n",
2824 | " \n",
2825 | " SAV_CUR_ALL_BAL | \n",
2826 | " 17241.0 | \n",
2827 | " 0.002511 | \n",
2828 | " 0.008525 | \n",
2829 | " 0.0 | \n",
2830 | " 0.001131 | \n",
2831 | " 0.001713 | \n",
2832 | " 0.002809 | \n",
2833 | " 1.0 | \n",
2834 | "
\n",
2835 | " \n",
2836 | " SAV_MON_AVG_BAL | \n",
2837 | " 17241.0 | \n",
2838 | " 0.002563 | \n",
2839 | " 0.008541 | \n",
2840 | " 0.0 | \n",
2841 | " 0.001161 | \n",
2842 | " 0.001751 | \n",
2843 | " 0.002862 | \n",
2844 | " 1.0 | \n",
2845 | "
\n",
2846 | " \n",
2847 | " FR_SAV_CUR_ALL_BAL | \n",
2848 | " 17241.0 | \n",
2849 | " 0.000441 | \n",
2850 | " 0.014429 | \n",
2851 | " 0.0 | \n",
2852 | " 0.000000 | \n",
2853 | " 0.000000 | \n",
2854 | " 0.000000 | \n",
2855 | " 1.0 | \n",
2856 | "
\n",
2857 | " \n",
2858 | " ASSET_CUR_ALL_BAL | \n",
2859 | " 17241.0 | \n",
2860 | " 0.002650 | \n",
2861 | " 0.008598 | \n",
2862 | " 0.0 | \n",
2863 | " 0.001142 | \n",
2864 | " 0.001781 | \n",
2865 | " 0.002941 | \n",
2866 | " 1.0 | \n",
2867 | "
\n",
2868 | " \n",
2869 | " ASSET_MON_AVG_BAL | \n",
2870 | " 17241.0 | \n",
2871 | " 0.002701 | \n",
2872 | " 0.008613 | \n",
2873 | " 0.0 | \n",
2874 | " 0.001193 | \n",
2875 | " 0.001816 | \n",
2876 | " 0.002988 | \n",
2877 | " 1.0 | \n",
2878 | "
\n",
2879 | " \n",
2880 | " LOCAL_CUR_TRANS_TX_AMT | \n",
2881 | " 17241.0 | \n",
2882 | " 0.000417 | \n",
2883 | " 0.010761 | \n",
2884 | " 0.0 | \n",
2885 | " 0.000000 | \n",
2886 | " 0.000000 | \n",
2887 | " 0.000000 | \n",
2888 | " 1.0 | \n",
2889 | "
\n",
2890 | " \n",
2891 | " LOCAL_CUR_LASTSAV_TX_AMT | \n",
2892 | " 17241.0 | \n",
2893 | " 0.001628 | \n",
2894 | " 0.013896 | \n",
2895 | " 0.0 | \n",
2896 | " 0.000000 | \n",
2897 | " 0.000000 | \n",
2898 | " 0.000000 | \n",
2899 | " 1.0 | \n",
2900 | "
\n",
2901 | " \n",
2902 | " LOCAL_CUR_WITHDRAW_TX_AMT | \n",
2903 | " 17241.0 | \n",
2904 | " 0.002978 | \n",
2905 | " 0.018774 | \n",
2906 | " 0.0 | \n",
2907 | " 0.000000 | \n",
2908 | " 0.000000 | \n",
2909 | " 0.000492 | \n",
2910 | " 1.0 | \n",
2911 | "
\n",
2912 | " \n",
2913 | " LOCAL_FIX_OPEN_ACC_TX_AMT | \n",
2914 | " 17241.0 | \n",
2915 | " 0.002138 | \n",
2916 | " 0.013118 | \n",
2917 | " 0.0 | \n",
2918 | " 0.000000 | \n",
2919 | " 0.000000 | \n",
2920 | " 0.000000 | \n",
2921 | " 1.0 | \n",
2922 | "
\n",
2923 | " \n",
2924 | " LOCAL_FIX_WITHDRAW_TX_AMT | \n",
2925 | " 17241.0 | \n",
2926 | " 0.001221 | \n",
2927 | " 0.013362 | \n",
2928 | " 0.0 | \n",
2929 | " 0.000000 | \n",
2930 | " 0.000000 | \n",
2931 | " 0.000000 | \n",
2932 | " 1.0 | \n",
2933 | "
\n",
2934 | " \n",
2935 | " LOCAL_FIX_CLOSE_ACC_TX_AMT | \n",
2936 | " 17241.0 | \n",
2937 | " 0.004347 | \n",
2938 | " 0.026010 | \n",
2939 | " 0.0 | \n",
2940 | " 0.000000 | \n",
2941 | " 0.000000 | \n",
2942 | " 0.000000 | \n",
2943 | " 1.0 | \n",
2944 | "
\n",
2945 | " \n",
2946 | " L6M_INDFINA_ALL_TX_AMT | \n",
2947 | " 17241.0 | \n",
2948 | " 0.001079 | \n",
2949 | " 0.019296 | \n",
2950 | " 0.0 | \n",
2951 | " 0.000000 | \n",
2952 | " 0.000000 | \n",
2953 | " 0.000000 | \n",
2954 | " 1.0 | \n",
2955 | "
\n",
2956 | " \n",
2957 | " da_Mean | \n",
2958 | " 17241.0 | \n",
2959 | " 0.015937 | \n",
2960 | " 0.036701 | \n",
2961 | " 0.0 | \n",
2962 | " 0.000000 | \n",
2963 | " 0.004292 | \n",
2964 | " 0.017167 | \n",
2965 | " 1.0 | \n",
2966 | "
\n",
2967 | " \n",
2968 | " ovrmou_Mean | \n",
2969 | " 17241.0 | \n",
2970 | " 0.021053 | \n",
2971 | " 0.049172 | \n",
2972 | " 0.0 | \n",
2973 | " 0.000000 | \n",
2974 | " 0.001457 | \n",
2975 | " 0.021592 | \n",
2976 | " 1.0 | \n",
2977 | "
\n",
2978 | " \n",
2979 | " ovrrev_Mean | \n",
2980 | " 17241.0 | \n",
2981 | " 0.021747 | \n",
2982 | " 0.049016 | \n",
2983 | " 0.0 | \n",
2984 | " 0.000000 | \n",
2985 | " 0.001622 | \n",
2986 | " 0.022766 | \n",
2987 | " 1.0 | \n",
2988 | "
\n",
2989 | " \n",
2990 | " vceovr_Mean | \n",
2991 | " 17241.0 | \n",
2992 | " 0.021327 | \n",
2993 | " 0.048654 | \n",
2994 | " 0.0 | \n",
2995 | " 0.000000 | \n",
2996 | " 0.000873 | \n",
2997 | " 0.021976 | \n",
2998 | " 1.0 | \n",
2999 | "
\n",
3000 | " \n",
3001 | " datovr_Mean | \n",
3002 | " 17241.0 | \n",
3003 | " 0.002470 | \n",
3004 | " 0.021156 | \n",
3005 | " 0.0 | \n",
3006 | " 0.000000 | \n",
3007 | " 0.000000 | \n",
3008 | " 0.000000 | \n",
3009 | " 1.0 | \n",
3010 | "
\n",
3011 | " \n",
3012 | " roam_Mean | \n",
3013 | " 17241.0 | \n",
3014 | " 0.002738 | \n",
3015 | " 0.015707 | \n",
3016 | " 0.0 | \n",
3017 | " 0.000000 | \n",
3018 | " 0.000000 | \n",
3019 | " 0.000619 | \n",
3020 | " 1.0 | \n",
3021 | "
\n",
3022 | " \n",
3023 | " ... | \n",
3024 | " ... | \n",
3025 | " ... | \n",
3026 | " ... | \n",
3027 | " ... | \n",
3028 | " ... | \n",
3029 | " ... | \n",
3030 | " ... | \n",
3031 | " ... | \n",
3032 | "
\n",
3033 | " \n",
3034 | " marital_nan | \n",
3035 | " 17241.0 | \n",
3036 | " 0.017748 | \n",
3037 | " 0.132039 | \n",
3038 | " 0.0 | \n",
3039 | " 0.000000 | \n",
3040 | " 0.000000 | \n",
3041 | " 0.000000 | \n",
3042 | " 1.0 | \n",
3043 | "
\n",
3044 | " \n",
3045 | " wrkwoman_Y | \n",
3046 | " 17241.0 | \n",
3047 | " 0.129343 | \n",
3048 | " 0.335589 | \n",
3049 | " 0.0 | \n",
3050 | " 0.000000 | \n",
3051 | " 0.000000 | \n",
3052 | " 0.000000 | \n",
3053 | " 1.0 | \n",
3054 | "
\n",
3055 | " \n",
3056 | " wrkwoman_nan | \n",
3057 | " 17241.0 | \n",
3058 | " 0.870657 | \n",
3059 | " 0.335589 | \n",
3060 | " 0.0 | \n",
3061 | " 1.000000 | \n",
3062 | " 1.000000 | \n",
3063 | " 1.000000 | \n",
3064 | " 1.0 | \n",
3065 | "
\n",
3066 | " \n",
3067 | " proptype_A | \n",
3068 | " 17241.0 | \n",
3069 | " 0.259092 | \n",
3070 | " 0.438149 | \n",
3071 | " 0.0 | \n",
3072 | " 0.000000 | \n",
3073 | " 0.000000 | \n",
3074 | " 1.000000 | \n",
3075 | " 1.0 | \n",
3076 | "
\n",
3077 | " \n",
3078 | " proptype_B | \n",
3079 | " 17241.0 | \n",
3080 | " 0.015370 | \n",
3081 | " 0.123024 | \n",
3082 | " 0.0 | \n",
3083 | " 0.000000 | \n",
3084 | " 0.000000 | \n",
3085 | " 0.000000 | \n",
3086 | " 1.0 | \n",
3087 | "
\n",
3088 | " \n",
3089 | " proptype_D | \n",
3090 | " 17241.0 | \n",
3091 | " 0.006902 | \n",
3092 | " 0.082794 | \n",
3093 | " 0.0 | \n",
3094 | " 0.000000 | \n",
3095 | " 0.000000 | \n",
3096 | " 0.000000 | \n",
3097 | " 1.0 | \n",
3098 | "
\n",
3099 | " \n",
3100 | " proptype_E | \n",
3101 | " 17241.0 | \n",
3102 | " 0.003770 | \n",
3103 | " 0.061287 | \n",
3104 | " 0.0 | \n",
3105 | " 0.000000 | \n",
3106 | " 0.000000 | \n",
3107 | " 0.000000 | \n",
3108 | " 1.0 | \n",
3109 | "
\n",
3110 | " \n",
3111 | " proptype_G | \n",
3112 | " 17241.0 | \n",
3113 | " 0.000870 | \n",
3114 | " 0.029484 | \n",
3115 | " 0.0 | \n",
3116 | " 0.000000 | \n",
3117 | " 0.000000 | \n",
3118 | " 0.000000 | \n",
3119 | " 1.0 | \n",
3120 | "
\n",
3121 | " \n",
3122 | " proptype_M | \n",
3123 | " 17241.0 | \n",
3124 | " 0.001972 | \n",
3125 | " 0.044365 | \n",
3126 | " 0.0 | \n",
3127 | " 0.000000 | \n",
3128 | " 0.000000 | \n",
3129 | " 0.000000 | \n",
3130 | " 1.0 | \n",
3131 | "
\n",
3132 | " \n",
3133 | " proptype_nan | \n",
3134 | " 17241.0 | \n",
3135 | " 0.712024 | \n",
3136 | " 0.452833 | \n",
3137 | " 0.0 | \n",
3138 | " 0.000000 | \n",
3139 | " 1.000000 | \n",
3140 | " 1.000000 | \n",
3141 | " 1.0 | \n",
3142 | "
\n",
3143 | " \n",
3144 | " pcowner_Y | \n",
3145 | " 17241.0 | \n",
3146 | " 0.187460 | \n",
3147 | " 0.390292 | \n",
3148 | " 0.0 | \n",
3149 | " 0.000000 | \n",
3150 | " 0.000000 | \n",
3151 | " 0.000000 | \n",
3152 | " 1.0 | \n",
3153 | "
\n",
3154 | " \n",
3155 | " pcowner_nan | \n",
3156 | " 17241.0 | \n",
3157 | " 0.812540 | \n",
3158 | " 0.390292 | \n",
3159 | " 0.0 | \n",
3160 | " 1.000000 | \n",
3161 | " 1.000000 | \n",
3162 | " 1.000000 | \n",
3163 | " 1.0 | \n",
3164 | "
\n",
3165 | " \n",
3166 | " kid0_2_U | \n",
3167 | " 17241.0 | \n",
3168 | " 0.943623 | \n",
3169 | " 0.230655 | \n",
3170 | " 0.0 | \n",
3171 | " 1.000000 | \n",
3172 | " 1.000000 | \n",
3173 | " 1.000000 | \n",
3174 | " 1.0 | \n",
3175 | "
\n",
3176 | " \n",
3177 | " kid0_2_Y | \n",
3178 | " 17241.0 | \n",
3179 | " 0.038629 | \n",
3180 | " 0.192714 | \n",
3181 | " 0.0 | \n",
3182 | " 0.000000 | \n",
3183 | " 0.000000 | \n",
3184 | " 0.000000 | \n",
3185 | " 1.0 | \n",
3186 | "
\n",
3187 | " \n",
3188 | " kid0_2_nan | \n",
3189 | " 17241.0 | \n",
3190 | " 0.017748 | \n",
3191 | " 0.132039 | \n",
3192 | " 0.0 | \n",
3193 | " 0.000000 | \n",
3194 | " 0.000000 | \n",
3195 | " 0.000000 | \n",
3196 | " 1.0 | \n",
3197 | "
\n",
3198 | " \n",
3199 | " kid3_5_U | \n",
3200 | " 17241.0 | \n",
3201 | " 0.937591 | \n",
3202 | " 0.241905 | \n",
3203 | " 0.0 | \n",
3204 | " 1.000000 | \n",
3205 | " 1.000000 | \n",
3206 | " 1.000000 | \n",
3207 | " 1.0 | \n",
3208 | "
\n",
3209 | " \n",
3210 | " kid3_5_Y | \n",
3211 | " 17241.0 | \n",
3212 | " 0.044661 | \n",
3213 | " 0.206564 | \n",
3214 | " 0.0 | \n",
3215 | " 0.000000 | \n",
3216 | " 0.000000 | \n",
3217 | " 0.000000 | \n",
3218 | " 1.0 | \n",
3219 | "
\n",
3220 | " \n",
3221 | " kid3_5_nan | \n",
3222 | " 17241.0 | \n",
3223 | " 0.017748 | \n",
3224 | " 0.132039 | \n",
3225 | " 0.0 | \n",
3226 | " 0.000000 | \n",
3227 | " 0.000000 | \n",
3228 | " 0.000000 | \n",
3229 | " 1.0 | \n",
3230 | "
\n",
3231 | " \n",
3232 | " kid6_10_U | \n",
3233 | " 17241.0 | \n",
3234 | " 0.899020 | \n",
3235 | " 0.301311 | \n",
3236 | " 0.0 | \n",
3237 | " 1.000000 | \n",
3238 | " 1.000000 | \n",
3239 | " 1.000000 | \n",
3240 | " 1.0 | \n",
3241 | "
\n",
3242 | " \n",
3243 | " kid6_10_Y | \n",
3244 | " 17241.0 | \n",
3245 | " 0.083232 | \n",
3246 | " 0.276240 | \n",
3247 | " 0.0 | \n",
3248 | " 0.000000 | \n",
3249 | " 0.000000 | \n",
3250 | " 0.000000 | \n",
3251 | " 1.0 | \n",
3252 | "
\n",
3253 | " \n",
3254 | " kid6_10_nan | \n",
3255 | " 17241.0 | \n",
3256 | " 0.017748 | \n",
3257 | " 0.132039 | \n",
3258 | " 0.0 | \n",
3259 | " 0.000000 | \n",
3260 | " 0.000000 | \n",
3261 | " 0.000000 | \n",
3262 | " 1.0 | \n",
3263 | "
\n",
3264 | " \n",
3265 | " kid11_15_U | \n",
3266 | " 17241.0 | \n",
3267 | " 0.891944 | \n",
3268 | " 0.310461 | \n",
3269 | " 0.0 | \n",
3270 | " 1.000000 | \n",
3271 | " 1.000000 | \n",
3272 | " 1.000000 | \n",
3273 | " 1.0 | \n",
3274 | "
\n",
3275 | " \n",
3276 | " kid11_15_Y | \n",
3277 | " 17241.0 | \n",
3278 | " 0.090308 | \n",
3279 | " 0.286631 | \n",
3280 | " 0.0 | \n",
3281 | " 0.000000 | \n",
3282 | " 0.000000 | \n",
3283 | " 0.000000 | \n",
3284 | " 1.0 | \n",
3285 | "
\n",
3286 | " \n",
3287 | " kid11_15_nan | \n",
3288 | " 17241.0 | \n",
3289 | " 0.017748 | \n",
3290 | " 0.132039 | \n",
3291 | " 0.0 | \n",
3292 | " 0.000000 | \n",
3293 | " 0.000000 | \n",
3294 | " 0.000000 | \n",
3295 | " 1.0 | \n",
3296 | "
\n",
3297 | " \n",
3298 | " kid16_17_U | \n",
3299 | " 17241.0 | \n",
3300 | " 0.886085 | \n",
3301 | " 0.317717 | \n",
3302 | " 0.0 | \n",
3303 | " 1.000000 | \n",
3304 | " 1.000000 | \n",
3305 | " 1.000000 | \n",
3306 | " 1.0 | \n",
3307 | "
\n",
3308 | " \n",
3309 | " kid16_17_Y | \n",
3310 | " 17241.0 | \n",
3311 | " 0.096166 | \n",
3312 | " 0.294827 | \n",
3313 | " 0.0 | \n",
3314 | " 0.000000 | \n",
3315 | " 0.000000 | \n",
3316 | " 0.000000 | \n",
3317 | " 1.0 | \n",
3318 | "
\n",
3319 | " \n",
3320 | " kid16_17_nan | \n",
3321 | " 17241.0 | \n",
3322 | " 0.017748 | \n",
3323 | " 0.132039 | \n",
3324 | " 0.0 | \n",
3325 | " 0.000000 | \n",
3326 | " 0.000000 | \n",
3327 | " 0.000000 | \n",
3328 | " 1.0 | \n",
3329 | "
\n",
3330 | " \n",
3331 | " car_buy_New | \n",
3332 | " 17241.0 | \n",
3333 | " 0.422017 | \n",
3334 | " 0.493896 | \n",
3335 | " 0.0 | \n",
3336 | " 0.000000 | \n",
3337 | " 0.000000 | \n",
3338 | " 1.000000 | \n",
3339 | " 1.0 | \n",
3340 | "
\n",
3341 | " \n",
3342 | " car_buy_UNKNOWN | \n",
3343 | " 17241.0 | \n",
3344 | " 0.560234 | \n",
3345 | " 0.496373 | \n",
3346 | " 0.0 | \n",
3347 | " 0.000000 | \n",
3348 | " 1.000000 | \n",
3349 | " 1.000000 | \n",
3350 | " 1.0 | \n",
3351 | "
\n",
3352 | " \n",
3353 | " car_buy_nan | \n",
3354 | " 17241.0 | \n",
3355 | " 0.017748 | \n",
3356 | " 0.132039 | \n",
3357 | " 0.0 | \n",
3358 | " 0.000000 | \n",
3359 | " 0.000000 | \n",
3360 | " 0.000000 | \n",
3361 | " 1.0 | \n",
3362 | "
\n",
3363 | " \n",
3364 | "
\n",
3365 | "
178 rows × 8 columns
\n",
3366 | "
"
3367 | ],
3368 | "text/plain": [
3369 | " count mean std min 25% \\\n",
3370 | "LOCAL_CUR_SAV_SLOPE 17241.0 0.329458 0.135781 0.0 0.304001 \n",
3371 | "LOCAL_CUR_MON_AVG_BAL 17241.0 0.003645 0.012958 0.0 0.000007 \n",
3372 | "LOCAL_OVEONEYR_FF_MON_AVG_BAL 17241.0 0.012951 0.023193 0.0 0.000000 \n",
3373 | "LOCAL_FIX_MON_AVG_BAL 17241.0 0.001925 0.008266 0.0 0.000000 \n",
3374 | "LOCAL_FIX_MON_AVG_BAL_PROP 17241.0 0.587804 0.443246 0.0 0.000000 \n",
3375 | "LOCAL_BELONEYR_FF_SLOPE 17241.0 0.350814 0.088405 0.0 0.344437 \n",
3376 | "LOCAL_BELONEYR_FF_MON_AVG_BAL 17241.0 0.000271 0.007734 0.0 0.000000 \n",
3377 | "LOCAL_OVEONEYR_FF_SLOPE 17241.0 0.350590 0.070418 0.0 0.344442 \n",
3378 | "LOCAL_SAV_SLOPE 17241.0 0.342997 0.088393 0.0 0.344164 \n",
3379 | "LOCAL_SAV_CUR_ALL_BAL 17241.0 0.002509 0.008524 0.0 0.001131 \n",
3380 | "LOCAL_SAV_MON_AVG_BAL 17241.0 0.002562 0.008540 0.0 0.001160 \n",
3381 | "SAV_SLOPE 17241.0 0.342959 0.088283 0.0 0.344162 \n",
3382 | "SAV_CUR_ALL_BAL 17241.0 0.002511 0.008525 0.0 0.001131 \n",
3383 | "SAV_MON_AVG_BAL 17241.0 0.002563 0.008541 0.0 0.001161 \n",
3384 | "FR_SAV_CUR_ALL_BAL 17241.0 0.000441 0.014429 0.0 0.000000 \n",
3385 | "ASSET_CUR_ALL_BAL 17241.0 0.002650 0.008598 0.0 0.001142 \n",
3386 | "ASSET_MON_AVG_BAL 17241.0 0.002701 0.008613 0.0 0.001193 \n",
3387 | "LOCAL_CUR_TRANS_TX_AMT 17241.0 0.000417 0.010761 0.0 0.000000 \n",
3388 | "LOCAL_CUR_LASTSAV_TX_AMT 17241.0 0.001628 0.013896 0.0 0.000000 \n",
3389 | "LOCAL_CUR_WITHDRAW_TX_AMT 17241.0 0.002978 0.018774 0.0 0.000000 \n",
3390 | "LOCAL_FIX_OPEN_ACC_TX_AMT 17241.0 0.002138 0.013118 0.0 0.000000 \n",
3391 | "LOCAL_FIX_WITHDRAW_TX_AMT 17241.0 0.001221 0.013362 0.0 0.000000 \n",
3392 | "LOCAL_FIX_CLOSE_ACC_TX_AMT 17241.0 0.004347 0.026010 0.0 0.000000 \n",
3393 | "L6M_INDFINA_ALL_TX_AMT 17241.0 0.001079 0.019296 0.0 0.000000 \n",
3394 | "da_Mean 17241.0 0.015937 0.036701 0.0 0.000000 \n",
3395 | "ovrmou_Mean 17241.0 0.021053 0.049172 0.0 0.000000 \n",
3396 | "ovrrev_Mean 17241.0 0.021747 0.049016 0.0 0.000000 \n",
3397 | "vceovr_Mean 17241.0 0.021327 0.048654 0.0 0.000000 \n",
3398 | "datovr_Mean 17241.0 0.002470 0.021156 0.0 0.000000 \n",
3399 | "roam_Mean 17241.0 0.002738 0.015707 0.0 0.000000 \n",
3400 | "... ... ... ... ... ... \n",
3401 | "marital_nan 17241.0 0.017748 0.132039 0.0 0.000000 \n",
3402 | "wrkwoman_Y 17241.0 0.129343 0.335589 0.0 0.000000 \n",
3403 | "wrkwoman_nan 17241.0 0.870657 0.335589 0.0 1.000000 \n",
3404 | "proptype_A 17241.0 0.259092 0.438149 0.0 0.000000 \n",
3405 | "proptype_B 17241.0 0.015370 0.123024 0.0 0.000000 \n",
3406 | "proptype_D 17241.0 0.006902 0.082794 0.0 0.000000 \n",
3407 | "proptype_E 17241.0 0.003770 0.061287 0.0 0.000000 \n",
3408 | "proptype_G 17241.0 0.000870 0.029484 0.0 0.000000 \n",
3409 | "proptype_M 17241.0 0.001972 0.044365 0.0 0.000000 \n",
3410 | "proptype_nan 17241.0 0.712024 0.452833 0.0 0.000000 \n",
3411 | "pcowner_Y 17241.0 0.187460 0.390292 0.0 0.000000 \n",
3412 | "pcowner_nan 17241.0 0.812540 0.390292 0.0 1.000000 \n",
3413 | "kid0_2_U 17241.0 0.943623 0.230655 0.0 1.000000 \n",
3414 | "kid0_2_Y 17241.0 0.038629 0.192714 0.0 0.000000 \n",
3415 | "kid0_2_nan 17241.0 0.017748 0.132039 0.0 0.000000 \n",
3416 | "kid3_5_U 17241.0 0.937591 0.241905 0.0 1.000000 \n",
3417 | "kid3_5_Y 17241.0 0.044661 0.206564 0.0 0.000000 \n",
3418 | "kid3_5_nan 17241.0 0.017748 0.132039 0.0 0.000000 \n",
3419 | "kid6_10_U 17241.0 0.899020 0.301311 0.0 1.000000 \n",
3420 | "kid6_10_Y 17241.0 0.083232 0.276240 0.0 0.000000 \n",
3421 | "kid6_10_nan 17241.0 0.017748 0.132039 0.0 0.000000 \n",
3422 | "kid11_15_U 17241.0 0.891944 0.310461 0.0 1.000000 \n",
3423 | "kid11_15_Y 17241.0 0.090308 0.286631 0.0 0.000000 \n",
3424 | "kid11_15_nan 17241.0 0.017748 0.132039 0.0 0.000000 \n",
3425 | "kid16_17_U 17241.0 0.886085 0.317717 0.0 1.000000 \n",
3426 | "kid16_17_Y 17241.0 0.096166 0.294827 0.0 0.000000 \n",
3427 | "kid16_17_nan 17241.0 0.017748 0.132039 0.0 0.000000 \n",
3428 | "car_buy_New 17241.0 0.422017 0.493896 0.0 0.000000 \n",
3429 | "car_buy_UNKNOWN 17241.0 0.560234 0.496373 0.0 0.000000 \n",
3430 | "car_buy_nan 17241.0 0.017748 0.132039 0.0 0.000000 \n",
3431 | "\n",
3432 | " 50% 75% max \n",
3433 | "LOCAL_CUR_SAV_SLOPE 0.333333 0.358169 1.0 \n",
3434 | "LOCAL_CUR_MON_AVG_BAL 0.000425 0.003037 1.0 \n",
3435 | "LOCAL_OVEONEYR_FF_MON_AVG_BAL 0.008991 0.017082 1.0 \n",
3436 | "LOCAL_FIX_MON_AVG_BAL 0.001300 0.002320 1.0 \n",
3437 | "LOCAL_FIX_MON_AVG_BAL_PROP 0.854396 0.990436 1.0 \n",
3438 | "LOCAL_BELONEYR_FF_SLOPE 0.344437 0.344437 1.0 \n",
3439 | "LOCAL_BELONEYR_FF_MON_AVG_BAL 0.000000 0.000000 1.0 \n",
3440 | "LOCAL_OVEONEYR_FF_SLOPE 0.344442 0.345026 1.0 \n",
3441 | "LOCAL_SAV_SLOPE 0.344535 0.356539 1.0 \n",
3442 | "LOCAL_SAV_CUR_ALL_BAL 0.001710 0.002807 1.0 \n",
3443 | "LOCAL_SAV_MON_AVG_BAL 0.001749 0.002861 1.0 \n",
3444 | "SAV_SLOPE 0.344535 0.356519 1.0 \n",
3445 | "SAV_CUR_ALL_BAL 0.001713 0.002809 1.0 \n",
3446 | "SAV_MON_AVG_BAL 0.001751 0.002862 1.0 \n",
3447 | "FR_SAV_CUR_ALL_BAL 0.000000 0.000000 1.0 \n",
3448 | "ASSET_CUR_ALL_BAL 0.001781 0.002941 1.0 \n",
3449 | "ASSET_MON_AVG_BAL 0.001816 0.002988 1.0 \n",
3450 | "LOCAL_CUR_TRANS_TX_AMT 0.000000 0.000000 1.0 \n",
3451 | "LOCAL_CUR_LASTSAV_TX_AMT 0.000000 0.000000 1.0 \n",
3452 | "LOCAL_CUR_WITHDRAW_TX_AMT 0.000000 0.000492 1.0 \n",
3453 | "LOCAL_FIX_OPEN_ACC_TX_AMT 0.000000 0.000000 1.0 \n",
3454 | "LOCAL_FIX_WITHDRAW_TX_AMT 0.000000 0.000000 1.0 \n",
3455 | "LOCAL_FIX_CLOSE_ACC_TX_AMT 0.000000 0.000000 1.0 \n",
3456 | "L6M_INDFINA_ALL_TX_AMT 0.000000 0.000000 1.0 \n",
3457 | "da_Mean 0.004292 0.017167 1.0 \n",
3458 | "ovrmou_Mean 0.001457 0.021592 1.0 \n",
3459 | "ovrrev_Mean 0.001622 0.022766 1.0 \n",
3460 | "vceovr_Mean 0.000873 0.021976 1.0 \n",
3461 | "datovr_Mean 0.000000 0.000000 1.0 \n",
3462 | "roam_Mean 0.000000 0.000619 1.0 \n",
3463 | "... ... ... ... \n",
3464 | "marital_nan 0.000000 0.000000 1.0 \n",
3465 | "wrkwoman_Y 0.000000 0.000000 1.0 \n",
3466 | "wrkwoman_nan 1.000000 1.000000 1.0 \n",
3467 | "proptype_A 0.000000 1.000000 1.0 \n",
3468 | "proptype_B 0.000000 0.000000 1.0 \n",
3469 | "proptype_D 0.000000 0.000000 1.0 \n",
3470 | "proptype_E 0.000000 0.000000 1.0 \n",
3471 | "proptype_G 0.000000 0.000000 1.0 \n",
3472 | "proptype_M 0.000000 0.000000 1.0 \n",
3473 | "proptype_nan 1.000000 1.000000 1.0 \n",
3474 | "pcowner_Y 0.000000 0.000000 1.0 \n",
3475 | "pcowner_nan 1.000000 1.000000 1.0 \n",
3476 | "kid0_2_U 1.000000 1.000000 1.0 \n",
3477 | "kid0_2_Y 0.000000 0.000000 1.0 \n",
3478 | "kid0_2_nan 0.000000 0.000000 1.0 \n",
3479 | "kid3_5_U 1.000000 1.000000 1.0 \n",
3480 | "kid3_5_Y 0.000000 0.000000 1.0 \n",
3481 | "kid3_5_nan 0.000000 0.000000 1.0 \n",
3482 | "kid6_10_U 1.000000 1.000000 1.0 \n",
3483 | "kid6_10_Y 0.000000 0.000000 1.0 \n",
3484 | "kid6_10_nan 0.000000 0.000000 1.0 \n",
3485 | "kid11_15_U 1.000000 1.000000 1.0 \n",
3486 | "kid11_15_Y 0.000000 0.000000 1.0 \n",
3487 | "kid11_15_nan 0.000000 0.000000 1.0 \n",
3488 | "kid16_17_U 1.000000 1.000000 1.0 \n",
3489 | "kid16_17_Y 0.000000 0.000000 1.0 \n",
3490 | "kid16_17_nan 0.000000 0.000000 1.0 \n",
3491 | "car_buy_New 0.000000 1.000000 1.0 \n",
3492 | "car_buy_UNKNOWN 1.000000 1.000000 1.0 \n",
3493 | "car_buy_nan 0.000000 0.000000 1.0 \n",
3494 | "\n",
3495 | "[178 rows x 8 columns]"
3496 | ]
3497 | },
3498 | "execution_count": 27,
3499 | "metadata": {},
3500 | "output_type": "execute_result"
3501 | }
3502 | ],
3503 | "source": [
3504 | "model_data_des"
3505 | ]
3506 | },
3507 | {
3508 | "cell_type": "code",
3509 | "execution_count": null,
3510 | "metadata": {
3511 | "collapsed": true
3512 | },
3513 | "outputs": [],
3514 | "source": []
3515 | },
3516 | {
3517 | "cell_type": "code",
3518 | "execution_count": null,
3519 | "metadata": {
3520 | "collapsed": true
3521 | },
3522 | "outputs": [],
3523 | "source": []
3524 | }
3525 | ],
3526 | "metadata": {
3527 | "kernelspec": {
3528 | "display_name": "Python 3",
3529 | "language": "python",
3530 | "name": "python3"
3531 | },
3532 | "language_info": {
3533 | "codemirror_mode": {
3534 | "name": "ipython",
3535 | "version": 3
3536 | },
3537 | "file_extension": ".py",
3538 | "mimetype": "text/x-python",
3539 | "name": "python",
3540 | "nbconvert_exporter": "python",
3541 | "pygments_lexer": "ipython3",
3542 | "version": "3.6.3"
3543 | }
3544 | },
3545 | "nbformat": 4,
3546 | "nbformat_minor": 2
3547 | }
3548 |
--------------------------------------------------------------------------------